diff --git a/bin/calibre-3.39.1-x86_64.txz b/bin/calibre-3.39.1-x86_64.txz
new file mode 100644
index 0000000..773b2ea
Binary files /dev/null and b/bin/calibre-3.39.1-x86_64.txz differ
diff --git a/scripts/add_missing_opf.py b/scripts/add_missing_opf.py
new file mode 100644
index 0000000..7571770
--- /dev/null
+++ b/scripts/add_missing_opf.py
@@ -0,0 +1,129 @@
+"""Add missing metadata.opf files to a Calibre library.
+
+Books are matched between the library being fixed and a temporary source
+library by the SHA-1 of their first PDF/EPUB file.  For every book folder of
+the fixed library that lacks ``metadata.opf``, the matching OPF of the source
+library is written next to it as ``metadata_new.opf`` with the calibre id
+(and an "Untitled" title) rewritten.
+"""
+from argparse import ArgumentParser
+from pathlib import Path
+import re
+import sys
+from hashlib import sha1
+from xml.etree import ElementTree as ET
+
+# Book folders are named "<title>(<digits>)"; the digits are the calibre id.
+TITLE_FOLDER_RE = re.compile(r"^(.*)\((\d+)\)$")
+
+# Read size for hashing, so a large book is never held in memory as a whole.
+HASH_CHUNK_SIZE = 1 << 20
+
+ET.register_namespace('',"http://www.idpf.org/2007/opf")
+ET.register_namespace('dc','http://purl.org/dc/elements/1.1/')
+
+
+def rewrite_opf(from_path, to_path, calibre_id, title):
+    """Write a copy of the OPF at *from_path* to *to_path* with a new id.
+
+    Every ``dc:identifier`` whose ``opf:scheme`` is ``calibre`` gets the text
+    *calibre_id*.  A ``dc:title`` that is exactly "Untitled" is replaced by
+    *title*, minus anything up to the first "/" and surrounding whitespace.
+    """
+    # Callers pass "<author>/<title>" ids; only the title part belongs in the
+    # OPF.  Computed once so every "Untitled" tag gets the same text.
+    new_title = title.split("/", 1)[-1].strip()
+
+    et = ET.parse(from_path)
+    for tag in et.findall(".//{http://purl.org/dc/elements/1.1/}identifier[@{http://www.idpf.org/2007/opf}scheme='calibre']"):
+        print("found calibre_id", tag.text)
+        tag.text = str(calibre_id)
+    for tag in et.findall(".//{http://purl.org/dc/elements/1.1/}title"):
+        if tag.text == "Untitled":
+            tag.text = new_title
+    et.write(to_path, xml_declaration=True, encoding="utf-8", method="xml")
+
+
+def hash_file(path):
+    """Return the hexadecimal SHA-1 digest of the file at *path*."""
+    digest = sha1()
+    with open(path, "rb") as fin:
+        # The b"" sentinel ends the iteration at the first empty read (EOF).
+        for chunk in iter(lambda: fin.read(HASH_CHUNK_SIZE), b""):
+            digest.update(chunk)
+    return digest.hexdigest()
+
+
+def metadata_files(library_path):
+    """Yield ``(title_id, title_hash, calibre_id, opf)`` per book folder.
+
+    Book folders are the directories ``<library>/<author>/<title>(<id>)``.
+    *title_id* is "<author>/<title>", *title_hash* is the SHA-1 of the first
+    PDF/EPUB of the folder in sorted path order (``None`` without any), and
+    *opf* is the ``metadata.opf`` path of the folder, which may not exist.
+    """
+    for author_folder in Path(library_path).glob("*"):
+        if not author_folder.is_dir():
+            continue
+        for title_folder in author_folder.glob("*"):
+            m = TITLE_FOLDER_RE.search(title_folder.name)
+            if m is None or not title_folder.is_dir():
+                continue
+            title = m.group(1)
+            calibre_id = int(m.group(2))
+            opf = title_folder / "metadata.opf"
+            title_id = f"{author_folder.name}/{title}"
+
+            book_media = sorted(list(title_folder.glob("*.pdf")) + list(title_folder.glob("*.epub")))
+            if book_media:
+                title_hash = hash_file(book_media[0])
+            else:
+                title_hash = None
+                print(f"warning: no book media {title_folder}")
+
+            yield title_id, title_hash, calibre_id, opf
+
+
+def main():
+    """Index the OPF files of the tmp library, then fill gaps in the library."""
+    ap = ArgumentParser("add_missing_opf", description="add missing OPF files from one Calibre library to another, preserving calibre_id's")
+    ap.add_argument("library", help="the library to fix (keep)")
+    ap.add_argument("tmp", help="the tmp library to use as source of OPF files")
+    args = ap.parse_args()
+
+    print(f"PASS 1: {args.tmp}")
+    metadata_by_title = {}
+    for _title, title_hash, _calibre_id, metadata in metadata_files(args.tmp):
+        # A Path is always truthy, so ask the filesystem whether the OPF is
+        # there; books without media have no hash to be matched by.
+        if title_hash is None or not metadata.exists():
+            continue
+        if title_hash in metadata_by_title:
+            print(f"warning: duplicate title: {title_hash}", file=sys.stderr)
+        metadata_by_title[title_hash] = metadata
+    print()
+    print(f"PASS 2: {args.library}")
+    replaced_count = 0
+    missing_count = 0
+    existing_count = 0
+
+    for title, title_hash, calibre_id, metadata in metadata_files(args.library):
+        if metadata.exists():
+            existing_count += 1
+            continue
+        # No None key is ever stored, so books without media count as missing.
+        old_metadata = metadata_by_title.get(title_hash)
+        if old_metadata is None:
+            print(f"NO METADATA for {title}")
+            missing_count += 1
+            continue
+        replaced_count += 1
+        print(old_metadata)
+        rewrite_opf(old_metadata, metadata.parent / "metadata_new.opf", calibre_id, title)
+
+    total = existing_count + replaced_count + missing_count
+    print(f"Of {total} items, {existing_count} had metadata, {replaced_count} can be replaced, {missing_count} missing.")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/add_missing_opf_finalize.py b/scripts/add_missing_opf_finalize.py
new file mode 100644
index 0000000..7e7d1b9
--- /dev/null
+++ b/scripts/add_missing_opf_finalize.py
@@ -0,0 +1,41 @@
+"""Rename generated metadata_new.opf files to metadata.opf.
+
+In every book folder of the library that holds a ``metadata_new.opf`` but no
+``metadata.opf``, the former is renamed to the latter.
+"""
+from argparse import ArgumentParser
+from pathlib import Path
+import re
+import sys
+
+# Book folders are named "<title>(<digits>)"; the digits are the calibre id.
+TITLE_FOLDER_RE = re.compile(r"^(.*)\((\d+)\)$")
+
+
+def main():
+    """Rename each pending metadata_new.opf of the library into place."""
+    ap = ArgumentParser("add_missing_opf_finalize")
+    ap.add_argument("library", help="the library to fix (keep)")
+    # Never read below; optional so it can be left out, yet still accepted so
+    # invocations that pass both libraries keep working.
+    ap.add_argument("tmp", nargs="?", help="the tmp library to use as source of OPF files")
+    args = ap.parse_args()
+
+    for author_folder in Path(args.library).glob("*"):
+        if not author_folder.is_dir():
+            continue
+        for title_folder in author_folder.glob("*"):
+            if TITLE_FOLDER_RE.search(title_folder.name) is None:
+                continue
+            if not title_folder.is_dir():
+                continue
+            opf = title_folder / "metadata.opf"
+            opf_new = title_folder / "metadata_new.opf"
+            # Never overwrite an existing metadata.opf.
+            if opf_new.exists() and not opf.exists():
+                print(f"Renaming {opf_new} -> {opf}")
+                opf_new.rename(opf)
+
+
+if __name__ == "__main__":
+    main()