scripts + bin
parent
4116d71678
commit
6d2000981c
Binary file not shown.
@ -0,0 +1,97 @@
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
import re
|
||||
import sys
|
||||
from hashlib import sha1
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
|
||||
# Register serialization prefixes with ElementTree: an empty prefix for the
# OPF namespace and "dc" for the Dublin Core elements namespace.
for _prefix, _uri in (
    ('', "http://www.idpf.org/2007/opf"),
    ('dc', 'http://purl.org/dc/elements/1.1/'),
):
    ET.register_namespace(_prefix, _uri)
|
||||
|
||||
def rewrite_opf(from_path, to_path, calibre_id, title):
    """Copy an OPF document, patching its Calibre identifier and title.

    Parses ``from_path``, sets the text of every ``dc:identifier`` element
    whose ``opf:scheme`` attribute is ``calibre`` to ``calibre_id``, replaces
    the text of any ``dc:title`` element that reads exactly ``"Untitled"``,
    and writes the result to ``to_path`` as UTF-8 XML with a declaration.

    Args:
        from_path: OPF file to read.
        to_path: Destination the patched document is written to.
        calibre_id: Value stored (stringified) in the calibre identifier.
        title: Replacement title; anything up to and including the first
            "/" is dropped and surrounding whitespace is stripped.
    """
    dc_ns = "{http://purl.org/dc/elements/1.1/}"
    opf_ns = "{http://www.idpf.org/2007/opf}"

    tree = ET.parse(from_path)

    calibre_id_query = f".//{dc_ns}identifier[@{opf_ns}scheme='calibre']"
    for id_node in tree.findall(calibre_id_query):
        print("found calibre_id", id_node.text)
        id_node.text = f"{calibre_id}"

    for title_node in tree.findall(f".//{dc_ns}title"):
        # Only placeholder titles are overwritten; real titles are kept.
        if title_node.text != "Untitled":
            continue
        title = title.split("/", 1)[-1].strip()
        title_node.text = title

    tree.write(to_path, xml_declaration=True, encoding='utf-8', method='xml')
|
||||
|
||||
# Command-line interface: two positional arguments, the library to fix and
# the temporary library that supplies OPF files.
ap = ArgumentParser(
    prog="add_missing_opf",
    description="add missing OPF files from one Calibre library to another, preserving calibre_id's",
)
for _name, _help in (
    ("library", "the library to fix (keep)"),
    ("tmp", "the tmp library to use as source of OPF files"),
):
    ap.add_argument(_name, help=_help)
args = ap.parse_args()
|
||||
|
||||
def hash_file(path, chunk_size=1 << 20):
    """Return the hexadecimal SHA-1 digest of the file at ``path``.

    The file is read in blocks of ``chunk_size`` bytes so that large book
    files are never held in memory all at once.

    Args:
        path: File to hash; opened in binary mode.
        chunk_size: Maximum number of bytes read per iteration.

    Returns:
        The SHA-1 digest as a lowercase hex string.
    """
    digest = sha1()
    with open(path, "rb") as fin:
        # read() returns b"" at end of file, which terminates the iterator.
        for chunk in iter(lambda: fin.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
||||
def metadata_files(library_path):
    """Walk ``<library>/<author>/<title> (<id>)`` folders and describe each book.

    Only second-level directories whose name ends in ``(<digits>)`` are
    considered. For each one a tuple is yielded:

    * ``title_id``: ``"<author folder name>/<folder name before the '('>"``
    * ``title_hash``: ``hash_file`` of the first ``*.pdf`` / ``*.epub`` file
      in sorted order, or ``None`` (with a printed warning) if there is none
    * ``calibre_id``: the digits from the folder name, as an ``int``
    * ``opf``: path of ``metadata.opf`` in that folder (it may not exist)
    """
    for author_dir in Path(library_path).glob("*"):
        if not author_dir.is_dir():
            continue
        for book_dir in author_dir.glob("*"):
            if not book_dir.is_dir():
                continue
            match = re.search(r"^(.*)\((\d+)\)$", book_dir.name)
            if match is None:
                continue

            name_part, id_part = match.group(1), match.group(2)
            title_id = f"{author_dir.name}/{name_part}"

            media = sorted([*book_dir.glob("*.pdf"), *book_dir.glob("*.epub")])
            if media:
                title_hash = hash_file(media[0])
            else:
                title_hash = None
                print(f"warning: no book media {book_dir}")

            yield title_id, title_hash, int(id_part), book_dir / "metadata.opf"
|
||||
|
||||
|
||||
# PASS 1: index the OPF files that exist in the tmp library, keyed by the
# hash that metadata_files reports for each book's media file.
print(f"PASS 1: {args.tmp}")
metadata_by_title = {}
for title, title_hash, calibre_id, metadata in metadata_files(args.tmp):
    # A Path object is always truthy, so check the file on disk instead;
    # indexing a missing OPF would make rewrite_opf fail later. Books without
    # media have no hash (None) and must not be indexed, otherwise unrelated
    # media-less books would all match each other in pass 2.
    if title_hash is None or not metadata.exists():
        continue
    if title_hash in metadata_by_title:
        print(f"warning: duplicate title: {title_hash}", file=sys.stderr)
    metadata_by_title[title_hash] = metadata

print()
print(f"PASS 2: {args.library}")
replaced_count = 0
missing_count = 0
existing_count = 0

# PASS 2: for every book in the library that lacks metadata.opf, stage a
# rewritten copy of the matching tmp OPF as metadata_new.opf.
for title, title_hash, calibre_id, metadata in metadata_files(args.library):
    if metadata.exists():
        existing_count += 1
        continue

    # A book without media (hash None) can never be matched reliably.
    old_metadata = None
    if title_hash is not None:
        old_metadata = metadata_by_title.get(title_hash)

    if old_metadata is None:
        print(f"NO METADATA for {title}")
        missing_count += 1
        continue

    replaced_count += 1
    print(old_metadata)
    rewrite_opf(old_metadata, metadata.parent / "metadata_new.opf", calibre_id, title)

total = existing_count + replaced_count + missing_count
print(f"Of {total} items, {existing_count} had metadata, {replaced_count} can be replaced, {missing_count} missing.")
|
||||
|
||||
|
||||
|
||||
|
@ -0,0 +1,25 @@
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Command-line interface for the finalize step.
ap = ArgumentParser("add_missing_opf_finalize")
ap.add_argument("library", help="the library to fix (keep)")
# This script never reads args.tmp. The positional is still accepted so that
# invocations which pass it keep working, but it is no longer required.
ap.add_argument(
    "tmp",
    nargs="?",
    help="the tmp library (accepted for compatibility; not used by this script)",
)
args = ap.parse_args()
|
||||
|
||||
# Book folders are the second-level directories whose name ends in "(<digits>)".
_BOOK_FOLDER_RE = re.compile(r"^(.*)\((\d+)\)$")

# Promote each staged metadata_new.opf to metadata.opf.
for author_folder in Path(args.library).glob("*"):
    if not author_folder.is_dir():
        continue
    for title_folder in author_folder.glob("*"):
        if not title_folder.is_dir():
            continue
        if _BOOK_FOLDER_RE.search(title_folder.name) is None:
            continue

        opf = title_folder / "metadata.opf"
        opf_new = title_folder / "metadata_new.opf"
        # Rename only when no metadata.opf is present, so an existing file
        # is never replaced by the staged one.
        if (not opf.exists()) and opf_new.exists():
            print(f"Renaming {opf_new} -> {opf}")
            opf_new.rename(opf)
|
||||
|
||||
|
Loading…
Reference in New Issue