279 lines
8.2 KiB
Python
Executable File
279 lines
8.2 KiB
Python
Executable File
#!/Applications/calibre.app/Contents/MacOS/calibre-debug calibre_to_hugo.py
|
|
# See: https://manual.calibre-ebook.com/db_api.html
|
|
import os
|
|
import json
|
|
import copy
|
|
import shutil
|
|
import pathlib
|
|
import hashlib
|
|
import urllib
|
|
import calibre.library
|
|
|
|
import config
|
|
|
|
class CalibreLibrary: pass
|
|
|
|
class CalibreLibraryBook:
|
|
_db: CalibreLibrary
|
|
|
|
id: int
|
|
title: str
|
|
authors: str
|
|
filepath: str
|
|
filehash: str
|
|
filename: str
|
|
cover: str
|
|
|
|
def __init__(self, db: CalibreLibrary):
|
|
self._db = db
|
|
self.cover = None
|
|
self.filehash = None
|
|
|
|
""" Hash the filepath SHA256 to hex string """
|
|
def hash(self) -> str:
|
|
if not self.filehash:
|
|
self.filehash = hashlib.sha256(self.filepath.encode("utf8")).hexdigest()
|
|
|
|
return self.filehash
|
|
|
|
""" Get book properties for JSON serialize """
|
|
def to_json(self):
|
|
data = {}
|
|
for key in list(self.__dict__.keys()):
|
|
if key.startswith("_"):
|
|
continue
|
|
data[key] = self.__dict__[key]
|
|
|
|
return data
|
|
|
|
""" Generate the ebook filename based on the ebook file hash """
|
|
def ebook_filename(self) -> str:
|
|
filehash = self.hash()
|
|
ext = pathlib.Path(self.filepath).suffix
|
|
self.filename = f"{filehash}{ext}"
|
|
return self.filename
|
|
|
|
""" Save the ebook from the calibre library to the path """
|
|
def ebook_save(self, path: str) -> str:
|
|
filepath = os.path.join(path, self.ebook_filename())
|
|
|
|
if os.path.exists(filepath):
|
|
return
|
|
|
|
print(f"COPY {self.filepath} -> {filepath}")
|
|
shutil.copyfile(self.filepath, filepath)
|
|
return filepath
|
|
|
|
""" Save the ebook cover to the path """
|
|
def cover_save(self, path: str) -> str:
|
|
filehash = self.hash()
|
|
|
|
# NOTE: we assume by testing the cover is a .jpg so we check if it exists on disk already as "<hash>.jpg"
|
|
self.cover = os.path.join(path, f"{filehash}.jpg")
|
|
if os.path.exists(self.cover):
|
|
print(f"COVER {self.cover}")
|
|
return self.cover
|
|
|
|
cover_tmpfile = self._db.cover(self.id, as_path=True)
|
|
cover_ext = pathlib.Path(cover_tmpfile).suffix
|
|
|
|
self.cover = os.path.join(path, f"{filehash}{cover_ext}")
|
|
os.rename(cover_tmpfile, self.cover)
|
|
print(f"SAVE COVER {self.cover}")
|
|
|
|
return self.cover
|
|
|
|
class CalibreLibrary:
|
|
def __init__(self, library_path: str):
|
|
# First open the Calibre library and get a list of the book IDs
|
|
self._db = calibre.library.db(library_path).new_api
|
|
|
|
@staticmethod
|
|
def _get_filesize_str(path: str) -> str:
|
|
size = os.path.getsize(path)
|
|
if size < 1024:
|
|
return f"{size} bytes"
|
|
elif size < pow(1024,2):
|
|
return f"{round(size/1024, 2)} KB"
|
|
elif size < pow(1024,3):
|
|
return f"{round(size/(pow(1024,2)), 2)} MB"
|
|
elif size < pow(1024,4):
|
|
return f"{round(size/(pow(1024,3)), 2)} GB"
|
|
|
|
def books(self) -> 'list[CalibreLibraryBook]':
|
|
books = []
|
|
book_ids = self._db.all_book_ids()
|
|
|
|
for book_id in book_ids:
|
|
# TODO check loaded state with state of calibre based on book.id
|
|
# hashing takes way to long...
|
|
book = CalibreLibraryBook(self._db)
|
|
book.id = book_id
|
|
book.title = self._db.field_for("title", book.id)
|
|
book.authors = self._db.field_for("authors", book.id)
|
|
book.comments = self._db.field_for("comments", book.id)
|
|
book._metadata = self._db.get_metadata(book.id)
|
|
book.ids = book._metadata.get_identifiers()
|
|
|
|
# Select only first ebook format
|
|
formats = self._db.formats(book.id, verify_formats=True)
|
|
if len(formats) > 0:
|
|
book.filepath = self._db.format_abspath(book.id, formats[0])
|
|
book.filesize = self._get_filesize_str(book.filepath)
|
|
else:
|
|
book.filepath = None
|
|
book.filesize = 0
|
|
|
|
if book.hash() in config.CLEARWEB_FILTERED_BOOKS:
|
|
print(f"SKIP Book {book.title} filtered and not published on the clearweb")
|
|
continue
|
|
|
|
books.append(book)
|
|
|
|
return books
|
|
|
|
class PyroTechnyLibrary:
|
|
_calibre_library : CalibreLibrary
|
|
_path: str
|
|
_tempdir: str
|
|
_google_drive_file_db: list
|
|
|
|
def __init__(self, path: str, calibre_library: CalibreLibrary):
|
|
self._books = []
|
|
self._calibre_library = calibre_library
|
|
self._path = path
|
|
self._state = {}
|
|
self._state["books"] = []
|
|
|
|
# Create path directories
|
|
if not os.path.exists(self._path):
|
|
print(f"CREATE {self._path}")
|
|
os.makedirs(self._path, 0o755)
|
|
|
|
self._load_google_drive_file_db()
|
|
|
|
def _get_google_drive_value_from_filename(self, filename: str, key: str) -> str:
|
|
value = ""
|
|
for file in self._google_drive_file_db:
|
|
if filename == file["filename"]:
|
|
value = file[key]
|
|
break
|
|
|
|
if value == "":
|
|
printf(f"ERR unable to find {filename} in google drive db.json")
|
|
|
|
return value
|
|
|
|
def _load_google_drive_file_db(self):
|
|
print(f"LOAD db.json from Google Drive: {config.GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL}")
|
|
resp = urllib.request.urlopen(config.GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL)
|
|
self._google_drive_file_db = json.loads(resp.read())
|
|
|
|
def _generate_book_dl_page(self, path, book):
|
|
pass
|
|
|
|
def _generate_book_page(self, path, book):
|
|
filepath = os.path.join(path, f'{book.filehash}.md')
|
|
if os.path.exists(filepath):
|
|
os.remove(filepath)
|
|
|
|
# TODO: When we us a template file we can check if the template
|
|
# or script is newer than the target book page markdown file. To speed things up a bit
|
|
print(f"GEN {filepath}")
|
|
|
|
with open(filepath, "w") as fd:
|
|
data = f'''---
|
|
title: "{book.title}"
|
|
description: ""
|
|
featured_image: "/images/site/library-header.jpg"
|
|
type: page
|
|
---
|
|
|
|
'''
|
|
fd.write(data)
|
|
|
|
cover = os.path.basename(book.cover)
|
|
cover_url = os.path.join(config.LIBRARY_EBOOKS_THUMBS_URL_PATH, cover)
|
|
|
|
book_url = self._get_google_drive_value_from_filename(book.filename, "view_url")
|
|
book_dl_url = self._get_google_drive_value_from_filename(book.filename, "download_url")
|
|
|
|
authors = list(book.authors)
|
|
authors = ', '.join(authors)
|
|
if len(book.authors) == 1:
|
|
if book.authors[0] == "Unknown":
|
|
authors = None
|
|
|
|
fd.write(f'<a href="{book_url}" target="_blank"></a>\n')
|
|
|
|
if authors:
|
|
author_suffix = ''
|
|
if len(book.authors) > 1:
|
|
author_suffix = 's'
|
|
fd.write(f"* Author{author_suffix}: {authors}\n")
|
|
|
|
# IDs to book libraries and publishers
|
|
if book.ids and len(book.ids) > 0:
|
|
fd.write(f'* IDs:\n')
|
|
for key, value in book.ids.items():
|
|
if key == "amazon":
|
|
fd.write(f' * Amazon: <a href="https://www.amazon.com/dp/{value}" target="_blank">{value}</a>\n')
|
|
elif key == "google":
|
|
fd.write(f' * Google: <a href="https://books.google.com/books?id={value}" target="_blank">{value}</a>\n')
|
|
elif key == "isbn":
|
|
fd.write(f' * ISBN: <a href="https://www.worldcat.org/isbn/{value}" target="_blank">{value}</a>\n')
|
|
elif key == "doi":
|
|
fd.write(f' * DOI: <a href="https://dx.doi.org/{value}" target="_blank">{value}</a>\n')
|
|
|
|
# View & Download
|
|
fd.write(f'* <a href="{book_url}" target="_blank">View</a>\n\n')
|
|
fd.write(f'* [Download]({book_dl_url}) ({book.filesize})\n\n')
|
|
|
|
# Comments
|
|
if book.comments:
|
|
fd.write(f'## Description')
|
|
fd.write(f'{book.comments}\n\n')
|
|
|
|
# Back
|
|
fd.write(f'<br />[Back to library]({config.LIBRARY_EBOOKS_BASE_URL}/)\n')
|
|
|
|
def synchronize(self):
|
|
# Load books from calibre
|
|
books = self._calibre_library.books()
|
|
for book in books:
|
|
book.ebook_filename()
|
|
book.cover_save(config.HUGO_STATIC_CONTENT_LIBRARY_IMAGES_PATH)
|
|
book.ebook_save(config.HUGO_STATIC_CONTENT_LIBRARY_PATH)
|
|
self._state["books"].append(book)
|
|
|
|
""" Generate hugo markdown content files """
|
|
def generate(self):
|
|
# per-book page generation
|
|
book_page_path = config.HUGO_CONTENT_LIBRARY_PATH
|
|
if not os.path.exists(book_page_path):
|
|
os.makedirs(book_page_path, 0o755)
|
|
|
|
# TODO synced state instead of calibre copy....
|
|
for book in self._state["books"]:
|
|
self._generate_book_page(book_page_path, book)
|
|
|
|
def generate_library_sitemap(self, filepath: str):
|
|
if os.path.exists(filepath):
|
|
os.remove(filepath)
|
|
|
|
with open(filepath, "w") as fd:
|
|
fd.write("# E-books library sitemap\n\n")
|
|
|
|
for book in self._state["books"]:
|
|
fd.write(f"* [{book.title}]({config.LIBRARY_EBOOKS_BASE_URL}/ebooks/{book.hash()})\n")
|
|
|
|
def main():
|
|
calibre_library = CalibreLibrary(config.CALIBRE_LIBRARY_PATH)
|
|
pyrotechny_library = PyroTechnyLibrary(config.HUGO_STATIC_CONTENT_LIBRARY_PATH, calibre_library)
|
|
pyrotechny_library.synchronize()
|
|
pyrotechny_library.generate()
|
|
pyrotechny_library.generate_library_sitemap(config.HUGO_CONTENT_LIBRARY_SITEMAP_PATH)
|
|
|
|
if __name__ == "__main__":
|
|
main()
|