Initial

2026-03-27 17:56:34 +01:00 · 2024-10-08 19:18:36 +02:00
commit 66ceff8513
718 changed files with 8339 additions and 0 deletions
--- a/cmd/pycache/config.cpython-311.pyc
+++ b/cmd/pycache/config.cpython-311.pyc
--- a/cmd/calibre_to_hugo.py
+++ b/cmd/calibre_to_hugo.py
@ -0,0 +1,274 @@
+#!/Applications/calibre.app/Contents/MacOS/calibre-debug calibre_to_hugo.py
+# See: https://manual.calibre-ebook.com/db_api.html
+import os
+import json
+import copy
+import shutil
+import pathlib
+import hashlib
+import urllib
+import calibre.library
+
+import config
+
+# Forward declaration: lets the annotations in CalibreLibraryBook name
+# CalibreLibrary before the real class (defined further down) rebinds it.
+class CalibreLibrary: pass
+
+class CalibreLibraryBook:
+	_db: CalibreLibrary
+
+	id: int
+	title: str
+	authors: str
+	filepath: str
+	filehash: str
+	filename: str
+	cover: str
+
+	def __init__(self, db: CalibreLibrary):
+		self._db = db
+		self.cover = None
+		self.filehash = None
+
+	""" Hash the filepath SHA256 to hex string """
+	def hash(self) -> str:
+		if not self.filehash:
+			self.filehash = hashlib.sha256(self.filepath.encode("utf8")).hexdigest()
+
+		return self.filehash
+
+	""" Get book properties for JSON serialize """
+	def to_json(self):
+		data = {}
+		for key in list(self.__dict__.keys()):
+			if key.startswith("_"):
+				continue
+			data[key] = self.__dict__[key]
+
+		return data
+
+	""" Generate the ebook filename based on the ebook file hash """
+	def ebook_filename(self) -> str:
+		filehash = self.hash()
+		ext = pathlib.Path(self.filepath).suffix
+		self.filename = f"{filehash}{ext}"
+		return self.filename
+		
+	""" Save the ebook from the calibre library to the path """
+	def ebook_save(self, path: str) -> str:
+		filepath = os.path.join(path, self.ebook_filename())
+
+		if os.path.exists(filepath):
+			return
+
+		print(f"COPY {self.filepath} -> {filepath}")
+		shutil.copyfile(self.filepath, filepath)
+		return filepath
+
+	""" Save the ebook cover to the path """
+	def cover_save(self, path: str) -> str:
+		filehash = self.hash()
+
+		# NOTE: we assume by testing the cover is a .jpg so we check if it exists on disk already as "<hash>.jpg"
+		self.cover = os.path.join(path, f"{filehash}.jpg")
+		if os.path.exists(self.cover):
+			print(f"COVER {self.cover}")
+			return self.cover
+
+		cover_tmpfile = self._db.cover(self.id, as_path=True)
+		cover_ext = pathlib.Path(cover_tmpfile).suffix
+
+		self.cover = os.path.join(path, f"{filehash}{cover_ext}")
+		os.rename(cover_tmpfile, self.cover)
+		print(f"SAVE COVER {self.cover}")
+
+		return self.cover
+
+class CalibreLibrary:
+	def __init__(self, library_path: str):
+		# First open the Calibre library and get a list of the book IDs
+		self._db = calibre.library.db(library_path).new_api
+
+	@staticmethod
+	def _get_filesize_str(path: str) -> str:
+		size = os.path.getsize(path)
+		if size < 1024:
+			return f"{size} bytes"
+		elif size < pow(1024,2):
+			return f"{round(size/1024, 2)} KB"
+		elif size < pow(1024,3):
+			return f"{round(size/(pow(1024,2)), 2)} MB"
+		elif size < pow(1024,4):
+			return f"{round(size/(pow(1024,3)), 2)} GB"
+
+	def books(self) -> 'list[CalibreLibraryBook]':
+		books = []
+		book_ids = self._db.all_book_ids()
+
+		for book_id in book_ids:
+			# TODO check loaded state with state of calibre based on book.id
+			#      hashing takes way to long...
+			book = CalibreLibraryBook(self._db)
+			book.id = book_id
+			book.title = self._db.field_for("title", book.id)
+			book.authors = self._db.field_for("authors", book.id)
+			book.comments = self._db.field_for("comments", book.id) 
+			book._metadata = self._db.get_metadata(book.id)
+			book.ids = book._metadata.get_identifiers()
+
+			# Select only first ebook format 
+			formats = self._db.formats(book.id, verify_formats=True)
+			if len(formats) > 0:
+				book.filepath = self._db.format_abspath(book.id, formats[0])
+				book.filesize = self._get_filesize_str(book.filepath)
+			else:
+				book.filepath = None
+				book.filesize = 0
+
+			if book.hash() in config.CLEARWEB_FILTERED_BOOKS:
+				print(f"SKIP Book {book.title} filtered and not published on the clearweb")
+				continue
+
+			books.append(book)
+	
+		return books
+
+class PyroTechnyLibrary:
+	_calibre_library : CalibreLibrary
+	_path: str
+	_tempdir: str
+	_google_drive_file_db: list
+
+	def __init__(self, path: str, calibre_library: CalibreLibrary):
+		self._books = []
+		self._calibre_library = calibre_library
+		self._path = path
+		self._state = {}
+		self._state["books"] = []
+
+		# Create path directories
+		if not os.path.exists(self._path):
+			print(f"CREATE {self._path}")
+			os.makedirs(self._path, 0o755)
+
+		self._load_google_drive_file_db()
+
+	def _get_google_drive_value_from_filename(self, filename: str, key: str) -> str:
+		value = ""
+		for file in self._google_drive_file_db:
+			if filename == file["filename"]:
+				value = file[key]
+				break
+		return value
+
+	def _load_google_drive_file_db(self):
+		print(f"LOAD db.json from Google Drive: {config.GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL}")
+		resp = urllib.request.urlopen(config.GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL)
+		self._google_drive_file_db = json.loads(resp.read())
+
+	def _generate_book_dl_page(self, path, book):
+		pass
+
+	def _generate_book_page(self, path, book):
+		filepath = os.path.join(path, f'{book.filehash}.md')
+		if os.path.exists(filepath):
+			os.remove(filepath)
+
+		# TODO: When we us a template file we can check if the template
+		#       or script is newer than the target book page markdown file. To speed things up a bit
+		print(f"GEN {filepath}")
+
+		with open(filepath, "w") as fd:
+			data = f'''---
+title: "{book.title}"
+description: ""
+featured_image: "/images/site/library-header.jpg"
+type: page
+---
+
+'''
+			fd.write(data)
+
+			cover = os.path.basename(book.cover)
+			cover_url = os.path.join(config.LIBRARY_EBOOKS_THUMBS_URL_PATH, cover)
+
+			book_url = self._get_google_drive_value_from_filename(book.filename, "view_url")
+			book_dl_url = self._get_google_drive_value_from_filename(book.filename, "download_url")
+
+			authors = list(book.authors)
+			authors = ', '.join(authors)
+			if len(book.authors) == 1:
+				if book.authors[0] == "Unknown":
+					authors = None
+
+			fd.write(f'<a href="{book_url}" target="_blank">![{cover}]({cover_url})</a>\n')
+
+			if authors:
+				author_suffix = ''
+				if len(book.authors) > 1:
+					author_suffix = 's'
+				fd.write(f"* Author{author_suffix}: {authors}\n")
+			
+			# IDs to book libraries and publishers
+			if book.ids and len(book.ids) > 0:
+				fd.write(f'* IDs:\n')
+				for key, value in book.ids.items():
+					if key == "amazon":
+						fd.write(f'  * Amazon: <a href="https://www.amazon.com/dp/{value}" target="_blank">{value}</a>\n')
+					elif key == "google":
+						fd.write(f'  * Google: <a href="https://books.google.com/books?id={value}" target="_blank">{value}</a>\n')
+					elif key == "isbn":
+						fd.write(f'  * ISBN: <a href="https://www.worldcat.org/isbn/{value}" target="_blank">{value}</a>\n')
+					elif key == "doi":
+						fd.write(f'  * DOI: <a href="https://dx.doi.org/{value}" target="_blank">{value}</a>\n')
+						
+			# View & Download	
+			fd.write(f'* <a href="{book_url}" target="_blank">View</a>\n\n')
+			fd.write(f'* [Download]({book_dl_url}) ({book.filesize})\n\n')
+
+			# Comments
+			if book.comments:
+				fd.write(f'## Description')
+				fd.write(f'{book.comments}\n\n')
+
+			# Back
+			fd.write(f'<br />[Back to library]({config.LIBRARY_EBOOKS_BASE_URL}/)\n')
+	
+	def synchronize(self):
+		# Load books from calibre
+		books = self._calibre_library.books()
+		for book in books:
+			book.ebook_filename()
+			book.cover_save(config.HUGO_STATIC_CONTENT_LIBRARY_IMAGES_PATH)  
+			book.ebook_save(config.HUGO_STATIC_CONTENT_LIBRARY_PATH)
+			self._state["books"].append(book)
+
+	""" Generate hugo markdown content files """
+	def generate(self):
+		# per-book page generation
+		book_page_path = config.HUGO_CONTENT_LIBRARY_PATH
+		if not os.path.exists(book_page_path):
+			os.makedirs(book_page_path, 0o755)
+
+		# TODO synced state instead of calibre copy....
+		for book in self._state["books"]:
+			self._generate_book_page(book_page_path, book)
+
+	def generate_library_sitemap(self, filepath: str):
+		if os.path.exists(filepath):
+			os.remove(filepath)
+
+		with open(filepath, "w") as fd:
+			fd.write("# E-books library sitemap\n\n")
+
+			for book in self._state["books"]:
+				fd.write(f"* [{book.title}]({config.LIBRARY_EBOOKS_BASE_URL}/ebooks/{book.hash()})\n")
+
+def main():
+	calibre_library = CalibreLibrary(config.CALIBRE_LIBRARY_PATH)
+	pyrotechny_library = PyroTechnyLibrary(config.HUGO_STATIC_CONTENT_LIBRARY_PATH, calibre_library)
+	pyrotechny_library.synchronize()
+	pyrotechny_library.generate()
+	pyrotechny_library.generate_library_sitemap(config.HUGO_CONTENT_LIBRARY_SITEMAP_PATH)
+
+if __name__ == "__main__":
+    main()
--- a/cmd/calibre_to_torrent.py
+++ b/cmd/calibre_to_torrent.py
@ -0,0 +1,145 @@
+#!/Applications/calibre.app/Contents/MacOS/calibre-debug calibre_to_torrent.py
+# See: https://manual.calibre-ebook.com/db_api.html
+import os
+import json
+import copy
+import shutil
+import pathlib
+import hashlib
+import urllib
+import calibre.library
+
+import config
+
+# Forward declaration: lets the annotations in CalibreLibraryBook name
+# CalibreLibrary before the real class (defined further down) rebinds it.
+class CalibreLibrary: pass
+
+class CalibreLibraryBook:
+	_db: CalibreLibrary
+
+	id: int
+	title: str
+	authors: str
+	filepath: str
+	filehash: str
+	filename: str
+	cover: str
+
+	def __init__(self, db: CalibreLibrary):
+		self._db = db
+		self.cover = None
+		self.filehash = None
+
+	""" Get book properties for JSON serialize """
+	def to_json(self):
+		data = {}
+		for key in list(self.__dict__.keys()):
+			if key.startswith("_"):
+				continue
+			data[key] = self.__dict__[key]
+
+		return data
+
+	""" Generate the ebook filename based on the ebook file hash """
+	def ebook_filename(self) -> str:
+		title = self.title
+		ext = pathlib.Path(self.filepath).suffix
+		self.filename = f"{title}{ext}"
+		self.filename = self.filename.replace(",", ".")
+		self.filename = self.filename.replace(";", "")
+		self.filename = self.filename.replace(":", ".")
+		self.filename = self.filename.replace("/", "-")
+		self.filename = self.filename.replace("'", "")
+		self.filename = self.filename.replace(" ", ".")
+		self.filename = self.filename.replace("..", ".")
+		return self.filename
+		
+	""" Save the ebook from the calibre library to the path """
+	def ebook_save(self, path: str) -> str:
+		filepath = os.path.join(path, self.ebook_filename())
+
+		if os.path.exists(filepath):
+			return
+
+		print(f"COPY {self.filepath} -> {filepath}")
+		shutil.copyfile(self.filepath, filepath)
+		return filepath
+
+
+class CalibreLibrary:
+	def __init__(self, library_path: str):
+		# First open the Calibre library and get a list of the book IDs
+		self._db = calibre.library.db(library_path).new_api
+
+	@staticmethod
+	def _get_filesize_str(path: str) -> str:
+		size = os.path.getsize(path)
+		if size < 1024:
+			return f"{size} bytes"
+		elif size < pow(1024,2):
+			return f"{round(size/1024, 2)} KB"
+		elif size < pow(1024,3):
+			return f"{round(size/(pow(1024,2)), 2)} MB"
+		elif size < pow(1024,4):
+			return f"{round(size/(pow(1024,3)), 2)} GB"
+
+	def books(self) -> 'list[CalibreLibraryBook]':
+		books = []
+		book_ids = self._db.all_book_ids()
+
+		for book_id in book_ids:
+			# TODO check loaded state with state of calibre based on book.id
+			#      hashing takes way to long...
+			book = CalibreLibraryBook(self._db)
+			book.id = book_id
+			book.title = self._db.field_for("title", book.id)
+			book.authors = self._db.field_for("authors", book.id)
+			book.comments = self._db.field_for("comments", book.id) 
+			book._metadata = self._db.get_metadata(book.id)
+			book.ids = book._metadata.get_identifiers()
+
+			# Select only first ebook format 
+			formats = self._db.formats(book.id, verify_formats=True)
+			if len(formats) > 0:
+				book.filepath = self._db.format_abspath(book.id, formats[0])
+				book.filesize = self._get_filesize_str(book.filepath)
+			else:
+				book.filepath = None
+				book.filesize = 0
+
+			books.append(book)
+	
+		return books
+
+class PyroTechnyEbookLibraryTorrent:
+	_calibre_library : CalibreLibrary
+	_path: str
+	_tempdir: str
+	_google_drive_file_db: list
+
+	def __init__(self, path: str, calibre_library: CalibreLibrary):
+		self._books = []
+		self._calibre_library = calibre_library
+		self._path = path
+		self._state = {}
+		self._state["books"] = []
+
+		# Create cache pat
+		if not os.path.exists(self._path):
+			print(f"CREATE {self._path}")
+			os.makedirs(self._path, 0o755)
+
+	def synchronize(self):
+		# Load books from calibre
+		books = self._calibre_library.books()
+		for book in books:
+			#print(f"{book.to_json()}")
+			book.ebook_save(self._path)
+
+def main():
+	calibre_library = CalibreLibrary(config.CALIBRE_LIBRARY_PATH)
+	pyrotechny_library = PyroTechnyEbookLibraryTorrent(config.EBOOK_LIBRARY_TORRENT_CACHE_DIR, calibre_library)
+	pyrotechny_library.synchronize()
+
+if __name__ == "__main__":
+    main()
+
--- a/cmd/config.py
+++ b/cmd/config.py
@ -0,0 +1,10 @@
+# Shared settings for the cmd/ scripts. All filesystem paths are absolute
+# paths on the maintainer's machine.
+
+# URL of the db.json file index on Google Drive (fetched by calibre_to_hugo.py
+# and google_drive_db_json_to_hugo.py).
+# NOTE: the name is misspelled ("LIBRRARY") but is referenced as-is by those scripts.
+GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL = "https://drive.google.com/uc?export=view&id=1eRGzaRg-CsweqivR9TnmX3relG7m4j6F&export=download"
+# Calibre library directory that is exported
+CALIBRE_LIBRARY_PATH = "/Users/jerry/files/pyrotechnics/ebooks"
+# Directory receiving the ebook copies made by calibre_to_hugo.py
+HUGO_STATIC_CONTENT_LIBRARY_PATH = "/Users/jerry/files/pyrotechnics/cache/pyrotechny.eu/library/ebooks"
+# Directory receiving the generated per-book markdown pages
+HUGO_CONTENT_LIBRARY_PATH = "/Users/jerry/src/github.com/xor-gate/pyrotechny.eu/src/content/library/ebooks"
+# File receiving the generated library sitemap
+HUGO_CONTENT_LIBRARY_SITEMAP_PATH = "/Users/jerry/src/github.com/xor-gate/pyrotechny.eu/src/content/library/ebooks/sitemap.md"
+# Directory receiving the book cover images
+HUGO_STATIC_CONTENT_LIBRARY_IMAGES_PATH = "/Users/jerry/src/github.com/xor-gate/pyrotechny.eu/src/static/images/library"
+# URL prefix used for the "Back to library" and sitemap links
+LIBRARY_EBOOKS_BASE_URL = "/library"
+# URL prefix under which the cover images are referenced from the book pages
+LIBRARY_EBOOKS_THUMBS_URL_PATH = "/images/library"
+# Directory receiving the title-named ebook copies made by calibre_to_torrent.py
+EBOOK_LIBRARY_TORRENT_CACHE_DIR = "/Users/jerry/files/pyrotechnics/cache/fireworks_and_pyrotechnics_ebook_library_by_pyrotechny_eu"
+# Book hashes (SHA-256 of the ebook file path) that calibre_to_hugo.py must not publish
+CLEARWEB_FILTERED_BOOKS = ["b74c5626774ce2cfc7efe245aabce72e7e261295931008b843848d2f54f9954b"]
--- a/cmd/google_drive_db_json_to_hugo.py
+++ b/cmd/google_drive_db_json_to_hugo.py
@ -0,0 +1,10 @@
+#!/usr/bin/env python3
+import json
+
+import config
+from urllib.request import urlopen
+
+resp = urlopen(config.GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL)
+data = json.loads(resp.read())
+
+print(data)
--- a/cmd/google_drive_library_sync.sh
+++ b/cmd/google_drive_library_sync.sh
@ -0,0 +1,6 @@
+#!/bin/sh
+# Mirror the local ebook directory to the Google Drive folder with rclone
+DISK_PATH="/Users/jerry/files/pyrotechnics/cache/pyrotechny.eu/library/ebooks"
+GDRIVE_PATH="gdrive:library.pyrotechny.eu/ebooks"
+# rclone.conf is looked up relative to the current working directory
+RCLONE_ARGS="--progress --config rclone.conf"
+#rclone --config rclone.conf ls ${GDRIVE_PATH}
+# RCLONE_ARGS stays unquoted on purpose so it splits into separate arguments;
+# the paths are quoted so they survive whitespace
+/usr/local/bin/rclone ${RCLONE_ARGS} sync "${DISK_PATH}" "${GDRIVE_PATH}"
--- a/cmd/google_drive_share_ebook.js
+++ b/cmd/google_drive_share_ebook.js
@ -0,0 +1,52 @@
+/**
+ * Google drive folder file public share creator
+ * For use on https://script.google.com/ platform (Google Apps Script)
+ * Shares the folderID files and writes a db.json to the folder
+ */
+function share_folder_files() {
+  var folderId = "1vRPsN2yBqSFVgVJTWarH43Sr7BLolZNS"; // library.pyrotechny.eu/ebooks
+  var files = DriveApp.getFolderById(folderId).getFiles();
+  var result = [];
+
+  while (files.hasNext()) {
+      var file = files.next();
+
+      sharing = file.getSharingAccess();
+      if (sharing != DriveApp.Access.ANYONE_WITH_LINK) {
+        file.setSharing(DriveApp.Access.ANYONE_WITH_LINK, DriveApp.Permission.VIEW);
+      }
+
+      var entry = {
+        filename: file.getName(),
+		    view_url: "https://drive.google.com/file/d/" + file.getId() + "/view",
+        download_url: "https://drive.google.com/uc?export=download&id=" + file.getId()
+      };
+  
+      result.push(entry);
+      Logger.log(entry["filename"] + " -> " + file.getId());
+  };
+
+  // Update or create db.json and share the file
+  var file = null;
+  files = DriveApp.getFolderById(folderId).getFilesByName("db.json");
+  while (files.hasNext()) {
+    file = files.next(); // NOTE: Only last file is updated, no duplicates are removed...
+  }
+
+  db_json_data = JSON.stringify(result);
+
+  if (!file) {
+    file = DriveApp.getFolderById(folderId).createFile("db.json", db_json_data, MimeType.PLAIN_TEXT);
+    sharing = file.getSharingAccess();
+    if (sharing != DriveApp.Access.ANYONE_WITH_LINK) {
+        file.setSharing(DriveApp.Access.ANYONE_WITH_LINK, DriveApp.Permission.VIEW);
+    }
+    Logger.log("Created db.json");
+  } else {
+    file.setContent(db_json_data);
+    Logger.log("Updated db.json");
+  }
+
+  download_url =  "https://drive.google.com/uc?export=view&id=" + file.getId();
+  Logger.log("db.json located at " + download_url)
+}
--- a/cmd/library_ebooks_create_torrent.sh
+++ b/cmd/library_ebooks_create_torrent.sh
@ -0,0 +1,9 @@
+#!/bin/sh
+# Needs py3createtorrent
+##
+# Directory receiving the .torrent file; also the parent of the ebook directory
+OUTPUT_DIR="$HOME/files/pyrotechnics/cache"
+EBOOK_LIBRARY_DIRECTORY="${OUTPUT_DIR}/fireworks_and_pyrotechnics_ebook_library_by_pyrotechny_eu"
+NAME="fireworks_and_pyrotechnics_ebook_library_by_pyrotechny_eu"
+COMMENT="Pyrotechnics and fireworks ebook library by PyroTechny.EU"
+
+# The directory is quoted because it is derived from $HOME, which may contain whitespace
+py3createtorrent --exclude-pattern '.DS_Store' -o "${OUTPUT_DIR}" -n "${NAME}" -c "${COMMENT}" -t best20 "${EBOOK_LIBRARY_DIRECTORY}"
--- a/cmd/links_to_hugo.py
+++ b/cmd/links_to_hugo.py
@ -0,0 +1,53 @@
+#!/usr/bin/env python311
+import pprint
+
+import yaml
+import jinja2
+
+
+class LinksYAMLDB:
+	def __init__(self, filename: str):
+		with open(filename, "r", encoding="utf-8") as fd:
+			self._db = yaml.load(fd, yaml.Loader)
+		self._process()
+
+	def _process(self):
+		self._categories = {}
+		for category in self._db["categories"]:
+			category_name = category.capitalize()
+			_category = self._categories[category_name] = {}
+			_category["links"] = []
+			for link in self._db["links"]:
+				if category in link["categories"]:
+					_category["links"].append(link)
+
+		pprint.pprint(self._categories)
+
+	def generate_categories_page(self):
+		env = jinja2.Environment(
+			loader=jinja2.FileSystemLoader("data"),
+			autoescape=jinja2.select_autoescape()
+		)
+		tmpl = env.get_template("links-categories-page.md.jinja2")
+		data = tmpl.render(categories=self._db["categories"])
+		with open("src/content/links/categories.md", "w", encoding="utf8") as fd:
+			fd.write(data)
+
+	def _generate_category_page(self, category: str, links: dict):
+		env = jinja2.Environment(
+			loader=jinja2.FileSystemLoader("data"),
+			autoescape=jinja2.select_autoescape()
+		)
+		tmpl = env.get_template("links-category-page.md.jinja2")
+		data = tmpl.render(name=category, links=links)
+		with open(f"src/content/links/category/{category}.md", "w", encoding="utf8") as fd:
+			fd.write(data)
+
+	def generate_category_pages(self):
+		for category in self._categories.keys():
+			self._generate_category_page(category, links=self._categories[category]["links"])
+
+
+# Script entry: load the links database and write the categories overview page
+# plus one page per category. All paths are relative to the current directory.
+links_db = LinksYAMLDB("data/links.yaml")
+links_db.generate_categories_page()
+links_db.generate_category_pages()
--- a/cmd/requirements.txt
+++ b/cmd/requirements.txt
@ -0,0 +1,9 @@
+certifi==2023.5.7
+charset-normalizer==3.2.0
+idna==3.4
+pip==22.3.1
+requests==2.31.0
+setuptools==65.5.1
+wheel==0.38.4
+PyYAML==6.0.1
+Jinja2==3.1.2