1
0
mirror of https://github.com/pyrotechny-eu/pyrotechny.eu.git synced 2026-03-27 17:56:34 +01:00
This commit is contained in:
pyrotechny-eu
2024-10-08 19:18:36 +02:00
commit 66ceff8513
718 changed files with 8339 additions and 0 deletions

Binary file not shown.

274
cmd/calibre_to_hugo.py Executable file
View File

@ -0,0 +1,274 @@
#!/Applications/calibre.app/Contents/MacOS/calibre-debug calibre_to_hugo.py
# See: https://manual.calibre-ebook.com/db_api.html
import os
import json
import copy
import shutil
import pathlib
import hashlib
import urllib
import calibre.library
import config
class CalibreLibrary:
    # Forward declaration so CalibreLibraryBook can annotate its ``_db``
    # attribute; a later definition in this file supplies the real class.
    pass


class CalibreLibraryBook:
    """One book from the Calibre library plus the files published for it.

    Attributes whose name starts with an underscore are treated as private
    and are left out of :meth:`to_json`.
    """

    _db: CalibreLibrary
    id: int
    title: str
    authors: str
    filepath: str
    filehash: str
    filename: str
    cover: str

    def __init__(self, db: CalibreLibrary):
        self._db = db
        self.cover = None
        self.filehash = None

    def hash(self) -> str:
        """Return the SHA-256 hex digest of the ebook *path* string.

        The digest is computed from ``self.filepath`` (the path text, not the
        file contents) and cached in ``self.filehash``.
        """
        if not self.filehash:
            self.filehash = hashlib.sha256(self.filepath.encode("utf8")).hexdigest()
        return self.filehash

    def to_json(self):
        """Return the public instance attributes as a JSON-serializable dict."""
        return {
            key: value
            for key, value in self.__dict__.items()
            if not key.startswith("_")
        }

    def ebook_filename(self) -> str:
        """Build, store and return the published filename ``<hash><ext>``."""
        ext = pathlib.Path(self.filepath).suffix
        self.filename = f"{self.hash()}{ext}"
        return self.filename

    def ebook_save(self, path: str) -> str:
        """Copy the ebook from the calibre library into directory *path*.

        Returns the destination path. An already existing destination is left
        untouched, and its path is returned as well so the result always
        matches the ``-> str`` annotation.
        """
        filepath = os.path.join(path, self.ebook_filename())
        if os.path.exists(filepath):
            return filepath
        print(f"COPY {self.filepath} -> {filepath}")
        shutil.copyfile(self.filepath, filepath)
        return filepath

    def cover_save(self, path: str) -> str:
        """Store the book cover in directory *path* and return its path.

        Returns ``None`` (and sets ``self.cover`` to ``None``) when calibre
        has no cover for the book.
        """
        filehash = self.hash()
        # NOTE: we assume by testing the cover is a .jpg so we check if it
        # exists on disk already as "<hash>.jpg"
        self.cover = os.path.join(path, f"{filehash}.jpg")
        if os.path.exists(self.cover):
            print(f"COVER {self.cover}")
            return self.cover

        cover_tmpfile = self._db.cover(self.id, as_path=True)
        if not cover_tmpfile:
            # No cover available: record that instead of failing on the
            # missing temporary file.
            self.cover = None
            print(f"NO COVER for book id {self.id}")
            return None

        cover_ext = pathlib.Path(cover_tmpfile).suffix
        self.cover = os.path.join(path, f"{filehash}{cover_ext}")
        # shutil.move falls back to copy + delete when the temporary file
        # lives on another filesystem, where os.rename would raise OSError.
        shutil.move(cover_tmpfile, self.cover)
        print(f"SAVE COVER {self.cover}")
        return self.cover
class CalibreLibrary:
    """Access to a Calibre library that yields the books to publish."""

    def __init__(self, library_path: str):
        # Open the Calibre library; all queries go through its "new_api" object.
        self._db = calibre.library.db(library_path).new_api

    @staticmethod
    def _format_filesize(size: int) -> str:
        """Format a byte count using 1024-based units, rounded to 2 decimals."""
        if size < 1024:
            return f"{size} bytes"
        for suffix, factor in (("KB", 1024), ("MB", 1024 ** 2), ("GB", 1024 ** 3)):
            if size < factor * 1024:
                return f"{round(size / factor, 2)} {suffix}"
        # Anything from 1 TiB upwards is reported in TB instead of falling
        # off the end of the function and returning None.
        return f"{round(size / 1024 ** 4, 2)} TB"

    @staticmethod
    def _get_filesize_str(path: str) -> str:
        """Return the size of the file at *path* as a human readable string."""
        return CalibreLibrary._format_filesize(os.path.getsize(path))

    def books(self) -> 'list[CalibreLibraryBook]':
        """Return the publishable books of the library.

        Books without any ebook format are skipped (they have no file to hash
        or publish), as are books whose hash is listed in
        ``config.CLEARWEB_FILTERED_BOOKS``.
        """
        books = []
        for book_id in self._db.all_book_ids():
            # TODO check loaded state with state of calibre based on book.id
            # hashing takes way too long...
            book = CalibreLibraryBook(self._db)
            book.id = book_id
            book.title = self._db.field_for("title", book.id)
            book.authors = self._db.field_for("authors", book.id)
            book.comments = self._db.field_for("comments", book.id)
            book._metadata = self._db.get_metadata(book.id)
            book.ids = book._metadata.get_identifiers()

            # Select only first ebook format
            formats = self._db.formats(book.id, verify_formats=True)
            if not formats:
                # Without a file there is no path to hash; previously this
                # crashed in book.hash() with filepath set to None.
                print(f"SKIP Book {book.title} has no ebook file in the calibre library")
                continue
            book.filepath = self._db.format_abspath(book.id, formats[0])
            book.filesize = self._get_filesize_str(book.filepath)

            if book.hash() in config.CLEARWEB_FILTERED_BOOKS:
                print(f"SKIP Book {book.title} filtered and not published on the clearweb")
                continue
            books.append(book)
        return books
class PyroTechnyLibrary:
    """Publish the books of a Calibre library as Hugo content.

    ``synchronize`` copies ebooks and covers to their configured directories,
    ``generate`` writes one markdown page per book and
    ``generate_library_sitemap`` writes a markdown list of all book pages.
    View and download links are looked up in a JSON file list fetched from
    ``config.GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL``.
    """

    _calibre_library: "CalibreLibrary"
    _path: str
    _tempdir: str
    _google_drive_file_db: list

    # Identifier schemes rendered as links: identifier key -> (label, URL prefix).
    _IDENTIFIER_LINKS = {
        "amazon": ("Amazon", "https://www.amazon.com/dp/"),
        "google": ("Google", "https://books.google.com/books?id="),
        "isbn": ("ISBN", "https://www.worldcat.org/isbn/"),
        "doi": ("DOI", "https://dx.doi.org/"),
    }

    def __init__(self, path: str, calibre_library: "CalibreLibrary"):
        """Create directory *path* if needed and load the Google Drive file list."""
        self._books = []
        self._calibre_library = calibre_library
        self._path = path
        self._state = {"books": []}

        # Create path directories
        if not os.path.exists(self._path):
            print(f"CREATE {self._path}")
            os.makedirs(self._path, 0o755)

        self._load_google_drive_file_db()

    def _get_google_drive_value_from_filename(self, filename: str, key: str) -> str:
        """Return field *key* of the file-list entry named *filename*, or ""."""
        for file in self._google_drive_file_db:
            if filename == file["filename"]:
                return file[key]
        return ""

    def _load_google_drive_file_db(self):
        """Download and parse the JSON file list into ``_google_drive_file_db``."""
        # ``import urllib`` alone does not load the ``request`` submodule, so
        # import it explicitly where it is used.
        import urllib.request

        url = config.GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL
        print(f"LOAD db.json from Google Drive: {url}")
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as resp:
            self._google_drive_file_db = json.loads(resp.read())

    def _generate_book_dl_page(self, path, book):
        pass

    @staticmethod
    def _render_book_page(book, cover, cover_url, book_url, book_dl_url, back_url) -> str:
        """Return the complete markdown page (front matter + body) for *book*.

        *cover* is the cover file name or None; the cover image link is only
        emitted when a cover exists.
        """
        # json.dumps yields a double-quoted string with quotes and backslashes
        # escaped, so titles containing '"' no longer break the front matter.
        title = json.dumps(str(book.title), ensure_ascii=False)
        parts = [
            "---\n",
            f"title: {title}\n",
            'description: ""\n',
            'featured_image: "/images/site/library-header.jpg"\n',
            "type: page\n",
            "---\n",
        ]

        if cover:
            parts.append(f'<a href="{book_url}" target="_blank">![{cover}]({cover_url})</a>\n')

        # A single author called "Unknown" is treated as "no author".
        names = list(book.authors or ())
        authors = ", ".join(names)
        if authors and names != ["Unknown"]:
            author_suffix = "s" if len(names) > 1 else ""
            parts.append(f"* Author{author_suffix}: {authors}\n")

        # IDs to book libraries and publishers
        if book.ids:
            parts.append("* IDs:\n")
            for key, value in book.ids.items():
                link = PyroTechnyLibrary._IDENTIFIER_LINKS.get(key)
                if link:
                    label, prefix = link
                    parts.append(f' * {label}: <a href="{prefix}{value}" target="_blank">{value}</a>\n')

        # View & Download
        parts.append(f'* <a href="{book_url}" target="_blank">View</a>\n\n')
        parts.append(f'* [Download]({book_dl_url}) ({book.filesize})\n\n')

        # Comments
        if book.comments:
            # The blank line keeps the heading separate from the comment text.
            parts.append("## Description\n\n")
            parts.append(f"{book.comments}\n\n")

        # Back
        parts.append(f'<br />[Back to library]({back_url})\n')
        return "".join(parts)

    def _generate_book_page(self, path, book):
        """Write ``<path>/<book.filehash>.md``, replacing any existing page."""
        filepath = os.path.join(path, f"{book.filehash}.md")
        cover = os.path.basename(book.cover) if book.cover else None
        cover_url = None
        if cover:
            cover_url = os.path.join(config.LIBRARY_EBOOKS_THUMBS_URL_PATH, cover)

        page = self._render_book_page(
            book,
            cover=cover,
            cover_url=cover_url,
            book_url=self._get_google_drive_value_from_filename(book.filename, "view_url"),
            book_dl_url=self._get_google_drive_value_from_filename(book.filename, "download_url"),
            back_url=f"{config.LIBRARY_EBOOKS_BASE_URL}/",
        )

        # TODO: When we use a template file we can check if the template
        # or script is newer than the target book page markdown file. To speed things up a bit
        print(f"GEN {filepath}")
        # Mode "w" truncates an existing page; UTF-8 keeps the output
        # independent of the platform's default encoding.
        with open(filepath, "w", encoding="utf-8") as fd:
            fd.write(page)

    def synchronize(self):
        """Copy every book's cover and ebook file and remember the book."""
        # Load books from calibre
        for book in self._calibre_library.books():
            book.ebook_filename()
            book.cover_save(config.HUGO_STATIC_CONTENT_LIBRARY_IMAGES_PATH)
            book.ebook_save(config.HUGO_STATIC_CONTENT_LIBRARY_PATH)
            self._state["books"].append(book)

    def generate(self):
        """Generate hugo markdown content files, one page per synchronized book."""
        book_page_path = config.HUGO_CONTENT_LIBRARY_PATH
        if not os.path.exists(book_page_path):
            os.makedirs(book_page_path, 0o755)

        # TODO synced state instead of calibre copy....
        for book in self._state["books"]:
            self._generate_book_page(book_page_path, book)

    def generate_library_sitemap(self, filepath: str):
        """Write a markdown list linking to every synchronized book page."""
        with open(filepath, "w", encoding="utf-8") as fd:
            fd.write("# E-books library sitemap\n\n")
            for book in self._state["books"]:
                fd.write(f"* [{book.title}]({config.LIBRARY_EBOOKS_BASE_URL}/ebooks/{book.hash()})\n")
def main():
    """Run the export: sync files, write book pages, then write the sitemap."""
    source_library = CalibreLibrary(config.CALIBRE_LIBRARY_PATH)
    site_library = PyroTechnyLibrary(config.HUGO_STATIC_CONTENT_LIBRARY_PATH, source_library)

    site_library.synchronize()
    site_library.generate()
    site_library.generate_library_sitemap(config.HUGO_CONTENT_LIBRARY_SITEMAP_PATH)


# Only run the export when executed as a script, not when imported.
if __name__ == "__main__":
    main()

145
cmd/calibre_to_torrent.py Executable file
View File

@ -0,0 +1,145 @@
#!/Applications/calibre.app/Contents/MacOS/calibre-debug calibre_to_torrent.py
# See: https://manual.calibre-ebook.com/db_api.html
import os
import json
import copy
import shutil
import pathlib
import hashlib
import urllib
import calibre.library
import config
class CalibreLibrary:
    # Forward declaration so CalibreLibraryBook can annotate its ``_db``
    # attribute; a later definition in this file supplies the real class.
    pass


class CalibreLibraryBook:
    """One book from the Calibre library, saved under a title-based filename.

    Attributes whose name starts with an underscore are treated as private
    and are left out of :meth:`to_json`.
    """

    _db: CalibreLibrary
    id: int
    title: str
    authors: str
    filepath: str
    filehash: str
    filename: str
    cover: str

    # Single-character substitutions applied to "<title><ext>"; a value of
    # None deletes the character.
    _FILENAME_TRANSLATION = str.maketrans({
        ",": ".",
        ";": None,
        ":": ".",
        "/": "-",
        "'": None,
        " ": ".",
    })

    def __init__(self, db: CalibreLibrary):
        self._db = db
        self.cover = None
        self.filehash = None

    def to_json(self):
        """Return the public instance attributes as a JSON-serializable dict."""
        return {
            key: value
            for key, value in self.__dict__.items()
            if not key.startswith("_")
        }

    def ebook_filename(self) -> str:
        """Build, store and return the filename derived from the book title.

        The title plus the original file extension is passed through the
        character substitutions above, then every ".." pair is collapsed to
        "." in a single pass.
        """
        ext = pathlib.Path(self.filepath).suffix
        name = f"{self.title}{ext}".translate(self._FILENAME_TRANSLATION)
        self.filename = name.replace("..", ".")
        return self.filename

    def ebook_save(self, path: str) -> str:
        """Copy the ebook from the calibre library into directory *path*.

        Returns the destination path. An already existing destination is left
        untouched, and its path is returned as well so the result always
        matches the ``-> str`` annotation.
        """
        filepath = os.path.join(path, self.ebook_filename())
        if os.path.exists(filepath):
            return filepath
        print(f"COPY {self.filepath} -> {filepath}")
        shutil.copyfile(self.filepath, filepath)
        return filepath
class CalibreLibrary:
    """Access to a Calibre library that yields all of its books."""

    def __init__(self, library_path: str):
        # Open the Calibre library; all queries go through its "new_api" object.
        self._db = calibre.library.db(library_path).new_api

    @staticmethod
    def _format_filesize(size: int) -> str:
        """Format a byte count using 1024-based units, rounded to 2 decimals."""
        if size < 1024:
            return f"{size} bytes"
        for suffix, factor in (("KB", 1024), ("MB", 1024 ** 2), ("GB", 1024 ** 3)):
            if size < factor * 1024:
                return f"{round(size / factor, 2)} {suffix}"
        # Anything from 1 TiB upwards is reported in TB instead of falling
        # off the end of the function and returning None.
        return f"{round(size / 1024 ** 4, 2)} TB"

    @staticmethod
    def _get_filesize_str(path: str) -> str:
        """Return the size of the file at *path* as a human readable string."""
        return CalibreLibrary._format_filesize(os.path.getsize(path))

    def books(self) -> 'list[CalibreLibraryBook]':
        """Return every book of the library with its metadata filled in.

        Books without any ebook format are included with ``filepath`` set to
        ``None`` and ``filesize`` set to ``0``.
        """
        books = []
        for book_id in self._db.all_book_ids():
            # TODO check loaded state with state of calibre based on book.id
            # hashing takes way too long...
            book = CalibreLibraryBook(self._db)
            book.id = book_id
            book.title = self._db.field_for("title", book.id)
            book.authors = self._db.field_for("authors", book.id)
            book.comments = self._db.field_for("comments", book.id)
            book._metadata = self._db.get_metadata(book.id)
            book.ids = book._metadata.get_identifiers()

            # Select only first ebook format
            formats = self._db.formats(book.id, verify_formats=True)
            if len(formats) > 0:
                book.filepath = self._db.format_abspath(book.id, formats[0])
                book.filesize = self._get_filesize_str(book.filepath)
            else:
                book.filepath = None
                book.filesize = 0

            books.append(book)
        return books
class PyroTechnyEbookLibraryTorrent:
    """Collect the ebooks of a Calibre library in one cache directory.

    The directory passed as *path* receives a copy of every book that has an
    ebook file.
    """

    _calibre_library: "CalibreLibrary"
    _path: str
    _tempdir: str
    _google_drive_file_db: list

    def __init__(self, path: str, calibre_library: "CalibreLibrary"):
        """Remember the library and create the cache directory if needed."""
        self._books = []
        self._calibre_library = calibre_library
        self._path = path
        self._state = {"books": []}

        # Create cache path
        if not os.path.exists(self._path):
            print(f"CREATE {self._path}")
            os.makedirs(self._path, 0o755)

    def synchronize(self):
        """Copy every book that has an ebook file into the cache directory."""
        # Load books from calibre
        for book in self._calibre_library.books():
            if not book.filepath:
                # Books without any format carry filepath None; saving them
                # would fail while deriving the file extension.
                print(f"SKIP Book {book.title} has no ebook file in the calibre library")
                continue
            book.ebook_save(self._path)
def main():
    """Copy all ebooks of the Calibre library into the torrent cache directory."""
    source_library = CalibreLibrary(config.CALIBRE_LIBRARY_PATH)
    torrent_library = PyroTechnyEbookLibraryTorrent(
        config.EBOOK_LIBRARY_TORRENT_CACHE_DIR, source_library
    )
    torrent_library.synchronize()


# Only run the copy when executed as a script, not when imported.
if __name__ == "__main__":
    main()

10
cmd/config.py Normal file
View File

@ -0,0 +1,10 @@
# URL that calibre_to_hugo.py fetches and parses as JSON to look up the
# view/download links of each ebook by filename.
# NOTE(review): "LIBRRARY" is misspelled, but the other scripts reference this
# exact name, so it must not be renamed here alone. The query string also
# carries "export" twice - confirm which value is intended.
GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL = "https://drive.google.com/uc?export=view&id=1eRGzaRg-CsweqivR9TnmX3relG7m4j6F&export=download"
# Calibre library that both calibre_to_*.py scripts open.
CALIBRE_LIBRARY_PATH = "/Users/jerry/files/pyrotechnics/ebooks"
# Directory calibre_to_hugo.py copies the hash-named ebook files into (created
# if missing); the rclone sync script uses the same path as its source.
HUGO_STATIC_CONTENT_LIBRARY_PATH = "/Users/jerry/files/pyrotechnics/cache/pyrotechny.eu/library/ebooks"
# Directory that receives one generated markdown page per book.
HUGO_CONTENT_LIBRARY_PATH = "/Users/jerry/src/github.com/xor-gate/pyrotechny.eu/src/content/library/ebooks"
# File that receives the generated markdown sitemap of all books.
HUGO_CONTENT_LIBRARY_SITEMAP_PATH = "/Users/jerry/src/github.com/xor-gate/pyrotechny.eu/src/content/library/ebooks/sitemap.md"
# Directory the book cover images are saved into.
HUGO_STATIC_CONTENT_LIBRARY_IMAGES_PATH = "/Users/jerry/src/github.com/xor-gate/pyrotechny.eu/src/static/images/library"
# URL prefix used for the "Back to library" link and the sitemap links.
LIBRARY_EBOOKS_BASE_URL = "/library"
# URL prefix joined with the cover filename on each book page.
LIBRARY_EBOOKS_THUMBS_URL_PATH = "/images/library"
# Directory calibre_to_torrent.py copies the title-named ebook files into.
EBOOK_LIBRARY_TORRENT_CACHE_DIR = "/Users/jerry/files/pyrotechnics/cache/fireworks_and_pyrotechnics_ebook_library_by_pyrotechny_eu"
# Book hashes (SHA-256 hex of the ebook path, see CalibreLibraryBook.hash) that
# calibre_to_hugo.py skips instead of publishing.
CLEARWEB_FILTERED_BOOKS = ["b74c5626774ce2cfc7efe245aabce72e7e261295931008b843848d2f54f9954b"]

View File

@ -0,0 +1,10 @@
#!/usr/bin/env python3
import json
import config
from urllib.request import urlopen
# Fetch the published db.json and print the parsed result as a quick check.
# The context manager closes the HTTP response once it has been read.
with urlopen(config.GOOGLE_DRIVE_EBOOK_LIBRRARY_DB_JSON_URL) as resp:
    data = json.loads(resp.read())
print(data)

View File

@ -0,0 +1,6 @@
#!/bin/sh
# Sync the local ebook directory to the Google Drive remote using rclone.
DISK_PATH="/Users/jerry/files/pyrotechnics/cache/pyrotechny.eu/library/ebooks"
GDRIVE_PATH="gdrive:library.pyrotechny.eu/ebooks"
# Holds several options in one string; it is expanded unquoted on purpose so
# the shell splits it into separate arguments.
RCLONE_ARGS="--progress --config rclone.conf"
#rclone --config rclone.conf ls "${GDRIVE_PATH}"
# Paths are quoted so a path containing spaces stays a single argument.
/usr/local/bin/rclone ${RCLONE_ARGS} sync "${DISK_PATH}" "${GDRIVE_PATH}"

View File

@ -0,0 +1,52 @@
/**
 * Google drive folder file public share creator
 * For use on https://script.google.com/ platform (Google Apps Script)
 * Shares the folderID files and writes a db.json to the folder
 *
 * db.json holds one {filename, view_url, download_url} entry per file found
 * in the folder.
 */
function share_folder_files() {
  var folderId = "1vRPsN2yBqSFVgVJTWarH43Sr7BLolZNS"; // library.pyrotechny.eu/ebooks
  var folder = DriveApp.getFolderById(folderId);

  // Give "anyone with the link" view access unless the file already has it.
  function ensureLinkShared(file) {
    if (file.getSharingAccess() != DriveApp.Access.ANYONE_WITH_LINK) {
      file.setSharing(DriveApp.Access.ANYONE_WITH_LINK, DriveApp.Permission.VIEW);
    }
  }

  // Share every file in the folder and collect its public URLs.
  var result = [];
  var files = folder.getFiles();
  while (files.hasNext()) {
    var file = files.next();
    ensureLinkShared(file);

    var entry = {
      filename: file.getName(),
      view_url: "https://drive.google.com/file/d/" + file.getId() + "/view",
      download_url: "https://drive.google.com/uc?export=download&id=" + file.getId()
    };
    result.push(entry);

    Logger.log(entry["filename"] + " -> " + file.getId());
  }

  // Update or create db.json and share the file
  var dbFile = null;
  var dbFiles = folder.getFilesByName("db.json");
  while (dbFiles.hasNext()) {
    dbFile = dbFiles.next(); // NOTE: Only last file is updated, no duplicates are removed...
  }

  // All variables are declared with var so none of them leaks into the
  // global scope.
  var dbJsonData = JSON.stringify(result);
  if (!dbFile) {
    dbFile = folder.createFile("db.json", dbJsonData, MimeType.PLAIN_TEXT);
    ensureLinkShared(dbFile);
    Logger.log("Created db.json");
  } else {
    dbFile.setContent(dbJsonData);
    Logger.log("Updated db.json");
  }

  var downloadUrl = "https://drive.google.com/uc?export=view&id=" + dbFile.getId();
  Logger.log("db.json located at " + downloadUrl);
}

View File

@ -0,0 +1,9 @@
#!/bin/sh
# Create a torrent of the collected ebook library directory.
# Needs py3createtorrent
##
OUTPUT_DIR="$HOME/files/pyrotechnics/cache"
NAME="fireworks_and_pyrotechnics_ebook_library_by_pyrotechny_eu"
# The library directory carries the same name as the torrent.
EBOOK_LIBRARY_DIRECTORY="${OUTPUT_DIR}/${NAME}"
COMMENT="Pyrotechnics and fireworks ebook library by PyroTechny.EU"
# The directory is quoted so a $HOME containing spaces stays one argument.
py3createtorrent --exclude-pattern '.DS_Store' -o "${OUTPUT_DIR}" -n "${NAME}" -c "${COMMENT}" -t best20 "${EBOOK_LIBRARY_DIRECTORY}"

53
cmd/links_to_hugo.py Normal file
View File

@ -0,0 +1,53 @@
#!/usr/bin/env python311
import pprint
import yaml
import jinja2
class LinksYAMLDB:
    """Links database loaded from a YAML file, rendered to markdown pages.

    The YAML document is read for a ``categories`` sequence and a ``links``
    sequence whose items each have their own ``categories`` entry.
    """

    def __init__(self, filename: str):
        """Load *filename* and group its links by category."""
        with open(filename, "r", encoding="utf-8") as stream:
            # NOTE(review): yaml.Loader can construct arbitrary Python
            # objects; only feed it trusted files.
            self._db = yaml.load(stream, yaml.Loader)
        self._process()

    def _process(self):
        """Build ``_categories``: capitalized category name -> {"links": [...]}."""
        self._categories = {}
        for category in self._db["categories"]:
            matching = [
                link for link in self._db["links"] if category in link["categories"]
            ]
            self._categories[category.capitalize()] = {"links": matching}
        pprint.pprint(self._categories)

    def _template(self, name: str):
        """Return template *name* from a fresh environment rooted at "data"."""
        environment = jinja2.Environment(
            loader=jinja2.FileSystemLoader("data"),
            autoescape=jinja2.select_autoescape()
        )
        return environment.get_template(name)

    def generate_categories_page(self):
        """Render the categories overview to src/content/links/categories.md."""
        page = self._template("links-categories-page.md.jinja2").render(
            categories=self._db["categories"]
        )
        with open("src/content/links/categories.md", "w", encoding="utf8") as out:
            out.write(page)

    def _generate_category_page(self, category: str, links: dict):
        """Render one category page to src/content/links/category/<category>.md."""
        page = self._template("links-category-page.md.jinja2").render(
            name=category, links=links
        )
        with open(f"src/content/links/category/{category}.md", "w", encoding="utf8") as out:
            out.write(page)

    def generate_category_pages(self):
        """Render a page for every processed category."""
        for category, entry in self._categories.items():
            self._generate_category_page(category, links=entry["links"])
# Load the links database, then write the categories overview page and one
# page per category. These statements run at import time because the script
# has no __main__ guard.
links_db = LinksYAMLDB("data/links.yaml")
links_db.generate_categories_page()
links_db.generate_category_pages()

9
cmd/requirements.txt Normal file
View File

@ -0,0 +1,9 @@
certifi==2023.5.7
charset-normalizer==3.2.0
idna==3.4
pip==22.3.1
requests==2.31.0
setuptools==65.5.1
wheel==0.38.4
PyYAML==6.0.1
Jinja2==3.1.2