import dataclasses
import datetime
import glob
import logging
import os
import pathlib
import subprocess
import typing

import markdown
import markupsafe
import yaml

logger = logging.getLogger(__name__)


# Known exceptions: these are raised on errors that stop the generation, and
# are meant to be displayed to the user nicely.
class ArticleHandlerException(Exception):
    """Base class for every error raised while handling articles."""


class ArticleMetaDataMalformed(ArticleHandlerException):
    """The metadata header of an article exists but cannot be used."""


class ArticleNoMetaData(ArticleHandlerException):
    """The article source does not start with a metadata header."""


@dataclasses.dataclass
class WebPage:
    """Anything that has a URL and a display name."""

    url: str
    name: str


@dataclasses.dataclass
class MetaData:
    """Metadata parsed from the YAML header of an article."""

    description: str
    created: datetime.date

    @property
    def pretty_print(self) -> str:
        """Return a multi-line, human-readable dump of the metadata."""
        return f'''
        Description: {self.description}
        Created: {self.created}'''


def truncate(text: str, max_length: int = 50) -> str:
    """Return *text* cut down to at most *max_length* characters.

    Text that already fits is returned unchanged. A zero or negative
    *max_length* yields an empty string.
    """
    # Clamp so that a non-positive limit never turns into a negative slice
    # index (which would keep almost the whole string).
    return text[:max(max_length, 0)]


@dataclasses.dataclass
class Article(WebPage):
    """A single article, either a lone markdown file or an article folder."""

    metadata: MetaData  # date, description
    web_dir: tuple  # Url path of the article, folder agnostic, removes .md
    source_path: str  # Path to the markdown source file
    # Top most path of the article: the markdown file for single file
    # articles, the article folder for folder articles
    git_path: str
    source: str  # Source markdown, without the metadata
    html: str  # The generated HTML output
    modified: datetime.datetime  # Article last modification date
    # Path to the thumbnail image to display in folder view, made of the
    # article folder name and the thumbnail file name
    thumbnail_path: typing.Optional[str] = None
    # Path to the article folder, if applicable
    folder_path: typing.Optional[str] = None

    @property
    def pretty_print(self) -> str:
        """Return a multi-line, human-readable dump of the article."""
        return f'''
    Name: {self.name}
    Metadata: {self.metadata.pretty_print}
    Web dir: {self.web_dir}
    URL: {self.url}
    Source path: {self.source_path}
    Git path: {self.git_path}
    Thumbnail path: {self.thumbnail_path}
    Folder path: {self.folder_path}
    Modified: {self.modified}
    HTML: {truncate(self.html)}
    Source: {truncate(self.source)}'''


@dataclasses.dataclass
class Folder(WebPage):
    """A node of the article tree: its own articles plus nested folders."""

    articles: typing.List[Article] = dataclasses.field(default_factory=list)
    sub_folders: typing.Dict[str, 'Folder'] = dataclasses.field(default_factory=dict)


class ArticleGenerator:
    """Discovers markdown articles under a root directory and renders them."""

    def __init__(self, articles_path: str):
        self.articles_path = articles_path

    def get_web_dir(self, path, name) -> tuple[str, ...]:
        """Return the folders between the articles root and the article.

        Args:
            path: Path to the markdown source file of the article.
            name: Name of the article: the file name without extension for
                single file articles, the folder name for folder articles.

        Returns:
            The path components of the directory containing the article,
            relative to ``self.articles_path``.

        Raises:
            ValueError: If the containing directory is not located under
                ``self.articles_path``.
        """
        source_file = pathlib.Path(path)
        # A folder article lives at <container>/<name>/index.md, a single file
        # article at <container>/<name>.md. Working on path components (rather
        # than splitting the string on the name) stays correct when the name
        # also appears in a parent directory.
        is_folder_article = source_file.stem == 'index' and source_file.parent.name == name
        container = source_file.parent.parent if is_folder_article else source_file.parent
        return container.relative_to(pathlib.Path(self.articles_path)).parts

    def parse_article_meta_data(self, source: str) -> typing.Tuple[str, MetaData]:
        """Split an article source into its markdown body and its metadata.

        The metadata is a YAML mapping enclosed between two ``---`` markers at
        the very start of the source. Its ``description`` is rendered from
        markdown to HTML markup.

        Args:
            source: Full text of the article file.

        Returns:
            A ``(body, metadata)`` tuple, where ``body`` is the source with
            the metadata header removed.

        Raises:
            ArticleNoMetaData: If the source does not start with ``---``.
            ArticleMetaDataMalformed: If the header is not closed, is not
                valid YAML, is not a mapping, has no text description, or
                does not match the fields of ``MetaData``.
        """
        delimiter = '---'
        if not source.startswith(delimiter):
            raise ArticleNoMetaData('No metadata found')

        end_index = source.find(delimiter, len(delimiter))
        if end_index == -1:
            raise ArticleMetaDataMalformed('Metadata block is missing its closing "---"')

        meta_data_yml = source[len(delimiter):end_index]
        # Strip metadata text from source, before feeding it to the markdown reader
        body = source[end_index + len(delimiter):]

        try:
            meta_data = yaml.safe_load(meta_data_yml)
        except yaml.YAMLError as yaml_error:
            raise ArticleMetaDataMalformed(str(yaml_error)) from yaml_error

        if not isinstance(meta_data, dict):
            raise ArticleMetaDataMalformed('Metadata must be a YAML mapping')

        description = meta_data.get('description')
        if not isinstance(description, str):
            raise ArticleMetaDataMalformed('Metadata "description" is missing or is not text')
        meta_data['description'] = markupsafe.Markup(markdown.markdown(description))

        try:
            return body, MetaData(**meta_data)
        except TypeError as type_error_exception:
            # Missing or unexpected metadata keys
            raise ArticleMetaDataMalformed(str(type_error_exception)) from type_error_exception

    def _get_modified_date(self, path: str, git_path: str) -> datetime.datetime:
        """Return the last modification date of an article.

        The date of the last git commit touching ``git_path`` is preferred;
        the modification time of ``path`` on the file system is the fallback.
        """
        relative_git_path = pathlib.Path(git_path).relative_to(self.articles_path)
        try:
            # %cI is the committer date in strict ISO 8601 format, which is
            # what datetime.fromisoformat() parses.
            git_date = subprocess.run(
                ['git', 'log', '-1', '--pretty=format:%cI', '--', str(relative_git_path)],
                cwd=self.articles_path, capture_output=True, text=True).stdout.strip()
        except FileNotFoundError:
            logger.warning('"git" isn\'t installed, modified dates will use file system (worse data)')
            git_date = None

        if git_date:
            # Get article modified date time from git commit
            return datetime.datetime.fromisoformat(git_date)

        if git_date is not None:
            logger.warning("Article folder is not a git directory, modified dates will use file system (worse data)")
        # Get article modified date time from file system, as an aware
        # datetime expressed in the local timezone
        modified_timestamp = os.path.getmtime(path)
        return datetime.datetime.fromtimestamp(modified_timestamp, tz=datetime.timezone.utc).astimezone()

    def get_article(self, path: str) -> Article:
        """Build the ``Article`` described by the markdown file at *path*.

        A file named ``index.md`` makes its parent folder a "folder article",
        which may carry a ``Thumbnail*`` image; any other file is a single
        file article.

        Args:
            path: Path to the markdown file, located under
                ``self.articles_path``.

        Returns:
            The article, with parsed metadata and rendered HTML.

        Raises:
            ArticleHandlerException: If the metadata header is missing or
                malformed (see ``parse_article_meta_data``).
        """
        logger.info(path)

        article_args: typing.Dict[str, typing.Any] = {}
        filename, _ = os.path.splitext(path)

        if os.path.basename(filename) == 'index':
            # Article type is folder
            folder_path = os.path.dirname(path)
            article_args['folder_path'] = folder_path
            article_args['name'] = os.path.basename(folder_path)

            # Look for a thumbnail (file extension agnostic); sorted so the
            # chosen file does not depend on the directory listing order
            thumbnail_paths = sorted(glob.glob(f'{glob.escape(folder_path)}/Thumbnail*'))
            if len(thumbnail_paths) > 1:
                logger.warning(f"More than one thumbnail found, using: {thumbnail_paths[0]}")
            if not thumbnail_paths:
                logger.warning("No thumbnail image")
            else:
                # Keep only "<article folder>/<thumbnail file>"
                article_args['thumbnail_path'] = '/'.join(pathlib.Path(thumbnail_paths[0]).parts[-2:])

            article_args['git_path'] = folder_path
        else:
            # Article is one file
            article_args['name'] = os.path.basename(filename)
            article_args['git_path'] = path

        article_args['web_dir'] = self.get_web_dir(path, article_args['name'])
        article_args['source_path'] = path
        article_args['url'] = '/' + '/'.join((*article_args['web_dir'], article_args['name']))
        article_args['modified'] = self._get_modified_date(path, article_args['git_path'])

        with open(path, 'r', encoding='utf-8') as file:
            source = file.read()

        article_args['source'], article_args['metadata'] = self.parse_article_meta_data(source)
        article_args['html'] = markdown.markdown(
            article_args['source'],
            extensions=[
                'fenced_code',
                'codehilite',
                'tables',
                # Automatically generates unique IDs for headers, allowing
                # for ID URL referral (Anchor)
                'toc',
            ])

        return Article(**article_args)

    def discover_folder_structure(self, article: Article, articles: Folder) -> None:
        """Insert *article* into the folder tree rooted at *articles*.

        Walks ``article.web_dir`` from the root, creating the missing
        ``Folder`` nodes, and appends the article to the last folder.
        """
        previous_folder = articles
        for folder_name in article.web_dir:
            logger.debug(folder_name)
            if folder_name not in previous_folder.sub_folders:
                logger.debug('new')
                current_folder = Folder(url=f'{previous_folder.url}{folder_name}/', name=folder_name)
                previous_folder.sub_folders[folder_name] = current_folder
            else:
                logger.debug('reuse')
                current_folder = previous_folder.sub_folders[folder_name]
            previous_folder = current_folder
        logger.debug('')
        previous_folder.articles.append(article)

    def sort_articles(self, folder: Folder) -> None:
        """Recursively sort the articles of *folder*, newest first.

        Articles without a creation date are placed last.
        """
        folder.articles = sorted(
            folder.articles,
            key=lambda x: x.metadata.created if x.metadata.created is not None else datetime.date.min,
            reverse=True)
        for sub_folder in folder.sub_folders.values():
            self.sort_articles(sub_folder)

    def discover_articles(self) -> Folder:
        """Find every markdown file under the articles root.

        Returns:
            The root ``Folder`` of the article tree, with the articles of
            each folder sorted newest first.
        """
        # Equivalent to ls ./**.md; sorted so the discovery order (and the
        # relative order of articles created on the same date) is stable
        articles_paths = sorted(glob.glob(f'{glob.escape(self.articles_path)}/**/*.md', recursive=True))

        articles = Folder(url='/', name='index')
        for article_path in articles_paths:
            article = self.get_article(article_path)
            logger.debug(article.pretty_print)
            self.discover_folder_structure(article=article, articles=articles)

        self.sort_articles(articles)
        return articles