diff options
| author | user <user@node5.net> | 2024-03-11 15:19:35 +0100 |
|---|---|---|
| committer | user <user@node5.net> | 2024-03-11 15:19:35 +0100 |
| commit | b692ac3bafb0559a3d2c132187bafda1ce008ee7 (patch) | |
| tree | d18510b28226a0c0f3466f80060f3a727a7310cb | |
| parent | 8c956c553f36d9de7061ab27efb44d7e69ba52b7 (diff) | |
rename article_generator.py module -> article, make functions into article generator class
| -rw-r--r-- | src/article.py | 177 | ||||
| -rw-r--r-- | src/article_generator.py | 177 | ||||
| -rw-r--r-- | src/blog_node5_net.py | 17 |
3 files changed, 186 insertions, 185 deletions
diff --git a/src/article.py b/src/article.py new file mode 100644 index 0000000..afca0df --- /dev/null +++ b/src/article.py @@ -0,0 +1,177 @@ +import dataclasses +import datetime +import os +import logging +import pathlib +import glob +import typing +import markdown +import yaml + + +# Known exceptions, these are raised when generating on program exiting error, and are displayed to the user nicely +class ArticleHandlerException(Exception): + pass + + +class ArticleMetaDataMalformed(ArticleHandlerException): + pass + + +class ArticleNoMetaData(ArticleHandlerException): + pass + + +@dataclasses.dataclass +class WebPage: + url: str + name: str + + +@dataclasses.dataclass +class MetaData(): + description: str + created: datetime.date + + @property + def pretty_print(self) -> str: + return f''' + Description: {self.description} + Created: {self.created}''' + + +def truncate(text: str, max_length: int = 50): + if len(text) < max_length: + return text + else: + return text[0:max_length - 1] + + +@dataclasses.dataclass +class Article(WebPage): + metadata: MetaData + web_dir: tuple + source_path: str + source: str + html: str + modified: datetime.datetime + folder_path: typing.Union[None, str] = None + + @property + def pretty_print(self) -> str: + return f''' +Name: {self.name} +Metadata: {self.metadata.pretty_print} +Web dir: {self.web_dir} +URL: {self.url} +Source path: {self.source_path} +Folder path: {self.folder_path} +Modified: {self.modified} +HTML: {truncate(self.html)} +Source: {truncate(self.source)}''' + + +@dataclasses.dataclass +class Folder(WebPage): + articles: typing.List[Article] = dataclasses.field(default_factory=list) + sub_folders: typing.Dict[str, object] = dataclasses.field(default_factory=dict) + + +class ArticleGenerator: + def __init__(self, articles_path: str): + self.articles_path = articles_path + + def get_web_dir(self, path, name) -> tuple[str, ...]: + dir_structure = path.split(name)[0] + # Split into tuple, remove first part, assemble to path again, all to remove the first source dir agnostically + dir_structure_prefix_striped = pathlib.Path(dir_structure).parts[2:] + return dir_structure_prefix_striped + + def parse_article_meta_data(self, source: str) -> typing.Tuple[str, MetaData]: + if source.startswith('---'): + meta_data_yml_end_char_index = source.find('---', 3) + meta_data_yml = source[3:meta_data_yml_end_char_index] + # Strip metadata text from source, before feeding it to the markdown reader + source = source[meta_data_yml_end_char_index + 3:] + meta_data = yaml.safe_load(meta_data_yml) + + try: + meta_data = MetaData(**meta_data) + except TypeError as type_error_exception: + raise ArticleMetaDataMalformed(str(type_error_exception)) + return source, meta_data + else: + raise ArticleNoMetaData(f'No metadata found') + + def get_article(self, path: str) -> Article: + logging.debug(path) + article_args = {} + filename, file_extension = os.path.splitext(path) + + basename = os.path.basename(filename) + if basename == 'index': + # Article is the folder + article_folder_name = os.path.dirname(path) + article_args['folder_path'] = article_folder_name + dir_basename = os.path.basename(article_folder_name) + article_args['name'] = dir_basename + else: + # Article one file + article_args['name'] = os.path.basename(filename) + article_args['web_dir'] = self.get_web_dir(path, article_args['name']) + article_args['source_path'] = path + + article_args[ + 'url'] = f'{"/" if article_args["web_dir"] else ""}{"/".join(article_args["web_dir"])}/{article_args["name"]}' + + article_args['modified'] = datetime.datetime.utcfromtimestamp(os.path.getmtime(path)).replace( + tzinfo=datetime.datetime.now().astimezone().tzinfo) + + with open(path, 'r') as file: + source = file.read() + article_args['source'], article_args['metadata'] = self.parse_article_meta_data(source) + article_args['html'] = markdown.markdown(article_args['source'], extensions= + [ + 'fenced_code', + 'codehilite', + 'tables', + 'toc' # Automatically generates unique IDs for headers allowing for ID URL referral (Anchor) + ]) + + article = Article(**article_args) + return article + + def discover_folder_structure(self, article: Article, articles: Folder): + previous_folder = articles + for folder_name in article.web_dir: + logging.debug(folder_name) + + if folder_name not in previous_folder.sub_folders: + logging.debug('new') + current_folder = Folder(url=f'{previous_folder.url}{folder_name}/', name=folder_name) + previous_folder.sub_folders[folder_name] = current_folder + else: + logging.debug('reuse') + current_folder = previous_folder.sub_folders[folder_name] + previous_folder = current_folder + logging.debug('') + + previous_folder.articles.append(article) + + def sort_articles(self, folder: Folder): + folder.articles = sorted(folder.articles, + key=lambda + x: x.metadata.created if x.metadata.created is not None else datetime.date.min, + reverse=True) + for folder in folder.sub_folders.values(): + self.sort_articles(folder) + + def discover_articles(self): + articles_paths = glob.glob(f'{self.articles_path}/**/*.md', recursive=True) # Equivalent to ls ./**.md + articles = Folder(url='/', name='index') + for article_path in articles_paths: + article = self.get_article(article_path) + logging.debug(article.pretty_print) + self.discover_folder_structure(articles=articles, article=article) + self.sort_articles(articles) + return articles diff --git a/src/article_generator.py b/src/article_generator.py deleted file mode 100644 index 828f31a..0000000 --- a/src/article_generator.py +++ /dev/null @@ -1,177 +0,0 @@ -import dataclasses -import datetime -import os -import logging -import pathlib -import glob -import typing -import markdown -import yaml - - -# Known exceptions, these are raised when generating on program exiting error, and are displayed to the user nicely -class ArticleHandlerException(Exception): - pass - - -class ArticleMetaDataMalformed(ArticleHandlerException): - pass - - -class ArticleNoMetaData(ArticleHandlerException): - pass - - -@dataclasses.dataclass -class WebPage: - url: str - name: str - - -@dataclasses.dataclass -class MetaData(): - description: str - created: datetime.date - - @property - def pretty_print(self) -> str: - return f''' - Description: {self.description} - Created: {self.created}''' - - -def truncate(text: str, max_length: int = 50): - if len(text) < max_length: - return text - else: - return text[0:max_length - 1] - - -@dataclasses.dataclass -class Article(WebPage): - metadata: MetaData - web_dir: tuple - source_path: str - source: str - html: str - modified: datetime.datetime - folder_path: typing.Union[None, str] = None - - @property - def pretty_print(self) -> str: - return f''' -Name: {self.name} -Metadata: {self.metadata.pretty_print} -Web dir: {self.web_dir} -URL: {self.url} -Source path: {self.source_path} -Folder path: {self.folder_path} -Modified: {self.modified} -HTML: {truncate(self.html)} -Source: {truncate(self.source)}''' - - -@dataclasses.dataclass -class Folder(WebPage): - articles: typing.List[Article] = dataclasses.field(default_factory=list) - sub_folders: typing.Dict[str, object] = dataclasses.field(default_factory=dict) - - -def get_web_dir(path, name) -> tuple[str, ...]: - dir_structure = path.split(name)[0] - # Split into tuple, remove first part, assemble to path again, all to remove the first source dir agnostically - dir_structure_prefix_striped = pathlib.Path(dir_structure).parts[2:] - return dir_structure_prefix_striped - - -def parse_article_meta_data(source: str) -> typing.Tuple[str, MetaData]: - if source.startswith('---'): - meta_data_yml_end_char_index = source.find('---', 3) - meta_data_yml = source[3:meta_data_yml_end_char_index] - # Strip metadata text from source, before feeding it to the markdown reader - source = source[meta_data_yml_end_char_index + 3:] - meta_data = yaml.safe_load(meta_data_yml) - - try: - meta_data = MetaData(**meta_data) - except TypeError as type_error_exception: - raise ArticleMetaDataMalformed(str(type_error_exception)) - return source, meta_data - else: - raise ArticleNoMetaData(f'No metadata found') - - -def get_article(path: str) -> Article: - logging.debug(path) - article_args = {} - filename, file_extension = os.path.splitext(path) - - basename = os.path.basename(filename) - if basename == 'index': - # Article is the folder - article_folder_name = os.path.dirname(path) - article_args['folder_path'] = article_folder_name - dir_basename = os.path.basename(article_folder_name) - article_args['name'] = dir_basename - else: - # Article one file - article_args['name'] = os.path.basename(filename) - article_args['web_dir'] = get_web_dir(path, article_args['name']) - article_args['source_path'] = path - - article_args[ - 'url'] = f'{"/" if article_args["web_dir"] else ""}{"/".join(article_args["web_dir"])}/{article_args["name"]}' - - article_args['modified'] = datetime.datetime.utcfromtimestamp(os.path.getmtime(path)).replace( - tzinfo=datetime.datetime.now().astimezone().tzinfo) - - with open(path, 'r') as file: - source = file.read() - article_args['source'], article_args['metadata'] = parse_article_meta_data(source) - article_args['html'] = markdown.markdown(article_args['source'], extensions= - [ - 'fenced_code', - 'codehilite', - 'tables', - 'toc' # Automatically generates unique IDs for headers allowing for ID URL referral (Anchor) - ]) - - article = Article(**article_args) - return article - - -def discover_folder_structure(article: Article, articles: Folder): - previous_folder = articles - for folder_name in article.web_dir: - logging.debug(folder_name) - - if folder_name not in previous_folder.sub_folders: - logging.debug('new') - current_folder = Folder(url=f'{previous_folder.url}{folder_name}/', name=folder_name) - previous_folder.sub_folders[folder_name] = current_folder - else: - logging.debug('reuse') - current_folder = previous_folder.sub_folders[folder_name] - previous_folder = current_folder - logging.debug('') - - previous_folder.articles.append(article) - - -def sort_articles(folder: Folder): - folder.articles = sorted(folder.articles, - key=lambda x: x.metadata.created if x.metadata.created is not None else datetime.date.min, - reverse=True) - for folder in folder.sub_folders.values(): - sort_articles(folder) - - -def discover_articles(path): - articles_paths = glob.glob(f'{path}/**/*.md', recursive=True) # Equivalent to ls ./**.md - articles = Folder(url='/', name='index') - for article_path in articles_paths: - article = get_article(article_path) - logging.debug(article.pretty_print) - discover_folder_structure(articles=articles, article=article) - sort_articles(articles) - return articles diff --git a/src/blog_node5_net.py b/src/blog_node5_net.py index d4a2871..0782810 100644 --- a/src/blog_node5_net.py +++ b/src/blog_node5_net.py @@ -9,7 +9,7 @@ import urllib import flask import yaml -import article_generator +import article import db_handler import telegram_handler @@ -19,12 +19,14 @@ logger = logging.getLogger(__name__) site_root_folder_path = 'blog.node5.net' +article_generator = article.ArticleGenerator(os.path.join(site_root_folder_path, 'articles')) -def get_articles() -> article_generator.Folder: + +def get_articles() -> article.Folder: try: - articles = article_generator.discover_articles(os.path.join(site_root_folder_path, 'articles')) + articles = article_generator.discover_articles() return articles - except article_generator.ArticleHandlerException as exception: # Known exceptions + except article.ArticleHandlerException as exception: # Known exceptions logger.error(exception) exit(1) # Exit code 1: Code for generic error @@ -40,8 +42,8 @@ app = flask.Flask(__name__, template_folder=os.path.join('..', site_root_folder_ with open(os.path.join(site_root_folder_path, 'motd.yml'), 'r') as file: motd_list = yaml.safe_load(file.read()) -folders_by_url: typing.Dict[str, article_generator.Folder] = {} -articles_by_url: typing.Dict[str, article_generator.Article] = {} +folders_by_url: typing.Dict[str, article.Folder] = {} +articles_by_url: typing.Dict[str, article.Article] = {} @app.context_processor # Always inject site title to all render_templates @@ -100,7 +102,7 @@ def view_article(): return flask.render_template('article.html', article=article) -def register_urls(folder: article_generator.Folder): +def register_urls(folder: article.Folder): # Use recursion to traverse folder tree structure if strip_trailing_slash(folder.url) not in folders_by_url.keys(): # If statement is because it can be called multiple times, which would cause an error @@ -129,7 +131,6 @@ def load_articles(): register_urls(articles) - load_articles() |
