about summary refs log tree commit diff
path: root/src/article_handler.py
diff options
context:
space:
mode:
authoruser <user@node5.net>2024-03-10 12:04:05 +0100
committeruser <user@node5.net>2024-03-10 12:04:05 +0100
commit17c305641c0419664dc64adf210be797d5f16ee1 (patch)
tree2b2fef74cd03f9a9a214f1751729a82fb8d999b3 /src/article_handler.py
parent849dc4eabdb8ab175deee05ca54a39227e7e370c (diff)
rename article_handler.py -> article_generator.py
Diffstat (limited to 'src/article_handler.py')
-rw-r--r--src/article_handler.py172
1 files changed, 0 insertions, 172 deletions
diff --git a/src/article_handler.py b/src/article_handler.py
deleted file mode 100644
index ba60eff..0000000
--- a/src/article_handler.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import dataclasses
-import datetime
-import os
-import logging
-import pathlib
-import glob
-import typing
-import markdown
-import yaml
-
-
-# Known exceptions, these are raised when generating on program exiting error, and are displayed to the user nicely
-class ArticleHandlerException(Exception):
-    pass
-
-
-class ArticleMetaDataMalformed(ArticleHandlerException):
-    pass
-
-
-class ArticleNoMetaData(ArticleHandlerException):
-    pass
-
-
-@dataclasses.dataclass
-class WebPage:
-    url: str
-    name: str
-
-
-@dataclasses.dataclass
-class MetaData():
-    description: str
-    created: datetime.date
-
-    @property
-    def pretty_print(self) -> str:
-        return f'''
-    Description: {self.description}
-    Created: {self.created}'''
-
-
-def truncate(text: str, max_length: int = 50):
-    if len(text) < max_length:
-        return text
-    else:
-        return text[0:max_length - 1]
-
-
-@dataclasses.dataclass
-class Article(WebPage):
-    metadata: MetaData
-    web_dir: tuple
-    source_path: str
-    source: str
-    html: str
-    folder_path: typing.Union[None, str] = None
-
-    @property
-    def pretty_print(self) -> str:
-        return f'''
-Name:        {self.name}
-Metadata:    {self.metadata.pretty_print}
-Web dir:     {self.web_dir}
-URL:         {self.url}
-Source path: {self.source_path}
-Folder path: {self.folder_path}
-HTML:        {truncate(self.html)}
-Source:      {truncate(self.source)}'''
-
-
-@dataclasses.dataclass
-class Folder(WebPage):
-    articles: typing.List[Article] = dataclasses.field(default_factory=list)
-    sub_folders: typing.Dict[str, object] = dataclasses.field(default_factory=dict)
-
-
-def get_web_dir(path, name) -> tuple[str, ...]:
-    dir_structure = path.split(name)[0]
-    # Split into tuple, remove first part, assemble to path again, all to remove the first source dir agnostically 
-    dir_structure_prefix_striped = pathlib.Path(dir_structure).parts[2:]
-    return dir_structure_prefix_striped
-
-
-def parse_article_meta_data(source: str) -> typing.Tuple[str, MetaData]:
-    if source.startswith('---'):
-        meta_data_yml_end_char_index = source.find('---', 3)
-        meta_data_yml = source[3:meta_data_yml_end_char_index]
-        # Strip metadata text from source, before feeding it to the markdown reader
-        source = source[meta_data_yml_end_char_index + 3:]
-        meta_data = yaml.safe_load(meta_data_yml)
-
-        try:
-            meta_data = MetaData(**meta_data)
-        except TypeError as type_error_exception:
-            raise ArticleMetaDataMalformed(str(type_error_exception))
-        return source, meta_data
-    else:
-        raise ArticleNoMetaData(f'No metadata found')
-
-
-def get_article(path: str) -> Article:
-    logging.debug(path)
-    article_args = {}
-    filename, file_extension = os.path.splitext(path)
-
-    basename = os.path.basename(filename)
-    if basename == 'index':
-        # Article is the folder
-        article_folder_name = os.path.dirname(path)
-        article_args['folder_path'] = article_folder_name
-        dir_basename = os.path.basename(article_folder_name)
-        article_args['name'] = dir_basename
-    else:
-        # Article one file
-        article_args['name'] = os.path.basename(filename)
-    article_args['web_dir'] = get_web_dir(path, article_args['name'])
-    article_args['source_path'] = path
-
-    article_args[
-        'url'] = f'{"/" if article_args["web_dir"] else ""}{"/".join(article_args["web_dir"])}/{article_args["name"]}'
-
-    with open(path, 'r') as file:
-        source = file.read()
-    article_args['source'], article_args['metadata'] = parse_article_meta_data(source)
-    article_args['html'] = markdown.markdown(article_args['source'], extensions=
-    [
-        'fenced_code',
-        'codehilite',
-        'tables',
-        'toc'  # Automatically generates unique IDs for headers allowing for ID URL referral (Anchor)
-    ])
-
-    article = Article(**article_args)
-    return article
-
-
-def discover_folder_structure(article: Article, articles: Folder):
-    previous_folder = articles
-    for folder_name in article.web_dir:
-        logging.debug(folder_name)
-
-        if folder_name not in previous_folder.sub_folders:
-            logging.debug('new')
-            current_folder = Folder(url=f'{previous_folder.url}{folder_name}/', name=folder_name)
-            previous_folder.sub_folders[folder_name] = current_folder
-        else:
-            logging.debug('reuse')
-            current_folder = previous_folder.sub_folders[folder_name]
-        previous_folder = current_folder
-        logging.debug('')
-
-    previous_folder.articles.append(article)
-
-
-def sort_articles(folder: Folder):
-    folder.articles = sorted(folder.articles,
-                             key=lambda x: x.metadata.created if x.metadata.created is not None else datetime.date.min,
-                             reverse=True)
-    for folder in folder.sub_folders.values():
-        sort_articles(folder)
-
-
-def discover_articles(path):
-    articles_paths = glob.glob(f'{path}/**/*.md', recursive=True)  # Equivalent to ls ./**.md
-    articles = Folder(url='/', name='index')
-    for article_path in articles_paths:
-        article = get_article(article_path)
-        logging.debug(article.pretty_print)
-        discover_folder_structure(articles=articles, article=article)
-    sort_articles(articles)
-    return articles