aboutsummaryrefslogtreecommitdiff
path: root/src/article_handler.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/article_handler.py')
-rw-r--r-- src/article_handler.py 172
1 files changed, 0 insertions, 172 deletions
diff --git a/src/article_handler.py b/src/article_handler.py
deleted file mode 100644
index ba60eff..0000000
--- a/src/article_handler.py
+++ /dev/null
@@ -1,172 +0,0 @@
-import dataclasses
-import datetime
-import os
-import logging
-import pathlib
-import glob
-import typing
-import markdown
-import yaml
-
-
-# Known exceptions: raised during generation for errors that make the program exit, and displayed to the user nicely
-class ArticleHandlerException(Exception):
-    """Base class of the known article handler errors."""
-
-
-class ArticleMetaDataMalformed(ArticleHandlerException):
-    """An article's metadata could not be turned into a MetaData object."""
-
-
-class ArticleNoMetaData(ArticleHandlerException):
-    """An article's source does not start with a metadata block."""
-
-
-# Base record shared by Article and Folder: a page identified by a URL and a name.
-@dataclasses.dataclass
-class WebPage:
- # URL of the page (the root Folder is created with url='/')
- url: str
- # Name of the page (article name or folder name)
- name: str
-
-
-# Article metadata. Instances are built with MetaData(**mapping) from the YAML block at the
-# top of an article source, so the field names are the keys that block has to provide.
-@dataclasses.dataclass
-class MetaData():
- description: str
- # NOTE(review): sort_articles tolerates created being None although it is annotated as a date
- created: datetime.date
-
- @property
- def pretty_print(self) -> str:
- # Multi-line, human-readable dump of the fields; embedded in Article.pretty_print
- return f'''
- Description: {self.description}
- Created: {self.created}'''
-
-
-def truncate(text: str, max_length: int = 50) -> str:
-    """Return ``text`` shortened to at most ``max_length`` characters.
-
-    Text that already fits, including text of exactly ``max_length``
-    characters, is returned unchanged; longer text is cut down to its first
-    ``max_length`` characters.  A zero or negative ``max_length`` yields an
-    empty string.
-    """
-    # Clamp the limit: a negative slice bound would count from the end of the
-    # string and return almost all of it instead of cutting it short.
-    limit = max(max_length, 0)
-    if len(text) <= limit:
-        return text
-    return text[:limit]
-
-
-# A single article: the WebPage fields (url, name) plus its metadata, source, rendered HTML
-# and where it was read from.
-@dataclasses.dataclass
-class Article(WebPage):
- # Metadata parsed from the block at the top of the source file
- metadata: MetaData
- # Directory components the article lives under; joined with '/' to build the URL
- web_dir: tuple
- # Path of the markdown file the article was read from
- source_path: str
- # Article source with the metadata block stripped
- source: str
- # HTML rendered from source
- html: str
- # Directory of the article when it is an index file inside its own folder, otherwise None
- folder_path: typing.Union[None, str] = None
-
- @property
- def pretty_print(self) -> str:
- # Human-readable dump of the article; html and source are shortened with truncate()
- return f'''
-Name: {self.name}
-Metadata: {self.metadata.pretty_print}
-Web dir: {self.web_dir}
-URL: {self.url}
-Source path: {self.source_path}
-Folder path: {self.folder_path}
-HTML: {truncate(self.html)}
-Source: {truncate(self.source)}'''
-
-
-# A node of the folder tree: the articles filed directly in it and its child folders.
-@dataclasses.dataclass
-class Folder(WebPage):
- # Articles filed directly in this folder
- articles: typing.List[Article] = dataclasses.field(default_factory=list)
- # Child folders keyed by folder name; discover_folder_structure stores Folder instances here
- sub_folders: typing.Dict[str, object] = dataclasses.field(default_factory=dict)
-
-
-def get_web_dir(path, name) -> tuple[str, ...]:
-    """Return the directory components an article is published under.
-
-    ``path`` is the article's source path and ``name`` the article's name:
-    either the stem of the markdown file or, for an index article, the name
-    of the folder that holds it.  The components in front of the article's
-    own path component are kept, and the first two of them (the source
-    directory prefix) are dropped.
-
-    The article is located by comparing whole path components rather than by
-    splitting the path string on ``name``, so a name that also occurs inside
-    an ancestor directory name (``notes/note.md``, ``python/python.md``) does
-    not cut the path in the wrong place.
-    """
-    parts = pathlib.Path(path).parts
-    # When nothing matches every component is kept
-    article_index = len(parts)
-    # Search from the end so the component closest to the file wins
-    for index in range(len(parts) - 1, -1, -1):
-        component = parts[index]
-        if component == name or pathlib.PurePath(component).stem == name:
-            article_index = index
-            break
-    # Remove the first two parts, all to remove the first source dir agnostically
-    return parts[:article_index][2:]
-
-
-def parse_article_meta_data(source: str) -> typing.Tuple[str, MetaData]:
-    """Split an article source into its body and its parsed metadata.
-
-    The source has to start with a YAML block enclosed by two ``---``
-    markers.  The YAML is loaded with ``yaml.safe_load`` and passed to
-    ``MetaData`` as keyword arguments.
-
-    Returns:
-        The source with the metadata block removed, and the MetaData object.
-
-    Raises:
-        ArticleNoMetaData: the source does not start with ``---``.
-        ArticleMetaDataMalformed: the closing ``---`` is missing, the YAML
-            cannot be parsed, or its content does not fit MetaData's fields.
-    """
-    delimiter = '---'
-    if not source.startswith(delimiter):
-        raise ArticleNoMetaData('No metadata found')
-
-    block_end = source.find(delimiter, len(delimiter))
-    if block_end == -1:
-        # find() signals "not found" with -1; used as a slice bound it would
-        # silently treat the article body as metadata
-        raise ArticleMetaDataMalformed('Metadata block is not closed with ---')
-
-    meta_data_yml = source[len(delimiter):block_end]
-    # Strip metadata text from source, before feeding it to the markdown reader
-    body = source[block_end + len(delimiter):]
-
-    try:
-        fields = yaml.safe_load(meta_data_yml)
-    except yaml.YAMLError as yaml_exception:
-        raise ArticleMetaDataMalformed(str(yaml_exception)) from yaml_exception
-
-    try:
-        meta_data = MetaData(**fields)
-    except TypeError as type_error_exception:
-        # Raised for missing or unexpected keys, and when the YAML is not a mapping
-        raise ArticleMetaDataMalformed(str(type_error_exception)) from type_error_exception
-    return body, meta_data
-
-
-def get_article(path: str) -> Article:
-    """Read the markdown file at ``path`` and build its Article.
-
-    A file named ``index`` represents its folder: the article takes the
-    folder's name and records the folder in ``folder_path``.  Any other file
-    is named after the file itself.  The URL is the article's web dir joined
-    with ``/`` followed by the name.
-
-    Raises whatever ``parse_article_meta_data`` raises for a source without
-    usable metadata.
-    """
-    logging.debug(path)
-    basename = os.path.basename(os.path.splitext(path)[0])
-
-    folder_path = None
-    if basename == 'index':
-        # Article is the folder
-        folder_path = os.path.dirname(path)
-        name = os.path.basename(folder_path)
-    else:
-        # Article one file
-        name = basename
-
-    web_dir = get_web_dir(path, name)
-    url = f'{"/" if web_dir else ""}{"/".join(web_dir)}/{name}'
-
-    # Explicit encoding: without it the decoding depends on the platform's locale
-    with open(path, 'r', encoding='utf-8') as file:
-        raw_source = file.read()
-
-    source, metadata = parse_article_meta_data(raw_source)
-    html = markdown.markdown(source, extensions=[
-        'fenced_code',
-        'codehilite',
-        'tables',
-        'toc',  # Automatically generates unique IDs for headers allowing for ID URL referral (Anchor)
-    ])
-
-    return Article(
-        url=url,
-        name=name,
-        metadata=metadata,
-        web_dir=web_dir,
-        source_path=path,
-        source=source,
-        html=html,
-        folder_path=folder_path,
-    )
-
-
-def discover_folder_structure(article: Article, articles: Folder):
- # Files `article` into the folder tree rooted at `articles`: walks article.web_dir one
- # folder name at a time, creating Folder nodes that do not exist yet, and appends the
- # article to the articles list of the folder it ends up in.
- # NOTE(review): block nesting is inferred from the statements; confirm against the original indentation
- previous_folder = articles
- for folder_name in article.web_dir:
- logging.debug(folder_name)
-
- if folder_name not in previous_folder.sub_folders:
- logging.debug('new')
- # A new folder's URL is its parent's URL followed by the folder name and a trailing slash
- current_folder = Folder(url=f'{previous_folder.url}{folder_name}/', name=folder_name)
- previous_folder.sub_folders[folder_name] = current_folder
- else:
- logging.debug('reuse')
- current_folder = previous_folder.sub_folders[folder_name]
- # Descend one level for the next folder name
- previous_folder = current_folder
- logging.debug('')
-
- previous_folder.articles.append(article)
-
-
-def sort_articles(folder: Folder):
-    """Sort the articles of ``folder`` and of all its sub folders, newest first.
-
-    Articles whose metadata has no creation date are ordered as if they were
-    created on ``datetime.date.min``.
-    """
-    def created_or_oldest(article):
-        created = article.metadata.created
-        if created is None:
-            return datetime.date.min
-        return created
-
-    folder.articles = sorted(folder.articles, key=created_or_oldest, reverse=True)
-    for sub_folder in folder.sub_folders.values():
-        sort_articles(sub_folder)
-
-
-def discover_articles(path):
-    """Build the folder tree of every markdown article found below ``path``.
-
-    Every ``*.md`` file under ``path`` is loaded with ``get_article`` and
-    filed into the tree by ``discover_folder_structure``; the articles of
-    every folder are then ordered by ``sort_articles``.
-
-    Returns the root Folder, created with url ``/`` and name ``index``.
-    """
-    # Equivalent to ls ./**.md
-    # glob returns its matches in arbitrary order; sorting the paths keeps the
-    # folder order and the order of same-date articles identical between runs
-    articles_paths = sorted(glob.glob(f'{path}/**/*.md', recursive=True))
-    articles = Folder(url='/', name='index')
-    for article_path in articles_paths:
-        article = get_article(article_path)
-        logging.debug(article.pretty_print)
-        discover_folder_structure(articles=articles, article=article)
-    sort_articles(articles)
-    return articles