aboutsummaryrefslogtreecommitdiff
path: root/src/article_handler.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/article_handler.py')
-rw-r--r--src/article_handler.py141
1 files changed, 141 insertions, 0 deletions
diff --git a/src/article_handler.py b/src/article_handler.py
new file mode 100644
index 0000000..2655bcc
--- /dev/null
+++ b/src/article_handler.py
@@ -0,0 +1,141 @@
+import dataclasses
+import datetime
+import os
+import logging
+import pathlib
+import glob
+import typing
+import yaml
+
+
# Known exceptions: raised during generation for errors that make the program exit, and displayed to the user nicely
class ArticleHandlerException(Exception):
    """Base class for the known errors raised by the article handler."""
+
+
class ArticleMetaDataMalformed(ArticleHandlerException):
    """Raised when an article's metadata cannot be turned into a ``MetaData``."""
+
+
class ArticleNoMetaData(ArticleHandlerException):
    """Raised when an article's source does not start with a metadata block."""
+
+
@dataclasses.dataclass
class MetaData:
    """Metadata attached to a single article.

    The annotations are not enforced at runtime; the fields hold whatever
    values the instance was constructed with.
    """

    description: str
    created: datetime.date

    @property
    def pretty_print(self) -> str:
        """Return a human-readable, multi-line rendering of the fields.

        The text starts with a newline and puts each field on its own line.
        """
        # NOTE(review): the exact leading whitespace of the two field lines
        # should be confirmed against the repository copy of this file.
        description_line = f' Description: {self.description}'
        created_line = f' Created: {self.created}'
        return f'\n{description_line}\n{created_line}'
+
+
@dataclasses.dataclass
class Article:
    """A single article: its title, metadata, text and locations."""

    title: str
    meta_data: MetaData
    web_dir: tuple  # Folder names joined with "/" to build ``web_path``
    source_path: str | os.PathLike
    source: str
    folder_path: typing.Union[None, str | os.PathLike] = None

    @property
    def web_path(self):
        """Return the slash-separated path made of ``web_dir`` and the title.

        ``web_dir == ('a', 'b')`` with title ``t`` gives ``/a/b/t``; an empty
        ``web_dir`` gives ``/t``.
        """
        leading_slash = "/" if self.web_dir else ""
        joined_dirs = "/".join(self.web_dir)
        return f'{leading_slash}{joined_dirs}/{self.title}'

    @property
    def pretty_print(self) -> str:
        """Return a human-readable, multi-line summary of the article.

        The text starts with a newline; a source of 50 characters or more is
        shortened to its first 49 characters followed by ``...``.
        """
        if len(self.source) >= 50:
            # Truncate long source text
            source_preview = f'{self.source[0:49]}...'
        else:
            source_preview = self.source

        report_lines = (
            '',
            f'Title: {self.title}',
            f'Meta data: {self.meta_data.pretty_print}',
            f'Web dir: {self.web_dir}',
            f'Web path: {self.web_path}',
            f'Source path: {self.source_path}',
            f'Folder path: {self.folder_path}',
            f'Source: {source_preview}',
        )
        return '\n'.join(report_lines)
+
+
@dataclasses.dataclass
class Folder(list):
    """A node of the article tree: its own articles plus named sub-folders.

    Both fields default to fresh, empty containers for every instance.
    """

    # NOTE(review): nothing visible in this module uses the ``list`` base
    # class. Because no items are ever added to the list itself, an instance
    # is falsy like an empty list -- compare against ``None`` rather than
    # relying on truthiness. Confirm whether the base class is still needed.

    # Articles located directly in this folder
    articles: typing.List[Article] = dataclasses.field(default_factory=list)
    # Child folders keyed by folder name
    sub_folders: typing.Dict[str, typing.Self] = dataclasses.field(default_factory=dict)
+
+
def get_web_dir(path, title) -> typing.Tuple[str, ...]:
    """Return the folder names between the source root and the article.

    The first path component (the source directory) is always dropped, so
    the result is agnostic of how that directory is named.

    Args:
        path: Path of the article file (``str`` or ``os.PathLike``).
        title: Title of the article. This is either the file name without
            extension, or the name of the containing folder when the file
            is an ``index`` file.

    Returns:
        A tuple of folder names; empty when the article sits directly in
        the source directory.
    """
    path = os.fspath(path)
    parts = pathlib.Path(path).parts

    if parts:
        last_stem = os.path.splitext(parts[-1])[0]
        # Folder article: ".../<title>/index.<ext>" -> everything above <title>
        if last_stem == 'index' and len(parts) >= 2 and parts[-2] == title:
            return parts[1:-2]
        # File article: ".../<title>.<ext>" -> everything above the file.
        # Matching on whole path components (instead of a substring split)
        # keeps parent folders whose name contains the title intact.
        if last_stem == title or parts[-1] == title:
            return parts[1:-1]

    # Title is not the last component: keep the historical behaviour of
    # cutting the path at the first occurrence of the title.
    dir_structure = path.split(title)[0]
    return pathlib.Path(dir_structure).parts[1:]
+
+
def parse_article_meta_data(source: str) -> typing.Tuple[str, MetaData]:
    """Split an article's text into its body and its parsed metadata.

    The metadata is a YAML block at the very start of ``source``, opened by
    ``---`` and closed by the next ``---``.

    Args:
        source: Full text of the article file.

    Returns:
        A ``(body, meta_data)`` tuple, where ``body`` is everything after
        the closing ``---``.

    Raises:
        ArticleNoMetaData: ``source`` does not start with ``---``.
        ArticleMetaDataMalformed: The block is never closed, is not valid
            YAML, or does not provide exactly the fields of ``MetaData``.
    """
    if not source.startswith('---'):
        raise ArticleNoMetaData('No metadata found')

    meta_data_yml_end_char_index = source.find('---', 3)
    if meta_data_yml_end_char_index == -1:
        # Without this guard the -1 would silently be used as a slice bound
        raise ArticleMetaDataMalformed("Metadata block is not closed with '---'")

    meta_data_yml = source[3:meta_data_yml_end_char_index]
    # Strip meta data text from source, before feeding it to the markdown reader
    body = source[meta_data_yml_end_char_index + 3:]

    try:
        raw_meta_data = yaml.safe_load(meta_data_yml)
    except yaml.YAMLError as yaml_exception:
        raise ArticleMetaDataMalformed(str(yaml_exception)) from yaml_exception

    try:
        # A TypeError covers missing/unexpected keys and non-mapping YAML
        meta_data = MetaData(**raw_meta_data)
    except TypeError as type_error_exception:
        raise ArticleMetaDataMalformed(str(type_error_exception)) from type_error_exception
    return body, meta_data
+
+
def get_article(path: str | os.PathLike) -> Article:
    """Read the file at ``path`` and build an ``Article`` from it.

    A file whose name without extension is ``index`` represents its
    containing folder, so the folder name becomes the title. Any other file
    is titled after its own name without extension.

    Args:
        path: Location of the article file.

    Returns:
        The article, with the metadata block stripped from its ``source``.

    Raises:
        ArticleNoMetaData, ArticleMetaDataMalformed: Propagated from
            ``parse_article_meta_data``.
        OSError: The file cannot be opened or read.
    """
    logging.debug(path)
    filename, _file_extension = os.path.splitext(path)

    basename = os.path.basename(filename)
    if basename == 'index':
        # Article is the folder
        title = os.path.basename(os.path.dirname(path))
    else:
        # Article one file
        title = basename
    web_dir = get_web_dir(path, title)

    # Explicit encoding: the platform default differs between systems
    with open(path, 'r', encoding='utf-8') as file:
        raw_source = file.read()
    source, meta_data = parse_article_meta_data(raw_source)

    return Article(
        title=title,
        meta_data=meta_data,
        web_dir=web_dir,
        source_path=path,
        source=source,
    )
+
+
def discover_folder_structure(article: "Article", articles: "Folder"):
    """Place ``article`` into the folder tree rooted at ``articles``.

    Walks ``article.web_dir`` from the root, creating any missing sub-folder
    on the way, and appends the article to the folder reached at the end.
    The tree is modified in place.

    Args:
        article: Article to insert; only its ``web_dir`` is read.
        articles: Root folder of the tree.
    """
    previous_folder = articles
    for folder_name in article.web_dir:
        logging.debug(folder_name)

        current_folder = previous_folder.sub_folders.get(folder_name)
        # Compare with None explicitly instead of testing truthiness
        if current_folder is None:
            logging.debug('new')
            current_folder = Folder()
            previous_folder.sub_folders[folder_name] = current_folder
        else:
            logging.debug('reuse')
        previous_folder = current_folder
        logging.debug('')

    previous_folder.articles.append(article)
+
+
def discover_articles(path):
    """Build the folder tree of every ``.md`` article found below ``path``.

    Args:
        path: Directory to search recursively.

    Returns:
        The root ``Folder`` holding all discovered articles.

    Raises:
        ArticleHandlerException: Propagated from ``get_article`` when an
            article's metadata is missing or malformed.
    """
    # glob returns matches in arbitrary order; sorting keeps the resulting
    # tree (and anything generated from it) reproducible between runs.
    articles_paths = sorted(glob.glob(f'{path}/**/*.md', recursive=True))  # Equivalent to ls ./**.md
    articles = Folder()
    for article_path in articles_paths:
        article = get_article(article_path)
        logging.debug(article.pretty_print)
        discover_folder_structure(articles=articles, article=article)
    return articles