#!/usr/bin/python
"""Static blog site generator.

Converts Markdown articles (optionally prefixed with a YAML meta data block
delimited by ``---``) to HTML through Jinja2 templates, copies static content
to the output directory and can serve the result with live reload.
"""
import argparse      # Command line arguments
import dataclasses   # Shorthand data classes
import datetime      # Created, modified date / read article dates metadata
import glob          # Find *.md
import logging       # Log messages
import os            # Files / directories
import shutil        # Copy files and directories

import jinja2        # HTML Templates
import livereload    # Generate on content source file change and reload browser
import markdown      # Markdown -> HTML
import yaml          # Config file

logging.basicConfig(format='%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
                    datefmt='%Y-%m-%d:%H:%M:%S')
logger = logging.getLogger('blog_generator')
logger.setLevel(level=logging.INFO)  # Only log INFO and above (not debug)


# Known exceptions: raised for errors that end the program, and displayed to
# the user nicely (message only, no stack trace) by main().
class BlogGeneratorException(Exception):
    """Base class for all errors raised deliberately by the generator."""


class FoundMoreThanOneSourceFileException(BlogGeneratorException):
    """More than one source file was found where exactly one was expected."""


class MarkDownException(BlogGeneratorException):
    """An article's markdown could not be converted to HTML."""


class TemplateNotFoundException(BlogGeneratorException):
    """A Jinja2 template needed for rendering could not be found."""


class SourceDirectoryNotFoundException(BlogGeneratorException):
    """The site source directory, or one of its required sub-directories, is missing."""


@dataclasses.dataclass
class Article:
    """One article: where it comes from, where it is written, and its meta data."""

    dir_path: str | None  # None when the article is a single markdown file
    source_path: str
    source: str
    name: str
    html: str
    output_dir_path: str
    output_path: str
    source_file_name: str
    # Meta data. These fields must have defaults: "has a default" is what
    # marks a field as a meta data field (see meta_data_keys below).
    title: str | None = None
    description: str | None = None
    created: datetime.date | None = None
    modified: datetime.date | None = None


# Names of the Article fields that may be set from an article's meta data
# block, i.e. the fields declared with a default value. Keys of the meta data
# block that are not in this list are not passed to the Article constructor.
meta_data_keys = [
    field.name
    for field in dataclasses.fields(Article)
    if field.default is not dataclasses.MISSING
]


def _split_meta_data(source: str, article_name: str) -> tuple[dict, str]:
    """Split the leading ``---`` delimited YAML block from an article source.

    Returns a ``(meta_data, body)`` tuple. ``meta_data`` is an empty dict when
    the source has no meta data block, the block is not terminated, is empty,
    or does not contain a YAML mapping. Errors raised by ``yaml.safe_load``
    are not caught here.
    """
    delimiter = '---'
    if not source.startswith(delimiter):
        logger.warning(f'No metadata found in article: {article_name}')
        return {}, source

    end_index = source.find(delimiter, len(delimiter))
    if end_index == -1:
        # Without a closing delimiter there is no way to tell where the meta
        # data stops, so leave the source untouched instead of slicing with -1.
        logger.warning(f'Unterminated metadata block in article: {article_name}')
        return {}, source

    meta_data = yaml.safe_load(source[len(delimiter):end_index])
    # Strip meta data text from source, before feeding it to the markdown reader
    body = source[end_index + len(delimiter):]
    logger.debug(f'Meta data: {meta_data}')

    if meta_data is None:  # Empty block, e.g. "---\n---"
        return {}, body
    if not isinstance(meta_data, dict):
        logger.warning(f'Metadata is not a key/value mapping in article: {article_name}')
        return {}, body
    return meta_data, body


def _render_template(jinja_env: jinja2.environment.Environment, template_name: str, **context) -> str:
    """Render ``template_name`` with ``context`` and return the resulting text.

    Raises:
        TemplateNotFoundException: if Jinja2 reports a missing template while
            loading or rendering ``template_name``.
    """
    try:
        return jinja_env.get_template(template_name).render(**context)
    except jinja2.TemplateNotFound as ex:
        raise TemplateNotFoundException(
            f'Template not found while rendering {template_name}: {ex}') from ex


def generate_article(jinja_env: jinja2.environment.Environment, known_paths: dict, article_source_path: str):
    """Generate the HTML page for a single article and return the Article.

    An article is either a markdown file that is not called ``index`` (the
    article is named after the file), or a file called ``index`` inside a
    folder (the article is named after the folder, and the folder's other
    entries are copied next to the generated page).

    Args:
        jinja_env: Environment used to load and render ``article.html``.
        known_paths: Must contain the key ``'output'``, the output directory.
        article_source_path: Path of the article's markdown source file.

    Returns:
        The generated Article.

    Raises:
        MarkDownException: if the markdown conversion fails.
        TemplateNotFoundException: if a template cannot be found.
    """
    logger.info(f'Generating article: {article_source_path}')

    article_source_file = os.path.basename(article_source_path)
    article_source_file_name = os.path.splitext(article_source_file)[0]
    article_only_one_file = article_source_file_name != 'index'
    if article_only_one_file:
        # Use file name
        article_dir_path = None
        article_name = article_source_file_name
    else:
        # Use folder name
        article_dir_path = os.path.dirname(article_source_path)
        article_name = os.path.basename(article_dir_path)
    logger.debug(article_name)

    # Explicit encoding so the result does not depend on the platform default.
    with open(article_source_path, 'r', encoding='utf-8') as file:
        source = file.read()

    meta_data, source = _split_meta_data(source, article_name)
    # Only keys matching an Article meta data field can be passed to its constructor.
    known_meta_data = {key: value for key, value in meta_data.items() if key in meta_data_keys}

    try:
        html = markdown.markdown(source, extensions=['fenced_code', 'codehilite', 'tables'])
    except Exception as ex:
        # Keep the original error as the cause, and tell the user which file failed.
        raise MarkDownException(f'Could not convert markdown to HTML: {article_source_path}') from ex

    # Create output
    article_output_dir_path = os.path.join(known_paths['output'], article_name)
    os.makedirs(article_output_dir_path, exist_ok=True)
    article_output_path = os.path.join(article_output_dir_path, 'index.html')

    article = Article(name=article_name,
                      dir_path=article_dir_path,
                      source_path=article_source_path,
                      source=source,
                      html=html,
                      output_dir_path=article_output_dir_path,
                      output_path=article_output_path,
                      source_file_name=article_source_file_name,
                      **known_meta_data)

    # Render before opening the file, so a rendering error does not leave an
    # empty page behind; mode "w" replaces any existing output file.
    output = _render_template(jinja_env, 'article.html', article=article)
    with open(article.output_path, 'w', encoding='utf-8') as file:
        file.write(output)

    if not article_only_one_file:
        # Copy other article files e.g. images.
        # NOTE: the markdown source itself is copied as well.
        for source_file_name in os.listdir(article.dir_path):
            source_file_path = os.path.join(article.dir_path, source_file_name)
            destination_file_path = os.path.join(article.output_dir_path, source_file_name)
            if os.path.isdir(source_file_path):
                # copyfile() cannot copy directories; merge into an existing
                # destination so regenerating the article does not fail.
                shutil.copytree(source_file_path, destination_file_path, dirs_exist_ok=True)
            else:
                shutil.copyfile(source_file_path, destination_file_path)

    return article


def _created_sort_key(article: Article) -> datetime.date:
    """Return a date usable to order articles by their ``created`` meta data.

    YAML may yield a date, a datetime or a plain string for ``created``;
    comparing those with each other raises TypeError, so all of them are
    reduced to a date. Missing or unparsable values sort as the oldest date.
    """
    created = article.created
    if isinstance(created, datetime.datetime):  # Checked first: datetime is a date subclass
        return created.date()
    if isinstance(created, datetime.date):
        return created
    if isinstance(created, str):
        try:
            return datetime.datetime.fromisoformat(created).date()
        except ValueError:
            logger.warning(f'Could not read created date "{created}" of article: {article.name}')
    return datetime.date.min


def generate_rest_of_site(jinja_env: jinja2.environment.Environment, known_paths: dict, articles: list[Article]):
    """Generate the index page and copy the static content to the output directory.

    Args:
        jinja_env: Environment used to load and render ``index.html``.
        known_paths: Must contain the keys ``'output'`` and ``'static'``.
        articles: Articles to list on the index page.

    Raises:
        TemplateNotFoundException: if a template cannot be found.
    """
    logger.info('Generating rest of site')

    # Sort articles by created date descending
    articles_sorted = sorted(articles, key=_created_sort_key, reverse=True)

    # Generate index; mode "w" so an existing index is replaced, not appended to
    index_output_path = os.path.join(known_paths['output'], 'index.html')
    output = _render_template(jinja_env, 'index.html', articles=articles_sorted)
    with open(index_output_path, 'w', encoding='utf-8') as file:
        file.write(output)

    # Copy static content
    for item in os.listdir(known_paths['static']):
        source = os.path.join(known_paths['static'], item)
        destination = os.path.join(known_paths['output'], item)
        if os.path.isfile(source):
            shutil.copy(source, destination)
        elif os.path.isdir(source):
            # Merge into the destination if it already exists
            shutil.copytree(source, destination, dirs_exist_ok=True)


def generate_all(jinja_env: jinja2.environment.Environment, known_paths: dict, articles_source_path: list[str]):
    """Recreate the output directory and generate every article plus the rest of the site.

    Args:
        jinja_env: Environment used to render the templates.
        known_paths: Must contain the keys ``'output'`` and ``'static'``.
        articles_source_path: Paths of the markdown source files to generate.
    """
    logger.info('Generating all')

    if os.path.exists(known_paths['output']):
        # Remove output directory if it exists
        shutil.rmtree(known_paths['output'])
    os.makedirs(known_paths['output'])

    articles = []
    for article_source_path in articles_source_path:
        article = generate_article(jinja_env, known_paths, article_source_path)
        if article is not None:
            articles.append(article)

    generate_rest_of_site(jinja_env, known_paths, articles)


def find_articles(known_paths: dict) -> tuple[list[str], str]:
    """Find the markdown files below the articles directory.

    Args:
        known_paths: Must contain the key ``'articles'``.

    Returns:
        A tuple of the list of found ``*.md`` paths and the glob pattern that
        was used to find them.
    """
    articles_source_glob_pattern = f'{known_paths["articles"]}/**/*.md'
    articles_source_path = glob.glob(articles_source_glob_pattern, recursive=True)
    logger.info(f'Watching: {articles_source_glob_pattern} {articles_source_path}')
    return articles_source_path, articles_source_glob_pattern


def live_reload(jinja_env: jinja2.environment.Environment, known_paths: dict):
    """Generate the site, then serve it and regenerate when source files change.

    A change matching the articles glob pattern regenerates one article; a
    change in the templates or static directory regenerates everything.

    Args:
        jinja_env: Environment used to render the templates.
        known_paths: Must contain the keys ``'articles'``, ``'templates'``,
            ``'static'`` and ``'output'``.
    """
    articles_source_path, articles_source_glob_pattern = find_articles(known_paths)

    def regenerate_article(paths):
        # NOTE(review): assumes livereload passes the changed paths as a
        # sequence; only the first one is regenerated - confirm.
        logger.info(paths)
        generate_article(jinja_env, known_paths, paths[0])

    def regenerate_all():
        # Look the articles up again, so articles added after start-up are included.
        current_articles_source_path, _ = find_articles(known_paths)
        generate_all(jinja_env, known_paths, current_articles_source_path)

    generate_all(jinja_env, known_paths, articles_source_path)

    server = livereload.Server()
    server.watch(articles_source_glob_pattern, regenerate_article)
    for directory in ['templates', 'static']:  # Ignore output
        server.watch(known_paths[directory], regenerate_all)
    server.serve(root=known_paths['output'])


def main():
    """Parse the command line arguments and run the requested action.

    Exits with code 1 when a BlogGeneratorException is raised; only its
    message is logged, without a stack trace.
    """
    known_paths = {}

    parser = argparse.ArgumentParser(description='Static blog site generator')
    # const: a flag given without a value falls back to the default instead of None
    parser.add_argument('--action', choices=['live_reload', 'generate'], default='live_reload',
                        const='live_reload', nargs='?',
                        help='serve the page to live preview changes, or just generate once')
    parser.add_argument('--directory', default=".", const=".", nargs='?',
                        help='the directory containing the site source files, defaults to current working directory')

    # Parse arguments
    args = parser.parse_args()

    try:
        source_content_root = args.directory
        known_paths['source_content_root'] = source_content_root
        # isdir() is also False for a path that does not exist, or that is a file
        if not os.path.isdir(source_content_root):
            raise SourceDirectoryNotFoundException(f'Source directory not found: {source_content_root}')

        # Check if source directory contains the correct structure
        for directory in ['articles', 'templates', 'static']:
            path = os.path.join(source_content_root, directory)
            if not os.path.exists(path):
                raise SourceDirectoryNotFoundException(f'Source directory not found: {path}')
            known_paths[directory] = path
        known_paths['output'] = os.path.join(source_content_root, 'output')

        # Setup jinja environment
        # Dict values globally accessible in templates e.g. root title
        # Get name of directory, in case it's relative
        jinja_global = {'title': os.path.basename(os.path.realpath(args.directory))}
        jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(known_paths['templates']))
        jinja_env.globals.update(jinja_global)

        if args.action == 'generate':
            articles_source_path, _ = find_articles(known_paths)
            generate_all(jinja_env, known_paths, articles_source_path)
        elif args.action == 'live_reload':
            live_reload(jinja_env, known_paths)

    # Known custom errors, avoid stack trace, merely print the pretty exception message
    except SourceDirectoryNotFoundException as exception:
        logger.error(f'{exception}\n')
        parser.print_help()
        raise SystemExit(1)
    except BlogGeneratorException as exception:
        logger.error(exception)
        raise SystemExit(1)  # Exit code 1: Code for generic errors


if __name__ == "__main__":
    main()