blog_generator.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244

#!/usr/bin/python
import markdown # Markdown -> HTML
import yaml # Config file
import os # Files / directories
import glob # Find *.md
import logging # Log messages
import dataclasses # Shorthand data classes
import datetime # Created, modified date
import jinja2 # HTML Templates
import shutil # Copy files and directories
import livereload # Generate on content source file change and reload browser
import datetime # Read article dates metadata
import argparse # Command line arguments

# Console logging for the generator. Each record shows a timestamp with
# milliseconds, the level name padded to 8 characters, the emitting file and
# line number, and finally the message.
logging.basicConfig(
    datefmt='%Y-%m-%d:%H:%M:%S',
    format='%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
)

# Module-wide logger. DEBUG records are dropped; INFO and above are emitted.
logger = logging.getLogger('blog_generator')
logger.setLevel(logging.INFO)

# Known, user-facing failures. main() catches BlogGeneratorException (and its
# subclasses), logs only the message instead of a stack trace, and exits with
# status 1.
class BlogGeneratorException(Exception):
    """Base class for every error the generator reports to the user."""


class FoundMoreThanOneSourceFileException(BlogGeneratorException):
    """Not raised anywhere in this file.

    NOTE(review): presumably meant for an article that has several candidate
    source files -- confirm before relying on it.
    """


class MarkDownException(BlogGeneratorException):
    """Raised by generate_article when converting markdown to HTML fails."""


class TemplateNotFoundException(BlogGeneratorException):
    """Not raised anywhere in this file.

    NOTE(review): presumably meant for a missing jinja template -- confirm.
    """


class SourceDirectoryNotFoundException(BlogGeneratorException):
    """Raised by main when the site directory or a required sub-directory is missing."""

@dataclasses.dataclass
class Article:
    dir_path: str
    source_path: str
    source: str
    name: str
    html: str
    output_dir_path: str
    output_path: str
    source_file_name: str
    
    # meta data, can't be required, due to Article.__dict__.keys()
    title: str = None
    description: str = None
    created: datetime.date = None
    modified: datetime.date = None

# Names of the Article fields that may be filled from an article's front
# matter: the dataclass fields declared with a default value (title,
# description, created, modified). The remaining fields are always passed
# explicitly by generate_article, so letting metadata supply them too would
# pass the same keyword twice. Front-matter keys that are not in this list are
# ignored by generate_article.
meta_data_keys = [
    field.name
    for field in dataclasses.fields(Article)
    if field.default is not dataclasses.MISSING
]

def _read_front_matter(source: str, article_name: str) -> tuple[dict, str]:
    """Split an optional YAML front-matter section off the top of an article.

    Front matter is the text between a leading ``---`` and the next ``---``.

    Args:
        source: Full text of the article source file.
        article_name: Article name, used only in log messages.

    Returns:
        ``(known_meta_data, body)``: the front-matter entries whose key is in
        ``meta_data_keys``, and the markdown text that follows the front
        matter. When there is no front matter, or it has no closing ``---``,
        the dict is empty and ``body`` is ``source`` unchanged.
    """
    if not source.startswith('---'):
        logger.warning(f'No metadata found in article: {article_name}')
        return {}, source

    end_index = source.find('---', 3)
    if end_index == -1:
        # Without this guard the -1 would be used as a slice bound and silently
        # mangle both the metadata and the article body.
        logger.warning(f'Metadata block is not closed with "---" in article: {article_name}')
        return {}, source

    meta_data = yaml.safe_load(source[3:end_index])
    # Strip the front matter before the text is fed to the markdown reader
    body = source[end_index + 3:]
    logger.debug(f'Meta data: {meta_data}')

    if not isinstance(meta_data, dict):
        # An empty section loads as None; a scalar or list has no keys to use.
        if meta_data is not None:
            logger.warning(f'Metadata is not a key/value mapping in article: {article_name}')
        return {}, body

    # Only keys that map onto Article fields are kept; the rest are dropped.
    known_meta_data = {key: value for key, value in meta_data.items() if key in meta_data_keys}
    return known_meta_data, body


def generate_article(jinja_env: jinja2.environment.Environment, known_paths: dict, article_source_path: str):
    """Render one markdown article to ``<output>/<article name>/index.html``.

    An article is either a stand-alone markdown file (named after the file) or
    a folder containing ``index.md`` (named after the folder). For folder
    articles every other entry of the folder is copied next to the generated
    page as well.

    Args:
        jinja_env: Environment used to load and render ``article.html``.
        known_paths: Path lookup; only the ``'output'`` entry is read here.
        article_source_path: Path of the markdown file to convert.

    Returns:
        The Article describing the source, the generated HTML and output paths.

    Raises:
        MarkDownException: If the markdown conversion fails. Errors from
            reading the file, parsing the front matter, rendering the template
            or copying files propagate unchanged.
    """
    logger.info(f'Generating article: {article_source_path}')

    # Article can be a markdown file at the root of the articles folder, or it can be within a folder and called index
    article_source_file = os.path.basename(article_source_path)
    article_source_file_name = os.path.splitext(article_source_file)[0]
    article_only_one_file = article_source_file_name != 'index'
    if article_only_one_file:
        # Use file name
        article_dir_path = None
        article_name = article_source_file_name
    else:
        # Use folder name
        article_dir_path = os.path.dirname(article_source_path)
        article_name = os.path.basename(article_dir_path)
    logger.debug(article_name)

    # Explicit encoding so the result does not depend on the platform locale.
    with open(article_source_path, 'r', encoding='utf-8') as file:
        source = file.read()

    known_meta_data, source = _read_front_matter(source, article_name)

    try:
        html = markdown.markdown(source, extensions=['fenced_code', 'codehilite', 'tables'])
    except Exception as ex:
        # Keep the original error as the cause and tell the user which file failed.
        raise MarkDownException(f'Could not convert markdown to HTML: {article_source_path}') from ex

    # Create output
    article_output_dir_path = os.path.join(known_paths['output'], article_name)
    os.makedirs(article_output_dir_path, exist_ok=True)
    article_output_path = os.path.join(article_output_dir_path, 'index.html')

    article = Article(name=article_name, dir_path=article_dir_path, source_path=article_source_path, source=source,
                      html=html, output_dir_path=article_output_dir_path, output_path=article_output_path,
                      source_file_name=article_source_file_name,
                      **known_meta_data)

    # Render first so a template error cannot leave an empty page behind, then
    # overwrite any previous output in one step.
    output = jinja_env.get_template('article.html').render(article=article)
    with open(article.output_path, 'w', encoding='utf-8') as file:
        file.write(output)

    if not article_only_one_file:
        # Copy other article files e.g. images.
        # NOTE: the markdown source itself is copied too, as before.
        for source_file_name in os.listdir(article.dir_path):
            source_file_path = os.path.join(article.dir_path, source_file_name)
            destination_file_path = os.path.join(article.output_dir_path, source_file_name)
            if os.path.isdir(source_file_path):
                # copyfile cannot copy directories; merge into any existing copy
                shutil.copytree(source_file_path, destination_file_path, dirs_exist_ok=True)
            else:
                shutil.copyfile(source_file_path, destination_file_path)

    return article

def _created_sort_key(article):
    """Return a sort key for an article's ``created`` value.

    ``date`` values are widened to midnight ``datetime`` values and a missing
    date becomes the earliest ``datetime``, so articles mixing dates,
    datetimes and no date at all can be ordered together. Any other value is
    returned unchanged.
    """
    created = article.created
    if created is None:
        return datetime.datetime.min
    if isinstance(created, datetime.datetime):
        return created
    if isinstance(created, datetime.date):
        return datetime.datetime.combine(created, datetime.time.min)
    return created


def generate_rest_of_site(jinja_env: jinja2.environment.Environment, known_paths: dict, articles: list[Article]):
    """Write the site index page and copy the static content into the output directory.

    Args:
        jinja_env: Environment used to load and render ``index.html``.
        known_paths: Path lookup; the ``'output'`` and ``'static'`` entries are read.
        articles: Articles to list on the index page, in any order.
    """
    logger.info('Generating rest of site')

    # Sort articles by created date descending; articles without a date go last
    articles_sorted = sorted(articles, key=_created_sort_key, reverse=True)

    # Generate index. Render before opening the file, and overwrite rather than
    # append so regenerating into an existing output never duplicates the page.
    index_output_path = os.path.join(known_paths['output'], 'index.html')
    output = jinja_env.get_template('index.html').render(articles=articles_sorted)
    with open(index_output_path, 'w', encoding='utf-8') as file:
        file.write(output)

    # Copy static content
    for item in os.listdir(known_paths['static']):
        source = os.path.join(known_paths['static'], item)
        destination = os.path.join(known_paths['output'], item)
        if os.path.isfile(source):
            shutil.copy(source, destination)
        elif os.path.isdir(source):
            # Merge into the destination if it already exists instead of failing
            shutil.copytree(source, destination, dirs_exist_ok=True)


def generate_all(jinja_env: jinja2.environment.Environment, known_paths: dict, articles_source_path: list[str]):
    """Rebuild the whole site from scratch.

    The output directory is deleted and recreated, every article is generated,
    and finally the index page and static content are produced.

    Args:
        jinja_env: Environment passed on to the article and index generators.
        known_paths: Path lookup; the ``'output'`` entry is read here.
        articles_source_path: Paths of the markdown files to generate.
    """
    logger.info('Generating all')

    output_path = known_paths['output']
    if os.path.exists(output_path):
        # Start from an empty directory so stale pages never survive a rebuild
        shutil.rmtree(output_path)
    os.makedirs(output_path)

    articles = []
    for article_source_path in articles_source_path:
        article = generate_article(jinja_env, known_paths, article_source_path)
        if article is not None:
            articles.append(article)

    generate_rest_of_site(jinja_env, known_paths, articles)

def find_articles(known_paths: dict) -> tuple[list, str]:
    """Locate every markdown file below the articles directory.

    Args:
        known_paths: Path lookup; only the ``'articles'`` entry is read.

    Returns:
        ``(paths, pattern)``: the matching ``*.md`` paths, searched
        recursively, and the glob pattern that produced them.
    """
    pattern = '{}/**/*.md'.format(known_paths['articles'])
    matches = glob.glob(pattern, recursive=True)
    # Report both the pattern and what it currently matches
    print('Watching: {} {}'.format(pattern, matches))
    return matches, pattern


def live_reload(jinja_env: jinja2.environment.Environment, known_paths: dict):
    """Generate the site, then serve it and regenerate when source files change.

    A change matching the article glob regenerates that article only; a change
    under the templates or static directory rebuilds the whole site. The
    output directory is not watched.

    Args:
        jinja_env: Environment passed on to the generators.
        known_paths: Path lookup; the ``'templates'``, ``'static'`` and
            ``'output'`` entries are read here.
    """
    articles_source_path, articles_source_glob_pattern = find_articles(known_paths)

    def regenerate_article(path):
        # NOTE(review): assumes the watcher passes a sequence of changed paths;
        # only the first entry is regenerated -- confirm against livereload.
        logger.info(path)
        generate_article(jinja_env, known_paths, path[0])

    def regenerate_all():
        # Re-scan on every rebuild: generate_all wipes the output directory, so
        # using the list captured at start-up would drop articles added since
        # then and fail on articles that have been deleted.
        current_sources, _ = find_articles(known_paths)
        generate_all(jinja_env, known_paths, current_sources)

    generate_all(jinja_env, known_paths, articles_source_path)
    server = livereload.Server()

    server.watch(articles_source_glob_pattern, regenerate_article)
    for directory in ['templates', 'static']:  # Ignore output
        server.watch(known_paths[directory], regenerate_all)
    server.serve(root=known_paths['output'])

def main():
    """Command-line entry point: parse arguments, validate the site directory and run the action.

    The site directory must contain ``articles``, ``templates`` and ``static``
    sub-directories; output is written to an ``output`` directory inside it.
    Known generator errors are logged without a stack trace and the process
    exits with status 1.
    """
    known_paths = {}

    parser = argparse.ArgumentParser(description='Static blog site generator')
    parser.add_argument('--action', choices=['live_reload', 'generate'], default='live_reload', nargs='?',
                        help='serve the page to live preview changes, or just generate once')
    parser.add_argument('--directory', default=".", nargs='?',
                        help='the directory containing the site source files, defaults to current working directory')
    # Parse arguments
    args = parser.parse_args()

    # With nargs='?' a flag given without a value parses as None; fall back to
    # the same defaults used when the flag is omitted altogether.
    source_root = args.directory or '.'
    action = args.action or 'live_reload'

    try:
        known_paths['source_content_root'] = source_root
        # Also rejects a path that exists but is a plain file, which would
        # otherwise surface later as a KeyError on known_paths['templates'].
        if not os.path.isdir(source_root):
            raise SourceDirectoryNotFoundException(f'Source directory not found: {source_root}')

        # Check if source directory contains the correct structure
        for directory in ['articles', 'templates', 'static']:
            path = os.path.join(source_root, directory)
            if not os.path.exists(path):
                raise SourceDirectoryNotFoundException(f'Source directory not found: {path}')
            known_paths[directory] = path
        known_paths['output'] = os.path.join(source_root, 'output')

        # Setup jinja environment
        # Dict values globally accessible in templates e.g. root title
        # Get name of directory, in case it's relative
        jinja_global = {'title': os.path.basename(os.path.realpath(source_root))}
        jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(known_paths['templates']))
        jinja_env.globals.update(jinja_global)

        if action == 'generate':
            articles_source_path, _ = find_articles(known_paths)
            generate_all(jinja_env, known_paths, articles_source_path)
        elif action == 'live_reload':
            live_reload(jinja_env, known_paths)
    # Known custom errors, avoid stack trace, merely print the pretty exception message
    except SourceDirectoryNotFoundException as exception:
        logger.error(f'{exception}\n')
        parser.print_help()
        # SystemExit directly: the exit() helper is only installed by the site module
        raise SystemExit(1) from None
    except BlogGeneratorException as exception:
        logger.error(exception)
        raise SystemExit(1) from None  # Exit code 1: Code for generic errors

# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()