#!/usr/bin/env python3
+import os
import shutil
+import sys
+import xmlschema
from argparse import ArgumentParser
+from contextlib import contextmanager
+from datetime import datetime, UTC
+from functools import cached_property
+from hashlib import sha256
+from itertools import chain
from pathlib import Path
+from shutil import rmtree
+from xml.etree import ElementTree
from md import MDRenderer
-from utils import get_content, write_content
+
+
def path_len_parts(p):
    """Return the number of components of path *p*, i.e. its nesting depth."""
    return len(p.parts)


@contextmanager
def cleanup_existing_output(output_path):
    """Track what already exists below *output_path* and prune the leftovers.

    On entry, *output_path* is created (including parents) when it is
    missing; otherwise every file and directory below it is collected into a
    list. That list is yielded, and the caller is expected to remove from it
    every path that should survive. When the ``with`` body finishes normally,
    whatever is still in the list is deleted.

    If the ``with`` body raises, the exception propagates from the ``yield``
    and nothing is deleted.

    Args:
        output_path: ``pathlib.Path`` of the output directory.

    Yields:
        list of ``pathlib.Path``: pre-existing entries below *output_path*.
    """
    existing_output = []
    if output_path.exists():
        for current, dirnames, filenames in os.walk(output_path):
            current_path = Path(current)
            existing_output.extend(
                current_path / name for name in chain(dirnames, filenames)
            )
    else:
        output_path.mkdir(0o755, True, True)
    yield existing_output
    # Deepest paths first, so a directory is already empty when its turn comes.
    for path in sorted(existing_output, key=path_len_parts, reverse=True):
        # os.walk() lists symlinks to directories among the directory names,
        # and is_dir() follows links; such a link must be unlinked because
        # rmdir() on a symlink fails.
        is_dir = path.is_dir() and not path.is_symlink()
        print(f"deleting {str(path)}{'/' if is_dir else ''}")
        if is_dir:
            path.rmdir()
        else:
            path.unlink()
+
+
+class WebsiteGenerator:
+ STATIC_FILES = [
+ "style.css",
+ ]
+ SITEMAP_NAMESPACE = "http://www.sitemaps.org/schemas/sitemap/0.9"
+ SITEMAP_SCHEMA_URL = "http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
+
+ def __init__(self, base_url, build_path, output_path):
+ self.base_url = base_url
+ if build_path.exists():
+ shutil.rmtree(build_path)
+ build_path.mkdir(0o755, True)
+ self.build_path = build_path
+ self.output_path = output_path
+ self.source_path = Path(__file__).parent
+ with (self.source_path / "template.html").open("rt") as fh:
+ self.template = fh.read()
+ self.files_and_urls = []
+
+ @cached_property
+ def content_path(self):
+ return self.source_path / "content"
+
+ AutoContent = type("AutoContentType", (), {})()
+
+ def add_path(self, in_path, out_path, content=None, url=None):
+ if out_path in (x[0] for x in self.files_and_urls):
+ raise ValueError("Cannot add the same file multiple times")
+ self.files_and_urls.append((out_path, url))
+ if in_path.is_dir():
+ print(f"creating {str(out_path.relative_to(self.build_path))}/")
+ out_path.mkdir(0o755)
+ elif content is not None:
+ if content is self.AutoContent:
+ with in_path.open("rb") as fh:
+ content = fh.read()
+ print(f"writing {str(out_path.relative_to(self.build_path))}")
+ with out_path.open("wb" if isinstance(content, bytes) else "wt") as fh:
+ fh.write(content)
+ else:
+ raise ValueError("No content provided.")
+
+ def render_page(self, nav, in_path, url):
+ with in_path.open("rt") as fh:
+ renderer = MDRenderer(fh.read(), url)
+ return self.template.format(
+ nav=nav,
+ page=renderer.render_html(),
+ pagemeta=renderer.render_html_pagemeta(),
+ ), url
+
+ def get_url(self, out_path):
+ url = f"{self.base_url}{out_path.relative_to(self.build_path)}"
+ if url.endswith("/index.html"):
+ url = url[:-len("index.html")]
+ return url
+
+ def build(self):
+ print("==> building", self.base_url)
+ with (self.content_path / "nav.md").open("rt") as fh:
+ nav = MDRenderer(fh.read()).render_html()
+ index_md = self.content_path / "index.md"
+ index_html = self.build_path / "index.html"
+ self.add_path(
+ index_md,
+ index_html,
+ *self.render_page(nav, index_md, self.get_url(index_html)),
+ )
+ blog_md = self.content_path / "blog" / "index.md"
+ blog_html = self.build_path / "blog" / "index.html"
+ self.add_path(blog_md.parent, blog_html.parent)
+ self.add_path(
+ blog_md,
+ blog_html,
+ *self.render_page(nav, blog_md, self.get_url(blog_html)),
+ )
+ for static_file in self.STATIC_FILES:
+ self.add_path(
+ self.source_path / static_file,
+ self.build_path / static_file,
+ self.AutoContent,
+ )
+
+ def sync(self):
+ print("==> syncing", self.base_url)
+ with cleanup_existing_output(self.output_path) as existing_output:
+ sitemap = []
+ for src_path, url in self.files_and_urls:
+ rel_path = src_path.relative_to(self.build_path)
+ dest_path = self.output_path / rel_path
+ if dest_path in existing_output:
+ existing_output.remove(dest_path)
+ if src_path.is_dir():
+ if not dest_path.exists():
+ print("creating", str(rel_path))
+ dest_path.mkdir(0o755)
+ continue
+ update = None if dest_path.exists() else "creating"
+ with src_path.open("rb") as fh:
+ src_content = fh.read()
+ if update is None:
+ with dest_path.open("rb") as fh:
+ if sha256(src_content).digest() != sha256(fh.read()).digest():
+ update = "updating"
+ if update is not None:
+ print(update, rel_path)
+ with dest_path.open("wb") as out_fh:
+ out_fh.write(src_content)
+ if url is None:
+ continue
+ sitemap.append(
+ {
+ "loc": url,
+ "lastmod": datetime.fromtimestamp(
+ dest_path.stat().st_mtime,
+ UTC,
+ ).isoformat(timespec="seconds"),
+ }
+ )
+ self.generate_sitemap(
+ self.output_path / "sitemap.xml", sitemap, existing_output
+ )
+
+ @classmethod
+ def generate_sitemap(cls, sitemap_xml, urls, existing_output):
+ if sitemap_xml in existing_output:
+ existing_output.remove(sitemap_xml)
+ schema = xmlschema.XMLSchema(cls.SITEMAP_SCHEMA_URL)
+ with open(sitemap_xml, "wb") as fh:
+ ElementTree.register_namespace("", cls.SITEMAP_NAMESPACE)
+ fh.write(b"<?xml version='1.0' encoding='UTF-8'?>\n")
+ fh.write(
+ ElementTree.tostring(
+ schema.encode(
+ {
+ "@xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
+ "@xmlns": cls.SITEMAP_NAMESPACE,
+ "@xsi:schemaLocation": (
+ f"{cls.SITEMAP_NAMESPACE} {cls.SITEMAP_SCHEMA_URL}"
+ ),
+ "url": urls,
+ }
+ )
+ )
+ )
+ print(f"Validating XML {repr(sitemap_xml.name)}...", end=" ")
+ sys.stdout.flush()
+ schema.validate(sitemap_xml)
+ print("done")
+
+ def cleanup(self):
+ rmtree(self.build_path)
def main():
    """Parse the command line, then build, sync and clean up the website."""
    parser = ArgumentParser()
    for flag, default in (
        ("--base-url", "https://www.mar77i.info/"),
        ("--build-dir", "/dev/shm/build"),
        ("--output-dir", "/dev/shm/output"),
    ):
        parser.add_argument(flag, default=default)
    options = parser.parse_args()
    build_path = Path(options.build_dir)
    output_path = Path(options.output_dir)
    generator = WebsiteGenerator(options.base_url, build_path, output_path)
    generator.build()
    generator.sync()
    generator.cleanup()
if __name__ == "__main__":
import os
from io import StringIO
+from html import escape
+from urllib.parse import quote_plus
_registered_tags = []
def __init__(self):
self.lines = []
- self.content = ""
self.sio = StringIO()
+
self.end_backslash = None
self.backslash = False
+ self.attributes = []
self.states = []
def check_states(self, tag_class):
return links[-1].text is None or links[-1].start is None
return True
- def handle_backslash(self, i, c):
- if i == self.end_backslash:
- self.end_backslash = None
- self.backslash = False
- return False
- elif self.end_backslash is not None:
- pass
- elif self.content[i:i + len(os.linesep)] == os.linesep:
+ def handle_backslash(self, content, i, c):
+ if self.end_backslash is not None:
+ done = i == self.end_backslash
+ if done:
+ self.end_backslash = None
+ self.backslash = False
+ # continue only if we're not at end_backslash
+ return not done
+ if content[i:i + len(os.linesep)] == os.linesep:
self.sio.write(" ")
self.end_backslash = i + len(os.linesep)
else:
self.backslash = False
return True
+# def handle_attributes(self, c):
+# if c not in " }=":
+# return False
+# self.sio.seek(self.attribute_since)
+# value = self.sio.read()
+# self.sio.seek(self.attribute_since)
+# self.sio.truncate()
+#
+# if c == "=":
+# if self.attribute_key is not None:
+# raise AttributeError("Key already specified!")
+# self.attribute_key = value
+# return True
+# if c == "}":
+# self.attribute_since = None
+# return True
+
def handle_tag(self, c):
for tag_class in _registered_tags:
if c == tag_class.char and self.check_states(tag_class):
return tag_class(self.states, self.sio)
return None
def maybe_get_attributes(self, content):
    """Strip a leading ``{...}`` attribute list from *content*.

    The list consists of space-separated ``key`` or ``key=value`` items and
    ends at the first unescaped ``}``. A backslash makes the following
    character literal, so values may contain spaces, ``=``, ``}`` or ``\\``.
    Only the first ``=`` of an item separates key and value. Empty items
    (for example from repeated spaces) are ignored.

    The parsed ``(key, value)`` pairs are appended to ``self.attributes``;
    *value* is ``""`` for an item without ``=``.

    Args:
        content: the raw text of the tag.

    Returns:
        str: *content* unchanged when it does not start with ``{``, otherwise
        the text following the closing ``}``.

    Raises:
        ValueError: if the closing ``}`` is missing; ``self.attributes`` is
            left untouched in that case.
    """
    if not content.startswith("{"):
        return content
    body = content[1:]
    parsed = []
    sio = StringIO()
    escaped = False
    key_pos = None
    for i, c in enumerate(body):
        if escaped:
            # Exactly one character is taken literally after a backslash.
            sio.write(c)
            escaped = False
        elif c == "\\":
            escaped = True
        elif c == "=" and key_pos is None:
            # Remember where the key ends; the "=" itself is not stored.
            key_pos = sio.tell()
        elif c in " }":
            text = sio.getvalue()
            if key_pos is not None:
                parsed.append((text[:key_pos], text[key_pos:]))
            elif text:
                parsed.append((text, ""))
            sio.seek(0)
            sio.truncate()
            key_pos = None
            if c == "}":
                self.attributes.extend(parsed)
                return body[i + 1:]
        else:
            sio.write(c)
    raise ValueError("Attribute list: missing closing '}'")
+
def render_inner(self):
    """Return the rendered content of this tag, without the enclosing tag.

    The first call consumes ``self.lines``: a leading attribute list is
    split off, the remaining text is processed character by character into
    ``self.sio``, and ``self.lines`` is set to ``None``. Once ``self.sio``
    holds any output, further calls simply return that output.
    """
    rendered = self.sio.getvalue()
    if rendered:
        assert self.lines is None
        return rendered
    content = self.maybe_get_attributes(os.linesep.join(self.lines))
    for i, c in enumerate(content):
        # While an escape is pending, handle_backslash() decides whether this
        # character has been dealt with already.
        if self.backslash and self.handle_backslash(content, i, c):
            continue
        if c == "\\":
            self.backslash = True
            continue
        if self.handle_tag(c):
            continue
        # Plain character: it goes to the innermost open state, if any.
        target = self.states[-1] if self.states else self.sio
        target.write(c)
    assert len(self.states) == 0, self.states
    assert not self.backslash
    self.lines = None
    return self.sio.getvalue()
def render_outer(self):
    """Return the complete element: opening tag, attributes, content, closing tag.

    An attribute with an empty value is written as a bare name; any other
    value is HTML-escaped and double-quoted.
    """
    # Render the content first: render_inner() is what fills self.attributes.
    body = self.render_inner()
    attrs = "".join(
        f' {key}="{escape(value)}"' if value else f" {key}"
        for key, value in self.attributes
    )
    return f"<{self.name}{attrs}>{body}</{self.name}>"
class Paragraph(MDLineTag):
name = "h2"
class Heading3(MDLineTag):
    """Line tag for a third-level heading; its tag name is ``h3``.

    ``render_html`` selects this tag for lines that start with ``"## "``.
    """

    name = "h3"
+
+
class BulletList(MDLineTag):
name = "ul"
"""
Simplified markdown to html translator.
"""
- def __init__(self, page):
+ def __init__(self, page, url=""):
self.page = page
+ self.url = url
self.sio = StringIO()
self.tag = None
self.tags = []
self.tags.append(self.tag)
return True
def render_html_pagemeta(self):
    """Return the page-specific ``<head>`` elements, one per line.

    The result has this shape::

        <link rel="canonical" href="{canonical_url}">
        <meta name="title" content="{title}">
        <meta name="description" content="{description}">
        <title>mar77i.info ¬ {title}</title>

    The title is the rendered content of the first tag, which has to be a
    ``Heading2``. The description is the rendered content of the second
    tag when that is a ``Paragraph`` and empty otherwise. The canonical URL
    is ``self.url`` with everything but ``/`` and ``:`` percent-quoted.

    Returns:
        str: the four elements joined with ``os.linesep``.

    Raises:
        ValueError: if the page has no tags or does not start with a
            ``Heading2``.
    """
    if not self.tags or not isinstance(self.tags[0], Heading2):
        raise ValueError("Page must start with a top-level heading.")
    # Escape exactly once; the same text is used for the attribute and for
    # the <title> element (escape() also covers quotes).
    title = escape(self.tags[0].render_inner())
    description = ""
    # A page may consist of the heading alone.
    if len(self.tags) > 1 and isinstance(self.tags[1], Paragraph):
        description = self.tags[1].render_inner()
    return os.linesep.join(
        (
            f'<link rel="canonical" href="{quote_plus(self.url, "/:")}">',
            f'<meta name="title" content="{title}">',
            f'<meta name="description" content="{escape(description)}">',
            f"<title>mar77i.info ¬ {title}</title>",
        )
    )
+
def render_html(self):
- title = None
for line in self.page.split(os.linesep):
if line.startswith("# "):
self.set_tag(Heading2)
- if title is None:
- title = self.tag
line = line[2:]
+ elif line.startswith("## "):
+ self.set_tag(Heading3)
+ line = line[3:]
elif line.startswith("- "):
if not self.set_tag(BulletList):
self.tag.lines = None
else:
self.set_tag(Paragraph)
self.tag.lines.append(line)
- return (
- title.render_inner() if title else None,
- os.linesep.join(t.render_outer() for t in self.tags),
- )
+ return os.linesep.join(t.render_outer() for t in self.tags)