-from collections import OrderedDict
-from html import escape
+import os
from io import StringIO
+class MDTag:
+ """
+ Base class for inline markup delimited by a single character.
+
+ Defining a subclass appends it to ``registered_tags``. Creating an
+ instance opens the tag: it pushes itself onto ``states`` and remembers
+ the current position of ``sio``. Closing it replaces everything written
+ since that position with ``<tag_name>...</tag_name>``.
+ """
+ # Every subclass, in definition order (filled in by __init_subclass__).
+ registered_tags = []
+ # Delimiter character; write() treats a further occurrence as the closer.
+ char: str
+ # HTML element name emitted by end().
+ tag_name: str
+
+ def __init_subclass__(cls, **kwargs):
+ """Append the newly defined subclass to ``MDTag.registered_tags``."""
+ super().__init_subclass__(**kwargs)
+ MDTag.registered_tags.append(cls)
+
+ def __init__(self, states, sio):
+ """
+ Open the tag at the current position of ``sio``.
+
+ states: list of currently open tags; this instance is appended to it.
+ sio: output buffer; tell/seek/read/truncate/write are called on it.
+ """
+ self.states = states
+ self.sio = sio
+ states.append(self)
+ # Everything written from here on is this tag's content.
+ self.start = sio.tell()
+
+ def write(self, c):
+ """
+ Write ``c`` to the buffer; if ``c`` is this tag's ``char``, close the
+ tag and write the rendered element instead.
+ """
+ if c == self.char:
+ c = self.end()
+ self.sio.write(c)
+
+ def end(self):
+ """
+ Close the tag and return its content wrapped in ``<tag_name>`` markup.
+
+ The content is removed from ``sio``; the caller writes the return value.
+ """
+ # NOTE(review): pops whatever is on top of the stack - assumes that is
+ # this tag; confirm callers only close the innermost tag.
+ self.states.pop()
+ return f"<{self.tag_name}>{self.cut()}</{self.tag_name}>"
+
+ def cut(self):
+ """Remove everything written to ``sio`` since ``self.start`` and return it."""
+ self.sio.seek(self.start)
+ value = self.sio.read()
+ # Rewind and truncate so the buffer ends where the tag was opened.
+ self.sio.seek(self.start)
+ self.sio.truncate()
+ return value
+
+
+class Bold(MDTag):
+ """Bold text: ``*text*`` is rendered as ``<b>text</b>``."""
+ char = "*"
+ tag_name = "b"
+
+
+class Italic(MDTag):
+ """Italic text: ``/text/`` is rendered as ``<i>text</i>``."""
+ char = "/"
+ tag_name = "i"
+
+
+class Underline(MDTag):
+ """Underlined text: ``_text_`` is rendered as ``<u>text</u>``."""
+ char = "_"
+ tag_name = "u"
+
+
+class Code(MDTag):
+ """Inline code: text between backticks is rendered as ``<code>text</code>``."""
+ char = "`"
+ tag_name = "code"
+
+
+class Link(MDTag):
+    """
+    Inline link written as ``[text](url)``.
+
+    The link moves through three phases, tracked with ``text`` and ``start``:
+    collecting the link text (``text is None``), waiting for ``(``
+    (``start is None``), and collecting the URL until ``)``.
+    """
+
+    char = "["
+    tag_name = "a"
+    start: int | None
+
+    def __init__(self, states, sio):
+        """Open the link; the text phase starts at the current buffer position."""
+        super().__init__(states, sio)
+        # Stays None until the "]" that ends the link text has been seen.
+        self.text = None
+
+    def write(self, c):
+        """
+        Feed one character to the link.
+
+        Raises:
+            ValueError: if a character other than "(" arrives right after
+                the "]" that closes the link text.
+        """
+        if self.text is None:
+            if c != "]":
+                self.sio.write(c)
+                return
+            # "]" ends the text phase: take the text out of the buffer.
+            self.text = self.cut()
+            self.start = None
+            return
+        elif self.start is None:
+            if c != "(":
+                raise ValueError("Expected '('")
+            # The URL is collected in the buffer from this position on.
+            self.start = self.sio.tell()
+            return
+        elif c != ")":
+            self.sio.write(c)
+            return
+        # Local import: the module-level import of ``escape`` is not available.
+        from html import escape
+
+        # Strip and escape the URL so that quotes or angle brackets in it
+        # cannot break out of the href attribute. The link text is written
+        # as collected because it may already contain markup from nested tags.
+        url = escape(self.cut().strip())
+        self.sio.write(f'<a href="{url}">{self.text}</a>')
+        self.states.pop()
+
+
class Paragraph:
+ """Block element that collects raw lines and renders their inline markup to HTML."""
+ # HTML element name used by join().
name = "p"
def __init__(self):
+ # Raw source lines; join_lines() clears the list once it has rendered them.
self.lines = []
- self.attributes = OrderedDict()
-
- def join_attrs(self):
- if not self.attributes:
- return ""
- return "".join(
- f' {key}="{escape(value)}"' for key, value in self.attributes.items()
- )
-
- def add_link(self, sio, link):
- sio.seek(link[0])
- text = sio.read(link[1] - link[0])
- url = sio.read()
- sio.seek(link[0])
- sio.truncate()
- sio.write('<a href="')
- sio.write(escape(url.strip()))
- sio.write('">')
- sio.write(escape(text, False))
- sio.write("</a>")
- return None
+ # Text scanned by join_lines(); handle_backslash() indexes into it.
+ self.content = ""
+ # Output buffer; a non-empty value doubles as the cached render result.
+ self.sio = StringIO()
+ # Index of the first character after an escaped line break, or None.
+ self.end_backslash = None
+ # True while the character(s) following a backslash are being consumed.
+ self.backslash = False
+ # Stack of currently open inline tags; the innermost one is last.
+ self.states = []
+
+ def check_states(self, tag_class):
+ """
+ Return True if a ``tag_class`` tag may be opened at the current position.
+
+ False when the innermost open tag is already a ``tag_class`` (the
+ character then goes to that tag's write()), or when the innermost open
+ Link has both its text and its URL start set.
+ """
+ if len(self.states) == 0:
+ return True
+ if isinstance(self.states[-1], tag_class):
+ return False
+ links = [state for state in self.states if isinstance(state, Link)]
+ if len(links) > 0:
+ # NOTE(review): ``start is None`` also holds between "]" and "(", so a
+ # tag character there opens a tag instead of reaching Link.write() and
+ # its "Expected '('" error - confirm this is intended.
+ return links[-1].text is None or links[-1].start is None
+ return True
+
+ def handle_backslash(self, i, c):
+ """
+ Handle character ``c`` at index ``i`` of ``self.content`` while a
+ backslash escape is active.
+
+ Returns True if the character was consumed here, False if the caller
+ should process it normally.
+ """
+ if i == self.end_backslash:
+ # First character after an escaped line break: the escape is over.
+ self.end_backslash = None
+ self.backslash = False
+ return False
+ elif self.end_backslash is not None:
+ # Still inside the line separator: swallow the character.
+ pass
+ elif self.content[i:i + len(os.linesep)] == os.linesep:
+ # An escaped line break is replaced by a single space.
+ self.sio.write(" ")
+ self.end_backslash = i + len(os.linesep)
+ else:
+ # Any other escaped character is written literally.
+ self.sio.write(c)
+ self.backslash = False
+ return True
def join_lines(self):
+ """
+ Render ``self.lines`` to an HTML fragment and return it.
+
+ The result stays in ``self.sio``; once it is non-empty, later calls
+ return it without rendering again.
+ """
- content = '\n'.join(self.lines)
- sio = StringIO()
- backslash = False
- link = None
- for i, c in enumerate(content):
- if backslash:
- assert c in "\\()[]`*/_\n"
- if c == "\n":
- c = " "
- sio.write(c)
- elif link is not None:
- if c == "[]()"[len(link)]:
- link.append(sio.tell())
- if len(link) == 4:
- link = self.add_link(sio, link)
- continue
+ # Already rendered: return the cached output.
+ if self.sio.getvalue():
+ assert not self.lines
+ return self.sio.getvalue()
+ self.content = os.linesep.join(self.lines)
+ for i, c in enumerate(self.content):
+ if self.backslash and self.handle_backslash(i, c):
+ continue
if c == "\\":
- backslash = True
+ self.backslash = True
continue
- elif c == "[":
- link = [sio.tell()]
+ # Try to open a new inline tag on this character.
+ tag = None
+ for tag_class in MDTag.registered_tags:
+ if c == tag_class.char and self.check_states(tag_class):
+ tag = tag_class(self.states, self.sio)
+ break
+ # The opening character itself is not written to the output.
+ if tag is not None:
continue
- sio.write(c)
- assert backslash is False and link is None
- return sio.getvalue()
+ # The innermost open tag, if any, decides what to do with the character.
+ if len(self.states) > 0:
+ self.states[-1].write(c)
+ else:
+ self.sio.write(c)
+ # NOTE(review): an unclosed tag or a trailing backslash trips these
+ # asserts; they are skipped entirely when Python runs with -O.
+ assert len(self.states) == 0, self.states
+ assert not self.backslash
+ self.lines.clear()
+ return self.sio.getvalue()
def join(self):
+ """Return the rendered content wrapped in ``<name>...</name>``."""
- return f"<{self.name}{self.join_attrs()}>{self.join_lines()}</{self.name}>"
+ return f"<{self.name}>{self.join_lines()}</{self.name}>"
class Heading2(Paragraph):
self.list_items.append(self.ListItem())
def join_lines(self):
+ # Concatenate the join() result of every list item, separated by os.linesep.
- return "\n".join(li.join() for li in self.list_items)
+ return os.linesep.join(li.join() for li in self.list_items)
class MDRenderer:
"""
Simplified markdown to html translator.
"""
- METACHARS = "\\[*/_~`"
- INLINE_TAGS = {
- "*": "b",
- "/": "i",
- "_": "u",
- "~": "del",
- "`": "code",
- }
-
def __init__(self, page):
+ # Source text; render_html() splits it into lines.
self.page = page
- self.tag = None
self.sio = StringIO()
+ # Block element that lines are currently appended to, or None.
+ self.tag = None
+ # Every block element created so far, in document order.
+ self.tags = []
+
+ def set_tag(self, tag_class):
+ """
+ Start a new ``tag_class`` block unless the current block already is one.
+
+ Returns True if a new block was created and appended to ``self.tags``,
+ False if the current block was kept.
+ """
+ # isinstance() also matches subclasses: a current Heading2 (a Paragraph
+ # subclass) is kept when a Paragraph is requested.
+ if isinstance(self.tag, tag_class):
+ return False
+ self.tag = tag_class()
+ self.tags.append(self.tag)
+ return True
def render_html(self):
+ """
+ Translate ``self.page`` to HTML.
+
+ Returns a tuple ``(title, html)``: ``title`` is the join_lines() result
+ of the block that received the first "# " line, or None if there is no
+ such line; ``html`` is the join() result of every block, separated by
+ os.linesep.
+ """
+ # NOTE(review): self.tags and self.tag are only initialised in __init__,
+ # so a second call on the same instance appends to the earlier blocks.
- # let's initially support #/<h2>, <p>, and single level lists <ul><li>
- # where two spaces after a bullet line continues the list item.
- # backslash escapes transform newlines to spaces in the output
- # inline tags are evaluated when a tag is closed,
- # and the in-between text is then html escaped
- #
- # return title, html
- tags = []
- tag = None
title = None
+ # NOTE(review): text that uses "\n" only is not split where os.linesep
+ # is "\r\n" - confirm how ``page`` is read.
- for line in self.page.split("\n"):
+ for line in self.page.split(os.linesep):
if line.startswith("# "):
- if not isinstance(tag, Heading2):
- tag = Heading2()
- if title is None:
- title = tag
- tags.append(tag)
+ self.set_tag(Heading2)
+ if title is None:
+ title = self.tag
line = line[2:]
elif line.startswith("- "):
- if not isinstance(tag, BulletList):
- tag = BulletList()
- tags.append(tag)
- else:
- tag.lines = []
+ # A further bullet inside an existing list.
+ # NOTE(review): ``lines`` is set to None here (previously []) and
+ # ``.lines.append`` is called below - presumably BulletList handles
+ # the assignment itself; confirm against its definition.
+ if not self.set_tag(BulletList):
+ self.tag.lines = None
line = line[2:]
elif line.startswith(" "):
+ # Continuation line of a list item; the two-character prefix is dropped.
- assert isinstance(tag, BulletList)
+ assert isinstance(self.tag, BulletList)
line = line[2:]
elif line == "":
+ # An empty line ends the current block.
- tag = None
+ self.tag = None
continue
else:
- if not isinstance(tag, Paragraph):
- tag = Paragraph()
- tags.append(tag)
- tag.lines.append(line)
- return title.join_lines() if title else None, "\n".join(t.join() for t in tags)
+ self.set_tag(Paragraph)
+ self.tag.lines.append(line)
+ return (
+ title.join_lines() if title else None,
+ os.linesep.join(t.join() for t in self.tags),
+ )