# gemtext2html

#!/usr/bin/env python3
"""Parse gemtext and convert it to an HTML5 document."""
# -*- coding: utf-8 -*-

import html
import sys
from typing import Iterable

# Type aliases
State = int
Tag = str


class StateEnum:
    """Enumeration of the states of the parser state machine.

    The integer values double as indexes into the tag template tables of
    GemParser, so they must stay contiguous and start at 0.
    """

    INITIAL: State = 0
    TEXT: State = 1
    LINK: State = 2
    PRETEXT: State = 3
    HEADING: State = 4
    UNORDERED: State = 5
    QUOTE: State = 6


class TagEnum:
    """Enumeration of the gemtext line prefixes the parser recognises."""

    LINK: Tag = '=>'
    PRE: Tag = '```'
    HEADING: Tag = '#'
    UNORDERED: Tag = '*'
    QUOTE: Tag = '>'


class GemParser:
    """Line-oriented state machine that converts gemtext to HTML5.

    Attributes are plain instance fields:
      mstate  -- current StateEnum value; selects the start/end tag templates.
      recurse -- no longer used by parse_line; kept so code that reads the
                 attribute keeps working.
    """

    # NOTE(review): the HTML literals in this class were reconstructed from
    # how each template is formatted by its caller (heading level, link
    # target, optional <pre> attribute) -- confirm against the intended
    # markup.
    # Templates are indexed by StateEnum value.
    _START_TAGS: tuple = (
        '',                 # INITIAL
        '<p>\n',            # TEXT
        '<a href="{}">',    # LINK      -- formatted with the target
        '<pre{}>\n',        # PRETEXT   -- formatted with optional attribute
        '<h{}>',            # HEADING   -- formatted with the level
        '<ul>\n',           # UNORDERED
        '<blockquote>\n',   # QUOTE
    )
    _END_TAGS: tuple = (
        '',                 # INITIAL
        '</p>\n',           # TEXT
        '</a>\n',           # LINK
        '</pre>\n',         # PRETEXT
        '</h{}>\n',         # HEADING   -- formatted with the level
        '</ul>\n',          # UNORDERED
        '</blockquote>\n',  # QUOTE
    )
    _DOCUMENT_HEAD: str = (
        '<!DOCTYPE html>\n<html>\n<head>\n'
        '<title>gemtext2html</title>\n</head>\n<body>\n'
    )
    _DOCUMENT_TAIL: str = '{}</body>\n</html>\n'

    def __init__(self):
        """Create a parser in the INITIAL state."""
        self.mstate: State = StateEnum.INITIAL
        self.recurse: bool = False

    def get_document_from_gemfile(self, filename: str) -> str:
        """Read a gemtext file and convert it to an HTML document.

        Args:
            filename (str): Path of a file containing gemtext. The file is
                read as UTF-8.

        Returns:
            str: An HTML5 document as a string.
        """
        with open(filename, encoding='utf-8') as gemtext:
            return self.get_document_from_lines(gemtext)

    def get_document_from_lines(self, lines: Iterable[str]) -> str:
        """Convert an iterable of gemtext lines to an HTML document.

        Args:
            lines (Iterable[str]): Gemtext lines, each normally ending in
                a newline (as produced by iterating a text file).

        Returns:
            str: An HTML5 document as a string.
        """
        # Start from a clean state so a parser instance can be reused.
        self.mstate = StateEnum.INITIAL
        parts: list = [self._DOCUMENT_HEAD]
        for line in lines:
            fragment = self.parse_line(line)
            # Keep every non-empty fragment: a lone '\n' is a meaningful
            # blank line inside a preformatted block.
            if fragment:
                parts.append(fragment)
        # Close whatever block is still open at end of input.
        parts.append(self._DOCUMENT_TAIL.format(self.get_end_tag()))
        self.mstate = StateEnum.INITIAL
        return ''.join(parts)

    def get_end_tag(self) -> str:
        """Return the end tag template for the current state.

        Returns:
            str: An HTML end tag (the HEADING template contains a ``{}``
            placeholder for the level).
        """
        return self._END_TAGS[self.mstate]

    def get_start_tag(self) -> str:
        """Return the start tag template for the current state.

        Returns:
            str: An HTML start tag (the LINK, PRETEXT and HEADING templates
            contain a ``{}`` placeholder).
        """
        return self._START_TAGS[self.mstate]

    def is_text(self, line: str) -> bool:
        """Check whether a line starts a paragraph of plain text.

        Args:
            line (str): A line of gemtext.

        Returns:
            bool: True if the parser is in the INITIAL state and the line
            is non-empty and carries no gemtext prefix, False otherwise.
        """
        if line in ('', '\n') or self.mstate != StateEnum.INITIAL:
            return False
        prefixes = (TagEnum.LINK, TagEnum.PRE, TagEnum.HEADING,
                    TagEnum.UNORDERED, TagEnum.QUOTE)
        return not line.startswith(prefixes)

    def parse_heading(self, line: str, level: int = 0) -> str:
        """Convert a gemtext heading line to an HTML heading element.

        The tag templates are taken from the current state, so the parser
        must be in the HEADING state when this is called.

        Args:
            line (str): A gemtext heading line.
            level (int, optional): The heading level to start counting
                from. Defaults to 0.

        Returns:
            str: An HTML heading element; the level is the number of
            leading '#' characters (plus ``level``), clamped to 1..6.
        """
        while line.startswith(TagEnum.HEADING):
            line = line[len(TagEnum.HEADING):]
            level += 1
        # HTML only defines h1..h6.
        level = max(1, min(level, 6))
        return '{}{}{}'.format(self.get_start_tag().format(level),
                               html.escape(line.strip(), quote=False),
                               self.get_end_tag().format(level))

    def parse_line(self, line: str) -> str:
        """Parse one line of gemtext and return the HTML it produces.

        Updates the parser state: open lists and quotes are closed when a
        line does not continue them, and every text line becomes its own
        paragraph whose end tag is emitted with the next non-blank line.

        Args:
            line (str): A line of gemtext.

        Returns:
            str: A fragment of HTML, possibly empty.
        """
        starts_with_pre: bool = line.startswith(TagEnum.PRE)

        # Inside a preformatted block everything is verbatim (but escaped)
        # until the closing fence.
        if self.mstate == StateEnum.PRETEXT:
            if not starts_with_pre:
                return html.escape(line, quote=False)
            closing = self.get_end_tag()
            self.mstate = StateEnum.INITIAL
            return closing

        # Blank lines produce no output and do not close open blocks.
        if not line.strip():
            return ''

        starts_with_ul: bool = line.startswith(TagEnum.UNORDERED)
        starts_with_quote: bool = line.startswith(TagEnum.QUOTE)

        # Close the open block unless this line continues it. A paragraph
        # is never continued: each text line is its own <p>.
        rstring: str = ''
        continues = ((self.mstate == StateEnum.UNORDERED and starts_with_ul)
                     or (self.mstate == StateEnum.QUOTE and starts_with_quote))
        if not continues:
            rstring += self.get_end_tag()
            self.mstate = StateEnum.INITIAL

        if starts_with_pre:
            self.mstate = StateEnum.PRETEXT
            # Alt text is whatever follows the fence itself.
            label = line[len(TagEnum.PRE):].strip()
            attribute = (' aria-label="{}"'.format(html.escape(label))
                         if label else '')
            return rstring + self.get_start_tag().format(attribute)

        if line.startswith(TagEnum.HEADING):
            self.mstate = StateEnum.HEADING
            rstring += self.parse_heading(line)
            self.mstate = StateEnum.INITIAL
            return rstring

        if line.startswith(TagEnum.LINK):
            self.mstate = StateEnum.LINK
            rstring += self.parse_link(line)
            self.mstate = StateEnum.INITIAL
            return rstring

        if starts_with_ul:
            if self.mstate != StateEnum.UNORDERED:
                self.mstate = StateEnum.UNORDERED
                rstring += self.get_start_tag()
            item = html.escape(line[1:].strip(), quote=False)
            return rstring + '<li>{}</li>\n'.format(item)

        if starts_with_quote:
            if self.mstate != StateEnum.QUOTE:
                self.mstate = StateEnum.QUOTE
                rstring += self.get_start_tag()
            return rstring + html.escape(line[1:], quote=False)

        # Anything else is a paragraph of text.
        self.mstate = StateEnum.TEXT
        return rstring + self.get_start_tag() + html.escape(line, quote=False)

    def parse_link(self, line: str) -> str:
        """Convert a gemtext link line to an HTML anchor element.

        The tag templates are taken from the current state, so the parser
        must be in the LINK state when this is called.

        Args:
            line (str): A link line of gemtext ('=>', target, optional
                label, separated by spaces or tabs).

        Returns:
            str: An HTML fragment with an ``a`` element; the target is
            used as the link text when no label is given.
        """
        fields = line[len(TagEnum.LINK):].strip().split(None, 1)
        link: str = fields[0] if fields else ''
        # Keep the label's inner spacing; fall back to the target itself.
        anchor: str = fields[1] if len(fields) > 1 else link
        return '{}{}{}'.format(self.get_start_tag().format(html.escape(link)),
                               html.escape(anchor, quote=False),
                               self.get_end_tag())


def main() -> None:
    """Convert the gemtext file named on the command line to HTML on stdout."""
    if len(sys.argv) < 2:
        print('usage: {} GEMTEXT_FILE'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)
    mparser: GemParser = GemParser()
    document: str = mparser.get_document_from_gemfile(sys.argv[1])
    print(document)


if __name__ == '__main__':
    main()