gemtext2html

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""This is a python module that will parse gemtext and convert it to html5
"""
import html
import sys


class GemParser:
    """This is the main parser class

    The parser is a small line-driven state machine: ``mstate`` holds the
    block that is currently open (see ``State``) and every call to
    ``parse_line`` emits the html for one line of gemtext, closing and
    opening block level tags as the state changes.
    """

    # Tag templates indexed by the numeric ``State`` values.
    # NOTE(review): the markup in these templates (and the document
    # head/foot in get_document_from_gemfile) was reconstructed from how the
    # parser uses them -- the State order, the ``.format(level)`` /
    # ``.format(link)`` / ``.format(' aria-label=...')`` call sites -- since
    # the literals had lost their tags. Confirm against the intended output.
    _START_TAGS = (
        '',                 # State.INITIAL
        '<p>\n',            # State.TEXT
        '<a href="{}">',    # State.LINK, formatted with the target
        '<pre{}>\n',        # State.PRETEXT, formatted with the attributes
        '<h{}>',            # State.HEADING, formatted with the level
        '<ul>\n',           # State.UNORDERED
        '<blockquote>\n',   # State.QUOTE
    )
    _END_TAGS = (
        '',                 # State.INITIAL
        '</p>\n',           # State.TEXT
        '</a>\n',           # State.LINK
        '</pre>\n',         # State.PRETEXT
        '</h{}>\n',         # State.HEADING, formatted with the level
        '</ul>\n',          # State.UNORDERED
        '</blockquote>\n',  # State.QUOTE
    )

    def __init__(self):
        """Constructor for the GemParser class
        """
        self.mstate = State.INITIAL
        # True while parse_line has closed a block and may still have to
        # re-parse the same line from the INITIAL state.
        self.recurse = False

    def get_document_from_gemfile(self, filename: str) -> str:
        """This subroutine will read a file line by line and convert it to html

        Args:
            filename (str): A file name corresponding to a file of gemtext

        Returns:
            str: A valid html5 document as string
        """
        # Start from a clean state so one parser can convert several files
        # without leaking an open block from the previous document.
        self.mstate = State.INITIAL
        self.recurse = False

        parts = ['<!DOCTYPE html>\n<html>\n<head>\n',
                 '<title>gemtext2html</title>\n</head>\n<body>\n']
        with open(filename, encoding='utf-8') as gemtext:
            for mline in gemtext:
                rline = self.parse_line(mline)
                # Only drop empty results; a lone '\n' is a blank line
                # inside a preformatted block and must be kept.
                if rline:
                    parts.append(rline)
        # Close whatever block is still open at end of file.
        parts.append('{}</body>\n</html>\n'.format(self.get_end_tag()))
        return ''.join(parts)

    def get_end_tag(self) -> str:
        """A subroutine that will emit the correct end tag for the state

        Returns:
            str: A html end tag
        """
        return self._END_TAGS[self.mstate]

    def get_start_tag(self) -> str:
        """A subroutine to emit the correct html start tag for the state

        Returns:
            str: A html start tag
        """
        return self._START_TAGS[self.mstate]

    def is_text(self, line: str) -> bool:
        """A function that will check if this is a paragraph of text

        Args:
            line (str): A line of gemtext

        Returns:
            bool: True if it is text, False if it is something else
        """
        # Only the INITIAL state can start a paragraph, so this is False
        # whenever a block is open.
        if line in ('', '\n') or self.mstate != State.INITIAL:
            return False
        return not line.startswith(
            (Tag.LINK, Tag.PRE, Tag.HEADING, Tag.UNORDERED, Tag.QUOTE))

    def parse_heading(self, line: str, level: int = 0) -> str:
        """A function that will count the heading markers to get the
        correct heading level

        Args:
            line (str): A gemtext heading line
            level (int, optional): The heading level to start from.
                Defaults to 0.

        Returns:
            str: A html heading tag of the correct level
        """
        while line.startswith(Tag.HEADING):
            line = line[1:]
            level += 1
        return "{}{}{}".format(
            self._START_TAGS[State.HEADING].format(level),
            html.escape(line.strip(), quote=False),
            self._END_TAGS[State.HEADING].format(level))

    def parse_line(self, line: str) -> str:
        """This subroutine will parse a single line of gemtext and enter the
        correct state and output the corresponding html

        Args:
            line (string): A string of gemtext

        Returns:
            str: A string of html
        """
        rstring = str()
        is_text = self.is_text(line)
        starts_with_pre = line.startswith(Tag.PRE)
        starts_with_ul = line.startswith(Tag.UNORDERED)
        starts_with_quote = line.startswith(Tag.QUOTE)

        # Blank lines only matter inside preformatted text.
        if line == '\n' and self.mstate != State.PRETEXT:
            return rstring

        # Close an open blockquote, paragraph or list when this line does
        # not continue it. The flag is recomputed on every call so it can
        # never be left over from an earlier line.
        self.recurse = (
            (self.mstate == State.QUOTE and not starts_with_quote)
            or (self.mstate == State.TEXT and not is_text)
            or (self.mstate == State.UNORDERED and not starts_with_ul))
        if self.recurse:
            rstring += self.get_end_tag()

        if starts_with_pre:
            if self.mstate != State.PRETEXT:
                # This is a pre start tag; the text after the fence is
                # used as the label of the block.
                self.mstate = State.PRETEXT
                alt = line[len(Tag.PRE):].strip()
                attributes = str()
                if alt:
                    attributes = ' aria-label="{}"'.format(html.escape(alt))
                rstring += self.get_start_tag().format(attributes)
            else:
                # This is a pre end tag
                rstring += self.get_end_tag()
                self.mstate = State.INITIAL
            return rstring

        # This is in pre formatted text
        if self.mstate == State.PRETEXT:
            return rstring + html.escape(line, quote=False)

        # This is paragraph start tag (is_text implies the INITIAL state,
        # so every text line opens its own paragraph).
        if is_text:
            self.mstate = State.TEXT
            rstring += self.get_start_tag()
            rstring += html.escape(line, quote=False)
            return rstring

        # This is a heading
        if line.startswith(Tag.HEADING):
            self.mstate = State.HEADING
            rstring += self.parse_heading(line)
            self.mstate = State.INITIAL
            return rstring

        # This is a link
        if line.startswith(Tag.LINK):
            self.mstate = State.LINK
            rstring += self.parse_link(line)
            self.mstate = State.INITIAL
            return rstring

        # This is an unordered list item, opening the list if needed
        if starts_with_ul:
            if self.mstate != State.UNORDERED:
                self.mstate = State.UNORDERED
                rstring += self.get_start_tag()
            rstring += '<li>{}</li>\n'.format(
                html.escape(line[1:].strip(), quote=False))
            return rstring

        # This is a quote line, opening the blockquote if needed
        if starts_with_quote:
            if self.mstate != State.QUOTE:
                self.mstate = State.QUOTE
                rstring += self.get_start_tag()
            rstring += html.escape(line[1:], quote=False)
            return rstring

        if self.recurse:
            # A block was closed and the line is plain text: parse it again
            # from the INITIAL state so we don't miss whats next.
            self.recurse = False
            self.mstate = State.INITIAL
            return "{}{}".format(rstring, self.parse_line(line))

        # Diagnostics go to stderr so they never end up in the html output.
        print("We should never be here, the line is: {}".format(line),
              file=sys.stderr)
        sys.exit(1)

    def parse_link(self, line: str) -> str:
        """This function will parse a gemtext link

        Args:
            line (str): A link line of gemtext

        Returns:
            str: A html fragment with an anchor tag
        """
        # "=> target optional label": split once on whitespace so the label
        # keeps the spaces between its words.
        parts = line[len(Tag.LINK):].split(None, 1)
        link = parts[0] if parts else str()
        anchor = parts[1].strip() if len(parts) > 1 else str()
        if not anchor:
            # Without a label the target itself is shown.
            anchor = link
        rline = self._START_TAGS[State.LINK].format(html.escape(link))
        rline += html.escape(anchor, quote=False)
        rline += self._END_TAGS[State.LINK]
        return rline


class State:
    """This is an enumeration of the states of the state machine

    The values double as indexes into the tag tables of GemParser.
    """
    INITIAL = 0
    TEXT = 1
    LINK = 2
    PRETEXT = 3
    HEADING = 4
    UNORDERED = 5
    QUOTE = 6


class Tag:
    """This is an enumeration of the possible gemtext tags
    """
    LINK = '=>'
    PRE = '```'
    HEADING = '#'
    UNORDERED = '*'
    QUOTE = '>'


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit('usage: {} GEMTEXT_FILE'.format(sys.argv[0]))
    mparser: GemParser = GemParser()
    document = mparser.get_document_from_gemfile(sys.argv[1])
    print(document)