#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a gemtext document into a minimal HTML page.

Usage: pass the path of a gemtext file as the only command-line argument;
the resulting HTML document is written to standard output.
"""

import html
import sys


class State:
    """Parser states; the integer values index the tag tables below."""

    INITIAL = 0
    TEXT = 1
    LINK = 2
    PRETEXT = 3
    HEADING = 4
    UNORDERED = 5
    QUOTE = 6


class Tag:
    """Line prefixes that identify each gemtext line type."""

    LINK = '=>'
    PRE = '```'
    HEADING = '#'
    UNORDERED = '*'
    QUOTE = '>'


# NOTE(review): the copy of this file under review had every "<...>" span
# stripped from its string literals, so the HTML markup below is a
# reconstruction (inferred from the surviving newlines and the .format()
# call sites) -- confirm against the upstream file.
_START_TAGS = (
    '',                 # State.INITIAL
    '<p>',              # State.TEXT
    '<a href="{}">',    # State.LINK (placeholder: escaped URL)
    '<pre{}>\n',        # State.PRETEXT (placeholder: optional attribute)
    '',                 # State.HEADING (built per level in _parse_heading)
    '<ul>\n',           # State.UNORDERED
    '<blockquote>\n',   # State.QUOTE
)
_END_TAGS = (
    '',                 # State.INITIAL
    '</p>\n',           # State.TEXT
    '</a>',             # State.LINK
    '</pre>\n',         # State.PRETEXT
    '',                 # State.HEADING
    '</ul>\n',          # State.UNORDERED
    '</blockquote>\n',  # State.QUOTE
)

_DOCUMENT_HEAD = (
    '<!DOCTYPE html>\n<html>\n<head>\n'
    '<title>gemtext2html</title>\n</head>\n<body>\n'
)
_DOCUMENT_TAIL = '</body>\n</html>\n'

# Every prefix that makes a line something other than plain text.
_LINE_PREFIXES = (Tag.LINK, Tag.PRE, Tag.HEADING, Tag.UNORDERED, Tag.QUOTE)

# Gemtext defines three heading levels: '#', '##' and '###'.
_MAX_HEADING_LEVEL = 3


class GemParser:
    """Line-oriented gemtext to HTML converter.

    Feed lines in document order to :meth:`parse_line`; each call returns
    the HTML fragment for that line, including any tags needed to close the
    previously open block and to open a new one.

    Attributes:
        mstate: the ``State`` of the multi-line block currently open
            (``PRETEXT``, ``UNORDERED`` or ``QUOTE``), else ``INITIAL``.
        toggle: ``True`` while a multi-line block is open.
    """

    def __init__(self):
        self.mstate = State.INITIAL
        self.toggle = False

    def parse_line(self, line: str) -> str:
        """Return the HTML fragment for one gemtext line.

        Args:
            line: a single line of gemtext, with or without its trailing
                newline.

        Returns:
            The HTML for the line. Blank lines produce only the closing tag
            of an open block, if any. All text is HTML-escaped.
        """
        # A fence toggles preformatted mode, whatever else is going on.
        if line.startswith(Tag.PRE):
            return self._parse_fence(line)
        # Inside a fence nothing is interpreted; only escape the text.
        if self.mstate == State.PRETEXT:
            return html.escape(line, quote=False)

        if line.startswith(Tag.UNORDERED):
            item = html.escape(line[len(Tag.UNORDERED):].strip(), quote=False)
            return self._enter_block(State.UNORDERED) + f'<li>{item}</li>\n'

        if line.startswith(Tag.QUOTE):
            quoted = html.escape(line[len(Tag.QUOTE):].strip(), quote=False)
            return self._enter_block(State.QUOTE) + quoted + '\n'

        # Every remaining line type ends an open list or quote. Closing it
        # exactly once here is what prevents the line from being rendered
        # twice or from leaking into the previous block.
        out = self._close_block()
        if line.startswith(Tag.LINK):
            anchor = self.parse_link(line)
            # Each gemtext link sits on its own line; keep that in HTML.
            return out + (anchor + '<br>\n' if anchor else '')
        if line.startswith(Tag.HEADING):
            return out + self._parse_heading(line)
        if self.is_text(line):
            text = html.escape(line.strip(), quote=False)
            return (out + _START_TAGS[State.TEXT] + text
                    + _END_TAGS[State.TEXT])
        return out  # blank line

    def parse_link(self, line: str) -> str:
        """Return the ``<a>`` element for a gemtext link line.

        Args:
            line: a line of the form ``=>[ws]URL[ws LABEL]``.

        Returns:
            The anchor element; the URL doubles as the label when no label
            is given. An empty string when the line carries no URL.
        """
        # Split on any whitespace run and at most once, so that the label
        # keeps its internal spacing.
        parts = line[len(Tag.LINK):].split(None, 1)
        if not parts:
            return ''
        url = parts[0]
        label = parts[1].strip() if len(parts) > 1 else ''
        return (_START_TAGS[State.LINK].format(html.escape(url, quote=True))
                + html.escape(label or url, quote=False)
                + _END_TAGS[State.LINK])

    def is_text(self, line: str) -> bool:
        """Return True if *line* is a plain text line outside any block."""
        return (bool(line.strip())
                and self.mstate == State.INITIAL
                and not line.startswith(_LINE_PREFIXES))

    def get_start_tag(self) -> str:
        """Return the opening-tag template for the current state."""
        return _START_TAGS[self.mstate]

    def get_end_tag(self) -> str:
        """Return the closing tag for the current state."""
        return _END_TAGS[self.mstate]

    def get_document_from_lines(self, lines) -> str:
        """Return a complete HTML document built from gemtext *lines*.

        Args:
            lines: an iterable of gemtext lines (for example an open file).
        """
        parts = [_DOCUMENT_HEAD]
        parts.extend(self.parse_line(line) for line in lines)
        # Close a block that is still open at end of input.
        parts.append(self._close_block())
        parts.append(_DOCUMENT_TAIL)
        return ''.join(parts)

    def get_document_from_gemfile(self, filename: str) -> str:
        """Read the gemtext file *filename* and return it as HTML.

        Raises:
            OSError: if the file cannot be opened or read.
        """
        with open(filename, encoding='utf-8') as gemtext:
            return self.get_document_from_lines(gemtext)

    def _close_block(self) -> str:
        """Reset to INITIAL and return the closing tag of the open block."""
        closing = _END_TAGS[self.mstate]
        self.mstate = State.INITIAL
        self.toggle = False
        return closing

    def _enter_block(self, state: int) -> str:
        """Switch to block *state*, returning the close/open tags needed."""
        if self.mstate == state:
            return ''
        closing = self._close_block()
        self.mstate = state
        self.toggle = True
        return closing + _START_TAGS[state]

    def _parse_fence(self, line: str) -> str:
        """Open or close a preformatted block for a fence line."""
        if self.mstate == State.PRETEXT:
            return self._close_block()
        closing = self._close_block()
        # Whatever follows the fence marker is alt text describing the
        # block; slice by the marker's real length so no character is lost.
        alt = line[len(Tag.PRE):].strip()
        attr = f' aria-label="{html.escape(alt, quote=True)}"' if alt else ''
        self.mstate = State.PRETEXT
        self.toggle = True
        return closing + _START_TAGS[State.PRETEXT].format(attr)

    def _parse_heading(self, line: str) -> str:
        """Return the ``<h1>``..``<h3>`` element for a heading line."""
        hashes = len(line) - len(line.lstrip(Tag.HEADING))
        level = min(hashes, _MAX_HEADING_LEVEL)
        title = html.escape(line[level:].strip(), quote=False)
        if not title:
            return ''
        return f'<h{level}>{title}</h{level}>\n'


def main(argv=None) -> int:
    """Command-line entry point; returns the process exit status."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        print('usage: gemtext2html FILE', file=sys.stderr)
        return 2
    document = GemParser().get_document_from_gemfile(args[0])
    print(document)
    return 0


if __name__ == '__main__':
    sys.exit(main())