#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a gemtext (text/gemini) file into a minimal HTML document.

Usage: run the script with the path of a gemtext file as its single
argument; the generated HTML is written to standard output.
"""

import html
import sys


class State:
    """Parser states: which kind of block the parser is currently inside."""

    INITIAL = 0    # not inside any block
    TEXT = 1       # inside a paragraph
    LINK = 2       # transient: used only to select the link tag templates
    PRETEXT = 3    # inside a preformatted block
    HEADING = 4    # transient: used only to select the heading tag templates
    UNORDERED = 5  # inside an unordered list
    QUOTE = 6      # inside a blockquote


class Tag:
    """Line prefixes that identify each gemtext line type."""

    LINK = '=>'
    PRE = '```'
    HEADING = '#'
    UNORDERED = '*'
    QUOTE = '>'


class GemParser:
    """Line-oriented state machine that turns gemtext into HTML.

    Feed lines one at a time to :meth:`parse_line` (or hand a whole
    document to :meth:`get_document_from_lines` /
    :meth:`get_document_from_gemfile`).  ``mstate`` holds the block that
    is currently open so that the next line can either continue it or
    close it.
    """

    # Opening markup per state.  The link, pre and heading templates carry
    # one ``{}`` placeholder (href, attribute string, heading level).
    _START_TAGS = {
        State.INITIAL: '',
        State.TEXT: '<p>\n',
        State.LINK: '<a href="{}">',
        State.PRETEXT: '<pre{}>\n',
        State.HEADING: '<h{}>',
        State.UNORDERED: '<ul>\n',
        State.QUOTE: '<blockquote>\n',
    }

    # Closing markup per state.  Links end with <br> because every gemtext
    # link occupies its own line, while <a> is an inline element.
    _END_TAGS = {
        State.INITIAL: '',
        State.TEXT: '</p>\n',
        State.LINK: '</a><br>\n',
        State.PRETEXT: '</pre>\n',
        State.HEADING: '</h{}>\n',
        State.UNORDERED: '</ul>\n',
        State.QUOTE: '</blockquote>\n',
    }

    _LINE_PREFIXES = (Tag.LINK, Tag.PRE, Tag.HEADING, Tag.UNORDERED,
                      Tag.QUOTE)

    _DOCUMENT_HEAD = ('<!DOCTYPE html>\n<html>\n<head>\n'
                      '<title>gemtext2html</title>\n</head>\n<body>\n')
    _DOCUMENT_TAIL = '</body>\n</html>\n'

    def __init__(self):
        self.mstate = State.INITIAL
        # Kept only so existing code that reads the attribute keeps working;
        # parse_line no longer re-enters itself, so it is never set to True.
        self.recurse = False

    def parse_line(self, line):
        """Translate one gemtext line into HTML and update the parser state.

        Args:
            line: A single line of gemtext, normally ending in a newline.

        Returns:
            The HTML produced by this line, including the closing tag of
            the previously open block when this line ends it.  Blank lines
            outside a preformatted block produce an empty string and leave
            the open block untouched.
        """
        # Inside a preformatted block only the closing fence is special;
        # everything else (blank lines included) is passed through.
        if self.mstate == State.PRETEXT:
            if line.startswith(Tag.PRE):
                closing = self.get_end_tag()
                self.mstate = State.INITIAL
                return closing
            return html.escape(line, quote=False)

        if line in ('', '\n'):
            return ''

        is_item = line.startswith(Tag.UNORDERED)
        is_quote = line.startswith(Tag.QUOTE)
        pieces = []

        # Only lists and blockquotes span several lines.  An open paragraph
        # is always closed here, so every text line is its own paragraph.
        continues_block = ((self.mstate == State.UNORDERED and is_item)
                           or (self.mstate == State.QUOTE and is_quote))
        if not continues_block:
            pieces.append(self.get_end_tag())
            self.mstate = State.INITIAL

        if line.startswith(Tag.PRE):
            # Text after the fence is the block's alt text.
            alt_text = line[len(Tag.PRE):].strip()
            attribute = ''
            if alt_text:
                attribute = ' aria-label="{}"'.format(html.escape(alt_text))
            self.mstate = State.PRETEXT
            pieces.append(self.get_start_tag().format(attribute))
        elif line.startswith(Tag.HEADING):
            pieces.append(self.parse_heading(line))
        elif line.startswith(Tag.LINK):
            pieces.append(self.parse_link(line))
        elif is_item:
            if self.mstate != State.UNORDERED:
                self.mstate = State.UNORDERED
                pieces.append(self.get_start_tag())
            item = line[len(Tag.UNORDERED):].strip()
            pieces.append('<li>{}</li>\n'.format(
                html.escape(item, quote=False)))
        elif is_quote:
            if self.mstate != State.QUOTE:
                self.mstate = State.QUOTE
                pieces.append(self.get_start_tag())
            pieces.append(html.escape(line[len(Tag.QUOTE):], quote=False))
        else:
            self.mstate = State.TEXT
            pieces.append(self.get_start_tag())
            pieces.append(html.escape(line, quote=False))
        return ''.join(pieces)

    def parse_link(self, line):
        """Build the anchor element for a ``=> URL [label]`` line.

        The label may contain spaces; when it is missing the URL itself is
        used as the link text.  Both parts are HTML-escaped.
        """
        parts = line[len(Tag.LINK):].split(None, 1)
        link = parts[0] if parts else ''
        anchor = parts[1].strip() if len(parts) > 1 else link
        return '{}{}{}'.format(
            self.get_start_tag(State.LINK).format(html.escape(link)),
            html.escape(anchor, quote=False),
            self.get_end_tag(State.LINK))

    def is_text(self, line):
        """Return True when *line* starts a paragraph.

        That is the case when the line is not blank, no block is currently
        open (``mstate`` is ``State.INITIAL``) and the line carries none of
        the gemtext line-type prefixes.
        """
        return (line not in ('', '\n')
                and self.mstate == State.INITIAL
                and not line.startswith(self._LINE_PREFIXES))

    def get_start_tag(self, state=None):
        """Return the opening tag template for *state*.

        Args:
            state: A ``State`` constant; defaults to the current ``mstate``.
        """
        return self._START_TAGS[self.mstate if state is None else state]

    def get_end_tag(self, state=None):
        """Return the closing tag template for *state*.

        Args:
            state: A ``State`` constant; defaults to the current ``mstate``.
        """
        return self._END_TAGS[self.mstate if state is None else state]

    def get_document_from_lines(self, lines):
        """Convert an iterable of gemtext lines into a complete HTML page.

        A block that is still open after the last line is closed, and the
        parser is returned to its initial state so it can be reused.
        """
        chunks = [self._DOCUMENT_HEAD]
        for line in lines:
            chunk = self.parse_line(line)
            # Keep every non-empty chunk: a bare newline is significant
            # inside a preformatted block.
            if chunk:
                chunks.append(chunk)
        chunks.append(self.get_end_tag())
        self.mstate = State.INITIAL
        chunks.append(self._DOCUMENT_TAIL)
        return ''.join(chunks)

    def get_document_from_gemfile(self, filename):
        """Read the gemtext file *filename* (UTF-8) and return it as HTML."""
        with open(filename, encoding='utf-8') as gemtext:
            return self.get_document_from_lines(gemtext)

    def parse_heading(self, line, level=0):
        """Build the heading element for a ``#``-prefixed line.

        Args:
            line: The heading line; each leading ``#`` adds one level.
            level: Levels already counted by the caller (normally 0).

        Returns:
            ``<hN>text</hN>`` plus a newline, with N clamped to 1..6, the
            range of heading elements HTML defines.
        """
        text = line.lstrip(Tag.HEADING)
        level += len(line) - len(text)
        level = max(1, min(level, 6))
        return '{}{}{}'.format(
            self.get_start_tag(State.HEADING).format(level),
            html.escape(text.strip(), quote=False),
            self.get_end_tag(State.HEADING).format(level))


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.exit('usage: {} GEMTEXT_FILE'.format(sys.argv[0]))
    mparser: GemParser = GemParser()
    document = mparser.get_document_from_gemfile(sys.argv[1])
    print(document)