#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Convert a gemtext document into a minimal HTML page.

Usage: convert.py FILE

The generated HTML document is written to standard output.
"""
import html
import sys


class State:
    """Kinds of block the parser can be inside.

    The integer values double as indexes into the start/end tag tables of
    ``GemParser``, so they must stay contiguous and in this order.
    """
    INITIAL = 0
    TEXT = 1
    LINK = 2
    PRETEXT = 3
    HEADING = 4
    UNORDERED = 5
    QUOTE = 6


class Tag:
    """Line prefixes that select a gemtext line type."""
    LINK = '=>'
    PRE = '```'
    HEADING = '#'
    UNORDERED = '*'
    QUOTE = '>'


class GemParser:
    """Stateful, line-by-line gemtext to HTML converter.

    ``mstate`` holds the ``State`` of the block currently open
    (``State.INITIAL`` when none is) and ``toggle`` is True exactly while a
    block is open.  Feed lines in document order to ``parse_line`` and
    concatenate the returned fragments.
    """

    # Opening markup, indexed by State value.
    _START_TAGS = (
        '',
        '<p>\n',
        '<a href="{}">',
        '<pre{}>\n',
        '<h{}>',
        '<ul>\n',
        '<blockquote>\n',
    )
    # Closing markup, indexed by State value.  Links end with <br> so that
    # consecutive link lines stay on separate lines, as they do in gemtext.
    _END_TAGS = (
        '',
        '</p>\n',
        '</a><br>\n',
        '</pre>\n',
        '</h{}>\n',
        '</ul>\n',
        '</blockquote>\n',
    )
    # Prefix -> line kind, checked in order (the PRE fence is handled apart).
    _LINE_TAGS = (
        (Tag.LINK, State.LINK),
        (Tag.HEADING, State.HEADING),
        (Tag.UNORDERED, State.UNORDERED),
        (Tag.QUOTE, State.QUOTE),
    )
    # Blocks that stay open across consecutive lines of the same kind.
    _CONTINUABLE = (State.UNORDERED, State.QUOTE)
    # Gemtext defines three heading levels: '#', '##' and '###'.
    _MAX_HEADING_LEVEL = 3

    def __init__(self):
        self.mstate = State.INITIAL
        self.toggle = False

    def parse_line(self, line):
        """Return the HTML fragment produced by one gemtext line.

        ``line`` may keep its trailing newline.  An open block is closed
        first whenever ``line`` does not continue it, so each input line is
        rendered exactly once and no end tag is lost.
        """
        if line.startswith(Tag.PRE):
            return self._toggle_pre(line)
        if self.mstate == State.PRETEXT:
            # Preformatted content is passed through verbatim, only escaped.
            return html.escape(line, quote=False)

        kind = self._line_kind(line)
        continues_block = (kind == self.mstate and kind in self._CONTINUABLE)
        out = ''
        if self.mstate != State.INITIAL and not continues_block:
            out = self._close_block()

        if kind == State.LINK:
            out += self.parse_link(line)
        elif kind == State.HEADING:
            out += self._render_heading(line)
        elif kind == State.UNORDERED:
            item = line[len(Tag.UNORDERED):].strip()
            out += self._open_block(State.UNORDERED)
            out += '<li>{}</li>\n'.format(html.escape(item, quote=False))
        elif kind == State.QUOTE:
            out += self._open_block(State.QUOTE)
            out += html.escape(line[len(Tag.QUOTE):], quote=False)
        elif kind == State.TEXT:
            # Every text line is its own paragraph; it is closed by the
            # next line (or at end of document).
            out += self._open_block(State.TEXT)
            out += html.escape(line, quote=False)
        # Blank lines (kind == State.INITIAL) only close the open block.
        return out

    def parse_link(self, line):
        """Render a ``=> URL [label]`` line as an HTML anchor.

        The label defaults to the URL when none is given.  A link line
        without a URL yields an empty string.
        """
        parts = line[len(Tag.LINK):].split(None, 1)
        if not parts:
            return ''
        target = parts[0]
        label = parts[1].strip() if len(parts) > 1 else target
        opening = self._START_TAGS[State.LINK].format(
            html.escape(target, quote=True))
        return (opening + html.escape(label, quote=False)
                + self._END_TAGS[State.LINK])

    def is_text(self, line):
        """Return True if ``line`` would start a paragraph right now.

        That is the case when no block is open and the line is neither
        blank nor introduced by one of the ``Tag`` prefixes.
        """
        return (self.mstate == State.INITIAL
                and not line.startswith(Tag.PRE)
                and self._line_kind(line) == State.TEXT)

    def get_start_tag(self):
        """Return the opening markup (possibly a template) for ``mstate``."""
        return self._START_TAGS[self.mstate]

    def get_end_tag(self):
        """Return the closing markup (possibly a template) for ``mstate``."""
        return self._END_TAGS[self.mstate]

    def get_document_from_gemfile(self, filename):
        """Read the gemtext file ``filename`` and return a full HTML page.

        Raises ``OSError`` if the file cannot be opened.
        """
        parts = ['<!DOCTYPE html>\n<html>\n<head>\n'
                 '<title>gemtext2html</title>\n</head>\n<body>\n']
        with open(filename, encoding='utf-8') as gemtext:
            for line in gemtext:
                parts.append(self.parse_line(line))
        # Close whatever is still open and leave the parser reusable.
        if self.mstate != State.INITIAL:
            parts.append(self._close_block())
        parts.append('</body>\n</html>\n')
        return ''.join(parts)

    def _line_kind(self, line):
        """Classify a non-fence line; blank lines map to State.INITIAL."""
        if not line.strip():
            return State.INITIAL
        for prefix, kind in self._LINE_TAGS:
            if line.startswith(prefix):
                return kind
        return State.TEXT

    def _open_block(self, state):
        """Enter ``state`` and return its start tag ('' if already in it)."""
        if self.mstate == state:
            return ''
        self.mstate = state
        self.toggle = True
        return self.get_start_tag()

    def _close_block(self):
        """Return the end tag of the open block and reset the state."""
        closing = self.get_end_tag()
        self.mstate = State.INITIAL
        self.toggle = False
        return closing

    def _toggle_pre(self, line):
        """Handle a ``` fence: close the <pre> block or open a new one."""
        if self.mstate == State.PRETEXT:
            return self._close_block()
        # A fence met inside another block ends that block first.
        out = self._close_block() if self.mstate != State.INITIAL else ''
        alt = line[len(Tag.PRE):].strip()
        attr = ' aria-label="{}"'.format(html.escape(alt)) if alt else ''
        self.mstate = State.PRETEXT
        self.toggle = True
        return out + self.get_start_tag().format(attr)

    def _render_heading(self, line):
        """Render a '#', '##' or '###' line as <h1>, <h2> or <h3>."""
        level = 0
        while (level < self._MAX_HEADING_LEVEL
               and line.startswith(Tag.HEADING, level)):
            level += 1
        text = html.escape(line[level:].strip(), quote=False)
        return (self._START_TAGS[State.HEADING].format(level) + text
                + self._END_TAGS[State.HEADING].format(level))


def main(argv=None):
    """Convert the gemtext file named on the command line to HTML."""
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        sys.exit('usage: {} FILE'.format(args[0] if args else 'convert.py'))
    mparser = GemParser()
    print(mparser.get_document_from_gemfile(args[1]))


if __name__ == '__main__':
    main()