diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d86bb49 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +.idea/.name +.idea/gemtext2html.iml +.idea/misc.xml +.idea/modules.xml +.idea/vcs.xml +.idea/inspectionProfiles/profiles_settings.xml +.idea/inspectionProfiles/Project_Default.xml +.vscode/launch.json +.vscode/settings.json +.vscode/.ropeproject/config.py +.vscode/.ropeproject/objectdb diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..8e9de34 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# Default ignored files +/shelf/ +/workspace.xml +/.idea/ +/.vscode/ \ No newline at end of file diff --git a/src/convert.py b/src/convert.py index 0167115..c24f0bd 100755 --- a/src/convert.py +++ b/src/convert.py @@ -24,81 +24,94 @@ class Tag: class GemParser: def __init__(self): self.mstate = State.INITIAL - self.toggle = False + self.recurse = False def parse_line(self, line): rstring = str() + is_text = self.is_text(line) + starts_with_pre = line.startswith(Tag.PRE) + starts_with_ul = line.startswith(Tag.UNORDERED) + starts_with_quote = line.startswith(Tag.QUOTE) + if line == '\n' and self.mstate != State.PRETEXT: + return rstring + # This is blockquote end tag + if self.mstate == State.QUOTE and not starts_with_quote: + rstring += self.get_end_tag() + self.recurse = True + # This is paragraph end tag + if self.mstate == State.TEXT and not is_text: + rstring += self.get_end_tag() + self.recurse = True + # This is ul end tag + if self.mstate == State.UNORDERED and not starts_with_ul: + rstring += self.get_end_tag() + self.recurse = True # This is a pre start tag - if line.startswith(Tag.PRE) and not self.toggle: + if starts_with_pre and self.mstate != State.PRETEXT: self.mstate = State.PRETEXT - self.toggle = True - rstring = self.get_start_tag().format(' aria-label="{}"'.format( + rstring += self.get_start_tag().format(' aria-label="{}"'.format( line[4:].rstrip('\n'))) + return rstring + # This is in pre formatted text + elif self.mstate == State.PRETEXT and not starts_with_pre: + rstring += line + return rstring # This is a pre end tag - elif line.startswith(Tag.PRE): - rstring = self.get_end_tag() + elif starts_with_pre and self.mstate == State.PRETEXT: + rstring += self.get_end_tag() self.mstate = State.INITIAL - self.toggle = False - # This is in pre formatted text - elif self.mstate == State.PRETEXT: - rstring = line + return rstring else: - # This is ul start tag - if line.startswith(Tag.UNORDERED) and not self.toggle: - self.mstate = State.UNORDERED - self.toggle = True - rstring = self.get_start_tag() - rstring += '
  • {}
  • \n'.format(line[1:].strip()) - # This is in middle of unordered list - elif line.startswith(Tag.UNORDERED): - rstring += '
  • {}
  • \n'.format(line[1:].strip()) - # This is ul end tag - elif self.mstate == State.UNORDERED and not line.startswith( - Tag.UNORDERED): - rstring += self.get_end_tag() - self.mstate = State.INITIAL - self.toggle = False - # Recurse so we don't miss whats next - rstring += self.parse_line(line) - # This is blockquote start tag - if line.startswith(Tag.QUOTE) and not self.toggle: - self.mstate = State.QUOTE - self.toggle = True - rstring = self.get_start_tag() - rstring += line[1:] - # This is in quote - elif line.startswith(Tag.QUOTE): - rstring += line[1:] - # FIXME: If a list is placed directly after a quote there will ba a list item in the quote... - # This is blockquote end tag - elif self.mstate == State.QUOTE and not line.startswith(Tag.QUOTE): - rstring += self.get_end_tag() - self.mstate = State.INITIAL - self.toggle = False - # Recurse so we don't miss whats next - rstring += self.parse_line(line) # This is paragraph start tag - if self.is_text(line) and not self.toggle: + if is_text and self.mstate != State.TEXT: self.mstate = State.TEXT - self.toggle = True - rstring = self.get_start_tag() + rstring += self.get_start_tag() rstring += line + return rstring # This is in paragraph - elif self.is_text(line): + if is_text and self.mstate == State.TEXT: rstring += line - # This is paragraph end tag - elif self.mstate == State.TEXT: - rstring += self.get_end_tag() + return rstring + # This is a heading + if line.startswith(Tag.HEADING): + self.mstate = State.HEADING + rstring += self.parse_heading(line) self.mstate = State.INITIAL - self.toggle = False - # Recurse so we don't miss whats next - rstring += self.parse_line(line) + return rstring # This is a link if line.startswith(Tag.LINK): self.mstate = State.LINK - rstring = self.parse_link(line) + rstring += self.parse_link(line) self.mstate = State.INITIAL - return rstring + return rstring + # This is ul start tag + if starts_with_ul and not self.mstate == State.UNORDERED: + self.mstate = State.UNORDERED + rstring += self.get_start_tag() + rstring += '
  • {}
  • \n'.format(line[1:].strip()) + return rstring + # This is in middle of unordered list + if starts_with_ul and self.mstate == State.UNORDERED: + rstring += '
  • {}
  • \n'.format(line[1:].strip()) + return rstring + # This is blockquote start tag + if starts_with_quote and self.mstate != State.QUOTE: + self.mstate = State.QUOTE + rstring += self.get_start_tag() + rstring += line[1:] + return rstring + # This is in quote + if starts_with_quote: + rstring += line[1:] + return rstring + if self.recurse: + self.recurse = False + self.mstate = State.INITIAL + # Recurse so we don't miss whats next + return "{}{}".format(rstring, self.parse_line(line)) + else: + print("We should never be here, the line is: {}".format(line)) + sys.exit(1) def parse_link(self, line): linearr = line[2:].strip().split(' ') @@ -146,13 +159,20 @@ class GemParser: with open(filename) as gemtext: mline: str = gemtext.readline() while mline: - rdocument += self.parse_line(mline) + rline = self.parse_line(mline) + if rline is not None and rline != str() and rline != '\n': + rdocument += rline mline = gemtext.readline() - if self.mstate != State.INITIAL: - rdocument += self.get_end_tag() - rdocument += '\n\n' + rdocument += '{}\n\n'.format(self.get_end_tag()) return rdocument + def parse_heading(self, line, level=0): + if line.startswith(Tag.HEADING): + return self.parse_heading(line[1:], level + 1) + else: + return "{}{}{}".format(self.get_start_tag().format(level), line.strip().rstrip('\n'), + self.get_end_tag().format(level)) + if __name__ == '__main__': mparser: GemParser = GemParser()