Don't set a label if there is none

Add type hints.
This is helpful for better IDE support and for my own thinking...
3 changed files with 264 additions and 160 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,12 @@
+.idea/.name
+.idea/gemtext2html.iml
+.idea/inspectionProfiles/Project_Default.xml
+.idea/inspectionProfiles/profiles_settings.xml
+.idea/misc.xml
+.idea/modules.xml
+.idea/vcs.xml
+.idea/workspace.xml
+.vscode/.ropeproject/config.py
+.vscode/.ropeproject/objectdb
+.vscode/launch.json
+.vscode/settings.json
--- a/src/convert.py
+++ b/src/convert.py
@ -1,160 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-import sys
-
-
-class State:
-    INITIAL = 0
-    TEXT = 1
-    LINK = 2
-    PRETEXT = 3
-    HEADING = 4
-    UNORDERED = 5
-    QUOTE = 6
-
-
-class Tag:
-    LINK = '=>'
-    PRE = '```'
-    HEADING = '#'
-    UNORDERED = '*'
-    QUOTE = '>'
-
-
-class GemParser:
-    def __init__(self):
-        self.mstate = State.INITIAL
-        self.toggle = False
-
-    def parse_line(self, line):
-        rstring = str()
-        # This is a pre start tag
-        if line.startswith(Tag.PRE) and not self.toggle:
-            self.mstate = State.PRETEXT
-            self.toggle = True
-            rstring = self.get_start_tag().format(' aria-label="{}"'.format(
-                line[4:].rstrip('\n')))
-        # This is a pre end tag
-        elif line.startswith(Tag.PRE):
-            rstring = self.get_end_tag()
-            self.mstate = State.INITIAL
-            self.toggle = False
-        # This is in pre formatted text
-        elif self.mstate == State.PRETEXT:
-            rstring = line
-        else:
-            # This is ul start tag
-            if line.startswith(Tag.UNORDERED) and not self.toggle:
-                self.mstate = State.UNORDERED
-                self.toggle = True
-                rstring = self.get_start_tag()
-                rstring += '<li>{}</li>\n'.format(line[1:].strip())
-            # This is in middle of unordered list
-            elif line.startswith(Tag.UNORDERED):
-                rstring += '<li>{}</li>\n'.format(line[1:].strip())
-            # This is ul end tag
-            elif self.mstate == State.UNORDERED and not line.startswith(
-                    Tag.UNORDERED):
-                rstring += self.get_end_tag()
-                self.mstate = State.INITIAL
-                self.toggle = False
-                # Recurse so we don't miss whats next
-                rstring += self.parse_line(line)
-            # This is blockquote start tag
-            if line.startswith(Tag.QUOTE) and not self.toggle:
-                self.mstate = State.QUOTE
-                self.toggle = True
-                rstring = self.get_start_tag()
-                rstring += line[1:]
-            # This is in quote
-            elif line.startswith(Tag.QUOTE):
-                rstring += line[1:]
-            # FIXME: If a list is placed directly after a quote there will ba a list item in the quote...
-            # This is blockquote end tag
-            elif self.mstate == State.QUOTE and not line.startswith(Tag.QUOTE):
-                rstring += self.get_end_tag()
-                self.mstate = State.INITIAL
-                self.toggle = False
-                # Recurse so we don't miss whats next
-                rstring += self.parse_line(line)
-            # This is paragraph start tag
-            if self.is_text(line) and not self.toggle:
-                self.mstate = State.TEXT
-                self.toggle = True
-                rstring = self.get_start_tag()
-                rstring += line
-            # This is in paragraph
-            elif self.is_text(line):
-                rstring += line
-            # This is paragraph end tag
-            elif self.mstate == State.TEXT:
-                rstring += self.get_end_tag()
-                self.mstate = State.INITIAL
-                self.toggle = False
-                # Recurse so we don't miss whats next
-                rstring += self.parse_line(line)
-            # This is a link
-            if line.startswith(Tag.LINK):
-                self.mstate = State.LINK
-                rstring = self.parse_link(line)
-                self.mstate = State.INITIAL
-        return rstring
-
-    def parse_link(self, line):
-        linearr = line[2:].strip().split(' ')
-        link = linearr[0]
-        anchor = str().join(linearr[1:])
-        if not anchor:
-            anchor = link
-        rline = self.get_start_tag().format(link)
-        rline += anchor
-        rline += self.get_end_tag()
-        return rline
-
-    def is_text(self, line):
-        return (not (line == '' or line == '\n')
-                and self.mstate == State.INITIAL and
-                (not line.startswith(Tag.LINK) and not line.startswith(Tag.PRE)
-                 and not line.startswith(Tag.HEADING)
-                 and not line.startswith(Tag.UNORDERED)
-                 and not line.startswith(Tag.QUOTE)))
-
-    def get_start_tag(self):
-        tag = list()
-        tag.append('')
-        tag.append('<p>\n')
-        tag.append('<a href="{}">')
-        tag.append('<pre{}>\n')
-        tag.append('<h{}>')
-        tag.append('<ul>\n')
-        tag.append('<blockquote>\n')
-        return tag[self.mstate]
-
-    def get_end_tag(self):
-        tag = list()
-        tag.append('')
-        tag.append('</p>\n')
-        tag.append('</a>\n')
-        tag.append('</pre>\n')
-        tag.append('</h{}>\n')
-        tag.append('</ul>\n')
-        tag.append('</blockquote>\n')
-        return tag[self.mstate]
-
-    def get_document_from_gemfile(self, filename):
-        rdocument = '<!DOCTYPE html>\n<html>\n<head>\n<title>gemtext2html</title>\n</head>\n<body>\n'
-        with open(filename) as gemtext:
-            mline: str = gemtext.readline()
-            while mline:
-                rdocument += self.parse_line(mline)
-                mline = gemtext.readline()
-            if self.mstate != State.INITIAL:
-                rdocument += self.get_end_tag()
-        rdocument += '</body>\n</html>\n'
-        return rdocument
-
-
-if __name__ == '__main__':
-    mparser: GemParser = GemParser()
-    document = mparser.get_document_from_gemfile(sys.argv[1])
-    print(document)
--- a/src/convert/init.py
+++ b/src/convert/init.py
@ -0,0 +1,252 @@
+#!/usr/bin/env python3
+"""This is a python module that will parse gemtext and convert it to html5
+"""
+# -*- coding: utf-8 -*-
+import sys
+
+# Type aliases
+State = int
+Tag = str
+
+
+class GemParser:
+    """Line-oriented state machine that converts gemtext to html5.
+
+    The currently open block element is tracked in ``mstate``; lines are fed
+    one at a time through ``parse_line``.
+
+    NOTE(review): text copied from the gemtext input is not html-escaped
+    anywhere in this class, so characters such as ``<`` or ``"`` end up
+    verbatim in the output.
+    """
+
+    def __init__(self):
+        """Constructor for the GemParser class
+        """
+        # The block element that is currently open (a StateEnum value)
+        self.mstate: State = StateEnum.INITIAL
+        # Set by parse_line when it closed a block and has to look at the
+        # same line again from the initial state
+        self.recurse: bool = False
+
+    def get_document_from_gemfile(self, filename: str) -> str:
+        """Read a file of gemtext line by line and convert it to html
+
+        Args:
+            filename (str): A file name corresponding to a file of gemtext
+
+        Returns:
+            str: A html5 document as string
+        """
+        parts: list[str] = [
+            '<!DOCTYPE html>\n<html>\n<head>\n',
+            '<title>gemtext2html</title>\n</head>\n<body>\n',
+        ]
+        # Gemtext is utf-8 unless stated otherwise, so don't depend on the
+        # locale of the machine running the conversion
+        with open(filename, encoding='utf-8') as gemtext:
+            for mline in gemtext:
+                rline: str = self.parse_line(mline)
+                # parse_line returns an empty string for lines that produce
+                # no output. A lone newline is a blank line inside
+                # preformatted text (or a quote) and has to be kept.
+                if rline:
+                    parts.append(rline)
+        # Close whatever block is still open at the end of the file and
+        # reset the state so the parser can be used for another file
+        parts.append('{}</body>\n</html>\n'.format(self.get_end_tag()))
+        self.mstate = StateEnum.INITIAL
+        return ''.join(parts)
+
+    def get_end_tag(self) -> str:
+        """A subroutine that will emit the correct end tag for the state
+
+        Returns:
+            str: A html end tag
+        """
+        # Indexed by the StateEnum value held in self.mstate
+        tags = (
+            '',
+            '</p>\n',
+            '</a>\n',
+            '</pre>\n',
+            '</h{}>\n',
+            '</ul>\n',
+            '</blockquote>\n',
+        )
+        return tags[self.mstate]
+
+    def get_start_tag(self) -> str:
+        """A subroutine to emit the correct html start tag for the state
+
+        Returns:
+            str: A html start tag
+        """
+        # Indexed by the StateEnum value held in self.mstate
+        tags = (
+            '',
+            '<p>\n',
+            '<a href="{}">',
+            '<pre{}>\n',
+            '<h{}>',
+            '<ul>\n',
+            '<blockquote>\n',
+        )
+        return tags[self.mstate]
+
+    def is_text(self, line: str) -> bool:
+        """A function that will check if this is a paragraph of text
+
+        Args:
+            line (str): A line of gemtext
+
+        Returns:
+            bool: True if the parser is in the initial state and the line is
+            neither blank nor starts with a gemtext tag, False otherwise
+        """
+        if line in ('', '\n') or self.mstate != StateEnum.INITIAL:
+            return False
+        return not line.startswith((TagEnum.LINK, TagEnum.PRE,
+                                    TagEnum.HEADING, TagEnum.UNORDERED,
+                                    TagEnum.QUOTE))
+
+    def parse_heading(self, line: str, level: int = 0) -> str:
+        """A function that will count the heading markers of a line to get
+        the correct heading level
+
+        Args:
+            line (str): A gemtext heading line
+            level (int, optional): The heading level to start from. Defaults to 0.
+
+        Returns:
+            str: A html heading tag of the correct level
+        """
+        # Every leading heading marker raises the level by one
+        while line.startswith(TagEnum.HEADING):
+            line = line[len(TagEnum.HEADING):]
+            level += 1
+        return "{}{}{}".format(self.get_start_tag().format(level),
+                               line.strip(),
+                               self.get_end_tag().format(level))
+
+    def parse_line(self, line: str) -> str:
+        """This subroutine will parse a single line of gemtext and enter the
+        correct state and output the corresponding html
+
+        Args:
+            line (string): A string of gemtext
+
+        Returns:
+            str: A string of html
+        """
+        rstring: str = str()
+        # Forget a flag left behind by an earlier call that closed a block
+        # and then returned from one of the branches below
+        self.recurse = False
+        is_text: bool = self.is_text(line)
+        starts_with_pre: bool = line.startswith(TagEnum.PRE)
+        starts_with_ul: bool = line.startswith(TagEnum.UNORDERED)
+        starts_with_quote: bool = line.startswith(TagEnum.QUOTE)
+        # Blank lines only matter inside preformatted text
+        if line in ('', '\n') and self.mstate != StateEnum.PRETEXT:
+            return rstring
+        # This is blockquote end tag
+        if self.mstate == StateEnum.QUOTE and not starts_with_quote:
+            rstring += self.get_end_tag()
+            self.recurse = True
+        # This is paragraph end tag
+        if self.mstate == StateEnum.TEXT and not is_text:
+            rstring += self.get_end_tag()
+            self.recurse = True
+        # This is ul end tag
+        if self.mstate == StateEnum.UNORDERED and not starts_with_ul:
+            rstring += self.get_end_tag()
+            self.recurse = True
+        # This is a pre start tag
+        if starts_with_pre and self.mstate != StateEnum.PRETEXT:
+            self.mstate = StateEnum.PRETEXT
+            # Everything after the fence itself is the label
+            label: str = line[len(TagEnum.PRE):].strip()
+            rstring += self.get_start_tag().format(
+                ' aria-label="{}"'.format(label) if label else '')
+
+            return rstring
+        # This is in pre formatted text
+        elif self.mstate == StateEnum.PRETEXT and not starts_with_pre:
+            rstring += line
+            return rstring
+        # This is a pre end tag
+        elif starts_with_pre and self.mstate == StateEnum.PRETEXT:
+            rstring += self.get_end_tag()
+            self.mstate = StateEnum.INITIAL
+            return rstring
+        else:
+            # This is paragraph start tag
+            if is_text and self.mstate != StateEnum.TEXT:
+                self.mstate = StateEnum.TEXT
+                rstring += self.get_start_tag()
+                rstring += line
+                return rstring
+            # This is in paragraph
+            if is_text and self.mstate == StateEnum.TEXT:
+                rstring += line
+                return rstring
+            # This is a heading
+            if line.startswith(TagEnum.HEADING):
+                self.mstate = StateEnum.HEADING
+                rstring += self.parse_heading(line)
+                self.mstate = StateEnum.INITIAL
+                return rstring
+            # This is a link
+            if line.startswith(TagEnum.LINK):
+                self.mstate = StateEnum.LINK
+                rstring += self.parse_link(line)
+                self.mstate = StateEnum.INITIAL
+                return rstring
+            # This is ul start tag
+            if starts_with_ul and not self.mstate == StateEnum.UNORDERED:
+                self.mstate = StateEnum.UNORDERED
+                rstring += self.get_start_tag()
+                rstring += '<li>{}</li>\n'.format(line[1:].strip())
+                return rstring
+            # This is in middle of unordered list
+            if starts_with_ul and self.mstate == StateEnum.UNORDERED:
+                rstring += '<li>{}</li>\n'.format(line[1:].strip())
+                return rstring
+            # This is blockquote start tag
+            if starts_with_quote and self.mstate != StateEnum.QUOTE:
+                self.mstate = StateEnum.QUOTE
+                rstring += self.get_start_tag()
+                rstring += line[1:]
+                return rstring
+            # This is in quote
+            if starts_with_quote:
+                rstring += line[1:]
+                return rstring
+        if self.recurse:
+            self.recurse = False
+            self.mstate = StateEnum.INITIAL
+            # Recurse so we don't miss what's next
+            return "{}{}".format(rstring, self.parse_line(line))
+        else:
+            # Diagnostics go to stderr so they don't end up in the html
+            print("We should never be here, the line is: {}".format(line),
+                  file=sys.stderr)
+            sys.exit(1)
+
+    def parse_link(self, line: str) -> str:
+        """This function will parse a gemtext link
+
+        Args:
+            line (str): A link line of gemtext
+
+        Returns:
+            str: A html fragment with a <a> tag
+        """
+        # Split the target from the optional label on the first run of
+        # whitespace, so the spaces between the words of the label survive
+        linearr: list[str] = line[len(TagEnum.LINK):].strip().split(None, 1)
+        link: str = linearr[0] if linearr else str()
+        # Without a label the link target doubles as the anchor text
+        anchor: str = linearr[1] if len(linearr) > 1 else link
+        rline: str = self.get_start_tag().format(link)
+        rline += anchor
+        rline += self.get_end_tag()
+        return rline
+
+
+class StateEnum:
+    """This is an enumeration of the states of the state machine
+
+    Each value is also used as the index into the tag sequences built by
+    GemParser.get_start_tag and GemParser.get_end_tag, so the numbering has
+    to stay in step with the order of the entries there.
+    """
+    INITIAL: State = 0  # No block is open, start and end tag are empty
+    TEXT: State = 1  # Inside a paragraph
+    LINK: State = 2  # Only set while a link line is being converted
+    PRETEXT: State = 3  # Inside preformatted text
+    HEADING: State = 4  # Only set while a heading line is being converted
+    UNORDERED: State = 5  # Inside an unordered list
+    QUOTE: State = 6  # Inside a blockquote
+
+
+class TagEnum:
+    """This is an enumeration of the possible gemtext tags
+
+    Each tag is the prefix that a line of gemtext is tested against with
+    str.startswith to decide what kind of line it is.
+    """
+    LINK: Tag = '=>'  # Link line
+    PRE: Tag = '```'  # Toggles preformatted text on and off
+    HEADING: Tag = '#'  # Heading line, repeated once per heading level
+    UNORDERED: Tag = '*'  # Unordered list item
+    QUOTE: Tag = '>'  # Quote line
+
+
+if __name__ == '__main__':
+    # Script entry point: convert the gemtext file named by the first
+    # command line argument and print the resulting html document
+    mparser: GemParser = GemParser()
+    document: str = mparser.get_document_from_gemfile(sys.argv[1])
+    print(document)
Author	SHA1	Message	Date
Micke Nordin	76942c0b3e	Don't set a label if there is none	3 years ago
Micke Nordin	806b51615a	Add type hints. This is helpful for better ide support and for my own thinking...	3 years ago
Micke Nordin	aa74299881	Add docstrings and convert to package	3 years ago
Micke Nordin	bee7b41a51	Working parser/converter The parser now works as expected with no known bugs. As expected from the spec each line of ordinary text is treated as a separate paragraph.	3 years ago