Working parser/converter

The parser now works as expected with no known bugs.

As the spec requires, each line of ordinary text is treated as a separate
paragraph.
master
Micke Nordin 4 years ago
parent 3cd0ce594c
commit aecbe362ad
Signed by: micke
GPG Key ID: 014B273D614BE877

11
.gitignore vendored

@ -0,0 +1,11 @@
.idea/.name
.idea/gemtext2html.iml
.idea/misc.xml
.idea/modules.xml
.idea/vcs.xml
.idea/inspectionProfiles/profiles_settings.xml
.idea/inspectionProfiles/Project_Default.xml
.vscode/launch.json
.vscode/settings.json
.vscode/.ropeproject/config.py
.vscode/.ropeproject/objectdb

5
.idea/.gitignore vendored

@ -0,0 +1,5 @@
# Default ignored files
/shelf/
/workspace.xml
/.idea/
/.vscode/

@ -24,81 +24,94 @@ class Tag:
class GemParser: class GemParser:
def __init__(self): def __init__(self):
self.mstate = State.INITIAL self.mstate = State.INITIAL
self.toggle = False self.recurse = False
def parse_line(self, line): def parse_line(self, line):
rstring = str() rstring = str()
is_text = self.is_text(line)
starts_with_pre = line.startswith(Tag.PRE)
starts_with_ul = line.startswith(Tag.UNORDERED)
starts_with_quote = line.startswith(Tag.QUOTE)
if line == '\n' and self.mstate != State.PRETEXT:
return rstring
# This is blockquote end tag
if self.mstate == State.QUOTE and not starts_with_quote:
rstring += self.get_end_tag()
self.recurse = True
# This is paragraph end tag
if self.mstate == State.TEXT and not is_text:
rstring += self.get_end_tag()
self.recurse = True
# This is ul end tag
if self.mstate == State.UNORDERED and not starts_with_ul:
rstring += self.get_end_tag()
self.recurse = True
# This is a pre start tag # This is a pre start tag
if line.startswith(Tag.PRE) and not self.toggle: if starts_with_pre and self.mstate != State.PRETEXT:
self.mstate = State.PRETEXT self.mstate = State.PRETEXT
self.toggle = True rstring += self.get_start_tag().format(' aria-label="{}"'.format(
rstring = self.get_start_tag().format(' aria-label="{}"'.format(
line[4:].rstrip('\n'))) line[4:].rstrip('\n')))
return rstring
# This is in pre formatted text
elif self.mstate == State.PRETEXT and not starts_with_pre:
rstring += line
return rstring
# This is a pre end tag # This is a pre end tag
elif line.startswith(Tag.PRE): elif starts_with_pre and self.mstate == State.PRETEXT:
rstring = self.get_end_tag() rstring += self.get_end_tag()
self.mstate = State.INITIAL self.mstate = State.INITIAL
self.toggle = False return rstring
# This is in pre formatted text
elif self.mstate == State.PRETEXT:
rstring = line
else: else:
# This is ul start tag
if line.startswith(Tag.UNORDERED) and not self.toggle:
self.mstate = State.UNORDERED
self.toggle = True
rstring = self.get_start_tag()
rstring += '<li>{}</li>\n'.format(line[1:].strip())
# This is in middle of unordered list
elif line.startswith(Tag.UNORDERED):
rstring += '<li>{}</li>\n'.format(line[1:].strip())
# This is ul end tag
elif self.mstate == State.UNORDERED and not line.startswith(
Tag.UNORDERED):
rstring += self.get_end_tag()
self.mstate = State.INITIAL
self.toggle = False
# Recurse so we don't miss whats next
rstring += self.parse_line(line)
# This is blockquote start tag
if line.startswith(Tag.QUOTE) and not self.toggle:
self.mstate = State.QUOTE
self.toggle = True
rstring = self.get_start_tag()
rstring += line[1:]
# This is in quote
elif line.startswith(Tag.QUOTE):
rstring += line[1:]
# FIXME: If a list is placed directly after a quote there will be a list item in the quote...
# This is blockquote end tag
elif self.mstate == State.QUOTE and not line.startswith(Tag.QUOTE):
rstring += self.get_end_tag()
self.mstate = State.INITIAL
self.toggle = False
# Recurse so we don't miss whats next
rstring += self.parse_line(line)
# This is paragraph start tag # This is paragraph start tag
if self.is_text(line) and not self.toggle: if is_text and self.mstate != State.TEXT:
self.mstate = State.TEXT self.mstate = State.TEXT
self.toggle = True rstring += self.get_start_tag()
rstring = self.get_start_tag()
rstring += line rstring += line
return rstring
# This is in paragraph # This is in paragraph
elif self.is_text(line): if is_text and self.mstate == State.TEXT:
rstring += line rstring += line
# This is paragraph end tag return rstring
elif self.mstate == State.TEXT: # This is a heading
rstring += self.get_end_tag() if line.startswith(Tag.HEADING):
self.mstate = State.HEADING
rstring += self.parse_heading(line)
self.mstate = State.INITIAL self.mstate = State.INITIAL
self.toggle = False return rstring
# Recurse so we don't miss whats next
rstring += self.parse_line(line)
# This is a link # This is a link
if line.startswith(Tag.LINK): if line.startswith(Tag.LINK):
self.mstate = State.LINK self.mstate = State.LINK
rstring = self.parse_link(line) rstring += self.parse_link(line)
self.mstate = State.INITIAL self.mstate = State.INITIAL
return rstring return rstring
# This is ul start tag
if starts_with_ul and not self.mstate == State.UNORDERED:
self.mstate = State.UNORDERED
rstring += self.get_start_tag()
rstring += '<li>{}</li>\n'.format(line[1:].strip())
return rstring
# This is in middle of unordered list
if starts_with_ul and self.mstate == State.UNORDERED:
rstring += '<li>{}</li>\n'.format(line[1:].strip())
return rstring
# This is blockquote start tag
if starts_with_quote and self.mstate != State.QUOTE:
self.mstate = State.QUOTE
rstring += self.get_start_tag()
rstring += line[1:]
return rstring
# This is in quote
if starts_with_quote:
rstring += line[1:]
return rstring
if self.recurse:
self.recurse = False
self.mstate = State.INITIAL
# Recurse so we don't miss whats next
return "{}{}".format(rstring, self.parse_line(line))
else:
print("We should never be here, the line is: {}".format(line))
sys.exit(1)
def parse_link(self, line): def parse_link(self, line):
linearr = line[2:].strip().split(' ') linearr = line[2:].strip().split(' ')
@ -146,13 +159,20 @@ class GemParser:
with open(filename) as gemtext: with open(filename) as gemtext:
mline: str = gemtext.readline() mline: str = gemtext.readline()
while mline: while mline:
rdocument += self.parse_line(mline) rline = self.parse_line(mline)
if rline is not None and rline != str() and rline != '\n':
rdocument += rline
mline = gemtext.readline() mline = gemtext.readline()
if self.mstate != State.INITIAL: rdocument += '{}</body>\n</html>\n'.format(self.get_end_tag())
rdocument += self.get_end_tag()
rdocument += '</body>\n</html>\n'
return rdocument return rdocument
def parse_heading(self, line, level=0):
if line.startswith(Tag.HEADING):
return self.parse_heading(line[1:], level + 1)
else:
return "{}{}{}".format(self.get_start_tag().format(level), line.strip().rstrip('\n'),
self.get_end_tag().format(level))
if __name__ == '__main__': if __name__ == '__main__':
mparser: GemParser = GemParser() mparser: GemParser = GemParser()

Loading…
Cancel
Save