Working parser/converter
The parser now works as expected with no known bugs. As expected from the spec, each line of ordinary text is treated as a separate paragraph.
This commit is contained in:
parent
3cd0ce594c
commit
aecbe362ad
3 changed files with 97 additions and 61 deletions
11
.gitignore
vendored
Normal file
11
.gitignore
vendored
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
.idea/.name
|
||||||
|
.idea/gemtext2html.iml
|
||||||
|
.idea/misc.xml
|
||||||
|
.idea/modules.xml
|
||||||
|
.idea/vcs.xml
|
||||||
|
.idea/inspectionProfiles/profiles_settings.xml
|
||||||
|
.idea/inspectionProfiles/Project_Default.xml
|
||||||
|
.vscode/launch.json
|
||||||
|
.vscode/settings.json
|
||||||
|
.vscode/.ropeproject/config.py
|
||||||
|
.vscode/.ropeproject/objectdb
|
5
.idea/.gitignore
generated
vendored
Normal file
5
.idea/.gitignore
generated
vendored
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
||||||
|
/.idea/
|
||||||
|
/.vscode/
|
152
src/convert.py
152
src/convert.py
|
@ -24,81 +24,94 @@ class Tag:
|
||||||
class GemParser:
|
class GemParser:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.mstate = State.INITIAL
|
self.mstate = State.INITIAL
|
||||||
self.toggle = False
|
self.recurse = False
|
||||||
|
|
||||||
def parse_line(self, line):
|
def parse_line(self, line):
|
||||||
rstring = str()
|
rstring = str()
|
||||||
# This is a pre start tag
|
is_text = self.is_text(line)
|
||||||
if line.startswith(Tag.PRE) and not self.toggle:
|
starts_with_pre = line.startswith(Tag.PRE)
|
||||||
self.mstate = State.PRETEXT
|
starts_with_ul = line.startswith(Tag.UNORDERED)
|
||||||
self.toggle = True
|
starts_with_quote = line.startswith(Tag.QUOTE)
|
||||||
rstring = self.get_start_tag().format(' aria-label="{}"'.format(
|
if line == '\n' and self.mstate != State.PRETEXT:
|
||||||
line[4:].rstrip('\n')))
|
return rstring
|
||||||
# This is a pre end tag
|
|
||||||
elif line.startswith(Tag.PRE):
|
|
||||||
rstring = self.get_end_tag()
|
|
||||||
self.mstate = State.INITIAL
|
|
||||||
self.toggle = False
|
|
||||||
# This is in pre formatted text
|
|
||||||
elif self.mstate == State.PRETEXT:
|
|
||||||
rstring = line
|
|
||||||
else:
|
|
||||||
# This is ul start tag
|
|
||||||
if line.startswith(Tag.UNORDERED) and not self.toggle:
|
|
||||||
self.mstate = State.UNORDERED
|
|
||||||
self.toggle = True
|
|
||||||
rstring = self.get_start_tag()
|
|
||||||
rstring += '<li>{}</li>\n'.format(line[1:].strip())
|
|
||||||
# This is in middle of unordered list
|
|
||||||
elif line.startswith(Tag.UNORDERED):
|
|
||||||
rstring += '<li>{}</li>\n'.format(line[1:].strip())
|
|
||||||
# This is ul end tag
|
|
||||||
elif self.mstate == State.UNORDERED and not line.startswith(
|
|
||||||
Tag.UNORDERED):
|
|
||||||
rstring += self.get_end_tag()
|
|
||||||
self.mstate = State.INITIAL
|
|
||||||
self.toggle = False
|
|
||||||
# Recurse so we don't miss whats next
|
|
||||||
rstring += self.parse_line(line)
|
|
||||||
# This is blockquote start tag
|
|
||||||
if line.startswith(Tag.QUOTE) and not self.toggle:
|
|
||||||
self.mstate = State.QUOTE
|
|
||||||
self.toggle = True
|
|
||||||
rstring = self.get_start_tag()
|
|
||||||
rstring += line[1:]
|
|
||||||
# This is in quote
|
|
||||||
elif line.startswith(Tag.QUOTE):
|
|
||||||
rstring += line[1:]
|
|
||||||
# FIXME: If a list is placed directly after a quote there will be a list item in the quote...
|
|
||||||
# This is blockquote end tag
|
# This is blockquote end tag
|
||||||
elif self.mstate == State.QUOTE and not line.startswith(Tag.QUOTE):
|
if self.mstate == State.QUOTE and not starts_with_quote:
|
||||||
rstring += self.get_end_tag()
|
rstring += self.get_end_tag()
|
||||||
self.mstate = State.INITIAL
|
self.recurse = True
|
||||||
self.toggle = False
|
|
||||||
# Recurse so we don't miss whats next
|
|
||||||
rstring += self.parse_line(line)
|
|
||||||
# This is paragraph start tag
|
|
||||||
if self.is_text(line) and not self.toggle:
|
|
||||||
self.mstate = State.TEXT
|
|
||||||
self.toggle = True
|
|
||||||
rstring = self.get_start_tag()
|
|
||||||
rstring += line
|
|
||||||
# This is in paragraph
|
|
||||||
elif self.is_text(line):
|
|
||||||
rstring += line
|
|
||||||
# This is paragraph end tag
|
# This is paragraph end tag
|
||||||
elif self.mstate == State.TEXT:
|
if self.mstate == State.TEXT and not is_text:
|
||||||
|
rstring += self.get_end_tag()
|
||||||
|
self.recurse = True
|
||||||
|
# This is ul end tag
|
||||||
|
if self.mstate == State.UNORDERED and not starts_with_ul:
|
||||||
|
rstring += self.get_end_tag()
|
||||||
|
self.recurse = True
|
||||||
|
# This is a pre start tag
|
||||||
|
if starts_with_pre and self.mstate != State.PRETEXT:
|
||||||
|
self.mstate = State.PRETEXT
|
||||||
|
rstring += self.get_start_tag().format(' aria-label="{}"'.format(
|
||||||
|
line[4:].rstrip('\n')))
|
||||||
|
return rstring
|
||||||
|
# This is in pre formatted text
|
||||||
|
elif self.mstate == State.PRETEXT and not starts_with_pre:
|
||||||
|
rstring += line
|
||||||
|
return rstring
|
||||||
|
# This is a pre end tag
|
||||||
|
elif starts_with_pre and self.mstate == State.PRETEXT:
|
||||||
rstring += self.get_end_tag()
|
rstring += self.get_end_tag()
|
||||||
self.mstate = State.INITIAL
|
self.mstate = State.INITIAL
|
||||||
self.toggle = False
|
return rstring
|
||||||
# Recurse so we don't miss whats next
|
else:
|
||||||
rstring += self.parse_line(line)
|
# This is paragraph start tag
|
||||||
|
if is_text and self.mstate != State.TEXT:
|
||||||
|
self.mstate = State.TEXT
|
||||||
|
rstring += self.get_start_tag()
|
||||||
|
rstring += line
|
||||||
|
return rstring
|
||||||
|
# This is in paragraph
|
||||||
|
if is_text and self.mstate == State.TEXT:
|
||||||
|
rstring += line
|
||||||
|
return rstring
|
||||||
|
# This is a heading
|
||||||
|
if line.startswith(Tag.HEADING):
|
||||||
|
self.mstate = State.HEADING
|
||||||
|
rstring += self.parse_heading(line)
|
||||||
|
self.mstate = State.INITIAL
|
||||||
|
return rstring
|
||||||
# This is a link
|
# This is a link
|
||||||
if line.startswith(Tag.LINK):
|
if line.startswith(Tag.LINK):
|
||||||
self.mstate = State.LINK
|
self.mstate = State.LINK
|
||||||
rstring = self.parse_link(line)
|
rstring += self.parse_link(line)
|
||||||
self.mstate = State.INITIAL
|
self.mstate = State.INITIAL
|
||||||
return rstring
|
return rstring
|
||||||
|
# This is ul start tag
|
||||||
|
if starts_with_ul and not self.mstate == State.UNORDERED:
|
||||||
|
self.mstate = State.UNORDERED
|
||||||
|
rstring += self.get_start_tag()
|
||||||
|
rstring += '<li>{}</li>\n'.format(line[1:].strip())
|
||||||
|
return rstring
|
||||||
|
# This is in middle of unordered list
|
||||||
|
if starts_with_ul and self.mstate == State.UNORDERED:
|
||||||
|
rstring += '<li>{}</li>\n'.format(line[1:].strip())
|
||||||
|
return rstring
|
||||||
|
# This is blockquote start tag
|
||||||
|
if starts_with_quote and self.mstate != State.QUOTE:
|
||||||
|
self.mstate = State.QUOTE
|
||||||
|
rstring += self.get_start_tag()
|
||||||
|
rstring += line[1:]
|
||||||
|
return rstring
|
||||||
|
# This is in quote
|
||||||
|
if starts_with_quote:
|
||||||
|
rstring += line[1:]
|
||||||
|
return rstring
|
||||||
|
if self.recurse:
|
||||||
|
self.recurse = False
|
||||||
|
self.mstate = State.INITIAL
|
||||||
|
# Recurse so we don't miss what's next
|
||||||
|
return "{}{}".format(rstring, self.parse_line(line))
|
||||||
|
else:
|
||||||
|
print("We should never be here, the line is: {}".format(line))
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
def parse_link(self, line):
|
def parse_link(self, line):
|
||||||
linearr = line[2:].strip().split(' ')
|
linearr = line[2:].strip().split(' ')
|
||||||
|
@ -146,13 +159,20 @@ class GemParser:
|
||||||
with open(filename) as gemtext:
|
with open(filename) as gemtext:
|
||||||
mline: str = gemtext.readline()
|
mline: str = gemtext.readline()
|
||||||
while mline:
|
while mline:
|
||||||
rdocument += self.parse_line(mline)
|
rline = self.parse_line(mline)
|
||||||
|
if rline is not None and rline != str() and rline != '\n':
|
||||||
|
rdocument += rline
|
||||||
mline = gemtext.readline()
|
mline = gemtext.readline()
|
||||||
if self.mstate != State.INITIAL:
|
rdocument += '{}</body>\n</html>\n'.format(self.get_end_tag())
|
||||||
rdocument += self.get_end_tag()
|
|
||||||
rdocument += '</body>\n</html>\n'
|
|
||||||
return rdocument
|
return rdocument
|
||||||
|
|
||||||
|
def parse_heading(self, line, level=0):
|
||||||
|
if line.startswith(Tag.HEADING):
|
||||||
|
return self.parse_heading(line[1:], level + 1)
|
||||||
|
else:
|
||||||
|
return "{}{}{}".format(self.get_start_tag().format(level), line.strip().rstrip('\n'),
|
||||||
|
self.get_end_tag().format(level))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
mparser: GemParser = GemParser()
|
mparser: GemParser = GemParser()
|
||||||
|
|
Loading…
Add table
Reference in a new issue