diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d86bb49
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,11 @@
+.idea/.name
+.idea/gemtext2html.iml
+.idea/misc.xml
+.idea/modules.xml
+.idea/vcs.xml
+.idea/inspectionProfiles/profiles_settings.xml
+.idea/inspectionProfiles/Project_Default.xml
+.vscode/launch.json
+.vscode/settings.json
+.vscode/.ropeproject/config.py
+.vscode/.ropeproject/objectdb
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..8e9de34
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,5 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+/.idea/
+/.vscode/
\ No newline at end of file
diff --git a/src/convert.py b/src/convert.py
index 0167115..c24f0bd 100755
--- a/src/convert.py
+++ b/src/convert.py
@@ -24,81 +24,94 @@ class Tag:
class GemParser:
def __init__(self):
self.mstate = State.INITIAL
- self.toggle = False
+ self.recurse = False
def parse_line(self, line):
rstring = str()
+ is_text = self.is_text(line)
+ starts_with_pre = line.startswith(Tag.PRE)
+ starts_with_ul = line.startswith(Tag.UNORDERED)
+ starts_with_quote = line.startswith(Tag.QUOTE)
+ if line == '\n' and self.mstate != State.PRETEXT:
+ return rstring
+ # This is blockquote end tag
+ if self.mstate == State.QUOTE and not starts_with_quote:
+ rstring += self.get_end_tag()
+ self.recurse = True
+ # This is paragraph end tag
+ if self.mstate == State.TEXT and not is_text:
+ rstring += self.get_end_tag()
+ self.recurse = True
+ # This is ul end tag
+ if self.mstate == State.UNORDERED and not starts_with_ul:
+ rstring += self.get_end_tag()
+ self.recurse = True
# This is a pre start tag
- if line.startswith(Tag.PRE) and not self.toggle:
+ if starts_with_pre and self.mstate != State.PRETEXT:
self.mstate = State.PRETEXT
- self.toggle = True
- rstring = self.get_start_tag().format(' aria-label="{}"'.format(
+ rstring += self.get_start_tag().format(' aria-label="{}"'.format(
line[4:].rstrip('\n')))
+ return rstring
+ # This is in pre formatted text
+ elif self.mstate == State.PRETEXT and not starts_with_pre:
+ rstring += line
+ return rstring
# This is a pre end tag
- elif line.startswith(Tag.PRE):
- rstring = self.get_end_tag()
+ elif starts_with_pre and self.mstate == State.PRETEXT:
+ rstring += self.get_end_tag()
self.mstate = State.INITIAL
- self.toggle = False
- # This is in pre formatted text
- elif self.mstate == State.PRETEXT:
- rstring = line
+ return rstring
else:
- # This is ul start tag
- if line.startswith(Tag.UNORDERED) and not self.toggle:
- self.mstate = State.UNORDERED
- self.toggle = True
- rstring = self.get_start_tag()
- rstring += '
{}\n'.format(line[1:].strip())
- # This is in middle of unordered list
- elif line.startswith(Tag.UNORDERED):
- rstring += '{}\n'.format(line[1:].strip())
- # This is ul end tag
- elif self.mstate == State.UNORDERED and not line.startswith(
- Tag.UNORDERED):
- rstring += self.get_end_tag()
- self.mstate = State.INITIAL
- self.toggle = False
- # Recurse so we don't miss whats next
- rstring += self.parse_line(line)
- # This is blockquote start tag
- if line.startswith(Tag.QUOTE) and not self.toggle:
- self.mstate = State.QUOTE
- self.toggle = True
- rstring = self.get_start_tag()
- rstring += line[1:]
- # This is in quote
- elif line.startswith(Tag.QUOTE):
- rstring += line[1:]
- # FIXME: If a list is placed directly after a quote there will ba a list item in the quote...
- # This is blockquote end tag
- elif self.mstate == State.QUOTE and not line.startswith(Tag.QUOTE):
- rstring += self.get_end_tag()
- self.mstate = State.INITIAL
- self.toggle = False
- # Recurse so we don't miss whats next
- rstring += self.parse_line(line)
# This is paragraph start tag
- if self.is_text(line) and not self.toggle:
+ if is_text and self.mstate != State.TEXT:
self.mstate = State.TEXT
- self.toggle = True
- rstring = self.get_start_tag()
+ rstring += self.get_start_tag()
rstring += line
+ return rstring
# This is in paragraph
- elif self.is_text(line):
+ if is_text and self.mstate == State.TEXT:
rstring += line
- # This is paragraph end tag
- elif self.mstate == State.TEXT:
- rstring += self.get_end_tag()
+ return rstring
+ # This is a heading
+ if line.startswith(Tag.HEADING):
+ self.mstate = State.HEADING
+ rstring += self.parse_heading(line)
self.mstate = State.INITIAL
- self.toggle = False
- # Recurse so we don't miss whats next
- rstring += self.parse_line(line)
+ return rstring
# This is a link
if line.startswith(Tag.LINK):
self.mstate = State.LINK
- rstring = self.parse_link(line)
+ rstring += self.parse_link(line)
self.mstate = State.INITIAL
- return rstring
+ return rstring
+ # This is ul start tag
+ if starts_with_ul and not self.mstate == State.UNORDERED:
+ self.mstate = State.UNORDERED
+ rstring += self.get_start_tag()
+ rstring += '{}\n'.format(line[1:].strip())
+ return rstring
+ # This is in middle of unordered list
+ if starts_with_ul and self.mstate == State.UNORDERED:
+ rstring += '{}\n'.format(line[1:].strip())
+ return rstring
+ # This is blockquote start tag
+ if starts_with_quote and self.mstate != State.QUOTE:
+ self.mstate = State.QUOTE
+ rstring += self.get_start_tag()
+ rstring += line[1:]
+ return rstring
+ # This is in quote
+ if starts_with_quote:
+ rstring += line[1:]
+ return rstring
+ if self.recurse:
+ self.recurse = False
+ self.mstate = State.INITIAL
+ # Recurse so we don't miss what's next
+ return "{}{}".format(rstring, self.parse_line(line))
+ else:
+ print("We should never be here, the line is: {}".format(line))
+ sys.exit(1)
def parse_link(self, line):
linearr = line[2:].strip().split(' ')
@@ -146,13 +159,20 @@ class GemParser:
with open(filename) as gemtext:
mline: str = gemtext.readline()
while mline:
- rdocument += self.parse_line(mline)
+ rline = self.parse_line(mline)
+ if rline is not None and rline != str() and rline != '\n':
+ rdocument += rline
mline = gemtext.readline()
- if self.mstate != State.INITIAL:
- rdocument += self.get_end_tag()
- rdocument += '