This is a working parser and converter of gemtext to HTML. It can properly parse and convert all parts of a gemtext document; however, there are still bugs and the output might not be 100% valid HTML5 yet. A known bug is that a list placed directly after a blockquote, without a blank line in between, will cause the quote to contain the first element of the list.
master
parent
0694a46f90
commit
3cd0ce594c
@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
import enum
import html
import sys
|
||||
|
||||
|
||||
class State(enum.IntEnum):
    """Parser states, one per kind of gemtext construct.

    The numeric values double as indexes into the start/end tag tables of
    the parser, so they must stay contiguous and keep their current order.
    ``IntEnum`` members compare equal to (and index like) plain integers,
    so code that treats the states as ints keeps working.
    """

    INITIAL = 0    # no block is currently open
    TEXT = 1       # inside a paragraph
    LINK = 2       # a link line ("=>")
    PRETEXT = 3    # inside a preformatted block ("```")
    HEADING = 4    # a heading line ("#")
    UNORDERED = 5  # inside an unordered list ("*")
    QUOTE = 6      # inside a blockquote (">")
|
||||
|
||||
|
||||
class Tag:
    """Line prefixes that introduce each gemtext line type."""

    # Single-character markers.
    HEADING = "#"
    UNORDERED = "*"
    QUOTE = ">"

    # Multi-character markers.
    LINK = "=>"
    PRE = "```"
|
||||
|
||||
|
||||
class GemParser:
    """Line-oriented converter from gemtext to HTML.

    Feed the document to :meth:`parse_line` one line at a time (or use
    :meth:`get_document_from_gemfile`); each call returns the HTML produced
    by that line.  Multi-line constructs (paragraph, list, blockquote,
    preformatted block) are opened when first seen and closed lazily, i.e.
    the closing tag is emitted together with the output of the first line
    that no longer belongs to the block.

    Attributes:
        mstate: The ``State`` of the block that is currently open, or
            ``State.INITIAL`` when none is.
        toggle: ``True`` while a block is open.
    """

    # Opening markup, indexed by the numeric ``State`` values.
    _START_TAGS = (
        '',                # State.INITIAL
        '<p>\n',           # State.TEXT
        '<a href="{}">',   # State.LINK
        '<pre{}>\n',       # State.PRETEXT
        '<h{}>',           # State.HEADING
        '<ul>\n',          # State.UNORDERED
        '<blockquote>\n',  # State.QUOTE
    )

    # Closing markup, indexed by the numeric ``State`` values.
    _END_TAGS = (
        '',                 # State.INITIAL
        '</p>\n',           # State.TEXT
        '</a>\n',           # State.LINK
        '</pre>\n',         # State.PRETEXT
        '</h{}>\n',         # State.HEADING
        '</ul>\n',          # State.UNORDERED
        '</blockquote>\n',  # State.QUOTE
    )

    # Gemtext defines exactly three heading levels.
    _MAX_HEADING_LEVEL = 3

    def __init__(self):
        self.mstate = State.INITIAL
        self.toggle = False

    def parse_line(self, line: str) -> str:
        """Convert one gemtext line to HTML and update the parser state.

        Args:
            line: A single line of gemtext, normally including its
                trailing newline.

        Returns:
            The HTML generated by this line.  It starts with the closing
            tag of the previously open block when this line ends that
            block, and may be empty (e.g. a blank line outside any block).
        """
        # Fence lines take priority: they toggle preformatted mode.
        if line.startswith(Tag.PRE):
            if self.mstate == State.PRETEXT:
                return self._close_block()
            # Close whatever block is open *before* opening <pre>, so a
            # fence right after a paragraph/list/quote is not mistaken for
            # a closing fence.
            rstring = self._close_block()
            alt_text = line[len(Tag.PRE):].strip()
            label = ''
            if alt_text:
                label = ' aria-label="{}"'.format(html.escape(alt_text))
            return rstring + self._open_block(State.PRETEXT).format(label)

        # Inside a preformatted block every line is passed through
        # verbatim (escaped), whatever it starts with.
        if self.mstate == State.PRETEXT:
            return html.escape(line, quote=False)

        kind = self._line_kind(line)
        # Only lists and quotes span several lines; every text line is its
        # own paragraph.
        continues = (kind == self.mstate
                     and kind in (State.UNORDERED, State.QUOTE))
        rstring = ''
        if not continues:
            rstring += self._close_block()

        if kind == State.UNORDERED:
            if not continues:
                rstring += self._open_block(State.UNORDERED)
            item = line[len(Tag.UNORDERED):].strip()
            rstring += '<li>{}</li>\n'.format(html.escape(item, quote=False))
        elif kind == State.QUOTE:
            if not continues:
                rstring += self._open_block(State.QUOTE)
            rstring += html.escape(line[len(Tag.QUOTE):], quote=False)
        elif kind == State.TEXT:
            rstring += self._open_block(State.TEXT)
            rstring += html.escape(line, quote=False)
        elif kind == State.HEADING:
            rstring += self._parse_heading(line)
        elif kind == State.LINK:
            rstring += self.parse_link(line)
        # kind == State.INITIAL (blank line): nothing more to emit.
        return rstring

    def parse_link(self, line: str) -> str:
        """Convert a ``=>`` link line to an ``<a>`` element.

        The first whitespace-separated token after the marker is the URL;
        the remainder of the line, if any, is the link label.  Without a
        label the URL itself is used as the label.

        Args:
            line: A gemtext line starting with ``Tag.LINK``.

        Returns:
            The anchor element followed by a newline.
        """
        parts = line[len(Tag.LINK):].split(None, 1)
        link = parts[0] if parts else ''
        anchor = parts[1].strip() if len(parts) > 1 else ''
        if not anchor:
            anchor = link
        rline = self._START_TAGS[State.LINK].format(html.escape(link))
        rline += html.escape(anchor, quote=False)
        rline += self._END_TAGS[State.LINK]
        return rline

    def is_text(self, line: str) -> bool:
        """Tell whether *line* would start a plain paragraph right now.

        Returns:
            ``True`` when no block is open (``mstate`` is
            ``State.INITIAL``), the line is not blank, and it does not
            start with any gemtext marker.
        """
        if line == '' or line == '\n':
            return False
        if self.mstate != State.INITIAL:
            return False
        markers = (Tag.LINK, Tag.PRE, Tag.HEADING, Tag.UNORDERED, Tag.QUOTE)
        return not line.startswith(markers)

    def get_start_tag(self) -> str:
        """Return the opening markup for the current ``mstate``.

        The link, pre and heading entries contain a ``{}`` placeholder the
        caller has to fill in with ``str.format``.
        """
        return self._START_TAGS[self.mstate]

    def get_end_tag(self) -> str:
        """Return the closing markup for the current ``mstate``.

        The heading entry contains a ``{}`` placeholder for the level.
        """
        return self._END_TAGS[self.mstate]

    def get_document_from_gemfile(self, filename: str) -> str:
        """Read a gemtext file and return it as a complete HTML document.

        Args:
            filename: Path of the gemtext file; it is decoded as UTF-8.

        Returns:
            The HTML document as a single string.

        Raises:
            OSError: If the file cannot be opened or read.
            UnicodeDecodeError: If the file is not valid UTF-8.
        """
        # Start from a clean state so one parser can convert several files.
        self.mstate = State.INITIAL
        self.toggle = False
        pieces = ['<!DOCTYPE html>\n<html>\n<head>\n<title>gemtext2html</title>\n</head>\n<body>\n']
        with open(filename, encoding='utf-8') as gemtext:
            for mline in gemtext:
                pieces.append(self.parse_line(mline))
        # Close a block that is still open at end of file.
        pieces.append(self._close_block())
        pieces.append('</body>\n</html>\n')
        return ''.join(pieces)

    def _line_kind(self, line: str) -> int:
        """Classify a non-fence line by its marker, ignoring parser state."""
        if line == '' or line == '\n':
            return State.INITIAL
        if line.startswith(Tag.LINK):
            return State.LINK
        if line.startswith(Tag.HEADING):
            return State.HEADING
        if line.startswith(Tag.UNORDERED):
            return State.UNORDERED
        if line.startswith(Tag.QUOTE):
            return State.QUOTE
        return State.TEXT

    def _open_block(self, state: int) -> str:
        """Enter *state* and return its (unformatted) opening markup."""
        self.mstate = state
        self.toggle = True
        return self.get_start_tag()

    def _close_block(self) -> str:
        """Leave the open block, if any, and return its closing markup."""
        end_tag = self.get_end_tag()
        self.mstate = State.INITIAL
        self.toggle = False
        return end_tag

    def _parse_heading(self, line: str) -> str:
        """Convert a ``#`` heading line to an ``<h1>``..``<h3>`` element."""
        marks = len(line) - len(line.lstrip(Tag.HEADING))
        # Extra '#' characters beyond the third stay part of the title.
        level = min(marks, self._MAX_HEADING_LEVEL)
        title = html.escape(line[level:].strip(), quote=False)
        return (self._START_TAGS[State.HEADING].format(level) + title
                + self._END_TAGS[State.HEADING].format(level))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The gemtext file to convert is the first command-line argument;
    # fail with a usage message instead of an IndexError traceback.
    if len(sys.argv) < 2:
        sys.exit('usage: {} GEMTEXT_FILE'.format(sys.argv[0]))
    mparser: GemParser = GemParser()
    document = mparser.get_document_from_gemfile(sys.argv[1])
    # The resulting HTML document is written to standard output.
    print(document)
|
Loading…
Reference in new issue