|
|
@ -1,32 +1,117 @@
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
"""This is a Python module that parses gemtext and converts it to HTML5
|
|
|
|
|
|
|
|
"""
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import sys
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class State:
    """Integer identifiers for the states of the parser's state machine.

    The values are consecutive integers starting at zero. GemParser uses the
    current state as an index into its start/end tag tables, so the numbering
    must not change: INITIAL = 0, TEXT = 1, LINK = 2, PRETEXT = 3,
    HEADING = 4, UNORDERED = 5, QUOTE = 6.
    """

    (INITIAL, TEXT, LINK, PRETEXT,
     HEADING, UNORDERED, QUOTE) = range(7)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Tag:
    """Line prefixes that mark the different gemtext line types.

    GemParser compares the start of each line against these strings.
    """

    HEADING = "#"
    LINK = "=>"
    PRE = "```"
    QUOTE = ">"
    UNORDERED = "*"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GemParser:
|
|
|
|
class GemParser:
|
|
|
|
|
|
|
|
"""This is the main parser class
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self):
    """Create a parser that has not consumed any input yet.

    The state machine starts in ``State.INITIAL`` and the ``recurse`` flag
    starts out as ``False``.
    """
    self.recurse = False
    self.mstate = State.INITIAL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_document_from_gemfile(self, filename):
    """Read a gemtext file line by line and convert it to an HTML5 document.

    Every line is handed to ``parse_line``. Results that are ``None``, empty
    or a lone newline are left out of the document. After the last line the
    end tag of the state the parser finished in is emitted before the
    document is closed.

    Args:
        filename (str): Path of the gemtext file to convert

    Returns:
        str: The complete HTML5 document as a string
    """
    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        '<!DOCTYPE html>\n<html>\n<head>\n',
        '<title>gemtext2html</title>\n</head>\n<body>\n',
    ]
    # Gemtext is UTF-8; do not depend on the platform's locale encoding.
    with open(filename, encoding='utf-8') as gemtext:
        for mline in gemtext:
            rline = self.parse_line(mline)
            if rline and rline != '\n':
                parts.append(rline)
    parts.append('{}</body>\n</html>\n'.format(self.get_end_tag()))
    return ''.join(parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_end_tag(self):
    """Return the HTML end tag that belongs to the current state.

    The table is indexed with ``self.mstate``, so its order has to follow the
    numbering of the ``State`` constants (INITIAL = 0 up to QUOTE = 6).

    Returns:
        str: The end tag for the current state. It is empty for the initial
        state, and the heading entry keeps a ``{}`` placeholder that the
        caller fills in with ``str.format``.
    """
    end_tags = (
        '',                 # 0: INITIAL, nothing to close
        '</p>\n',           # 1: TEXT
        '</a>\n',           # 2: LINK
        '</pre>\n',         # 3: PRETEXT
        '</h{}>\n',         # 4: HEADING
        '</ul>\n',          # 5: UNORDERED
        '</blockquote>\n',  # 6: QUOTE
    )
    return end_tags[self.mstate]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_start_tag(self):
    """Return the HTML start tag that belongs to the current state.

    The table is indexed with ``self.mstate``, so its order has to follow the
    numbering of the ``State`` constants (INITIAL = 0 up to QUOTE = 6).

    Returns:
        str: The start tag for the current state. It is empty for the initial
        state. The link, preformatted and heading entries keep a ``{}``
        placeholder that the caller fills in with ``str.format``.
    """
    start_tags = (
        '',                # 0: INITIAL, nothing to open
        '<p>\n',           # 1: TEXT
        '<a href="{}">',   # 2: LINK
        '<pre{}>\n',       # 3: PRETEXT
        '<h{}>',           # 4: HEADING
        '<ul>\n',          # 5: UNORDERED
        '<blockquote>\n',  # 6: QUOTE
    )
    return start_tags[self.mstate]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def is_text(self, line):
    """Check whether a line is a paragraph of plain text.

    A line counts as text only when it is not blank, the parser is in the
    initial state, and the line does not start with any of the gemtext line
    markers defined in ``Tag``.

    Args:
        line (str): A line of gemtext

    Returns:
        bool: True if it is text, False if it is something else
    """
    if line in ('', '\n') or self.mstate != State.INITIAL:
        return False
    # str.startswith accepts a tuple, so all markers are checked in one call.
    markers = (Tag.LINK, Tag.PRE, Tag.HEADING, Tag.UNORDERED, Tag.QUOTE)
    return not line.startswith(markers)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_heading(self, line, level=0):
    """Convert a gemtext heading line into an HTML heading element.

    Every leading ``#`` raises the heading level by one. The remaining text
    is stripped of surrounding whitespace and wrapped in the start and end
    tag of the current state, both formatted with the resulting level. Those
    tags only contain a level placeholder while the parser is in the heading
    state; the caller is presumably responsible for entering that state
    first (not visible from this method).

    Args:
        line (str): A gemtext heading line
        level (int, optional): The heading level to start counting from.
            Defaults to 0.

    Returns:
        str: An HTML heading element; the level is kept between 1 and 6
    """
    marker = Tag.HEADING
    # Count the markers in a loop rather than with one recursive call per
    # marker, so a long run of '#' cannot exhaust the recursion limit.
    while line.startswith(marker):
        line = line[len(marker):]
        level += 1
    # HTML only defines h1 to h6; keep the level inside that range.
    level = min(max(level, 1), 6)
    # NOTE(review): the heading text is inserted without HTML escaping;
    # confirm whether that is handled elsewhere.
    return "{}{}{}".format(self.get_start_tag().format(level),
                           line.strip(),
                           self.get_end_tag().format(level))
|
|
|
|
|
|
|
|
|
|
|
|
def parse_line(self, line):
|
|
|
|
def parse_line(self, line):
|
|
|
|
|
|
|
|
"""This subroutine will parse a single line of gemtext and enter the
|
|
|
|
|
|
|
|
correct state and output the corresponding html
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
|
|
line (string): A string of gemtext
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
|
|
str: A string of html
|
|
|
|
|
|
|
|
"""
|
|
|
|
rstring = str()
|
|
|
|
rstring = str()
|
|
|
|
is_text = self.is_text(line)
|
|
|
|
is_text = self.is_text(line)
|
|
|
|
starts_with_pre = line.startswith(Tag.PRE)
|
|
|
|
starts_with_pre = line.startswith(Tag.PRE)
|
|
|
@ -114,6 +199,14 @@ class GemParser:
|
|
|
|
sys.exit(1)
|
|
|
|
sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
def parse_link(self, line):
|
|
|
|
def parse_link(self, line):
|
|
|
|
|
|
|
|
"""This function will parse a gemtext link
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
|
|
line (str): A link line of gemtext
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
|
|
str: A html fragment with a <a> tag
|
|
|
|
|
|
|
|
"""
|
|
|
|
linearr = line[2:].strip().split(' ')
|
|
|
|
linearr = line[2:].strip().split(' ')
|
|
|
|
link = linearr[0]
|
|
|
|
link = linearr[0]
|
|
|
|
anchor = str().join(linearr[1:])
|
|
|
|
anchor = str().join(linearr[1:])
|
|
|
@ -124,54 +217,27 @@ class GemParser:
|
|
|
|
rline += self.get_end_tag()
|
|
|
|
rline += self.get_end_tag()
|
|
|
|
return rline
|
|
|
|
return rline
|
|
|
|
|
|
|
|
|
|
|
|
def is_text(self, line):
    """Check whether a line is a paragraph of plain text.

    A line counts as text only when it is not blank, the parser is in the
    initial state, and the line does not start with any of the gemtext line
    markers defined in ``Tag``.

    Args:
        line (str): A line of gemtext

    Returns:
        bool: True if it is text, False if it is something else
    """
    if line in ('', '\n') or self.mstate != State.INITIAL:
        return False
    # str.startswith accepts a tuple, so all markers are checked in one call.
    markers = (Tag.LINK, Tag.PRE, Tag.HEADING, Tag.UNORDERED, Tag.QUOTE)
    return not line.startswith(markers)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_start_tag(self):
    """Return the HTML start tag that belongs to the current state.

    The table is indexed with ``self.mstate``, so its order has to follow the
    numbering of the ``State`` constants (INITIAL = 0 up to QUOTE = 6).

    Returns:
        str: The start tag for the current state. It is empty for the initial
        state. The link, preformatted and heading entries keep a ``{}``
        placeholder that the caller fills in with ``str.format``.
    """
    start_tags = (
        '',                # 0: INITIAL, nothing to open
        '<p>\n',           # 1: TEXT
        '<a href="{}">',   # 2: LINK
        '<pre{}>\n',       # 3: PRETEXT
        '<h{}>',           # 4: HEADING
        '<ul>\n',          # 5: UNORDERED
        '<blockquote>\n',  # 6: QUOTE
    )
    return start_tags[self.mstate]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_end_tag(self):
|
|
|
|
class State:
|
|
|
|
tag = list()
|
|
|
|
"""This is an enumeration of the states of the state machine
|
|
|
|
tag.append('')
|
|
|
|
"""
|
|
|
|
tag.append('</p>\n')
|
|
|
|
INITIAL = 0
|
|
|
|
tag.append('</a>\n')
|
|
|
|
TEXT = 1
|
|
|
|
tag.append('</pre>\n')
|
|
|
|
LINK = 2
|
|
|
|
tag.append('</h{}>\n')
|
|
|
|
PRETEXT = 3
|
|
|
|
tag.append('</ul>\n')
|
|
|
|
HEADING = 4
|
|
|
|
tag.append('</blockquote>\n')
|
|
|
|
UNORDERED = 5
|
|
|
|
return tag[self.mstate]
|
|
|
|
QUOTE = 6
|
|
|
|
|
|
|
|
|
|
|
|
def get_document_from_gemfile(self, filename):
    """Read a gemtext file line by line and convert it to an HTML5 document.

    Every line is handed to ``parse_line``. Results that are ``None``, empty
    or a lone newline are left out of the document. After the last line the
    end tag of the state the parser finished in is emitted before the
    document is closed.

    Args:
        filename (str): Path of the gemtext file to convert

    Returns:
        str: The complete HTML5 document as a string
    """
    # Collect fragments and join once instead of growing a string with +=.
    parts = [
        '<!DOCTYPE html>\n<html>\n<head>\n<title>gemtext2html</title>\n</head>\n<body>\n',
    ]
    # Gemtext is UTF-8; do not depend on the platform's locale encoding.
    with open(filename, encoding='utf-8') as gemtext:
        for mline in gemtext:
            rline = self.parse_line(mline)
            if rline and rline != '\n':
                parts.append(rline)
    parts.append('{}</body>\n</html>\n'.format(self.get_end_tag()))
    return ''.join(parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_heading(self, line, level=0):
|
|
|
|
class Tag:
|
|
|
|
if line.startswith(Tag.HEADING):
|
|
|
|
"""This is an enumeration of the possible gemtext tags
|
|
|
|
return self.parse_heading(line[1:], level + 1)
|
|
|
|
"""
|
|
|
|
else:
|
|
|
|
LINK = '=>'
|
|
|
|
return "{}{}{}".format(self.get_start_tag().format(level), line.strip().rstrip('\n'),
|
|
|
|
PRE = '```'
|
|
|
|
self.get_end_tag().format(level))
|
|
|
|
HEADING = '#'
|
|
|
|
|
|
|
|
UNORDERED = '*'
|
|
|
|
|
|
|
|
QUOTE = '>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
if __name__ == '__main__':
|