|
|
|
@ -4,6 +4,10 @@ |
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import sys |
|
|
|
|
|
|
|
|
|
# Type aliases
# A parser state is stored as a plain integer.
State = int
# A gemtext line marker is stored as a plain string.
Tag = str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GemParser: |
|
|
|
|
"""This is the main parser class |
|
|
|
@ -12,10 +16,10 @@ class GemParser: |
|
|
|
|
def __init__(self) -> None:
    """Constructor for the GemParser class

    The parser starts in the initial state with no pending re-parse.
    """
    # Current state of the line-by-line state machine.
    self.mstate: State = StateEnum.INITIAL
    # Set by parse_line when it has just closed a block and the same line
    # has to be parsed again from the initial state.
    self.recurse: bool = False
|
|
|
|
|
|
|
|
|
def get_document_from_gemfile(self, filename: str) -> str:
    """This subroutine will read a file line by line and convert it to html

    Args:
        filename (str): Path of the gemtext file to read

    Returns:
        str: A valid html5 document as string
    """
    # Collect the fragments and join once instead of growing a string.
    parts: list[str] = [
        '<!DOCTYPE html>\n<html>\n<head>\n',
        '<title>gemtext2html</title>\n</head>\n<body>\n',
    ]
    # Read the input as UTF-8 explicitly so the result does not depend on
    # the platform's default encoding.
    with open(filename, encoding='utf-8') as gemtext:
        for mline in gemtext:
            rline: str = self.parse_line(mline)
            # Lines that produced no markup (or only a newline) are dropped.
            if rline and rline != '\n':
                parts.append(rline)
    # Close whatever block is still open when the file ends.
    parts.append('{}</body>\n</html>\n'.format(self.get_end_tag()))
    return ''.join(parts)
|
|
|
|
|
|
|
|
|
def get_end_tag(self): |
|
|
|
|
def get_end_tag(self) -> str: |
|
|
|
|
"""A subroutine that will emit the correct end tag for the state |
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
str: A html end tag |
|
|
|
|
""" |
|
|
|
|
tag = list() |
|
|
|
|
tag: list[str] = list() |
|
|
|
|
tag.append('') |
|
|
|
|
tag.append('</p>\n') |
|
|
|
|
tag.append('</a>\n') |
|
|
|
@ -52,13 +56,13 @@ class GemParser: |
|
|
|
|
tag.append('</blockquote>\n') |
|
|
|
|
return tag[self.mstate] |
|
|
|
|
|
|
|
|
|
def get_start_tag(self): |
|
|
|
|
def get_start_tag(self) -> str: |
|
|
|
|
"""A subroutine to emit the correct html start tag for the state |
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
|
str: A html start tag |
|
|
|
|
""" |
|
|
|
|
tag = list() |
|
|
|
|
tag: list[str] = list() |
|
|
|
|
tag.append('') |
|
|
|
|
tag.append('<p>\n') |
|
|
|
|
tag.append('<a href="{}">') |
|
|
|
@ -68,7 +72,7 @@ class GemParser: |
|
|
|
|
tag.append('<blockquote>\n') |
|
|
|
|
return tag[self.mstate] |
|
|
|
|
|
|
|
|
|
def is_text(self, line: str) -> bool:
    """A function that will check if this is a paragraph of text

    Args:
        line (str): The gemtext line to classify

    Returns:
        bool: True if it is text, False if it is something else
    """
    # Empty lines never start a paragraph.
    if line in ('', '\n'):
        return False
    # Only a parser sitting in the initial state can see plain text.
    if self.mstate != StateEnum.INITIAL:
        return False
    # Any line that opens with a gemtext marker is not plain text.
    markers = (TagEnum.LINK, TagEnum.PRE, TagEnum.HEADING,
               TagEnum.UNORDERED, TagEnum.QUOTE)
    return not line.startswith(markers)
|
|
|
|
|
|
|
|
|
def parse_heading(self, line: str, level: int = 0) -> str:
    """A function that turns a gemtext heading line into a html heading

    Every leading heading marker raises the level by one. The markers are
    counted in a loop rather than by recursion, so a line made of a very
    long run of markers cannot exhaust the recursion limit.

    Args:
        line (str): The heading line, including its leading markers
        level (int): The heading level to start counting from

    Returns:
        str: A html heading tag of the correct level
    """
    marker: str = TagEnum.HEADING
    while line.startswith(marker):
        line = line[len(marker):]
        level += 1
    # strip() already removes the trailing newline with the other whitespace.
    return "{}{}{}".format(self.get_start_tag().format(level),
                           line.strip(),
                           self.get_end_tag().format(level))
|
|
|
|
|
|
|
|
|
def parse_line(self, line): |
|
|
|
|
def parse_line(self, line: str) -> str: |
|
|
|
|
"""This subroutine will parse a single line of gemtext and enter the |
|
|
|
|
correct state and output the corresponding html |
|
|
|
|
|
|
|
|
@ -112,76 +116,76 @@ class GemParser: |
|
|
|
|
Returns: |
|
|
|
|
str: A string of html |
|
|
|
|
""" |
|
|
|
|
rstring = str() |
|
|
|
|
is_text = self.is_text(line) |
|
|
|
|
starts_with_pre = line.startswith(Tag.PRE) |
|
|
|
|
starts_with_ul = line.startswith(Tag.UNORDERED) |
|
|
|
|
starts_with_quote = line.startswith(Tag.QUOTE) |
|
|
|
|
if line == '\n' and self.mstate != State.PRETEXT: |
|
|
|
|
rstring: str = str() |
|
|
|
|
is_text: bool = self.is_text(line) |
|
|
|
|
starts_with_pre: bool = line.startswith(TagEnum.PRE) |
|
|
|
|
starts_with_ul: bool = line.startswith(TagEnum.UNORDERED) |
|
|
|
|
starts_with_quote: bool = line.startswith(TagEnum.QUOTE) |
|
|
|
|
if line == '\n' and self.mstate != StateEnum.PRETEXT: |
|
|
|
|
return rstring |
|
|
|
|
# This is blockquote end tag |
|
|
|
|
if self.mstate == State.QUOTE and not starts_with_quote: |
|
|
|
|
if self.mstate == StateEnum.QUOTE and not starts_with_quote: |
|
|
|
|
rstring += self.get_end_tag() |
|
|
|
|
self.recurse = True |
|
|
|
|
# This is paragraph end tag |
|
|
|
|
if self.mstate == State.TEXT and not is_text: |
|
|
|
|
if self.mstate == StateEnum.TEXT and not is_text: |
|
|
|
|
rstring += self.get_end_tag() |
|
|
|
|
self.recurse = True |
|
|
|
|
# This is ul end tag |
|
|
|
|
if self.mstate == State.UNORDERED and not starts_with_ul: |
|
|
|
|
if self.mstate == StateEnum.UNORDERED and not starts_with_ul: |
|
|
|
|
rstring += self.get_end_tag() |
|
|
|
|
self.recurse = True |
|
|
|
|
# This is a pre start tag |
|
|
|
|
if starts_with_pre and self.mstate != State.PRETEXT: |
|
|
|
|
self.mstate = State.PRETEXT |
|
|
|
|
if starts_with_pre and self.mstate != StateEnum.PRETEXT: |
|
|
|
|
self.mstate = StateEnum.PRETEXT |
|
|
|
|
rstring += self.get_start_tag().format(' aria-label="{}"'.format( |
|
|
|
|
line[4:].rstrip('\n'))) |
|
|
|
|
return rstring |
|
|
|
|
# This is in pre formatted text |
|
|
|
|
elif self.mstate == State.PRETEXT and not starts_with_pre: |
|
|
|
|
elif self.mstate == StateEnum.PRETEXT and not starts_with_pre: |
|
|
|
|
rstring += line |
|
|
|
|
return rstring |
|
|
|
|
# This is a pre end tag |
|
|
|
|
elif starts_with_pre and self.mstate == State.PRETEXT: |
|
|
|
|
elif starts_with_pre and self.mstate == StateEnum.PRETEXT: |
|
|
|
|
rstring += self.get_end_tag() |
|
|
|
|
self.mstate = State.INITIAL |
|
|
|
|
self.mstate = StateEnum.INITIAL |
|
|
|
|
return rstring |
|
|
|
|
else: |
|
|
|
|
# This is paragraph start tag |
|
|
|
|
if is_text and self.mstate != State.TEXT: |
|
|
|
|
self.mstate = State.TEXT |
|
|
|
|
if is_text and self.mstate != StateEnum.TEXT: |
|
|
|
|
self.mstate = StateEnum.TEXT |
|
|
|
|
rstring += self.get_start_tag() |
|
|
|
|
rstring += line |
|
|
|
|
return rstring |
|
|
|
|
# This is in paragraph |
|
|
|
|
if is_text and self.mstate == State.TEXT: |
|
|
|
|
if is_text and self.mstate == StateEnum.TEXT: |
|
|
|
|
rstring += line |
|
|
|
|
return rstring |
|
|
|
|
# This is a heading |
|
|
|
|
if line.startswith(Tag.HEADING): |
|
|
|
|
self.mstate = State.HEADING |
|
|
|
|
if line.startswith(TagEnum.HEADING): |
|
|
|
|
self.mstate = StateEnum.HEADING |
|
|
|
|
rstring += self.parse_heading(line) |
|
|
|
|
self.mstate = State.INITIAL |
|
|
|
|
self.mstate = StateEnum.INITIAL |
|
|
|
|
return rstring |
|
|
|
|
# This is a link |
|
|
|
|
if line.startswith(Tag.LINK): |
|
|
|
|
self.mstate = State.LINK |
|
|
|
|
if line.startswith(TagEnum.LINK): |
|
|
|
|
self.mstate = StateEnum.LINK |
|
|
|
|
rstring += self.parse_link(line) |
|
|
|
|
self.mstate = State.INITIAL |
|
|
|
|
self.mstate = StateEnum.INITIAL |
|
|
|
|
return rstring |
|
|
|
|
# This is ul start tag |
|
|
|
|
if starts_with_ul and not self.mstate == State.UNORDERED: |
|
|
|
|
self.mstate = State.UNORDERED |
|
|
|
|
if starts_with_ul and not self.mstate == StateEnum.UNORDERED: |
|
|
|
|
self.mstate = StateEnum.UNORDERED |
|
|
|
|
rstring += self.get_start_tag() |
|
|
|
|
rstring += '<li>{}</li>\n'.format(line[1:].strip()) |
|
|
|
|
return rstring |
|
|
|
|
# This is in middle of unordered list |
|
|
|
|
if starts_with_ul and self.mstate == State.UNORDERED: |
|
|
|
|
if starts_with_ul and self.mstate == StateEnum.UNORDERED: |
|
|
|
|
rstring += '<li>{}</li>\n'.format(line[1:].strip()) |
|
|
|
|
return rstring |
|
|
|
|
# This is blockquote start tag |
|
|
|
|
if starts_with_quote and self.mstate != State.QUOTE: |
|
|
|
|
self.mstate = State.QUOTE |
|
|
|
|
if starts_with_quote and self.mstate != StateEnum.QUOTE: |
|
|
|
|
self.mstate = StateEnum.QUOTE |
|
|
|
|
rstring += self.get_start_tag() |
|
|
|
|
rstring += line[1:] |
|
|
|
|
return rstring |
|
|
|
@ -191,14 +195,14 @@ class GemParser: |
|
|
|
|
return rstring |
|
|
|
|
if self.recurse: |
|
|
|
|
self.recurse = False |
|
|
|
|
self.mstate = State.INITIAL |
|
|
|
|
self.mstate = StateEnum.INITIAL |
|
|
|
|
# Recurse so we don't miss whats next |
|
|
|
|
return "{}{}".format(rstring, self.parse_line(line)) |
|
|
|
|
else: |
|
|
|
|
print("We should never be here, the line is: {}".format(line)) |
|
|
|
|
sys.exit(1) |
|
|
|
|
|
|
|
|
|
def parse_link(self, line: str) -> str:
    """This function will parse a gemtext link

    The line has the shape ``=>[ws]URL[ws label]``. The label may contain
    spaces and is kept intact; when it is missing the URL is used as the
    link text. URL and label are HTML-escaped before being written out.

    Args:
        line (str): The gemtext link line, starting with the link marker

    Returns:
        str: A html fragment with a <a> tag
    """
    # Imported here because the module's import header is outside this block.
    import html

    # Drop the two-character marker, then split once on any whitespace run
    # so tabs and repeated spaces are handled and the label keeps its spaces.
    fields: list[str] = line[2:].strip().split(maxsplit=1)
    link: str = fields[0] if fields else ''
    anchor: str = fields[1] if len(fields) > 1 else link
    rline: str = self.get_start_tag().format(html.escape(link))
    rline += html.escape(anchor)
    rline += self.get_end_tag()
    return rline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class StateEnum:
    """This is an enumeration of the states of the state machine

    The values are also used as list indexes by the parser's start-tag and
    end-tag lookups, so the numbering must stay contiguous and unchanged.
    """
    INITIAL: State = 0
    TEXT: State = 1
    LINK: State = 2
    PRETEXT: State = 3
    HEADING: State = 4
    UNORDERED: State = 5
    QUOTE: State = 6
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TagEnum:
    """This is an enumeration of the possible gemtext tags

    Each value is the prefix a line has to start with to be of that type.
    """
    LINK: Tag = '=>'
    PRE: Tag = '```'
    HEADING: Tag = '#'
    UNORDERED: Tag = '*'
    QUOTE: Tag = '>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Fail with a usage message instead of an IndexError when the gemtext
    # file argument is missing.
    if len(sys.argv) < 2:
        print('usage: {} GEMFILE'.format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)
    mparser: GemParser = GemParser()
    document: str = mparser.get_document_from_gemfile(sys.argv[1])
    print(document)
|
|
|
|