Compare commits

...

4 Commits

Author SHA1 Message Date
Micke Nordin 76942c0b3e Don't set a label if there is none
3 years ago
Micke Nordin 806b51615a Add type hints.
3 years ago
Micke Nordin aa74299881 Add docstrings and convert to package
3 years ago
Micke Nordin bee7b41a51 Working parser/converter
3 years ago

12
.gitignore vendored

@@ -0,0 +1,12 @@
.idea/.name
.idea/gemtext2html.iml
.idea/inspectionProfiles/Project_Default.xml
.idea/inspectionProfiles/profiles_settings.xml
.idea/misc.xml
.idea/modules.xml
.idea/vcs.xml
.idea/workspace.xml
.vscode/.ropeproject/config.py
.vscode/.ropeproject/objectdb
.vscode/launch.json
.vscode/settings.json

@@ -1,160 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
class State:
    """Integer identifiers for the states of the gemtext parser.

    The values are consecutive integers starting at zero; the parser uses
    them as indexes into its lists of start and end tags.
    """

    (INITIAL, TEXT, LINK, PRETEXT,
     HEADING, UNORDERED, QUOTE) = range(7)
class Tag:
    """Prefixes that mark the special line types of gemtext."""

    # Order: link, preformatted fence, heading, list item, quote.
    LINK, PRE, HEADING, UNORDERED, QUOTE = "=>", "```", "#", "*", ">"
class GemParser:
    """Convert gemtext to HTML one line at a time.

    The parser is a small state machine.  ``mstate`` holds the ``State``
    constant of the block-level element that is currently open (paragraph,
    preformatted block, list or quote) and ``toggle`` is True while any such
    block is open.
    """

    # Tags indexed by the integer value of the parser state.
    _START_TAGS = ('', '<p>\n', '<a href="{}">', '<pre{}>\n', '<h{}>',
                   '<ul>\n', '<blockquote>\n')
    _END_TAGS = ('', '</p>\n', '</a>\n', '</pre>\n', '</h{}>\n',
                 '</ul>\n', '</blockquote>\n')

    def __init__(self):
        """Create a parser with no block open."""
        self.mstate = State.INITIAL
        self.toggle = False

    @staticmethod
    def _escape(text, quote=False):
        """Escape ``&``, ``<`` and ``>`` (and quotes if *quote*) for HTML."""
        # Imported here so the class does not depend on a module-level import.
        import html
        return html.escape(text, quote=quote)

    def _set_state(self, state):
        """Switch to *state* and keep ``toggle`` consistent with it."""
        self.mstate = state
        self.toggle = state != State.INITIAL

    def _close_block(self):
        """Return the end tag of the open block (if any) and reset the state."""
        end_tag = self.get_end_tag()
        self._set_state(State.INITIAL)
        return end_tag

    def _enter_block(self, state):
        """Return the markup needed so that a block of kind *state* is open.

        Nothing is emitted when that block is already open; otherwise the
        current block is closed first and the new start tag follows.
        """
        if self.mstate == state:
            return ''
        closing = self._close_block()
        self._set_state(state)
        return closing + self.get_start_tag()

    def parse_line(self, line):
        """Parse one line of gemtext and return the corresponding HTML.

        Args:
            line: A single line of gemtext, with or without its newline.

        Returns:
            The HTML for the line, including any end tag of a block that
            this line terminates.  May be the empty string.
        """
        # Inside a preformatted block only the closing fence is special.
        if self.mstate == State.PRETEXT:
            if line.startswith(Tag.PRE):
                return self._close_block()
            return self._escape(line)

        # A blank line ends whatever block is open.
        if not line.strip():
            return self._close_block()

        if line.startswith(Tag.PRE):
            closing = self._close_block()
            self._set_state(State.PRETEXT)
            # Everything after the fence is the alt text of the block.
            label = line[len(Tag.PRE):].strip()
            attribute = ''
            if label:
                attribute = ' aria-label="{}"'.format(
                    self._escape(label, quote=True))
            return closing + self.get_start_tag().format(attribute)

        if line.startswith(Tag.HEADING):
            return self._close_block() + self.parse_heading(line)

        if line.startswith(Tag.LINK):
            return self._close_block() + self.parse_link(line)

        if line.startswith(Tag.UNORDERED):
            item = self._escape(line[len(Tag.UNORDERED):].strip())
            return '{}<li>{}</li>\n'.format(
                self._enter_block(State.UNORDERED), item)

        if line.startswith(Tag.QUOTE):
            return (self._enter_block(State.QUOTE)
                    + self._escape(line[len(Tag.QUOTE):]))

        # Plain text: every text line is emitted as its own paragraph, so an
        # already open paragraph is closed before the new one starts.
        closing = self._close_block()
        self._set_state(State.TEXT)
        return closing + self.get_start_tag() + self._escape(line)

    def parse_heading(self, line):
        """Convert a gemtext heading line into an HTML heading element.

        Args:
            line: A line that starts with one or more ``#`` characters.

        Returns:
            An ``<hN>`` element where N is the number of leading ``#``
            characters, limited to the range 1-6 that HTML defines.
        """
        level = 0
        while line.startswith(Tag.HEADING):
            line = line[len(Tag.HEADING):]
            level += 1
        level = max(1, min(level, 6))
        return '{}{}{}'.format(
            self._START_TAGS[State.HEADING].format(level),
            self._escape(line.strip()),
            self._END_TAGS[State.HEADING].format(level))

    def parse_link(self, line):
        """Convert a gemtext link line into an HTML anchor.

        Args:
            line: A line that starts with ``=>``.

        Returns:
            An ``<a>`` element followed by a newline.  The link target is
            used as link text when the line carries no label.
        """
        # The URL is the first whitespace-delimited word; the rest of the
        # line, with its inner spacing intact, is the label.
        parts = line[len(Tag.LINK):].split(None, 1)
        link = parts[0] if parts else ''
        anchor = parts[1].strip() if len(parts) > 1 else ''
        if not anchor:
            anchor = link
        return '{}{}{}'.format(
            self._START_TAGS[State.LINK].format(
                self._escape(link, quote=True)),
            self._escape(anchor),
            self._END_TAGS[State.LINK])

    def is_text(self, line):
        """Tell whether *line* would start a paragraph right now.

        Returns:
            True if no block is open, the line is not blank and it starts
            with none of the gemtext tags; False otherwise.
        """
        if self.mstate != State.INITIAL or not line.strip():
            return False
        return not line.startswith(
            (Tag.LINK, Tag.PRE, Tag.HEADING, Tag.UNORDERED, Tag.QUOTE))

    def get_start_tag(self):
        """Return the HTML start tag that belongs to the current state."""
        return self._START_TAGS[self.mstate]

    def get_end_tag(self):
        """Return the HTML end tag that belongs to the current state."""
        return self._END_TAGS[self.mstate]

    def get_document_from_gemfile(self, filename):
        """Read a gemtext file and return it as an HTML document.

        Args:
            filename: Path of the gemtext file; it is read as UTF-8.

        Returns:
            The complete HTML document as one string.
        """
        # Start clean so a parser instance can be reused for several files.
        self._set_state(State.INITIAL)
        parts = ['<!DOCTYPE html>\n<html>\n<head>\n'
                 '<title>gemtext2html</title>\n</head>\n<body>\n']
        with open(filename, encoding='utf-8') as gemtext:
            parts.extend(self.parse_line(mline) for mline in gemtext)
        # Close a block that is still open at the end of the file.
        parts.append(self._close_block())
        parts.append('</body>\n</html>\n')
        return ''.join(parts)
if __name__ == '__main__':
    # The path of the gemtext file is the first command line argument;
    # without it there is nothing to convert.
    if len(sys.argv) < 2:
        sys.exit('usage: {} GEMTEXT_FILE'.format(sys.argv[0]))
    mparser: GemParser = GemParser()
    document = mparser.get_document_from_gemfile(sys.argv[1])
    print(document)

@@ -0,0 +1,252 @@
#!/usr/bin/env python3
"""This is a python module that will parse gemtext and convert it to html5
"""
# -*- coding: utf-8 -*-
import sys
# Type aliases
# Parser states are plain ints and gemtext tags are plain strings; these
# names are used in the annotations further down in this module.
State = int
Tag = str
class GemParser:
    """This is the main parser class

    It is a small state machine: ``mstate`` names the block-level element
    (paragraph, preformatted block, list or quote) that is currently open,
    so that the matching end tag can be emitted once the block ends.
    """

    # Tags indexed by the integer value of the parser state.
    _START_TAGS = ('', '<p>\n', '<a href="{}">', '<pre{}>\n', '<h{}>',
                   '<ul>\n', '<blockquote>\n')
    _END_TAGS = ('', '</p>\n', '</a>\n', '</pre>\n', '</h{}>\n',
                 '</ul>\n', '</blockquote>\n')

    def __init__(self):
        """Constructor for the GemParser class
        """
        self.mstate: State = StateEnum.INITIAL
        # No longer used by parse_line; kept so that code reading the
        # attribute keeps working.
        self.recurse: bool = False

    @staticmethod
    def _escape(text: str, quote: bool = False) -> str:
        """Escape ``&``, ``<`` and ``>`` (and quotes if *quote*) for HTML."""
        # Imported here so the class does not depend on a module-level import.
        import html
        return html.escape(text, quote=quote)

    def _close_block(self) -> str:
        """Return the end tag of the open block (if any) and reset the state."""
        end_tag: str = self.get_end_tag()
        self.mstate = StateEnum.INITIAL
        return end_tag

    def _enter_block(self, state: int) -> str:
        """Return the markup needed so that a block of kind *state* is open.

        Nothing is emitted when that block is already open; otherwise the
        current block is closed first and the new start tag follows.
        """
        if self.mstate == state:
            return ''
        closing: str = self._close_block()
        self.mstate = state
        return closing + self.get_start_tag()

    def get_document_from_gemfile(self, filename: str) -> str:
        """This subroutine will read a file line by line and convert it to html

        Args:
            filename (str): A file name corresponding to a file of gemtext;
                the file is read as UTF-8

        Returns:
            str: A html5 document as string
        """
        # Start clean so a parser instance can be reused for several files.
        self.mstate = StateEnum.INITIAL
        parts: list = ['<!DOCTYPE html>\n<html>\n<head>\n',
                       '<title>gemtext2html</title>\n</head>\n<body>\n']
        with open(filename, encoding='utf-8') as gemtext:
            # Every fragment is kept, including bare newlines, so that blank
            # lines inside preformatted blocks survive.
            parts.extend(self.parse_line(mline) for mline in gemtext)
        # Close a block that is still open at the end of the file.
        parts.append(self._close_block())
        parts.append('</body>\n</html>\n')
        return ''.join(parts)

    def get_end_tag(self) -> str:
        """A subroutine that will emit the correct end tag for the state

        Returns:
            str: A html end tag
        """
        return self._END_TAGS[self.mstate]

    def get_start_tag(self) -> str:
        """A subroutine to emit the correct html start tag for the state

        Returns:
            str: A html start tag
        """
        return self._START_TAGS[self.mstate]

    def is_text(self, line: str) -> bool:
        """A function that will check if this line would start a paragraph

        Args:
            line (str): A line of gemtext

        Returns:
            bool: True if the parser is in its initial state and the line is
                not blank and starts with none of the gemtext tags, False
                otherwise
        """
        if self.mstate != StateEnum.INITIAL or not line.strip():
            return False
        return not line.startswith(
            (TagEnum.LINK, TagEnum.PRE, TagEnum.HEADING,
             TagEnum.UNORDERED, TagEnum.QUOTE))

    def parse_heading(self, line: str, level: int = 0) -> str:
        """A function that converts a gemtext heading to a html heading

        Args:
            line (str): A gemtext heading line
            level (int, optional): The heading level to start counting from.
                Defaults to 0.

        Returns:
            str: A html heading tag whose level is ``level`` plus the number
                of leading ``#`` characters, limited to the range 1-6 that
                html defines
        """
        while line.startswith(TagEnum.HEADING):
            line = line[len(TagEnum.HEADING):]
            level += 1
        level = max(1, min(level, 6))
        return '{}{}{}'.format(
            self._START_TAGS[StateEnum.HEADING].format(level),
            self._escape(line.strip()),
            self._END_TAGS[StateEnum.HEADING].format(level))

    def parse_line(self, line: str) -> str:
        """This subroutine will parse a single line of gemtext and enter the
        correct state and output the corresponding html

        Args:
            line (string): A string of gemtext

        Returns:
            str: A string of html, including the end tag of a block that
                this line terminates. May be the empty string.
        """
        starts_with_pre: bool = line.startswith(TagEnum.PRE)

        # Inside a preformatted block only the closing fence is special.
        if self.mstate == StateEnum.PRETEXT:
            if starts_with_pre:
                return self._close_block()
            return self._escape(line)

        # Blank lines produce no output and leave an open block open.
        if not line.strip():
            return ''

        # This is a pre start tag
        if starts_with_pre:
            closing: str = self._close_block()
            self.mstate = StateEnum.PRETEXT
            # Everything after the fence is the alt text of the block.
            label: str = line[len(TagEnum.PRE):].strip()
            attribute: str = ''
            if label:
                attribute = ' aria-label="{}"'.format(
                    self._escape(label, quote=True))
            return closing + self.get_start_tag().format(attribute)

        # This is a heading
        if line.startswith(TagEnum.HEADING):
            return self._close_block() + self.parse_heading(line)

        # This is a link
        if line.startswith(TagEnum.LINK):
            return self._close_block() + self.parse_link(line)

        # This is an item of an unordered list
        if line.startswith(TagEnum.UNORDERED):
            item: str = self._escape(line[len(TagEnum.UNORDERED):].strip())
            return '{}<li>{}</li>\n'.format(
                self._enter_block(StateEnum.UNORDERED), item)

        # This is a line of a blockquote
        if line.startswith(TagEnum.QUOTE):
            return (self._enter_block(StateEnum.QUOTE)
                    + self._escape(line[len(TagEnum.QUOTE):]))

        # Plain text: every text line is emitted as its own paragraph, so an
        # already open paragraph is closed before the new one starts.
        closing = self._close_block()
        self.mstate = StateEnum.TEXT
        return closing + self.get_start_tag() + self._escape(line)

    def parse_link(self, line: str) -> str:
        """This function will parse a gemtext link

        Args:
            line (str): A link line of gemtext

        Returns:
            str: A html fragment with a <a> tag; the link target is used as
                link text when the line carries no label
        """
        # The URL is the first whitespace-delimited word; the rest of the
        # line, with its inner spacing intact, is the label.
        parts: list = line[len(TagEnum.LINK):].split(None, 1)
        link: str = parts[0] if parts else ''
        anchor: str = parts[1].strip() if len(parts) > 1 else ''
        if not anchor:
            anchor = link
        return '{}{}{}'.format(
            self._START_TAGS[StateEnum.LINK].format(
                self._escape(link, quote=True)),
            self._escape(anchor),
            self._END_TAGS[StateEnum.LINK])
class StateEnum:
    """Integer identifiers for the states of the parser's state machine.

    The values are consecutive integers starting at zero.
    """

    INITIAL: State = 0    # no block is open
    TEXT: State = 1       # paragraph
    LINK: State = 2       # link line
    PRETEXT: State = 3    # preformatted block
    HEADING: State = 4    # heading line
    UNORDERED: State = 5  # unordered list
    QUOTE: State = 6      # blockquote
class TagEnum:
    """Prefixes that mark the special line types of gemtext."""

    LINK: Tag = "=>"
    PRE: Tag = "```"
    HEADING: Tag = "#"
    UNORDERED: Tag = "*"
    QUOTE: Tag = ">"
if __name__ == '__main__':
    # The path of the gemtext file is the first command line argument;
    # without it there is nothing to convert.
    if len(sys.argv) < 2:
        sys.exit('usage: {} GEMTEXT_FILE'.format(sys.argv[0]))
    mparser: GemParser = GemParser()
    document: str = mparser.get_document_from_gemfile(sys.argv[1])
    print(document)
Loading…
Cancel
Save