Add type hints.
This is helpful for better IDE support and for my own thinking...
This commit is contained in:
parent
eee0377a95
commit
3c8456cda3
2 changed files with 76 additions and 71 deletions
9
.gitignore
vendored
9
.gitignore
vendored
|
@ -1,11 +1,12 @@
|
||||||
.idea/.name
|
.idea/.name
|
||||||
.idea/gemtext2html.iml
|
.idea/gemtext2html.iml
|
||||||
|
.idea/inspectionProfiles/Project_Default.xml
|
||||||
|
.idea/inspectionProfiles/profiles_settings.xml
|
||||||
.idea/misc.xml
|
.idea/misc.xml
|
||||||
.idea/modules.xml
|
.idea/modules.xml
|
||||||
.idea/vcs.xml
|
.idea/vcs.xml
|
||||||
.idea/inspectionProfiles/profiles_settings.xml
|
.idea/workspace.xml
|
||||||
.idea/inspectionProfiles/Project_Default.xml
|
|
||||||
.vscode/launch.json
|
|
||||||
.vscode/settings.json
|
|
||||||
.vscode/.ropeproject/config.py
|
.vscode/.ropeproject/config.py
|
||||||
.vscode/.ropeproject/objectdb
|
.vscode/.ropeproject/objectdb
|
||||||
|
.vscode/launch.json
|
||||||
|
.vscode/settings.json
|
|
@ -4,6 +4,10 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
# Type aliases
|
||||||
|
State = int
|
||||||
|
Tag = str
|
||||||
|
|
||||||
|
|
||||||
class GemParser:
|
class GemParser:
|
||||||
"""This is the main parser class
|
"""This is the main parser class
|
||||||
|
@ -12,10 +16,10 @@ class GemParser:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
"""Constructor for the GemParser class
|
"""Constructor for the GemParser class
|
||||||
"""
|
"""
|
||||||
self.mstate = State.INITIAL
|
self.mstate: State = StateEnum.INITIAL
|
||||||
self.recurse = False
|
self.recurse: bool = False
|
||||||
|
|
||||||
def get_document_from_gemfile(self, filename):
|
def get_document_from_gemfile(self, filename: str) -> str:
|
||||||
"""This subroutine will read a file line by line and convert it to html
|
"""This subroutine will read a file line by line and convert it to html
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -24,25 +28,25 @@ class GemParser:
|
||||||
Returns:
|
Returns:
|
||||||
str: A valid html5 document as string
|
str: A valid html5 document as string
|
||||||
"""
|
"""
|
||||||
rdocument = '<!DOCTYPE html>\n<html>\n<head>\n'
|
rdocument: str = '<!DOCTYPE html>\n<html>\n<head>\n'
|
||||||
rdocument += '<title>gemtext2html</title>\n</head>\n<body>\n'
|
rdocument += '<title>gemtext2html</title>\n</head>\n<body>\n'
|
||||||
with open(filename) as gemtext:
|
with open(filename) as gemtext:
|
||||||
mline: str = gemtext.readline()
|
mline: str = gemtext.readline()
|
||||||
while mline:
|
while mline:
|
||||||
rline = self.parse_line(mline)
|
rline: str = self.parse_line(mline)
|
||||||
if rline is not None and rline != str() and rline != '\n':
|
if rline is not None and rline != str() and rline != '\n':
|
||||||
rdocument += rline
|
rdocument += rline
|
||||||
mline = gemtext.readline()
|
mline = gemtext.readline()
|
||||||
rdocument += '{}</body>\n</html>\n'.format(self.get_end_tag())
|
rdocument += '{}</body>\n</html>\n'.format(self.get_end_tag())
|
||||||
return rdocument
|
return rdocument
|
||||||
|
|
||||||
def get_end_tag(self):
|
def get_end_tag(self) -> str:
|
||||||
"""A subroutine that will emit the correct end tag for the state
|
"""A subroutine that will emit the correct end tag for the state
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: A html end tag
|
str: A html end tag
|
||||||
"""
|
"""
|
||||||
tag = list()
|
tag: list[str] = list()
|
||||||
tag.append('')
|
tag.append('')
|
||||||
tag.append('</p>\n')
|
tag.append('</p>\n')
|
||||||
tag.append('</a>\n')
|
tag.append('</a>\n')
|
||||||
|
@ -52,13 +56,13 @@ class GemParser:
|
||||||
tag.append('</blockquote>\n')
|
tag.append('</blockquote>\n')
|
||||||
return tag[self.mstate]
|
return tag[self.mstate]
|
||||||
|
|
||||||
def get_start_tag(self):
|
def get_start_tag(self) -> str:
|
||||||
"""A subroutine to emit the correct html start tag for the state
|
"""A subroutine to emit the correct html start tag for the state
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
str: A html start tag
|
str: A html start tag
|
||||||
"""
|
"""
|
||||||
tag = list()
|
tag: list[str] = list()
|
||||||
tag.append('')
|
tag.append('')
|
||||||
tag.append('<p>\n')
|
tag.append('<p>\n')
|
||||||
tag.append('<a href="{}">')
|
tag.append('<a href="{}">')
|
||||||
|
@ -68,7 +72,7 @@ class GemParser:
|
||||||
tag.append('<blockquote>\n')
|
tag.append('<blockquote>\n')
|
||||||
return tag[self.mstate]
|
return tag[self.mstate]
|
||||||
|
|
||||||
def is_text(self, line):
|
def is_text(self, line: str) -> bool:
|
||||||
"""A function that will check if this is a paragraph of text
|
"""A function that will check if this is a paragraph of text
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -78,13 +82,13 @@ class GemParser:
|
||||||
bool: True if it is text, False if it is something else
|
bool: True if it is text, False if it is something else
|
||||||
"""
|
"""
|
||||||
return (not (line == '' or line == '\n')
|
return (not (line == '' or line == '\n')
|
||||||
and self.mstate == State.INITIAL and
|
and self.mstate == StateEnum.INITIAL and
|
||||||
(not line.startswith(Tag.LINK) and not line.startswith(Tag.PRE)
|
(not line.startswith(TagEnum.LINK) and not line.startswith(TagEnum.PRE)
|
||||||
and not line.startswith(Tag.HEADING)
|
and not line.startswith(TagEnum.HEADING)
|
||||||
and not line.startswith(Tag.UNORDERED)
|
and not line.startswith(TagEnum.UNORDERED)
|
||||||
and not line.startswith(Tag.QUOTE)))
|
and not line.startswith(TagEnum.QUOTE)))
|
||||||
|
|
||||||
def parse_heading(self, line, level=0):
|
def parse_heading(self, line: str, level: int = 0) -> str:
|
||||||
"""A function that will recursively call it self to get the correct
|
"""A function that will recursively call it self to get the correct
|
||||||
heading level
|
heading level
|
||||||
|
|
||||||
|
@ -95,14 +99,14 @@ class GemParser:
|
||||||
Returns:
|
Returns:
|
||||||
str: A html heading tag of the correct level
|
str: A html heading tag of the correct level
|
||||||
"""
|
"""
|
||||||
if line.startswith(Tag.HEADING):
|
if line.startswith(TagEnum.HEADING):
|
||||||
return self.parse_heading(line[1:], level + 1)
|
return self.parse_heading(line[1:], level + 1)
|
||||||
else:
|
else:
|
||||||
return "{}{}{}".format(self.get_start_tag().format(level),
|
return "{}{}{}".format(self.get_start_tag().format(level),
|
||||||
line.strip().rstrip('\n'),
|
line.strip().rstrip('\n'),
|
||||||
self.get_end_tag().format(level))
|
self.get_end_tag().format(level))
|
||||||
|
|
||||||
def parse_line(self, line):
|
def parse_line(self, line: str) -> str:
|
||||||
"""This subroutine will parse a single line of gemtext and enter the
|
"""This subroutine will parse a single line of gemtext and enter the
|
||||||
correct state and output the corresponding html
|
correct state and output the corresponding html
|
||||||
|
|
||||||
|
@ -112,76 +116,76 @@ class GemParser:
|
||||||
Returns:
|
Returns:
|
||||||
str: A string of html
|
str: A string of html
|
||||||
"""
|
"""
|
||||||
rstring = str()
|
rstring: str = str()
|
||||||
is_text = self.is_text(line)
|
is_text: bool = self.is_text(line)
|
||||||
starts_with_pre = line.startswith(Tag.PRE)
|
starts_with_pre: bool = line.startswith(TagEnum.PRE)
|
||||||
starts_with_ul = line.startswith(Tag.UNORDERED)
|
starts_with_ul: bool = line.startswith(TagEnum.UNORDERED)
|
||||||
starts_with_quote = line.startswith(Tag.QUOTE)
|
starts_with_quote: bool = line.startswith(TagEnum.QUOTE)
|
||||||
if line == '\n' and self.mstate != State.PRETEXT:
|
if line == '\n' and self.mstate != StateEnum.PRETEXT:
|
||||||
return rstring
|
return rstring
|
||||||
# This is blockquote end tag
|
# This is blockquote end tag
|
||||||
if self.mstate == State.QUOTE and not starts_with_quote:
|
if self.mstate == StateEnum.QUOTE and not starts_with_quote:
|
||||||
rstring += self.get_end_tag()
|
rstring += self.get_end_tag()
|
||||||
self.recurse = True
|
self.recurse = True
|
||||||
# This is paragraph end tag
|
# This is paragraph end tag
|
||||||
if self.mstate == State.TEXT and not is_text:
|
if self.mstate == StateEnum.TEXT and not is_text:
|
||||||
rstring += self.get_end_tag()
|
rstring += self.get_end_tag()
|
||||||
self.recurse = True
|
self.recurse = True
|
||||||
# This is ul end tag
|
# This is ul end tag
|
||||||
if self.mstate == State.UNORDERED and not starts_with_ul:
|
if self.mstate == StateEnum.UNORDERED and not starts_with_ul:
|
||||||
rstring += self.get_end_tag()
|
rstring += self.get_end_tag()
|
||||||
self.recurse = True
|
self.recurse = True
|
||||||
# This is a pre start tag
|
# This is a pre start tag
|
||||||
if starts_with_pre and self.mstate != State.PRETEXT:
|
if starts_with_pre and self.mstate != StateEnum.PRETEXT:
|
||||||
self.mstate = State.PRETEXT
|
self.mstate = StateEnum.PRETEXT
|
||||||
rstring += self.get_start_tag().format(' aria-label="{}"'.format(
|
rstring += self.get_start_tag().format(' aria-label="{}"'.format(
|
||||||
line[4:].rstrip('\n')))
|
line[4:].rstrip('\n')))
|
||||||
return rstring
|
return rstring
|
||||||
# This is in pre formatted text
|
# This is in pre formatted text
|
||||||
elif self.mstate == State.PRETEXT and not starts_with_pre:
|
elif self.mstate == StateEnum.PRETEXT and not starts_with_pre:
|
||||||
rstring += line
|
rstring += line
|
||||||
return rstring
|
return rstring
|
||||||
# This is a pre end tag
|
# This is a pre end tag
|
||||||
elif starts_with_pre and self.mstate == State.PRETEXT:
|
elif starts_with_pre and self.mstate == StateEnum.PRETEXT:
|
||||||
rstring += self.get_end_tag()
|
rstring += self.get_end_tag()
|
||||||
self.mstate = State.INITIAL
|
self.mstate = StateEnum.INITIAL
|
||||||
return rstring
|
return rstring
|
||||||
else:
|
else:
|
||||||
# This is paragraph start tag
|
# This is paragraph start tag
|
||||||
if is_text and self.mstate != State.TEXT:
|
if is_text and self.mstate != StateEnum.TEXT:
|
||||||
self.mstate = State.TEXT
|
self.mstate = StateEnum.TEXT
|
||||||
rstring += self.get_start_tag()
|
rstring += self.get_start_tag()
|
||||||
rstring += line
|
rstring += line
|
||||||
return rstring
|
return rstring
|
||||||
# This is in paragraph
|
# This is in paragraph
|
||||||
if is_text and self.mstate == State.TEXT:
|
if is_text and self.mstate == StateEnum.TEXT:
|
||||||
rstring += line
|
rstring += line
|
||||||
return rstring
|
return rstring
|
||||||
# This is a heading
|
# This is a heading
|
||||||
if line.startswith(Tag.HEADING):
|
if line.startswith(TagEnum.HEADING):
|
||||||
self.mstate = State.HEADING
|
self.mstate = StateEnum.HEADING
|
||||||
rstring += self.parse_heading(line)
|
rstring += self.parse_heading(line)
|
||||||
self.mstate = State.INITIAL
|
self.mstate = StateEnum.INITIAL
|
||||||
return rstring
|
return rstring
|
||||||
# This is a link
|
# This is a link
|
||||||
if line.startswith(Tag.LINK):
|
if line.startswith(TagEnum.LINK):
|
||||||
self.mstate = State.LINK
|
self.mstate = StateEnum.LINK
|
||||||
rstring += self.parse_link(line)
|
rstring += self.parse_link(line)
|
||||||
self.mstate = State.INITIAL
|
self.mstate = StateEnum.INITIAL
|
||||||
return rstring
|
return rstring
|
||||||
# This is ul start tag
|
# This is ul start tag
|
||||||
if starts_with_ul and not self.mstate == State.UNORDERED:
|
if starts_with_ul and not self.mstate == StateEnum.UNORDERED:
|
||||||
self.mstate = State.UNORDERED
|
self.mstate = StateEnum.UNORDERED
|
||||||
rstring += self.get_start_tag()
|
rstring += self.get_start_tag()
|
||||||
rstring += '<li>{}</li>\n'.format(line[1:].strip())
|
rstring += '<li>{}</li>\n'.format(line[1:].strip())
|
||||||
return rstring
|
return rstring
|
||||||
# This is in middle of unordered list
|
# This is in middle of unordered list
|
||||||
if starts_with_ul and self.mstate == State.UNORDERED:
|
if starts_with_ul and self.mstate == StateEnum.UNORDERED:
|
||||||
rstring += '<li>{}</li>\n'.format(line[1:].strip())
|
rstring += '<li>{}</li>\n'.format(line[1:].strip())
|
||||||
return rstring
|
return rstring
|
||||||
# This is blockquote start tag
|
# This is blockquote start tag
|
||||||
if starts_with_quote and self.mstate != State.QUOTE:
|
if starts_with_quote and self.mstate != StateEnum.QUOTE:
|
||||||
self.mstate = State.QUOTE
|
self.mstate = StateEnum.QUOTE
|
||||||
rstring += self.get_start_tag()
|
rstring += self.get_start_tag()
|
||||||
rstring += line[1:]
|
rstring += line[1:]
|
||||||
return rstring
|
return rstring
|
||||||
|
@ -191,14 +195,14 @@ class GemParser:
|
||||||
return rstring
|
return rstring
|
||||||
if self.recurse:
|
if self.recurse:
|
||||||
self.recurse = False
|
self.recurse = False
|
||||||
self.mstate = State.INITIAL
|
self.mstate = StateEnum.INITIAL
|
||||||
# Recurse so we don't miss whats next
|
# Recurse so we don't miss whats next
|
||||||
return "{}{}".format(rstring, self.parse_line(line))
|
return "{}{}".format(rstring, self.parse_line(line))
|
||||||
else:
|
else:
|
||||||
print("We should never be here, the line is: {}".format(line))
|
print("We should never be here, the line is: {}".format(line))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
def parse_link(self, line):
|
def parse_link(self, line: str) -> str:
|
||||||
"""This function will parse a gemtext link
|
"""This function will parse a gemtext link
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -207,40 +211,40 @@ class GemParser:
|
||||||
Returns:
|
Returns:
|
||||||
str: A html fragment with a <a> tag
|
str: A html fragment with a <a> tag
|
||||||
"""
|
"""
|
||||||
linearr = line[2:].strip().split(' ')
|
linearr: list[str] = line[2:].strip().split(' ')
|
||||||
link = linearr[0]
|
link: str = linearr[0]
|
||||||
anchor = str().join(linearr[1:])
|
anchor: str = str().join(linearr[1:])
|
||||||
if not anchor:
|
if not anchor:
|
||||||
anchor = link
|
anchor = link
|
||||||
rline = self.get_start_tag().format(link)
|
rline: str = self.get_start_tag().format(link)
|
||||||
rline += anchor
|
rline += anchor
|
||||||
rline += self.get_end_tag()
|
rline += self.get_end_tag()
|
||||||
return rline
|
return rline
|
||||||
|
|
||||||
|
|
||||||
class State:
|
class StateEnum:
|
||||||
"""This is an enumeration of the states of the state machine
|
"""This is an enumeration of the states of the state machine
|
||||||
"""
|
"""
|
||||||
INITIAL = 0
|
INITIAL: State = 0
|
||||||
TEXT = 1
|
TEXT: State = 1
|
||||||
LINK = 2
|
LINK: State = 2
|
||||||
PRETEXT = 3
|
PRETEXT: State = 3
|
||||||
HEADING = 4
|
HEADING: State = 4
|
||||||
UNORDERED = 5
|
UNORDERED: State = 5
|
||||||
QUOTE = 6
|
QUOTE: State = 6
|
||||||
|
|
||||||
|
|
||||||
class Tag:
|
class TagEnum:
|
||||||
"""This is an enumeration of the possible gemtext tags
|
"""This is an enumeration of the possible gemtext tags
|
||||||
"""
|
"""
|
||||||
LINK = '=>'
|
LINK: Tag = '=>'
|
||||||
PRE = '```'
|
PRE: Tag = '```'
|
||||||
HEADING = '#'
|
HEADING: Tag = '#'
|
||||||
UNORDERED = '*'
|
UNORDERED: Tag = '*'
|
||||||
QUOTE = '>'
|
QUOTE: Tag = '>'
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
mparser: GemParser = GemParser()
|
mparser: GemParser = GemParser()
|
||||||
document = mparser.get_document_from_gemfile(sys.argv[1])
|
document: str = mparser.get_document_from_gemfile(sys.argv[1])
|
||||||
print(document)
|
print(document)
|
||||||
|
|
Loading…
Add table
Reference in a new issue