#!/usr/bin/env python3 """This is a python module that will parse gemtext and convert it to html5 """ # -*- coding: utf-8 -*- import sys # Type aliases State = int Tag = str class GemParser: """This is the main parser class """ def __init__(self): """Constructor for the GemParser class """ self.mstate: State = StateEnum.INITIAL self.recurse: bool = False def get_document_from_gemfile(self, filename: str) -> str: """This subroutine will read a file line by line and convert it to html Args: filename (str): A file name corresponing to a file of gemtext Returns: str: A valid html5 document as string """ rdocument: str = '\n\n
\n' rdocument += '\n')
tag.append('')
tag.append('\n')
tag.append('
\n')
tag.append('
\n')
return tag[self.mstate]
def is_text(self, line: str) -> bool:
    """Decide whether a line should be treated as plain paragraph text.

    A line counts as text only when it is not blank, the parser state is
    StateEnum.INITIAL, and the line does not begin with any of the LINK,
    PRE, HEADING, UNORDERED or QUOTE prefixes from TagEnum.

    Args:
        line (str): A line of gemtext

    Returns:
        bool: True if it is text, False if it is something else
    """
    # An empty string or a bare newline is never paragraph text.
    if line in ('', '\n'):
        return False
    # Text is only recognised while the parser is in its initial state.
    if self.mstate != StateEnum.INITIAL:
        return False
    # Checked in the same order as the prefixes are listed here.
    prefixes = (TagEnum.LINK, TagEnum.PRE, TagEnum.HEADING,
                TagEnum.UNORDERED, TagEnum.QUOTE)
    return not any(line.startswith(prefix) for prefix in prefixes)
def parse_heading(self, line: str, level: int = 0) -> str:
    """Convert a gemtext heading line into an html heading string.

    Each leading TagEnum.HEADING prefix raises the heading level by one.
    The prefixes are counted with a loop rather than by recursion, so a
    line that consists of a very long run of prefixes cannot exhaust the
    interpreter's recursion limit, and the line is sliced only once.

    Args:
        line (str): A gemtext heading line
        level (int, optional): The heading level to start from. Defaults to 0.

    Returns:
        str: A html heading tag of the correct level
    """
    marker = TagEnum.HEADING
    offset = 0
    # Advance one character per matched prefix, exactly as the former
    # recursive line[1:] step did.
    while line.startswith(marker, offset):
        offset += 1
        level += 1
    # strip() removes surrounding whitespace, the trailing newline included.
    text = line[offset:].strip()
    # Fetch the start tag before the end tag, in the original call order.
    opening = self.get_start_tag().format(level)
    closing = self.get_end_tag().format(level)
    return "{}{}{}".format(opening, text, closing)
def parse_line(self, line: str) -> str:
"""This subroutine will parse a single line of gemtext and enter the
correct state and output the corresponding html
Args:
line (string): A string of gemtext
Returns:
str: A string of html
"""
rstring: str = str()
is_text: bool = self.is_text(line)
starts_with_pre: bool = line.startswith(TagEnum.PRE)
starts_with_ul: bool = line.startswith(TagEnum.UNORDERED)
starts_with_quote: bool = line.startswith(TagEnum.QUOTE)
if line == '\n' and self.mstate != StateEnum.PRETEXT:
return rstring
# This is blockquote end tag
if self.mstate == StateEnum.QUOTE and not starts_with_quote:
rstring += self.get_end_tag()
self.recurse = True
# This is paragraph end tag
if self.mstate == StateEnum.TEXT and not is_text:
rstring += self.get_end_tag()
self.recurse = True
# This is ul end tag
if self.mstate == StateEnum.UNORDERED and not starts_with_ul:
rstring += self.get_end_tag()
self.recurse = True
# This is a pre start tag
if starts_with_pre and self.mstate != StateEnum.PRETEXT:
self.mstate = StateEnum.PRETEXT
label: str = line[4:].rstrip('\n').strip()
rstring += self.get_start_tag().format(
' aria-label="{}"'.format(label) if label else '')
return rstring
# This is in pre formatted text
elif self.mstate == StateEnum.PRETEXT and not starts_with_pre:
rstring += line
return rstring
# This is a pre end tag
elif starts_with_pre and self.mstate == StateEnum.PRETEXT:
rstring += self.get_end_tag()
self.mstate = StateEnum.INITIAL
return rstring
else:
# This is paragraph start tag
if is_text and self.mstate != StateEnum.TEXT:
self.mstate = StateEnum.TEXT
rstring += self.get_start_tag()
rstring += line
return rstring
# This is in paragraph
if is_text and self.mstate == StateEnum.TEXT:
rstring += line
return rstring
# This is a heading
if line.startswith(TagEnum.HEADING):
self.mstate = StateEnum.HEADING
rstring += self.parse_heading(line)
self.mstate = StateEnum.INITIAL
return rstring
# This is a link
if line.startswith(TagEnum.LINK):
self.mstate = StateEnum.LINK
rstring += self.parse_link(line)
self.mstate = StateEnum.INITIAL
return rstring
# This is ul start tag
if starts_with_ul and not self.mstate == StateEnum.UNORDERED:
self.mstate = StateEnum.UNORDERED
rstring += self.get_start_tag()
rstring += '