Source code for frontend.wikicms

'''
Created on 2020-07-27

@author: wf
'''
from wikibot.wikiclient import WikiClient
from wikibot.smw import SMWClient
from frontend.site import Site
from bs4 import BeautifulSoup
import traceback
import requests
from flask import Response, render_template

class Frontend(object):
    '''
    Wiki Content Management System Frontend

    Fetches the HTML of wiki pages via a WikiClient, filters and rewrites
    it for the site and renders it with a Flask/Jinja template.
    '''

    def __init__(self, siteName: str, debug: bool = False, filterKeys=None):
        '''
        Constructor

        Args:
            siteName(str): the name of the site this frontend is for
            debug(bool): True if debugging should be on
            filterKeys(list): a list of keys for filters to be applied
                e.g. "editsection" - None selects the default filters
                "editsection" and "parser-output"
        '''
        self.site = Site(siteName)
        self.debug = debug
        self.wiki = None
        # both are set by open(); they are initialized here so that methods
        # used before open() find the attributes instead of raising
        # AttributeError
        self.appWrap = None
        self.smwclient = None
        if filterKeys is None:
            self.filterKeys = ["editsection", "parser-output"]
        else:
            # honor the caller's selection (a copy, so that later changes
            # to the caller's list do not leak into this instance)
            self.filterKeys = list(filterKeys)

    def log(self, msg):
        '''
        log the given message if debugging is true

        Args:
            msg(str): the message to log
        '''
        if self.debug:
            print(msg, flush=True)

    def open(self, appWrap=None):
        '''
        open the frontend

        Args:
            appWrap(appWrap): optional fb4 Application Wrapper
        '''
        self.appWrap = appWrap
        # NOTE(review): the wiki login, SMW client creation and site.open are
        # assumed to happen only on the first call - confirm against the
        # original layout of this method
        if self.wiki is None:
            self.wiki = WikiClient.ofWikiId(self.site.wikiId)
            self.wiki.login()
            self.smwclient = SMWClient(self.wiki.getSite())
            self.site.open(appWrap)

    def errMsg(self, ex):
        '''
        get an error message for the given exception

        Args:
            ex(Exception): the exception to describe

        Returns:
            str: repr of the exception - followed by the current traceback
            if debugging is on
        '''
        if self.debug:
            msg = "%s\n%s" % (repr(ex), traceback.format_exc())
        else:
            msg = repr(ex)
        return msg

    def wikiPage(self, pagePath):
        '''
        get the wikiPage for the given pagePath

        Args:
            pagePath(str): the page path

        Returns:
            str: the pageTitle
        '''
        if "/index.php/" in pagePath:
            wikipage = pagePath.replace("/index.php/", "")
        elif pagePath.startswith("/"):
            wikipage = pagePath[1:]
        else:
            wikipage = pagePath
        return wikipage

    def checkPath(self, pagePath):
        '''
        check the given pagePath

        Args:
            pagePath(str): the page Path to check

        Returns:
            str: None or an error message naming an illegal char being used;
            if several illegal chars occur the one latest in the list
            of illegal chars is reported
        '''
        error = None
        self.log(pagePath)
        illegalChars = ['{', '}', '<', '>', '[', ']', '|']
        for illegalChar in illegalChars:
            if illegalChar in pagePath:
                error = "invalid char %s in given pagePath " % (illegalChar)
        return error

    def needsProxy(self, path: str) -> bool:
        '''
        check whether the given path needs to be proxied

        Args:
            path(str): the path to check

        Returns:
            True if this path needs to be proxied
        '''
        result = path.startswith("/images/")
        return result

    def proxy(self, path: str):
        '''
        proxy a request
        see https://stackoverflow.com/a/50231825/1497139

        Args:
            path(str): the path to proxy

        Returns:
            Response: a flask Response with the content, status code and
            content type of the proxied result
        '''
        wikiUser = self.wiki.wikiUser
        url = "%s%s%s" % (wikiUser.url, wikiUser.scriptPath, path)
        r = requests.get(url)
        # forward status and content type - otherwise e.g. images would be
        # delivered with flask's default content type and a missing file
        # would be reported as a success
        return Response(r.content, status=r.status_code, content_type=r.headers.get("Content-Type"))

    def filter(self, html):
        '''
        filter the given html with the filterKeys of this frontend

        Args:
            html(str): the html to filter

        Returns:
            the filtered soup as returned by doFilter
        '''
        return self.doFilter(html, self.filterKeys)

    def fixNode(self, node, attribute, prefix, delim=None):
        '''
        fix the given node by prepending the site name to the values of
        the given attribute that start with the given prefix

        Args:
            node(BeautifulSoup): the node
            attribute(str): the name of the attribute e.g. "href", "src"
            prefix(str): the prefix to replace e.g. "/", "/images", "/thumbs"
            delim(str): if not None the delimiter for multiple values
        '''
        if attribute not in node.attrs:
            return
        siteprefix = "/%s%s" % (self.site.name, prefix)
        attrval = node.attrs[attribute]
        if delim is None:
            vals = [attrval]
            joiner = ""
        else:
            vals = attrval.split(delim)
            joiner = delim
        newvals = []
        for val in vals:
            # protocol-relative urls ("//host/path") point to another host
            # and must not get the site prefix
            if val.startswith(prefix) and not val.startswith("//"):
                newvals.append(val.replace(prefix, siteprefix, 1))
            else:
                newvals.append(val)
        node.attrs[attribute] = joiner.join(newvals)

    def fixImages(self, soup):
        '''
        fix the src and srcset attributes of all images in the given soup

        Args:
            soup(BeautifulSoup): the html parser
        '''
        for img in soup.find_all('img'):
            self.fixNode(img, "src", "/")
            self.fixNode(img, "srcset", "/", ", ")

    def fixHtml(self, soup):
        '''
        fix the HTML in the given soup

        Args:
            soup(BeautifulSoup): the html parser

        Returns:
            the given soup with fixed images and links
        '''
        self.fixImages(soup)
        # fix absolute hrefs
        for a in soup.find_all('a'):
            self.fixNode(a, "href", "/")
        return soup

    def unwrap(self, soup):
        '''
        get the html of the given soup without the html/body wrapper

        Args:
            soup: the soup (or any object with a string representation)

        Returns:
            str: the string representation with "<html><body>" and
            "</body></html>" removed
        '''
        html = str(soup)
        html = html.replace("<html><body>", "")
        html = html.replace("</body></html>", "")
        return html

    def doFilter(self, html, filterKeys):
        '''
        parse the given html and apply the filters with the given keys

        Args:
            html(str): the html to filter
            filterKeys(list): the keys of the filters to apply:
                "parser-output" narrows the result to the
                div.mw-parser-output if there is one,
                "editsection" removes all span.mw-editsection elements

        Returns:
            the filtered soup
        '''
        # https://stackoverflow.com/questions/5598524/can-i-remove-script-tags-with-beautifulsoup
        soup = BeautifulSoup(html, 'lxml')
        if "parser-output" in filterKeys:
            parserdiv = soup.find('div', {"class": "mw-parser-output"})
            if parserdiv:
                soup = parserdiv
        # https://stackoverflow.com/questions/5041008/how-to-find-elements-by-class
        if "editsection" in filterKeys:
            for s in soup.select('span.mw-editsection'):
                s.extract()
        return soup

    def getFrame(self, pageTitle):
        '''
        get the frame template to be used for the given pageTitle

        Args:
            pageTitle(str): the pageTitle to get the Property:Frame for

        Returns:
            str: the frame or None
        '''
        frame = None
        if self.smwclient is None:
            # open() has not been called - there is nothing to query
            return frame
        askQuery = """{{#ask: [[%s]] |mainlabel=- |?Frame=frame }} """ % pageTitle
        frameResult = {}
        try:
            frameResult = self.smwclient.query(askQuery)
        except Exception as ex:
            # a title with invalid characters simply has no frame;
            # anything else is a real problem for the caller
            if "invalid characters" not in str(ex):
                raise
        if pageTitle in frameResult:
            frameRow = frameResult[pageTitle]
            frame = frameRow['frame']
        # legacy java handling
        if frame is not None:
            frame = frame.replace(".rythm", "")
        return frame

    def getContent(self, pagePath: str):
        '''
        get the content for the given pagePath

        Args:
            pagePath(str): the pagePath

        Returns:
            tuple: pageTitle, content and error - content is the filtered
            and fixed HTML for the given path (None if it could not be
            retrieved), error is None or an error message
        '''
        content = None
        error = None
        pageTitle = "?"
        try:
            if pagePath == "/":
                pageTitle = self.site.defaultPage
            else:
                error = self.checkPath(pagePath)
                pageTitle = self.wikiPage(pagePath)
            if error is None:
                if self.wiki is None:
                    raise Exception("getContent without wiki - you might want to call open first")
                content = self.wiki.getHtml(pageTitle)
                soup = self.filter(content)
                soup = self.fixHtml(soup)
                content = self.unwrap(soup)
        except Exception as e:
            # problems are reported via the error element of the result
            error = self.errMsg(e)
        return pageTitle, content, error

    def renderTemplate(self, template, **kwargs):
        '''
        render the given template with the given arguments

        Args:
            template(str): the template file to be used
            kwargs(): same arguments as for the dict constructor

        Returns:
            tuple: the rendered result and None - or None and the error
            of the site if there is no result
        '''
        # pass on keyword args
        if self.appWrap is not None:
            with self.appWrap.app.app_context():
                result = render_template(template, **kwargs)
        else:
            result = render_template(template, **kwargs)
        if result is None:
            return None, self.site.error
        else:
            return result, None

    def toReveal(self, html):
        '''
        convert the given html to reveal

        each h2 that is directly followed by a span with an id starting
        with "⌘⌘" is moved into a new section element together with the
        elements following it

        Args:
            html(str): the html to convert

        Returns:
            str: the converted html
        '''
        soup = BeautifulSoup(html, 'lxml')
        for h2 in soup.find_all(recursive=True):
            if h2.name != "h2":
                continue
            span = h2.next_element
            if span is None or span.name != "span":
                continue
            tagid = span.get('id')
            # a span without id can not carry the marker
            if tagid is None or not tagid.startswith("⌘⌘"):
                continue
            section = soup.new_tag("section")
            h2.parent.append(section)
            section.insert(0, h2)
            # the order of the statements below matters since appending
            # a tag moves it within the tree that is being traversed
            tag = h2.next_element
            while (tag is not None and tag.name != "h2"):
                if tag.parent != h2:
                    section.append(tag)
                tag = tag.next_element
        html = self.unwrap(soup)
        return html

    def render(self, path: str, **kwargs):
        '''
        render the given path

        Args:
            path(str): the path to render the content for
            kwargs(): optional keyword arguments - title, content and error
                are filled in from the page if not given

        Returns:
            the flask Response of proxy for paths that need to be proxied,
            otherwise the (result, error) tuple of renderTemplate
        '''
        if self.needsProxy(path):
            result = self.proxy(path)
        else:
            pageTitle, content, error = self.getContent(path)
            frame = self.getFrame(pageTitle)
            if frame is not None:
                template = "%s.html" % frame
                if frame == "reveal" and error is None:
                    content = self.toReveal(content)
            else:
                template = self.site.template
            if 'title' not in kwargs:
                kwargs['title'] = pageTitle
            if 'content' not in kwargs:
                kwargs['content'] = content
            if 'error' not in kwargs:
                kwargs['error'] = error
            result = self.renderTemplate(template, **kwargs)
        return result