Source code for frontend.wikicms

'''
Created on 2020-07-27

@author: wf
'''
from wikibot.wikiclient import WikiClient
from wikibot.smw import SMWClient
from frontend.site import Site
from bs4 import BeautifulSoup
import traceback
import requests
from flask import Response, render_template

[docs]class Frontend(object):
    '''
    Wiki Content Management System Frontend
    '''
    def __init__(self, siteName:str,debug:bool=False,filterKeys=None):
        '''
        Constructor
        Args:
            siteName(str): the name of the site this frontend is for
            debug: (bool): True if debugging should be on
            filterKeys: (list): a list of keys for filters to be applied e.g. editsection
        '''
        
        self.site=Site(siteName)
        self.debug=debug
        self.wiki=None
        if filterKeys is None:
            self.filterKeys=["editsection","parser-output"]
        else:
            self.filterKeys=[]
        
[docs]    def log(self,msg):
        '''
        log the given message if debugging is true
        
        Args:
            msg(str): the message to log
        '''
        if self.debug:
            print(msg,flush=True)
        
[docs]    def open(self,appWrap=None):
        '''
        open the frontend
        
        Args:
             appWrap(appWrap): optional fb4 Application Wrapper
        '''
        self.appWrap=appWrap
        if self.wiki is None:
            self.wiki=WikiClient.ofWikiId(self.site.wikiId)
            self.wiki.login()
            self.smwclient=SMWClient(self.wiki.getSite())
            self.site.open(appWrap)
        
[docs]    def errMsg(self,ex):
        if self.debug:
            msg="%s\n%s" % (repr(ex),traceback.format_exc())
        else:
            msg=repr(ex)
        return msg
    
[docs]    def wikiPage(self,pagePath):
        '''
        get the wikiPage for the given pagePath
        
        Args:
            pagePath(str): the page path
        Returns:
            str: the pageTitle
        '''
        if "/index.php/" in pagePath:
            wikipage=pagePath.replace("/index.php/","")
        elif pagePath.startswith("/"):
            wikipage=pagePath[1:]    
        else:
            wikipage=pagePath
        return wikipage
    
[docs]    def checkPath(self,pagePath):
        '''
        check the given pathPath
        
        Args:
            pagePath(str): the page Path to check
            
        Returns:
            str: None or an error message with the illegal chars being used
        '''
        error=None
        self.log(pagePath)
        illegalChars=['{','}','<','>','[',']','|']
        for illegalChar in illegalChars:
            if illegalChar in pagePath:
                error="invalid char %s in given pagePath " % (illegalChar)
        return error;
    
[docs]    def needsProxy(self,path:str)->bool:
        '''
        Args:
            path(str): the path to check
        Returns:
            True if this path needs to be proxied 
        '''
        result=path.startswith("/images/")
        return result
    
[docs]    def proxy(self,path:str)->str:
        '''
        proxy a request
        see https://stackoverflow.com/a/50231825/1497139
        
        Args:
            path(str): the path to proxy
        Returns:
            the proxied result Request
        '''
        wikiUser=self.wiki.wikiUser
        url="%s%s%s" % (wikiUser.url,wikiUser.scriptPath,path)
        r = requests.get(url)
        return Response(r.content)
    
[docs]    def filter(self,html):
        return self.doFilter(html,self.filterKeys)
    
[docs]    def fixNode(self,node,attribute,prefix,delim=None):
        '''
        fix the given node
        
        node(BeautifulSoup): the node
        attribute(str): the name of the attribute e.g. "href", "src"
        prefix(str): the prefix to replace e.g. "/", "/images", "/thumbs"
        delim(str): if not None the delimiter for multiple values
        '''
        siteprefix="/%s%s" % (self.site.name,prefix)
        if attribute in node.attrs:
            attrval=node.attrs[attribute]
            if delim is not None:
                vals=attrval.split(delim)
            else:
                vals=[attrval]
                delim=""
            newvals=[]
            for val in vals:    
                if val.startswith(prefix):
                    newvals.append(val.replace(prefix,siteprefix,1))
                else:
                    newvals.append(val)
            if delim is not None:
                node.attrs[attribute]=delim.join(newvals)
    
[docs]    def fixImages(self,soup):
        for img in soup.findAll('img'):
            self.fixNode(img,"src","/")
            self.fixNode(img,"srcset","/",", ")
            
    
[docs]    def fixHtml(self,soup):
        '''
        fix the HTML in the given soup
        
        Args:
            soup(BeautifulSoup): the html parser
        '''
        self.fixImages(soup)
        # fix absolute hrefs
        for a in soup.findAll('a'):
            self.fixNode(a,"href","/")
        return soup
    
[docs]    def unwrap(self,soup):
        html=str(soup)
        html=html.replace("<html><body>","")
        html=html.replace("</body></html>","")
        return html
        
[docs]    def doFilter(self,html,filterKeys):
        # https://stackoverflow.com/questions/5598524/can-i-remove-script-tags-with-beautifulsoup
        soup = BeautifulSoup(html,'lxml')
        if "parser-output" in filterKeys:
            parserdiv=soup.find('div',{"class": "mw-parser-output"})
            if parserdiv:
                soup=parserdiv
                pass
        # https://stackoverflow.com/questions/5041008/how-to-find-elements-by-class
        if "editsection" in filterKeys:
            for s in soup.select('span.mw-editsection'):
                s.extract()
        return soup
    
[docs]    def getFrame(self,pageTitle):
        '''
        get the frame template to be used for the given pageTitle#
        
        Args:
            pageTitle(str): the pageTitle to get the Property:Frame for
            
        Returns:
            str: the frame or None
        '''
        askQuery="""{{#ask: [[%s]]
|mainlabel=-
|?Frame=frame
}}
""" % pageTitle
        frame=None
        frameResult={}
        try:
            frameResult=self.smwclient.query(askQuery)
        except Exception as ex:
            if "invalid characters" in self.unwrap(ex):
                pass
            else:
                raise ex
        if pageTitle in frameResult:
            frameRow=frameResult[pageTitle]
            frame=frameRow['frame']
            # legacy java handling
            if frame is not None:
                frame=frame.replace(".rythm","")
            pass
        return frame
            
[docs]    def getContent(self,pagePath:str):
        ''' get the content for the given pagePath 
        Args:
            pagePath(str): the pagePath
            whatToFilter(list): list of filter keys
        Returns:
            str: the HTML content for the given path
        '''
        content=None
        error=None
        pageTitle="?"
        try:
            if pagePath=="/":
                pageTitle=self.site.defaultPage
            else:
                error=self.checkPath(pagePath)
                pageTitle=self.wikiPage(pagePath)
            if error is None:
                if self.wiki is None:
                    raise Exception("getContent without wiki - you might want to call open first")
                content=self.wiki.getHtml(pageTitle)
                soup=self.filter(content)
                soup=self.fixHtml(soup)
                content=self.unwrap(soup)
        except Exception as e:
            error=self.errMsg(e)
        return pageTitle,content,error
    
[docs]    def renderTemplate(self,template,**kwargs):
        '''
        render the given template with the given arguments
        
        Args:
            template(str): the template file to be used
            kwargs(): same arguments a for dict constructor
            
        Returns:
            str: the rendered result
        '''
        # pass on keyword args
        if self.appWrap is not None:
            with self.appWrap.app.app_context():
                result=render_template(template,**kwargs)
        else:   
            result=render_template(template,**kwargs)
        if result is None:
            return None,self.site.error
        else:
            return result,None
        
[docs]    def toReveal(self,html):
        '''
        convert the given html to reveal
        '''
        soup = BeautifulSoup(html,'lxml')
        for h2 in soup.findChildren(recursive=True):
            if h2.name=="h2":
                span=h2.next_element
                if span.name=="span":
                    tagid=span.get('id')
                    if tagid.startswith("⌘⌘"):
                        section = soup.new_tag("section")
                        h2.parent.append(section)
                        section.insert(0,h2)
                        tag=h2.next_element
                        while (tag is not None and tag.name!="h2"):
                            if tag.parent!=h2:
                                section.append(tag)
                            tag=tag.next_element
        html=self.unwrap(soup)
        return html
        
[docs]    def render(self,path:str,**kwargs)->str:
        '''
        render the given path
        
        Args:
            path(str): the path to render the content for
            kwargs(): optional keyword arguments
            
        Returns:
            str: the rendered result
        '''
        if self.needsProxy(path):
            result=self.proxy(path)
        else:
            pageTitle, content, error = self.getContent(path);
            frame=self.getFrame(pageTitle)
            if frame is not None:
                template = "%s.html" % frame
                if frame == "reveal" and error is None:
                    content=self.toReveal(content)
            else:
                template = self.site.template
            if not 'title' in kwargs:
                kwargs['title']=pageTitle
            if not 'content' in kwargs:
                kwargs['content']=content
            if not 'error' in kwargs:
                kwargs['error']=error
            result=self.renderTemplate(template, **kwargs)
        return result