# fellowiki/controllers/wikiparser/links.py

# Copyright (c) 2006 Jan Niklas Fingerle
# 
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

"""fellowiki wiki parser: macro support

TODO
    
"""

import re
from urllib import quote, unquote, splitattr, splituser
from urlparse import urlsplit, urlunsplit    

from parser import Token, XMLElement
from util import remove_backslashes_and_whitespace

# URL schemes accepted for link targets (passed to check_and_normalize_url
# by LinkToken and ImageLinkToken); other targets are handled as internal
# links.
link_allowed_targets = ['http', 'ftp', 'mailto']
# URL schemes accepted for the image source of an image link.
image_link_allowed_targets = ['http', 'ftp']
# Schemes for which check_and_normalize_url handles the part after the
# scheme as "user@domain" instead of as a path.
user_domain_schemes = ['mailto']

# Keys used for the entries in wiki_parser.regexes, for the lookups in the
# ``procs`` mapping and as first element of the translation tuples.
LINK = 'link'
IMAGE_LINK = 'image link'

def check_and_normalize_url(url, allowed_schemes):
    """Check the scheme of a URL and bring the URL into a normalized form.

    The URL is taken apart with urlsplit().  Every component is unquoted
    and quoted again (so that input which already is percent-escaped does
    not get escaped twice) and the host name is IDNA encoded.  For schemes
    listed in user_domain_schemes the part after the scheme is handled as
    "user@domain" instead of as a path; query and fragment are dropped for
    these schemes.

    Arguments:
        url -- the URL to check (presumably a unicode string, as its
               components get encoded to UTF-8 -- TODO confirm)
        allowed_schemes -- collection of the acceptable scheme names

    Returns a tuple (scheme, url):
        (None, <unchanged url>) if the scheme is not in allowed_schemes
        or if the host name can not be IDNA encoded,
        (scheme, <normalized url>) otherwise.
    """
    def requote(text):
        # unquote first, so that already escaped input isn't escaped twice
        return quote(unquote(text.encode('utf-8')))

    scheme, location, path, query, fragment = urlsplit(url)

    if scheme not in allowed_schemes:
        return None, url

    user_domain = scheme in user_domain_schemes

    if user_domain:
        # urlsplit() reports e.g. the address of a mailto URL as path;
        # handle it as location, so that user and host get normalized below
        location = path
        path = query = fragment = ''

    if location is not None:
        user, host = splituser(location)
        if host != '': ## idna encoding encodes '' ==> '.' :-(
            try:
                host = host.encode('idna')
            except UnicodeError:
                # host names with empty or overlong labels (e.g. "a..b")
                # can't be IDNA encoded; report the URL as not acceptable
                # instead of letting the exception escape into the parser
                return None, url

        if user is not None:
            # user may be "name:password", both parts are quoted separately
            user = ':'.join([requote(u) for u in user.split(':')])
            location = u'%s@%s' % (user, host)
        else:
            location = host

    if path is not None:
        segments = []

        for segment in path.split('/'):
            # a segment may carry ";key=value" parameters
            name, parms = splitattr(segment)
            pieces = [requote(name)]
            for parm in parms:
                pieces.append(u'='.join([requote(p)
                                         for p in parm.split('=')]))
            segments.append(u';'.join(pieces))

        path = u'/'.join(segments)

    if query is not None:
        query = u'&'.join([u'='.join([requote(q) for q in query_.split('=')])
                           for query_ in query.split('&')])

    if fragment is not None:
        fragment = requote(fragment)

    if user_domain:
        # move "user@domain" back to where urlunsplit() expects it
        path = location
        location = ''

    return scheme, urlunsplit((scheme, location, path, query, fragment))
            
class LinkToken(Token):
    """Token for links written as ``[[text>>target]]`` or ``[[target]]``.

    evaluate() builds the XML element for the link in self.xhtml,
    render() hands this element over to the following token.
    """

    def render(self, new_token):
        """Prepend the element built by evaluate() to new_token."""
        new_token.prepend(self.xhtml)

    def evaluate(self, result, tokens, state, procs):
        """Build self.xhtml from the link markup in self.text.

        Targets with a scheme from link_allowed_targets become external
        (or mailto) links with a normalized href.  Any other target is
        registered as a LINK translation if procs has a LINK entry;
        without such an entry the element is turned into a ``span``
        marked as unresolved.
        """
        Token.evaluate(self, result, tokens, state, procs)

        element = XMLElement('a')
        self.xhtml = element

        found = re.match((r'\[\[(([^\\\]]|\\.|\](?=[^\]]))*)'
                          r'>>(([^\\\]]|\\.|\](?=[^\]]))*)\]\]'),
                         self.text)

        if found is None:
            # no ">>": the whole content is link text and target at once
            content = remove_backslashes_and_whitespace(self.text[2:-2])
            text = target = content
        else:
            # groups 2 and 4 only hold the last repetition of the inner
            # alternatives and are of no use here
            text = remove_backslashes_and_whitespace(found.group(1))
            target = remove_backslashes_and_whitespace(found.group(3))

        element.append(re.sub('[ \n\t]+', ' ', text))

        scheme, url = check_and_normalize_url(target, link_allowed_targets)

        if scheme is None:
            # not an allowed external URL: internal link, if supported
            try:
                procs[LINK]
            except KeyError:
                element.attributes['class'] = 'link_unresolved'
                element.tag = 'span'
            else:
                element.translations.append((LINK, None, [target]))
                element.attributes['class'] = 'link_internal'
            return

        if scheme == 'mailto':
            css_class = 'link_mailto'
        else:
            css_class = 'link_external'

        element.attributes['class'] = css_class
        element.attributes['href'] = url
def _split_image_link(markup):
    """Split image link markup into (image, description, target).

    Recognized forms are ``[[[image||description>>target]]]``,
    ``[[[image>>target]]]``, ``[[[image||description]]]`` and
    ``[[[image]]]``.  Parts which are missing in the markup are returned
    as None; the parts are returned as they are written, without any
    cleanup.
    """
    # 1st attempt: description and link
    found = re.match((r'\[\[\[(([^\\\]]|\\.|\]{1,2}(?=[^\]]))*)'
                      r'\|\|(([^\\\]]|\\.|\]{1,2}(?=[^\]]))*)'
                      r'>>(([^\\\]]|\\.|\]{1,2}(?=[^\]]))*)\]\]\]'),
                     markup)
    if found is not None:
        return found.group(1), found.group(3), found.group(5)

    # 2nd attempt: only link
    found = re.match((r'\[\[\[(([^\\\]]|\\.|\]{1,2}(?=[^\]]))*)'
                      r'>>(([^\\\]]|\\.|\]{1,2}(?=[^\]]))*)\]\]\]'),
                     markup)
    if found is not None:
        return found.group(1), None, found.group(3)

    # 3rd attempt: only description
    found = re.match((r'\[\[\[(([^\\\]]|\\.|\]{1,2}(?=[^\]]))*)'
                      r'\|\|(([^\\\]]|\\.|\]{1,2}(?=[^\]]))*)\]\]\]'),
                     markup)
    if found is not None:
        return found.group(1), found.group(3), None

    # neither description nor link: everything between the brackets
    return markup[3:-3], None, None


class ImageLinkToken(Token):
    """Token for images written as ``[[[image||description>>target]]]``.

    Description and target are both optional.  evaluate() builds the XML
    element for the image (and the link around it, if a target is given)
    in self.xhtml, render() hands this element over to the following
    token.
    """

    def render(self, new_token):
        """Prepend the element built by evaluate() to new_token."""
        new_token.prepend(self.xhtml)

    def evaluate(self, result, tokens, state, procs):
        """Build self.xhtml from the image link markup in self.text.

        The target is handled like the target of a LinkToken.  An image
        with a scheme from image_link_allowed_targets becomes an ``img``
        element, which replaces the link element if there is no target.
        Any other image is registered as an IMAGE_LINK translation if
        procs has an IMAGE_LINK entry; without such an entry a ``span``
        showing image name and description is inserted instead.
        """
        Token.evaluate(self, result, tokens, state, procs)

        self.xhtml = XMLElement('a')

        image, description, target = _split_image_link(self.text)

        image = remove_backslashes_and_whitespace(image)
        if description is None:
            description = ''
        else:
            description = remove_backslashes_and_whitespace(description)
            description = re.sub('[ \n\t]+', ' ', description)

        has_target = target is not None

        if has_target:
            target = remove_backslashes_and_whitespace(target)
            scheme, url = check_and_normalize_url(target, link_allowed_targets)

            if scheme is not None:
                if scheme == 'mailto':
                    self.xhtml.attributes['class'] = 'link_mailto'
                else:
                    self.xhtml.attributes['class'] = 'link_external'
                self.xhtml.attributes['href'] = url
            else:
                try:
                    # the lookup only checks that internal links are
                    # supported, the entry itself isn't needed here
                    procs[LINK]
                    self.xhtml.translations.append((LINK,
                                                    None,
                                                    [target]))
                    self.xhtml.attributes['class'] = 'link_internal'
                except KeyError:
                    self.xhtml.tag = 'span'
                    self.xhtml.attributes['class'] = 'link_unresolved'

        scheme, url = check_and_normalize_url(image, image_link_allowed_targets)

        if scheme is not None:
            img = XMLElement('img')
            img.attributes['class'] = 'img_external'
            img.attributes['src'] = url
            img.attributes['alt'] = description

            if has_target:
                self.xhtml.append(img)
            else:
                # no link around the image: the image is the whole result
                self.xhtml = img
            return

        try:
            # as above, the lookup is only a check for support
            procs[IMAGE_LINK]
            self.xhtml.translations.append((IMAGE_LINK,
                                            None,
                                            [image, description, has_target]))
        except KeyError:
            # NOTE(review): without a target the span ends up inside an
            # ``a`` element that has neither href nor class -- confirm
            # that this is intended
            span = XMLElement('span')

            span.append(re.sub('[ \n\t]+', ' ', image))
            if description != '':
                span.append(": %s" % description)

            span.attributes['class'] = 'image_unresolved'
            self.xhtml.append(span)

          
def extend_wiki_parser(wiki_parser):
    """Register the image link and link tokens in wiki_parser.regexes.

    Each entry is a tuple (number, regular expression, token class,
    keyword dict); the number presumably controls the order in which the
    parser tries the expressions -- TODO confirm against the parser.
    """
    registrations = (
        (IMAGE_LINK, 10,
         r'\[\[\[([^\\\]\n]|\\.|\]{1,2}(?=[^\]]))*\]\]\]',
         ImageLinkToken),
        (LINK, 20,
         r'\[\[([^\\\]\n]|\\.|\](?=[^\]]))*\]\]',
         LinkToken),
    )

    for key, number, regex, token_class in registrations:
        wiki_parser.regexes[key] = (number, regex, token_class,
                                    dict(preference = 20))
# fellowiki documentation built on Sept. 19, 2017, 7:34 p.m.