Logo Search packages:      
Sourcecode: uicilibris version File versions  Download package

w2book.py

# -*- coding: utf-8 -*-
#     $Id: w2book.py 42 2011-08-13 16:11:32Z georgesk $     
#
# w2book.py is part of the package uicilibris 
#
# uicilibris is based on wiki2beamer's code, which was authored by
# Michael Rentzsch and Kai Dietrich
#
# (c) 2007-2008 Michael Rentzsch (http://www.repc.de)
# (c) 2009-2010 Michael Rentzsch (http://www.repc.de)
#               Kai Dietrich (mail@cleeus.de)
# (c) 2011      Georges Khaznadar (georgesk@ofset.org)
#
# Create high-level parseable code from a wiki-like code, like LaTeX
#
#
#     This file is part of uicilibris.
# uicilibris is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# uicilibris is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with uicilibris.  If not, see <http://www.gnu.org/licenses/>.

import re, wikiParser, url, os.path, sys
from transform2book import transform
from expand import *
# from xml.dom.minidom import parse
# BeautifulSoup is much more resilient than xml.dom
from BeautifulSoup import BeautifulSoup

class w2bstate:
    """
    a class to implement the states of the engine which
    drives wiki2beamer objects

    Every attribute is created by the constructor; only their initial
    values are defined here, the engine code fills them in later.
    """

    # attribute names, in the order in which __str__ reports them
    _reported = (
        "enum_item_level",
        "current_line",
        "defverbs",
        "code_pos",
        "tableStack",
        "allTables",
        "tableErrors",
        "currentPage",
    )

    def __init__(self):
        """
        the constructor: every counter starts at zero, every container
        starts empty and there is no current page
        """
        self.enum_item_level = ''
        self.current_line = 0
        self.defverbs = {}
        self.code_pos = 0
        self.tableStack = []
        self.allTables = []
        self.tableErrors = []
        self.currentPage = None

    def __str__(self):
        """
        @return a one-line dump of all the attributes, formatted as
        "w2bstate instance{name = value, ...}"
        """
        # each value is formatted through an explicit tuple, so that an
        # attribute which happens to hold a tuple is printed as a whole
        # instead of being unpacked by the % operator
        pairs = ["%s = %s" % (name, getattr(self, name))
                 for name in self._reported]
        return "w2bstate instance{%s}" % ", ".join(pairs)

    def flushTableStack(self):
        """
        resets the stack of tables
        """
        self.tableStack = []

# Recognizes a placeholder tabular header of the form
# "\begin{tabular}[table id=N]" and captures the decimal table id N.
# wiki2book.processTabular uses it both to detect such a header at the
# start of a line and to substitute a real column specification for it.
tableHeadPattern =re.compile(r"\\begin{tabular}\[table id=([0-9]+)\]")

class wiki2book(wikiParser.wikiParser):
    """
    a class which enriches wikiParser with a LaTeX/Book
    export feature.

    The underlying wikiParser already processes input files or an url
    pointing to a table of contents in a mediawiki and modifies the
    content with templates defined in wikiParser.processTemplates4L
    """

    def __str__(self):
        """
        @return the whole document as a single string: the preamble,
        then every line yielded by convert2book (tabular headers completed
        by processTabular, then passed through sanitize), then the postamble
        """
        # self.lines and self.report are not set in this class; presumably
        # they are provided by wikiParser -- confirm against the base class
        parts = [self.preamble()]
        for l in self.convert2book(self.lines, self.report):
            parts.append(self.sanitize(self.processTabular(l)))
        parts.append(self.postamble())
        return "".join(parts)

    def processTabular(self, l):
        """
        replaces a placeholder header "\\begin{tabular}[table id=N]" by a
        real one, with one left-aligned column per column of table N
        @param l a line of converted text
        @return the line, modified only when it begins with such a header
        """
        m = tableHeadPattern.match(l)
        if not m:
            return l
        tableId = int(m.group(1))
        # fall back to a single column when no table with that id has been
        # recorded, instead of failing on an unbound variable
        cols = 1
        for t in self.state.allTables:
            if t.id == tableId:
                cols = t.columns
                if cols < 1:
                    cols = 1
                break
        columns = "l|" * cols
        return tableHeadPattern.sub(r"\\begin{tabular}{|%s}" % columns, l)

    def toFile(self, fileName, report=None):
        """
        write self contents to a file
        imports and writes necessary images
        @param fileName path of the LaTeX file to write; the images are
        written into the same directory
        @param report if True, this method will output a few messages on sys.stderr;
        if report is a callback function, this function will be called with one parameter.
        """
        def notify(message):
            # dispatch one message according to the kind of `report`
            if report == True:
                print >> sys.stderr, message
            elif callable(report):
                report(message)

        # convert first, so that a failing conversion does not leave a
        # truncated output file behind
        text = "%s" % self
        outfile = open(fileName, "w")
        try:
            outfile.write(text)
        finally:
            outfile.close()

        path = os.path.dirname(fileName)
        for img in self.imageSet:
            # NOTE(review): the description page is fetched from
            # self.baseAddress but the image itself from self.host --
            # confirm that both attributes are meant to differ
            completeUrl = "http://%s/index.php/File:%s" % (self.baseAddress, img)
            page = url.urlopen(completeUrl)
            soup = BeautifulSoup(page.read())
            # reset for each image, so that a page without any link cannot
            # reuse the link which was found for the previous image
            href = ""
            for div in soup.findAll("div", id="file"):
                a = div.find("a")
                if a is None:
                    continue
                href = a["href"]
                notify("'%s'" % href)
            if not href:
                notify("no file link found for '%s'" % img)
                continue
            imgData = url.urlopen("http://%s/%s" % (self.host, href))
            # binary mode: image data must be written byte for byte
            imgFile = open(os.path.join(path, img), "wb")
            try:
                imgFile.write(imgData.read())
            finally:
                imgFile.close()
        return

    def preamble(self):
        """
        @return the fixed LaTeX header of the book, up to and including
        the line which begins the document
        """
        return """\
\\documentclass{book}
% -*- coding: utf-8 -*-

\\usepackage[utf8x]{inputenc}
\\usepackage{ucs}
\\usepackage{lmodern}
\\usepackage{graphicx}
\\usepackage[frenchb]{babel}
\\usepackage{hyperref,wrapfig}
\\usepackage{amssymb}
\\usepackage{latexsym}

\\PrerenderUnicode{É} % Pre-render some accented chars for titles of chapter
\\PrerenderUnicode{À}

\\newcommand{\\nop}{}

\\begin{document}
"""

    def postamble(self):
        """
        @return the fixed LaTeX footer, which ends the document
        """
        return """\
\\end{document}
"""

    def sanitize(self, s):
        """
        @param s one line of converted text
        @return a sanitized output: get rid of <math>, &amp; etc.
        processes the comments left by previous works;
        processes underscores in lines where there is no maths
        turns lines with math-only contents to out-of text formulas
        takes in account url-like words
        """
        # applied strictly in this order: the specific escaped tags must be
        # handled before the generic "&amp;lt;", "&gt;" and "&amp;" entities
        replacements = (
            ("&quot;", "\""),
            ("&amp;lt;math&gt;", "$"),
            ("&amp;lt;/math&gt;", "$"),
            ("&amp;lt;code&gt;", "\\texttt{"),
            ("&amp;lt;/code&gt;", "}"),
            ("&amp;lt;ref&gt;", "\\footnote{"),
            ("&amp;lt;/ref&gt;", " }"),
            ("&amp;lt;references /&gt;", ""),
            ("&amp;lt;", "<"),
            ("&gt;", ">"),
            ("&amp;", "\\&"),
            ("%", "\\%{}"),
        )
        for old, new in replacements:
            s = s.replace(old, new)
        # \comment{...} becomes a LaTeX comment; this runs after the
        # escaping of "%" so that its own percent signs stay unescaped
        s = re.sub(r"\\comment\{(.*)\}", r"%% \1", s)
        if "$" not in s:
            s = s.replace("_", "\\_")
        # a line which starts and ends with "$" gets a second pair of "$"
        if len(s) > 2 and s[0] == "$" and s[-1] == "$":
            s = "$%s$" % s
        # the backslash of \mbox is doubled in the template: a lone "\m" is
        # an invalid escape for re.sub in recent Python versions
        s = re.sub(r"(https?://[^ \}]+)", r"\\href{\1 }{\\mbox{\1} }", s)
        return s

    def convert2book(self, lines, report=False):
        """
        convert to LaTeX book
        @param lines a list of lines
        @param report if True, messages are emitted to sys.stderr;
        if it is callable, it is invoked with the same messages
        @return the list of converted lines; it begins with an empty
        line, which stands for line 0
        """
        self.state = w2bstate()
        result = ['']  # start with one empty line as line 0
        codebuffer = []

        nowikimode = False
        codemode = False

        for line in lines:
            (line, nowikimode) = wikiParser.get_nowikimode(line, nowikimode)
            if nowikimode:
                # no conversion at all inside a nowiki section
                result.append(line)
                continue

            (line, _codemode) = wikiParser.get_codemode(line, codemode)
            if _codemode and not codemode:  # code mode was turned on
                codebuffer = []
            elif not _codemode and codemode:  # code mode was turned off
                expand_code_segment(result, codebuffer, self.state)
            codemode = _codemode

            if codemode:
                # code lines are only collected here; they are expanded
                # when code mode is turned off
                codebuffer.append(line)
            else:
                self.state.current_line = len(result)
                result.append(transform(line, self.state, report))

        result.append(transform("", self.state, report))  # close open environments

        # insert defverbs somewhere at the beginning
        expand_code_defverbs(result, self.state)
        return result

    def parse_usepackage(self, usepackage):
        """
        @param usepackage (str)
            the unparsed usepackage string in the form [options]{name}
        @return (tuple)
            (name(str), options(str)); options is None when the string
            has no [options] part. When the string is malformed or the
            name is empty, syntax_error is called and, if it returns,
            the result is None
        """
        m = re.match(r'^\s*(\[.*\])?\s*\{(.*)\}\s*$', usepackage)
        # the name group is mandatory in the pattern, so it is a string
        # whenever there is a match; it may still be blank
        if m is None or m.group(2).strip() == '':
            syntax_error('usepackage specifications have to be of the form [%s]{%s}', usepackage)
            return None
        return (m.group(2).strip(), m.group(1))

    def parse_bool(self, string):
        """
        @param string one of 'True', 'true', '1', 'False', 'false', '0'
        @return the matching boolean; for any other value syntax_error is
        called and, if it returns, the result is False
        """
        if string in ('True', 'true', '1'):
            return True
        if string not in ('False', 'false', '0'):
            syntax_error('Boolean expected (True/true/1 or False/false/0)', string)
        return False


Generated by  Doxygen 1.6.0   Back to index