Bib2LaTeX Converter

From PeformIQ Upgrade
Jump to navigation Jump to search

Overview

Script

#!/usr/bin/env python

# bib2ltx.py

# Author:   Christopher Arndt <chris.arndt@web.de>
# Version:  0.2b
# Date:     Monday, 21.01.2002
# Copyleft: GPL

"""This script parses an XML list of bibliographic entries and outputs
them as a LaTeX document.
"""

import xml.sax
import UserDict, UserList


# Replacement text for every character that gets escaped.
#
# The values are raw strings on purpose: in an ordinary literal '\b' is a
# backspace and '\v' is a vertical tab, which silently corrupted the
# '$\backslash$' and '\verb|...|' replacements.
#
# '$' is deliberately absent: it is passed through unescaped.
_LATEX_ESCAPES = {
    # NOTE(review): '|' is rendered as a backslash glyph.  This key looks
    # like it may have been meant to be the backslash character itself --
    # confirm against real input before changing it.
    '|': r'$\backslash$',
    '{': r'\{',
    '}': r'\}',
    '#': r'\#',
    '%': r'\%',
    '&': r'\&',
    '_': r'\_',
    '"': r'\dq{}',
    '^': r'\verb|^|',
    '~': r'\verb|~|',
}


def latex_escape(s):
    """Return a copy of the string *s* with LaTeX special characters escaped.

    Each character of *s* is looked up exactly once, so the replacement
    text (which itself contains braces, bars and backslashes) is never
    escaped a second time.  Characters without an entry in the table,
    including '$' and the backslash, are copied unchanged.
    """
    return "".join([_LATEX_ESCAPES.get(ch, ch) for ch in s])


class BibItemField:
    """Container for a single tag value and its attributes.

    value -- the content of the tag (None when not given)
    attrs -- mapping of the tag's attributes; when omitted or None, a
             fresh empty dict is created for this instance
    """

    def __init__(self, value=None, attrs=None):
        self.value = value
        # A literal ``{}`` default is evaluated once and would be shared by
        # every instance created without attributes, so a change made
        # through one field would show up on all the others.
        if attrs is None:
            attrs = {}
        self.attrs = attrs


class BibItem(UserDict.UserDict):
    """Item (entry) of the bibliography.

    Basically a dict that maps a tag name to the list of field objects
    collected for that tag; each field object carries a ``value``.
    ``itemType`` records which kind of entry this is.
    """

    def __init__(self, itemType='book', dict=None):
        """Create an entry of type *itemType*, optionally pre-filled from *dict*."""
        UserDict.UserDict.__init__(self, dict)
        self.itemType = itemType

    def getField(self, key, default="", sep=", "):
        """Return the LaTeX-escaped text for the tag *key*.

        key     -- tag name to look up
        default -- text used (after escaping) when the tag is absent
        sep     -- separator placed between multiple values of the tag

        With more than three values only the first one is returned,
        followed by the marker '[u.a.]'.
        """
        if key not in self.data:
            return latex_escape(default)

        # A real list (not a lazy map object) is needed for len() below.
        values = [field.value for field in self.data[key]]
        if len(values) > 3:
            text = values[0] + '[u.a.]'
        else:
            text = sep.join(values)
        return latex_escape(text)

    def __str__(self):
        """Return one "'tag': value" line per tag, for debugging."""
        lines = ["'%s': %s\n" % (key, self.getField(key))
                 for key in self.data.keys()]
        return "".join(lines)


class Bibliography(UserList.UserList):
    """The bibliography is basically a list of BibItems.

    It has a method for sorting the bibliography and for outputting it
    as a LaTeX document.

    Class attributes (each may be overridden on an instance):

    itemTypes  -- tag names that are recognised as bibliography entries
    format     -- name of the output format (not read by this class)
    standalone -- when true, output() wraps the list in preamble/postamble
    sortOutput -- when true, output() sorts the items before writing
    preamble   -- text written before the list in standalone mode
    postamble  -- text written after the list in standalone mode
    """

    itemTypes = ['book', 'article', 'injournal', 'webpage']
    format = 'latex'
    standalone = 1
    sortOutput = 1
    preamble = r"""\documentclass[german,a4paper]{scrartcl}

\usepackage{babel}
\usepackage[T1]{fontenc}
\usepackage[latin1]{inputenc}
\usepackage{times}

\begin{document}
"""
    postamble = "\\end{document}\n"


    def __init__(self, items=None):
        """Create a bibliography, optionally pre-filled with *items*."""
        UserList.UserList.__init__(self, items)

    def _authorKey(self, item):
        """Return the lower-cased first author of *item*, or 'anonymous'."""
        authors = item.get('author')
        if authors:
            return authors[0].value.lower()
        return 'anonymous'

    def sortItems(self, x, y):
        """Compare two items cmp-style: by first author, then by year.

        Authors are compared case-insensitively; an item without an
        author sorts as 'anonymous'.  When the authors are equal the first
        'year' values decide.  If either item has no usable year the two
        are treated as equal.

        Returns a negative number, zero or a positive number.
        """
        r = cmp(self._authorKey(x), self._authorKey(y))
        if r != 0:
            return r
        try:
            return cmp(x['year'][0].value, y['year'][0].value)
        except (KeyError, IndexError, AttributeError, TypeError):
            # Only a missing or malformed year means "cannot decide";
            # anything else (e.g. KeyboardInterrupt) must not be swallowed.
            return 0

    def output(self, fp):
        """Write the whole bibliography to *fp*.

        fp -- object with a write() method; it is kept in ``self.out``
              because outputItem() writes through that attribute.

        Sorts the list in place first when ``sortOutput`` is true and adds
        the preamble and postamble when ``standalone`` is true.
        """
        self.out = fp

        if self.sortOutput:
            self.sort(self.sortItems)

        # print out the LaTeX preamble
        if self.standalone:
            self.out.write(self.preamble)

        self.out.write("\\section*{Literatur}\n\n")
        self.out.write("\\begin{enumerate}\n\\raggedright\n")
        # outputItem() takes an index, so iterate over positions.
        for i in range(len(self)):
            self.outputItem(i)
        self.out.write("\\end{enumerate}\n")
        if self.standalone:
            self.out.write(self.postamble)

    def outputItem(self, i):
        """Write the entry at index *i* to ``self.out`` as one list item.

        Which parts are written depends on the entry's ``itemType``; fields
        that are missing fall back to the placeholder passed to getField().
        """
        item = self[i]
        write = self.out.write
        kind = item.itemType

        write(r"\item ")
        # XXX check for "[ders.]"

        # author and title
        write(r"\textsc{%s}, " % item.getField('author', 'Anonymous'))
        write(r"\emph{%s}, " % item.getField('title'))

        # title and author (editor) of containing book
        if kind == 'article':
            write("in: %s, " % item.getField('intitle'))
            # 'hrsg. v.' (German: "edited by") marks the person as editor
            if 'inauthor' in item and \
              item['inauthor'][0].attrs.get('is_ed'):
                write("hrsg. v. ")
            write("%s, " % item.getField('inauthor', '', ' -- '))

        if kind == 'injournal':
            write('%s ' % item.getField('journal'))

        # volume number ('Bd.' is the German abbreviation for volume)
        if 'volume' in item:
            if kind in ['book', 'article']:
                write('Bd. ')
            write('%s, ' % item.getField('volume'))

        # where published and when
        # ('o.O.' / 'o.J.': German for "no place" / "no year")
        if kind in ['book', 'article']:
            write("%s " % item.getField('city', 'o.O.', " -- "))

        if kind in ['book', 'article', 'injournal']:
            write("%s" % item.getField('year', 'o.J.'))

        if kind == 'webpage':
            write("%s, " % item.getField('url'))
            write(item.getField('date'))

        # page numbers ('XXX' flags a missing page range in the output)
        if kind in ['article', 'injournal']:
            write(", %s." % item.getField('pages', 'XXX'))
        else:
            write(".")

        # signature, when present, goes on a line of its own
        if 'signature' in item:
            write("\\\\\nSign.: %s\n" % item.getField('signature', '', " / "))
        write("\n\n")


class ContentHandler(xml.sax.ContentHandler):
    """Handler for SAX parsing.

    Collects bibliography entries in a Bibliography object (``items``).

    State kept while parsing:

    item    -- the entry currently being built, or None outside an entry
    current -- name of the field tag currently open, or None
    attrs   -- attributes of the field tag currently open, or None
    text    -- character data chunks collected for the open field
    """

    def __init__(self):
        # The base class initialises the document locator slot.
        xml.sax.ContentHandler.__init__(self)
        self.item = None
        self.items = Bibliography()
        self.current = None
        self.text = []
        self.attrs = None

    def _malformed(self):
        """Build the exception raised for wrongly nested entry tags.

        SAXParseException needs (msg, exception, locator) and reads the
        position from the locator immediately, so a locator object must
        always be supplied.
        """
        locator = getattr(self, '_locator', None)
        if locator is None:
            # No parser has registered a locator; use the neutral base
            # Locator so the exception can still be constructed.
            locator = xml.sax.xmlreader.Locator()
        return xml.sax.SAXParseException("Malformed XML", None, locator)

    def startElement(self, name, attrs):
        """Handle an event for the start tag.

        An entry tag starts a new BibItem; any other tag inside an entry
        becomes the currently open field.  Tags outside an entry are
        ignored.  Raises SAXParseException when an entry tag is opened
        inside an entry of the same type.
        """

        if name in self.items.itemTypes:
            if name == getattr(self.item, 'itemType', None):
                raise self._malformed()
            self.item = BibItem(name)
        elif self.item is not None:
            self.current = name
            self.attrs = attrs

    def endElement(self, name):
        """Handle an event for the closing tag.

        Closing an entry tag appends the finished entry to ``items``;
        closing a field tag stores the collected text on the entry.
        Raises SAXParseException when the closing entry tag does not match
        the entry being built.
        """

        if name in self.items.itemTypes:
            if name != getattr(self.item, 'itemType', None):
                raise self._malformed()
            self.items.append(self.item)
            self.item = None
        elif self.item is not None and self.current is not None:
            # ``current`` is None when a nested tag has already closed the
            # field; storing then would create an entry under the key None.
            self.setField()
        self.current = None
        self.text = []
        self.attrs = None


    def characters(self, data, *args):
        """Handle a character data event.

        Text is collected only while a field tag is open; it is encoded
        as ISO 8859-1 before being stored.
        """

        if data and self.current:
            self.text.append(data.encode('iso8859_1'))

    def setField(self):
        """Append the collected text as a new value of the open field."""
        field = BibItemField("".join(self.text), self.attrs)
        if self.current in self.item:
            self.item[self.current].append(field)
        else:
            self.item[self.current] = [field]

def parseXML(file):
    """Run a SAX parse over *file* and return the collected entries.

    *file* is handed unchanged to the parser's parse() method.  The
    result is the ``items`` attribute of the ContentHandler that
    received the parse events.
    """
    handler = ContentHandler()

    reader = xml.sax.make_parser()
    reader.setContentHandler(handler)
    reader.parse(file)

    return handler.items


def main(args):
    """Parse a file from the commandline and output LaTeX on stdout.

    args -- command-line arguments without the program name; args[0] is
            the XML file to convert

    Returns 0 on success, 1 when the XML could not be parsed and 2 when
    no file argument was given.  Error messages go to stderr.
    """
    # Imported here so that main() also works when this module is imported
    # rather than run as a script.
    import sys

    if not args:
        sys.stderr.write("usage: bib2ltx.py FILE\n")
        return 2

    try:
        bib = parseXML(args[0])
    except xml.sax.SAXParseException as msg:
        sys.stderr.write("%s\nProcessing aborted!\n" % (msg))
        # Stop here: there is no bibliography to print.
        return 1

    bib.output(sys.stdout)
    return 0


if __name__ == '__main__':

    import sys
    # Hand main()'s result to the shell as the exit status; a result of
    # None counts as success.
    sys.exit(main(sys.argv[1:]))