| 1 |
# =========================================================================== |
|---|
| 2 |
# Copyright (C) 2006-2008 Ancient World Mapping Center (UNC-CH) and the |
|---|
| 3 |
# Institute for the Study of the Ancient World (NYU) |
|---|
| 4 |
# |
|---|
| 5 |
# This program is free software; you can redistribute it and/or modify |
|---|
| 6 |
# it under the terms of the GNU General Public License as published by |
|---|
| 7 |
# the Free Software Foundation; either version 2 of the License, or |
|---|
| 8 |
# (at your option) any later version. |
|---|
| 9 |
# |
|---|
| 10 |
# This program is distributed in the hope that it will be useful, |
|---|
| 11 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|---|
| 12 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|---|
| 13 |
# GNU General Public License for more details. |
|---|
| 14 |
# |
|---|
| 15 |
# You should have received a copy of the GNU General Public License along |
|---|
| 16 |
# with this program; if not, write to the Free Software Foundation, Inc., |
|---|
| 17 |
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
|---|
| 18 |
# |
|---|
| 19 |
# About Pleiades |
|---|
| 20 |
# -------------- |
|---|
| 21 |
# |
|---|
| 22 |
# Pleiades is an international research network and associated web portal and |
|---|
| 23 |
# content management system devoted to the study of ancient geography. |
|---|
| 24 |
# |
|---|
| 25 |
# See http://pleiades.stoa.org |
|---|
| 26 |
# |
|---|
| 27 |
# Funding for the creation of this software was provided by a grant from the |
|---|
| 28 |
# U.S. National Endowment for the Humanities (http://www.neh.gov), and |
|---|
| 29 |
# by the Institute for the Study of the Ancient World at New York University |
|---|
| 30 |
# (http://www.nyu.edu/isaw) |
|---|
| 31 |
# =========================================================================== |
|---|
| 32 |
|
|---|
| 33 |
import logging |
|---|
| 34 |
from os.path import join |
|---|
| 35 |
|
|---|
| 36 |
import lxml.etree as etree |
|---|
| 37 |
|
|---|
| 38 |
# File name of the XSLT stylesheet applied by strip(); it is looked up inside
# the ``contextpath`` directory passed to that function.
XSLTFILE = 'wordstripper.xsl'

# Text type used to serialize the XSLT result. Python 2 provides ``unicode``;
# Python 3 has no such builtin, so fall back to ``str`` there.
try:
    _text_type = unicode
except NameError:
    _text_type = str


def strip(contextpath, source):
    """Strip unneeded formatting inherited from MSWord.

    Uses lxml and an external XSLT stylesheet (``XSLTFILE``).

    Parameters:
        contextpath: directory that contains the stylesheet; it is joined
            with ``XSLTFILE`` to build the path that gets parsed.
        source: the document handed to the compiled transform -- presumably
            an lxml element or element tree (TODO confirm against callers).

    Returns:
        The value of ``etree.XML()`` applied to the serialized transform
        output, i.e. the root element of the freshly parsed result.

    Exceptions raised by lxml while loading the stylesheet, running the
    transform or re-parsing its output are not caught here.
    """
    # Lazy logging arguments: the message text is unchanged, but it is only
    # formatted when an INFO record is actually emitted.
    logging.info(
        "BEGIN attempt to strip unneeded formatting inherited from MSWord "
        "using %s: wordstripper.strip()",
        XSLTFILE)
    xslt_doc = etree.parse(join(contextpath, XSLTFILE))
    transform = etree.XSLT(xslt_doc)
    # The transform result is serialized to text and parsed again.
    # NOTE(review): presumably done to hand back a plain element instead of
    # the XSLT result tree -- confirm before simplifying.
    result = etree.XML(_text_type(transform(source)))
    logging.info("DONE with wordstripper.strip()")
    return result
|---|
| 48 |
|
|---|
| 49 |
def _test():
    """Run this module's doctests, then those in tests/wordstripper.txt."""
    from doctest import testfile, testmod

    testmod()
    testfile('tests/wordstripper.txt')
    # invoke additional doctest files here


# Run the doctests when this file is executed as a script.
if __name__ == "__main__":
    _test()
|---|
| 57 |
|
|---|
| 58 |
|
|---|