| 1 |
""" |
|---|
| 2 |
dir2xml.py |
|---|
| 3 |
|
|---|
| 4 |
get all the batlas dirs from word-export html to xml once and for all |
|---|
| 5 |
""" |
|---|
| 6 |
|
|---|
| 7 |
import os |
|---|
| 8 |
import re |
|---|
| 9 |
import logging |
|---|
| 10 |
import datetime as dt |
|---|
| 11 |
|
|---|
| 12 |
import lxml.etree as etree |
|---|
| 13 |
|
|---|
| 14 |
import placesaver |
|---|
| 15 |
import batlaspipe as bp |
|---|
| 16 |
|
|---|
| 17 |
from placesaver import periods, AWMC, ADLGAZ, DC, XML, TEI, refmagic, do_nscleanup |
|---|
| 18 |
from bidmaker import DIAMOND_STOP_REGEX |
|---|
| 19 |
from batlaspipe import SLASH_REGEX, NAMESPACE, XMLDECL, DASHNUM_END_REGEX, PREPARER |
|---|
| 20 |
|
|---|
| 21 |
# Namespace URI used for the elements this module emits (place, baid, ...).
PLEIADES = 'http://atlantides.org/batlas/'

# Matches a per-map configuration filename; group 1 is the eight-character
# stem that precedes 'config.xml'.  Raw string so the escaped dot reaches the
# regex engine untouched.
BAD_REGEX = re.compile(r'(.{8})config\.xml')


# Text template that xmldump reads and %-formats into each output file.
COMMENTFILE = r'./etc/dirxmlcomment.txt'

# Whitespace-stripped citation strings already written; consulted by writecit.
priorcitations = []
|---|
| 29 |
|
|---|
| 30 |
def htmldump(map, pipe, destdir):
    """Serialize a pipe's cleaned XML to ``<destdir>/<map>.html``.

    map -- file stem of the output (shadows the builtin; kept for callers)
    pipe -- object indexable by 'cleanxml', yielding the tree to serialize
    destdir -- directory that receives the output file
    """
    # The body used to read from an undefined name ``p``; the data lives in
    # the ``pipe`` argument.
    cleanxml = pipe['cleanxml']
    pcontent = etree.tostring(cleanxml).encode('utf-8')
    dest = os.path.join(destdir, ''.join((map, '.html')))
    # The context manager closes the file even when the write fails.
    with open(dest, 'w') as outfile:
        outfile.write(pcontent)
|---|
| 38 |
|
|---|
| 39 |
|
|---|
| 40 |
def htmlconvert(configfile, sourcefile, destdir):
    """Create a batlaspipe Pipe for one map, run its cycle() and return it."""
    pipe = bp.Pipe(configfile, sourcefile, destdir)
    pipe.cycle()
    return pipe
|---|
| 44 |
|
|---|
| 45 |
|
|---|
| 46 |
def htmlconvertall(configdir, sourcedir, destdir):
    """Convert every configured map in a directory and dump each one as HTML.

    configdir -- directory scanned for '<stem>config.xml' files (BAD_REGEX)
    sourcedir -- directory holding the matching '<stem>.htm' source files
    destdir -- directory handed to the pipe and used for the '.html' output
    """
    matches = (BAD_REGEX.match(name) for name in os.listdir(configdir))
    stems = [found.group(1) for found in matches if found]
    for stem in stems:
        config = os.path.join(configdir, ''.join((stem, 'config', '.xml')))
        source = os.path.join(sourcedir, ''.join((stem, '.htm')))
        pipe = htmlconvert(config, source, destdir)
        # htmldump takes (map, pipe, destdir); the stem used to be left out,
        # which made this call raise TypeError.
        htmldump(stem, pipe, destdir)
|---|
| 61 |
|
|---|
| 62 |
def xmlconvert(configfile, sourcefile, destdir):
    """Run the HTML conversion for one map, then call idit() on the pipe.

    Returns the pipe produced by htmlconvert.
    """
    pipe = htmlconvert(configfile, sourcefile, destdir)
    pipe.idit()
    return pipe
|---|
| 67 |
|
|---|
| 68 |
|
|---|
| 69 |
def xmlconvertall(configdir, sourcedir, destdir):
    """Convert every configured map in a directory and write each as XML.

    configdir -- directory scanned for '<stem>config.xml' files (BAD_REGEX)
    sourcedir -- directory holding the matching '<stem>.htm' source files
    destdir -- directory handed to the pipe and to xmldump
    """
    stems = []
    for entry in os.listdir(configdir):
        found = BAD_REGEX.match(entry)
        if found:
            stems.append(found.group(1))

    for stem in stems:
        configpath = os.path.join(configdir, ''.join((stem, 'config', '.xml')))
        sourcepath = os.path.join(sourcedir, ''.join((stem, '.htm')))
        xmldump(xmlconvert(configpath, sourcepath, destdir), destdir)
    # NOTE(review): the original indentation was lost; this print is assumed
    # to run once after all maps rather than once per map -- confirm.
    print('wahoo')
|---|
| 85 |
|
|---|
| 86 |
|
|---|
| 87 |
|
|---|
| 88 |
def encodeids(place, parent):
    """Add a baid child per BAtlas id and mark the first baid as primary.

    Later ids equal to the first id are skipped.  Nothing is written when the
    place has no ids.
    """
    ids = place.batlasids
    if len(ids) == 0:
        return
    for position, bid in enumerate(ids):
        if position != 0 and bid == ids[0]:
            continue
        etree.SubElement(parent, '{%s}baid' % PLEIADES, id=bid)
    # the first baid child of parent is the primary one
    parent.xpath("*[local-name() = 'baid']")[0].attrib['primary'] = 'yes'
|---|
| 94 |
|
|---|
| 95 |
def encodetypes(place, parent):
    """Write the type, optional featurecount and subtype children of a place.

    The directory type is translated to a display label for four known
    values and passed through unchanged otherwise.
    """
    labels = {
        'name': 'labeled feature',
        'numbered': 'numbered feature',
        'unlocated': 'unlocated toponym',
        'false': 'false toponym',
    }
    typenode = etree.SubElement(parent, '{%s}type' % PLEIADES)
    typenode.text = labels.get(place.dirtype, place.dirtype)

    if 'group' in place.dirtype:
        countnode = etree.SubElement(parent, 'featurecount')
        countnode.text = "%s" % place.featurecount

    # additional feature types: a new subtype element is started for the
    # first entry and whenever an entry differs from the one before it
    subnode = None
    for position, subtype in enumerate(place.types):
        if position == 0 or subtype != place.types[position - 1]:
            subnode = etree.SubElement(parent, 'subtype')
        subnode.text = subtype
|---|
| 121 |
|
|---|
| 122 |
|
|---|
| 123 |
def encodegrid(place, parent):
    """Add a gridsquare child holding place.grid, unless the grid is empty."""
    if len(place.grid) == 0:
        return
    square = etree.SubElement(parent, '{%s}gridsquare' % PLEIADES)
    square.text = place.grid
|---|
| 128 |
|
|---|
| 129 |
|
|---|
| 130 |
def encodemaplabels(place, parent):
    """Add a label (context='map') child for places with a name string.

    Places whose dirtype is 'unlocated' or 'false' get no label.
    """
    if len(place.namestring) == 0:
        return
    if place.dirtype in ['unlocated', 'false']:
        return
    label = etree.SubElement(parent, '{%s}label' % PLEIADES, context='map')
    cleaned = DIAMOND_STOP_REGEX.sub('', SLASH_REGEX.sub('/', place.namestring))
    label.text = cleaned.strip()
|---|
| 138 |
|
|---|
| 139 |
|
|---|
| 140 |
def encodecitations(place, parent, mapnum):
    """Write the main BAtlas citation for a place and return its name part.

    The citation wording depends on place.dirtype and on whether the place
    has a name string or parsed place names.  The citation is written through
    writecit with the first BAtlas id, or '' when the place has none.
    """
    kind = place.dirtype
    if kind == 'unlocated':
        citname = SLASH_REGEX.sub('/', place.namestring.strip())
        citcontent = "BAtlas %s unlocated %s" % (mapnum, citname)
    elif kind == 'false':
        citname = SLASH_REGEX.sub('/', place.namestring.strip())
        citcontent = "BAtlas %s false name %s" % (mapnum, citname)
    elif kind == 'numbered':
        # a numbered feature is described by its first name when it has one,
        # otherwise by its location description
        if len(place.placenames) == 0:
            described = place.locdesc
        else:
            described = place.placenames[0].name
        citname = SLASH_REGEX.sub('/', described.strip())
        citcontent = "BAtlas %s %s no. %s (%s)" % (mapnum, place.grid, place.namestring, citname)
    elif len(place.namestring) == 0:
        citname = SLASH_REGEX.sub('/', place.locdesc.strip())
        if kind == 'name':
            citcontent = "BAtlas %s %s %s" % (mapnum, place.grid, citname)
        else:
            citcontent = "BAtlas %s %s unnamed %s (%s)" % (mapnum, place.grid, kind.replace('-', ' '), citname)
    else:
        cleaned = DIAMOND_STOP_REGEX.sub('', SLASH_REGEX.sub('/', place.namestring))
        citname = cleaned.strip()
        citcontent = "BAtlas %s %s %s" % (mapnum, place.grid, citname)

    if len(citcontent) > 0:
        primeid = place.batlasids[0] if len(place.batlasids) > 0 else ''
        writecit(parent, citcontent, primeid)
    return citname
|---|
| 172 |
|
|---|
| 173 |
def encodeplacenames(place, parent, mapnum, citname):
    """Add a geogname child per place name and, where needed, its citation.

    place -- place object; reads placenames, dirtype, grid, types, batlasids
    parent -- element that receives the new children
    mapnum -- map number used in the citation text
    citname -- name returned by encodecitations; a place name equal to it
               gets no additional citation

    Extra citations are written only when the place has more than one name.
    """
    # Fall back to an empty primary id, as encodecitations and
    # encodeitineraries do, instead of raising IndexError on an id-less place.
    if len(place.batlasids) > 0:
        primeid = place.batlasids[0]
    else:
        primeid = ''
    multiple = len(place.placenames) > 1

    for n in place.placenames:
        q = etree.SubElement(parent, '{%s}geogname' % PLEIADES)
        q.text = n.name.strip()
        if n.variant:
            q.attrib['type'] = 'variant'
        elif n.minorAlternative:
            q.attrib['type'] = 'minor-alternate'
        if n.completeness != 'complete':
            q.attrib['completeness'] = n.completeness
        if n.accuracy != 'accurate':
            q.attrib['accuracy'] = n.accuracy
        if n.inferred:
            q.attrib['inferred'] = 'yes'
        if n.certainty != 'certain':
            q.attrib['certainty'] = n.certainty

        if not multiple or citname == q.text:
            continue

        # Decorate the name with markers for its qualifiers before citing it.
        txt = q.text
        if n.completeness == 'reconstructable':
            txt = u"*%s" % txt
        if n.accuracy == 'inaccurate':
            txt = u"\u2018%s\u2019" % txt
        # equality test kept as in the original so non-bool truthy values
        # still do not trigger the brackets
        if n.inferred == True:
            txt = u"[%s]" % txt
        if n.certainty != 'certain':
            txt = u"%s?" % txt
        if n.minorAlternative:
            txt = u"\u00A7%s" % txt

        if place.dirtype == 'unlocated':
            txt = "BAtlas %s unlocated %s" % (mapnum, txt)
        elif place.dirtype == 'false':
            txt = "BAtlas %s false %s" % (mapnum, txt)
        else:
            # more than one place name is guaranteed on this path
            txt = "BAtlas %s %s %s" % (mapnum, place.grid, txt)

        if len(txt) > 0:
            if 'island-group' in place.types:
                txt = "%s Inss." % txt
            elif 'island' in place.types:
                txt = "%s Ins." % txt
            elif 'river' in place.types:
                txt = "%s fl." % txt
            writecit(parent, txt, primeid)
|---|
| 219 |
|
|---|
| 220 |
def encodelocdesc(place, parent):
    """Add a location child with the slash-normalized location description.

    Nothing is written when place.locdesc is empty.
    """
    if len(place.locdesc) == 0:
        return
    location = etree.SubElement(parent, 'location')
    location.text = SLASH_REGEX.sub('/', place.locdesc.strip())
|---|
| 225 |
|
|---|
| 226 |
|
|---|
| 227 |
def encodenotes(place, parent):
    """Add a note child carrying place.note, unless the note is empty."""
    if len(place.note) == 0:
        return
    note = etree.SubElement(parent, 'note')
    note.text = place.note
|---|
| 231 |
|
|---|
| 232 |
def encodeitineraries(place, parent, mapnum):
    """Add an itinerary child and its citation for places with itinerary text.

    place -- place object; reads itinraw, dirtype and batlasids
    parent -- element that receives the itinerary and citation children
    mapnum -- map number used in the citation text

    Nothing is written when place.itinraw is empty.
    """
    if len(place.itinraw) == 0:
        return
    itinerary = place.itinraw.strip()
    q = etree.SubElement(parent, 'itinerary')
    q.text = itinerary
    # The prefix used to read 'BATlas'; every other citation in this module
    # is written as 'BAtlas'.
    citcontent = "BAtlas %s %s (%s)" % (mapnum, place.dirtype, itinerary)
    if len(place.batlasids) > 0:
        writecit(parent, citcontent, place.batlasids[0])
    else:
        writecit(parent, citcontent, '')
|---|
| 241 |
|
|---|
| 242 |
def oldschoolFeatureID(place, parent, mapnum):
    """Append a featureID element built from the place's table position.

    A place whose tablei and rowi are both -1 is identified by its
    anonsequence; any other place by its one-based table and row numbers.
    """
    anonymous = place.tablei == -1 and place.rowi == -1
    if anonymous:
        placeid = "batlas-%s-anon-%s" % (mapnum, place.anonsequence)
    else:
        placeid = "batlas-%s-%s-%s" % (mapnum, place.tablei+1, place.rowi+1)
    featureid = etree.Element("{%s}featureID" % ADLGAZ)
    featureid.text = placeid
    parent.append(featureid)
|---|
| 250 |
|
|---|
| 251 |
def oldschoolTimePeriodNames(place, parent):
    """Append a timePeriod/timePeriodName pair for each of the place's periods.

    Each entry of place.periods is indexed as (period key, confidence); a
    'less-confident' entry gets a trailing '?' after its period name.
    """
    for entry in place.periods:
        wrapper = etree.Element("{%s}timePeriod" % ADLGAZ)
        namenode = etree.Element("{%s}timePeriodName" % ADLGAZ)
        label = periods[entry[0]]
        if entry[1] == 'less-confident':
            label = label + "?"
        namenode.text = label
        wrapper.append(namenode)
        parent.append(wrapper)
|---|
| 261 |
|
|---|
| 262 |
|
|---|
| 263 |
def oldschoolAttribution(pipe, parent):
    """Append a Dublin Core creator, then contributor, element per pipe entry.

    All creators are written before any contributor.
    """
    for tagname, attribute in (('creator', 'creators'), ('contributor', 'contributors')):
        for person in getattr(pipe, attribute):
            node = etree.Element("{%s}%s" % (DC, tagname))
            node.text = person
            parent.append(node)
|---|
| 272 |
|
|---|
| 273 |
def oldschoolFeatureNames(place, parent, magicrefs):
    """Append one featureName element per place name.

    place -- place object; reads placenames, types and namestring
    parent -- element that receives the featureName children
    magicrefs -- mapping from a raw reference to its marked-up text, as
                 returned by oldschoolReferences

    Each featureName carries the transliteration, a classificationSection
    (name type, name aspects, name association), the name's time periods and
    its secondary references.  A reference missing from magicrefs raises
    KeyError.
    """
    for pn in place.placenames:

        tag_fn = etree.Element("{%s}featureName" % ADLGAZ)

        # transliteration
        tag_translit = etree.Element("{%s}transliteration" % AWMC)
        tag_translit.text = pn.name
        tag_fn.append(tag_translit)

        # classificationSection
        # The old index()-based test left nametype unbound when 'people' was
        # the first type; any occurrence now makes the name ethnic.
        if 'people' in place.types:
            nametype = 'ethnic'
        else:
            nametype = 'geographic'
        tag_cs = etree.Element("{%s}classificationSection" % ADLGAZ)
        tag_ct = etree.Element("{%s}classificationTerm" % ADLGAZ)
        tag_ct.text = nametype
        tag_cs.append(tag_ct)
        tag_css = etree.Element("{%s}classificationScheme" % ADLGAZ)
        tag_csn = etree.Element("{%s}schemeName" % ADLGAZ)
        tag_csn.text = "geoNameType"
        tag_css.append(tag_csn)
        tag_cs.append(tag_css)

        # one nameAspect per qualifier that applies, in a fixed order
        aspects = []
        if pn.inferred:
            aspects.append('na-inferred')
        if pn.completeness != 'complete':
            aspects.append('na-reconstructed')
        if pn.accuracy != 'accurate':
            aspects.append('na-inaccurate')
        for aspect in aspects:
            tag_naspect = etree.Element("{%s}nameAspect" % AWMC)
            tag_naspect.attrib['ref'] = aspect
            tag_cs.append(tag_naspect)

        tag_nassoc = etree.Element("{%s}nameAssociation" % AWMC)
        tag_nassoc.attrib['ref'] = pn.certainty
        tag_cs.append(tag_nassoc)
        tag_fn.append(tag_cs)

        # timePeriods for the name
        for tp in pn.periods:
            tag_tp = etree.Element("{%s}timePeriod" % ADLGAZ)
            tag_tpn = etree.Element("{%s}timePeriodName" % ADLGAZ)
            tpstring = periods[tp[0]]
            if tp[1] == 'less-confident':
                tpstring += "?"
            tag_tpn.text = tpstring
            tag_tp.append(tag_tpn)
            tag_fn.append(tag_tp)

        # secondary references for the name
        if len(pn.references) > 0:
            tag_refs = etree.Element("{%s}secondaryReferences" % AWMC)
            for ref in pn.references:
                tag_bibl_xml = "<tei:bibl xmlns='%s' xmlns:tei='%s'>%s</tei:bibl>" % (TEI, TEI, magicrefs[ref])
                if "tei:title" not in tag_bibl_xml:
                    # report references whose markup lacks a title
                    print(place.namestring.encode('ascii', 'xmlcharrefreplace'))
                    print(">>>> no title in: '%s'" % tag_bibl_xml.encode('ascii', 'xmlcharrefreplace'))

                tag_refs.append(etree.XML(tag_bibl_xml))

            tag_fn.append(tag_refs)

        parent.append(tag_fn)
|---|
| 343 |
|
|---|
| 344 |
def oldschoolReferences(place, parent, mapnum):
    """Append the place's secondaryReferences element and return its ref map.

    place -- place object; reads namestring, grid, placenames and references
    parent -- element that receives the secondaryReferences child
    mapnum -- map number used in the BAtlas bibl entry

    The first bibl always cites the Barrington Atlas itself; one bibl per
    entry of place.references follows.  Returns a dict mapping each raw
    reference (place-level and name-level) to the text produced by refmagic.
    """
    # first, recall our origins in the barrington atlas
    tag_refs = etree.Element("{%s}secondaryReferences" % AWMC)
    try:
        thislabel = place.namestring.replace("/ ", "/")
    except AttributeError:
        # namestring has no replace() (e.g. None): cite map and grid only
        thislabel = ''
    if len(thislabel) > 0:
        findi = thislabel.find(u'\xA7')
        if findi > -1:
            # Cut at the section sign and drop the character before it.
            # Clamped at 0: a leading section sign used to give [:-1], which
            # kept almost the whole label.
            thislabel = thislabel[:max(findi - 1, 0)].strip()
        tag_bibl_xml = "<tei:bibl xmlns='%s' xmlns:tei='%s'><title>BAtlas</title> <biblScope>%s %s %s</biblScope></tei:bibl>" % (TEI, TEI, mapnum, place.grid, thislabel)
    else:
        tag_bibl_xml = "<tei:bibl xmlns='%s' xmlns:tei='%s'><title>BAtlas</title> <biblScope>%s %s</biblScope></tei:bibl>" % (TEI, TEI, mapnum, place.grid)

    tag_refs.append(etree.XML(tag_bibl_xml))

    # now, any other references
    magicrefs = {}
    if len(place.placenames) == 1:
        for ref in place.references:
            magicrefs[ref] = refmagic(ref, place.placenames[0].name)
    elif len(place.placenames) > 1:
        for ref in place.references:
            magicrefs[ref] = refmagic(ref, place.namestring)
    # NOTE(review): the original indentation was lost; this loop is assumed
    # to run for every place, not only in the multi-name branch -- confirm.
    for pn in place.placenames:
        for ref in pn.references:
            magicrefs[ref] = refmagic(ref, pn.name)

    if len(magicrefs) == 0 and len(place.references) > 0:
        for ref in place.references:
            magicrefs[ref] = refmagic(ref, "")

    for ref in place.references:
        try:
            refxml = magicrefs[ref]
        except KeyError:
            print('KeyError %s' % ref.encode('ascii', 'backslashreplace'))
            print(magicrefs)
            # Skip this reference; the previous bibl used to be appended a
            # second time here.
            continue
        # need to fix this usage now that we have internal tagging
        tag_bibl_xml = "<tei:bibl xmlns='%s' xmlns:tei='%s'>%s</tei:bibl>" % (TEI, TEI, refxml)
        if "tei:title" not in tag_bibl_xml:
            print(place.namestring.encode('ascii', 'xmlcharrefreplace'))
            print(">>>> no title in: '%s'" % tag_bibl_xml.encode('ascii', 'xmlcharrefreplace'))
        tag_refs.append(etree.XML(tag_bibl_xml))
    parent.append(tag_refs)
    return magicrefs
|---|
| 394 |
|
|---|
| 395 |
def encodeplace(place, parent, mapnum, pipe):
    """Append a place element to parent and fill it from the place object.

    The children are written in a fixed order: feature id, ids, types, grid,
    map label, citations, names, itinerary, location, note, time periods,
    attribution, references and finally the feature names.
    """
    node = etree.SubElement(parent, '{%s}place' % PLEIADES)
    oldschoolFeatureID(place, node, mapnum)
    encodeids(place, node)
    encodetypes(place, node)
    encodegrid(place, node)
    encodemaplabels(place, node)
    # the cited name lets encodeplacenames avoid citing the same name twice
    cited = encodecitations(place, node, mapnum)
    encodeplacenames(place, node, mapnum, cited)
    encodeitineraries(place, node, mapnum)
    encodelocdesc(place, node)
    encodenotes(place, node)
    oldschoolTimePeriodNames(place, node)
    oldschoolAttribution(pipe, node)
    # the reference map built here is reused for the per-name references
    oldschoolFeatureNames(place, node, oldschoolReferences(place, node, mapnum))
|---|
| 411 |
|
|---|
| 412 |
|
|---|
| 413 |
def xmldump(pipe, destdir):
    """Output an xml file containing the ids, along with some descriptive info.

    pipe -- processed pipe; reads pipe['places'], pipe['contextpath'] and
            pipe.map_number
    destdir -- directory that receives 'map<map_number>.xml'

    The file starts with XMLDECL and the formatted comment template from
    COMMENTFILE, followed by the namespace-cleaned feature list.
    """
    # Empty the module-level registry in place.  The previous
    # ``priorcitations = []`` only bound a local name, so the citations that
    # writecit had recorded for earlier maps were never cleared.
    del priorcitations[:]

    # get all the places
    places = pipe['places']

    # serialize to xml
    root = etree.Element('{%s}featurelist' % PLEIADES)
    root.attrib['mapnum'] = pipe.map_number
    uribase = etree.SubElement(root, '{%s}uribase' % PLEIADES)
    uribase.text = "%s/" % NAMESPACE
    for place in places:
        encodeplace(place, root, pipe.map_number, pipe)

    cleantree = do_nscleanup(pipe['contextpath'], root)

    # write to file, prepending explanatory comment text, date etc.
    with open(COMMENTFILE) as cmntf:
        cmnt = cmntf.read()

    dtime = dt.datetime.utcnow()

    fn = "map%s.xml" % pipe.map_number
    fpath = os.path.join(destdir, fn)
    # Format the comment before opening the output, so a template error does
    # not leave a truncated file behind.
    cmnt = cmnt % (fn, pipe.map_number, dtime.isoformat(), PREPARER)
    with open(fpath, 'w') as f:
        f.write(XMLDECL)
        f.write(cmnt)
        etree.ElementTree(cleantree).write(f)
    logging.info("wrote output result xml file on %s" % fpath)
|---|
| 450 |
|
|---|
| 451 |
def writecit(parent, tcontent, primeid):
    """Add a citation child to parent unless the same citation was used before.

    Citations are compared with all whitespace removed.  For a repeat, when
    primeid matches DASHNUM_END_REGEX and the citation text does not, the
    regex's first group is appended in parentheses and the write is retried;
    otherwise the citation is dropped with a logged warning.
    """
    squeezed = u''.join(tcontent.split())
    if squeezed not in priorcitations:
        node = etree.SubElement(parent, '{%s}citation' % PLEIADES)
        node.text = tcontent
        priorcitations.append(squeezed)
        return

    # if primeid ends in dash-number then postfix the number onto the citation
    idmatch = DASHNUM_END_REGEX.search(primeid)
    textmatch = DASHNUM_END_REGEX.search(tcontent)
    if idmatch and not textmatch:
        writecit(parent, "%s (%s)" % (tcontent, idmatch.group(1)), primeid)
        return
    logging.warning("Suppressed writing of citation '%s' for primary id '%s' because that citation is already in use" % (tcontent, primeid))
|---|
| 466 |
|
|---|