Changeset 842
- Timestamp: 06/15/07 15:41:33 (2 years ago)
- Files:
  - BADataMunger/trunk/placesaver.py (modified) (3 diffs)
  - BADataMunger/trunk/tableparser.py (modified) (11 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
BADataMunger/trunk/placesaver.py
r840 r842 83 83 tag_bibl = etree.Element("{%s}bibl" % TEI) 84 84 try: 85 tag_bibl.text = u"BAtlas %s %s %s" % (self.map_number, place.grid, place.namestring )85 tag_bibl.text = u"BAtlas %s %s %s" % (self.map_number, place.grid, place.namestring.replace("/ ", "/")) 86 86 except: 87 87 tag_bibl.text = u"BAtlas %s %s" % (self.map_number, place.grid) … … 141 141 tag_css.append(tag_csn) 142 142 tag_cs.append(tag_css) 143 #if pn.certainty != "certain": 143 if pn.inferred: 144 tag_naspect = etree.Element("{%s}nameAspect" % AWMC) 145 tag_naspect.attrib['ref'] = 'na-inferred' 146 tag_cs.append(tag_naspect) 147 if pn.completeness != 'complete': 148 tag_naspect = etree.Element("{%s}nameAspect" % AWMC) 149 tag_naspect.attrib['ref'] = 'na-reconstructed' 150 tag_cs.append(tag_naspect) 151 if pn.accuracy != 'accurate': 152 tag_naspect = etree.Element("{%s}nameAspect" % AWMC) 153 tag_naspect.attrib['ref'] = 'na-inaccurate' 154 tag_cs.append(tag_naspect) 144 155 tag_nassoc = etree.Element("{%s}nameAssociation" % AWMC) 145 156 tag_nassoc.attrib['ref'] = pn.certainty … … 157 168 tag_tp.append(tag_tpn) 158 169 tag_fn.append(tag_tp) 170 171 # secondary references for the name 172 if len(pn.references) > 0: 173 tag_refs = etree.Element("{%s}secondaryReferences" % AWMC) 174 for ref in pn.references: 175 tag_bibl = etree.Element("{%s}bibl" % TEI) 176 tag_bibl.text = ref 177 tag_refs.append(tag_bibl) 178 179 tag_fn.append(tag_refs) 180 159 181 160 182 ge.append(tag_fn) BADataMunger/trunk/tableparser.py
r840 r842 211 211 elif d.tag == 'placenames': 212 212 213 # placenames should be coordinated with periods , if possible213 # placenames should be coordinated with periods and references, if possible 214 214 try: 215 215 context = d.xpath("ancestor::when | ancestor::otherwise")[0] … … 221 221 except: 222 222 pass 223 try: 224 rcelli = context.xpath("./descendant::references")[0].attrib['cell'] 225 except: 226 pass 223 227 else: 224 228 try: … … 226 230 except: 227 231 pass 228 if pcelli: 229 #print "processing placenames from celli = %s (%s) with periods from pcelli = %s (%s)" 232 try: 233 rcelli = context.xpath("./descendant::periods[not(ancestor::when) and not(ancestor::otherwise)]")[0].attrib['cell'] 234 except: 235 pass 236 if pcelli and rcelli: 237 self.placenames = self.parse_placenames(cells[celli], cells[pcelli], cells[rcelli]) 238 elif pcelli: 230 239 self.placenames = self.parse_placenames(cells[celli], cells[pcelli]) 240 elif rcelli: 241 self.placenames = self.parse_placenames(cells[celli], None, cells[rcelli]) 231 242 else: 232 243 self.placenames = self.parse_placenames(cells[celli]) … … 263 274 self.periods = self.parse_periods(cells[2]) 264 275 self.locdesc = normalizetext(getalltext(cells[3])) 265 self.placenames = self.parse_placenames(cells[1], cells[2] )276 self.placenames = self.parse_placenames(cells[1], cells[2], cells[4]) 266 277 self.namestring = normalizetext(getalltext(cells[1])) 267 278 self.references = self.parse_references(cells[4]) … … 271 282 # name | references | comment 272 283 self.type = 'false' 273 self.placenames = self.parse_placenames(cells[0] )284 self.placenames = self.parse_placenames(cells[0], None, cells[1]) 274 285 self.namestring = normalizetext(getalltext(cells[0])) 275 286 self.references = self.parse_references(cells[1]) … … 301 312 self.periods = self.parse_periods(cells[1]) 302 313 self.locdesc = normalizetext(getalltext(cells[2])) 303 self.placenames = self.parse_placenames(cells[0], cells[1] )314 self.placenames 
= self.parse_placenames(cells[0], cells[1], cells[3]) 304 315 self.namestring = normalizetext(getalltext(cells[0])) 305 316 self.references = self.parse_references(cells[3]) … … 343 354 return [normalizetext(place) for place in places] 344 355 345 def parse_placenames(self, namecell, periodcell=None ):356 def parse_placenames(self, namecell, periodcell=None, refcell=None): 346 357 """We can have multiple placenames in a single cell, and there are two types of delimiters. 347 358 Parse this mess to get the individual names, and then deal with variants too.""" … … 388 399 pname.periods = self.periods 389 400 401 # associate references, if necessary, with the placenames 402 # if separate references are provided for each work 403 if refcell: 404 refparas = refcell.xpath("*[local-name()='p']") 405 if len(refparas) > 1: 406 for pname in placenames: 407 try: 408 pname.references = self.parse_references(refparas[pname.originalPosition]) 409 except: 410 pass 411 elif len(self.references) > 0: 412 for pname in placenames: 413 pname.references = self.references 414 390 415 return placenames 391 416 … … 429 454 m = re.match(pattern, ntext) 430 455 if m: 431 in terred = True456 inferred = True 432 457 pattern = u'[\[\]]' 433 458 newntext = re.sub(pattern, u'', ntext) … … 447 472 references=[] 448 473 449 paragraphs = referencecell.xpath(" *[local-name()='p']")474 paragraphs = referencecell.xpath("descendant-or-self::*[local-name()='p']") 450 475 for paragraph in paragraphs: 451 476 ptext = normalizetext(getalltext(paragraph)) … … 475 500 476 501 477 def parse_periodcell(self, periodcell):478 print "parse_periodcell"479 periods=[]480 pctext = normalizetext(getalltext(periodcell))481 if len(pctext) > 0:482 periodishes = periodcell.xpath("descendant::*[local-name()='p']")483 try:484 periodish = periodishes[self.originalPosition]485 except:486 pass487 if periodish:488 lpstring = normalizetext(getalltext(periodish))489 if len(lpstring) > 0:490 for pcode in periodcodes:491 qpcode = 
'%s?' % pcode492 if lpstring.find(qpcode) != -1:493 lpstring = lpstring.replace(qpcode, '')494 if lpstring.find(pcode) != -1:495 lpstring = lpstring.replace(pcode, '')496 periods.append((pcode, 'confident'))497 else:498 periods.append((pcode, 'less-confident'))499 else:500 if lpstring.find(pcode) != -1:501 lpstring = lpstring.replace(pcode, '')502 periods.append((pcode, 'confident'))503 504 return periods505 502 506 503 def __str__(self):
