root/BADataMunger/trunk/tablegroker.py

Revision 825, 0.7 kB (checked in by thomase, 2 years ago)

Saving full place information using the Pleiades frankenformat. Partial support for saving in the new TEI place format. Parsing periods for names.

  • Property svn:eol-style set to native
Line 
1
2 from etreehelps import getalltext
3 from texthelps import normalizetext
4
def grok(source):
    """Find the tables in source and return a dictionary of them, keyed by their titles.

    source must expose an ``xpath()`` method (presumably an lxml element or
    tree -- confirm against the caller).

    The "listing div" is the last ``div`` descendant of source (in the order
    ``xpath()`` returns them) that contains a ``p`` element with a ``b`` child
    whose text contains 'Directory'.  Every ``table`` below that div is keyed
    by the normalized text of its nearest preceding ``p`` sibling.

    Returns a dict mapping title text to ``(index, table)``, where index is
    the position of the table among all tables found under the listing div.
    If two tables share a title, the later one replaces the earlier one.
    An empty dict is returned when no listing div is found; tables with no
    preceding ``p`` sibling have no title and are skipped.
    """
    tables = {}

    # find listing div; no break on purpose, so the last matching div wins
    dirlistdiv = None
    for div in source.xpath("descendant::*[local-name()='div']"):
        if div.xpath("descendant::*[local-name()='p']/*[local-name()='b' and contains(., 'Directory')]"):
            dirlistdiv = div

    # nothing to grok: avoid calling .xpath() on None
    if dirlistdiv is None:
        return tables

    # iterate through tables and get their titles
    for ti, table in enumerate(dirlistdiv.xpath("descendant::*[local-name()='table']")):
        titles = table.xpath("preceding-sibling::*[local-name()='p'][1]")
        if not titles:
            # no preceding paragraph means no title to key this table by
            continue
        text = normalizetext(getalltext(titles[0]))
        tables[text] = (ti, table)
    return tables
Note: See TracBrowser for help on using the browser.