|
Revision 825, 0.7 kB
(checked in by thomase, 2 years ago)
|
Saving full place information using the Pleiades frankenformat. Partial support for saving in the new TEI place format. Parsing periods for names.
|
- Property svn:eol-style set to
native
|
| Line | |
|---|
| 1 |
|
|---|
| 2 |
from etreehelps import getalltext |
|---|
| 3 |
from texthelps import normalizetext |
|---|
| 4 |
|
|---|
| 5 |
def grok(source):
    """Find the tables in source and return a dictionary of them, keyed by their titles.

    source -- an element-like object exposing ``xpath()`` (presumably an
              lxml element; confirm against the caller).

    The "listing div" is the last ``div`` descendant of ``source`` (in the
    order ``xpath()`` returns them) that contains a ``p`` whose ``b`` child
    mentions 'Directory'.  Every ``table`` descendant of that div is titled
    with ``normalizetext(getalltext(p))`` where ``p`` is the table's nearest
    preceding sibling ``p`` element.

    Returns a dict mapping each title to a ``(index, table)`` tuple, where
    ``index`` is the table's position among all tables of the listing div.
    If two tables share a title, the later one replaces the earlier one.
    An empty dict is returned when no listing div is found; tables without
    a preceding sibling ``p`` are skipped (their index is still consumed).
    """
    tables = {}

    # find listing div; no early exit on purpose, so that the last match
    # wins exactly as it always has
    dirlistdiv = None
    for div in source.xpath("descendant::*[local-name()='div']"):
        if div.xpath("descendant::*[local-name()='p']/*[local-name()='b' and contains(., 'Directory')]"):
            dirlistdiv = div

    # nothing that looks like a directory listing: there are no tables to
    # report, so hand back the empty dict instead of failing on None
    if dirlistdiv is None:
        return tables

    # iterate through tables and get their titles
    for ti, table in enumerate(dirlistdiv.xpath("descendant::*[local-name()='table']")):
        titles = table.xpath("preceding-sibling::*[local-name()='p'][1]")
        if not titles:
            # a table with no title paragraph cannot be keyed; skip it
            continue
        text = normalizetext(getalltext(titles[0]))
        tables[text] = (ti, table)
    return tables
|---|