| 1 |
import re |
|---|
| 2 |
import logging |
|---|
| 3 |
|
|---|
| 4 |
from pleiades.normalizer import normalizer |
|---|
| 5 |
|
|---|
| 6 |
# The section sign (U+00A7) plus everything after it on the line: the
# so-called "diamonds", i.e. name variants listed in the directory but not
# printed on the map.  At least one character must follow the sign.
DIAMOND_STOP_REGEX = re.compile(u"\u00A7.+")

# Suffix tags (", T." and ", Mon.") that mark particular subtypes in the
# directory but are not used in map labels.
SILENT_POSTFIX_REGEX = re.compile(r', (T|Mon)\.')

# Single characters dropped from itinerary-bearing labels: ? * [ ] and the
# curly single quotes U+2018 / U+2019, plus the comma.  The backslashes are
# doubled so the literal holds no invalid string escapes while the compiled
# pattern text stays exactly what it was.
ITIN_IGNORE_REGEX = re.compile(u'[\\?\\*\\[\\]\u2018\u2019,]')

# Trailing type abbreviations that a name may already carry.
RIVER_REGEX = re.compile(r' fl\.$')
ISLAND_REGEX = re.compile(r' Ins\.$')
ISLAND_GROUP_REGEX = re.compile(r' Inss\.$')

# Module-level list, initially empty.  Nothing in the code visible here reads
# or writes it -- presumably callers collect generated IDs in it (TODO confirm).
ALLIDS = []
|---|
| 14 |
|
|---|
| 15 |
class BAtlasIDMaker:
    """Make identifiers (for URIs) and labels for Barrington Atlas features.

    The label methods rely on the module-level regular expressions
    (DIAMOND_STOP_REGEX, SILENT_POSTFIX_REGEX, ITIN_IGNORE_REGEX and the
    river/island suffix patterns); makeID relies on ``normalizer``.
    """

    # Directory types whose labels are deliberately suppressed.
    _SUPPRESSED_TYPES = ('road', 'coastal-change', 'unlabeled')

    # Directory types that may be either named or unnamed.
    _OPTIONALLY_NAMED_TYPES = ('canal', 'canal-group', 'earthworks')

    # Directory types that are labeled with the directory name string.
    _NAMED_TYPES = ('name', 'numbered', 'lettered')

    # More-or-less normal feature types that come without names.
    _UNNAMED_TYPES = (
        'aqueduct', 'aqueduct-group', 'bath', 'bridge', 'bridge-group',
        'causeway', 'cemetery', 'centuriation', 'centuriation-group',
        'church-group', 'dam', 'dam-group', 'dike', 'dike-group', 'feature',
        'feature-group', 'fort', 'fort-group', 'levee', 'lighthouse',
        'lighthouse-group', 'mine', 'mine-group', 'mole', 'monastery',
        'monument', 'monument-group', 'pass', 'quarry', 'quarry-group',
        'reservoir', 'road-station', 'salt-pans', 'spring', 'tumulus',
        'tunnel', 'villa', 'villa-group', 'wall', 'wall-group', 'waterwheel',
        'well',
    )

    def makeID(self, phrase, mapnum, gridsq, phrasepostfix='', phraseprefix=''):
        """Make an ID from a properly prepared string.

        phrase, mapnum and gridsq are each passed through
        normalizer.normalizeN() and the resulting parts joined with '-'.
        phrasepostfix, when non-empty, is placed directly after the
        normalized phrase; phraseprefix, when non-empty, goes first.  Neither
        of the two is normalized.  Pieces that end up empty are dropped and
        the remainder is joined with '-'.
        """
        pieces = [
            u'-'.join(normalizer.normalizeN(raw))
            for raw in (phrase, mapnum, gridsq)
        ]
        # Insert the postfix first: index 1 is "right after the phrase" only
        # while the prefix has not yet shifted everything by one.
        if phrasepostfix:
            pieces.insert(1, phrasepostfix)
        if phraseprefix:
            pieces.insert(0, phraseprefix)
        return u'-'.join([piece for piece in pieces if piece])

    def _stripSilentParts(self, phrase):
        """Remove the diamond variants and the silent suffix tags from phrase."""
        phrase = DIAMOND_STOP_REGEX.sub('', phrase)
        return SILENT_POSTFIX_REGEX.sub('', phrase)

    def buildLabel(self, origstring, placenames, dirtype, locdesc=u'', itin=u''):
        """Dir file doesn't exactly preserve the label, so we have to create it.

        origstring is the directory name string, placenames the names found
        for the feature, dirtype the directory type, locdesc a location
        description and itin an itinerary string.

        Returns the stripped label; u'' for suppressed types.  For a dirtype
        that no branch handles, a warning is logged and the stripped
        origstring is returned unchanged.
        """
        if dirtype == 'false':
            logging.debug("false origstring: %s", origstring)

        phrase = origstring.strip()

        if dirtype in self._SUPPRESSED_TYPES:
            # suppress certain types because they're not helpful
            return u''

        elif dirtype in self._OPTIONALLY_NAMED_TYPES:
            # might be named or unnamed
            if phrase:
                phrase = self._stripSilentParts(phrase)
            else:
                phrase = "%s %s" % (dirtype, locdesc.strip())

        elif placenames or dirtype in self._NAMED_TYPES:
            # the most basic case: features with names:
            # use the batlas directory namestring
            # strip off the so-called "diamonds": variants in the dir but not on the map
            # strip out suffix tags for particular subtypes that are not used in map labels
            phrase = self._stripSilentParts(phrase)

        elif dirtype in self._UNNAMED_TYPES:
            # second most common case: more-or-less normal features without names
            phrase = "%s %s %s" % (dirtype, phrase, locdesc)
            # NOTE(review): 'road' is returned early above and is not among
            # the unnamed types, so this itinerary handling never runs as the
            # code stands.  Kept until the intended treatment of roads is
            # confirmed.
            if dirtype == 'road':
                cleanit = SILENT_POSTFIX_REGEX.sub(u'', itin)
                phrase = "%s %s" % (phrase, cleanit)
                phrase = ITIN_IGNORE_REGEX.sub(u'', phrase)

        # there are various missing classes
        else:
            logging.warning(
                "bidmaker.buildLabel did nothing for origstring='%s', dirtype='%s', locdesc='%s'",
                origstring, dirtype, locdesc)

        label = phrase.strip()
        if dirtype == 'false':
            logging.debug("returning label: '%s'", label)

        return label

    def buildAltLabel(self, origstring, name, placetypes):
        """We allow IDs that incorporate only one name in a multi-name label,
        so we have to generate these according to a sensible model.

        Returns u'' when the stripped name equals origstring with its diamond
        part removed, i.e. when it would only repeat the full label.
        Otherwise the silent suffix tags are removed from name, the type
        suffix is added via addSmartSuffix() and the stripped result is
        returned.
        """
        phrase = name.strip()
        fulllabel = DIAMOND_STOP_REGEX.sub('', origstring.strip()).strip()
        if phrase == fulllabel:
            return u''

        phrase = SILENT_POSTFIX_REGEX.sub('', phrase)
        phrase = self.addSmartSuffix(phrase, placetypes)
        return phrase.strip()

    def addSmartSuffix(self, name, placetypes):
        """Append the conventional type abbreviation to name if it lacks one.

        Only 'island' (Ins.), 'island-group' (Inss.) and 'river' (fl.) are
        handled; other place types are skipped.  The place types are checked
        in order and the loop stops as soon as one suffix has been appended,
        so at most one suffix is ever added.
        """
        # place type -> (pattern detecting the suffix at the end of a name,
        #                suffix text to append when it is absent)
        conventions = {
            'island': (ISLAND_REGEX, u'Ins.'),
            'island-group': (ISLAND_GROUP_REGEX, u'Inss.'),
            'river': (RIVER_REGEX, u'fl.'),
        }

        phrase = name
        for placetype in placetypes:
            # ``in`` rather than dict.has_key(), which Python 3 removed.
            if placetype not in conventions:
                continue
            detector, suffix = conventions[placetype]
            if not detector.search(phrase):
                phrase = u"%s %s" % (phrase, suffix)
                break
        return phrase
|---|
| 124 |
|
|---|
| 125 |
|
|---|