| 1 |
import logging |
|---|
| 2 |
|
|---|
| 3 |
import lxml.etree as etree |
|---|
| 4 |
|
|---|
| 5 |
from etreehelps import getalltext |
|---|
| 6 |
from texthelps import normalizetext |
|---|
| 7 |
import tableparser |
|---|
| 8 |
|
|---|
| 9 |
# URI of the ESRI ArcGIS 9.0 schema -- presumably the XML namespace of the
# workspace export documents; it is not referenced in the visible code.
ESRI = "http://www.esri.com/schemas/ArcGIS/9.0"
|---|
| 10 |
|
|---|
| 11 |
class gisMixer:
    """Combine directory places with records from an ESRI workspace XML export.

    The constructor parses the XML file and stores every record as a list of
    field values (``fieldvalues``) whose positions correspond to
    ``fieldnames``. The parallel list ``fieldmatches`` holds one boolean per
    record telling whether a directory place has already claimed it.
    """

    # Lower-cased GIS type names that are renamed when copied onto a place;
    # any type not listed here is used unchanged.
    _TYPE_ALIASES = {
        'road station': 'station',
        'water wheel': 'wheel',
        'mountain pass': 'pass',
        'inland water': 'water-inland',
        'open water': 'water-open',
        'salt marsh': 'salt-marsh',
    }

    def __init__(self, parent, source):
        """Load field names, geometry type and records from *source*.

        parent -- object whose ``map_number`` is copied onto places created
                  by addin()
        source -- path of the XML file to read

        Raises ValueError when one of the required fields (GridSquare, Label,
        disambiguator, SHAPE, Type, Approximate) is missing, and IndexError
        when a field has no Name or the SHAPE field has no
        GeometryDef/GeometryType.
        """
        self.parent = parent

        # Read bytes rather than text so that lxml can honour the encoding
        # declaration of the document; the context manager closes the file
        # even when reading fails.
        with open(source, 'rb') as f:
            gis_text = f.read()
        tree = etree.XML(gis_text)

        # get the field names and remember the positions of the ones we use
        e_fields = tree.xpath("//WorkspaceData/descendant::Fields/FieldArray/Field")
        self.fieldnames = [normalizetext(getalltext(e_field.xpath("Name")[0])) for e_field in e_fields]
        self.gridi = self.fieldnames.index('GridSquare')
        self.labeli = self.fieldnames.index('Label')
        self.disambiguatori = self.fieldnames.index('disambiguator')
        self.shapei = self.fieldnames.index('SHAPE')
        self.typei = self.fieldnames.index('Type')
        self.approxi = self.fieldnames.index('Approximate')

        # get the geometry type (XPath positions are 1-based, hence the + 1)
        e_geofielddef = tree.xpath(
            "//WorkspaceData/descendant::Fields/FieldArray/Field[%s]/GeometryDef/GeometryType"
            % (self.shapei + 1))
        self.geotype = normalizetext(getalltext(e_geofielddef[0]))
        logging.info("geotype=%s", self.geotype)

        # get the data and store it for later use; the SHAPE value is parsed
        # into coordinates, every other value is kept as normalized text
        self.fieldvalues = []
        for e_record in tree.xpath("//WorkspaceData/descendant::Records/Record"):
            row = []
            for j, e_value in enumerate(e_record.xpath("Values/Value")):
                if j == self.shapei:
                    row.append(self.parseshape(e_value))
                else:
                    row.append(normalizetext(getalltext(e_value)))
            self.fieldvalues.append(row)

        # create a match flag list, one item for each set of fielddata
        self.fieldmatches = [False] * len(self.fieldvalues)

        logging.info("gisMixer got %s records from %s", len(self.fieldvalues), source)
        logging.debug("FIELD NAMES ARE:")
        for name in self.fieldnames:
            logging.debug("%s", name)
        # An export without records is legal; only dump the first record
        # when there is one.
        if self.fieldvalues:
            logging.debug("FIRST RECORD CONTENT:")
            for item in self.fieldvalues[0]:
                logging.debug("%s", item)

    def parseshape(self, e_shape):
        """Return the coordinates of *e_shape* as a list of (x, y) tuples.

        Only ``esriGeometryPoint`` is supported: the X and Y child texts are
        converted to floats rounded to 4 decimal places. For any other
        geometry type a warning is logged and an empty list is returned.
        """
        coords = []
        if self.geotype == 'esriGeometryPoint':
            # float() instead of eval(): the text comes from a data file and
            # must never be executed as Python code.
            x = float(e_shape.findtext('X'))
            y = float(e_shape.findtext('Y'))
            coords.append((round(x, 4), round(y, 4)))
        else:
            logging.warning("gismixer.gisMixer.parseshape() has no support for geotype = %s", self.geotype)
        return coords

    def mixall(self, config, places):
        """Match every place in *places* against the GIS records.

        Places whose ``dirtype`` is 'unlocated' or 'false' are marked as
        matched without consulting the GIS data; all others go through
        mixin(). Afterwards every GIS record that nobody claimed is either
        suppressed (when a ``suppress`` directive in *config* matches it) or
        appended to *places* as a new place built by addin().
        """
        logging.info("BEGIN attempting to match and combine %s directory places with %s map/gis places",
                     len(places), len(self.fieldvalues))

        for place in places:
            if place.dirtype in ('unlocated', 'false'):
                place.matched = True
            else:
                self.mixin(config, place)

        logging.info("DONE attempting to match and combine %s directory places with %s map/gis places",
                     len(places), len(self.fieldvalues))

        logging.info("BEGIN attempting to deal with map/gis places for which there is no match in the directory")

        total = len(self.fieldvalues)
        unmatched = [value for i, value in enumerate(self.fieldvalues) if not self.fieldmatches[i]]

        if not unmatched:
            logging.info("all GIS places (%s) were matched", total)
        else:
            logging.info("%s of the %s GIS places were matched", total - len(unmatched), total)
            lbl_suppressions, full_suppressions = self._load_suppressions(config)

            # NOTE: i is the position within the unmatched records, not the
            # position within self.fieldvalues.
            for i, value in enumerate(unmatched):
                lbl_key = (value[self.gridi], value[self.labeli])
                lbl_hits = lbl_suppressions.count(lbl_key)
                if lbl_hits == 1:
                    logging.info("suppressing item %s based on a grid-and-label suppression directive:", i)
                    self._log_record(logging.info, value)
                    continue
                if lbl_hits > 1:
                    logging.warning("multiple suppression directives match this item (%s, %s)", *lbl_key)
                    continue

                full_key = lbl_key + (value[self.typei].lower(), value[self.disambiguatori])
                full_hits = full_suppressions.count(full_key)
                if full_hits == 1:
                    logging.info("suppressing item %s based on a full suppression directive:", i)
                    self._log_record(logging.info, value)
                elif full_hits > 1:
                    logging.warning("multiple suppression directives match this item (%s, %s, %s, %s)", *full_key)
                else:
                    logging.warning("UNMATCHED map/gis place %s was NOT suppressed:", i)
                    self._log_record(logging.warning, value)
                    places.append(self.addin(value, i))

        logging.info("DONE attempting to deal with map/gis places for which there is no match in the directory")

    def _load_suppressions(self, config):
        """Return (label_suppressions, full_suppressions) read from *config*.

        A ``suppress`` node with a label but neither type nor disambiguator
        yields a (grid, label) tuple; every other node yields a
        (grid, label, type, disambiguator) tuple. Missing children count as
        empty strings.
        """
        lbl_suppressions = []
        full_suppressions = []
        for snode in config.xpath("//suppress"):
            supgrid = self._first_text(snode, "grid") or ''
            suplabel = self._first_text(snode, "label") or ''
            suptype = self._first_text(snode, "type") or ''
            supdisamb = self._first_text(snode, "disambiguator") or ''
            if len(suplabel) > 0 and suptype == '' and supdisamb == '':
                lbl_suppressions.append((supgrid, suplabel))
            else:
                full_suppressions.append((supgrid, suplabel, suptype, supdisamb))
        return lbl_suppressions, full_suppressions

    def _log_record(self, log, value):
        """Emit one "name = value" line per field of *value* through *log*."""
        for name, item in zip(self.fieldnames, value):
            log(" %s = %s", name, item)

    def _first_text(self, node, query):
        """Return the normalized text of the first *query* hit, or None."""
        found = node.xpath(query)
        if not found:
            return None
        return normalizetext(getalltext(found[0]))

    def _disambiguator_for(self, config, soughttype, rowi):
        """Return the configured disambiguator text, or 0 when there is none."""
        text = self._first_text(
            config, "//disambiguator[@tabletype='%s' and @rowi='%s']" % (soughttype, rowi))
        # 0 (not None) is the "no disambiguator" marker so that log output
        # keeps reading "with disambiguator 0".
        return 0 if text is None else text

    def _multiple_for(self, config, soughttype, rowi):
        """Return the configured number of expected matches, or 0."""
        text = self._first_text(
            config, "//multiple[@tabletype='%s' and @rowi='%s']" % (soughttype, rowi))
        if text is None:
            return 0
        try:
            # int() instead of eval(): configuration text is data, not code.
            return int(text)
        except ValueError:
            logging.warning("ignoring non-integer multiple directive '%s'", text)
            return 0

    def _ascii(self, text):
        """Return *text* with non-ASCII characters backslash-escaped."""
        # decode() turns the result back into text so that it is not
        # rendered as b'...' when interpolated on Python 3.
        return text.encode('ascii', 'backslashreplace').decode('ascii')

    def addin(self, gisdata, seq):
        """Build a new, already-matched place from the GIS record *gisdata*.

        seq is stored as the place's ``anonsequence``; table and row indexes
        are set to -1 because the place does not come from a directory table.
        """
        p = tableparser.Place()
        p.anonsequence = seq
        p.map_number = self.parent.map_number
        p.tablei = -1
        p.rowi = -1
        p.matched = True
        p.shapes.append(gisdata[self.shapei])
        p.approximates.append(gisdata[self.approxi])
        p.types.append(self.get_type(gisdata[self.typei].lower()))
        p.grid = gisdata[self.gridi]
        return p

    def mixin(self, config, place):
        """Copy shape, approximate flag and type of matching records to *place*.

        When get_records() finds nothing, *place* is left untouched and a
        warning describing the failed lookup is logged.
        """
        records = self.get_records(config, place)
        if not records:
            # The cleaned label and the disambiguator are recomputed here:
            # they are locals of get_records() and not visible in this method.
            logging.warning(
                "failed to match %s: '%s' (%s - %s), rowi=%s, disambiguator: %s",
                place.dirtype,
                self._ascii(self.clean_label(place.namestring)),
                self._ascii(place.locdesc),
                self._ascii(place.itinraw),
                place.rowi,
                self._disambiguator_for(config, place.dirtype.lower(), place.rowi))
            return

        if place.matched:
            logging.warning("place claims it's already matched")
        place.matched = True

        # mix the data
        for record in records:
            place.shapes.append(record[self.shapei])
            place.approximates.append(record[self.approxi])
            place.types.append(self.get_type(record[self.typei].lower()))

    def get_type(self, rawtype):
        """Translate a lower-cased GIS type name into the place type name."""
        return self._TYPE_ALIASES.get(rawtype, rawtype)

    def get_records(self, config, p):
        """Return the still-unclaimed GIS records that match place *p*.

        Records are narrowed down by grid square, then by type (skipped for
        the 'name' and 'numbered' types), then by cleaned label and finally,
        when *config* holds a disambiguator for the place, by disambiguator.
        A single survivor is a match; several survivors are only accepted
        when their number equals the ``multiple`` directive in *config*.

        Returns the list of matching records (and flags them in
        ``fieldmatches``), or None when there is no acceptable match.
        """
        label = self.clean_label(p.namestring)
        gridsquare = p.grid
        soughttype = p.dirtype.lower()
        disambiguator = self._disambiguator_for(config, soughttype, p.rowi)

        logging.info("seeking a %s in gridsquare %s with disambiguator %s", soughttype, gridsquare, disambiguator)

        # Keep the index next to each record: looking the record up again by
        # value would flag the wrong entry when two records are equal.
        candidates = [(i, value) for i, value in enumerate(self.fieldvalues) if not self.fieldmatches[i]]

        gridmatches = [c for c in candidates if c[1][self.gridi] == gridsquare]
        if not gridmatches:
            logging.warning("gisMixer found 0 gridmatches")
            return None

        if soughttype in ('name', 'numbered'):
            typematches = gridmatches
        else:
            typematches = [c for c in gridmatches if c[1][self.typei].lower() == soughttype]
        if not typematches:
            logging.warning("gisMixer found 0 typematches in grid")
            return None

        labelmatches = [c for c in typematches if self.clean_label(c[1][self.labeli]) == label]
        if not labelmatches:
            logging.warning("gisMaker could not find any data for label = '%s' in grid '%s'", label, gridsquare)
            return None

        if disambiguator != 0:
            matches = [c for c in labelmatches if c[1][self.disambiguatori] == disambiguator]
            if not matches:
                logging.warning(
                    "gisMaker could not find any data for label = '%s' and disambiguator = '%s' in grid '%s'",
                    label, disambiguator, gridsquare)
                return None
        else:
            matches = labelmatches

        if len(matches) > 1:
            mult = self._multiple_for(config, soughttype, p.rowi)
            if len(matches) != mult:
                logging.warning(
                    "gisMaker found %s apparent matches for label = '%s', in grid '%s', with disambiguator = '%s' and mult = '%s'",
                    len(matches), label, gridsquare, disambiguator, mult)
                return None

        for i, _ in matches:
            self.fieldmatches[i] = True
        return [value for _, value in matches]

    def clean_label(self, raw_label):
        """Return *raw_label* normalized for comparison.

        Removes the space after a slash, cuts the label one character before
        a section sign (U+00A7) found past the first position, drops the
        ", Mon." and ", T." suffixes, replaces curly single quotes with
        straight ones and strips surrounding whitespace.
        """
        clabel = raw_label.replace("/ ", "/")
        diamondi = clabel.find(u'\xa7')
        if diamondi > 0:
            # also drops the character right before the sign
            clabel = clabel[:diamondi - 1]
        clabel = clabel.replace(", Mon.", "")
        clabel = clabel.replace(", T.", "")
        clabel = clabel.replace(u"\u2018", u"'")
        clabel = clabel.replace(u"\u2019", u"'")
        return clabel.strip()
|---|
| 288 |
|
|---|
| 289 |
|
|---|
| 290 |
|
|---|