root/BADataMunger/trunk/bibliosaver.py

Revision 844, 2.0 kB (checked in by thomase, 2 years ago)

Incorporate the mods mixing process (enhancing the records with data from the awmc bib database) into the main production pipeline for the full munge.

  • Property svn:eol-style set to native
Line 
1 from os.path import join
2 from StringIO import StringIO
3
4 import lxml.etree as etree
5
6 import modsmixer
7
# XML namespace URIs.  MODS is interpolated into "{uri}localname" tag names
# by the element-building functions in this module.
MODS = "http://www.loc.gov/mods/v3"
XHTML = "http://www.w3.org/1999/xhtml"

# File name of the namespace-cleanup stylesheet; do_nscleanup() joins it onto
# the context path it is given.
NSCLEANUPXSLT = 'nscleanup.xsl'
12
def save_biblio_mods(self):
    """Save all of the bibliography as a mods file, then mix it with the library.

    ``self`` is read like a mapping.  The keys used here are:

    * ``'bibliography'`` -- iterable of entries; for each entry, item 0 becomes
      the abbreviated title, item 1 the plain title and item 2 is handed to
      do_citation().
    * ``'contextpath'`` -- directory passed to do_nscleanup() to locate the
      cleanup stylesheet.
    * ``'filenameroot'`` -- stem used to name both output files.
    * ``'bibdestdir'`` -- directory both output files are written under.
    * ``'biblibfile'`` -- passed through to modsmixer.Mixer as its second
      argument.

    Writes ``<filenameroot>-biblio-mods.xml`` and then hands that path, the
    library file and the ``<filenameroot>-biblio-mods-mixed.xml`` path to
    modsmixer.Mixer.  Returns None.
    """

    # build one mods element per bibliography entry inside a single collection
    modsCollection = etree.Element("{%s}modsCollection" % MODS)
    for bib in self['bibliography']:
        mods = etree.Element("{%s}mods" % MODS)
        modsCollection.append(mods)
        mods.append(do_title(bib[0], 'abbreviated'))
        mods.append(do_title(bib[1]))
        mods.append(do_citation(bib[2]))

    # lxml etree makes ugly xml with lots of unnecessarily repeated namespace
    # attributes, so clean it up
    cleantree = do_nscleanup(self['contextpath'], modsCollection)

    # Ask the serialiser for UTF-8 bytes directly.  Calling .encode() on the
    # already-encoded result of tostring() only worked through an implicit
    # ASCII round trip and never produced real UTF-8.
    pcontent = etree.tostring(cleantree, encoding='utf-8')
    destfile = "%s-biblio-mods.xml" % self['filenameroot']
    destpath = join(self['bibdestdir'], destfile)
    # binary mode because pcontent is encoded bytes; the with-block closes the
    # file even when the write fails
    with open(destpath, 'wb') as g:
        g.write(pcontent)

    # mix the content in the mods file with content in the master library file
    # NOTE(review): the Mixer instance is not used afterwards, so constructing
    # it is presumably what performs the mix -- confirm against modsmixer.
    mixedfile = "%s-biblio-mods-mixed.xml" % self['filenameroot']
    modsmixer.Mixer(destpath, self['biblibfile'], join(self['bibdestdir'], mixedfile))
41        
def do_title(content, titleType=None):
    """Build a MODS titleInfo element holding a single title child.

    content is converted with unicode() and becomes the text of the title
    element.  When titleType is truthy it is stored as the ``type`` attribute
    of the titleInfo element; otherwise no attribute is set.

    Returns the titleInfo element.
    """
    # inner <title> carrying the text
    title_node = etree.Element("{%s}title" % MODS)
    title_node.text = unicode(content)

    # outer <titleInfo>, optionally typed (e.g. 'abbreviated')
    info_node = etree.Element("{%s}titleInfo" % MODS)
    if titleType:
        info_node.set('type', titleType)

    info_node.append(title_node)
    return info_node
50    
def do_citation(content):
    """Wrap content in a new MODS abstract element and return the wrapper.

    content is passed straight to the element's append(), so it has to be
    something append() accepts (an element node, not plain text).
    """
    wrapper = etree.Element("{%s}abstract" % MODS)
    # NOTE(review): lxml's append() re-parents the node, so content is moved
    # out of whatever tree it belonged to -- confirm callers do not need it
    # there afterwards.
    wrapper.append(content)
    return wrapper
55    
def do_nscleanup(contextpath, source):
    """Apply the namespace-cleanup stylesheet to source and return the result.

    The stylesheet is the file named by NSCLEANUPXSLT inside contextpath.
    The transform output is serialised to a string and parsed again; the
    value returned is what etree.XML() yields for that string.
    """
    stylesheet_path = join(contextpath, NSCLEANUPXSLT)
    cleanup = etree.XSLT(etree.parse(stylesheet_path))

    transformed = cleanup(source)
    # round trip through a string so the caller receives a freshly parsed tree
    serialised = etree.tostring(transformed)
    return etree.XML(serialised)
61    
62        
Note: See TracBrowser for help on using the browser.