Changeset 985

Show
Ignore:
Timestamp:
08/10/07 12:27:39 (1 year ago)
Author:
thomase
Message:

When splitting MODS records from a single collection file into separate files, the xlink:href on relatedItem has to be changed from a relative internal link (based on id) to a relative external link based on filename. This assumes a flat directory, or the appearance of one, since that is how we do it on the web.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • BibIt/trunk/bibchunker.py

    r952 r985  
    2828NSD = {'mods': MODS, 'xlink': XLINK} 
    2929     
     30XLINKCLEANUPXSLT = 'mungexlinksforsplit.xsl' 
     31 
     32     
    3033MODSTOP = '<?xml version="1.0" encoding="UTF-8"?>\n<mods xmlns="http://www.loc.gov/mods/v3"\n' \ 
    3134    + 'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\nxmlns:mods="http://www.loc.gov/mods/v3"\n' \ 
     
    3942     
    4043    try:                                 
    41         opts, args = getopt.getopt(argv, "s:d:x:", ["source=", "destination=", "htmlsource="]) 
     44        opts, args = getopt.getopt(argv, "s:d:x:h", ["source=", "destination=", "htmlsource=", "help"]) 
    4245    except getopt.GetoptError: 
    43         usage() 
     46        print __doc__ 
    4447        sys.exit(2)                      
    4548     
     
    8083            print 'made new directories for %s' % modspath 
    8184         
    82         modsxml = etree.tounicode(mods, pretty_print=1).encode('utf-8', 'xmlcharrefreplace') 
     85         
     86        modsxml = etree.tounicode(doxlinkcleanup(mods), pretty_print=1).encode('utf-8', 'xmlcharrefreplace') 
    8387        modsxml = MODSTOP + modsxml[5:].strip() 
    8488        modsfilepath = os.path.join(modspath, modsfile) 
     
    104108    sys.exit(2) 
    105109     
     110def doxlinkcleanup(source): 
     111    contextpath = os.getcwd() 
     112    xslt_doc = etree.parse(os.path.join(contextpath, XLINKCLEANUPXSLT)) 
     113    transform = etree.XSLT(xslt_doc) 
     114    result = etree.XML(etree.tounicode(transform(source))) 
     115    return result 
     116     
     117     
    106118if __name__ == "__main__": 
    107119    main(sys.argv[1:])