Source code for minsci.xmu.tools.biblio.bibcheck

import re
from collections import namedtuple

from ....xmu import XMu, BiblioRecord


# Summary of one bibliography record: the value of its irn field, its
# list of author names, and its publication date
Existing = namedtuple('Existing', ('irn', 'authors', 'pub_date'))


class BibCheck(XMu):
    """Builds a lookup of author/date summaries for bibliography records

    Attributes:
        records (dict): maps the irn of each record passed to iterate() to
            an Existing tuple summarizing that record
    """

    def __init__(self, *args, **kwargs):
        # Records are always parsed into BiblioRecord, replacing any
        # container supplied by the caller
        kwargs['container'] = BiblioRecord
        super(BibCheck, self).__init__(*args, **kwargs)
        self.records = {}

    def iterate(self, element):
        """Parses an element and stores its summary in self.records

        Nothing is returned; the summary is keyed by the record's irn.
        """
        rec = self.parse(element)
        irn = rec('irn')
        self.records[irn] = Existing(irn=irn,
                                     authors=self.get_authors(rec),
                                     pub_date=self.get_pub_date(rec))

    @staticmethod
    def get_authors(rec):
        """Returns the normalized NamLast value of each author on a record

        Normalization strips every character other than ASCII letters and
        digits, then lowercases what is left.
        """
        field = '{}AuthorsRef_tab'.format(rec.prefix)
        names = []
        for author in rec(field):
            # NamLast is presumably the author's last name -- TODO confirm
            stripped = re.sub(r'[^A-Za-z0-9]', '', author['NamLast'])
            names.append(stripped.lower())
        return names

    @staticmethod
    def get_pub_date(rec):
        """Returns the year from the first populated publication date field

        The singular field is checked before the plural one. Returns None
        if neither field has a value.
        """
        templates = ('{}PublicationDate', '{}PublicationDates')
        for template in templates:
            found = rec(template.format(rec.prefix))
            if found:
                return get_year(found)
        return None
def compare_citations(authors, pub_date, existing, show_warnings=True):
    """Checks new author and publication date against existing record

    Messages are printed to stdout. Those prefixed with "E:" describe a
    mismatch that fails the check; those prefixed with "W:" describe a
    mismatch that does not.

    Args:
        authors (list): author names for the new citation. These are
            compared using == against existing.authors, so they need to
            be normalized the same way.
        pub_date (str): publication date for the new citation
        existing: object with irn, authors, and pub_date attributes
            describing the record to compare against
        show_warnings (bool): if False, "W:" messages are not printed

    Returns:
        False if no authors were given, if the first authors differ, or if
        the existing record has a year that differs from the new one.
        True otherwise.
    """
    # NOTE: print is always called with one parenthesized argument. That
    # form prints the same text whether print is the Python 2 statement or
    # the Python 3 function.

    # Test authors
    if not authors:
        msg = 'E: No authors found: {}'.format(existing.irn)
        print(msg)
        return False

    # Test first authors. These MUST match.
    if existing.authors and authors[0] != existing.authors[0]:
        msg = ('E: First author mismatch: {}: {} =>'
               ' {}').format(existing.irn, existing.authors, authors)
        print(msg)
        return False

    # Test full author list. This only yields a warning because mismatches
    # here are common and to some extent expected (for example, if there is a
    # long list of authors). A trailing 'others' entry on the existing record
    # suppresses the warning.
    if (existing.authors
            and authors != existing.authors
            and existing.authors[-1] != 'others'):
        if show_warnings:
            msg = ('W: Author mismatch: {}: {} =>'
                   ' {}').format(existing.irn, existing.authors, authors)
            print(msg)

    # Test publication year. This MUST match, but only when a year could be
    # parsed from the existing record.
    new_year = get_year(pub_date)
    old_year = get_year(existing.pub_date)
    if old_year and old_year != new_year:
        msg = ('E: Pub. year mismatch: {}: {} =>'
               ' {}').format(existing.irn, existing.pub_date, pub_date)
        print(msg)
        return False

    return True
def get_year(val):
    """Parses a four-digit year from a date string

    Args:
        val (str): text to search. May be None.

    Returns:
        The first run of four consecutive digits in val as a string, or
        None if val is None or contains no such run.
    """
    if val is None:
        return None
    found = re.search(r'\d{4}', val)
    if found is None:
        return None
    return found.group(0)