Writing custom maps

Manatus scenarios expose data from XML and JSON sources for processing. Manatus SourceResource objects are data containers that apply the Digital Public Library of America Metadata Application Profile version 4 (DPLA MAPv4) standard. Many elements can be copied directly from the source document to the SourceResource object. Often, however, it is desirable to add processing steps that change the data en route. This is where writing a custom map is helpful.

Here is the default Dublin Core map at manatus.maps.dc_standard_map():

def dc_standard_map(record):
    """Map a Dublin Core record onto a new SourceResource.

    Most fields are copied across unchanged. Contributor, creator, place
    and subject values are wrapped in ``{'name': value}`` dicts, and are
    only assigned when the record actually carries a value for them.

    Args:
        record: Source record exposing the Dublin Core attributes read
            below (``contributor``, ``creator``, ``date``, ...,
            ``harvest_id``).

    Yields:
        A ``(sr, tn)`` tuple: the populated SourceResource and the
        thumbnail, which this default map always leaves as ``None``.
    """
    logger.debug(f'Loaded {__name__}.dc_standard_map map')
    sr = SourceResource()
    if record.contributor:
        sr.contributor = [{'name': name} for name in record.contributor]
    # Check before iterating: an ``if record.creator`` placed inside the
    # comprehension is only a per-item filter and cannot stop the loop from
    # trying to iterate a missing (presumably None) creator value.
    if record.creator:
        sr.creator = [{'name': name} for name in record.creator]
    sr.date = record.date
    sr.description = record.description
    sr.format = record.format
    sr.identifier = record.harvest_id
    sr.language = record.language
    if record.place:
        sr.spatial = [{'name': place} for place in record.place]
    sr.publisher = record.publisher
    sr.rights = record.rights
    if record.subject:
        sr.subject = [{'name': subject} for subject in record.subject]
    sr.title = record.title
    sr.type = record.type
    tn = None
    yield sr, tn

Compare that to a standalone Dublin Core map with additional processing and logging instructions:

import logging

from citrus import SourceResource

# Module-level logger named after this map module.
logger = logging.getLogger(__name__)
# NullHandler discards records, so the map stays silent unless the
# application configures logging itself.
logger.addHandler(logging.NullHandler())
logger.debug(f'Loaded {__name__} map')


def fiu_dc_map(rec):
    """Map a Dublin Core record onto a SourceResource with extra processing.

    Beyond copying fields, this map builds a structured date, keeps only
    identifiers containing ``dpanther.fiu.edu``, wraps languages in
    ``{'name': ...}`` dicts and separates rights URIs from rights text.

    Args:
        rec: Source record exposing the Dublin Core attributes read below,
            plus ``harvest_id`` (used in log messages) and ``thumbnail``.

    Yields:
        A ``(sr, tn)`` tuple: the populated SourceResource and the record's
        thumbnail (``None`` when the record has none). Nothing is yielded
        when the identifiers cannot be iterated or the rights are missing;
        those cases are logged as errors.
    """
    sr = SourceResource()

    # contributor
    if rec.contributor:
        sr.contributor = [{'name': contributor} for contributor in
                          rec.contributor]

    # creator
    if rec.creator:
        sr.creator = [{'name': creator} for creator in
                      rec.creator]

    # date
    try:
        first_date = rec.date[0]
    except (TypeError, IndexError):
        # TypeError: date is not subscriptable (e.g. None).
        # IndexError: date is an empty sequence.
        logger.info(f"No date - {rec.harvest_id}")
    else:
        # The first date value serves as begin, end and display date.
        sr.date = {'begin': first_date,
                   'end': first_date,
                   'displayDate': first_date}

    # description
    sr.description = rec.description

    # format
    sr.format = rec.format

    # identifier
    try:
        # If several identifiers match, the last one wins.
        # NOTE(review): when none match, sr.identifier is left unset and
        # nothing is logged - confirm this is intended.
        for identifier in rec.identifier:
            if 'dpanther.fiu.edu' in identifier:
                sr.identifier = identifier
    except (TypeError, AttributeError):
        logger.error(f"No identifier - {rec.harvest_id}")
        # Ending the generator here means no record is yielded.
        return

    # language
    try:
        sr.language = [{'name': lang} for lang in rec.language]
    except TypeError:
        logger.info(f"No language - {rec.harvest_id}")

    # place
    if rec.place:
        sr.spatial = [{'name': place} for place in rec.place]

    # publisher
    sr.publisher = rec.publisher

    # rights
    try:
        if len(rec.rights) > 1:
            # Several statements: keep the (last) one that is a URI.
            for r in rec.rights:
                if r.startswith('http'):
                    sr.rights = [{'@id': r}]
        else:
            if rec.rights[0].startswith('http'):
                sr.rights = [{'@id': rec.rights[0]}]
            else:
                logger.warning(f"No rights URI - {rec.harvest_id}")
                sr.rights = [{'text': rec.rights[0]}]
    except (TypeError, IndexError):
        # TypeError: rights is missing (e.g. None).
        # IndexError: rights is an empty sequence.
        logger.error(f"No rights - {rec.harvest_id}")
        # Ending the generator here means no record is yielded.
        return

    # subject
    if rec.subject:
        sr.subject = [{'name': subject} for subject in rec.subject]

    # title
    sr.title = rec.title

    # type
    sr.type = rec.type

    # thumbnail: any falsy value is normalised to None
    tn = rec.thumbnail or None

    yield sr, tn

Additional custom maps can be found and used for reference at https://github.com/SunshineStateDigitalNetwork/loren-ipsum.

Note

Make sure the custom map function has a unique name that will not collide with anything that comes before it in the method resolution order (MRO).

Custom maps can be attached to a data source through the manatus_scenarios.cfg configuration file.