Skip to content
27 changes: 27 additions & 0 deletions ckanext/xloader/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,30 @@ def after_upload(self, context, resource_dict, dataset_dict):
the resource that was uploaded
"""
pass

def datastore_before_update(self, resource_id, existing_info, new_headers):
    """ Hook invoked by the loader immediately before it changes a
    resource's DataStore table, whether by truncating it, dropping and
    recreating it, or creating it for the first time.

    Plugins can use it to compare the columns currently in the DataStore
    with those detected in the incoming file -- for instance, to record
    an activity when columns are added, removed or renamed.

    The loader passes every argument by keyword, so implementations
    must keep the parameter names below.

    :param resource_id: ID of the resource whose DataStore table is about
        to be updated.
    :type resource_id: string

    :param existing_info: ``{field_id: info_dict}`` mapping taken from
        the Data Dictionary of the existing DataStore table, or ``None``
        when no DataStore table exists yet.
    :type existing_info: dict or None

    :param new_headers: field dicts that are about to be written to the
        DataStore. Every dict carries at least an ``id`` and a ``type``
        key; fields that already existed may also carry an ``info``
        dict.
    :type new_headers: list of dicts

    Whatever the implementation returns is ignored.
    """
    pass
16 changes: 16 additions & 0 deletions ckanext/xloader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,23 @@

import ckan.plugins as p

from .interfaces import IXloader
from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
from .parser import CSV_SAMPLE_LINES, TypeConverter
from .utils import cleanup_temp_file, datastore_resource_exists, headers_guess, type_guess


def _notify_datastore_before_update(resource_id, existing_info, new_headers):
    """Run the ``datastore_before_update`` hook of every IXloader plugin.

    Invoked when the DataStore table for ``resource_id`` is about to
    change; the three arguments are forwarded unchanged, by keyword.
    See ``IXloader.datastore_before_update`` for their meaning.
    """
    # Assemble the keyword arguments once; each plugin receives the same set.
    hook_kwargs = {
        'resource_id': resource_id,
        'existing_info': existing_info,
        'new_headers': new_headers,
    }
    for implementation in p.PluginImplementations(IXloader):
        implementation.datastore_before_update(**hook_kwargs)

from ckan.plugins.toolkit import config

import ckanext.datastore.backend.postgres as datastore_db
Expand Down Expand Up @@ -356,6 +369,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', allow_type_guessing
'''
fields_match = _fields_match(fields, existing_fields, logger)
if fields_match == FieldMatch.EXACT_MATCH:
_notify_datastore_before_update(resource_id, existing_info, fields)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Line 33 uses kwargs, this uses args. Should be consistent.

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated here okfn@8b2a706 @ThrawnCA

logger.info('Clearing records for "%s" from DataStore.', resource_id)
_clear_datastore_resource(resource_id)
else:
Expand All @@ -366,6 +380,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', allow_type_guessing
# then we need to re-guess types
if allow_type_guessing and fields_match == FieldMatch.MISMATCH:
raise LoaderError("File structure has changed, reverting to Tabulator")
_notify_datastore_before_update(resource_id, existing_info, fields)
else:
fields = [
{'id': header_name,
Expand Down Expand Up @@ -590,6 +605,7 @@ def row_iterator():
Otherwise 'datastore_create' will append to the existing datastore.
And if the fields have significantly changed, it may also fail.
'''
_notify_datastore_before_update(resource_id, existing_info, headers_dicts)
if existing:
if _fields_match(headers_dicts, existing_fields, logger) == FieldMatch.EXACT_MATCH:
logger.info('Clearing records for "%s" from DataStore.', resource_id)
Expand Down
Loading