diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index 79f35821..f7d5cb42 100644 --- a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -13,7 +13,7 @@ import ckan.plugins as p -from ckanext.dcat.utils import catalog_uri, dataset_uri, url_to_rdflib_format, DCAT_EXPOSE_SUBCATALOGS +from ckanext.dcat.utils import catalog_uri, dataset_uri, catalog_record_uri, url_to_rdflib_format, DCAT_EXPOSE_SUBCATALOGS from ckanext.dcat.profiles import DCAT, DCT, FOAF from ckanext.dcat.exceptions import RDFProfileException, RDFParserException @@ -264,6 +264,24 @@ def graph_from_dataset(self, dataset_dict): return dataset_ref + def graph_from_catalog_record(self, dataset_dict, dataset_ref, catalog_ref): + ''' + Creates a graph for the catalog record using the loaded profiles + + The class RDFLib graph (accessible via `serializer.g`) will be updated + by the loaded profiles. + + Returns the reference to the catalog record, which will be an rdflib URIRef. + ''' + + catalog_record_ref = URIRef(catalog_record_uri(dataset_dict)) + + for profile_class in self._profiles: + profile = profile_class(self.g, compatibility_mode=self.compatibility_mode) + profile.graph_from_catalog_record(dataset_dict, dataset_ref, catalog_record_ref) + + return catalog_record_ref + def graph_from_catalog(self, catalog_dict=None): ''' Creates a graph for the catalog (CKAN site) using the loaded profiles @@ -356,6 +374,10 @@ def serialize_catalog(self, catalog_dict=None, dataset_dicts=None, if dataset_dicts: for dataset_dict in dataset_dicts: dataset_ref = self.graph_from_dataset(dataset_dict) + catalog_record_ref = self.graph_from_catalog_record(dataset_dict, dataset_ref, catalog_ref) + # Graph.triples() returns a generator, which is always truthy, so test membership to link only records a profile actually created + if (catalog_record_ref, RDF.type, DCAT.CatalogRecord) in self.g: + self.g.add((catalog_ref, DCAT.record, catalog_record_ref)) cat_ref = self._add_source_catalog(catalog_ref, dataset_dict, dataset_ref) if not cat_ref: diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py 
index 29802793..80b64d6a 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -1305,6 +1305,21 @@ def _extract_catalog_dict(self, catalog_ref): ) return out + def graph_from_catalog_record(self, dataset_dict, dataset_ref, catalog_record_ref): + """ + Creates an RDF graph for the catalog record + + The class RDFLib graph (accessible via `self.g`) should be updated on + this method + + `dataset_dict` is a dict with the dataset metadata like the one + returned by `package_show`. `dataset_ref` is an rdflib URIRef object + that must be used to reference the dataset when working with the graph. + `catalog_record_ref` is an rdflib URIRef object + that must be used to reference the catalog record when working with the graph. + """ + pass + def graph_from_catalog(self, catalog_dict, catalog_ref): """ Creates an RDF graph for the whole catalog (site) diff --git a/ckanext/dcat/profiles/dcat_us_3.py b/ckanext/dcat/profiles/dcat_us_3.py index c2bd2ae8..6d33cc3a 100644 --- a/ckanext/dcat/profiles/dcat_us_3.py +++ b/ckanext/dcat/profiles/dcat_us_3.py @@ -62,6 +62,10 @@ def graph_from_catalog(self, catalog_dict, catalog_ref): self._graph_from_catalog_base(catalog_dict, catalog_ref) + def graph_from_catalog_record(self, dataset_dict, dataset_ref, catalog_record_ref): + + self._graph_from_catalog_record_base(dataset_dict, dataset_ref, catalog_record_ref) + def _data_dictionary_parse(self, data_dict, subject): g = self.g diff --git a/ckanext/dcat/profiles/euro_dcat_ap.py b/ckanext/dcat/profiles/euro_dcat_ap.py index c2ac56b9..edfc01e6 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap.py +++ b/ckanext/dcat/profiles/euro_dcat_ap.py @@ -31,6 +31,10 @@ def graph_from_catalog(self, catalog_dict, catalog_ref): self._graph_from_catalog_base(catalog_dict, catalog_ref) + def graph_from_catalog_record(self, dataset_dict, dataset_ref, catalog_record_ref): + + self._graph_from_catalog_record_base(dataset_dict, dataset_ref, catalog_record_ref) + def 
_graph_from_dataset_v1_only(self, dataset_dict, dataset_ref): """ CKAN -> DCAT v1 specific properties (not applied to higher versions) diff --git a/ckanext/dcat/profiles/euro_dcat_ap_2.py b/ckanext/dcat/profiles/euro_dcat_ap_2.py index e5204be1..0c5de817 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_2.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_2.py @@ -57,6 +57,10 @@ def graph_from_catalog(self, catalog_dict, catalog_ref): self._graph_from_catalog_base(catalog_dict, catalog_ref) + def graph_from_catalog_record(self, dataset_dict, dataset_ref, catalog_record_ref): + + self._graph_from_catalog_record_base(dataset_dict, dataset_ref, catalog_record_ref) + def _parse_dataset_v2(self, dataset_dict, dataset_ref): """ DCAT -> CKAN properties carried forward to higher DCAT-AP versions diff --git a/ckanext/dcat/profiles/euro_dcat_ap_3.py b/ckanext/dcat/profiles/euro_dcat_ap_3.py index 92206558..492571d7 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_3.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_3.py @@ -49,6 +49,10 @@ def graph_from_catalog(self, catalog_dict, catalog_ref): self._graph_from_catalog_base(catalog_dict, catalog_ref) + def graph_from_catalog_record(self, dataset_dict, dataset_ref, catalog_record_ref): + + self._graph_from_catalog_record_base(dataset_dict, dataset_ref, catalog_record_ref) + def _graph_from_dataset_v3(self, dataset_dict, dataset_ref): dataset_series = False diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index e42c6afb..38927c7f 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -779,3 +779,18 @@ def _graph_from_catalog_base(self, catalog_dict, catalog_ref): modified = self._last_catalog_modification() if modified: self._add_date_triple(catalog_ref, DCT.modified, modified) + + def _graph_from_catalog_record_base(self, dataset_dict, dataset_ref, catalog_record_ref): + g = self.g + + for prefix, namespace in namespaces.items(): + 
g.bind(prefix, namespace) + + g.add((catalog_record_ref, RDF.type, DCAT.CatalogRecord)) + g.add((catalog_record_ref, FOAF.primaryTopic, dataset_ref)) + # NOTE: _graph_from_dataset_base sets dct:modified for dcat:Dataset with metadata_modified too + # This might be semantically incorrect, as this should pertain to the content of the dataset according to MobilityDCAT's interpretation of DCAT-AP2 + items =[ + ('metadata_modified', DCT.modified, None, Literal), + ] + self._add_date_triples_from_dict(dataset_dict, catalog_record_ref, items) diff --git a/ckanext/dcat/utils.py b/ckanext/dcat/utils.py index 0931aecc..fa5f25df 100644 --- a/ckanext/dcat/utils.py +++ b/ckanext/dcat/utils.py @@ -166,6 +166,24 @@ def dataset_uri(dataset_dict): return uri +def catalog_record_uri(dataset_dict): + ''' + Returns an URI for the catalog record + + This will be used to uniquely reference the catalog record on the RDF + serializations. + + Returns a string with the catalog record URI. + ''' + if dataset_dict.get('id'): + uri = '{0}/catalog-record/{1}'.format(catalog_uri().rstrip('/'), + dataset_dict['id']) + else: + uri = '{0}/catalog-record/{1}'.format(catalog_uri().rstrip('/'), + str(uuid.uuid4())) + log.warning('Using a random id for catalog record URI') + + return uri def resource_uri(resource_dict): '''