From 6133880739ba23255f9b06525031ae66f1835fe0 Mon Sep 17 00:00:00 2001 From: Francesco Piero Paolicelli Date: Sat, 24 Jan 2026 16:50:14 +0100 Subject: [PATCH] Update processors.py add dcat:service in dcat:Catalog dcat:service must be in dcat:Catalog if distribution have dcat:accessService --- ckanext/dcat/processors.py | 72 ++++++++++++++++++++++++++++---------- 1 file changed, 53 insertions(+), 19 deletions(-) diff --git a/ckanext/dcat/processors.py b/ckanext/dcat/processors.py index 79f35821..879e361f 100644 --- a/ckanext/dcat/processors.py +++ b/ckanext/dcat/processors.py @@ -333,39 +333,73 @@ def serialize_catalog(self, catalog_dict=None, dataset_dicts=None, _format='xml', pagination_info=None): ''' Returns an RDF serialization of the whole catalog - - `catalog_dict` can contain literal values for the dcat:Catalog class - like `title`, `homepage`, etc. If not provided these would get default - values from the CKAN config (eg from `ckan.site_title`). - - If passed a list of CKAN dataset dicts, these will be also serializsed - as part of the catalog. - **Note:** There is no hard limit on the number of datasets at this - level, this should be handled upstream. - - The serialization format can be defined using the `_format` parameter. - It must be one of the ones supported by RDFLib, defaults to `xml`. - - `pagination_info` may be a dict containing keys describing the results - pagination. See the `_add_pagination_triples()` method for details. - - Returns a string with the serialized catalog + ... ''' catalog_ref = self.graph_from_catalog(catalog_dict) + if dataset_dicts: for dataset_dict in dataset_dicts: + + # ------------------------------------------------------ + # FIX ACCESS_RIGHTS PERSO IN package_search (catalog.ttl) + # ------------------------------------------------------ + if not dataset_dict.get('access_rights'): + extras = dataset_dict.get('extras', []) + + # CKAN extras come lista di dict + if isinstance(extras, list): + for e in extras: + if e.get('key') == 'access_rights' and e.get('value'): + dataset_dict['access_rights'] = e['value'] + break + + # CKAN extras come dict + elif isinstance(extras, dict): + if extras.get('access_rights'): + dataset_dict['access_rights'] = extras['access_rights'] + # ------------------------------------------------------ + # FINE FIX + # ------------------------------------------------------ + dataset_ref = self.graph_from_dataset(dataset_dict) + log.debug('catalog_ref in graph %s', catalog_ref) cat_ref = self._add_source_catalog(catalog_ref, dataset_dict, dataset_ref) - if not cat_ref: - self.g.add((catalog_ref, DCAT.dataset, dataset_ref)) + + # scegli UNA VOLTA il catalogo target + if cat_ref: + target_catalog = cat_ref + else: + org_site = self.g.objects( + URIRef(str(catalog_ref) + "/organization/" + dataset_dict.get('owner_org')), + VCARD.hasURL + ) + try: + target_catalog = next(org_site) + except StopIteration: + target_catalog = catalog_ref # fallback pulito + + # collega il dataset + self.g.add((target_catalog, DCAT.dataset, dataset_ref)) + + # collega i servizi (DCAT-AP 2/3): Catalog -> dcat:service + try: + for dist in self.g.objects(dataset_ref, DCAT.distribution): + for svc in self.g.objects(dist, DCAT.accessService): + self.g.add((target_catalog, DCAT.service, svc)) + except Exception as e: + log.debug( + "Unable to add dcat:service for dataset %s: %r", + dataset_dict.get('name') or dataset_dict.get('id'), e + ) if pagination_info: self._add_pagination_triples(pagination_info) if not _format: _format = 'xml' + _format = url_to_rdflib_format(_format) output = self.g.serialize(format=_format)