diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7db99384..9f83c6d8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -77,4 +77,4 @@ jobs: ckan -c test.ini db init ckan -c test.ini db pending-migrations --apply - name: Run tests - run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests + run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests \ No newline at end of file diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py index 29802793..5b0591e0 100644 --- a/ckanext/dcat/profiles/base.py +++ b/ckanext/dcat/profiles/base.py @@ -7,7 +7,7 @@ from ckantoolkit import ObjectNotFound, asbool, aslist, config, get_action, url_for from dateutil.parser import parse as parse_date from geomet import InvalidGeoJSONException, wkt -from rdflib import BNode, Literal, URIRef, term +from rdflib import BNode, Literal, URIRef, term, PROV from rdflib.namespace import ORG, RDF, RDFS, SKOS, XSD, Namespace from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS @@ -95,7 +95,6 @@ def __new__(cls, value, lang=None): # In case something goes wrong: use Literal return Literal(value, lang=lang) - class CleanedURIRef(object): """Performs some basic URL encoding on value before creating an URIRef object. 
def _add_agent_to_graph(self, subject_ref, predicate, agent_dict):
    """
    Serialize one agent (or a list of agents) into the RDF graph.

    The agent node is typed as both foaf:Organization and foaf:Agent and
    is linked from ``subject_ref`` through ``predicate``.

    Parameters:
    - subject_ref: The RDF subject (dataset, activity, etc.)
    - predicate: The RDF predicate (e.g., dct:publisher,
      prov:wasAssociatedWith, dcat:agent)
    - agent_dict: A dict with agent metadata (uri, name, email, url,
      homepage, type, identifier, actedOnBehalfOf). A list of such dicts
      is accepted as well, because ``_agents_details`` returns a list.

    Returns the node created for the agent; a list of nodes when a list
    was passed; None when ``agent_dict`` is neither a dict nor a list.
    """
    if isinstance(agent_dict, (list, tuple)):
        refs = []
        for item in agent_dict:
            ref = self._add_agent_to_graph(subject_ref, predicate, item)
            if ref is not None:
                refs.append(ref)
        return refs
    if not isinstance(agent_dict, dict):
        return None

    # The key can be present with a None value, in which case the
    # ``.get("uri", "")`` default is not used and ``.strip()`` would fail.
    uri = (agent_dict.get("uri") or "").strip()

    # The agent node is used as a triple subject, so it has to be a URI or
    # a blank node. CleanedURIRef (also used for contact points) always
    # builds a URIRef; URIRefOrLiteral may hand back a Literal instead.
    agent_ref = CleanedURIRef(uri) if uri else BNode()

    graph = self.g
    graph.add((subject_ref, predicate, agent_ref))
    graph.add((agent_ref, RDF.type, FOAF.Organization))
    graph.add((agent_ref, RDF.type, FOAF.Agent))

    name = agent_dict.get("name")
    if name:
        graph.add((agent_ref, FOAF.name, Literal(name)))

    email = agent_dict.get("email")
    if email:
        if not email.startswith("mailto:"):
            email = f"mailto:{email}"
        graph.add((agent_ref, FOAF.mbox, URIRef(email)))

    # Both "url" and "homepage" are mapped to foaf:homepage.
    for key in ("url", "homepage"):
        if agent_dict.get(key):
            graph.add((agent_ref, FOAF.homepage, URIRef(agent_dict[key])))

    if agent_dict.get("type"):
        # The type may be free text (e.g. "Organization") rather than a
        # URI, so let URIRefOrLiteral choose the right term.
        graph.add((agent_ref, DCT.type, URIRefOrLiteral(agent_dict["type"])))

    if agent_dict.get("identifier"):
        graph.add((agent_ref, DCT.identifier, Literal(agent_dict["identifier"])))

    # ``or []`` covers an explicit None stored under the key.
    for sub_org in agent_dict.get("actedOnBehalfOf") or []:
        if not isinstance(sub_org, dict) or not sub_org.get("name"):
            continue
        org_ref = BNode()
        graph.add((agent_ref, PROV.actedOnBehalfOf, org_ref))
        graph.add((org_ref, RDF.type, PROV.Organization))
        graph.add((org_ref, FOAF.name, Literal(sub_org["name"])))

    return agent_ref


def _add_contact_to_graph(self, subject, predicate, contact):
    """
    Serialize one contact point (or a list of them) as vcard:Kind nodes.

    Parameters:
    - subject: The RDF subject the contact belongs to
    - predicate: The RDF predicate linking subject and contact
      (e.g., dcat:contactPoint)
    - contact: A dict with the keys uri, name, email, identifier and url
      (all optional), or a list of such dicts.

    Entries that are not dicts, or whose values are all empty, are
    skipped so that no empty vcard:Kind node is written.
    """
    if isinstance(contact, (list, tuple)):
        for item in contact:
            self._add_contact_to_graph(subject, predicate, item)
        return
    if not isinstance(contact, dict) or not any(contact.values()):
        return

    contact_uri = contact.get("uri")
    contact_details = CleanedURIRef(contact_uri) if contact_uri else BNode()

    self.g.add((contact_details, RDF.type, VCARD.Kind))
    self.g.add((subject, predicate, contact_details))

    self._add_triple_from_dict(contact, contact_details, VCARD.fn, "name")
    # The mail address is stored as a URIRef; _add_mailto is applied to the
    # value before the triple is created.
    self._add_triple_from_dict(
        contact,
        contact_details,
        VCARD.hasEmail,
        "email",
        _type=URIRef,
        value_modifier=self._add_mailto,
    )
    self._add_triple_from_dict(
        contact,
        contact_details,
        VCARD.hasUID,
        "identifier",
        _type=URIRefOrLiteral,
    )
    self._add_triple_from_dict(
        contact,
        contact_details,
        VCARD.hasURL,
        "url",
        _type=URIRef,
    )
Assumes that value is a GeoJSON string diff --git a/ckanext/dcat/profiles/euro_dcat_ap_2.py b/ckanext/dcat/profiles/euro_dcat_ap_2.py index e5204be1..fd0a6bc5 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_2.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_2.py @@ -1,7 +1,7 @@ import json from decimal import Decimal, DecimalException -from rdflib import URIRef, BNode, Literal, Namespace +from rdflib import URIRef, BNode, Literal, Namespace, FOAF, PROV, RDF, RDFS from ckanext.dcat.utils import resource_uri from .base import URIRefOrLiteral, CleanedURIRef @@ -18,7 +18,6 @@ from .euro_dcat_ap_base import BaseEuropeanDCATAPProfile - ELI = Namespace("http://data.europa.eu/eli/ontology#") @@ -65,6 +64,32 @@ def _parse_dataset_v2(self, dataset_dict, dataset_ref): # Call base super method for common properties super().parse_dataset(dataset_dict, dataset_ref) + # --- Provenance deserialization --- + was_generated_by = self.g.value(dataset_ref, PROV.wasGeneratedBy) + if was_generated_by: + activity_dict = {} + activity_dict["uri"] = str(was_generated_by) + activity_dict["type"] = [ + str(t) for t in self.g.objects(was_generated_by, RDF.type) + ] + activity_dict["label"] = self._object_value(was_generated_by, RDFS.label) + activity_dict["seeAlso"] = self._object_value(was_generated_by, RDFS.seeAlso) + activity_dict["dct_type"] = self._object_value(was_generated_by, DCT.type) + activity_dict["startedAtTime"] = self._object_value( + was_generated_by, PROV.startedAtTime + ) + + agents = self._agents_details(was_generated_by, PROV.wasAssociatedWith) + if agents: + activity_dict["wasAssociatedWith"] = [agents[0]] # Only take the first agent + + dataset_dict["provenance_activity"] = [activity_dict] + + # --- Qualified Attribution --- + qualified_attributions = self._parse_qualified_attributions(dataset_ref) + if qualified_attributions: + dataset_dict["qualified_attribution"] = qualified_attributions + # Standard values value = self._object_value(dataset_ref, DCAT.temporalResolution) if 
value: @@ -159,7 +184,7 @@ def _parse_dataset_v2(self, dataset_dict, dataset_ref): ): access_service_dict = {} - # Simple values + # Simple values for key, predicate in ( ("availability", DCATAP.availability), ("title", DCT.title), @@ -167,19 +192,43 @@ def _parse_dataset_v2(self, dataset_dict, dataset_ref): ("license", DCT.license), ("access_rights", DCT.accessRights), ("description", DCT.description), + ("identifier", DCT.identifier), + ("description", DCT.description), + ("modified", DCT.modified), ): value = self._object_value(access_service, predicate) if value: access_service_dict[key] = value - # List + + # List values for key, predicate in ( ("endpoint_url", DCAT.endpointURL), ("serves_dataset", DCAT.servesDataset), + ("conforms_to", DCT.conformsTo), + ("format", DCT["format"]), + ("language", DCT.language), + ("rights", DCT.rights), + ("landing_page", DCAT.landingPage), + ("keyword", DCAT.keyword), + ("applicable_legislation", DCATAP.applicableLegislation), + ("theme", DCAT.theme), ): values = self._object_value_list(access_service, predicate) if values: access_service_dict[key] = values + contact_points = self._contact_details(access_service, DCAT.contactPoint) + if contact_points: + access_service_dict["contact"] = contact_points + + publishers = self._agents_details(access_service, DCT.publisher) + if publishers: + access_service_dict["publisher"] = publishers + + creators = self._agents_details(access_service, DCT.creator) + if creators: + access_service_dict["creator"] = creators + # Access service URI (explicitly show the missing ones) access_service_dict["uri"] = ( str(access_service) @@ -247,6 +296,44 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref): _class=_class, ) + # --- Provenance serialization --- + activities = dataset_dict.get("provenance_activity", []) + + for activity in activities: + activity_uri = URIRef(activity.get("uri")) if activity.get("uri") else BNode() + self.g.add((dataset_ref, PROV.wasGeneratedBy, activity_uri)) 
+ self.g.add((activity_uri, RDF.type, PROV.Activity)) + + if activity.get("label"): + self.g.add((activity_uri, RDFS.label, Literal(activity["label"]))) + if activity.get("seeAlso"): + self.g.add((activity_uri, RDFS.seeAlso, URIRef(activity["seeAlso"]))) + if activity.get("dct_type"): + self.g.add((activity_uri, DCT.type, URIRef(activity["dct_type"]))) + if activity.get("startedAtTime"): + self.g.add((activity_uri, PROV.startedAtTime, Literal(activity["startedAtTime"], datatype=XSD.dateTime))) + + for agent_dict in activity.get("wasAssociatedWith", []): + self._add_agent_to_graph(activity_uri, PROV.wasAssociatedWith, agent_dict) + + # Qualified Attribution + qualified_attributions = dataset_dict.get("qualified_attribution", []) + for attr in qualified_attributions: + attr_ref = BNode() + self.g.add((dataset_ref, DCAT.qualifiedAttribution, attr_ref)) + self.g.add((attr_ref, RDF.type, DCAT.Attribution)) + + agent_list = attr.get("agent", []) + for agent_dict in agent_list: + if isinstance(agent_dict, dict): + self._add_agent_to_graph(attr_ref, DCAT.agent, agent_dict) + elif isinstance(agent_dict, str): + self.g.add((attr_ref, DCAT.agent, URIRef(agent_dict))) + role = attr.get("role") + if role: + self.g.add((attr_ref, DCAT.hadRole, URIRef(role))) + + # Temporal # The profile for DCAT-AP 1 stored triples using schema:startDate, @@ -408,12 +495,58 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref): RDFS.Resource, ), ("description", DCT.description, None, Literal), + ("modified", DCT.modified, None, Literal), ] - self._add_triples_from_dict( access_service_dict, access_service_node, items ) + if access_service_dict.get("modified"): + self._add_date_triple(access_service_node, DCT.modified, access_service_dict.get("modified")) + + + contact_point_dict = access_service_dict.get("contact") + if contact_point_dict: + self._add_contact_to_graph(access_service_node, DCAT.contactPoint, contact_point_dict) + + publisher_dict = access_service_dict.get("publisher") + 
def _parse_qualified_attributions(self, dataset_ref):
    """
    Collect the qualified attributions attached to a dataset.

    Returns a list of dicts. Each dict may contain:
    - "role": the first dcat:hadRole value of the attribution, as a string
    - "agent": the list returned by ``_agents_details`` for the attribution

    Attributions that yield neither a role nor an agent are left out.
    """
    attributions = []
    seen = set()

    # prov:qualifiedAttribution / prov:agent are the terms used by DCAT.
    # The serializer of this profile writes the same structure with
    # DCAT.qualifiedAttribution / DCAT.agent, so both spellings are read;
    # otherwise graphs produced by this profile would not parse back.
    for link in (PROV.qualifiedAttribution, DCAT.qualifiedAttribution):
        for attribution_ref in self.g.objects(dataset_ref, link):
            if attribution_ref in seen:
                # Same node reachable through both predicates.
                continue
            seen.add(attribution_ref)

            attribution = {}

            role_ref = self.g.value(attribution_ref, DCAT.hadRole)
            if role_ref is not None:
                attribution["role"] = str(role_ref)

            agents = self._agents_details(attribution_ref, PROV.agent)
            if not agents:
                agents = self._agents_details(attribution_ref, DCAT.agent)
            if agents:
                attribution["agent"] = agents

            if attribution:
                attributions.append(attribution)

    return attributions
a/ckanext/dcat/profiles/euro_dcat_ap_3.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_3.py @@ -8,6 +8,7 @@ RDF, ) +from .base import URIRefOrLiteral from ckanext.dcat.utils import dataset_uri from .euro_dcat_ap_2 import EuropeanDCATAP2Profile from .euro_dcat_ap_scheming import EuropeanDCATAPSchemingProfile @@ -29,6 +30,11 @@ def parse_dataset(self, dataset_dict, dataset_ref): # DCAT AP v2 scheming fields dataset_dict = self._parse_dataset_v2_scheming(dataset_dict, dataset_ref) + # DCAT AP v3: hasVersion + values = self._object_value_list(dataset_ref, DCAT.hasVersion) + if values: + dataset_dict["has_version"] = values + return dataset_dict def graph_from_dataset(self, dataset_dict, dataset_ref): @@ -45,6 +51,12 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # DCAT AP v3 properties also applied to higher versions self._graph_from_dataset_v3(dataset_dict, dataset_ref) + # DCAT AP v3: List triples + items = [ + ("has_version", DCAT.hasVersion, None, URIRefOrLiteral), + ] + self._add_list_triples_from_dict(dataset_dict, dataset_ref, items) + def graph_from_catalog(self, catalog_dict, catalog_ref): self._graph_from_catalog_base(catalog_dict, catalog_ref) diff --git a/ckanext/dcat/profiles/euro_dcat_ap_base.py b/ckanext/dcat/profiles/euro_dcat_ap_base.py index e42c6afb..ba9f1f2a 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_base.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_base.py @@ -206,6 +206,17 @@ def _parse_dataset_base(self, dataset_dict, dataset_ref): src_data = self._extract_catalog_dict(catalog_src) dataset_dict["extras"].extend(src_data) + + homepage = self._object_value(dataset_ref, FOAF.homepage) + if homepage: + dataset_dict["extras"].append( + {"key": "homepage", "value": homepage} + ) + elif config.get("ckan.site_url"): + dataset_dict["extras"].append( + {"key": "homepage", "value": config.get("ckan.site_url")} + ) + # Resources for distribution in self._distributions(dataset_ref): @@ -778,4 +789,4 @@ def _graph_from_catalog_base(self, 
catalog_dict, catalog_ref): # Dates modified = self._last_catalog_modification() if modified: - self._add_date_triple(catalog_ref, DCT.modified, modified) + self._add_date_triple(catalog_ref, DCT.modified, modified) \ No newline at end of file diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index bea68935..4a7db6f0 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -148,45 +148,9 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref): Add triples to the graph from new repeating subfields """ contact = dataset_dict.get("contact") - if ( - isinstance(contact, list) - and len(contact) - and self._not_empty_dict(contact[0]) - ): + if contact: for item in contact: - contact_uri = item.get("uri") - if contact_uri: - contact_details = CleanedURIRef(contact_uri) - else: - contact_details = BNode() - - self.g.add((contact_details, RDF.type, VCARD.Kind)) - self.g.add((dataset_ref, DCAT.contactPoint, contact_details)) - - self._add_triple_from_dict(item, contact_details, VCARD.fn, "name") - # Add mail address as URIRef, and ensure it has a mailto: prefix - self._add_triple_from_dict( - item, - contact_details, - VCARD.hasEmail, - "email", - _type=URIRef, - value_modifier=self._add_mailto, - ) - self._add_triple_from_dict( - item, - contact_details, - VCARD.hasUID, - "identifier", - _type=URIRefOrLiteral, - ) - self._add_triple_from_dict( - item, - contact_details, - VCARD.hasURL, - "url", - _type=URIRef, - ) + self._add_contact_to_graph(dataset_ref, DCAT.contactPoint, item) self._add_agents(dataset_ref, dataset_dict, "publisher", DCT.publisher) self._add_agents(dataset_ref, dataset_dict, "creator", DCT.creator) diff --git a/ckanext/dcat/profiles/euro_health_dcat_ap.py b/ckanext/dcat/profiles/euro_health_dcat_ap.py index 3da8628f..e461e5af 100644 --- a/ckanext/dcat/profiles/euro_health_dcat_ap.py +++ 
b/ckanext/dcat/profiles/euro_health_dcat_ap.py @@ -1,6 +1,8 @@ -from rdflib import XSD, Literal, URIRef +from rdflib import XSD, Literal, URIRef, RDF, BNode, DCAT, RDFS from rdflib.namespace import Namespace - +from rdflib.namespace import DCTERMS as DCT +from .base import CleanedURIRef +from ckanext.dcat.utils import resource_uri from ckanext.dcat.profiles.base import URIRefOrLiteral from ckanext.dcat.profiles.euro_dcat_ap_3 import EuropeanDCATAP3Profile @@ -10,6 +12,12 @@ # Data Privacy Vocabulary namespace DPV = Namespace("https://w3id.org/dpv#") +# Data Quality Vocabulary namespace +DQV = Namespace("http://www.w3.org/ns/dqv#") + +# Open Annotation namespace +OA = Namespace("http://www.w3.org/ns/oa#") + namespaces = { "healthdcatap": HEALTHDCATAP, "dpv": DPV, @@ -42,18 +50,19 @@ def _parse_health_fields(self, dataset_dict, dataset_ref): agents = self._agents_details(dataset_ref, HEALTHDCATAP.hdab) if agents: dataset_dict["hdab"] = agents + # Add the quality annotations + quality_annotations = self._parse_quality_annotation(dataset_ref) + if quality_annotations: + dataset_dict["quality_annotation"] = quality_annotations + + # Dataset-level retention + dataset_dict["retention_period"] = self._parse_retention_period(dataset_ref) - # Retention period - retention_start, retention_end = self._time_interval( - dataset_ref, HEALTHDCATAP.retentionPeriod, dcat_ap_version=2 - ) - retention_dict = {} - if retention_start is not None: - retention_dict["start"] = retention_start - if retention_end is not None: - retention_dict["end"] = retention_end - if retention_dict: - dataset_dict["retention_period"] = [retention_dict] + # Distribution-level retention + for distribution_ref in self._distributions(dataset_ref): + for resource_dict in dataset_dict.get("resources", []): + if resource_dict["distribution_ref"] == str(distribution_ref): + resource_dict["retention_period"] = self._parse_retention_period(distribution_ref) return dataset_dict @@ -87,6 +96,73 @@ def 
def __parse_healthdcat_booleanvalues(self, dataset_dict, dataset_ref):
    """
    Copy boolean HealthDCAT-AP properties from the graph into dataset_dict.

    A key is only set when the stored value is a recognisable boolean;
    anything else is ignored so that no misleading False is written.
    """
    # Lexical forms of xsd:boolean are "true", "false", "1" and "0".
    lexical_forms = {"true": True, "1": True, "false": False, "0": False}

    for key, predicate in (
        ("trusted_data_holder", HEALTHDCATAP.trustedDataHolder),
    ):
        value = self._object_value(dataset_ref, predicate)
        if value is None:
            continue
        parsed = lexical_forms.get(str(value).strip().lower())
        if parsed is not None:
            dataset_dict[key] = parsed


def _parse_quality_annotation(self, dataset_ref):
    """
    Parse DQV quality annotations from the RDF graph.

    Returns a list of dicts with the keys "body" and "target" and,
    when a motivation is present, "motivated_by". An annotation is only
    returned when both body and target are http(s) URIs.
    """

    def http_uri(node, predicate):
        # Return the value only when it is an http(s) URI string.
        value = self._object_value(node, predicate)
        if value and isinstance(value, str) and value.startswith(
            ("http://", "https://")
        ):
            return value
        return None

    annotations = []
    for annotation_ref in self.g.objects(dataset_ref, DQV.hasQualityAnnotation):
        body = http_uri(annotation_ref, OA.hasBody)
        target = http_uri(annotation_ref, OA.hasTarget)
        if body is None or target is None:
            continue

        annotation = {"body": body, "target": target}

        # The motivation is taken as is (URI or literal).
        motivation = self._object_value(annotation_ref, OA.motivatedBy)
        if motivation:
            annotation["motivated_by"] = motivation

        annotations.append(annotation)

    return annotations


def _parse_retention_period(self, subject_ref):
    """
    Parse HEALTHDCATAP.retentionPeriod for a subject (e.g., a dataset or
    a distribution).

    Returns a list with a single dict, e.g.
    [{"start": "2023-01-01", "end": "2025-01-01"}],
    or an empty list when neither a start nor an end is found.
    """
    start, end = self._time_interval(
        subject_ref, HEALTHDCATAP.retentionPeriod, dcat_ap_version=2
    )
    period = {
        key: value
        for key, value in (("start", start), ("end", end))
        if value is not None
    }
    return [period] if period else []


def _add_retention_period(self, subject_ref, retention_list):
    """
    Add HEALTHDCATAP.retentionPeriod nodes for a subject.

    ``retention_list`` is a list of dicts with the optional keys "start",
    "end" and "comment". Each dict with at least one of them becomes a
    dct:PeriodOfTime blank node. None and non-dict entries are ignored.
    """
    # ``dict.get(key, [])`` still returns None when the key is present
    # with a None value, so guard here instead of relying on the caller.
    for retention in retention_list or []:
        if not isinstance(retention, dict):
            continue

        start = retention.get("start")
        end = retention.get("end")
        comment = retention.get("comment")
        if not (start or end or comment):
            continue

        period_node = BNode()
        self.g.add((subject_ref, HEALTHDCATAP.retentionPeriod, period_node))
        self.g.add((period_node, RDF.type, DCT.PeriodOfTime))

        if start:
            self.g.add(
                (period_node, DCAT.startDate, Literal(start, datatype=XSD.date))
            )
        if end:
            self.g.add((period_node, DCAT.endDate, Literal(end, datatype=XSD.date)))
        if comment:
            self.g.add((period_node, RDFS.comment, Literal(comment, lang="en")))
def _add_quality_annotation(self, dataset_dict, dataset_ref):
    """
    Serialize the "quality_annotation" entries of a dataset into RDF.

    Each entry becomes a blank node typed dqv:QualityAnnotation and
    oa:Annotation, linked from the dataset with dqv:hasQualityAnnotation.
    Only http(s) URI values of "body", "target" and "motivated_by" are
    written. Entries that are not dicts, or that contain none of these
    URIs, are skipped so that no empty annotation node is created.
    """
    quality_annotation = self._get_dict_value(dataset_dict, "quality_annotation")
    if not quality_annotation:
        return

    fields = (
        ("body", OA.hasBody),
        ("target", OA.hasTarget),
        ("motivated_by", OA.motivatedBy),
    )

    for annotation in quality_annotation:
        if not isinstance(annotation, dict):
            continue

        # Work out what can be written before touching the graph.
        usable = []
        for field, predicate in fields:
            uri = annotation.get(field)
            if isinstance(uri, str) and uri.startswith(("http://", "https://")):
                usable.append((predicate, URIRef(uri)))
        if not usable:
            continue

        annotation_ref = BNode()

        # Link from dataset
        self.g.add((dataset_ref, DQV.hasQualityAnnotation, annotation_ref))
        # dqv:QualityAnnotation is the DQV class for these nodes;
        # oa:Annotation is kept for consumers that only know Web Annotation.
        self.g.add((annotation_ref, RDF.type, DQV.QualityAnnotation))
        self.g.add((annotation_ref, RDF.type, OA.Annotation))

        for predicate, uri_ref in usable:
            self.g.add((annotation_ref, predicate, uri_ref))
+ - field_name: license_id label: License form_snippet: license.html @@ -285,6 +290,85 @@ dataset_fields: help_text: The function of an entity or agent with respect to another entity or resource. help_text: A description of a relationship with another resource. +- field_name: provenance_activity + label: Provenance Activity + repeating_label: Provenance Activity + repeating_once: true + repeating_subfields: + - field_name: uri + label: Activity URI + help_text: URI of the provenance activity (if available). + - field_name: label + label: Label + help_text: Human-readable label for the activity. + - field_name: seeAlso + label: See Also + help_text: Related link for the activity. + - field_name: dct_type + label: Type + help_text: Type of the activity (URI). + - field_name: startedAtTime + label: Started At Time + help_text: When the activity started (ISO 8601). + - field_name: wasAssociatedWith + label: Associated Agent + repeating_label: Agent + repeating_once: true + repeating_subfields: + - field_name: uri + label: URI + - field_name: name + label: Name + - field_name: email + label: Email + - field_name: homepage + label: Homepage + - field_name: type + label: Type + - field_name: identifier + label: Identifier + - field_name: url + label: URL + - field_name: actedOnBehalfOf + label: Acted On Behalf Of + repeating_label: Organization + repeating_once: true + repeating_subfields: + - field_name: name + label: Organization Name + help_text: Structured provenance activity information, including agent and organization. 
+ +# Add qualified_attribution field here, just before the commented-out hvd_category field +- field_name: qualified_attribution + label: Qualified Attribution + repeating_label: Attribution + repeating_once: true + repeating_subfields: + - field_name: agent + label: Agent + repeating_label: Agent + repeating_once: true + repeating_subfields: + - field_name: uri + label: URI + - field_name: name + label: Name + - field_name: email + label: Email + - field_name: homepage + label: Homepage + - field_name: type + label: Type + - field_name: identifier + label: Identifier + - field_name: url + label: URL + - field_name: role + label: Role + help_text: Role of the agent (e.g., data processor, contributor). + help_text: Structured qualified attribution information including agent and role. + + #- field_name: hvd_category # label: HVD Category # preset: multiple_text @@ -439,6 +523,11 @@ resource_fields: - field_name: title label: Title + - field_name: description + label: Description + form_snippet: markdown.html + help_text: A free-text account of the resource. + - field_name: endpoint_description label: Endpoint description @@ -456,6 +545,147 @@ resource_fields: validators: ignore_missing unicode_safe help_text: Information regarding access or restrictions based on privacy, security, or other policies. 
+ - field_name: conforms_to + label: Conforms to + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: format + label: Format + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: identifier + label: Identifier + + - field_name: language + label: Language + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: rights + label: Rights + form_snippet: markdown.html + display_snippet: markdown.html + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: landing_page + label: Landing page + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: keyword + label: Keywords + preset: tag_string_autocomplete + form_placeholder: eg. economy, mental health, government + help_text: Keywords or tags describing the dataservice. Use commas to separate multiple values. + + - field_name: applicable_legislation + label: Applicable legislation + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: The legislation that mandates the creation or management of the dataset. + + - field_name: contact + label: Contact point + repeating_label: Contact point + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: email + label: Email + display_snippet: email.html + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the contact point. Such as a ROR ID. + + - field_name: url + label: URL + help_text: A URL associated with the contact + help_text: Contact information for enquiries about the dataservice. + + - field_name: creator + label: Creator + repeating_label: Creator + repeating_subfields: + + - field_name: uri + label: URI + help_text: URI of the creator, if available. 
+ + - field_name: name + label: Name + help_text: Name of the entity or person who created the dataservice. + + - field_name: email + label: Email + display_snippet: email.html + help_text: Contact email of the creator. + + - field_name: url + label: URL + display_snippet: link.html + help_text: URL for more information about the creator. + + - field_name: type + label: Type + help_text: Type of creator (e.g., Organization, Person). + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the creator, such as an ORCID or ROR ID. + + - field_name: publisher + label: Publisher + repeating_label: Publisher + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: email + label: Email + display_snippet: email.html + + - field_name: url + label: URL + display_snippet: link.html + + - field_name: type + label: Type + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the publisher, such as a ROR ID. + help_text: Entity responsible for making the dataset available. + + - field_name: license + label: License + help_text: License in which the resource is made available. If not provided will be inherited from the dataset. + + - field_name: modified + label: Modification date + preset: dcat_date + help_text: Most recent date on which the dataset was changed, updated or modified. + + - field_name: theme + label: Theme + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: A category of the dataservice. A Dataservice may be associated with multiple themes. + help_text: A data service that gives access to the resource. 
# Note: if not provided, this will be autogenerated diff --git a/ckanext/dcat/schemas/health_dcat_ap.yaml b/ckanext/dcat/schemas/health_dcat_ap.yaml index c8624396..5181df94 100644 --- a/ckanext/dcat/schemas/health_dcat_ap.yaml +++ b/ckanext/dcat/schemas/health_dcat_ap.yaml @@ -109,6 +109,11 @@ dataset_fields: label: Identifier help_text: Unique identifier for the creator, such as an ORCID or ROR ID. +- field_name: homepage + label: Homepage + display_snippet: link.html + help_text: A web page that acts as the homepage for the dataset. + - field_name: license_id label: License form_snippet: license.html @@ -444,11 +449,106 @@ dataset_fields: help_text: The function of an entity or agent with respect to another entity or resource. help_text: A description of a relationship with another resource. +- field_name: provenance_activity + label: Provenance Activity + repeating_label: Provenance Activity + repeating_once: true + repeating_subfields: + - field_name: uri + label: Activity URI + help_text: URI of the provenance activity (if available). + - field_name: label + label: Label + help_text: Human-readable label for the activity. + - field_name: seeAlso + label: See Also + help_text: Related link for the activity. + - field_name: dct_type + label: Type + help_text: Type of the activity (URI). + - field_name: startedAtTime + label: Started At Time + help_text: When the activity started (ISO 8601). 
+ - field_name: wasAssociatedWith + label: Associated Agent + repeating_label: Agent + repeating_once: true + repeating_subfields: + - field_name: uri + label: URI + - field_name: name + label: Name + - field_name: email + label: Email + - field_name: homepage + label: Homepage + - field_name: type + label: Type + - field_name: identifier + label: Identifier + - field_name: url + label: URL + - field_name: actedOnBehalfOf + label: Acted On Behalf Of + repeating_label: Organization + repeating_once: true + repeating_subfields: + - field_name: name + label: Organization Name + help_text: Structured provenance activity information, including agent and organization. + # Note: if not provided, this will be autogenerated - field_name: uri label: URI help_text: An URI for this dataset (if not provided it will be autogenerated). +- field_name: qualified_attribution + label: Qualified Attribution + repeating_label: Attribution + repeating_once: true + repeating_subfields: + - field_name: agent + label: Agent + repeating_label: Agent + repeating_once: true + repeating_subfields: + - field_name: uri + label: URI + - field_name: name + label: Name + - field_name: email + label: Email + - field_name: homepage + label: Homepage + - field_name: type + label: Type + - field_name: identifier + label: Identifier + - field_name: url + label: URL + - field_name: role + label: Role + help_text: Role of the agent (e.g., data processor, contributor). + help_text: Structured qualified attribution information including agent and role. + +- field_name: quality_annotation + label: Quality annotations + repeating_label: Quality annotation + repeating_subfields: + - field_name: body + label: Body + help_text: The content of the quality annotation (e.g., URL to certificate, measurement value, assessment result). + - field_name: target + label: Target + help_text: The specific aspect of the dataset being annotated (e.g., URI or description of what is being assessed). 
+ - field_name: motivated_by + label: Motivated by + help_text: The motivation or reason for the quality annotation. + help_text: > + Quality annotations provide information about the quality of the dataset, including certifications, + measurements, and assessments. These annotations follow the Data Quality Vocabulary (DQV) + and Web Annotation standards. + # TODO: relation-based properties are not yet included (e.g. is_version_of, source, sample, etc) # resource_fields: @@ -585,13 +685,17 @@ resource_fields: label: Access services repeating_label: Access service repeating_subfields: - - field_name: uri label: URI - field_name: title label: Title + - field_name: description + label: Description + form_snippet: markdown.html + help_text: A free-text account of the resource. + - field_name: endpoint_description label: Endpoint description @@ -603,15 +707,160 @@ resource_fields: label: Serves dataset preset: multiple_text validators: ignore_missing scheming_multiple_text - + - field_name: access_rights label: Access rights validators: ignore_missing unicode_safe help_text: Information regarding access or restrictions based on privacy, security, or other policies. + - field_name: conforms_to + label: Conforms to + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: format + label: Format + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: identifier + label: Identifier + + - field_name: language + label: Language + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: rights + label: Rights + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: landing_page + label: Landing page + preset: multiple_text + validators: ignore_missing scheming_multiple_text + + - field_name: keyword + label: Keywords + preset: tag_string_autocomplete + form_placeholder: eg. 
economy, mental health, government + help_text: Keywords or tags describing the dataservice. Use commas to separate multiple values. + + - field_name: applicable_legislation + label: Applicable legislation + preset: multiple_text + validators: ignore_missing scheming_multiple_text + help_text: The legislation that mandates the creation or management of the dataset. + + - field_name: contact + label: Contact point + repeating_label: Contact point + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: email + label: Email + display_snippet: email.html + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the contact point, such as a ROR ID. + + - field_name: url + label: URL + help_text: A URL associated with the contact. + help_text: Contact information for enquiries about the dataservice. + + - field_name: creator + label: Creator + repeating_label: Creator + repeating_subfields: + + - field_name: uri + label: URI + help_text: URI of the creator, if available. + + - field_name: name + label: Name + help_text: Name of the entity or person who created the dataservice. + + - field_name: email + label: Email + display_snippet: email.html + help_text: Contact email of the creator. + + - field_name: url + label: URL + display_snippet: link.html + help_text: URL for more information about the creator. + + - field_name: type + label: Type + help_text: Type of creator (e.g., Organization, Person). + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the creator, such as an ORCID or ROR ID.
+ + - field_name: publisher + label: Publisher + repeating_label: Publisher + repeating_once: true + repeating_subfields: + + - field_name: uri + label: URI + + - field_name: name + label: Name + + - field_name: email + label: Email + display_snippet: email.html + + - field_name: url + label: URL + display_snippet: link.html + + - field_name: type + label: Type + + - field_name: identifier + label: Identifier + help_text: Unique identifier for the publisher, such as a ROR ID. + help_text: Entity responsible for making the dataset available. + + - field_name: license + label: License + help_text: License in which the resource is made available. If not provided will be inherited from the dataset. + + - field_name: modified + label: Modification date + preset: dcat_date + help_text: Most recent date on which the dataset was changed, updated or modified. + help_text: A data service that gives access to the resource. # Note: if not provided, this will be autogenerated - field_name: uri label: URI help_text: An URI for this resource (if not provided it will be autogenerated). 
+ +- field_name: retention_period + label: Retention period + repeating_subfields: + + - field_name: start + label: Start + preset: dcat_date + + - field_name: end + label: End + preset: dcat_date diff --git a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py index 8da1c634..d0252a11 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py +++ b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_parse.py @@ -124,6 +124,7 @@ def _get_extra_value_as_list(key): assert _get_extra_value('access_rights') == 'public' assert _get_extra_value('provenance') == 'Some statement about provenance' assert _get_extra_value('dcat_type') == 'test-type' + assert _get_extra_value('homepage') == 'http://dataset.info.org/home' # Lists assert sorted(_get_extra_value_as_list('language')) == [u'ca', u'en', u'es'] diff --git a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_serialize.py b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_serialize.py index 826aef47..88826149 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/dcat_ap/test_euro_dcatap_profile_serialize.py @@ -1514,4 +1514,4 @@ def test_dont_set_missing_license_for_resource_config_param_value_false(self): assert str(distribution) == utils.resource_uri(resource) # Verify that the license of the dataset is not in the distribution - assert not self._triple(g, distribution, DCT.license, URIRef(dataset['license_id'])) + assert not self._triple(g, distribution, DCT.license, URIRef(dataset['license_id'])) \ No newline at end of file diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_parse.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_parse.py index 8573cbab..94a1e541 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_parse.py +++ 
b/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_parse.py @@ -24,6 +24,84 @@ class TestEuroDCATAP2ProfileParsing(BaseParseTest): + def test_parse_access_service_extra_fields(self): + rdf_data = ''' + + + + + + + + + service-123 + + open use + + keyword1 + keyword2 + + This SPARQL end point allow to directly query the EU Whoiswho content + + + European Commission + + + + + Publications Office of the European Union + + + + + John Doe + + + + + + + + + + ''' + p = RDFParser(profiles=DCAT_AP_PROFILES) + p.parse(rdf_data) + datasets = list(p.datasets()) + assert len(datasets) == 1 + resources = datasets[0]['resources'] + assert len(resources) == 1 + access_services = json.loads(resources[0]['access_services']) + assert len(access_services) == 1 + access_service = access_services[0] + assert access_service['conforms_to'] == ['http://example.org/spec'] + assert access_service['format'] == ['http://example.org/format'] + assert access_service['identifier'] == 'service-123' + assert access_service['language'] == ['http://publications.europa.eu/resource/authority/language/ENG'] + assert access_service['rights'] == ['open use'] + assert access_service['landing_page'] == ['http://example.org/landing'] + assert access_service['applicable_legislation'] == ['http://data.europa.eu/eli/reg_impl/2023/138/oj'] + assert sorted(access_service['keyword']) == ['keyword1', 'keyword2'] + assert access_service['description'] == 'This SPARQL end point allow to directly query the EU Whoiswho content' + + contact_points = access_service.get("contact") + assert isinstance(contact_points, list) + assert contact_points[0].get("name") == "John Doe" + assert contact_points[0].get("email") == "john@example.org" + + creator = access_service.get("creator") + assert isinstance(creator, list) + assert creator[0].get("name") == "European Commission" + + publishers = access_service.get("publisher") + assert isinstance(publishers, list) + assert publishers[0].get("name") == "Publications 
Office of the European Union" def test_dataset_all_fields(self): @@ -82,6 +160,10 @@ def test_dataset_all_fields(self): SPARQL url description + + 2012-05-01T00:04:06 + + @@ -155,11 +237,15 @@ def test_dataset_all_fields(self): assert access_service.get('license') == 'http://publications.europa.eu/resource/authority/licence/COM_REUSE' assert access_service.get('access_rights') == 'http://publications.europa.eu/resource/authority/access-right/PUBLIC' assert access_service.get('description') == 'This SPARQL end point allow to directly query the EU Whoiswho content (organization / membership / person)' + assert access_service.get('modified') == '2012-05-01T00:04:06' # List endpoint_url_list = access_service.get('endpoint_url') assert len(endpoint_url_list) == 1 assert 'http://publications.europa.eu/webapi/rdf/sparql' in endpoint_url_list + theme_list = access_service.get('theme') + assert isinstance(theme_list, list) + assert sorted(theme_list) == ['http://example.org/theme/environment', 'http://example.org/theme/transport'] def test_availability_distibutions_without_uri(self): diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_serialize.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_serialize.py index 8f9ebd87..dc5cba96 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_euro_dcatap_2_profile_serialize.py @@ -418,7 +418,20 @@ def test_distribution_fields(self): 'access_rights': 'http://publications.europa.eu/resource/authority/access-right/PUBLIC', 'description': 'This SPARQL end point allow to directly query the EU Whoiswho content 1', 'endpoint_url': ['http://publications.europa.eu/webapi/rdf/sparql'], - 'serves_dataset': ['http://data.europa.eu/88u/dataset/eu-whoiswho-the-official-directory-of-the-european-union'] + 'serves_dataset': ['http://data.europa.eu/88u/dataset/eu-whoiswho-the-official-directory-of-the-european-union'], 
+ 'conforms_to': ['http://example.org/spec'], + 'applicable_legislation': ['http://data.europa.eu/eli/reg_impl/2023/138/oj'], + 'format': ['http://example.org/format'], + 'identifier': 'service-123', + 'language': ['http://publications.europa.eu/resource/authority/language/ENG'], + 'rights': ['open use'], + 'landing_page': ['http://example.org/landing'], + 'keyword': ['keyword1', 'keyword2'], + 'contact': {'name': 'John Doe', 'email': 'john@example.org'}, + 'creator': [{'name': 'European Commission'}], + 'publisher': {'name': 'Publications Office of the European Union'}, + 'modified': '2024-01-01T12:00:00', + 'theme': ['http://example.org/theme/environment', 'http://example.org/theme/transport'], }, { 'availability': 'http://publications.europa.eu/resource/authority/planned-availability/EXPERIMENTAL', @@ -428,7 +441,15 @@ def test_distribution_fields(self): 'access_rights': 'http://publications.europa.eu/resource/authority/access-right/OP_DATPRO', 'description': 'This SPARQL end point allow to directly query the EU Whoiswho content 2', 'endpoint_url': ['http://publications.europa.eu/webapi/rdf/sparql'], - 'serves_dataset': ['http://data.europa.eu/88u/dataset/eu-whoiswho-the-official-directory-of-the-european-union'] + 'serves_dataset': ['http://data.europa.eu/88u/dataset/eu-whoiswho-the-official-directory-of-the-european-union'], + 'conforms_to': ['http://example.org/spec'], + 'applicable_legislation': ['http://data.europa.eu/eli/reg_impl/2023/138/oj'], + 'format': ['http://example.org/format'], + 'identifier': 'service-123', + 'language': ['http://publications.europa.eu/resource/authority/language/ENG'], + 'rights': ['open use'], + 'landing_page': ['http://example.org/landing'], + 'keyword': ['keyword1', 'keyword2'] } ]) } @@ -478,11 +499,74 @@ def test_distribution_fields(self): URIRef(access_service.get('license'))) self._assert_simple_value(g, object[2], DCT.title, Literal(access_service.get('title'))) - self._assert_simple_value(g, object[2], DCT.description, 
- Literal(access_service.get('description'))) + if access_service.get('description'): + self._assert_simple_value(g, object[2], DCT.description, + Literal(access_service.get('description'))) self._assert_simple_value(g, object[2], DCAT.endpointDescription, Literal(access_service.get('endpoint_description'))) + self._assert_simple_value( + g, object[2], DCT.identifier, + Literal(access_service.get('identifier')) if access_service.get('identifier') else None + ) + self._assert_values_list( + g, object[2], DCT.conformsTo, + self._get_typed_list(access_service.get('conforms_to'), URIRef) if access_service.get( + 'conforms_to') else [] + ) + self._assert_values_list( + g, object[2], DCT["format"], + self._get_typed_list(access_service.get('format'), URIRef) if access_service.get('format') else [] + ) + self._assert_values_list( + g, object[2], DCT.language, + self._get_typed_list(access_service.get('language'), URIRef) if access_service.get('language') else [] + ) + self._assert_values_list( + g, object[2], DCT.rights, + self._get_typed_list(access_service.get('rights'), Literal) if access_service.get('rights') else [] + ) + self._assert_values_list( + g, object[2], DCAT.landingPage, + self._get_typed_list(access_service.get('landing_page'), URIRef) if access_service.get( + 'landing_page') else [] + ) + self._assert_values_list( + g, object[2], DCATAP.applicableLegislation, + self._get_typed_list(access_service.get('applicable_legislation'), URIRef) if access_service.get('applicable_legislation') else [] + ) + + if access_service.get('keyword'): + self._assert_values_list( + g, object[2], DCAT.keyword, + self._get_typed_list(access_service.get('keyword'), Literal) + ) + + if access_service.get('contact'): + contact = self._triple(g, object[2], DCAT.contactPoint, None)[2] + assert self._triple(g, contact, VCARD.fn, Literal('John Doe')) + assert self._triple(g, contact, VCARD.hasEmail, URIRef('mailto:john@example.org')) + + if access_service.get('creator'): + creators = 
self._triples(g, object[2], DCT.creator, None) + assert any(self._triple(g, c[2], FOAF.name, Literal('European Commission')) for c in creators) + + if access_service.get('publisher'): + publishers = self._triples(g, object[2], DCT.publisher, None) + assert any(self._triple(g, p[2], FOAF.name, Literal('Publications Office of the European Union')) for p in publishers) + + if access_service.get('modified'): + assert self._triple( + g, object[2], DCT.modified, + Literal(access_service.get('modified'), datatype=XSD.dateTime) + ) + + if access_service.get('theme'): + self._assert_values_list( + g, object[2], DCAT.theme, + self._get_typed_list(access_service.get('theme'), URIRef) + ) + # Lists self._assert_values_list(g, object[2], DCAT.endpointURL, self._get_typed_list(access_service.get('endpoint_url'), URIRef)) diff --git a/ckanext/dcat/tests/profiles/dcat_ap_2/test_multilingual_support.py b/ckanext/dcat/tests/profiles/dcat_ap_2/test_multilingual_support.py index 9c4629ea..6135e3d8 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_2/test_multilingual_support.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_2/test_multilingual_support.py @@ -20,7 +20,6 @@ ) from ckanext.dcat.tests.utils import BaseSerializeTest, BaseParseTest - @pytest.mark.usefixtures("with_plugins", "clean_db") @pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets fluent") @pytest.mark.ckan_config( diff --git a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py index 17ad472f..7c06fa11 100644 --- a/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/dcat_ap_3/test_euro_dcatap_3_profile_serialize.py @@ -31,13 +31,13 @@ @pytest.mark.usefixtures("with_plugins", "clean_db") -@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") +@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets dataset_series") 
@pytest.mark.ckan_config( "scheming.dataset_schemas", "ckanext.dcat.schemas:dcat_ap_full.yaml" ) @pytest.mark.ckan_config( "scheming.presets", - "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml", + "ckanext.scheming:presets.json ckanext.dcat.schemas:presets.yaml ckanext.dataset_series.schemas:presets.yaml " ) @pytest.mark.ckan_config("ckanext.dcat.rdf.profiles", "euro_dcat_ap_3") class TestEuroDCATAP3ProfileSerializeDataset(BaseSerializeTest): diff --git a/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_parse.py b/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_parse.py index 1d7b53c5..2d907f0f 100644 --- a/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_parse.py +++ b/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_parse.py @@ -182,3 +182,43 @@ def test_e2e_dcat_to_ckan(self): "end": "2034-12-31", } ] + assert dataset["resources"][0]["retention_period"] == [ + { + "start": "2020-03-01", + "end": "2034-12-31", + } + ] + + assert dataset["provenance_activity"] == [{ + "uri": "internalURI:wasGeneratedBy0", + "label": "http://dbpedia.org/resource/Record_linkage", + "seeAlso": "https://www.ehealth.fgov.be/ehealthplatform/fr/service-codage-anonymisation-et-ttp", + "dct_type": "http://dbpedia.org/resource/Record_linkage", + "startedAtTime": "2021-01-01T00:00:00+00:00", + "wasAssociatedWith": [{ + "name": "Dr. 
Joris van Loenhout", + "url": "https://www.sciensano.be/fr/people/joris-van-loenhout", + "email": "Joris.VanLoenhout@sciensano.be", + "type": "", + "uri": "", + "identifier": "", + "actedOnBehalfOf": [{ + "name": "Contact Point" + }] + }] + }] + + assert dataset["qualified_attribution"][0]["role"] == "https://inspire.ec.europa.eu/metadata-codelist/ResponsiblePartyRole/processor" + + agent = dataset["qualified_attribution"][0]["agent"][0] + assert agent["name"] == "Contact Point" + assert agent["email"] == "healthdata@sciensano.be" + assert agent["url"] == "https://healthdata.be" + assert agent["type"] == "" + assert agent["identifier"] == "" + + # DQV Quality Annotation + assert len(dataset["quality_annotation"]) == 1 + assert dataset["quality_annotation"][0]["body"] == "https://certificates.theodi.org/en/datasets/393/certificate" + assert dataset["quality_annotation"][0]["target"] == "https://certificates.theodi.org/en/datasets/393" + assert dataset["quality_annotation"][0]["motivated_by"] == "http://www.w3.org/ns/dqv#qualityAssessment" diff --git a/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_serialize.py b/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_serialize.py index b6e5c5c6..0c523189 100644 --- a/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_serialize.py +++ b/ckanext/dcat/tests/profiles/health_dcat_ap/test_euro_health_dcat_ap_profile_serialize.py @@ -3,9 +3,9 @@ import pytest from ckan.tests.helpers import call_action from geomet import wkt -from rdflib import Graph -from rdflib.namespace import RDF +from rdflib import Graph, PROV, Literal from rdflib.term import URIRef +from rdflib.namespace import Namespace from ckanext.dcat import utils from ckanext.dcat.processors import RDFSerializer @@ -30,6 +30,8 @@ DCAT_AP_PROFILES = ["euro_dcat_ap_3"] +# Open Annotation namespace +OA = Namespace("http://www.w3.org/ns/oa#") @pytest.mark.usefixtures("with_plugins", "clean_db") 
@pytest.mark.ckan_config("ckan.plugins", "dcat scheming_datasets") @@ -103,3 +105,99 @@ def test_e2e_ckan_to_dcat(self): assert self._triple( g, relation[0][2], predicate, value ), f"relation Predicate {predicate} does not have value {value}" + + # Test provenance activity + provenance = [t for t in g.triples((dataset_ref, PROV.wasGeneratedBy, None))] + assert len(provenance) == 1 + activity_node = provenance[0][2] + activity_items = [ + (RDF.type, PROV.Activity), + (RDFS.label, Literal(dataset_dict["provenance_activity"][0]["label"])), + (RDFS.seeAlso, URIRef(dataset_dict["provenance_activity"][0]["seeAlso"])), + (DCT.type, URIRef(dataset_dict["provenance_activity"][0]["dct_type"])), + (PROV.startedAtTime, Literal(dataset_dict["provenance_activity"][0]["startedAtTime"], datatype=XSD.dateTime)), + ] + for predicate, value in activity_items: + assert self._triple(g, activity_node, predicate, value), f"Provenance {predicate} mismatch" + + agent_triple = list(g.objects(activity_node, PROV.wasAssociatedWith)) + assert len(agent_triple) == 1 + agent_node = agent_triple[0] + agent_items = [ + (RDF.type, PROV.Agent), + (FOAF.name, Literal(dataset_dict["provenance_activity"][0]["wasAssociatedWith"][0]["name"])), + (FOAF.homepage, URIRef(dataset_dict["provenance_activity"][0]["wasAssociatedWith"][0]["homepage"])), + (FOAF.mbox, URIRef(dataset_dict["provenance_activity"][0]["wasAssociatedWith"][0]["email"])), + ] + + acted_on = list(g.objects(agent_node, PROV.actedOnBehalfOf)) + assert len(acted_on) == 1 + org_node = acted_on[0] + assert self._triple(g, org_node, FOAF.name, Literal(dataset_dict["provenance_activity"][0]["wasAssociatedWith"][0]["actedOnBehalfOf"][0]["name"])) + + # Test qualified attribution + attributions = [t for t in g.triples((dataset_ref, DCAT.qualifiedAttribution, None))] + assert len(attributions) == 1 + attr_node = attributions[0][2] + assert self._triple(g, attr_node, RDF.type, DCAT.Attribution) + assert self._triple(g, attr_node, DCAT.hadRole, 
URIRef(dataset_dict["qualified_attribution"][0]["role"])) + + agent_node = list(g.objects(attr_node, DCAT.agent))[0] + agent_details = dataset_dict["qualified_attribution"][0]["agent"][0] + agent_items = [ + (RDF.type, FOAF.Organization), + (FOAF.name, Literal(agent_details["name"])), + (FOAF.mbox, URIRef("mailto:" + agent_details["email"])), + (FOAF.homepage, URIRef(agent_details["homepage"])), + ] + for predicate, value in agent_items: + assert self._triple(g, agent_node, predicate, value), f"QualifiedAttribution Agent {predicate} mismatch" + + # Test qualified annotation + annotations = [t for t in + g.triples((dataset_ref, URIRef("http://www.w3.org/ns/dqv#hasQualityAnnotation"), None))] + assert len(annotations) == 1, "Expected one dqv:hasQualityAnnotation triple" + + annotation_node = annotations[0][2] + assert self._triple(g, annotation_node, RDF.type, URIRef("http://www.w3.org/ns/oa#Annotation")) + + annotation_details = dataset_dict["quality_annotation"][0] + + # Assert URI-based fields + for field, predicate_uri in [ + ("motivated_by", OA.motivatedBy), + ("body", OA.hasBody), + ("target", OA.hasTarget), + ]: + value = annotation_details.get(field) + assert value is not None, f"Missing {field} in annotation" + assert self._triple(g, annotation_node, URIRef(predicate_uri), + URIRef(value)), f"QualityAnnotation {field} mismatch" + + # Extract the distribution node + distributions = list(g.objects(dataset_ref, DCAT.distribution)) + assert len(distributions) > 0, "No distributions found" + distribution_node = distributions[0] + + distribution_details = dataset_dict["resources"][0] + + assert self._triple(g, distribution_node, RDF.type, DCAT.Distribution) + + # Check retention period + retention_nodes = list(g.objects(distribution_node, HEALTHDCATAP.retentionPeriod)) + assert len(retention_nodes) == 1, "Expected one retentionPeriod node on distribution" + retention_node = retention_nodes[0] + assert self._triple(g, retention_node, RDF.type, DCT.PeriodOfTime) + 
assert self._triple( + g, + retention_node, + DCAT.startDate, + Literal(distribution_details["retention_period"][0]["start"], datatype=XSD.date) + ) + assert self._triple( + g, + retention_node, + DCAT.endDate, + Literal(distribution_details["retention_period"][0]["end"], datatype=XSD.date) + ) + diff --git a/dev-requirements.txt b/dev-requirements.txt index 1883b253..aa725832 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,6 +1,6 @@ responses>=0.25.2 pyshacl -mlcroissant; python_version >= '3.10' +mlcroissant==1.0.21; python_version >= '3.10' mock pytest-ckan pytest-cov diff --git a/docs/mapping-healthdcat.md b/docs/mapping-healthdcat.md index 4fa1758a..6285fa90 100644 --- a/docs/mapping-healthdcat.md +++ b/docs/mapping-healthdcat.md @@ -5,6 +5,7 @@ This section defines how CKAN fields map to the [HealthDCAT-AP](http://healthdat | DCAT Class | RDF Property | CKAN Dataset Field | Stored as | Notes | |----------------|----------------------------------------|-------------------------------------|-----------|-------| | dcat:Dataset | healthdcatap:analytics | analytics | list | Publishers are encouraged to provide URLs pointing to document repositories where users can access or request associated resources such as technical reports of the dataset, quality measurements, usability indicators,... Note that HealthDCAT-AP mentions also API endpoints or analytics services, but these would not be Distributions but rather DatasetServices. | +| dcat:Dataset | healthdcatap:qualityAnnotation | quality_annotation | list | This field allows annotations or notes about the quality of the dataset, such as data completeness, known issues, or validation methods.
| | dcat:Dataset | healthdcatap:hasCodeValues | code_values | list | Inside this property, you can provide the coding system of the dataset in the form of wikidata URI (example: https://www.wikidata.org/entity/P494 for ICD-10 ID) and the URI of the value that describes the dataset (example: https://icd.who.int/browse10/2019/en#/Y59.0 for viral vaccines) | | dcat:Dataset | healthdcatap:hasCodingSystem | coding_system | list | This property provides information on which coding systems are in use inside your dataset. For this, wikidata URIs must be used.| | dcat:Dataset | healthdcatap:healthCategory | health_category | list | Health-specific category values. | @@ -25,6 +26,7 @@ Example value could be: dpv:ResearchAndDevelopment. | | dcat:Dataset | healthdcatap:numberOfUniqueIndividuals | number_of_unique_individuals | integer | This property is not mandatory, since not all datasets might include data from individuals. | | dcat:Dataset | healthdcatap:hdab | hdab | agent | Health Data Access Body responsible. | | dcat:Dataset | healthdcatap:retentionPeriod | retention_period | interval | This property makes use of the class dct:PeriodOfTime, in which a start and end date should be provided. | +| dcat:Distribution | healthdcatap:retentionPeriod | resources_retention_period | interval | This property makes use of the class dct:PeriodOfTime, in which a start and end date should be provided. | ### Notes diff --git a/docs/mapping.md b/docs/mapping.md index fa05ade2..59bd7589 100644 --- a/docs/mapping.md +++ b/docs/mapping.md @@ -42,7 +42,9 @@ some cases the way metadata is stored internally and presented at the CKAN API l | dcat:Dataset | dcat-us:purpose | custom:purpose | | text | DCAT-US v3 and higher only | dcat:Dataset | skos:scopeNote | custom:usage | | text | DCAT-US v3 and higher only | dcat:Dataset | dct:type | custom:dcat_type | | text | | -| dcat:Dataset | dct:hasVersion | custom:has_version | | list | See [Lists](#lists).
It is assumed that these are one or more URIs referring to another dcat:Dataset | +| dcat:Dataset | prov:wasGeneratedBy | custom:provenance_activity | | text | | +| dcat:Dataset | prov:qualifiedAttribution | custom:qualified_attribution | | list | See [Lists](#lists). Object should contain agent and role | +| dcat:Dataset | dcat:hasVersion | custom:has_version | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | | dcat:Dataset | dct:isVersionOf | custom:is_version_of | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | | dcat:Dataset | dct:source | custom:source | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to another dcat:Dataset | | dcat:Dataset | adms:sample | custom:sample | | list | See [Lists](#lists). It is assumed that these are one or more URIs referring to dcat:Distribution instances | @@ -94,6 +96,16 @@ some cases the way metadata is stored internally and presented at the CKAN API l | dcat:Distribution | dct:identifier | custom:identifier | custom:guid, id | text | DCAT-US v3 and higher only | dcat:Distribution | dcat-us:describedBy | custom:data_dictionary | | list of objects | DCAT-US v3 and higher only | dcat:Distribution | dcat:accessService | resource:access_services | | list of objects | | + +| dcat:Catalog | foaf:homepage | custom:catalog_homepage | | text | | + +| dcat:DataService | dct:conformsTo | access_service:conforms_to | | list | See [Lists](#lists) | +| dcat:DataService | dct:format | access_service:format | | text | | +| dcat:DataService | dct:identifier | access_service:identifier | | text | | +| dcat:DataService | dct:language | access_service:language | | list | See [Lists](#lists) | +| dcat:DataService | dct:rights | access_service:rights | | text | | +| dcat:DataService | dcat:landingPage | access_service:landing_page | | text | | +| dcat:DataService | dcat:keyword | 
access_service:keyword | | list | See [Lists](#lists) | | dcat:DataService | dct:title | access_service:title | | text | | | dcat:DataService | dcat:endpointURL | access_service:endpoint_url | | list | | | dcat:DataService | dcat:endpointDescription | access_service:endpoint_description | | text | | diff --git a/examples/ckan/health_dcat_ap.json b/examples/ckan/health_dcat_ap.json index 26450ee8..88398a3c 100644 --- a/examples/ckan/health_dcat_ap.json +++ b/examples/ckan/health_dcat_ap.json @@ -1,200 +1,299 @@ [ - { - "access_rights": "http://publications.europa.eu/resource/authority/access-right/NON_PUBLIC", - "analytics": [ - "http://example.com/analytics" + { + "qualified_attribution": [ + { + "agent": [ + { + "name": "Contact Point", + "email": "healthdata@sciensano.be", + "homepage": "https://healthdata.be", + "type": "", + "identifier": "" + } ], - "alternate_identifier": [ - "internalURI:admsIdentifier0" - ], - "applicable_legislation": [ - "http://data.europa.eu/eli/reg/2022/868/oj" - ], - "author": null, - "author_email": null, - "code_values": [ - "http://example.com/code1", - "http://example.com/code2" - ], - "coding_system": [ - "http://www.wikidata.org/entity/P1690", - "http://www.wikidata.org/entity/P4229" - ], - "conforms_to": [ - "http://www.wikidata.org/entity/Q19597236" - ], - "creator_user_id": null, - "dcat_type": "n1049372e768c4429a6b2200c22f5f1a4b7", - "documentation": [ - "n1049372e768c4429a6b2200c22f5f1a4b9" - ], - "frequency": "http://publications.europa.eu/resource/authority/frequency/DAILY", - "health_category": [ - "http://example.com/ontology/resource/authority/healthcategories/PHDR", - "http://example.com/ontology/resource/authority/healthcategories/IDHP", - "http://example.com/ontology/resource/authority/healthcategories/DIOH", - "http://example.com/ontology/resource/authority/healthcategories/EHRS" - ], - "health_theme": [ - "http://www.wikidata.org/entity/Q7907952", - "http://www.wikidata.org/entity/Q58624061" - ], - "id": 
"e7ccf79d-705c-427f-8e96-f87bcd6e5318", - "identifier": "http://example.com/dataset/1234567890", - "is_referenced_by": [ - "https://doi.org/10.1038/sdata.2016.18", - "https://dx.doi.org/10.1002/jmri.28679" - ], - "isopen": false, - "issued": "2024-01-01T00:00:00+00:00", - "language": [ - "http://publications.europa.eu/resource/authority/language/ENG", - "http://publications.europa.eu/resource/authority/language/NLD", - "http://publications.europa.eu/resource/authority/language/FRA" - ], - "legal_basis": [ - "https://w3id.org/dpv#Consent" - ], - "license_id": "", - "license_title": "", - "maintainer": null, - "maintainer_email": null, - "max_typical_age": "110", - "metadata_created": "2024-12-02T19:00:30.897399", - "metadata_modified": "2024-12-02T19:00:30.897406", - "min_typical_age": "0", - "modified": "2024-12-31T23:59:59+00:00", - "name": "test-dcat-1", - "notes": "This dataset is an example of using HealthDCAT-AP in CKAN", - "num_resources": 0, - "num_tags": 3, - "number_of_records": "123456789", - "number_of_unique_individuals": "7654321", - "organization": null, - "personal_data": [ - "https://w3id.org/dpv/dpv-pd#Age", - "https://w3id.org/dpv/dpv-pd#Gender", - "https://w3id.org/dpv/dpv-pd#HealthRecord" - ], - "population_coverage": [ - "This example includes a very non-descript population" - ], - "private": false, - "provenance": "This example dataset is partly sourced from TEHDAS2", - "publisher_note": [ - "Health-RI is the Dutch health care initiative to build an integrated health data infrastructure for research and innovation." 
- ], - "publisher_type": [ - "http://example.com/publisherType/undefined" - ], - "trusted_data_holder": true, - "purpose": [ - "https://w3id.org/dpv#AcademicResearch" - ], - "qualified_relation": [ - { - "uri": "", - "relation": "http://example.com/dataset/3.141592", - "role": "http://www.iana.org/assignments/relation/related" - } - ], - "state": "active", - "temporal_resolution": "P1D", - "theme": [ - "http://publications.europa.eu/resource/authority/data-theme/HEAL" - ], - "title": "HealthDCAT-AP test dataset", - "type": "dataset", - "uri": "http://example.healthdata.nl/set/dataset", - "version_notes": "Dataset continuously updated", - "contact": [ - { - "email": "covacsurv@sciensano.be", - "identifier": "", + "role": "https://inspire.ec.europa.eu/metadata-codelist/ResponsiblePartyRole/processor" + } + ], + "quality_annotation": [ + { + "motivated_by": "http://www.w3.org/ns/dqv#qualityAssessment", + "body": "https://acertificateserver.eu/mycertificate", + "target": "https://fair.healthdata.be/dataset/d43a158e-7d13-4660-bbc3-9d3f8d5501e5" + } + ], + "access_rights": "http://publications.europa.eu/resource/authority/access-right/NON_PUBLIC", + "analytics": [ + "http://example.com/analytics" + ], + "alternate_identifier": [ + "internalURI:admsIdentifier0" + ], + "applicable_legislation": [ + "http://data.europa.eu/eli/reg/2022/868/oj" + ], + "author": null, + "author_email": null, + "code_values": [ + "http://example.com/code1", + "http://example.com/code2" + ], + "coding_system": [ + "http://www.wikidata.org/entity/P1690", + "http://www.wikidata.org/entity/P4229" + ], + "conforms_to": [ + "http://www.wikidata.org/entity/Q19597236" + ], + "creator_user_id": null, + "dcat_type": "n1049372e768c4429a6b2200c22f5f1a4b7", + "documentation": [ + "n1049372e768c4429a6b2200c22f5f1a4b9" + ], + "frequency": "http://publications.europa.eu/resource/authority/frequency/DAILY", + "health_category": [ + "http://example.com/ontology/resource/authority/healthcategories/PHDR", + 
"http://example.com/ontology/resource/authority/healthcategories/IDHP", + "http://example.com/ontology/resource/authority/healthcategories/DIOH", + "http://example.com/ontology/resource/authority/healthcategories/EHRS" + ], + "health_theme": [ + "http://www.wikidata.org/entity/Q7907952", + "http://www.wikidata.org/entity/Q58624061" + ], + "id": "e7ccf79d-705c-427f-8e96-f87bcd6e5318", + "identifier": "http://example.com/dataset/1234567890", + "is_referenced_by": [ + "https://doi.org/10.1038/sdata.2016.18", + "https://dx.doi.org/10.1002/jmri.28679" + ], + "isopen": false, + "issued": "2024-01-01T00:00:00+00:00", + "language": [ + "http://publications.europa.eu/resource/authority/language/ENG", + "http://publications.europa.eu/resource/authority/language/NLD", + "http://publications.europa.eu/resource/authority/language/FRA" + ], + "legal_basis": [ + "https://w3id.org/dpv#Consent" + ], + "license_id": "", + "license_title": "", + "maintainer": null, + "maintainer_email": null, + "max_typical_age": "110", + "metadata_created": "2024-12-02T19:00:30.897399", + "metadata_modified": "2024-12-02T19:00:30.897406", + "min_typical_age": "0", + "modified": "2024-12-31T23:59:59+00:00", + "name": "test-dcat-1", + "notes": "This dataset is an example of using HealthDCAT-AP in CKAN", + "num_resources": 0, + "num_tags": 3, + "number_of_records": "123456789", + "number_of_unique_individuals": "7654321", + "organization": null, + "personal_data": [ + "https://w3id.org/dpv/dpv-pd#Age", + "https://w3id.org/dpv/dpv-pd#Gender", + "https://w3id.org/dpv/dpv-pd#HealthRecord" + ], + "population_coverage": [ + "This example includes a very non-descript population" + ], + "private": false, + "provenance": "This example dataset is partly sourced from TEHDAS2", + "publisher_note": [ + "Health-RI is the Dutch health care initiative to build an integrated health data infrastructure for research and innovation." 
+ ], + "publisher_type": [ + "http://example.com/publisherType/undefined" + ], + "trusted_data_holder": true, + "purpose": [ + "https://w3id.org/dpv#AcademicResearch" + ], + "qualified_relation": [ + { + "uri": "", + "relation": "http://example.com/dataset/3.141592", + "role": "http://www.iana.org/assignments/relation/related" + } + ], + "state": "active", + "temporal_resolution": "P1D", + "theme": [ + "http://publications.europa.eu/resource/authority/data-theme/HEAL" + ], + "title": "HealthDCAT-AP test dataset", + "type": "dataset", + "uri": "http://example.healthdata.nl/set/dataset", + "version_notes": "Dataset continuously updated", + "contact": [ + { + "email": "covacsurv@sciensano.be", + "identifier": "", + "name": "Contact Point" + } + ], + "creator": [ + { + "email": "info@example.com", + "identifier": "", + "name": "Contact Point", + "type": "", + "url": "https:/example.com/homepage" + } + ], + "extras": [ + { + "key": "related_resource", + "value": "[\"http://example.com/dataset/9876543210\"]" + }, + { + "key": "sample", + "value": "[\"http://example.com/sample\"]" + }, + { + "key": "spatial_uri", + "value": "http://publications.europa.eu/resource/authority/country/BEL" + } + ], + "hdab": [ + { + "email": "hdab@example.com", + "identifier": "", + "name": "EU Health Data Access Body", + "type": "", + "uri": "", + "url": "https://www.example.com/hdab" + } + ], + "provenance_activity": [ + { + "dct_type": "http://dbpedia.org/resource/Record_linkage", + "label": "http://dbpedia.org/resource/Record_linkage", + "seeAlso": "https://www.ehealth.fgov.be/ehealthplatform/fr/service-codage-anonymisation-et-ttp", + "startedAtTime": "2021-01-01T00:00:00+00:00", + "uri": "internalURI:wasGeneratedBy0", + "wasAssociatedWith": [ + { + "homepage": "https://www.sciensano.be/fr/people/joris-van-loenhout", + "email": "mailto:Joris.VanLoenhout@sciensano.be", + "name": "Dr. 
Joris van Loenhout", + "actedOnBehalfOf": [ + { "name": "Contact Point" - } - ], - "creator": [ - { - "email": "info@example.com", - "identifier": "", - "name": "Contact Point", - "type": "", - "url": "https:/example.com/homepage" - } - ], - "extras": [ - { - "key": "related_resource", - "value": "[\"http://example.com/dataset/9876543210\"]" - }, - { - "key": "sample", - "value": "[\"http://example.com/sample\"]" - }, - { - "key": "spatial_uri", - "value": "http://publications.europa.eu/resource/authority/country/BEL" - } - ], - "hdab": [ - { - "email": "hdab@example.com", - "identifier": "", - "name": "EU Health Data Access Body", - "type": "", - "uri": "", - "url": "https://www.example.com/hdab" - } - ], - "publisher": [ - { - "email": "info@example.com", - "identifier": "", - "name": "Contact Point", - "type": "", - "uri": "", - "url": "https://healthdata.nl" - } + } + ] + } + ] + } + ], + "publisher": [ + { + "email": "info@example.com", + "identifier": "", + "name": "Contact Point", + "type": "", + "uri": "", + "url": "https://healthdata.nl" + } + ], + "retention_period": [ + { + "end": "2034-12-31", + "start": "2020-03-01" + } + ], + "tags": [ + { + "display_name": "Test 1", + "id": "5c418ec2-cb41-4c42-9b9c-f5d1e3a831e5", + "name": "Test 1", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "Test 2", + "id": "c4117ace-2114-470d-b6e9-0df7580a12d8", + "name": "Test 2", + "state": "active", + "vocabulary_id": null + }, + { + "display_name": "Test 3", + "id": "d5a5288d-3bff-431e-be94-12c71d25d75b", + "name": "Test 3", + "state": "active", + "vocabulary_id": null + } + ], + "temporal_coverage": [ + { + "end": "2024-12-31", + "start": "2020-03-01" + } + ], + "resources": [ + { + "access_url": "", + "applicable_legislation": [], + "availability": "", + "cache_last_updated": null, + "cache_url": null, + "compress_format": "", + "conforms_to": [], + "created": "2025-07-10T18:19:32.840953", + "description": "", + "documentation": [], + 
"download_url": "", + "format": null, + "hash": "", + "hash_algorithm": "", + "id": "7e65aeba-136d-48d6-a824-782176c63104", + "issued_date": "", + "issued_time": "", + "language": null, + "last_modified": null, + "license": "", + "metadata_modified": "2025-07-10T18:19:32.838228", + "mimetype": "", + "mimetype_inner": null, + "modified_date": "", + "modified_time": "", + "name": "", + "package_format": "", + "package_id": "16a60ea2-965a-4b5a-9a65-1284354c256e", + "position": 0, + "resource_type": null, + "rights": "", + "spatial_resolution_in_meters": "", + "state": "active", + "status": "", + "temporal_resolution": "", + "uri": "", + "url": "", + "url_type": "", + "access_services": [ + { + "access_rights": "", + "endpoint_description": "", + "endpoint_url": [], + "serves_dataset": [], + "title": "", + "uri": "" + } ], "retention_period": [ - { - "end": "2034-12-31", - "start": "2020-03-01" - } - ], - "tags": [ - { - "display_name": "Test 1", - "id": "5c418ec2-cb41-4c42-9b9c-f5d1e3a831e5", - "name": "Test 1", - "state": "active", - "vocabulary_id": null - }, - { - "display_name": "Test 2", - "id": "c4117ace-2114-470d-b6e9-0df7580a12d8", - "name": "Test 2", - "state": "active", - "vocabulary_id": null - }, - { - "display_name": "Test 3", - "id": "d5a5288d-3bff-431e-be94-12c71d25d75b", - "name": "Test 3", - "state": "active", - "vocabulary_id": null - } - ], - "temporal_coverage": [ - { - "end": "2024-12-31", - "start": "2020-03-01" - } - ], - "resources": [], - "groups": [], - "relationships_as_subject": [], - "relationships_as_object": [] - } + { + "end": "2025-07-18", + "start": "2025-07-10" + } + ] + } + ], + "groups": [], + "relationships_as_subject": [], + "relationships_as_object": [] + } ] \ No newline at end of file diff --git a/examples/dcat/dataset.rdf b/examples/dcat/dataset.rdf index 5ce71e1c..1235888f 100644 --- a/examples/dcat/dataset.rdf +++ b/examples/dcat/dataset.rdf @@ -19,6 +19,7 @@ Zimbabwe Regional Geochemical Survey. 
During the period 1982-86 a team of geologists from the British Geological Survey ... http://dataset.info.org + exploration geochemistry geology diff --git a/examples/dcat/dataset_health.ttl b/examples/dcat/dataset_health.ttl index 982a728e..f8d72b89 100644 --- a/examples/dcat/dataset_health.ttl +++ b/examples/dcat/dataset_health.ttl @@ -94,7 +94,7 @@ adms:sample ; adms:versionNotes "Dataset continuously updated"; dcat:contactPoint ; - # dcat:distribution ; + dcat:distribution ; dcat:hasVersion ; dcat:keyword "Test 1" , "Test 2" , "Test 3"; dcat:spatialResolutionInMeters "10"^^; @@ -136,6 +136,12 @@ ]; dct:title "Technical report number of unique study subjects available by environment for project HDBP0250"; dcat:accessURL ; + + [ a dct:PeriodOfTime; + rdfs:comment "As stated in the CSI deliberation"; + dcat:endDate "2034-12-31"^^; + dcat:startDate "2020-03-01"^^ + ]; dcat:downloadURL ; dcat:mediaType . @@ -223,7 +229,7 @@ a prov:Attribution; dcat:hadRole ; prov:agent [ a foaf:Organization; - foaf:homepage ; + foaf:homepage ; foaf:mbox ; foaf:name "Contact Point" ] . @@ -242,23 +248,6 @@ a dct:MediaTypeOrExtent . -# -# a dcat:Distribution; -# dcatap:applicableLegislation ; -# dct:description "EU Health Data Access Body For better Healthcare, Research & Policy Making"; -# dct:format ; -# dct:identifier "http://ehelse.healthdataportal.eu/distribution/13a3851d-6cdf-4570-a7f0-7f03015d1925"; -# dct:isPartOf ; -# dct:issued "2024-06-03T08:51:00Z"^^; -# dct:license ; -# dct:modified "2024-06-04T18:00:00Z"^^; -# dct:rights [ a dct:RightsStatement; -# rdfs:label "Access to data is conditional on the issuance of a permit by the HDAB after submission of a data request application (English)" -# ]; -# dct:title "EU Health Data Access Body"; -# dcat:accessURL ; -# dcat:byteSize "80000"^^ . - a prov:Activity; rdfs:label "http://dbpedia.org/resource/Record_linkage";