ckan · amercader · Sep 24, 2025 · Jun 5, 2025 · Jun 19, 2025 · Jun 19, 2025
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -77,4 +77,4 @@ jobs:
         ckan -c test.ini db init
         ckan -c test.ini db pending-migrations --apply
     - name: Run tests
-      run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests
+      run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests
diff --git a/ckanext/dcat/profiles/base.py b/ckanext/dcat/profiles/base.py
@@ -7,7 +7,7 @@
 from ckantoolkit import ObjectNotFound, asbool, aslist, config, get_action, url_for
 from dateutil.parser import parse as parse_date
 from geomet import InvalidGeoJSONException, wkt
-from rdflib import BNode, Literal, URIRef, term
+from rdflib import BNode, Literal, URIRef, term, PROV
 from rdflib.namespace import ORG, RDF, RDFS, SKOS, XSD, Namespace
 
 from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
@@ -95,7 +95,6 @@ def __new__(cls, value, lang=None):
             # In case something goes wrong: use Literal
             return Literal(value, lang=lang)
 
-
 class CleanedURIRef(object):
     """Performs some basic URL encoding on value before creating an URIRef object.
 
@@ -547,9 +546,13 @@ def _agents_details(self, subject, predicate):
                 )
             agent_details["url"] = self._object_value(agent, FOAF.homepage)
             agent_details["type"] = self._object_value(agent, DCT.type)
-            agent_details['identifier'] = self._object_value(agent, DCT.identifier)
-            agents.append(agent_details)
+            agent_details["identifier"] = self._object_value(agent, DCT.identifier)
+
+            acted_orgs = self._agents_details(agent, PROV.actedOnBehalfOf)
+            if acted_orgs:
+                agent_details["actedOnBehalfOf"] = acted_orgs
 
+            agents.append(agent_details)
         return agents
 
     def _contact_details(self, subject, predicate):
@@ -819,6 +822,83 @@ def _read_list_value(self, value):
 
         return items
 
+    def _add_agent_to_graph(self, subject_ref, predicate, agent_dict):
+        """
+        Adds an agent node, typed foaf:Organization and foaf:Agent, to the graph,
+        links it from ``subject_ref`` via ``predicate`` and returns the node.
+
+        ``agent_dict`` keys read: uri (node URI; blank node if empty), name,
+        email, url / homepage, type, identifier and actedOnBehalfOf (list of
+        dicts, each serialized as a prov:Organization carrying only its name).
+        """
+        # `or ""` (and `or []` below): a key may be present with a None value
+        uri = (agent_dict.get("uri") or "").strip()
+        # URIRefOrLiteral may return a Literal, which is not a valid subject
+        agent_ref = CleanedURIRef(uri) if uri else BNode()
+        self.g.add((subject_ref, predicate, agent_ref))
+        self.g.add((agent_ref, RDF.type, FOAF.Organization))
+        self.g.add((agent_ref, RDF.type, FOAF.Agent))
+
+        if agent_dict.get("name"):
+            self.g.add((agent_ref, FOAF.name, Literal(agent_dict["name"])))
+        if agent_dict.get("email"):
+            email = agent_dict["email"]
+            if not email.startswith("mailto:"):
+                email = f"mailto:{email}"
+            self.g.add((agent_ref, FOAF.mbox, URIRef(email)))
+        if agent_dict.get("url"):
+            self.g.add((agent_ref, FOAF.homepage, URIRef(agent_dict["url"])))
+        if agent_dict.get("homepage"):
+            self.g.add((agent_ref, FOAF.homepage, URIRef(agent_dict["homepage"])))
+        if agent_dict.get("type"):
+            self.g.add((agent_ref, DCT.type, URIRef(agent_dict["type"])))
+        if agent_dict.get("identifier"):
+            self.g.add((agent_ref, DCT.identifier, Literal(agent_dict["identifier"])))
+
+        for sub_org in agent_dict.get("actedOnBehalfOf") or []:
+            if sub_org.get("name"):
+                org_ref = BNode()
+                self.g.add((agent_ref, PROV.actedOnBehalfOf, org_ref))
+                self.g.add((org_ref, RDF.type, PROV.Organization))
+                self.g.add((org_ref, FOAF.name, Literal(sub_org["name"])))
+
+        return agent_ref
+
+    def _add_contact_to_graph(self, subject, predicate, contact):
+        """
+        Adds a vcard:Kind node for the ``contact`` dict to the graph, linked
+        from ``subject`` via ``predicate``.
+
+        The node is built with CleanedURIRef when ``contact["uri"]`` is truthy,
+        else it is a blank node; name, email, identifier and url are then added.
+        """
+        uri = contact.get("uri")
+        node = CleanedURIRef(uri) if uri else BNode()
+
+        self.g.add((node, RDF.type, VCARD.Kind))
+        self.g.add((subject, predicate, node))
+
+        # One entry per property: (predicate, dict key, keyword arguments)
+        properties = (
+            (VCARD.fn, "name", {}),
+            (
+                VCARD.hasEmail,
+                "email",
+                {"_type": URIRef, "value_modifier": self._add_mailto},
+            ),
+            (VCARD.hasUID, "identifier", {"_type": URIRefOrLiteral}),
+            (VCARD.hasURL, "url", {"_type": URIRef}),
+        )
+        for vcard_property, key, options in properties:
+            self._add_triple_from_dict(
+                contact,
+                node,
+                vcard_property,
+                key,
+                **options,
+            )
+
+
     def _add_spatial_value_to_graph(self, spatial_ref, predicate, value):
         """
         Adds spatial triples to the graph. Assumes that value is a GeoJSON string

diff --git a/ckanext/dcat/profiles/euro_dcat_ap_2.py b/ckanext/dcat/profiles/euro_dcat_ap_2.py
@@ -1,7 +1,7 @@
 import json
 from decimal import Decimal, DecimalException
 
-from rdflib import URIRef, BNode, Literal, Namespace
+from rdflib import URIRef, BNode, Literal, Namespace, FOAF, PROV, RDF, RDFS
 from ckanext.dcat.utils import resource_uri
 
 from .base import URIRefOrLiteral, CleanedURIRef
@@ -18,7 +18,6 @@
 
 from .euro_dcat_ap_base import BaseEuropeanDCATAPProfile
 
-
 ELI = Namespace("http://data.europa.eu/eli/ontology#")
 
 
@@ -65,6 +64,32 @@ def _parse_dataset_v2(self, dataset_dict, dataset_ref):
         # Call base super method for common properties
         super().parse_dataset(dataset_dict, dataset_ref)
 
+        # --- Provenance deserialization ---
+        was_generated_by = self.g.value(dataset_ref, PROV.wasGeneratedBy)
+        if was_generated_by:
+            activity_dict = {}
+            activity_dict["uri"] = str(was_generated_by)
+            activity_dict["type"] = [
+                str(t) for t in self.g.objects(was_generated_by, RDF.type)
+            ]
+            activity_dict["label"] = self._object_value(was_generated_by, RDFS.label)
+            activity_dict["seeAlso"] = self._object_value(was_generated_by, RDFS.seeAlso)
+            activity_dict["dct_type"] = self._object_value(was_generated_by, DCT.type)
+            activity_dict["startedAtTime"] = self._object_value(
+                was_generated_by, PROV.startedAtTime
+            )
+
+            agents = self._agents_details(was_generated_by, PROV.wasAssociatedWith)
+            if agents:
+                activity_dict["wasAssociatedWith"] = [agents[0]] # Only take the first agent
+
+            dataset_dict["provenance_activity"] = [activity_dict]
+
+        # --- Qualified Attribution ---
+        qualified_attributions = self._parse_qualified_attributions(dataset_ref)
+        if qualified_attributions:
+            dataset_dict["qualified_attribution"] = qualified_attributions
+
         # Standard values
         value = self._object_value(dataset_ref, DCAT.temporalResolution)
         if value:
@@ -159,27 +184,51 @@ def _parse_dataset_v2(self, dataset_dict, dataset_ref):
                     ):
                         access_service_dict = {}
 
-                        #  Simple values
+                        # Simple values
                         for key, predicate in (
                             ("availability", DCATAP.availability),
                             ("title", DCT.title),
                             ("endpoint_description", DCAT.endpointDescription),
                             ("license", DCT.license),
                             ("access_rights", DCT.accessRights),
                             ("description", DCT.description),
+                            ("identifier", DCT.identifier),
+                            ("description", DCT.description),
+                            ("modified", DCT.modified),
                         ):
                             value = self._object_value(access_service, predicate)
                             if value:
                                 access_service_dict[key] = value
-                        #  List
+
+                        # List values
                         for key, predicate in (
                             ("endpoint_url", DCAT.endpointURL),
                             ("serves_dataset", DCAT.servesDataset),
+                            ("conforms_to", DCT.conformsTo),
+                            ("format", DCT["format"]),
+                            ("language", DCT.language),
+                            ("rights", DCT.rights),
+                            ("landing_page", DCAT.landingPage),
+                            ("keyword", DCAT.keyword),
+                            ("applicable_legislation", DCATAP.applicableLegislation),
+                            ("theme", DCAT.theme),
                         ):
                             values = self._object_value_list(access_service, predicate)
                             if values:
                                 access_service_dict[key] = values
 
+                        contact_points = self._contact_details(access_service, DCAT.contactPoint)
+                        if contact_points:
+                            access_service_dict["contact"] = contact_points
+
+                        publishers = self._agents_details(access_service, DCT.publisher)
+                        if publishers:
+                            access_service_dict["publisher"] = publishers
+
+                        creators = self._agents_details(access_service, DCT.creator)
+                        if creators:
+                            access_service_dict["creator"] = creators
+
                         # Access service URI (explicitly show the missing ones)
                         access_service_dict["uri"] = (
                             str(access_service)
@@ -247,6 +296,44 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref):
                 _class=_class,
             )
 
+        # --- Provenance serialization ---
+        activities = dataset_dict.get("provenance_activity", [])
+
+        for activity in activities:
+            activity_uri = URIRef(activity.get("uri")) if activity.get("uri") else BNode()
+            self.g.add((dataset_ref, PROV.wasGeneratedBy, activity_uri))
+            self.g.add((activity_uri, RDF.type, PROV.Activity))
+
+            if activity.get("label"):
+                self.g.add((activity_uri, RDFS.label, Literal(activity["label"])))
+            if activity.get("seeAlso"):
+                self.g.add((activity_uri, RDFS.seeAlso, URIRef(activity["seeAlso"])))
+            if activity.get("dct_type"):
+                self.g.add((activity_uri, DCT.type, URIRef(activity["dct_type"])))
+            if activity.get("startedAtTime"):
+                self.g.add((activity_uri, PROV.startedAtTime, Literal(activity["startedAtTime"], datatype=XSD.dateTime)))
+
+            for agent_dict in activity.get("wasAssociatedWith", []):
+                self._add_agent_to_graph(activity_uri, PROV.wasAssociatedWith, agent_dict)
+
+        # Qualified Attribution
+        qualified_attributions = dataset_dict.get("qualified_attribution", [])
+        for attr in qualified_attributions:
+            attr_ref = BNode()
+            self.g.add((dataset_ref, DCAT.qualifiedAttribution, attr_ref))
+            self.g.add((attr_ref, RDF.type, DCAT.Attribution))
+
+            agent_list = attr.get("agent", [])
+            for agent_dict in agent_list:
+                if isinstance(agent_dict, dict):
+                    self._add_agent_to_graph(attr_ref, DCAT.agent, agent_dict)
+                elif isinstance(agent_dict, str):
+                    self.g.add((attr_ref, DCAT.agent, URIRef(agent_dict)))
+            role = attr.get("role")
+            if role:
+                self.g.add((attr_ref, DCAT.hadRole, URIRef(role)))
+
+
         # Temporal
 
         # The profile for DCAT-AP 1 stored triples using schema:startDate,
@@ -408,12 +495,58 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref):
                         RDFS.Resource,
                     ),
                     ("description", DCT.description, None, Literal),
+                    ("modified", DCT.modified, None, Literal),
                 ]
-
                 self._add_triples_from_dict(
                     access_service_dict, access_service_node, items
                 )
 
+                if access_service_dict.get("modified"):
+                    self._add_date_triple(access_service_node, DCT.modified, access_service_dict.get("modified"))
+
+
+                contact_point_dict = access_service_dict.get("contact")
+                if contact_point_dict:
+                    self._add_contact_to_graph(access_service_node, DCAT.contactPoint, contact_point_dict)
+
+                publisher_dict = access_service_dict.get("publisher")
+                if publisher_dict:
+                    self._add_agent_to_graph(access_service_node, DCT.publisher, publisher_dict)
+
+                for creator_dict in access_service_dict.get("creator", []):
+                    self._add_agent_to_graph(access_service_node, DCT.creator, creator_dict)
+
+                # Extra list values for access services
+                extra_items = [
+                    ("conforms_to", DCT.conformsTo, None, URIRefOrLiteral),
+                    ("format", DCT["format"], None, URIRefOrLiteral),
+                    ("language", DCT.language, None, URIRefOrLiteral),
+                    ("rights", DCT.rights, None, URIRefOrLiteral),
+                    ("landing_page", DCAT.landingPage, None, URIRefOrLiteral),
+                    ("applicable_legislation", DCATAP.applicableLegislation, None, URIRefOrLiteral, ELI.LegalResource),
+                    ("theme", DCAT.theme, None, URIRefOrLiteral),
+                ]
+                self._add_list_triples_from_dict(access_service_dict, access_service_node, extra_items)
+
+                # Add single-value triple for identifier
+                self._add_triple_from_dict(
+                    access_service_dict,
+                    access_service_node,
+                    DCT.identifier,
+                    "identifier",
+                    _type=URIRefOrLiteral
+                )
+
+                # Add keyword list
+                self._add_triple_from_dict(
+                    access_service_dict,
+                    access_service_node,
+                    DCAT.keyword,
+                    "keyword",
+                    list_value=True,
+                    _type=Literal
+                )
+
                 #  Lists
                 items = [
                     (
@@ -448,3 +581,23 @@ def _graph_from_dataset_v2_only(self, dataset_dict, dataset_ref):
             _type=URIRefOrLiteral,
             _class=ADMS.Identifier,
         )
+
+    def _parse_qualified_attributions(self, dataset_ref):
+        """
+        Returns one dict per qualified attribution of the dataset, holding an
+        optional "role" (str) and "agent" (list of agent dicts). dcat: variants
+        of the prov: terms are accepted, as _graph_from_dataset_v2 emits those.
+        """
+        attributions = []
+        for ns in (PROV, DCAT):
+            for ref in self.g.objects(dataset_ref, ns.qualifiedAttribution):
+                attr = {}
+                role = self.g.value(ref, DCAT.hadRole)
+                if role is not None:
+                    attr["role"] = str(role)
+                agents = self._agents_details(ref, ns.agent)
+                if agents:
+                    attr["agent"] = agents
+                if attr:
+                    attributions.append(attr)
+        return attributions
diff --git a/ckanext/dcat/profiles/euro_dcat_ap_3.py b/ckanext/dcat/profiles/euro_dcat_ap_3.py
@@ -8,6 +8,7 @@
     RDF,
 )
 
+from .base import URIRefOrLiteral
 from ckanext.dcat.utils import dataset_uri
 from .euro_dcat_ap_2 import EuropeanDCATAP2Profile
 from .euro_dcat_ap_scheming import EuropeanDCATAPSchemingProfile
@@ -29,6 +30,11 @@ def parse_dataset(self, dataset_dict, dataset_ref):
         # DCAT AP v2 scheming fields
         dataset_dict = self._parse_dataset_v2_scheming(dataset_dict, dataset_ref)
 
+        # DCAT AP v3: hasVersion
+        values = self._object_value_list(dataset_ref, DCAT.hasVersion)
+        if values:
+            dataset_dict["has_version"] = values
+
         return dataset_dict
 
     def graph_from_dataset(self, dataset_dict, dataset_ref):
@@ -45,6 +51,12 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
         # DCAT AP v3 properties also applied to higher versions
         self._graph_from_dataset_v3(dataset_dict, dataset_ref)
 
+        # DCAT AP v3: List triples
+        items = [
+            ("has_version", DCAT.hasVersion, None, URIRefOrLiteral),
+        ]
+        self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)
+
     def graph_from_catalog(self, catalog_dict, catalog_ref):
 
         self._graph_from_catalog_base(catalog_dict, catalog_ref)