Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
e79099b
feat(rdf): support serialization of DatasetSeries from RDF and link c…
Jun 5, 2025
22face7
remove fallback. not support by dataseries extension
Jun 19, 2025
84ef929
Remove enter
Jun 19, 2025
1f5110b
fix: skip fluent for 2.9
Jul 1, 2025
2d70ee8
fix import
Jul 1, 2025
b061455
check if this works
Jul 1, 2025
c9e4626
Disable support for 2.9 in tests
Jul 1, 2025
01991b4
check if this works
Jul 1, 2025
ed6b696
Merge pull request #1 from GenomicDataInfrastructure/support-RDF-data…
hcvdwerf Jul 1, 2025
806e248
feat(missing field) add missing fields
Jul 2, 2025
b1c8193
Add homepage
Jul 7, 2025
a6e1e4b
Also serrilaize homepage when available
Jul 7, 2025
217da9a
Added retention period to healthDCAT
Jul 8, 2025
cd8661b
Fix retention period UT
Jul 10, 2025
90dac79
fix test
Jul 10, 2025
1294f4a
feat(missing field) add missing fields
Jul 2, 2025
501a8de
Added DCAT AP 3 has version
Jul 10, 2025
c0efdfc
Merge branch 'add-missing-fields' of https://github.com/hcvdwerf/ckan…
Jul 10, 2025
c4ad649
Added has version to DCAT 3 and added missing dataservice fields
Jul 10, 2025
163d284
fix import
Jul 14, 2025
d8c04df
fix unit tests
Jul 14, 2025
63a2749
Added has version to DCAT 3 and added missing dataservice fields
Jul 10, 2025
fdbd8bc
update schema
Jul 14, 2025
572acbe
Merge branch 'add-missing-fields' of https://github.com/hcvdwerf/ckan…
Jul 14, 2025
1985f6e
fiix mapping documentation
Jul 14, 2025
3cc8905
Updated documetation for retention period
Jul 14, 2025
c03bdd2
fix(dataseries) cardanality for dataseries
Jul 15, 2025
cccd727
fix(UT-cardanality) fix UT for cardanality
Jul 16, 2025
6d23062
add applicable_legislation to Dataservice
Jul 16, 2025
fbbd48e
fix(dataservice (contact & creator)) fix mapping for creator and cont…
Jul 16, 2025
4d1e3a0
add mapping + UT for description within dataservice
Jul 16, 2025
2abc07c
Add if check by contactpoint
Jul 16, 2025
7759e07
Add modified, publisher, license and theme to dataservice
Jul 16, 2025
5762c35
fix(dataseries) Remove dataseries from pull request
Aug 27, 2025
5cf6942
Remove fluent extension tag
Aug 27, 2025
6eec310
Merge branch 'master' into add-missing-fields
hcvdwerf Aug 27, 2025
4485715
Update health_dcat_ap.yaml
hcvdwerf Sep 3, 2025
ba081d7
fix: Always store as list when complex object
Sep 11, 2025
419c364
fix: parse of creator and contact within acces service
Sep 11, 2025
03a5f88
fix(croisant) point to mlcroisant version 1.0.22
Sep 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,4 @@ jobs:
ckan -c test.ini db init
ckan -c test.ini db pending-migrations --apply
- name: Run tests
run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests
run: pytest --ckan-ini=test.ini --cov=ckanext.dcat --cov-report=term-missing --cov-append --disable-warnings ckanext/dcat/tests
88 changes: 84 additions & 4 deletions ckanext/dcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ckantoolkit import ObjectNotFound, asbool, aslist, config, get_action, url_for
from dateutil.parser import parse as parse_date
from geomet import InvalidGeoJSONException, wkt
from rdflib import BNode, Literal, URIRef, term
from rdflib import BNode, Literal, URIRef, term, PROV
from rdflib.namespace import ORG, RDF, RDFS, SKOS, XSD, Namespace

from ckanext.dcat.utils import DCAT_EXPOSE_SUBCATALOGS
Expand Down Expand Up @@ -95,7 +95,6 @@ def __new__(cls, value, lang=None):
# In case something goes wrong: use Literal
return Literal(value, lang=lang)


class CleanedURIRef(object):
"""Performs some basic URL encoding on value before creating an URIRef object.

Expand Down Expand Up @@ -547,9 +546,13 @@ def _agents_details(self, subject, predicate):
)
agent_details["url"] = self._object_value(agent, FOAF.homepage)
agent_details["type"] = self._object_value(agent, DCT.type)
agent_details['identifier'] = self._object_value(agent, DCT.identifier)
agents.append(agent_details)
agent_details["identifier"] = self._object_value(agent, DCT.identifier)

acted_orgs = self._agents_details(agent, PROV.actedOnBehalfOf)
if acted_orgs:
agent_details["actedOnBehalfOf"] = acted_orgs

agents.append(agent_details)
return agents

def _contact_details(self, subject, predicate):
Expand Down Expand Up @@ -819,6 +822,83 @@ def _read_list_value(self, value):

return items

def _add_agent_to_graph(self, subject_ref, predicate, agent_dict):
    """
    Serializes a foaf:Agent or foaf:Organization with optional subfields into the RDF graph.

    Parameters:
    - subject_ref: The RDF subject (dataset, activity, etc.)
    - predicate: The RDF predicate (e.g., dct:publisher, prov:wasAssociatedWith, dcat:agent)
    - agent_dict: A dict with agent metadata (e.g., uri, name, email, url, homepage,
      type, identifier, actedOnBehalfOf). A list/tuple of such dicts is also
      accepted, because the parsing side stores agents as lists; every dict in
      the list is serialized and non-dict entries are ignored.

    Returns:
    - The node (URI reference or blank node) created for the agent, or a list
      of such nodes when a list/tuple was passed in.
    """
    # Agents are parsed into lists of dicts, so tolerate being handed the
    # whole list instead of failing on ``list.get``.
    if isinstance(agent_dict, (list, tuple)):
        return [
            self._add_agent_to_graph(subject_ref, predicate, item)
            for item in agent_dict
            if isinstance(item, dict)
        ]

    # ``get("uri", "")`` still returns None when the key exists with a None
    # value, so normalise with ``or`` before stripping.
    uri = (agent_dict.get("uri") or "").strip()

    agent_ref = URIRefOrLiteral(uri) if uri else BNode()

    self.g.add((subject_ref, predicate, agent_ref))
    self.g.add((agent_ref, RDF.type, FOAF.Organization))
    self.g.add((agent_ref, RDF.type, FOAF.Agent))

    if agent_dict.get("name"):
        self.g.add((agent_ref, FOAF.name, Literal(agent_dict["name"])))
    if agent_dict.get("email"):
        email = agent_dict["email"]
        # foaf:mbox is expressed as a mailto: URI
        if not email.startswith("mailto:"):
            email = f"mailto:{email}"
        self.g.add((agent_ref, FOAF.mbox, URIRef(email)))
    # Both "url" and "homepage" keys are mapped to foaf:homepage
    if agent_dict.get("url"):
        self.g.add((agent_ref, FOAF.homepage, URIRef(agent_dict["url"])))
    if agent_dict.get("homepage"):
        self.g.add((agent_ref, FOAF.homepage, URIRef(agent_dict["homepage"])))
    if agent_dict.get("type"):
        self.g.add((agent_ref, DCT.type, URIRef(agent_dict["type"])))
    if agent_dict.get("identifier"):
        self.g.add((agent_ref, DCT.identifier, Literal(agent_dict["identifier"])))

    # ``or []`` guards against an explicit None value for the key.
    for sub_org in agent_dict.get("actedOnBehalfOf") or []:
        # Only named organizations (given as dicts) are serialized
        if isinstance(sub_org, dict) and sub_org.get("name"):
            org_ref = BNode()
            self.g.add((agent_ref, PROV.actedOnBehalfOf, org_ref))
            self.g.add((org_ref, RDF.type, PROV.Organization))
            self.g.add((org_ref, FOAF.name, Literal(sub_org["name"])))

    return agent_ref

def _add_contact_to_graph(self, subject, predicate, contact):
    """
    Serializes a contact point as a vcard:Kind node linked to the subject.

    Parameters:
    - subject: The RDF subject the contact belongs to (e.g. a data service node)
    - predicate: The RDF predicate linking subject and contact (e.g. dcat:contactPoint)
    - contact: A dict with the optional keys "uri", "name", "email",
      "identifier" and "url". A list/tuple of such dicts is also accepted
      (the parsing side appears to store contacts as a list -- confirm
      against ``_contact_details``); non-dict entries are ignored.

    Returns nothing; triples are added to ``self.g``.
    """
    # Accept the whole list of contacts instead of failing on ``list.get``.
    if isinstance(contact, (list, tuple)):
        for item in contact:
            if isinstance(item, dict):
                self._add_contact_to_graph(subject, predicate, item)
        return

    contact_uri = contact.get("uri")
    if contact_uri:
        contact_details = CleanedURIRef(contact_uri)
    else:
        # No URI available: describe the contact with a blank node
        contact_details = BNode()

    self.g.add((contact_details, RDF.type, VCARD.Kind))
    self.g.add((subject, predicate, contact_details))

    self._add_triple_from_dict(contact, contact_details, VCARD.fn, "name")
    self._add_triple_from_dict(
        contact,
        contact_details,
        VCARD.hasEmail,
        "email",
        _type=URIRef,
        # email values are passed through self._add_mailto before being stored
        value_modifier=self._add_mailto,
    )
    self._add_triple_from_dict(
        contact,
        contact_details,
        VCARD.hasUID,
        "identifier",
        _type=URIRefOrLiteral,
    )
    self._add_triple_from_dict(
        contact,
        contact_details,
        VCARD.hasURL,
        "url",
        _type=URIRef,
    )


def _add_spatial_value_to_graph(self, spatial_ref, predicate, value):
"""
Adds spatial triples to the graph. Assumes that value is a GeoJSON string
Expand Down
163 changes: 158 additions & 5 deletions ckanext/dcat/profiles/euro_dcat_ap_2.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
from decimal import Decimal, DecimalException

from rdflib import URIRef, BNode, Literal, Namespace
from rdflib import URIRef, BNode, Literal, Namespace, FOAF, PROV, RDF, RDFS
from ckanext.dcat.utils import resource_uri

from .base import URIRefOrLiteral, CleanedURIRef
Expand All @@ -18,7 +18,6 @@

from .euro_dcat_ap_base import BaseEuropeanDCATAPProfile


ELI = Namespace("http://data.europa.eu/eli/ontology#")


Expand Down Expand Up @@ -65,6 +64,32 @@ def _parse_dataset_v2(self, dataset_dict, dataset_ref):
# Call base super method for common properties
super().parse_dataset(dataset_dict, dataset_ref)

# --- Provenance deserialization ---
was_generated_by = self.g.value(dataset_ref, PROV.wasGeneratedBy)
if was_generated_by:
activity_dict = {}
activity_dict["uri"] = str(was_generated_by)
activity_dict["type"] = [
str(t) for t in self.g.objects(was_generated_by, RDF.type)
]
activity_dict["label"] = self._object_value(was_generated_by, RDFS.label)
activity_dict["seeAlso"] = self._object_value(was_generated_by, RDFS.seeAlso)
activity_dict["dct_type"] = self._object_value(was_generated_by, DCT.type)
activity_dict["startedAtTime"] = self._object_value(
was_generated_by, PROV.startedAtTime
)

agents = self._agents_details(was_generated_by, PROV.wasAssociatedWith)
if agents:
activity_dict["wasAssociatedWith"] = [agents[0]] # Only take the first agent

dataset_dict["provenance_activity"] = [activity_dict]

# --- Qualified Attribution ---
qualified_attributions = self._parse_qualified_attributions(dataset_ref)
if qualified_attributions:
dataset_dict["qualified_attribution"] = qualified_attributions

# Standard values
value = self._object_value(dataset_ref, DCAT.temporalResolution)
if value:
Expand Down Expand Up @@ -159,27 +184,51 @@ def _parse_dataset_v2(self, dataset_dict, dataset_ref):
):
access_service_dict = {}

# Simple values
# Simple values
for key, predicate in (
("availability", DCATAP.availability),
("title", DCT.title),
("endpoint_description", DCAT.endpointDescription),
("license", DCT.license),
("access_rights", DCT.accessRights),
("description", DCT.description),
("identifier", DCT.identifier),
("description", DCT.description),
("modified", DCT.modified),
):
value = self._object_value(access_service, predicate)
if value:
access_service_dict[key] = value
# List

# List values
for key, predicate in (
("endpoint_url", DCAT.endpointURL),
("serves_dataset", DCAT.servesDataset),
("conforms_to", DCT.conformsTo),
("format", DCT["format"]),
("language", DCT.language),
("rights", DCT.rights),
("landing_page", DCAT.landingPage),
("keyword", DCAT.keyword),
("applicable_legislation", DCATAP.applicableLegislation),
("theme", DCAT.theme),
):
values = self._object_value_list(access_service, predicate)
if values:
access_service_dict[key] = values

contact_points = self._contact_details(access_service, DCAT.contactPoint)
if contact_points:
access_service_dict["contact"] = contact_points

publishers = self._agents_details(access_service, DCT.publisher)
if publishers:
access_service_dict["publisher"] = publishers

creators = self._agents_details(access_service, DCT.creator)
if creators:
access_service_dict["creator"] = creators

# Access service URI (explicitly show the missing ones)
access_service_dict["uri"] = (
str(access_service)
Expand Down Expand Up @@ -247,6 +296,44 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref):
_class=_class,
)

# --- Provenance serialization ---
activities = dataset_dict.get("provenance_activity", [])

for activity in activities:
activity_uri = URIRef(activity.get("uri")) if activity.get("uri") else BNode()
self.g.add((dataset_ref, PROV.wasGeneratedBy, activity_uri))
self.g.add((activity_uri, RDF.type, PROV.Activity))

if activity.get("label"):
self.g.add((activity_uri, RDFS.label, Literal(activity["label"])))
if activity.get("seeAlso"):
self.g.add((activity_uri, RDFS.seeAlso, URIRef(activity["seeAlso"])))
if activity.get("dct_type"):
self.g.add((activity_uri, DCT.type, URIRef(activity["dct_type"])))
if activity.get("startedAtTime"):
self.g.add((activity_uri, PROV.startedAtTime, Literal(activity["startedAtTime"], datatype=XSD.dateTime)))

for agent_dict in activity.get("wasAssociatedWith", []):
self._add_agent_to_graph(activity_uri, PROV.wasAssociatedWith, agent_dict)

# Qualified Attribution
qualified_attributions = dataset_dict.get("qualified_attribution", [])
for attr in qualified_attributions:
attr_ref = BNode()
self.g.add((dataset_ref, DCAT.qualifiedAttribution, attr_ref))
self.g.add((attr_ref, RDF.type, DCAT.Attribution))

agent_list = attr.get("agent", [])
for agent_dict in agent_list:
if isinstance(agent_dict, dict):
self._add_agent_to_graph(attr_ref, DCAT.agent, agent_dict)
elif isinstance(agent_dict, str):
self.g.add((attr_ref, DCAT.agent, URIRef(agent_dict)))
role = attr.get("role")
if role:
self.g.add((attr_ref, DCAT.hadRole, URIRef(role)))


# Temporal

# The profile for DCAT-AP 1 stored triples using schema:startDate,
Expand Down Expand Up @@ -408,12 +495,58 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref):
RDFS.Resource,
),
("description", DCT.description, None, Literal),
("modified", DCT.modified, None, Literal),
]

self._add_triples_from_dict(
access_service_dict, access_service_node, items
)

if access_service_dict.get("modified"):
self._add_date_triple(access_service_node, DCT.modified, access_service_dict.get("modified"))


contact_point_dict = access_service_dict.get("contact")
if contact_point_dict:
self._add_contact_to_graph(access_service_node, DCAT.contactPoint, contact_point_dict)

publisher_dict = access_service_dict.get("publisher")
if publisher_dict:
self._add_agent_to_graph(access_service_node, DCT.publisher, publisher_dict)

for creator_dict in access_service_dict.get("creator", []):
self._add_agent_to_graph(access_service_node, DCT.creator, creator_dict)

# Extra list values for access services
extra_items = [
("conforms_to", DCT.conformsTo, None, URIRefOrLiteral),
("format", DCT["format"], None, URIRefOrLiteral),
("language", DCT.language, None, URIRefOrLiteral),
("rights", DCT.rights, None, URIRefOrLiteral),
("landing_page", DCAT.landingPage, None, URIRefOrLiteral),
("applicable_legislation", DCATAP.applicableLegislation, None, URIRefOrLiteral, ELI.LegalResource),
("theme", DCAT.theme, None, URIRefOrLiteral),
]
self._add_list_triples_from_dict(access_service_dict, access_service_node, extra_items)

# Add single-value triple for identifier
self._add_triple_from_dict(
access_service_dict,
access_service_node,
DCT.identifier,
"identifier",
_type=URIRefOrLiteral
)

# Add keyword list
self._add_triple_from_dict(
access_service_dict,
access_service_node,
DCAT.keyword,
"keyword",
list_value=True,
_type=Literal
)

# Lists
items = [
(
Expand Down Expand Up @@ -448,3 +581,23 @@ def _graph_from_dataset_v2_only(self, dataset_dict, dataset_ref):
_type=URIRefOrLiteral,
_class=ADMS.Identifier,
)

def _parse_qualified_attributions(self, dataset_ref):
    """
    Collects the qualified attributions attached to a dataset.

    For every object of prov:qualifiedAttribution on ``dataset_ref`` a dict
    is built with:
    - "role": the first dcat:hadRole object found, as a string (if any)
    - "agent": the result of ``self._agents_details`` for prov:agent
      (if non-empty)

    Attributions yielding neither key are left out. Returns the list of
    dicts, which is empty when nothing was found.
    """
    graph = self.g
    parsed = []

    for attribution_node in graph.objects(dataset_ref, PROV.qualifiedAttribution):
        entry = {}

        # Only the first role encountered is kept
        first_role = next(iter(graph.objects(attribution_node, DCAT.hadRole)), None)
        if first_role is not None:
            entry["role"] = str(first_role)

        # Agents are read with the same helper used for other agent properties
        agents = self._agents_details(attribution_node, PROV.agent)
        if agents:
            entry["agent"] = agents

        if entry:
            parsed.append(entry)

    return parsed
12 changes: 12 additions & 0 deletions ckanext/dcat/profiles/euro_dcat_ap_3.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
RDF,
)

from .base import URIRefOrLiteral
from ckanext.dcat.utils import dataset_uri
from .euro_dcat_ap_2 import EuropeanDCATAP2Profile
from .euro_dcat_ap_scheming import EuropeanDCATAPSchemingProfile
Expand All @@ -29,6 +30,11 @@ def parse_dataset(self, dataset_dict, dataset_ref):
# DCAT AP v2 scheming fields
dataset_dict = self._parse_dataset_v2_scheming(dataset_dict, dataset_ref)

# DCAT AP v3: hasVersion
values = self._object_value_list(dataset_ref, DCAT.hasVersion)
if values:
dataset_dict["has_version"] = values

return dataset_dict

def graph_from_dataset(self, dataset_dict, dataset_ref):
Expand All @@ -45,6 +51,12 @@ def graph_from_dataset(self, dataset_dict, dataset_ref):
# DCAT AP v3 properties also applied to higher versions
self._graph_from_dataset_v3(dataset_dict, dataset_ref)

# DCAT AP v3: List triples
items = [
("has_version", DCAT.hasVersion, None, URIRefOrLiteral),
]
self._add_list_triples_from_dict(dataset_dict, dataset_ref, items)

def graph_from_catalog(self, catalog_dict, catalog_ref):

self._graph_from_catalog_base(catalog_dict, catalog_ref)
Expand Down
Loading