diff --git a/README.md b/README.md
index 65b58d07..428561d4 100644
--- a/README.md
+++ b/README.md
@@ -805,7 +805,40 @@ This makes it very easy to display the fields in the current language.
To disable this behavior, you can set the following config value in your ini file (default: True):
ckanext.dcat.translate_keys = False
+
+## Multilingual RDF support
+To add multilingual values from RDF to CKAN, the function `_object_value` can be called with the optional parameter `multilang=True` (defaults to `False`).
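+If `_object_value` is called with `multilang=True` but a value in the RDF has no language attribute, the value is stored under the default language (the `ckan.locale_default` config option, `en` if it is not set). Languages listed in `ckan.locales_offered` that have no value in the RDF are added with an empty string.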
+
+Example RDF:
+```xml
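+<dct:title xml:lang="en">Dataset Title (EN)</dct:title>
+<dct:title xml:lang="de">Dataset Title (DE)</dct:title>
+<dct:title xml:lang="fr">Dataset Title (FR)</dct:title>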
+```
+```json
+{
+ "title":
+ {
+ "en": "Dataset Title (EN)",
+ "de": "Dataset Title (DE)",
+ "fr": "Dataset Title (FR)"
+ }
+}
+```
+Example with missing language in RDF:
+```xml
+<dct:title>Dataset Title</dct:title>
+```
+```json
+{
+ "title":
+ {
+ "en": "Dataset Title"
+ }
+}
+```
## Structured data and Google Dataset Search indexing
diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py
index 70ccf57c..0547325f 100644
--- a/ckanext/dcat/profiles.py
+++ b/ckanext/dcat/profiles.py
@@ -18,7 +18,7 @@
from ckan.lib.munge import munge_tag
from ckan.lib.helpers import url_for
-from ckanext.dcat.utils import resource_uri, publisher_uri_from_dataset_dict, DCAT_EXPOSE_SUBCATALOGS, DCAT_CLEAN_TAGS
+from ckanext.dcat.utils import resource_uri, publisher_uri_from_dataset_dict, DCAT_EXPOSE_SUBCATALOGS, DCAT_CLEAN_TAGS, get_langs
DCT = Namespace("http://purl.org/dc/terms/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
@@ -133,16 +133,36 @@ def _object(self, subject, predicate):
             return _object
         return None
 
-    def _object_value(self, subject, predicate):
+    def _object_value(self, subject, predicate, multilang=False):
         '''
         Given a subject and a predicate, returns the value of the object
 
         Both subject and predicate must be rdflib URIRef or BNode objects
 
-        If found, the unicode representation is returned, else an empty string
+        If multilang is False (the default), the unicode representation of
+        the first object found is returned, else an empty string
+
+        If multilang is True, a dict mapping language codes to unicode
+        values is returned. Objects without a language tag are stored under
+        the `ckan.locale_default` language (`en` if not configured), and
+        every language returned by get_langs() that has no value is added
+        with an empty string
         '''
-        for o in self.g.objects(subject, predicate):
-            return unicode(o)
-        return ''
+        if not multilang:
+            for o in self.g.objects(subject, predicate):
+                return unicode(o)
+            return ''
+
+        default_lang = config.get('ckan.locale_default', 'en')
+        lang_dict = {}
+        for o in self.g.objects(subject, predicate):
+            # only Literals carry a language tag, URIRef and BNode objects
+            # do not have a `language` attribute at all
+            lang = getattr(o, 'language', None) or default_lang
+            lang_dict[lang] = unicode(o)
+        # when translation does not exist, create an empty one
+        for lang in get_langs():
+            lang_dict.setdefault(lang, '')
+        return lang_dict
 
     def _object_value_int(self, subject, predicate):
@@ -489,19 +500,22 @@ def _add_list_triples_from_dict(self, _dict, subject, items):
def _add_triples_from_dict(self, _dict, subject, items,
list_value=False,
- date_value=False):
+ date_value=False,
+ multilang=False):
for item in items:
key, predicate, fallbacks, _type = item
self._add_triple_from_dict(_dict, subject, predicate, key,
fallbacks=fallbacks,
list_value=list_value,
date_value=date_value,
+ multilang=multilang,
_type=_type)
def _add_triple_from_dict(self, _dict, subject, predicate, key,
fallbacks=None,
list_value=False,
date_value=False,
+ multilang=False,
_type=Literal,
value_modifier=None):
'''
@@ -535,6 +549,8 @@ def _add_triple_from_dict(self, _dict, subject, predicate, key,
self._add_list_triple(subject, predicate, value, _type)
elif value and date_value:
self._add_date_triple(subject, predicate, value, _type)
+ elif value and multilang:
+ self._add_multilang_triple(subject, predicate, value)
elif value:
# Normal text value
# ensure URIRef items are preprocessed (space removal/url encoding)
@@ -542,6 +558,26 @@ def _add_triple_from_dict(self, _dict, subject, predicate, key,
                 _type = CleanedURIRef
             self.g.add((subject, predicate, _type(value)))
 
+    def _add_multilang_triple(self, subject, predicate, multilang_values):
+        '''
+        Adds a language-tagged Literal to the graph for every non-empty
+        value of a multilingual field
+
+        multilang_values must be a dict keyed by language code. Each value
+        is either a single string or an iterable of strings (e.g. keywords)
+        '''
+        for lang, values in multilang_values.items():
+            if not values:
+                # empty translations do not produce a triple
+                continue
+            # strings are iterable on Python 3, so test for them explicitly
+            # instead of relying on a missing `__iter__` attribute
+            is_string = isinstance(values, (str, type(u'')))
+            if is_string or not hasattr(values, '__iter__'):
+                values = [values]
+            for value in values:
+                self.g.add((subject, predicate, Literal(value, lang=lang)))
+
     def _add_list_triple(self, subject, predicate, value, _type=Literal):
         '''
         Adds as many triples to the graph as values
diff --git a/ckanext/dcat/tests/test_base_profile.py b/ckanext/dcat/tests/test_base_profile.py
index 4d037a80..5d8d49fb 100644
--- a/ckanext/dcat/tests/test_base_profile.py
+++ b/ckanext/dcat/tests/test_base_profile.py
@@ -149,6 +149,39 @@ def test_object_int_wrong_value(self):
 
         eq_(value, None)
 
+    def test_object_value_multilang(self):
+
+        p = RDFProfile(_default_graph())
+
+        p.g.add((URIRef('http://example.org/datasets/1'),
+                 DCT.title, Literal('Test Datensatz 1', lang='de')))
+
+        p.g.add((URIRef('http://example.org/datasets/1'),
+                 DCT.title, Literal('Test Dataset 1', lang='en')))
+
+        value = p._object_value(URIRef('http://example.org/datasets/1'),
+                                DCT.title,
+                                multilang=True)
+
+        assert isinstance(value, dict)
+        eq_(value.get('de'), u'Test Datensatz 1')
+        eq_(value.get('en'), u'Test Dataset 1')
+
+    def test_object_value_multilang_missing_lang_param(self):
+
+        p = RDFProfile(_default_graph())
+
+        # no language tag: the value must end up under the default language
+        p.g.add((URIRef('http://example.org/datasets/1'),
+                 DCT.title, Literal('Test Dataset 1')))
+
+        value = p._object_value(URIRef('http://example.org/datasets/1'),
+                                DCT.title,
+                                multilang=True)
+
+        assert isinstance(value, dict)
+        eq_(value.get('en'), u'Test Dataset 1')
+
     def test_object_list(self):
 
         p = RDFProfile(_default_graph())
diff --git a/ckanext/dcat/utils.py b/ckanext/dcat/utils.py
index be619cb1..203dd2e8 100644
--- a/ckanext/dcat/utils.py
+++ b/ckanext/dcat/utils.py
@@ -337,3 +337,13 @@ def parse_accept_header(accept_header=''):
             return accepted_media_types_wildcard[_type]
 
     return None
+
+
+def get_langs():
+    '''
+    Returns the language codes listed in the `ckan.locales_offered`
+    config option as a list
+
+    An empty list is returned if the option is not set
+    '''
+    return config.get('ckan.locales_offered', '').split()