diff --git a/README.md b/README.md index 65b58d07..428561d4 100644 --- a/README.md +++ b/README.md @@ -805,7 +805,40 @@ This makes it very easy to display the fields in the current language. To disable this behavior, you can set the following config value in your ini file (default: True): ckanext.dcat.translate_keys = False + +## Multilingual RDF support +To add multilingual values from RDF to CKAN, the function `_object_value` can be called with the optional parameter `multilang=True` (defaults to `False`). +If `_object_value` is called with `multilang=True` but a value has no language attribute, the value is added under the default language (the `ckan.locale_default` setting, or `en` if it is not set). + +Example RDF: +```xml +<dct:title xml:lang="en">Dataset Title (EN)</dct:title> +<dct:title xml:lang="de">Dataset Title (DE)</dct:title> +<dct:title xml:lang="fr">Dataset Title (FR)</dct:title> +``` +```json +{ + "title": + { + "en": "Dataset Title (EN)", + "de": "Dataset Title (DE)", + "fr": "Dataset Title (FR)" + } +} +``` +Example with missing language in RDF: +```xml +<dct:title>Dataset Title</dct:title> +``` +```json +{ + "title": + { + "en": "Dataset Title" + } +} +``` ## Structured data and Google Dataset Search indexing diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py index 70ccf57c..0547325f 100644 --- a/ckanext/dcat/profiles.py +++ b/ckanext/dcat/profiles.py @@ -18,7 +18,7 @@ from ckan.lib.munge import munge_tag from ckan.lib.helpers import url_for -from ckanext.dcat.utils import resource_uri, publisher_uri_from_dataset_dict, DCAT_EXPOSE_SUBCATALOGS, DCAT_CLEAN_TAGS +from ckanext.dcat.utils import resource_uri, publisher_uri_from_dataset_dict, DCAT_EXPOSE_SUBCATALOGS, DCAT_CLEAN_TAGS, get_langs DCT = Namespace("http://purl.org/dc/terms/") DCAT = Namespace("http://www.w3.org/ns/dcat#") @@ -133,16 +133,27 @@ def _object(self, subject, predicate): return _object return None - def _object_value(self, subject, predicate): + def _object_value(self, subject, predicate, multilang=False): ''' Given a subject and a predicate, returns the value of the object - Both subject and predicate must be rdflib URIRef or 
BNode objects - If found, the unicode representation is returned, else an empty string ''' + default_lang = config.get('ckan.locale_default', 'en') + lang_dict = {} for o in self.g.objects(subject, predicate): - return unicode(o) + if multilang and o.language: + lang_dict[o.language] = unicode(o) + elif multilang: + lang_dict[default_lang] = unicode(o) + else: + return unicode(o) + if multilang: + # when translation does not exist, create an empty one + for lang in get_langs(): + if lang not in lang_dict: + lang_dict[lang] = '' + return lang_dict return '' def _object_value_int(self, subject, predicate): @@ -489,19 +500,22 @@ def _add_list_triples_from_dict(self, _dict, subject, items): def _add_triples_from_dict(self, _dict, subject, items, list_value=False, - date_value=False): + date_value=False, + multilang=False): for item in items: key, predicate, fallbacks, _type = item self._add_triple_from_dict(_dict, subject, predicate, key, fallbacks=fallbacks, list_value=list_value, date_value=date_value, + multilang=multilang, _type=_type) def _add_triple_from_dict(self, _dict, subject, predicate, key, fallbacks=None, list_value=False, date_value=False, + multilang=False, _type=Literal, value_modifier=None): ''' @@ -535,6 +549,8 @@ def _add_triple_from_dict(self, _dict, subject, predicate, key, self._add_list_triple(subject, predicate, value, _type) elif value and date_value: self._add_date_triple(subject, predicate, value, _type) + elif value and multilang: + self._add_multilang_triple(subject, predicate, value) elif value: # Normal text value # ensure URIRef items are preprocessed (space removal/url encoding) @@ -542,6 +558,16 @@ def _add_triple_from_dict(self, _dict, subject, predicate, key, _type = CleanedURIRef self.g.add((subject, predicate, _type(value))) + def _add_multilang_triple(self, subject, predicate, multilang_values): # noqa + for key, values in multilang_values.iteritems(): + if values: + # the values can be either a multilang-dict or they are + # 
nested in another iterable (e.g. keywords) + if not hasattr(values, '__iter__'): + values = [values] + for value in values: + self.g.add((subject, predicate, Literal(value, lang=key))) # noqa + def _add_list_triple(self, subject, predicate, value, _type=Literal): ''' Adds as many triples to the graph as values diff --git a/ckanext/dcat/tests/test_base_profile.py b/ckanext/dcat/tests/test_base_profile.py index 4d037a80..5d8d49fb 100644 --- a/ckanext/dcat/tests/test_base_profile.py +++ b/ckanext/dcat/tests/test_base_profile.py @@ -149,6 +149,39 @@ def test_object_int_wrong_value(self): eq_(value, None) + def test_object_value_multilang(self): + + p = RDFProfile(_default_graph()) + + p.g.add((URIRef('http://example.org/datasets/1'), + DCT.title, Literal('Test Datensatz 1', lang='de'))) + + p.g.add((URIRef('http://example.org/datasets/1'), + DCT.title, Literal('Test Dataset 1', lang='en'))) + + value = p._object_value(URIRef('http://example.org/datasets/1'), + DCT.title, + multilang=True) + + assert isinstance(value, dict) + eq_(value.get('de'), u'Test Datensatz 1') + eq_(value.get('en'), u'Test Dataset 1') + + def test_object_value_multilang_missing_lang_param(self): + + p = RDFProfile(_default_graph()) + + # Literal without a language tag: it should be reported under the default language + p.g.add((URIRef('http://example.org/datasets/1'), + DCT.title, Literal('Test Dataset 1'))) + + value = p._object_value(URIRef('http://example.org/datasets/1'), + DCT.title, + multilang=True) + + assert isinstance(value, dict) + eq_(value.get('en'), u'Test Dataset 1') + def test_object_list(self): p = RDFProfile(_default_graph()) diff --git a/ckanext/dcat/utils.py b/ckanext/dcat/utils.py index be619cb1..203dd2e8 100644 --- a/ckanext/dcat/utils.py +++ b/ckanext/dcat/utils.py @@ -337,3 +337,8 @@ def parse_accept_header(accept_header=''): return accepted_media_types_wildcard[_type] return None + + +def get_langs(): + language_priorities = config.get('ckan.locales_offered', '').split() + return language_priorities