Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -805,7 +805,40 @@ This makes it very easy to display the fields in the current language.
To disable this behavior, you can set the following config value in your ini file (default: True):

ckanext.dcat.translate_keys = False

## Multilingual RDF support
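To import multilingual values from RDF into CKAN, call the function `_object_value` with the optional parameter `multilang=True` (defaults to `False`). It then returns a dict keyed by language code instead of a single string.
If `_object_value` is called with `multilang=True` but a value has no language attribute, the value is stored under the default language (the `ckan.locale_default` setting, `en` if it is not set). Languages listed in `ckan.locales_offered` that have no value are added with an empty string.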

Example RDF:
```xml
<dct:title xml:lang="en">Dataset Title (EN)</dct:title>
<dct:title xml:lang="de">Dataset Title (DE)</dct:title>
<dct:title xml:lang="fr">Dataset Title (FR)</dct:title>
```
```json
{
"title":
{
"en": "Dataset Title (EN)",
"de": "Dataset Title (DE)",
"fr": "Dataset Title (FR)"
}
}
```

Example with missing language in RDF:
```xml
<dct:title>Dataset Title</dct:title>
```
```json
{
"title":
{
"en": "Dataset Title"
}
}
```

## Structured data and Google Dataset Search indexing

Expand Down
38 changes: 32 additions & 6 deletions ckanext/dcat/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from ckan.lib.munge import munge_tag
from ckan.lib.helpers import url_for

from ckanext.dcat.utils import resource_uri, publisher_uri_from_dataset_dict, DCAT_EXPOSE_SUBCATALOGS, DCAT_CLEAN_TAGS
from ckanext.dcat.utils import resource_uri, publisher_uri_from_dataset_dict, DCAT_EXPOSE_SUBCATALOGS, DCAT_CLEAN_TAGS, get_langs

DCT = Namespace("http://purl.org/dc/terms/")
DCAT = Namespace("http://www.w3.org/ns/dcat#")
Expand Down Expand Up @@ -133,16 +133,27 @@ def _object(self, subject, predicate):
return _object
return None

def _object_value(self, subject, predicate):
def _object_value(self, subject, predicate, multilang=False):
'''
Given a subject and a predicate, returns the value of the object

Both subject and predicate must be rdflib URIRef or BNode objects

If multilang is False (the default), the unicode representation of the
first object found is returned, else an empty string

If multilang is True, a dict keyed by language code is returned instead.
Objects without a language are stored under the `ckan.locale_default`
config value ('en' if not set), and every language returned by
get_langs() that has no value is added with an empty string
'''
default_lang = config.get('ckan.locale_default', 'en')
lang_dict = {}
for o in self.g.objects(subject, predicate):
return unicode(o)
# a later object with the same language overwrites an earlier one
if multilang and o.language:
lang_dict[o.language] = unicode(o)
elif multilang:
# no language on the object: file it under the default language
lang_dict[default_lang] = unicode(o)
else:
return unicode(o)
if multilang:
# when translation does not exist, create an empty one
for lang in get_langs():
if lang not in lang_dict:
lang_dict[lang] = ''
return lang_dict
return ''

def _object_value_int(self, subject, predicate):
Expand Down Expand Up @@ -489,19 +500,22 @@ def _add_list_triples_from_dict(self, _dict, subject, items):

def _add_triples_from_dict(self, _dict, subject, items,
list_value=False,
date_value=False):
date_value=False,
multilang=False):
'''
Calls _add_triple_from_dict once for every entry in items

Each item must be a (key, predicate, fallbacks, _type) tuple. The
list_value, date_value and multilang flags are passed on unchanged
for every item
'''
for item in items:
key, predicate, fallbacks, _type = item
self._add_triple_from_dict(_dict, subject, predicate, key,
fallbacks=fallbacks,
list_value=list_value,
date_value=date_value,
multilang=multilang,
_type=_type)

def _add_triple_from_dict(self, _dict, subject, predicate, key,
fallbacks=None,
list_value=False,
date_value=False,
multilang=False,
_type=Literal,
value_modifier=None):
'''
Expand Down Expand Up @@ -535,13 +549,25 @@ def _add_triple_from_dict(self, _dict, subject, predicate, key,
self._add_list_triple(subject, predicate, value, _type)
elif value and date_value:
self._add_date_triple(subject, predicate, value, _type)
elif value and multilang:
self._add_multilang_triple(subject, predicate, value)
elif value:
# Normal text value
# ensure URIRef items are preprocessed (space removal/url encoding)
if _type == URIRef:
_type = CleanedURIRef
self.g.add((subject, predicate, _type(value)))

def _add_multilang_triple(self, subject, predicate, multilang_values):
    '''
    Adds one language-tagged Literal per translation to the graph

    `multilang_values` is a dict keyed by language code. Each value is
    either a single text or an iterable of texts (e.g. keywords). Every
    non-empty entry is added as `Literal(text, lang=<language code>)`
    for the given subject and predicate; empty values are skipped
    '''
    # `unicode` is `str` on Python 3 and `bytes` is `str` on Python 2, so
    # this tuple names the text types of both major versions
    text_types = (bytes, str, type(u''))

    # .items() exists on Python 2 and 3, .iteritems() only on Python 2
    for lang, values in multilang_values.items():
        if not values:
            continue
        # the values can be either a single text or they are nested in
        # another iterable (e.g. keywords). Texts are checked explicitly
        # because they do define `__iter__` on Python 3 and would
        # otherwise be split into one Literal per character
        if isinstance(values, text_types) or not hasattr(values, '__iter__'):
            values = [values]
        for value in values:
            self.g.add((subject, predicate, Literal(value, lang=lang)))

def _add_list_triple(self, subject, predicate, value, _type=Literal):
'''
Adds as many triples to the graph as values
Expand Down
29 changes: 29 additions & 0 deletions ckanext/dcat/tests/test_base_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,35 @@ def test_object_int_wrong_value(self):

eq_(value, None)

def test_object_value_multilang(self):
    '''
    Language-tagged titles are returned as a dict keyed by language
    '''
    dataset_ref = URIRef('http://example.org/datasets/1')
    profile = RDFProfile(_default_graph())

    # same insertion order as before: German first, then English
    tagged_titles = (
        ('de', 'Test Datensatz 1'),
        ('en', 'Test Dataset 1'),
    )
    for lang, title in tagged_titles:
        profile.g.add((dataset_ref, DCT.title, Literal(title, lang=lang)))

    value = profile._object_value(dataset_ref, DCT.title, multilang=True)

    assert isinstance(value, dict)
    eq_(value.get('de'), u'Test Datensatz 1')
    eq_(value.get('en'), u'Test Dataset 1')

def test_object_value_multilang_missing_lang_param(self):
    '''
    multilang=True returns a dict even if no language-tagged title was added

    NOTE(review): presumably _default_graph() already holds an untagged
    title 'Test Dataset 1' for this dataset - confirm
    '''
    dataset_ref = URIRef('http://example.org/datasets/1')
    profile = RDFProfile(_default_graph())

    value = profile._object_value(dataset_ref, DCT.title, multilang=True)

    assert isinstance(value, dict)
    eq_(value.get('en'), u'Test Dataset 1')

def test_object_list(self):

p = RDFProfile(_default_graph())
Expand Down
5 changes: 5 additions & 0 deletions ckanext/dcat/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,8 @@ def parse_accept_header(accept_header=''):
return accepted_media_types_wildcard[_type]

return None


def get_langs():
    '''
    Returns the whitespace-separated entries of the `ckan.locales_offered`
    config value as a list (an empty list if the option is not set)
    '''
    return config.get('ckan.locales_offered', '').split()