diff --git a/ckanext/unfold/logic/action.py b/ckanext/unfold/logic/action.py index 85ddba6..d06eb53 100644 --- a/ckanext/unfold/logic/action.py +++ b/ckanext/unfold/logic/action.py @@ -30,7 +30,7 @@ def get_archive_structure( ) try: - nodes = unf_utils.get_archive_tree(resource, resource_view) + nodes = unf_utils.get_archive_tree(resource, resource_view, context) except unf_exception.UnfoldError as e: return {"error": str(e)} diff --git a/ckanext/unfold/templates/unfold_preview.html b/ckanext/unfold/templates/unfold_preview.html index dc98662..b28d428 100644 --- a/ckanext/unfold/templates/unfold_preview.html +++ b/ckanext/unfold/templates/unfold_preview.html @@ -42,7 +42,7 @@ data-module-resource-id="{{ resource.id }}" data-module-resource-view-id="{{ resource_view.id }}" data-module-resource-url="{{ resource.url }}" - data-module-resource-remote="{{ resource.url_type != 'upload' }}" + data-module-resource-remote="{{ resource.url_type not in ('upload', 'file') }}" data-module-resource-format="{{ resource.format }}" data-module-show-context-menu="{{ (show_context_menu_default if resource_view.show_context_menu is undefined else resource_view.show_context_menu) | tojson }}">
diff --git a/ckanext/unfold/tests/test_unfold.py b/ckanext/unfold/tests/test_unfold.py index 1e1c85e..3bc25b7 100644 --- a/ckanext/unfold/tests/test_unfold.py +++ b/ckanext/unfold/tests/test_unfold.py @@ -1,4 +1,6 @@ import os +from contextlib import contextmanager +from typing import Iterator import pytest @@ -48,3 +50,28 @@ def test_build_complex_tree(): assert len(tree) == 15004 root_folders = [node for node in tree if node.parent == "#"] assert len(root_folders) == 4 + + +@pytest.mark.usefixtures("with_request_context") +def test_build_file_resource_tree(monkeypatch, ckan_config): + file_path = os.path.join(os.path.dirname(__file__), "data/test_archive.zip") + resource = { + "id": "file-resource", + "format": "zip", + "url_type": "file", + } + ckan_config["ckanext.unfold.enable_cache"] = False + + @contextmanager + def prepare_file_resource( + resource: dict, + context: dict, + ) -> Iterator[tuple[dict, str]]: + yield resource, file_path + + monkeypatch.setattr(utils, "_prepare_file_resource", prepare_file_resource) + + tree = utils.get_archive_tree(resource, {}) + + assert len(tree) == 11 + assert isinstance(tree[0], types.Node) diff --git a/ckanext/unfold/utils.py b/ckanext/unfold/utils.py index 15a3c53..04c0b50 100644 --- a/ckanext/unfold/utils.py +++ b/ckanext/unfold/utils.py @@ -6,6 +6,9 @@ import math import pathlib import mimetypes +import tempfile +from collections.abc import Iterator +from contextlib import contextmanager from dataclasses import asdict from typing import Any @@ -22,6 +25,7 @@ DEFAULT_DATE_FORMAT = "%d/%m/%Y - %H:%M" REDIS_CACHE_TTL = 3600 * 24 # 24 hour +TEMPORARY_LINK_TTL = 300 log = logging.getLogger(__name__) @@ -173,7 +177,9 @@ def close(cls) -> None: def get_archive_tree( - resource: dict[str, Any], resource_view: dict[str, Any] + resource: dict[str, Any], + resource_view: dict[str, Any], + context: dict[str, Any] | None = None, ) -> list[unf_types.Node]: cache_enabled = unf_config.is_cache_enabled() cached_tree = UnfoldCacheManager.get(resource["id"]) @@ -189,8 +195,11 @@ def get_archive_tree( if "cloudstorage" in tk.g.plugins: _prepare_cloudstorage_resource(resource) - adapter_instance = adapter_cls(resource, resource_view) - archive_tree = adapter_instance.build_archive_tree() + if resource.get("url_type") == "file": + with _prepare_file_resource(resource, context or {}) as prepared: + archive_tree = _build_archive_tree(adapter_cls, resource_view, *prepared) + else: + archive_tree = _build_archive_tree(adapter_cls, resource_view, resource) if cache_enabled: UnfoldCacheManager.save(archive_tree, resource["id"]) @@ -198,6 +207,89 @@ def get_archive_tree( return archive_tree +def _build_archive_tree( + adapter_cls: type[unf_adapters.BaseAdapter], + resource_view: dict[str, Any], + resource: dict[str, Any], + filepath: str | None = None, +) -> list[unf_types.Node]: + adapter_instance = adapter_cls(resource, resource_view, filepath=filepath) + return adapter_instance.build_archive_tree() + + +@contextmanager +def _prepare_file_resource( + resource: dict[str, Any], + context: dict[str, Any], +) -> Iterator[tuple[dict[str, Any], str | None]]: + """Make a ckanext-files resource readable by an archive adapter.""" + file_id = resource.get("url", "").rstrip("/").rsplit("/", 1)[-1] + if not file_id: + raise unf_exception.UnfoldError("Unable to determine the resource file") + + try: + files = _get_files_api() + file_info = tk.get_action("files_file_show")(context, {"id": file_id}) + storage = files.get_storage(file_info["storage"]) + file_data = files.FileData.from_dict(file_info) + except Exception as error: + raise unf_exception.UnfoldError( + f"Unable to access the resource file: {error}" + ) from error + + try: + temporary_url = storage.temporary_link( + file_data, + TEMPORARY_LINK_TTL, + ) + except Exception: + log.exception("Unable to create a temporary archive link") + temporary_url = None + + if temporary_url: + adapter_resource = resource.copy() + adapter_resource.update( + { + "url": temporary_url, + "type": "url", + "size": file_info.get("size", resource.get("size")), + } + ) + yield adapter_resource, None + return + + if not storage.supports(files.Capability.STREAM): + raise unf_exception.UnfoldError("Resource storage does not support reading files") + + suffix = pathlib.Path(file_info.get("name", "")).suffix + with tempfile.NamedTemporaryFile(suffix=suffix) as target: + try: + for chunk in storage.stream(file_data): + target.write(chunk) + target.flush() + except Exception as error: + raise unf_exception.UnfoldError( + f"Unable to read the resource file: {error}" + ) from error + + yield resource, target.name + + +def _get_files_api() -> Any: + """Load the storage API only when a ckanext-files resource is used.""" + try: + from ckan.lib import files + except ImportError: + try: + from ckanext.files import shared as files + except ImportError as error: + raise unf_exception.UnfoldError( + "ckanext-files is required to read this resource" + ) from error + + return files + + def _prepare_cloudstorage_resource(resource: dict[str, Any]) -> None: uploader = get_resource_uploader(resource) diff --git a/pyproject.toml b/pyproject.toml index 97af27e..d1ebe6b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "ckanext-unfold" -version = "2.3.3" +version = "2.4.0" description = "Provides previews for multiple archive formats" authors = [ {name = "DataShades", email = "datashades@linkdigital.com.au"},