Skip to content
28 changes: 25 additions & 3 deletions src/specify_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,8 +710,15 @@ def preset_add(
zip_path = Path(tmpdir) / "preset.zip"
try:
from specify_cli.authentication.http import open_url as _open_url
from specify_cli._github_http import resolve_github_release_asset_api_url
Comment thread
mnriem marked this conversation as resolved.

with _open_url(from_url, timeout=60) as response:
_preset_extra_headers = None
_resolved_from_url = resolve_github_release_asset_api_url(from_url, _open_url)
if _resolved_from_url:
from_url = _resolved_from_url
_preset_extra_headers = {"Accept": "application/octet-stream"}

with _open_url(from_url, timeout=60, extra_headers=_preset_extra_headers) as response:
Comment thread
mnriem marked this conversation as resolved.
zip_path.write_bytes(response.read())
except urllib.error.URLError as e:
console.print(f"[red]Error:[/red] Failed to download: {e}")
Expand Down Expand Up @@ -3065,9 +3072,17 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
console.print("[red]Error:[/red] Only HTTPS URLs are allowed, except HTTP for localhost.")
raise typer.Exit(1)

from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset

_wf_url_extra_headers = None
_resolved_wf_url = _resolve_gh_asset(source, _open_url)
if _resolved_wf_url:
Comment thread
mnriem marked this conversation as resolved.
source = _resolved_wf_url
_wf_url_extra_headers = {"Accept": "application/octet-stream"}
Comment thread
mnriem marked this conversation as resolved.

import tempfile
try:
with _open_url(source, timeout=30) as resp:
with _open_url(source, timeout=30, extra_headers=_wf_url_extra_headers) as resp:
final_url = resp.geturl()
final_parsed = urlparse(final_url)
final_host = final_parsed.hostname or ""
Expand Down Expand Up @@ -3164,9 +3179,16 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:

try:
from specify_cli.authentication.http import open_url as _open_url
from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset

_wf_cat_extra_headers = None
_resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url)
Comment thread
mnriem marked this conversation as resolved.
Outdated
if _resolved_workflow_url:
workflow_url = _resolved_workflow_url
_wf_cat_extra_headers = {"Accept": "application/octet-stream"}

workflow_dir.mkdir(parents=True, exist_ok=True)
with _open_url(workflow_url, timeout=30) as response:
with _open_url(workflow_url, timeout=30, extra_headers=_wf_cat_extra_headers) as response:
Comment thread
mnriem marked this conversation as resolved.
# Validate final URL after redirects
final_url = response.geturl()
final_parsed = urlparse(final_url)
Expand Down
77 changes: 75 additions & 2 deletions src/specify_cli/_github_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

import os
import urllib.request
from typing import Dict
from urllib.parse import urlparse
from typing import Callable, Dict, Optional
from urllib.parse import quote, unquote, urlparse

# GitHub-owned hostnames that should receive the Authorization header.
# Includes codeload.github.com because GitHub archive URL downloads
Expand Down Expand Up @@ -76,6 +76,79 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
return new_req


def resolve_github_release_asset_api_url(
download_url: str,
open_url_fn: Callable,
timeout: int = 60,
) -> Optional[str]:
Comment thread
mnriem marked this conversation as resolved.
"""Resolve a GitHub browser release URL to its REST API asset URL.

For private or SSO-protected repositories, browser release download
URLs (``https://github.com/<owner>/<repo>/releases/download/<tag>/<asset>``)
redirect to an HTML/SSO page instead of delivering the file. This
helper resolves such a URL to the matching GitHub REST API asset URL
(``https://api.github.com/repos/…/releases/assets/<id>``), which can
then be downloaded with ``Accept: application/octet-stream`` and an
auth token to retrieve the actual file payload.

If *download_url* is already a REST API asset URL, it is returned
as-is. Non-GitHub URLs and GitHub URLs that are not release-download
URLs return ``None``. If the API lookup fails (e.g. network error or
asset not found), ``None`` is returned so callers can fall back to the
original URL.

Args:
download_url: The URL to resolve.
open_url_fn: A callable compatible with
``specify_cli.authentication.http.open_url`` used to make the
authenticated API request.
timeout: Per-request timeout in seconds.

Returns:
The resolved REST API asset URL, or ``None`` if resolution is not
applicable or fails.
"""
import json
import urllib.error

parsed = urlparse(download_url)
parts = [unquote(part) for part in parsed.path.strip("/").split("/")]

# Already a REST API asset URL — use it directly
if (
parsed.hostname == "api.github.com"
and len(parts) >= 6
and parts[:1] == ["repos"]
and parts[3:5] == ["releases", "assets"]
):
return download_url

# Only handle github.com browser release download URLs
if parsed.hostname != "github.com":
return None

# Expecting /<owner>/<repo>/releases/download/<tag>/<asset>
if len(parts) < 6 or parts[2:4] != ["releases", "download"]:
return None

owner, repo, tag = parts[0], parts[1], parts[4]
asset_name = "/".join(parts[5:])
encoded_tag = quote(tag, safe="")
release_url = f"https://api.github.com/repos/{owner}/{repo}/releases/tags/{encoded_tag}"

try:
with open_url_fn(release_url, timeout=timeout) as response:
release_data = json.loads(response.read())
except (urllib.error.URLError, json.JSONDecodeError):
return None

for asset in release_data.get("assets", []):
if asset.get("name") == asset_name and asset.get("url"):
return str(asset["url"])

return None


def open_github_url(url: str, timeout: int = 10):
"""Open a URL with GitHub auth, stripping the header on cross-host redirects.

Expand Down
28 changes: 25 additions & 3 deletions src/specify_cli/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1868,13 +1868,29 @@ def _make_request(self, url: str):
from specify_cli.authentication.http import build_request
return build_request(url)

def _open_url(self, url: str, timeout: int = 10):
def _open_url(
self,
url: str,
timeout: int = 10,
extra_headers: Optional[Dict[str, str]] = None,
):
"""Open a URL with provider-based auth, trying each configured provider.

Delegates to :func:`specify_cli.authentication.http.open_url`.
"""
from specify_cli.authentication.http import open_url
return open_url(url, timeout)
return open_url(url, timeout, extra_headers=extra_headers)

def _resolve_github_release_asset_api_url(
self,
download_url: str,
timeout: int = 60,
) -> Optional[str]:
"""Resolve a GitHub release asset URL to its REST API asset URL."""
from specify_cli._github_http import resolve_github_release_asset_api_url
return resolve_github_release_asset_api_url(
download_url, self._open_url, timeout=timeout
)

def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]:
"""Load catalog stack configuration from a YAML file.
Expand Down Expand Up @@ -2332,8 +2348,14 @@ def download_pack(
zip_filename = f"{pack_id}-{version}.zip"
zip_path = target_dir / zip_filename

extra_headers = None
resolved_download_url = self._resolve_github_release_asset_api_url(download_url)
if resolved_download_url:
download_url = resolved_download_url
extra_headers = {"Accept": "application/octet-stream"}

try:
with self._open_url(download_url, timeout=60) as response:
with self._open_url(download_url, timeout=60, extra_headers=extra_headers) as response:
zip_data = response.read()

zip_path.write_bytes(zip_data)
Expand Down
116 changes: 114 additions & 2 deletions tests/test_github_http.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
"""Tests for GitHub-authenticated HTTP request helpers."""

import io
Comment thread
mnriem marked this conversation as resolved.
Outdated
import json
import os
from unittest.mock import patch
from contextlib import contextmanager
from unittest.mock import MagicMock, patch

import pytest

from specify_cli._github_http import (
build_github_request,
resolve_github_release_asset_api_url,
)


Expand Down Expand Up @@ -76,4 +80,112 @@ def test_no_auth_header_when_no_token(self):
def test_missing_hostname_raises_value_error(self):
"""build_github_request() must reject URLs with valid scheme but no hostname."""
with pytest.raises(ValueError, match="url must include a hostname"):
build_github_request("http://")
build_github_request("http://")


class TestResolveGitHubReleaseAssetApiUrl:
"""Tests for resolve_github_release_asset_api_url()."""

def _make_open_url_fn(self, release_json):
"""Create a fake open_url_fn that returns release JSON."""
@contextmanager
def fake_open(url, timeout=None, extra_headers=None):
resp = MagicMock()
resp.read.return_value = json.dumps(release_json).encode()
yield resp
return fake_open

def test_returns_none_for_non_github_url(self):
"""Non-GitHub URLs should return None."""
result = resolve_github_release_asset_api_url(
"https://example.com/file.zip", lambda *a, **kw: None
)
assert result is None

def test_returns_none_for_non_release_github_url(self):
"""GitHub URLs that aren't release downloads return None."""
result = resolve_github_release_asset_api_url(
"https://github.com/org/repo/archive/refs/tags/v1.zip",
lambda *a, **kw: None,
)
assert result is None

def test_passthrough_for_existing_api_asset_url(self):
"""Already-resolved REST API asset URLs are returned as-is."""
url = "https://api.github.com/repos/org/repo/releases/assets/12345"
result = resolve_github_release_asset_api_url(url, lambda *a, **kw: None)
assert result == url

def test_resolves_browser_url_to_api_url(self):
"""Browser release URL resolves to REST API asset URL."""
release_json = {
"assets": [
{"name": "pack.zip", "url": "https://api.github.com/repos/org/repo/releases/assets/99"}
]
}
result = resolve_github_release_asset_api_url(
"https://github.com/org/repo/releases/download/v1.0/pack.zip",
self._make_open_url_fn(release_json),
)
assert result == "https://api.github.com/repos/org/repo/releases/assets/99"

def test_returns_none_when_asset_not_found(self):
"""Returns None when the release exists but asset name doesn't match."""
release_json = {"assets": [{"name": "other.zip", "url": "https://api.github.com/repos/org/repo/releases/assets/1"}]}
result = resolve_github_release_asset_api_url(
"https://github.com/org/repo/releases/download/v1/missing.zip",
self._make_open_url_fn(release_json),
)
assert result is None

def test_returns_none_on_network_error(self):
"""Returns None when the API request fails."""
import urllib.error

@contextmanager
def failing_open(url, timeout=None, extra_headers=None):
raise urllib.error.URLError("network error")
yield # noqa: unreachable

result = resolve_github_release_asset_api_url(
"https://github.com/org/repo/releases/download/v1/pack.zip",
failing_open,
)
assert result is None

def test_tag_with_special_characters_is_url_encoded(self):
"""Tags with reserved characters (e.g. '/') are encoded in the API URL."""
captured_urls = []

@contextmanager
def capturing_open(url, timeout=None, extra_headers=None):
captured_urls.append(url)
resp = MagicMock()
resp.read.return_value = json.dumps({"assets": []}).encode()
yield resp

resolve_github_release_asset_api_url(
"https://github.com/org/repo/releases/download/feature%2Fv1/pack.zip",
capturing_open,
)
# The tag "feature/v1" (decoded from %2F) must be re-encoded as "feature%2Fv1"
assert len(captured_urls) == 1
assert "releases/tags/feature%2Fv1" in captured_urls[0]

def test_tag_with_hash_is_url_encoded(self):
"""Tags with '#' character are properly encoded."""
captured_urls = []

@contextmanager
def capturing_open(url, timeout=None, extra_headers=None):
captured_urls.append(url)
resp = MagicMock()
resp.read.return_value = json.dumps({"assets": []}).encode()
yield resp

resolve_github_release_asset_api_url(
"https://github.com/org/repo/releases/download/v1%23beta/pack.zip",
capturing_open,
)
assert len(captured_urls) == 1
assert "releases/tags/v1%23beta" in captured_urls[0]
Loading
Loading