Skip to content
28 changes: 25 additions & 3 deletions src/specify_cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,8 +710,15 @@ def preset_add(
zip_path = Path(tmpdir) / "preset.zip"
try:
from specify_cli.authentication.http import open_url as _open_url
from specify_cli._github_http import resolve_github_release_asset_api_url
Comment thread
mnriem marked this conversation as resolved.

with _open_url(from_url, timeout=60) as response:
_preset_extra_headers = None
_resolved_from_url = resolve_github_release_asset_api_url(from_url, _open_url)
if _resolved_from_url:
from_url = _resolved_from_url
_preset_extra_headers = {"Accept": "application/octet-stream"}

with _open_url(from_url, timeout=60, extra_headers=_preset_extra_headers) as response:
Comment thread
mnriem marked this conversation as resolved.
zip_path.write_bytes(response.read())
except urllib.error.URLError as e:
console.print(f"[red]Error:[/red] Failed to download: {e}")
Expand Down Expand Up @@ -3065,9 +3072,17 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:
console.print("[red]Error:[/red] Only HTTPS URLs are allowed, except HTTP for localhost.")
raise typer.Exit(1)

from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset

_wf_url_extra_headers = None
_resolved_wf_url = _resolve_gh_asset(source, _open_url)
if _resolved_wf_url:
Comment thread
mnriem marked this conversation as resolved.
source = _resolved_wf_url
_wf_url_extra_headers = {"Accept": "application/octet-stream"}
Comment thread
mnriem marked this conversation as resolved.

import tempfile
try:
with _open_url(source, timeout=30) as resp:
with _open_url(source, timeout=30, extra_headers=_wf_url_extra_headers) as resp:
final_url = resp.geturl()
final_parsed = urlparse(final_url)
final_host = final_parsed.hostname or ""
Expand Down Expand Up @@ -3164,9 +3179,16 @@ def _validate_and_install_local(yaml_path: Path, source_label: str) -> None:

try:
from specify_cli.authentication.http import open_url as _open_url
from specify_cli._github_http import resolve_github_release_asset_api_url as _resolve_gh_asset

_wf_cat_extra_headers = None
_resolved_workflow_url = _resolve_gh_asset(workflow_url, _open_url)
Comment thread
mnriem marked this conversation as resolved.
Outdated
if _resolved_workflow_url:
workflow_url = _resolved_workflow_url
_wf_cat_extra_headers = {"Accept": "application/octet-stream"}

workflow_dir.mkdir(parents=True, exist_ok=True)
with _open_url(workflow_url, timeout=30) as response:
with _open_url(workflow_url, timeout=30, extra_headers=_wf_cat_extra_headers) as response:
Comment thread
mnriem marked this conversation as resolved.
# Validate final URL after redirects
final_url = response.geturl()
final_parsed = urlparse(final_url)
Expand Down
77 changes: 75 additions & 2 deletions src/specify_cli/_github_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

import os
import urllib.request
from typing import Dict
from urllib.parse import urlparse
from typing import Callable, Dict, Optional
from urllib.parse import quote, unquote, urlparse

# GitHub-owned hostnames that should receive the Authorization header.
# Includes codeload.github.com because GitHub archive URL downloads
Expand Down Expand Up @@ -76,6 +76,79 @@ def redirect_request(self, req, fp, code, msg, headers, newurl):
return new_req


def resolve_github_release_asset_api_url(
    download_url: str,
    open_url_fn: Callable,
    timeout: int = 60,
) -> Optional[str]:
    """Resolve a GitHub browser release URL to its REST API asset URL.

    For private or SSO-protected repositories, browser release download
    URLs (``https://github.com/<owner>/<repo>/releases/download/<tag>/<asset>``)
    redirect to an HTML/SSO page instead of delivering the file.  This
    helper resolves such a URL to the matching GitHub REST API asset URL
    (``https://api.github.com/repos/…/releases/assets/<id>``), which can
    then be downloaded with ``Accept: application/octet-stream`` and an
    auth token to retrieve the actual file payload.

    If *download_url* is already a REST API asset URL, it is returned
    as-is.  Non-GitHub URLs and GitHub URLs that are not release-download
    URLs return ``None``.  If the API lookup fails (network error, timeout,
    undecodable or unexpectedly shaped response, asset not found), ``None``
    is returned so callers can fall back to the original URL.

    Args:
        download_url: The URL to resolve.
        open_url_fn: A callable compatible with
            ``specify_cli.authentication.http.open_url`` used to make the
            authenticated API request.  It is called as
            ``open_url_fn(url, timeout=timeout)`` and must return a context
            manager whose value has a ``read()`` method.
        timeout: Per-request timeout in seconds.

    Returns:
        The resolved REST API asset URL, or ``None`` if resolution is not
        applicable or fails.
    """
    import json

    parsed = urlparse(download_url)
    # Split on the raw path first, then decode each segment, so an encoded
    # "/" inside a segment (e.g. a tag like "v1%2Fbeta") stays in that segment.
    parts = [unquote(part) for part in parsed.path.strip("/").split("/")]

    # Already a REST API asset URL — use it directly
    if (
        parsed.hostname == "api.github.com"
        and len(parts) >= 6
        and parts[0] == "repos"
        and parts[3:5] == ["releases", "assets"]
    ):
        return download_url

    # Only handle github.com browser release download URLs
    if parsed.hostname != "github.com":
        return None

    # Expecting /<owner>/<repo>/releases/download/<tag>/<asset>
    if len(parts) < 6 or parts[2:4] != ["releases", "download"]:
        return None

    owner, repo, tag = parts[0], parts[1], parts[4]
    asset_name = "/".join(parts[5:])

    # Every decoded segment is re-encoded before being placed in the API URL;
    # otherwise a decoded "?", "#" or "/" in owner/repo/tag would change
    # which endpoint is requested.
    encoded_owner = quote(owner, safe="")
    encoded_repo = quote(repo, safe="")
    encoded_tag = quote(tag, safe="")
    release_url = (
        f"https://api.github.com/repos/{encoded_owner}/{encoded_repo}"
        f"/releases/tags/{encoded_tag}"
    )

    try:
        with open_url_fn(release_url, timeout=timeout) as response:
            release_data = json.loads(response.read())
    except (OSError, ValueError):
        # OSError covers urllib.error.URLError/HTTPError as well as raw
        # socket timeouts raised while reading; ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError for bad bodies.
        return None

    # Treat an unexpectedly shaped payload as "not resolvable".
    if not isinstance(release_data, dict):
        return None
    assets = release_data.get("assets")
    if not isinstance(assets, list):
        return None

    for asset in assets:
        if not isinstance(asset, dict):
            continue
        asset_url = asset.get("url")
        if asset.get("name") == asset_name and asset_url:
            return str(asset_url)

    return None


def open_github_url(url: str, timeout: int = 10):
"""Open a URL with GitHub auth, stripping the header on cross-host redirects.

Expand Down
28 changes: 25 additions & 3 deletions src/specify_cli/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1868,13 +1868,29 @@ def _make_request(self, url: str):
from specify_cli.authentication.http import build_request
return build_request(url)

def _open_url(
    self,
    url: str,
    timeout: int = 10,
    extra_headers: Optional[Dict[str, str]] = None,
):
    """Open a URL with provider-based auth, trying each configured provider.

    Delegates to :func:`specify_cli.authentication.http.open_url`.

    Args:
        url: The URL to open.
        timeout: Timeout passed through to ``open_url``.
        extra_headers: Optional additional request headers, forwarded
            unchanged to ``open_url`` (for example
            ``{"Accept": "application/octet-stream"}`` when downloading a
            GitHub release asset through the REST API).

    Returns:
        Whatever ``open_url`` returns; callers in this file use it as a
        context manager yielding a response object.
    """
    # Imported lazily, matching the other helpers in this class.
    from specify_cli.authentication.http import open_url

    # Single return path: extra_headers must always be forwarded, otherwise
    # the Accept override for release-asset downloads is silently dropped.
    return open_url(url, timeout, extra_headers=extra_headers)

def _resolve_github_release_asset_api_url(
    self,
    download_url: str,
    timeout: int = 60,
) -> Optional[str]:
    """Map a GitHub release download URL to its REST API asset URL.

    Thin wrapper around
    :func:`specify_cli._github_http.resolve_github_release_asset_api_url`
    that supplies this catalog's own ``_open_url`` as the opener.

    Args:
        download_url: The URL to resolve.
        timeout: Timeout forwarded to the underlying helper.

    Returns:
        The helper's result: a resolved URL string, or ``None``.
    """
    from specify_cli._github_http import (
        resolve_github_release_asset_api_url as _resolve_asset_url,
    )

    opener = self._open_url
    return _resolve_asset_url(download_url, opener, timeout=timeout)

def _load_catalog_config(self, config_path: Path) -> Optional[List[PresetCatalogEntry]]:
"""Load catalog stack configuration from a YAML file.
Expand Down Expand Up @@ -2332,8 +2348,14 @@ def download_pack(
zip_filename = f"{pack_id}-{version}.zip"
zip_path = target_dir / zip_filename

extra_headers = None
resolved_download_url = self._resolve_github_release_asset_api_url(download_url)
if resolved_download_url:
download_url = resolved_download_url
extra_headers = {"Accept": "application/octet-stream"}

try:
with self._open_url(download_url, timeout=60) as response:
with self._open_url(download_url, timeout=60, extra_headers=extra_headers) as response:
zip_data = response.read()

zip_path.write_bytes(zip_data)
Expand Down
81 changes: 73 additions & 8 deletions tests/test_presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1528,17 +1528,33 @@ def test_download_pack_sends_auth_header(self, project_dir, monkeypatch):
zf.writestr("preset.yml", "id: test-pack\nname: Test\nversion: 1.0.0\n")
zip_bytes = zip_buf.getvalue()

mock_response = MagicMock()
mock_response.read.return_value = zip_bytes
mock_response.__enter__ = lambda s: s
mock_response.__exit__ = MagicMock(return_value=False)
release_response = MagicMock()
release_response.read.return_value = json.dumps(
{
"assets": [
{
"name": "test-pack.zip",
"url": "https://api.github.com/repos/org/repo/releases/assets/1",
}
]
}
).encode()
release_response.__enter__ = lambda s: s
release_response.__exit__ = MagicMock(return_value=False)

captured = {}
asset_response = MagicMock()
asset_response.read.return_value = zip_bytes
asset_response.__enter__ = lambda s: s
asset_response.__exit__ = MagicMock(return_value=False)

captured = []
mock_opener = MagicMock()

def fake_open(req, timeout=None):
captured["req"] = req
return mock_response
captured.append(req)
if req.full_url.endswith("/releases/tags/v1"):
return release_response
return asset_response

mock_opener.open.side_effect = fake_open

Expand All @@ -1554,7 +1570,56 @@ def fake_open(req, timeout=None):
patch("specify_cli.authentication.http.urllib.request.build_opener", return_value=mock_opener):
catalog.download_pack("test-pack", target_dir=project_dir)

assert captured["req"].get_header("Authorization") == "Bearer ghp_testtoken"
assert captured[0].full_url == "https://api.github.com/repos/org/repo/releases/tags/v1"
assert captured[0].get_header("Authorization") == "Bearer ghp_testtoken"
assert captured[1].full_url == "https://api.github.com/repos/org/repo/releases/assets/1"
assert captured[1].get_header("Authorization") == "Bearer ghp_testtoken"
assert captured[1].get_header("Accept") == "application/octet-stream"

def test_download_pack_accepts_direct_github_rest_asset_url(self, project_dir, monkeypatch):
    """download_pack fetches a GitHub REST release asset URL in a single request.

    When the catalog entry already points at an ``api.github.com`` asset
    URL, no release lookup is made: exactly one request goes out, carrying
    the bearer token and the ``application/octet-stream`` Accept header.
    """
    import io
    from unittest.mock import MagicMock, patch

    monkeypatch.setenv("GITHUB_TOKEN", "ghp_testtoken")
    self._inject_github_config(monkeypatch, token_env="GITHUB_TOKEN")
    catalog = PresetCatalog(project_dir)

    # Minimal in-memory preset archive served as the asset payload.
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, "w") as bundle:
        bundle.writestr("preset.yml", "id: test-pack\nname: Test\nversion: 1.0.0\n")

    asset_response = MagicMock()
    asset_response.read.return_value = archive.getvalue()
    asset_response.__enter__ = lambda s: s
    asset_response.__exit__ = MagicMock(return_value=False)

    # Every request handed to the opener is recorded for the assertions below.
    requests_seen = []

    def record_and_respond(req, timeout=None):
        requests_seen.append(req)
        return asset_response

    opener = MagicMock()
    opener.open.side_effect = record_and_respond

    pack_info = {
        "id": "test-pack",
        "name": "Test Pack",
        "version": "1.0.0",
        "download_url": "https://api.github.com/repos/org/repo/releases/assets/1",
        "_install_allowed": True,
    }

    lookup_patch = patch.object(catalog, "get_pack_info", return_value=pack_info)
    opener_patch = patch(
        "specify_cli.authentication.http.urllib.request.build_opener",
        return_value=opener,
    )
    with lookup_patch, opener_patch:
        catalog.download_pack("test-pack", target_dir=project_dir)

    assert len(requests_seen) == 1
    sent = requests_seen[0]
    assert sent.full_url == "https://api.github.com/repos/org/repo/releases/assets/1"
    assert sent.get_header("Authorization") == "Bearer ghp_testtoken"
    assert sent.get_header("Accept") == "application/octet-stream"


# ===== Integration Tests =====
Expand Down
Loading