Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion api/data_ingestion/models/file_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,13 @@ def filename(self) -> str:
country = self.country

if self.dataset == "structured":
# For structured datasets, use original filename with upload ID
original_name = Path(self.original_filename).stem
filename = f"{original_name}_{self.id}"
return f"{filename}{ext}"
if self.dataset == "health":
# {ISO3}_{original_stem}_{timestamp}.csv under health-master/<ISO3>/
stem = Path(self.original_filename).stem or "health_upload"
return f"{country}_{stem}_{timestamp}{ext}"
else:
filename_elements = [self.id, country, self.dataset]
if self.source is not None and self.dataset != "geolocation":
Expand All @@ -80,6 +83,15 @@ def upload_path(self) -> str:

return f"{settings.LAKEHOUSE_PATH}/raw/custom-dataset/{self.filename}"

if self.dataset == "health":
# Blob path within AZURE_BLOB_CONTAINER_NAME (no leading slash):
# updated_master_schema/health-master/<ISO3 or $NA>/<ISO3>_<stem>_<timestamp>.csv
country_segment = "$NA" if self.country == "N/A" else self.country
return (
f"updated_master_schema/health-master/"
f"{country_segment}/{self.filename}"
)

# For other datasets, use the uploads path
if self.dataset == "unstructured":
dataset_path = "unstructured"
Expand Down
27 changes: 25 additions & 2 deletions api/data_ingestion/routers/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,13 @@ async def upload_unstructured( # noqa: C901
)
db.add(file_upload)
await db.commit()
await db.refresh(file_upload)

# Keep parity with school uploads: persist a sidecar JSON metadata path.
Comment thread
reanbrenda marked this conversation as resolved.
metadata_file_path = get_metadata_path(file_upload.upload_path)
file_upload.metadata_json_path = metadata_file_path
db.add(file_upload)
await db.commit()

client = storage_client.get_blob_client(file_upload.upload_path)

Expand All @@ -531,6 +538,11 @@ async def upload_unstructured( # noqa: C901
metadata=metadata,
content_settings=ContentSettings(content_type=file_type),
)
metadata_blob_client = storage_client.get_blob_client(
file_upload.metadata_json_path
)
metadata_json_bytes = json.dumps(metadata, indent=2).encode()
metadata_blob_client.upload_blob(metadata_json_bytes, overwrite=True)
response.status_code = status.HTTP_201_CREATED
except HttpResponseError as err:
raise HTTPException(
Expand Down Expand Up @@ -591,6 +603,17 @@ async def upload_structured( # noqa: C901
detail="File extension must be .csv for structured datasets.",
)

portal_ds = (form.portal_dataset or "").strip().lower()
if portal_ds == "health":
dataset_label = "health"
elif portal_ds == "":
dataset_label = "structured"
else:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Invalid portal_dataset value.",
)

# For structured datasets, always use "N/A" as country
if form.country == "Global Dataset":
country_code = "N/A"
Expand All @@ -608,7 +631,7 @@ async def upload_structured( # noqa: C901
uploader_id=database_user.id,
uploader_email=database_user.email,
country=country_code,
dataset="structured",
dataset=dataset_label,
original_filename=file.filename,
column_to_schema_mapping={},
column_license={},
Expand All @@ -624,7 +647,7 @@ async def upload_structured( # noqa: C901
**{str(k): str(v) for k, v in orjson.loads(form.metadata).items()},
"country": form.country,
"uploader_email": email,
"dataset_type": "structured",
"dataset_type": dataset_label,
}

if form.source is not None:
Expand Down
2 changes: 2 additions & 0 deletions api/data_ingestion/schemas/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class UnstructuredFileUploadRequest:
country: str = Form(...)
metadata: str = Form(...)
source: str | None = Form(None)
# When "health", stores dataset=health and uses the health raw path (see upload_structured).
portal_dataset: str | None = Form(None)


@dataclass
Expand Down
30 changes: 30 additions & 0 deletions ui/src/components/upload/Health.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import {
BaseUploadMetadataForm,
UploadMetadataFormProps,
} from "@/components/upload/uploadMetadataFormBase.tsx";
import {
health,
healthMetadataDatasetSection,
healthMetadataNationalSection,
} from "@/constants/metadata";

// Body copy of the intro panel, one entry per rendered paragraph.
const HEALTH_INTRO_PARAGRAPHS = [
  "Provide context for this health dataset: who compiled or uploaded it, what period it covers, and how it was collected.",
  "Required fields include country, health dataset description, focal point (person uploading or responsible), data owner, and the year the data refers to.",
];

/**
 * Intro copy for the health metadata form: a heading (`title`) and the
 * explanatory `paragraphs` displayed with it.
 */
const HEALTH_INTRO = {
  title: "Add health metadata",
  paragraphs: HEALTH_INTRO_PARAGRAPHS,
};

/**
 * Metadata form for health dataset uploads.
 *
 * Renders `BaseUploadMetadataForm` with the caller's props forwarded first,
 * then the health field mapping, the health section headings and the
 * health intro copy applied on top.
 */
export function Health(props: UploadMetadataFormProps) {
  const { title, paragraphs } = HEALTH_INTRO;

  return (
    <BaseUploadMetadataForm
      {...props}
      mapping={health}
      datasetSectionHeading={healthMetadataDatasetSection}
      nationalPracticesHeading={healthMetadataNationalSection}
      introTitle={title}
      introParagraphs={paragraphs}
    />
  );
}
28 changes: 26 additions & 2 deletions ui/src/components/upload/UploadLanding.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ function UploadLanding(props: UploadLandingProps) {
const { hasCoverage, hasGeolocation, isAdmin } = useRoles();

// Tab 0 = Geolocation (source gigasync), 1 = API (source api),
// 2 = Coverage (dataset coverage), 3 = Schemaless (dataset structured)
// 2 = Coverage (dataset coverage), 3 = Schemaless (dataset structured),
// 4 = Health (dataset health)
const tabFilter = (() => {
switch (selectedTab) {
case 0:
Expand All @@ -48,6 +49,8 @@ function UploadLanding(props: UploadLandingProps) {
return { source: null, dataset: "coverage" as const };
case 3:
return { source: null, dataset: "structured" as const };
case 4:
return { source: null, dataset: "health" as const };
default:
return { source: null, dataset: null };
}
Expand Down Expand Up @@ -101,7 +104,7 @@ function UploadLanding(props: UploadLandingProps) {
shared and in which context.
</p>
</div>
<div className="grid grid-cols-4">
<div className="grid grid-cols-1 gap-3 sm:grid-cols-2 lg:grid-cols-5">
{(hasGeolocation || isAdmin) && (
<Button
as={Link}
Expand Down Expand Up @@ -145,6 +148,19 @@ function UploadLanding(props: UploadLandingProps) {
>
Schemaless dataset
</Button>
<Button
as={Link}
to="/upload/$uploadGroup/$uploadType"
params={{
uploadGroup: "other",
uploadType: "health",
}}
className="w-full"
size="xl"
renderIcon={Add}
>
Health dataset
</Button>
</div>
</Stack>

Expand All @@ -158,6 +174,7 @@ function UploadLanding(props: UploadLandingProps) {
<Tab>API</Tab>
<Tab>Coverage</Tab>
<Tab>Schemaless</Tab>
<Tab>Health</Tab>
</TabList>

<TabPanels>
Expand Down Expand Up @@ -189,6 +206,13 @@ function UploadLanding(props: UploadLandingProps) {
dataset={tabFilter.dataset}
/>
</TabPanel>
<TabPanel className="p-0">
<UploadsTable
{...props}
source={tabFilter.source}
dataset={tabFilter.dataset}
/>
</TabPanel>
</TabPanels>
</Tabs>
</Stack>
Expand Down
Loading