-
Notifications
You must be signed in to change notification settings - Fork 3
Hextof lab loader #534
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Hextof lab loader #534
Changes from 2 commits
ceeb637
1c23973
e8965ee
d14bc95
289d037
dcfe456
788d189
69e4595
f4fd755
053bc60
dbb7e94
df78f69
5cd23b4
5b411d1
3739505
a571fa2
73d7b5f
aa42cd8
4734fea
ec2160f
4a6ec53
227dfb1
ef3dcda
dda08a9
475eb8c
a145bea
5da3928
4acc465
32e0643
2721a2b
6c7c23e
e13577a
fb47cd3
528e6aa
bf1dda1
f817476
4e07271
fdeacbf
58c8e01
87ec0cf
2b9372e
45e72a2
aa8dd76
45b33a0
cf71927
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -56,8 +56,10 @@ | |
| set_verbosity(logger, self._verbose) | ||
|
|
||
| self.instrument: str = self._config["core"].get("instrument", "hextof") # default is hextof | ||
| self.beamtime_dir: str = None | ||
| self.raw_dir: str = None | ||
| self.processed_dir: str = None | ||
| self.meta_dir: str = None | ||
|
|
||
| @property | ||
| def verbose(self) -> bool: | ||
|
|
@@ -94,9 +96,14 @@ | |
| # Only raw_dir is necessary, processed_dir can be based on raw_dir, if not provided | ||
| if "paths" in self._config["core"]: | ||
| raw_dir = Path(self._config["core"]["paths"].get("raw", "")) | ||
| print(raw_dir) | ||
| processed_dir = Path( | ||
| self._config["core"]["paths"].get("processed", raw_dir.joinpath("processed")), | ||
| ) | ||
| meta_dir = Path( | ||
| self._config["core"]["paths"].get("meta", raw_dir.joinpath("meta")), | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The path logic is confusing right now, as there are too many possibilities. I'd set the default to archiver_url in the lab default config, plus one automatic option.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This part is also confusing for me, as I don't really see how you can get from raw_dir to e.g. processed_dir with |
||
| ) | ||
| beamtime_dir = Path(raw_dir).parent | ||
|
|
||
| else: | ||
| try: | ||
|
|
@@ -130,11 +137,14 @@ | |
| raw_dir = raw_paths[0].resolve() | ||
|
|
||
| processed_dir = beamtime_dir.joinpath("processed") | ||
| meta_dir = beamtime_dir.joinpath("meta/fabtrack/") | ||
|
|
||
| processed_dir.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| self.beamtime_dir = str(beamtime_dir) | ||
| self.raw_dir = str(raw_dir) | ||
| self.processed_dir = str(processed_dir) | ||
| self.meta_dir = str(meta_dir) | ||
|
|
||
| @property | ||
| def available_runs(self) -> list[int]: | ||
|
|
@@ -209,7 +219,7 @@ | |
| # Return the list of found files | ||
| return [str(file.resolve()) for file in files] | ||
|
|
||
| def parse_metadata(self, token: str = None) -> dict: | ||
| def parse_scicat_metadata(self, token: str = None) -> dict: | ||
| """Uses the MetadataRetriever class to fetch metadata from scicat for each run. | ||
|
|
||
| Returns: | ||
|
|
@@ -225,6 +235,23 @@ | |
|
|
||
| return metadata | ||
|
|
||
| def parse_local_metadata(self) -> dict: | ||
| """Uses the MetadataRetriever class to fetch metadata from local folder for each run. | ||
|
|
||
| Returns: | ||
| dict: Metadata dictionary | ||
| """ | ||
| metadata_retriever = MetadataRetriever(self._config["metadata"]) | ||
| metadata = metadata_retriever.get_local_metadata( | ||
| beamtime_id=self._config["core"]["beamtime_id"], | ||
| beamtime_dir=self.beamtime_dir, | ||
| meta_dir=self.meta_dir, | ||
| runs=self.runs, | ||
| metadata=self.metadata, | ||
| ) | ||
|
|
||
| return metadata | ||
|
|
||
| def get_count_rate( | ||
| self, | ||
| fids: Sequence[int] = None, # noqa: ARG002 | ||
|
|
@@ -403,7 +430,12 @@ | |
| filter_timed_by_electron=filter_timed_by_electron, | ||
| ) | ||
|
|
||
| self.metadata.update(self.parse_metadata(token) if collect_metadata else {}) | ||
| if len(self.parse_scicat_metadata(token)) == 0: | ||
| print("No SciCat metadata available, checking local folder") | ||
| self.metadata.update(self.parse_local_metadata()) | ||
| else: | ||
| print("Metadata taken from SciCat") | ||
| self.metadata.update(self.parse_scicat_metadata(token) if collect_metadata else {}) | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not necessarily a big issue, but parse_scicat_metadata is called twice when SciCat metadata exists: once in the if condition and once in the else branch. scicat_metadata = self.parse_scicat_metadata(token) if collect_metadata else {}
self.metadata.update(scicat_metadata)
if len(scicat_metadata) == 0:
print("No SciCat metadata available, checking local folder")
self.metadata.update(self.parse_local_metadata())
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fine with me. I just wanted to implement a check: if SciCat entries are available, use them; if not, check the local folder, to stay compatible with older beamtimes. |
||
| self.metadata.update(bh.metadata) | ||
|
|
||
| print(f"loading complete in {time.time() - t0: .2f} s") | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,6 +5,8 @@ | |
| from __future__ import annotations | ||
|
|
||
| import requests | ||
| import json | ||
| import yaml | ||
|
|
||
| from sed.core.config import read_env_var | ||
| from sed.core.config import save_env_var | ||
|
|
@@ -128,19 +130,109 @@ def _get_metadata_per_run(self, pid: str) -> dict: | |
| return {} # Return an empty dictionary for this run | ||
|
|
||
| def _create_old_dataset_url(self, pid: str) -> str: | ||
| return "{burl}/{url}/%2F{npid}".format( | ||
| return "{burl}{url}/%2F{npid}".format( | ||
| burl=self.url, | ||
| url="Datasets", | ||
| url="datasets",#"Datasets", | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did the API change?
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, all metadata was migrated to the generalized scicat.desy.de with a new API, where 'Datasets' was changed to 'datasets' :) |
||
| npid=self._reformat_pid(pid), | ||
| ) | ||
|
|
||
| def _create_new_dataset_url(self, pid: str) -> str: | ||
| return "{burl}/{url}/{npid}".format( | ||
| return "{burl}{url}/{npid}".format( | ||
| burl=self.url, | ||
| url="Datasets", | ||
| url="datasets",#"Datasets", | ||
| npid=self._reformat_pid(pid), | ||
| ) | ||
|
|
||
| def _reformat_pid(self, pid: str) -> str: | ||
| """SciCat adds a pid-prefix + "/" but at DESY prefix = "" """ | ||
| return (pid).replace("/", "%2F") | ||
|
|
||
| def get_local_metadata( | ||
| self, | ||
| beamtime_id: str, | ||
| beamtime_dir: str, | ||
| meta_dir: str, | ||
| runs: list, | ||
| metadata: dict = None, | ||
| ) -> dict: | ||
| """ | ||
| Retrieves metadata for a given beamtime ID and list of runs from local meta folder and yaml file. | ||
|
|
||
| Args: | ||
| beamtime_id (str): The ID of the beamtime. | ||
| runs (list): A list of run IDs. | ||
| metadata (dict, optional): The existing metadata dictionary. | ||
| Defaults to None. | ||
|
|
||
| Returns: | ||
| Dict: The updated metadata dictionary. | ||
|
|
||
| Raises: | ||
| Exception: If the request to retrieve metadata fails. | ||
| """ | ||
| if metadata is None: | ||
| metadata = {} | ||
|
|
||
| beamtime_metadata = self._get_beamtime_metadata(beamtime_dir,beamtime_id) | ||
| metadata.update(beamtime_metadata) | ||
| for run in runs: | ||
| logger.debug(f"Retrieving metadata for PID: {run}") | ||
| local_metadata_per_run = self._get_local_metadata_per_run(meta_dir,run) | ||
| local_metadata_per_run.update(local_metadata_per_run) # TODO: Not correct for multiple runs | ||
|
|
||
| metadata.update({'scientificMetadata': local_metadata_per_run['_data']}) | ||
|
|
||
| logger.debug(f"Retrieved metadata with {len(metadata)} entries") | ||
| return metadata | ||
|
|
||
| def _get_beamtime_metadata( | ||
| self, | ||
| beamtime_dir: str, | ||
| beamtime_id: str, | ||
| ) -> dict: | ||
| """ | ||
| Retrieves general metadata for a given beamtime ID from beamtime-metadata-{beamtime_id}.json file | ||
|
|
||
| Args: | ||
| beamtime_id (str): The ID of the beamtime. | ||
| meta_dir(str): The existing local metadata folder. | ||
|
|
||
| Returns: | ||
| Dict: The retrieved metadata dictionary. | ||
|
|
||
| Raises: | ||
| Exception: If the request to retrieve metadata fails. | ||
| """ | ||
| try: | ||
| f = open(f'{beamtime_dir}/beamtime-metadata-{beamtime_id}.json', "r") | ||
| beamtime_metadata = json.loads(f.read()) | ||
| return beamtime_metadata | ||
|
|
||
| except Exception as exception: | ||
| logger.warning(f"Failed to retrieve metadata for beamtime ID {beamtime_id}: {str(exception)}") | ||
| return {} # Return an empty dictionary for this beamtime ID | ||
|
|
||
|
|
||
| def _get_local_metadata_per_run(self, meta_dir: str, run: str) -> dict: | ||
| """ | ||
| Retrieves metadata for a specific run based on the PID from yaml file in the local beamtime folder. | ||
|
|
||
| Args: | ||
| pid (str): The PID of the run. | ||
|
|
||
| Returns: | ||
| dict: The retrieved metadata. | ||
|
|
||
| Raises: | ||
| Exception: If the request to retrieve metadata fails. | ||
| """ | ||
| try: | ||
| run = str(run) | ||
| with open(f"{meta_dir}/{run}_1.yaml", 'r') as stream: | ||
| print("Getting metadata from local folder") | ||
| run_metadata = yaml.safe_load(stream) | ||
| return run_metadata | ||
|
|
||
| except Exception as exception: | ||
| logger.warning(f"Failed to retrieve metadata for PID {run}: {str(exception)}") | ||
| return {"_data":{}} # Return an empty dictionary for this run | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Instead of adding a new entry to the config model, I'd suggest we just allow directory paths in
sed/src/sed/core/config_model.py
Line 327 in 4a6ec53
what do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fine with me. I just thought so because it would be one of the main folders inside the beamtime folder anyway.