Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
ceeb637
working dataframe class for cfel
zain-sohail Sep 28, 2024
1c23973
move file
zain-sohail Dec 17, 2024
e8965ee
move to flash loader
zain-sohail Dec 17, 2024
d14bc95
updates for cfel loader, not breaking tests
zain-sohail Dec 19, 2024
289d037
fix spellcheck
zain-sohail Dec 19, 2024
dcfe456
add example config
zain-sohail Dec 19, 2024
788d189
fix cspell
zain-sohail Dec 19, 2024
69e4595
Merge branch 'v1_feature_branch' into hextof-lab-loader
zain-sohail Jan 28, 2025
f4fd755
update some minor config changes
zain-sohail Jan 30, 2025
053bc60
make sure optional parameters are not necessary
zain-sohail Feb 7, 2025
dbb7e94
Merge branch 'main' into hextof-lab-loader
zain-sohail Feb 7, 2025
df78f69
fix the bugs
zain-sohail Feb 19, 2025
5cd23b4
add timed dataframe starting point
zain-sohail Apr 6, 2025
5b411d1
moving back to main branch for flash, and removing instrument support…
zain-sohail Apr 12, 2025
3739505
separated lab loading procedure but using common methods from flash l…
zain-sohail Apr 12, 2025
a571fa2
fix a few bugs
zain-sohail Apr 12, 2025
73d7b5f
add data for testing and some spelling fixes
zain-sohail Apr 12, 2025
aa42cd8
changed timestamps to use unix format
Aserhisham May 1, 2025
4734fea
Revert "changed timestamps to use unix format"
Aserhisham May 1, 2025
ec2160f
working on timestamps, unfinished testing
Aserhisham May 14, 2025
4a6ec53
added metadata retrieve from beamtime folder
kutnyakhov May 19, 2025
227dfb1
adjusted SciCat part to new version and URL
kutnyakhov May 22, 2025
ef3dcda
changes to validation
May 23, 2025
dda08a9
added get_count_rate() to cfel
Aserhisham Jun 2, 2025
475eb8c
Adjustments to support multiple files
Aserhisham Jul 27, 2025
a145bea
get_count_rate works?
Aserhisham Aug 11, 2025
5da3928
Merge branch 'main' into hextof-lab-loader
Aserhisham Aug 11, 2025
4acc465
test files for cfel, still not finished
Aserhisham Aug 28, 2025
32e0643
Added get_count_rate_ms using millisecCounter and NumOfEvents
kutnyakhov Jan 13, 2026
2721a2b
Added helper _resolve_fids for get_countrates
kutnyakhov Jan 13, 2026
6c7c23e
Changed metadata dict to None in the read_dataframe
kutnyakhov Jan 13, 2026
e13577a
Corrected a bit get_count_rate and get_elapsed_time
kutnyakhov Jan 16, 2026
fb47cd3
Changed df_timestamp to use millisecCounter
kutnyakhov Jan 16, 2026
528e6aa
Adjusted dataframe to have identical length of index and timeStamp
kutnyakhov Jan 20, 2026
bf1dda1
fixed bug in metadata for single local files
kutnyakhov Jan 20, 2026
f817476
fixed bug with single and multiple-runs approach
kutnyakhov Jan 20, 2026
4e07271
Added metadata stacking for multiple runs
kutnyakhov Jan 21, 2026
fdeacbf
Corrected all tests for cfel loader
kutnyakhov Jan 27, 2026
58c8e01
changes for countrate and multiple files
Aserhisham Jan 29, 2026
87ec0cf
more changes
Aserhisham Feb 7, 2026
2b9372e
fixed errors and added get_count_rate_simple
Aserhisham Feb 13, 2026
45e72a2
unified get_count_rate method and corrected get_elepsed_time
kutnyakhov Mar 13, 2026
aa8dd76
fix(loader): correct count rate across runs and refactor elapsed time…
kutnyakhov Mar 27, 2026
45b33a0
adapted tests according to changes in the last commit
kutnyakhov Mar 30, 2026
cf71927
get_count_rate fixed
Aserhisham Apr 25, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/sed/core/config_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class PathsModel(BaseModel):

raw: DirectoryPath
processed: Optional[Union[DirectoryPath, NewPath]] = None
meta: Optional[Union[DirectoryPath, NewPath]] = None

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of adding a new entry to the config model, I'd suggest we just allow directory paths in

archiver_url: Optional[HttpUrl] = None

what do you think?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine for me. I just thought it made sense because it would be one of the main folders inside the beamtime folder anyway.



class CopyToolModel(BaseModel):
Expand Down
36 changes: 34 additions & 2 deletions src/sed/loader/cfel/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,10 @@
set_verbosity(logger, self._verbose)

self.instrument: str = self._config["core"].get("instrument", "hextof") # default is hextof
self.beamtime_dir: str = None
self.raw_dir: str = None
self.processed_dir: str = None
self.meta_dir: str = None

@property
def verbose(self) -> bool:
Expand Down Expand Up @@ -94,9 +96,14 @@
# Only raw_dir is necessary, processed_dir can be based on raw_dir, if not provided
if "paths" in self._config["core"]:
raw_dir = Path(self._config["core"]["paths"].get("raw", ""))
print(raw_dir)
processed_dir = Path(
self._config["core"]["paths"].get("processed", raw_dir.joinpath("processed")),
)
meta_dir = Path(
self._config["core"]["paths"].get("meta", raw_dir.joinpath("meta")),

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The path logic is confusing right now as there are too many possibilities. I'd put the default as archiver_url in the lab default config, plus one automatic option.
To me it's not clear whether the meta path is 'meta/' or 'meta/fabtrack/' right now.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This part is also confusing for me, as I don't really see how you can get from raw_dir to e.g. processed_dir with raw_dir.joinpath("processed") - because this will give you beamtime_dir/raw_dir/processed instead of beamtime_dir/processed, right?
Currently, the meta path is 'meta/fabtrack/' as it comes from Fabiano's code, but it can probably be changed to just 'meta/' as soon as it is accepted/generalized by the IT guys.

)
beamtime_dir = Path(raw_dir).parent

else:
try:
Expand Down Expand Up @@ -130,11 +137,14 @@
raw_dir = raw_paths[0].resolve()

processed_dir = beamtime_dir.joinpath("processed")
meta_dir = beamtime_dir.joinpath("meta/fabtrack/")

Check warning on line 140 in src/sed/loader/cfel/loader.py

View workflow job for this annotation

GitHub Actions / lint

Unknown word (fabtrack)

processed_dir.mkdir(parents=True, exist_ok=True)

self.beamtime_dir = str(beamtime_dir)
self.raw_dir = str(raw_dir)
self.processed_dir = str(processed_dir)
self.meta_dir = str(meta_dir)

@property
def available_runs(self) -> list[int]:
Expand Down Expand Up @@ -209,7 +219,7 @@
# Return the list of found files
return [str(file.resolve()) for file in files]

def parse_metadata(self, token: str = None) -> dict:
def parse_scicat_metadata(self, token: str = None) -> dict:
"""Uses the MetadataRetriever class to fetch metadata from scicat for each run.

Returns:
Expand All @@ -225,6 +235,23 @@

return metadata

def parse_local_metadata(self) -> dict:
    """Fetch metadata for the current runs from the local beamtime folder.

    A MetadataRetriever is built from the "metadata" section of the config and
    queried with the beamtime ID from the "core" section, the beamtime and meta
    directories stored on this loader, the loaded runs and the metadata
    collected so far.

    Returns:
        dict: Metadata dictionary
    """
    retriever = MetadataRetriever(self._config["metadata"])
    # Collect the query arguments after the retriever exists, so config lookups
    # happen in the same order as before.
    query = {
        "beamtime_id": self._config["core"]["beamtime_id"],
        "beamtime_dir": self.beamtime_dir,
        "meta_dir": self.meta_dir,
        "runs": self.runs,
        "metadata": self.metadata,
    }
    return retriever.get_local_metadata(**query)

def get_count_rate(
self,
fids: Sequence[int] = None, # noqa: ARG002
Expand Down Expand Up @@ -403,7 +430,12 @@
filter_timed_by_electron=filter_timed_by_electron,
)

self.metadata.update(self.parse_metadata(token) if collect_metadata else {})
if len(self.parse_scicat_metadata(token)) == 0:
print("No SciCat metadata available, checking local folder")
self.metadata.update(self.parse_local_metadata())
else:
print("Metadata taken from SciCat")
self.metadata.update(self.parse_scicat_metadata(token) if collect_metadata else {})

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not necessarily a big issue, but parse_scicat_metadata is called twice when SciCat metadata exists: once in the if condition and once in the else branch.
One way could be:

scicat_metadata = self.parse_scicat_metadata(token) if collect_metadata else {}
self.metadata.update(scicat_metadata)
if len(scicat_metadata) == 0:
    print("No SciCat metadata available, checking local folder")
    self.metadata.update(self.parse_local_metadata())

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fine for me. I just wanted to implement a check: if SciCat entries are available, use them; if not, check the local folder, to stay compatible with older beamtimes.

self.metadata.update(bh.metadata)

print(f"loading complete in {time.time() - t0: .2f} s")
Expand Down
92 changes: 92 additions & 0 deletions src/sed/loader/flash/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from __future__ import annotations

import requests
import json
import yaml

from sed.core.config import read_env_var
from sed.core.config import save_env_var
Expand Down Expand Up @@ -144,3 +146,93 @@ def _create_new_dataset_url(self, pid: str) -> str:
def _reformat_pid(self, pid: str) -> str:
"""SciCat adds a pid-prefix + "/" but at DESY prefix = "" """
return (pid).replace("/", "%2F")

def get_local_metadata(
    self,
    beamtime_id: str,
    beamtime_dir: str,
    meta_dir: str,
    runs: list,
    metadata: dict = None,
) -> dict:
    """Retrieve metadata for a beamtime and its runs from local files.

    The general beamtime metadata is read via ``_get_beamtime_metadata`` and
    merged into ``metadata``. The ``"_data"`` section of every run's metadata
    (read via ``_get_local_metadata_per_run``) is then merged, in run order,
    into a single dictionary stored under ``"scientificMetadata"``. For keys
    present in several runs the value of the last run wins.

    Args:
        beamtime_id (str): The ID of the beamtime.
        beamtime_dir (str): Folder passed on to the beamtime metadata lookup.
        meta_dir (str): Folder passed on to the per-run metadata lookup.
        runs (list): A list of run IDs. May be empty, in which case
            ``"scientificMetadata"`` is an empty dictionary.
        metadata (dict, optional): The existing metadata dictionary. It is
            updated in place. Defaults to None, meaning a new dictionary.

    Returns:
        dict: The updated metadata dictionary.
    """
    if metadata is None:
        metadata = {}

    metadata.update(self._get_beamtime_metadata(beamtime_dir, beamtime_id))

    # Accumulate over all runs instead of keeping only the last one; this also
    # keeps the name defined when ``runs`` is empty.
    scientific_metadata: dict = {}
    for run in runs:
        logger.debug(f"Retrieving metadata for PID: {run}")
        run_metadata = self._get_local_metadata_per_run(meta_dir, run)
        # Tolerate run files that are empty or lack a "_data" mapping rather
        # than failing the whole load on a KeyError/TypeError.
        run_data = run_metadata.get("_data") if isinstance(run_metadata, dict) else None
        if isinstance(run_data, dict):
            scientific_metadata.update(run_data)
        else:
            logger.debug(f"No '_data' mapping found in local metadata of run {run}")

    metadata.update({"scientificMetadata": scientific_metadata})

    logger.debug(f"Retrieved metadata with {len(metadata)} entries")
    return metadata

def _get_beamtime_metadata(
    self,
    beamtime_dir: str,
    beamtime_id: str,
) -> dict:
    """Read the general beamtime metadata from its JSON file.

    The file is expected at
    ``{beamtime_dir}/beamtime-metadata-{beamtime_id}.json``.

    Args:
        beamtime_dir (str): Folder containing the beamtime metadata file.
        beamtime_id (str): The ID of the beamtime.

    Returns:
        dict: The parsed content of the file, or an empty dictionary if the
        file cannot be opened or decoded (a warning is logged in that case).
    """
    metadata_file = f"{beamtime_dir}/beamtime-metadata-{beamtime_id}.json"
    try:
        # The context manager closes the handle even if parsing fails.
        with open(metadata_file, encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError) as exception:
        # OSError: missing/unreadable file. ValueError: invalid JSON or bad
        # encoding. Best effort: the caller continues without this metadata.
        logger.warning(
            f"Failed to retrieve metadata for beamtime ID {beamtime_id}: {str(exception)}",
        )
        return {}  # Return an empty dictionary for this beamtime ID


def _get_local_metadata_per_run(self, meta_dir: str, run: str) -> dict:
    """Read the metadata of a single run from its YAML file.

    The file is looked up at ``{meta_dir}/{run}_1.yaml``.

    Args:
        meta_dir (str): Folder containing the per-run YAML files.
        run (str): The run identifier; it is converted to a string to build
            the file name.

    Returns:
        dict: The parsed YAML content. If the file cannot be read or parsed,
        or does not contain a mapping at top level (e.g. an empty file),
        ``{"_data": {}}`` is returned and a warning is logged.
    """
    try:
        with open(f"{meta_dir}/{run}_1.yaml", "r") as stream:
            print("Getting metadata from local folder")
            run_metadata = yaml.safe_load(stream)
    except (OSError, ValueError, yaml.YAMLError) as exception:
        logger.warning(f"Failed to retrieve metadata for PID {run}: {str(exception)}")
        return {"_data": {}}  # Return an empty dictionary for this run

    # yaml.safe_load yields None for an empty document and may yield a list or
    # scalar; callers index the result with "_data", so only pass on mappings.
    if not isinstance(run_metadata, dict):
        logger.warning(
            f"Failed to retrieve metadata for PID {run}: "
            f"expected a mapping, got {type(run_metadata).__name__}",
        )
        return {"_data": {}}

    return run_metadata
Loading