diff --git a/config/evaluate/eval_config.yml b/config/evaluate/eval_config.yml index 53c6f7b28..03ded8b87 100644 --- a/config/evaluate/eval_config.yml +++ b/config/evaluate/eval_config.yml @@ -46,7 +46,7 @@ evaluation: plot_ensemble: "members" #supported: false, "std", "minmax", "members" plot_score_maps: false #plot scores on a 2D maps. it slows down score computation plot_score_animations: false #plot animations of score maps across forecast steps. it slows down score computation - plot_score_timeseries: true #plot timeseries of scores across forecast steps. + plot_score_init_timeseries: true #plot timeseries of scores across forecast steps. print_summary: false #print out score values on screen. it can be verbose log_scale: false add_grid: false diff --git a/packages/evaluate/src/weathergen/evaluate/plotting/plot_orchestration.py b/packages/evaluate/src/weathergen/evaluate/plotting/plot_orchestration.py index 2a44c1993..d349d6860 100644 --- a/packages/evaluate/src/weathergen/evaluate/plotting/plot_orchestration.py +++ b/packages/evaluate/src/weathergen/evaluate/plotting/plot_orchestration.py @@ -165,18 +165,6 @@ def run_score_timeseries_pipeline( f"{len(fsteps)} fsteps × {len(unique_hours)} init hours." ) - # --- Parallel plotting --- - _plot_timeseries_parallel( - reader, - stream, - scores_by_hour, - unique_hours, - fsteps, - da_tars, - global_plotting_options, - n_workers, - ) - return scores_by_hour @@ -213,119 +201,6 @@ def _compute_timeseries_scores_for_fstep( return fstep, region, metric_scores -def _plot_single_timeseries( - output_dir: str, - run_id: str, - metric_name: str, - region: str, - channel: str | None, - fstep: int, - lt_label: str, - score: xr.DataArray, - unique_hours: list[int], - image_format: str, - dpi_val: int, -) -> None: - """Plot a single timeseries figure (parallelisable worker).""" - matplotlib.use("Agg") - - score_vals = score.sel(channel=channel) if channel is not None else score - hours = score_vals.coords["source_end_hour"].values - values = score_vals.values.flatten() - - plt.figure(figsize=(10, 6), dpi=dpi_val) - plt.plot(hours, values, marker="o", linewidth=2, label=run_id) - - ch_label = channel if channel is not None else "all" - title = f"{metric_name.upper()} vs source end hour | {ch_label} | {lt_label} | {region}" - plt.title(title) - plt.xlabel("Source window end hour [UTC]") - plt.ylabel(metric_name.upper()) - plt.xlim(min(unique_hours) - 0.5, max(unique_hours) + 0.5) - plt.xticks(unique_hours) - plt.grid(True, alpha=0.3) - plt.legend() - plt.tight_layout() - - out_dir = Path(output_dir) - plot_path = ( - out_dir / f"{metric_name}_{ch_label}_{region}_lead_{lt_label}" - f"_by_source_end_hour.{image_format}" - ) - plt.savefig(plot_path, bbox_inches="tight") - plt.close() - - -def _plot_timeseries_parallel( - reader: Reader, - stream: str, - scores_by_hour: dict[str, dict[str, dict[int, xr.DataArray]]], - unique_hours: list[int], - fsteps: list[int], - da_tars: dict[int, xr.DataArray], - global_plotting_options: dict | None, - n_workers: int | None = None, -) -> None: - """Dispatch timeseries plotting tasks in parallel.""" - - output_dir = reader.runplot_dir / "plots" / stream / "score_timeseries" - output_dir.mkdir(parents=True, exist_ok=True) - - run_id = reader.run_id - plot_cfg = global_plotting_options or {} - image_format = plot_cfg.get("image_format", "png") - dpi_val = plot_cfg.get("dpi_val", 150) - - # Build fstep → lead_time label mapping - lead_time_by_fstep: dict[int, str] = {} - for fstep in fsteps: - da = da_tars[fstep] - if "lead_time" in da.coords: - lt = da.coords["lead_time"].values - hours = int(lt.astype("timedelta64[h]").astype(int)) - lead_time_by_fstep[fstep] = f"lead time {hours}h" - else: - lead_time_by_fstep[fstep] = f"fstep {fstep}" - - # Build plot tasks - plot_tasks: list[dict] = [] - for metric_name, region_dict in scores_by_hour.items(): - for region, fstep_dict in region_dict.items(): - sample_score = next(iter(fstep_dict.values())) - channels = ( - list(sample_score.coords["channel"].values) - if "channel" in sample_score.dims - else [None] - ) - for channel in channels: - for fstep, score in fstep_dict.items(): - plot_tasks.append( - dict( - output_dir=str(output_dir), - run_id=run_id, - metric_name=metric_name, - region=region, - channel=channel, - fstep=fstep, - lt_label=lead_time_by_fstep[fstep], - score=score, - unique_hours=unique_hours, - image_format=image_format, - dpi_val=dpi_val, - ) - ) - - _logger.info( - f"RUN {run_id} - {stream}: Plotting {len(plot_tasks)} timeseries figures " - f"with up to {n_workers} worker(s)." - ) - - calls = [delayed(_plot_single_timeseries)(**t) for t in plot_tasks] - dispatch_parallel(calls, n_workers=n_workers, backend="loky", desc=f"Timeseries plots {stream}") - - _logger.info(f"RUN {run_id} - {stream}: Score timeseries plots saved to {output_dir}.") - - # --------------------------------------------------------------------------- # Score maps # --------------------------------------------------------------------------- @@ -1144,7 +1019,7 @@ def plot_timeseries_summary( image_format = plt_opt.get("image_format", "png") dpi_val = plt_opt.get("dpi_val", 150) - ts_dir = summary_dir / "score_timeseries" + ts_dir = summary_dir / "score_init_time_series" ts_dir.mkdir(parents=True, exist_ok=True) for metric_name, region_dict in timeseries_scores.items(): @@ -1176,7 +1051,8 @@ def plot_timeseries_summary( ) hours = score_vals.coords["source_end_hour"].values values = score_vals.values.flatten() - label = runs[run_id].get("label", run_id) + run_label = runs[run_id].get("label", run_id) + label = f"{run_label} ({run_id})" color = runs[run_id].get("color", None) plt.plot( hours, @@ -1202,8 +1078,10 @@ def plot_timeseries_summary( plt.legend() plt.tight_layout() + run_ids_str = "_".join(sorted(run_dict.keys())) plot_path = ( ts_dir / f"{metric_name}_{ch_label}_{region}_{stream}" + f"_{run_ids_str}" f"_fstep_{fstep}_by_source_end_hour.{image_format}" ) plt.savefig(plot_path, bbox_inches="tight") diff --git a/packages/evaluate/src/weathergen/evaluate/run_evaluation.py b/packages/evaluate/src/weathergen/evaluate/run_evaluation.py index b5b64dca1..e6af33101 100755 --- a/packages/evaluate/src/weathergen/evaluate/run_evaluation.py +++ b/packages/evaluate/src/weathergen/evaluate/run_evaluation.py @@ -248,8 +248,8 @@ def _process_stream( return run_id, stream, {}, {} plot_score_maps = plot_score_options.get("plot_score_maps", False) and type_ == "zarr" - plot_score_timeseries = ( - plot_score_options.get("plot_score_timeseries", False) and type_ == "zarr" + plot_score_init_time_series = ( + plot_score_options.get("plot_score_init_time_series", False) and type_ == "zarr" ) stream_loaded_scores, recomputable_metrics = reader.load_scores(stream, regions, metrics) @@ -257,7 +257,7 @@ def _process_stream( if recomputable_metrics: metrics_to_compute = recomputable_metrics regions_to_compute = list(set(recomputable_metrics.keys())) - elif plot_score_maps or plot_score_timeseries: + elif plot_score_maps or plot_score_init_time_series: metrics_to_compute = {r: metrics for r in regions} regions_to_compute = regions else: @@ -284,7 +284,7 @@ def _process_stream( plot_score_options=plot_score_options, ) - if plot_score_timeseries: + if plot_score_init_time_series: ts_scores = run_score_timeseries_pipeline( reader, stream, @@ -320,7 +320,7 @@ def evaluate_from_config(cfg: dict, mlflow_client: MlflowClient | None) -> None: plot_score_options = { "plot_score_maps": cfg.evaluation.get("plot_score_maps", False), "plot_score_animations": cfg.evaluation.get("plot_score_animations", False), - "plot_score_timeseries": cfg.evaluation.get("plot_score_timeseries", False), + "plot_score_init_time_series": cfg.evaluation.get("plot_score_init_time_series", False), } global_plotting_opts = cfg.get("global_plotting_options", {}) diff --git a/packages/evaluate/src/weathergen/evaluate/utils/clim_utils.py b/packages/evaluate/src/weathergen/evaluate/utils/clim_utils.py index 0cd860137..82036021e 100644 --- a/packages/evaluate/src/weathergen/evaluate/utils/clim_utils.py +++ b/packages/evaluate/src/weathergen/evaluate/utils/clim_utils.py @@ -268,5 +268,5 @@ def needs_climatology(metrics_dict: dict) -> bool: True if any metric requires climatology, False otherwise """ metrics = [m for metrics in metrics_dict.values() for m in metrics.keys()] - req_clim = ["acc", "rps", "rpss"] + req_clim = ["acc", "rps", "rpss"] return any(m in req_clim for m in metrics)