
feat: default timespan #174

Merged · 6 commits · Jun 3, 2025
doc/index.rst: 5 changes (3 additions & 2 deletions)
@@ -16,14 +16,15 @@ Contents
    installation
    pymor_building_blocks
    pymor_cli
-   pymor_configuration
    pymor_saving_output
-   pymor_aux_files
    table_explorer
    pymor_on_slurm
    schemas
    standard_library
    including_custom_steps
    including_subcommand_plugins
+   pymor_configuration
+   pymor_aux_files
+   pymor_fesom
    timeaveraging_frequencies
    cookbook
doc/pymor_aux_files.rst: 6 changes (3 additions & 3 deletions)
@@ -1,6 +1,6 @@
-==================================
-``pymor`` Using auxiliary files
-==================================
+======================================
+Usage: ``pymor`` Using auxiliary files
+======================================
 
 At times, your post-processing will require additional files beyond the actual data.
 For example, say you are analyzing FESOM output, and need to know the computational mesh
doc/pymor_configuration.rst: 6 changes (3 additions & 3 deletions)
@@ -1,4 +1,4 @@
-==========================
-``pymor`` Configuration
-==========================
+==============================
+Usage: ``pymor`` Configuration
+==============================
 .. automodule:: pymor.core.config
doc/pymor_saving_output.rst: 13 changes (8 additions & 5 deletions)
@@ -1,6 +1,6 @@
-=============
-Saving Output
-=============
+====================
+Usage: Saving Output
+====================
 
 This section of the documentation shows what control you have over output file generation.
 
@@ -24,14 +24,17 @@ Frequency Grouping
 ------------------
 
 In the rule section for a particular output, you can control how many timesteps (expressed in days, months, years, etc.)
-should be contained in each file. You can use the key ``"output_frequency"``:
+should be contained in each file. You can use the key ``"file_timespan"``:
 
 .. code-block:: yaml
 
     rules:
       - ... other rule configuration ...
-        output_frequency: 50YE
+        file_timespan: 50YE
+        ... other rule configuration ...
+      - ...another rule...
 
 The full list of possibilities for the frequency strings can be found here: https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases
 
+This can also be changed globally and overridden on a per-rule basis. You can either do this in the inherit section or in the ``pymor`` configuration as the key ``file_timespan``. Note that the ``pymor`` configuration can also be shared across runs; see the detailed information in :ref:`pymor_configuration`.
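
A sketch of the global override that the added paragraph describes. Hedged: only the ``file_timespan`` key itself comes from this PR; the surrounding layout of the ``pymor`` configuration file and the rule name below are assumptions for illustration.

.. code-block:: yaml

    # Hypothetical configuration file; only ``file_timespan`` is from this PR.
    pymor:
      file_timespan: 10YS     # global default: ten years of output per file
    rules:
      - name: tas_monthly     # illustrative rule name
        file_timespan: 1YS    # a per-rule value overrides the global default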
src/pymor/core/config.py: 107 changes (62 additions & 45 deletions)
@@ -98,6 +98,7 @@ def _parse_bool(value):
 
 class PymorConfig:
     class Config:
+        # [FIXME] Keep the list of all options alphabetical!
         dask_cluster = Option(
             default="local",
             doc="Dask cluster to use. See: https://docs.dask.org/en/stable/deploying.html",
@@ -109,6 +110,21 @@ class Config:
                 ],
             ),
         )
+        dask_cluster_scaling_fixed_jobs = Option(
+            default=5,
+            doc="Number of jobs to create for Jobqueue-backed Dask Cluster",
+            parser=int,
+        )
+        dask_cluster_scaling_maximum_jobs = Option(
+            default=10,
+            doc="Maximum number of jobs to create for Jobqueue-backed Dask Clusters (adaptive)",
+            parser=int,
+        )
+        dask_cluster_scaling_minimum_jobs = Option(
+            default=1,
+            doc="Minimum number of jobs to create for Jobqueue-backed Dask Clusters (adaptive)",
+            parser=int,
+        )
         dask_cluster_scaling_mode = Option(
             default="adapt",
             doc="Flexible dask cluster scaling",
@@ -120,40 +136,44 @@
                 ],
             ),
         )
-        dask_cluster_scaling_minimum_jobs = Option(
-            parser=int,
-            default=1,
-            doc="Minimum number of jobs to create for Jobqueue-backed Dask Clusters (adaptive)",
-        )
-        dask_cluster_scaling_maximum_jobs = Option(
-            parser=int,
-            default=10,
-            doc="Maximum number of jobs to create for Jobqueue-backed Dask Clusters (adaptive)",
-        )
-        dask_cluster_scaling_fixed_jobs = Option(
-            parser=int,
-            default=5,
-            doc="Number of jobs to create for Jobqueue-backed Dask Cluster",
-        )
         dimensionless_mapping_table = Option(
-            parser=str,
             default=DIMENSIONLESS_MAPPING_TABLE,
             doc="Where the dimensionless unit mapping table is defined.",
+            parser=str,
         )
         enable_dask = Option(
-            parser=_parse_bool,
             default="yes",
             doc="Whether to enable Dask-based processing",
+            parser=_parse_bool,
         )
         enable_flox = Option(
-            parser=_parse_bool,
             default="yes",
             doc="Whether to enable flox for group-by operation. See: https://flox.readthedocs.io/en/latest/",
+            parser=_parse_bool,
         )
+        enable_output_subdirs = Option(
+            default="no",
+            doc="Whether to create subdirectories under output_dir when saving data-sets.",
+            parser=_parse_bool,
+        )
+        file_timespan = Option(
+            default="1YS",
+            doc="""Default timespan for grouping output files together.
+
+            Use the special flag ``'file_native'`` to use the same grouping as in the input
+            files. Otherwise, use a ``pandas``-flavoured string, see: https://tinyurl.com/38wxf8px
+            """,
+            parser=str,
+        )
         parallel = Option(
-            parser=_parse_bool, default="yes", doc="Whether to run in parallel."
+            default="yes",
+            doc="Whether to run in parallel.",
+            parser=_parse_bool,
         )
-        parallel_backend = Option(default="dask", doc="Which parallel backend to use.")
+        parallel_backend = Option(
+            default="dask",
+            doc="Which parallel backend to use.",
+        )
         pipeline_workflow_orchestrator = Option(
             default="prefect",
             doc="Which workflow orchestrator to use for running pipelines",
@@ -165,11 +185,6 @@ class Config:
                 ],
             ),
         )
-        enable_output_subdirs = Option(
-            parser=_parse_bool,
-            default="no",
-            doc="Whether to create subdirectories under output_dir when saving data-sets.",
-        )
         prefect_task_runner = Option(
             default="thread_pool",
             doc="Which runner to use for Prefect flows.",
@@ -182,17 +197,24 @@ class Config:
             ),
         )
         quiet = Option(
-            default=False, doc="Whether to suppress output.", parser=_parse_bool
+            default=False,
+            doc="Whether to suppress output.",
+            parser=_parse_bool,
         )
         raise_on_no_rule = Option(
-            parser=_parse_bool,
             default="no",
             doc="Whether or not to raise an error if no rule is found for every single DataRequestVariable",
+            parser=_parse_bool,
         )
         warn_on_no_rule = Option(
-            parser=_parse_bool,
             default="yes",
             doc="Whether or not to issue a warning if no rule is found for every single DataRequestVariable",
+            parser=_parse_bool,
         )
+        xarray_default_missing_value = Option(
+            default=1.0e30,
+            doc="Which missing value to use for xarray. Default is 1e30.",
+            parser=float,
+        )
         xarray_engine = Option(
             default="netcdf4",
@@ -206,15 +228,10 @@ class Config:
                 ],
             ),
         )
-        xarray_default_missing_value = Option(
-            default=1.0e30,
-            doc="Which missing value to use for xarray. Default is 1e30.",
-            parser=float,
-        )
         xarray_skip_unit_attr_from_drv = Option(
-            parser=_parse_bool,
             default="yes",
             doc="Whether to skip setting the unit attribute from the DataRequestVariable, this can be handled via Pint",
+            parser=_parse_bool,
         )
         xarray_time_dtype = Option(
             default="float64",
@@ -227,35 +244,35 @@ class Config:
                 ],
             ),
         )
-        xarray_time_unlimited = Option(
+        xarray_time_enable_set_axis = Option(
             default="yes",
-            doc="Whether the time axis is unlimited in xarray.",
+            doc="Whether to enable setting the axis for the time axis in xarray.",
             parser=_parse_bool,
         )
-        xarray_time_set_standard_name = Option(
+        xarray_time_remove_fill_value_attr = Option(
             default="yes",
-            doc="Whether to set the standard name for the time axis in xarray.",
+            doc="Whether to remove the fill_value attribute from the time axis in xarray.",
             parser=_parse_bool,
         )
         xarray_time_set_long_name = Option(
             default="yes",
             doc="Whether to set the long name for the time axis in xarray.",
             parser=_parse_bool,
         )
-        xarray_time_enable_set_axis = Option(
-            parser=_parse_bool,
+        xarray_time_set_standard_name = Option(
             default="yes",
-            doc="Whether to enable setting the axis for the time axis in xarray.",
+            doc="Whether to set the standard name for the time axis in xarray.",
+            parser=_parse_bool,
         )
         xarray_time_taxis_str = Option(
-            parser=str,
             default="T",
             doc="Which axis to set for the time axis in xarray.",
+            parser=str,
         )
-        xarray_time_remove_fill_value_attr = Option(
-            parser=_parse_bool,
+        xarray_time_unlimited = Option(
             default="yes",
-            doc="Whether to remove the fill_value attribute from the time axis in xarray.",
+            doc="Whether the time axis is unlimited in xarray.",
+            parser=_parse_bool,
         )
 
 
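The new ``file_timespan`` option defaults to ``1YS`` and otherwise takes either the sentinel ``file_native`` or a pandas offset alias such as the ``50YE`` used in the documentation above. A hedged, self-contained sketch of how such an alias maps onto a concrete duration (the variable names here are illustrative, not pymor's):

.. code-block:: python

    # Illustrative only: turn a pandas offset alias into a concrete duration
    # and compare it against a hypothetical output interval, mirroring the
    # comparison that the saving logic in files.py performs below.
    import pandas as pd
    from pandas.tseries.frequencies import to_offset

    file_timespan = "50YE"  # fifty year-end periods (alias needs pandas >= 2.2)
    anchor = pd.Timestamp("2000-01-01")
    file_timespan_as_dt = (anchor + to_offset(file_timespan)) - anchor

    approx_interval = pd.Timedelta(days=31)  # e.g. roughly monthly model output
    if file_timespan_as_dt < approx_interval:
        print("timespan smaller than one output step; use native grouping")
    else:
        print(f"each file will span about {file_timespan_as_dt}")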
src/pymor/std_lib/files.py: 50 changes (32 additions & 18 deletions)
@@ -230,6 +230,25 @@ def split_data_timespan(ds, rule):
     return data_chunks
 
 
+def _save_dataset_with_native_timespan(
+    da,
+    rule,
+    time_label,
+    time_encoding,
+    **extra_kwargs,
+):
+    paths = []
+    datasets = split_data_timespan(da, rule)
+    for group_ds in datasets:
+        paths.append(create_filepath(group_ds, rule))
+    return xr.save_mfdataset(
+        datasets,
+        paths,
+        encoding={time_label: time_encoding},
+        **extra_kwargs,
+    )
+
+
 def save_dataset(da: xr.DataArray, rule):
     """
     Save dataset to one or more files.
@@ -313,16 +332,14 @@ def save_dataset(da: xr.DataArray, rule):
             **extra_kwargs,
         )
 
-    file_timespan = getattr(rule, "file_timespan", None)
-    if file_timespan is None:
-        paths = []
-        datasets = split_data_timespan(da, rule)
-        for group_ds in datasets:
-            paths.append(create_filepath(group_ds, rule))
-        return xr.save_mfdataset(
-            datasets,
-            paths,
-            encoding={time_label: time_encoding},
+    default_file_timespan = rule._pymor_cfg("file_timespan")
+    file_timespan = getattr(rule, "file_timespan", default_file_timespan)
+    if file_timespan == "file_native":
+        return _save_dataset_with_native_timespan(
+            da,
+            rule,
+            time_label,
+            time_encoding,
             **extra_kwargs,
         )
     else:
@@ -337,14 +354,11 @@ def save_dataset(da: xr.DataArray, rule):
                 f"file_timespan {file_timespan_as_dt} is smaller than approx_interval {dt}; "
                 "falling back to timespan as defined in the source file"
             )
-            paths = []
-            datasets = split_data_timespan(da, rule)
-            for group_ds in datasets:
-                paths.append(create_filepath(group_ds, rule))
-            return xr.save_mfdataset(
-                datasets,
-                paths,
-                encoding={time_label: time_encoding},
+            return _save_dataset_with_native_timespan(
+                da,
+                rule,
+                time_label,
+                time_encoding,
                 **extra_kwargs,
             )
         else:
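
For context, a minimal stand-in for the grouping-and-saving pattern that ``_save_dataset_with_native_timespan`` implements: split a dataset along time and write every chunk with a single ``xr.save_mfdataset`` call. Hedged: the ``resample``-based split and the file names below are simplifications; pymor's actual ``split_data_timespan``/``create_filepath`` logic is richer.

.. code-block:: python

    import numpy as np
    import pandas as pd
    import xarray as xr

    # Four years of fake monthly data.
    time = pd.date_range("2000-01-01", periods=48, freq="MS")
    ds = xr.Dataset({"tas": ("time", np.random.rand(48))}, coords={"time": time})

    # Split into one-year chunks, analogous to ``file_timespan: 1YS``.
    datasets = [chunk for _, chunk in ds.resample(time="1YS")]
    paths = [f"tas_{chunk.time.dt.year.values[0]}.nc" for chunk in datasets]

    # One call writes each chunk to its own netCDF file.
    xr.save_mfdataset(datasets, paths)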