def check_dataset_consistent(dataset: Dataset) -> tuple[bool, str | None]:
    """
    Check that a dataset provides the measurements its product asks for.

    Every product measurement may carry an ``optional`` flag:

    * ``"no"`` (the default when the flag is absent): the dataset must
      define the measurement.
    * ``"maybe"``: the dataset must define at least one of the measurements
      flagged this way.
    * any other value (e.g. ``"yes"``): the dataset may omit the measurement.

    Product measurements that declare ``extra_dim`` are ignored, since a 3D
    measurement is just a mapping to 2D measurements.

    :param dataset: the dataset to check; only ``dataset.product.measurements``
        and ``dataset.measurements`` are read.
    :return: (Is consistent, [error message|None])
    """
    product_measurements = dataset.product.measurements
    if not product_measurements:
        # The product declares nothing, so there is nothing to be inconsistent with.
        return True, None

    required: set[str] = set()
    conditional: set[str] = set()
    for name, measurement in product_measurements.items():
        if "extra_dim" in measurement:
            # Exclude 3D measurements since it's just a mapping to 2D measurements
            continue
        optional = measurement.get("optional", "no")
        if isinstance(optional, bool):
            # NOTE(review): assumes the product document may have been read by a
            # YAML 1.1 loader, which turns an unquoted yes/no into a boolean -
            # confirm. Without this, `optional: no` (False) would be treated
            # as optional.
            optional = "yes" if optional else "no"
        if optional == "no":
            required.add(name)
        elif optional == "maybe":
            conditional.add(name)

    defined = dataset.measurements
    if defined is None:
        if required:
            return (
                False,
                "The dataset does not define any of the required measurements.",
            )
        # Nothing is strictly required; treat "no measurements" as an empty
        # mapping so the checks below cannot fail on None.
        defined = {}

    missing = required.difference(defined)
    if missing:
        # Sorted so the message is deterministic regardless of set ordering.
        return (
            False,
            f"The dataset does not specify the following required measurements: {', '.join(sorted(missing))}",
        )

    # Only enforce the "at least one" rule when the product actually has
    # conditional measurements; any() over an empty set is always False.
    if conditional and conditional.isdisjoint(defined):
        return (
            False,
            f"The dataset must define at least one of the following measurements: {', '.join(sorted(conditional))}",
        )

    return True, None
@pytest.fixture
def s1_full_product_doc() -> dict:
    """Return the product document read from ``ga_s1_full.odc-product.yaml``."""
    product_doc = get_eo3_test_data_doc("ga_s1_full.odc-product.yaml")
    return product_doc


@pytest.fixture
def ga_s1_full_product(
    index: Index, eo3_s1_metadata_type, s1_full_product_doc
) -> Product:
    """Add the ``s1_full_product_doc`` document to the index and return the product.

    ``eo3_s1_metadata_type`` is requested only so that fixture runs first;
    its value is not used here.
    """
    product = index.products.add_document(s1_full_product_doc)
    assert product is not None
    return product
def test_dataset_opt_measurements(
    index: Index, s1_dataset_doc, ga_s1_full_product
) -> None:
    """Resolve a dataset against ``ga_s1_full_product`` while removing measurements.

    Steps, in order: the unmodified document resolves; it still resolves
    without ``VH``; without ``VV`` the "at least one of" error is returned;
    with ``VV`` restored and ``mask`` removed the "required measurements"
    error is returned.
    """
    doc, uri = s1_dataset_doc
    doc["product"]["name"] = ga_s1_full_product.name
    measurements = doc["measurements"]
    resolve = Doc2Dataset(index)

    # Document as supplied by the fixture.
    dataset, error = resolve(doc, uri)
    assert dataset is not None

    # remove optional measurement
    del measurements["VH"]
    dataset, error = resolve(doc, uri)
    assert dataset is not None

    # remove conditional measurement
    saved_vv = measurements.pop("VV")
    dataset, error = resolve(doc, uri)
    assert "The dataset must define at least one of the following measurements" in error
    # order of these two isn't consistent, so check each name separately
    for band in ("VV", "HH"):
        assert band in error

    # remove required measurement
    measurements["VV"] = saved_vv
    del measurements["mask"]
    dataset, error = resolve(doc, uri)
    assert (
        error == "The dataset does not specify the following required measurements: mask"
    )
test_dataset_eo3_no_schema( dataset_add_configs, index_empty, clirunner, caplog