# Gitc 6903 Adding Unit Test Suite #83

base: master
**`mrf/Dockerfile`** (new file):

```dockerfile
# mrf/Dockerfile

# =====================================================================
# Stage 1: Install all development tools, compile the C++ utilities,
# and create the Python virtual environment.
# =====================================================================
FROM almalinux:9 AS builder

# Build arguments for the el9 GDAL RPM
ARG GDAL_VERSION=3.6.4
ARG GIBS_GDAL_RELEASE=1
ARG ALMALINUX_VERSION=9

# Install build-time dependencies
RUN dnf install -y epel-release dnf-plugins-core && \
    dnf config-manager --set-enabled crb && \
    dnf groupinstall -y "Development Tools" && \
    dnf install -y --allowerasing \
        cmake \
        git \
        python3-pip \
        python3-devel \
        libtiff-devel \
        sqlite-devel \
        wget \
        curl \
        geos \
        proj && \
    dnf clean all

# Install the pre-compiled GIBS GDAL RPM for el9
RUN wget -P /tmp/ https://github.com/nasa-gibs/gibs-gdal/releases/download/v${GDAL_VERSION}/gibs-gdal-${GDAL_VERSION}-${GIBS_GDAL_RELEASE}.el${ALMALINUX_VERSION}.x86_64.rpm && \
    dnf install -y /tmp/gibs-gdal-${GDAL_VERSION}-${GIBS_GDAL_RELEASE}.el${ALMALINUX_VERSION}.x86_64.rpm && \
    rm -rf /tmp/*

# Download the missing private marfa.h header
RUN curl -L "https://raw.githubusercontent.com/OSGeo/gdal/v${GDAL_VERSION}/frmts/mrf/marfa.h" -o /usr/local/include/marfa.h

WORKDIR /app
COPY requirements.txt .
# Create the venv and install packages
RUN python3 -m venv /app/venv
ENV PATH="/app/venv/bin:$PATH"
RUN pip install --no-cache-dir -r requirements.txt
# Install the Python bindings matching the installed GDAL version
RUN pip install GDAL==$(gdal-config --version)

# Copy the rest of the project files
COPY . .

# Build the C++ utilities
RUN cd mrf_apps && make

# Install the project itself into the venv
RUN pip install -e .

# =====================================================================
# Stage 2: Minimal, distributable image.
# =====================================================================
FROM almalinux:9

# Install only runtime dependencies
RUN dnf install -y epel-release dnf-plugins-core && \
    dnf config-manager --set-enabled crb && \
    dnf install -y --allowerasing python3 wget geos proj && \
    dnf clean all

# Install the el9 GDAL RPM for its runtime libraries
ARG GDAL_VERSION=3.6.4
ARG GIBS_GDAL_RELEASE=1
ARG ALMALINUX_VERSION=9
RUN wget -P /tmp/ https://github.com/nasa-gibs/gibs-gdal/releases/download/v${GDAL_VERSION}/gibs-gdal-${GDAL_VERSION}-${GIBS_GDAL_RELEASE}.el${ALMALINUX_VERSION}.x86_64.rpm && \
    dnf install -y /tmp/gibs-gdal-${GDAL_VERSION}-${GIBS_GDAL_RELEASE}.el${ALMALINUX_VERSION}.x86_64.rpm && \
    rm -rf /tmp/*

# Tell the dynamic linker where to find the new libraries:
# create a new configuration file for it
RUN echo "/usr/local/lib" > /etc/ld.so.conf.d/gdal-custom.conf

# Update the shared library cache
RUN ldconfig

WORKDIR /app

# Copy artifacts from the "builder" stage
COPY --from=builder /app/mrf_apps/can /usr/local/bin/
COPY --from=builder /app/mrf_apps/jxl /usr/local/bin/
COPY --from=builder /app/mrf_apps/mrf_insert /usr/local/bin/
COPY --from=builder /app/venv /app/venv
COPY mrf_apps/ ./mrf_apps/
COPY pyproject.toml .
COPY README.md .

# Set final environment variables
ENV PATH="/app/venv/bin:$PATH"
ENV GDAL_DATA="/usr/local/share/gdal"
```
**Test suite documentation** (new file):

## **MRF Utilities Test Suite**

This document outlines the unit tests for the Meta Raster Format (MRF) utilities. The tests are written in Python using the `unittest` framework and are designed to be run with a test runner such as `pytest`. The suite is structured into separate files for each utility to keep the tests maintainable and clear.

A shared test helper, `tests/helpers.py`, provides a base class that handles the setup and teardown of a temporary testing directory and includes methods for creating mock MRF files (`.mrf`, `.idx`, `.dat`). This approach minimizes code duplication and standardizes test environments.

### Docker-Based Testing Environment

Docker is the recommended way to run this test suite. It provides an isolated environment with all the necessary C++, GDAL, and Python dependencies pre-installed, avoiding platform-specific issues. The workflow uses a two-stage build: first create a base application image, then build a lightweight test-runner image from it.

#### Prerequisites

Ensure Docker is installed and running on your system.
#### Building and Running the Tests

**Step 1: Build the Base Application Image**
Navigate to the project's root directory and run the following command. This builds the main application image, compiling all C++ utilities and installing dependencies. It is tagged as `mrf-app:latest`.

```bash
docker build --platform linux/amd64 -t mrf-app:latest -f Dockerfile .
```

> **Note**: The `--platform linux/amd64` flag is required if you are building on an ARM-based machine (such as an Apple Silicon Mac) to ensure compatibility with the pre-compiled `x86_64` GDAL RPM used in the build.

**Step 2: Build the Test Suite Image**
Next, build the dedicated test-runner image. This build uses the `mrf-app` image from the previous step as its base.

```bash
docker build --platform linux/amd64 -t mrf-test-suite -f tests/Dockerfile .
```

**Step 3: Run the Test Suite**
Finally, run the tests using the `mrf-test-suite` image. This command starts a container, executes `pytest`, and automatically removes the container (`--rm`) when finished.

```bash
docker run --rm mrf-test-suite
```

> **Review comment:** Since we have tests that might be skipped, pass a flag so that the user is given reasons if tests are skipped.

You should see output from `pytest`, culminating in a summary of passed, failed, and skipped tests.
|
||
|
||
### `can` Utility Tests | ||
|
||
**File**: `tests/test_can.py` | ||
|
||
These tests validate the `can` C++ command-line utility, which is used for compressing and decompressing sparse MRF index files. | ||
|
||
* **`test_can_uncan_cycle`**: Verifies the round-trip integrity of the canning process. It creates a large, sparse mock index file (`.idx`), runs `can` to compress it to a canned index (`.ix`), and then runs it with the `-u` flag to decompress it back to an `.idx` file. The test passes if the final index file is identical to the original. | ||
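The shape of this round-trip check can be sketched as follows. This is illustrative only, not the actual test code: the class and helper names, and the exact `can` invocation, are assumptions; only the `-u` flag and the `.ix` extension come from the description above.

```python
# Illustrative sketch of a can/uncan round-trip test (names and CLI
# invocation are assumed, not taken from the real tests/test_can.py).
import os
import shutil
import struct
import subprocess
import tempfile
import unittest

@unittest.skipUnless(shutil.which("can"), "can executable not on PATH")
class TestCanRoundTrip(unittest.TestCase):
    def test_can_uncan_cycle(self):
        with tempfile.TemporaryDirectory() as d:
            idx = os.path.join(d, "sparse.idx")
            # A sparse index: mostly empty (0, 0) records plus one real tile.
            records = [(0, 0)] * 1000
            records[3] = (4096, 512)
            with open(idx, "wb") as f:
                for offset, size in records:
                    f.write(struct.pack(">QQ", offset, size))
            original = open(idx, "rb").read()
            # Compress to a canned .ix, remove the original, then restore it.
            subprocess.run(["can", idx], check=True)
            os.remove(idx)
            subprocess.run(["can", "-u", os.path.join(d, "sparse.ix")], check=True)
            self.assertEqual(open(idx, "rb").read(), original)
```

The `skipUnless` guard mirrors the suite's conditional-skipping behavior described later in this document.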
### `jxl` Utility Tests

**File**: `tests/test_jxl.py`

These tests validate the `jxl` C++ utility, which converts MRF data files and single images between JPEG (JFIF) and JPEG XL (Brunsli) formats.

* **`test_jxl_mrf_round_trip`**: Verifies the primary MRF conversion. It converts a mock MRF data file (`.pjg`) and its index to JXL format and then back to JPEG, confirming the final files are identical to the originals and that the JXL file is smaller.
* **`test_jxl_single_file_round_trip`**: Validates the single-file mode (`-s`). It performs a round-trip conversion on a standalone JPEG file and confirms data integrity.
* **`test_jxl_bundle_mode` (Placeholder)**: A placeholder test for Esri bundle mode (`-b`) that is skipped, as creating a valid mock bundle file is non-trivial.
### `mrf_clean.py` Tests

**File**: `tests/test_clean.py`

These tests validate `mrf_clean.py`, a script used to optimize MRF storage by removing unused space.

* **`test_mrf_clean_copy`**: Checks the default "copy" mode. It verifies that the script creates a new, smaller data file with slack space removed and that the new index file has correctly updated, contiguous tile offsets.
* **`test_mrf_clean_trim`**: Validates the in-place "trim" mode. It confirms that the original data file is truncated to the correct size and its index file is overwritten with updated offsets.
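The invariant these tests check, contiguous offsets after cleaning, can be illustrated with a small sketch (this is not the script's own code, just the property under test):

```python
# Illustrative model of the compaction that mrf_clean.py performs:
# live tiles end up back-to-back, empty (0, 0) records stay empty.

def compact_index(records):
    """records: list of (offset, size). Returns the compacted index."""
    out, next_offset = [], 0
    for offset, size in records:
        if size == 0:
            out.append((0, 0))          # empty tile stays empty
        else:
            out.append((next_offset, size))
            next_offset += size          # next tile follows immediately
    return out

# An index with slack between tiles...
dirty = [(100, 10), (0, 0), (500, 20)]
# ...compacts to contiguous offsets.
print(compact_index(dirty))  # [(0, 10), (0, 0), (10, 20)]
```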
### `mrf_insert` Utility Tests

**File**: `tests/test_mrf_insert.py`

These tests validate the `mrf_insert` C++ utility, which is used to patch a smaller raster into a larger MRF.

* **`test_mrf_insert_simple_patch`**: Validates the core functionality. It creates an empty target MRF and a smaller source raster, executes `mrf_insert`, and uses GDAL to verify the patched region was written correctly while unpatched regions remain unaffected.
* **`test_mrf_insert_with_overviews`**: Tests that inserting a patch with the `-r` flag correctly regenerates the affected overview tiles.
* **`test_mrf_insert_partial_tile_overlap`**: Confirms that inserting a source that only partially covers a target tile correctly merges the new data while preserving the uncovered portions of the original tile.
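The partial-overlap case comes down to tile bookkeeping: which target tiles does a patch touch? A small arithmetic sketch (illustrative, not code from `mrf_insert`):

```python
# Which 512-pixel tiles does a patch at pixel (x0, y0) of size (w, h) touch?
def affected_tiles(x0, y0, w, h, tile=512):
    tx0, ty0 = x0 // tile, y0 // tile                    # first tile column/row
    tx1, ty1 = (x0 + w - 1) // tile, (y0 + h - 1) // tile  # last tile column/row
    return [(tx, ty) for ty in range(ty0, ty1 + 1)
                     for tx in range(tx0, tx1 + 1)]

# A 512x512 patch at (256, 256) partially covers four tiles -- exactly the
# situation test_mrf_insert_partial_tile_overlap exercises.
print(affected_tiles(256, 256, 512, 512))  # [(0, 0), (1, 0), (0, 1), (1, 1)]
```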
### `mrf_join.py` Tests

**File**: `tests/test_join.py`

These tests validate `mrf_join.py`, a script that merges or appends multiple MRF files.

* **`test_mrf_join_simple_merge`**: Checks the script's ability to merge two sparse MRFs, verifying that the final data file is a concatenation of inputs and the final index correctly combines entries with updated offsets.
* **`test_mrf_join_overwrite`**: Confirms the "last-one-wins" logic by joining two MRFs that provide data for the same tile and verifying that the final index points to the data from the last-processed input.
* **`test_mrf_append_z_dimension`**: Validates the ability to stack 2D MRFs into a single 3D MRF, checking that the Z dimension is correctly set in the metadata and that the index layout is correct for multiple slices.
* **`test_mrf_append_with_overviews`**: Tests the scenario of appending MRFs that contain overviews, ensuring the final interleaved index structure is correctly assembled.
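The merge behavior the first two tests verify, concatenated data plus rebased offsets with last-one-wins, can be modeled in a few lines (a sketch of the property, not `mrf_join.py` itself):

```python
# Model of merging sparse MRF indexes: data files are concatenated, so each
# input's offsets are rebased by the data already written; later inputs
# overwrite earlier ones for the same tile slot ("last one wins").
def join_indexes(inputs):
    """inputs: list of (records, data_len); records are (offset, size)."""
    merged = [(0, 0)] * max(len(r) for r, _ in inputs)
    base = 0
    for records, data_len in inputs:
        for i, (offset, size) in enumerate(records):
            if size:  # only real tiles overwrite existing entries
                merged[i] = (base + offset, size)
        base += data_len
    return merged

a = ([(0, 4), (0, 0)], 4)   # input A: tile 0 only, 4 bytes of data
b = ([(0, 8), (8, 8)], 16)  # input B: tiles 0 and 1, 16 bytes of data
# Tile 0 points into B's (rebased) data, demonstrating last-one-wins.
print(join_indexes([a, b]))  # [(4, 8), (12, 8)]
```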
### `mrf_read_data.py` Tests

**File**: `tests/test_read_data.py`

These tests validate `mrf_read_data.py`, which extracts a specific tile or data segment from an MRF data file.

* **`test_read_with_offset_and_size`**: Validates the direct read mode by using `--offset` and `--size` to extract a specific data segment and confirming the output is correct.
* **`test_read_with_index_and_tile`**: Validates the index-based read mode by using `--index` and `--tile` to retrieve a specific tile and verifying its content.
* **`test_read_with_little_endian_index`**: Ensures the `--little-endian` flag functions correctly by reading from an index file with a different byte order.
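The index-based read mode boils down to two seeks: one into the index for the tile's `(offset, size)` record, one into the data file. A self-contained sketch (function name and arguments are illustrative; they mirror the `--index`/`--tile`/`--little-endian` options, not the script's internals):

```python
import os
import struct
import tempfile

def read_tile(data_path, idx_path, tile, little_endian=False):
    """Read one tile's bytes via its 16-byte (offset, size) index record."""
    fmt = "<QQ" if little_endian else ">QQ"
    with open(idx_path, "rb") as f:
        f.seek(tile * 16)                     # one 16-byte record per tile
        offset, size = struct.unpack(fmt, f.read(16))
    with open(data_path, "rb") as f:
        f.seek(offset)
        return f.read(size)

with tempfile.TemporaryDirectory() as d:
    data, idx = os.path.join(d, "t.dat"), os.path.join(d, "t.idx")
    with open(data, "wb") as f:
        f.write(b"AAAABBBB")                  # tile 0, then tile 1
    with open(idx, "wb") as f:
        f.write(struct.pack(">QQ", 0, 4) + struct.pack(">QQ", 4, 4))
    print(read_tile(data, idx, 1))            # b'BBBB'
```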
### `mrf_read_idx.py` Tests

**File**: `tests/test_read_idx.py`

These tests validate `mrf_read_idx.py`, which converts a binary MRF index file into a CSV.

* **`test_read_simple_index`**: Validates the script's core functionality with a standard, big-endian index file, verifying the output CSV has the correct headers and data.
* **`test_read_little_endian_index`**: Confirms that the `--little-endian` flag works by parsing an index with a different byte order and checking for correctly interpreted values.
* **`test_read_empty_index`**: Handles the edge case of an empty input file, ensuring the script produces a CSV with only the header row.
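A minimal sketch of the conversion being tested, including the empty-input edge case. The column names here are assumptions for illustration; the real script's headers may differ:

```python
import csv
import io
import struct

def idx_to_csv(idx_bytes, little_endian=False):
    """Convert raw MRF index bytes to CSV text (header names are assumed)."""
    fmt = "<QQ" if little_endian else ">QQ"
    out = io.StringIO()
    writer = csv.writer(out)
    writer.writerow(["tile", "offset", "size"])   # assumed column names
    for i in range(0, len(idx_bytes), 16):
        offset, size = struct.unpack(fmt, idx_bytes[i:i + 16])
        writer.writerow([i // 16, offset, size])
    return out.getvalue()

# An empty index yields a header-only CSV, matching test_read_empty_index.
print(idx_to_csv(b""))
print(idx_to_csv(struct.pack(">QQ", 0, 512)))
```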
### `mrf_size.py` Tests

**File**: `tests/test_mrf_size.py`

These tests validate `mrf_size.py`, which generates a GDAL VRT to visualize the tile sizes from an MRF index.

* **`test_vrt_creation_single_band`**: Checks VRT generation for a single-band MRF, verifying the VRT's dimensions, GeoTransform, and raw band parameters.
* **`test_vrt_creation_multi_band`**: Validates handling of multi-band MRFs, ensuring the VRT contains the correct number of bands with correctly calculated offsets.
* **`test_vrt_default_pagesize`**: Ensures the script correctly applies a default 512x512 page size when it's not specified in the MRF metadata.
### `tiles2mrf.py` Tests

**File**: `tests/test_tiles2mrf.py`

These tests validate `tiles2mrf.py`, which assembles an MRF from a directory of individual tiles.

* **`test_simple_conversion`**: Validates basic functionality by assembling a 2x2 grid of tiles and verifying the concatenated data file and sequential index offsets.
* **`test_with_overviews_and_padding`**: Checks the creation of a multi-level pyramid, ensuring the script correctly processes all levels and adds necessary padding records to the index.
* **`test_blank_tile_handling`**: Validates the `--blank-tile` feature, confirming that blank tiles are omitted from the data file and are represented by a zero-record in the index.
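The assembly and blank-tile behavior these tests verify can be sketched as a small model (illustrative only, not the script's code):

```python
import struct

def assemble(tiles, blank=None):
    """tiles: list of byte strings in tile order. Returns (data, idx).

    Models the tested behavior: tiles are concatenated with sequential
    offsets; a tile equal to `blank` is omitted from the data file and
    recorded as a zero (0, 0) record in the index.
    """
    data, idx = b"", b""
    for t in tiles:
        if blank is not None and t == blank:
            idx += struct.pack(">QQ", 0, 0)       # zero-record for blank tile
        else:
            idx += struct.pack(">QQ", len(data), len(t))
            data += t
    return data, idx

data, idx = assemble([b"AA", b"", b"BB"], blank=b"")
print(len(data), len(idx))  # 4 48
```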
### Conditional Test Skipping

The test suite is designed to be run primarily within the provided Docker container, where all dependencies are guaranteed to be met. However, the tests include conditional skipping logic to fail gracefully if run in a local environment that is not fully configured.

* **C++ Executable Tests**: The tests for **`can`**, **`jxl`**, and **`mrf_insert`** will be skipped if their respective compiled executables are not found in the system's PATH.
* **GDAL Python Dependency**: The test for `mrf_insert` requires the GDAL Python bindings to create test files. It will be skipped if the `osgeo.gdal` library cannot be imported.
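A minimal sketch of both skip guards (class and variable names are illustrative, not the suite's actual code):

```python
# Skip guards: one for a missing compiled executable, one for missing
# GDAL Python bindings. Both degrade to "skipped" rather than "failed".
import shutil
import unittest

try:
    from osgeo import gdal  # noqa: F401
    HAVE_GDAL = True
except ImportError:
    HAVE_GDAL = False

@unittest.skipUnless(shutil.which("mrf_insert"), "mrf_insert not on PATH")
@unittest.skipUnless(HAVE_GDAL, "GDAL Python bindings not available")
class ExampleGuardedTest(unittest.TestCase):
    def test_noop(self):
        self.assertTrue(True)
```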
**`Makefile.lcl`** (new file):

```makefile
# Makefile.lcl
# Local configuration file that provides the system-specific paths needed
# to compile the C++ utilities.

# Set the base directory where the GDAL RPM installed its files.
PREFIX=/usr/local

# Set the root for GDAL headers, as expected by the mrf_apps/Makefile.
# This points to the same location as PREFIX/include but satisfies the
# variable requirement.
GDAL_ROOT=/usr/local/include

# Override the library directory to point to lib64.
LIBDIR = $(PREFIX)/lib64
```
**`mrf_apps/Makefile`** (modified):

```diff
@@ -4,7 +4,7 @@
 # PREFIX=/home/ec2-user
 # GDAL_ROOT=$(PREFIX)/src/gdal/gdal
 #
-include Makefile.lcl
+include ../Makefile.lcl

 TARGETS = can mrf_insert jxl
 GDAL_INCLUDE = -I $(PREFIX)/include -I $(GDAL_ROOT)
```

> **Reviewer:** Is this referencing the makefile for the MRF test suite build? That doesn't make sense to me.
>
> **Author:** No, `Makefile.lcl` does not have any test suite build dependencies. The project root is simply a better place for it to live.
**`mrf_size.py`** (modified):

```diff
@@ -121,7 +121,7 @@ def VRT_Size(mrf):
     gt[1] *= mrf.pagesize.x
     gt[5] *= mrf.pagesize.y
     XML.SubElement(root,'GeoTransform').text = ",".join((str(x) for x in gt))
-    bands = int(mrf.size.c / mrf.pagesize.c)
+    bands = int(mrf.size.c)
     for band in range(bands):
         xband = XML.SubElement(root, 'VRTRasterBand', {
             'band':str(band+1),
```

> **Reviewer:** Not sure if this should be modified.
>
> **Reviewer:** Right, it was correct before. The output is a "tile size" image. When the input is pixel interleaved, there is only one tile per location, so the output has only one band. It's not the same thing as the number of input bands.
**`pyproject.toml`** (new file):

```toml
[project]
name = "mrf_utilities"
version = "0.1.0"

[tool.setuptools]
packages = ["mrf_apps", "tests"]
```
**`requirements.txt`** (new file):

```text
# requirements.txt
pytest
numpy
Pillow
```
**`tests/Dockerfile`** (new file):

```dockerfile
# mrf/tests/Dockerfile
# This file builds the test runner image.

# Start from the application image built by mrf/Dockerfile.
# The tag must match the one used in the earlier build step.
FROM mrf-app:latest

# The WORKDIR and ENV variables are inherited from the base image.

# Copy the test directory into the image.
# This assumes the `docker build` command was run from the project root.
COPY tests/ ./tests/

# The source code was already copied into the base image,
# so we just define the command that runs the tests.
CMD ["pytest"]
```
**`tests/helpers.py`** (new file):

```python
# tests/helpers.py

import unittest
import os
import shutil
import struct
from xml.etree import ElementTree as ET
from PIL import Image

class MRFTestCase(unittest.TestCase):
    """
    A base class for MRF utility tests that handles temporary directory
    creation and provides helper methods for creating mock MRF files.
    """
    def setUp(self):
        """Set up a temporary directory for test files."""
        self.test_dir = "mrf_test_temp_dir"
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)
        os.makedirs(self.test_dir)

        # Assume the C++ utilities are compiled and on the system PATH.
        self.can_executable = "can"
        self.mrf_insert_executable = "mrf_insert"

    def tearDown(self):
        """Clean up the temporary directory."""
        if os.path.exists(self.test_dir):
            shutil.rmtree(self.test_dir)

    def create_mock_mrf_xml(self, path, xsize=512, ysize=512, channels=1, pagesize=512, data_ext="dat"):
        """Creates a minimal MRF metadata file."""
        root = ET.Element("MRF_META")
        raster = ET.SubElement(root, "Raster")
        ET.SubElement(raster, "Size", x=str(xsize), y=str(ysize), c=str(channels))
        ET.SubElement(raster, "PageSize", x=str(pagesize), y=str(pagesize), c=str(channels))
        data_file = ET.SubElement(raster, "DataFile")
        base_name = os.path.basename(path).replace('.mrf', '')
        data_file.text = f"{base_name}.{data_ext}"

        tree = ET.ElementTree(root)
        tree.write(path)

    def create_mock_idx(self, path, tiles):
        """Creates a mock index file from a list of (offset, size) tuples."""
        with open(path, "wb") as f:
            for offset, size in tiles:
                f.write(struct.pack('>QQ', offset, size))

    def create_mock_data(self, path, content_list):
        """Creates a mock data file from a list of byte strings."""
        with open(path, "wb") as f:
            for content in content_list:
                f.write(content)

    def read_idx_file(self, path):
        """Reads an index file and returns a list of (offset, size) tuples."""
        tiles = []
        with open(path, 'rb') as f:
            while True:
                chunk = f.read(16)
                if not chunk:
                    break
                tiles.append(struct.unpack('>QQ', chunk))
        return tiles

    def create_mock_jpeg(self, path, size=(16, 16), color='black'):
        """Creates a simple, valid JPEG file using Pillow."""
        with Image.new('RGB', size, color) as img:
            img.save(path, 'jpeg')
```
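As a quick sanity check of the 16-byte `'>QQ'` record format these helpers read and write (each index record is two big-endian 64-bit unsigned integers, offset then size):

```python
# Demonstrate that packing and unpacking '>QQ' records round-trips exactly.
import struct

records = [(0, 512), (512, 256)]
packed = b"".join(struct.pack(">QQ", o, s) for o, s in records)
assert len(packed) == 16 * len(records)          # 16 bytes per record
unpacked = [struct.unpack(">QQ", packed[i:i + 16])
            for i in range(0, len(packed), 16)]
print(unpacked)  # [(0, 512), (512, 256)]
```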
> **Review comment:** Why not almalinux:10?