-
Notifications
You must be signed in to change notification settings - Fork 2.4k
feat: Addition of virtual _hoodie_commit_completion_time column which is used for Incremental Queries #14037
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
846560d
3260aa4
726b93d
76f093b
a60096f
5ef7a9b
349f4fd
8e60409
320b578
9e8a01e
6690dc9
c0e8ee8
cc57195
0ee7b8f
18a686b
826acce
b4ce002
ca001d5
e8674ec
e29faf2
f70ac82
8573ba8
a3094ae
11cd775
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,6 +31,7 @@ | |
import org.apache.hudi.common.table.HoodieTableVersion; | ||
import org.apache.hudi.common.table.HoodieTableMetaClient; | ||
import org.apache.hudi.common.table.read.buffer.PositionBasedFileGroupRecordBuffer; | ||
import org.apache.hudi.common.table.timeline.HoodieInstant; | ||
import org.apache.hudi.common.util.InternalSchemaCache; | ||
import org.apache.hudi.common.util.Option; | ||
import org.apache.hudi.common.util.VisibleForTesting; | ||
|
@@ -55,6 +56,7 @@ | |
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
|
||
import static org.apache.avro.JsonProperties.NULL_VALUE; | ||
import static org.apache.hudi.avro.AvroSchemaUtils.appendFieldsToSchemaDedupNested; | ||
import static org.apache.hudi.avro.AvroSchemaUtils.createNewSchemaFromFieldsWithReference; | ||
import static org.apache.hudi.avro.AvroSchemaUtils.findNestedField; | ||
|
@@ -85,6 +87,9 @@ public class FileGroupReaderSchemaHandler<T> { | |
protected final TypedProperties properties; | ||
private final DeleteContext deleteContext; | ||
private final HoodieTableMetaClient metaClient; | ||
private final boolean shouldAddCompletionTime; | ||
private final Map<String, String> commitTimeToCompletionTimeMap; | ||
private final Schema requestedSchemaWithCompletionTime; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If the completion time is required, then the requested schema should be updated to include the completion time. I don't think we need a second instance variable. |
||
|
||
public FileGroupReaderSchemaHandler(HoodieReaderContext<T> readerContext, | ||
Schema tableSchema, | ||
|
@@ -98,10 +103,24 @@ public FileGroupReaderSchemaHandler(HoodieReaderContext<T> readerContext, | |
this.requestedSchema = AvroSchemaCache.intern(requestedSchema); | ||
this.hoodieTableConfig = metaClient.getTableConfig(); | ||
this.deleteContext = new DeleteContext(properties, tableSchema); | ||
this.metaClient = metaClient; | ||
|
||
boolean hasInstantRange = readerContext.getInstantRange().isPresent(); | ||
boolean shouldAddCompletionTimeField = !metaClient.isMetadataTable() | ||
&& metaClient.getTableConfig() != null && metaClient.getTableConfig().getTableVersion() != null | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The table config and version should always be non-null, so we can simplify this. |
||
&& metaClient.getTableConfig().getTableVersion().greaterThanOrEquals(HoodieTableVersion.SIX) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The completion time is only available in version 8 and above, if I remember correctly. |
||
&& hasInstantRange; | ||
|
||
this.shouldAddCompletionTime = shouldAddCompletionTimeField; | ||
this.requestedSchemaWithCompletionTime = shouldAddCompletionTimeField | ||
? addCompletionTimeField(this.requestedSchema) | ||
: this.requestedSchema; | ||
this.commitTimeToCompletionTimeMap = this.shouldAddCompletionTime | ||
? buildCompletionTimeMapping(metaClient) | ||
: Collections.emptyMap(); | ||
this.requiredSchema = AvroSchemaCache.intern(prepareRequiredSchema(this.deleteContext)); | ||
this.internalSchema = pruneInternalSchema(requiredSchema, internalSchemaOpt); | ||
this.internalSchemaOpt = getInternalSchemaOpt(internalSchemaOpt); | ||
this.metaClient = metaClient; | ||
} | ||
|
||
public Schema getTableSchema() { | ||
|
@@ -125,12 +144,63 @@ public Option<InternalSchema> getInternalSchemaOpt() { | |
} | ||
|
||
public Option<UnaryOperator<T>> getOutputConverter() { | ||
if (!AvroSchemaUtils.areSchemasProjectionEquivalent(requiredSchema, requestedSchema)) { | ||
return Option.of(readerContext.getRecordContext().projectRecord(requiredSchema, requestedSchema)); | ||
Schema targetSchema = shouldAddCompletionTime ? requestedSchemaWithCompletionTime : requestedSchema; | ||
UnaryOperator<T> projectionConverter = null; | ||
UnaryOperator<T> completionTimeConverter = null; | ||
boolean schemasEquivalent = AvroSchemaUtils.areSchemasProjectionEquivalent(requiredSchema, targetSchema); | ||
if (!schemasEquivalent) { | ||
projectionConverter = readerContext.getRecordContext().projectRecord(requiredSchema, targetSchema); | ||
} | ||
if (shouldAddCompletionTime) { | ||
completionTimeConverter = getCompletionTimeTransformer(); | ||
} | ||
if (projectionConverter != null && completionTimeConverter != null) { | ||
final UnaryOperator<T> finalProjectionConverter = projectionConverter; | ||
final UnaryOperator<T> finalCompletionTimeConverter = completionTimeConverter; | ||
UnaryOperator<T> composed = t -> finalCompletionTimeConverter.apply(finalProjectionConverter.apply(t)); | ||
return Option.of(composed); | ||
} else if (projectionConverter != null) { | ||
return Option.of(projectionConverter); | ||
} else if (completionTimeConverter != null) { | ||
return Option.of(completionTimeConverter); | ||
} | ||
return Option.empty(); | ||
} | ||
|
||
private UnaryOperator<T> getCompletionTimeTransformer() { | ||
return record -> { | ||
try { | ||
Object commitTimeObj = readerContext.getRecordContext().getValue( | ||
record, | ||
requestedSchemaWithCompletionTime, | ||
HoodieRecord.COMMIT_TIME_METADATA_FIELD | ||
); | ||
if (commitTimeObj == null) { | ||
return record; | ||
} | ||
String commitTime = commitTimeObj.toString(); | ||
String completionTime = commitTimeToCompletionTimeMap.getOrDefault(commitTime, commitTime); | ||
Schema.Field completionTimeField = requestedSchemaWithCompletionTime.getField(HoodieRecord.COMMIT_COMPLETION_TIME_METADATA_FIELD); | ||
if (completionTimeField == null) { | ||
return record; | ||
} | ||
int completionTimePos = completionTimeField.pos(); | ||
Object[] fieldValues = new Object[requestedSchemaWithCompletionTime.getFields().size()]; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a way to simply set the value in the existing object instead of creating a new one? At this point the record should have a null for the completion time, based on my understanding of the code. |
||
for (int i = 0; i < fieldValues.length; i++) { | ||
if (i == completionTimePos) { | ||
fieldValues[i] = completionTime; | ||
} else { | ||
Schema.Field field = requestedSchemaWithCompletionTime.getFields().get(i); | ||
fieldValues[i] = readerContext.getRecordContext().getValue(record, requestedSchemaWithCompletionTime, field.name()); | ||
} | ||
} | ||
return readerContext.getRecordContext().constructEngineRecord(requestedSchemaWithCompletionTime, fieldValues); | ||
} catch (Exception e) { | ||
return record; | ||
} | ||
}; | ||
} | ||
|
||
public DeleteContext getDeleteContext() { | ||
return deleteContext; | ||
} | ||
|
@@ -172,12 +242,24 @@ Schema generateRequiredSchema(DeleteContext deleteContext) { | |
boolean hasInstantRange = readerContext.getInstantRange().isPresent(); | ||
//might need to change this if other queries than mor have mandatory fields | ||
if (!readerContext.getHasLogFiles()) { | ||
List<Schema.Field> addedFields = new ArrayList<>(); | ||
if (hasInstantRange && !findNestedField(requestedSchema, HoodieRecord.COMMIT_TIME_METADATA_FIELD).isPresent()) { | ||
List<Schema.Field> addedFields = new ArrayList<>(); | ||
addedFields.add(getField(this.tableSchema, HoodieRecord.COMMIT_TIME_METADATA_FIELD)); | ||
return appendFieldsToSchemaDedupNested(requestedSchema, addedFields); | ||
} | ||
return requestedSchema; | ||
if (shouldAddCompletionTime && !findNestedField(requestedSchemaWithCompletionTime, HoodieRecord.COMMIT_COMPLETION_TIME_METADATA_FIELD).isPresent()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the expectation is that the completion time is a top-level field, not nested, so we can simplify this. |
||
Schema unionSchema = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)); | ||
Schema.Field completionTimeField = new Schema.Field( | ||
HoodieRecord.COMMIT_COMPLETION_TIME_METADATA_FIELD, | ||
unionSchema, | ||
"Completion time of the commit", | ||
NULL_VALUE | ||
); | ||
addedFields.add(completionTimeField); | ||
} | ||
if (!addedFields.isEmpty()) { | ||
return appendFieldsToSchemaDedupNested(requestedSchemaWithCompletionTime, addedFields); | ||
} | ||
return requestedSchemaWithCompletionTime; | ||
} | ||
|
||
if (hoodieTableConfig.getRecordMergeMode() == RecordMergeMode.CUSTOM) { | ||
|
@@ -190,16 +272,30 @@ Schema generateRequiredSchema(DeleteContext deleteContext) { | |
for (String field : getMandatoryFieldsForMerging( | ||
hoodieTableConfig, this.properties, this.tableSchema, readerContext.getRecordMerger(), | ||
deleteContext.hasBuiltInDeleteField(), deleteContext.getCustomDeleteMarkerKeyValue(), hasInstantRange)) { | ||
if (!findNestedField(requestedSchema, field).isPresent()) { | ||
if (!findNestedField(requestedSchemaWithCompletionTime, field).isPresent()) { | ||
addedFields.add(getField(this.tableSchema, field)); | ||
} | ||
} | ||
|
||
if (hasInstantRange && !findNestedField(requestedSchemaWithCompletionTime, HoodieRecord.COMMIT_TIME_METADATA_FIELD).isPresent()) { | ||
addedFields.add(getField(this.tableSchema, HoodieRecord.COMMIT_TIME_METADATA_FIELD)); | ||
} | ||
if (shouldAddCompletionTime && !findNestedField(requestedSchemaWithCompletionTime, HoodieRecord.COMMIT_COMPLETION_TIME_METADATA_FIELD).isPresent()) { | ||
Schema unionSchema = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)); | ||
Schema.Field completionTimeField = new Schema.Field( | ||
HoodieRecord.COMMIT_COMPLETION_TIME_METADATA_FIELD, | ||
unionSchema, | ||
"Completion time of the commit", | ||
NULL_VALUE | ||
); | ||
addedFields.add(completionTimeField); | ||
} | ||
|
||
if (addedFields.isEmpty()) { | ||
return requestedSchema; | ||
return requestedSchemaWithCompletionTime; | ||
} | ||
|
||
return appendFieldsToSchemaDedupNested(requestedSchema, addedFields); | ||
return appendFieldsToSchemaDedupNested(requestedSchemaWithCompletionTime, addedFields); | ||
} | ||
|
||
private static String[] getMandatoryFieldsForMerging(HoodieTableConfig cfg, | ||
|
@@ -308,4 +404,26 @@ private static Schema.Field getField(Schema schema, String fieldName) { | |
} | ||
return foundFieldOpt.get(); | ||
} | ||
|
||
private Map<String, String> buildCompletionTimeMapping(HoodieTableMetaClient metaClient) { | ||
return metaClient.getCommitsTimeline().filterCompletedInstants().getInstants().stream() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This does not account for instants that are no longer in the active timeline. We'll need to use the archived timeline as well, so completion times can still be resolved for archived instants. |
||
.collect(Collectors.toMap( | ||
HoodieInstant::requestedTime, | ||
instant -> instant.getCompletionTime() != null ? instant.getCompletionTime() : instant.requestedTime() | ||
)); | ||
} | ||
|
||
private Schema addCompletionTimeField(Schema schema) { | ||
if (findNestedField(schema, HoodieRecord.COMMIT_COMPLETION_TIME_METADATA_FIELD).isPresent()) { | ||
return schema; | ||
} | ||
Schema unionSchema = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)); | ||
Schema.Field completionTimeField = new Schema.Field( | ||
HoodieRecord.COMMIT_COMPLETION_TIME_METADATA_FIELD, | ||
unionSchema, | ||
"Completion time of the commit", | ||
NULL_VALUE | ||
); | ||
return appendFieldsToSchemaDedupNested(schema, Collections.singletonList(completionTimeField)); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment.
The reason will be displayed to describe this comment to others. Learn more.
The metadata fields are typically persisted to the files. In this case it is just a field we add at query time, so maybe we can come up with a better name. You called this a "virtual" field in the description, so maybe something along those lines?