From 24ac59aa15594ab378a3db4d4bdec3751f6fa7eb Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 3 Sep 2025 17:39:32 +0300 Subject: [PATCH 1/6] GH-836: Added support of ExtensionType for ComplexCopier --- .../src/main/codegen/includes/vv_imports.ftl | 1 + .../main/codegen/templates/ComplexCopier.java | 42 +++++-- .../complex/impl/UnionExtensionWriter.java | 5 + .../complex/impl/TestComplexCopier.java | 114 ++++++++++++++++++ .../vector/complex/impl/UuidReaderImpl.java | 5 + 5 files changed, 160 insertions(+), 7 deletions(-) diff --git a/vector/src/main/codegen/includes/vv_imports.ftl b/vector/src/main/codegen/includes/vv_imports.ftl index 7f216a7b43..2bbcecc856 100644 --- a/vector/src/main/codegen/includes/vv_imports.ftl +++ b/vector/src/main/codegen/includes/vv_imports.ftl @@ -34,6 +34,7 @@ import org.apache.arrow.vector.complex.*; import org.apache.arrow.vector.complex.reader.*; import org.apache.arrow.vector.complex.impl.*; import org.apache.arrow.vector.complex.writer.*; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; diff --git a/vector/src/main/codegen/templates/ComplexCopier.java b/vector/src/main/codegen/templates/ComplexCopier.java index 4fff7059a7..efa8dd883c 100644 --- a/vector/src/main/codegen/templates/ComplexCopier.java +++ b/vector/src/main/codegen/templates/ComplexCopier.java @@ -42,10 +42,14 @@ public class ComplexCopier { * @param output field to write to */ public static void copy(FieldReader input, FieldWriter output) { - writeValue(input, output); + writeValue(input, output, null); } - private static void writeValue(FieldReader reader, FieldWriter writer) { + public static void copy(FieldReader input, FieldWriter output, ExtensionTypeWriterFactory extensionTypeWriterFactory) { + writeValue(input, output, 
extensionTypeWriterFactory); + } + + private static void writeValue(FieldReader reader, FieldWriter writer, ExtensionTypeWriterFactory extensionTypeWriterFactory) { final MinorType mt = reader.getMinorType(); switch (mt) { @@ -61,7 +65,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { FieldReader childReader = reader.reader(); FieldWriter childWriter = getListWriterForReader(childReader, writer); if (childReader.isSet()) { - writeValue(childReader, childWriter); + writeValue(childReader, childWriter, extensionTypeWriterFactory); } else { childWriter.writeNull(); } @@ -79,8 +83,8 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { FieldReader structReader = reader.reader(); if (structReader.isSet()) { writer.startEntry(); - writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key())); - writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value())); + writeValue(mapReader.key(), getMapWriterForReader(mapReader.key(), writer.key()), extensionTypeWriterFactory); + writeValue(mapReader.value(), getMapWriterForReader(mapReader.value(), writer.value()), extensionTypeWriterFactory); writer.endEntry(); } else { writer.writeNull(); @@ -99,7 +103,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { if (childReader.getMinorType() != Types.MinorType.NULL) { FieldWriter childWriter = getStructWriterForReader(childReader, writer, name); if (childReader.isSet()) { - writeValue(childReader, childWriter); + writeValue(childReader, childWriter, extensionTypeWriterFactory); } else { childWriter.writeNull(); } @@ -110,6 +114,21 @@ private static void writeValue(FieldReader reader, FieldWriter writer) { writer.writeNull(); } break; + case EXTENSIONTYPE: + if (extensionTypeWriterFactory == null) { + throw new UnsupportedOperationException( + "EXTENSIONTYPE are not supported yet. Please provide an ExtensionTypeWriterFactory." 
); + } + if (reader.isSet()) { + Object value = reader.readObject(); + if (value != null) { + writer.addExtensionTypeWriterFactory(extensionTypeWriterFactory); + writer.writeExtension(value); + } + } else { + writer.writeNull(); + } + break; <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> <#assign fields = minor.fields!type.fields /> <#assign uncappedName = name?uncap_first/> @@ -162,6 +181,9 @@ private static FieldWriter getStructWriterForReader(FieldReader reader, StructWr return (FieldWriter) writer.map(name); case LISTVIEW: return (FieldWriter) writer.listView(name); + case EXTENSIONTYPE: + ExtensionWriter extensionWriter = writer.extension(name, reader.getField().getType()); + return (FieldWriter) extensionWriter; default: throw new UnsupportedOperationException(reader.getMinorType().toString()); } @@ -185,7 +207,10 @@ private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter case NULL: return (FieldWriter) writer.list(); case LISTVIEW: - return (FieldWriter) writer.listView(); + return (FieldWriter) writer.listView(); + case EXTENSIONTYPE: + ExtensionWriter extensionWriter = writer.extension(reader.getField().getType()); + return (FieldWriter) extensionWriter; default: throw new UnsupportedOperationException(reader.getMinorType().toString()); } @@ -211,6 +236,9 @@ private static FieldWriter getMapWriterForReader(FieldReader reader, MapWriter w return (FieldWriter) writer.listView(); case MAP: return (FieldWriter) writer.map(false); + case EXTENSIONTYPE: + ExtensionWriter extensionWriter = writer.extension(reader.getField().getType()); + return (FieldWriter) extensionWriter; default: throw new UnsupportedOperationException(reader.getMinorType().toString()); } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java index d341384bd9..4219069cba 100644 --- 
a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionExtensionWriter.java @@ -76,4 +76,9 @@ public void setPosition(int index) { this.writer.setPosition(index); } } + + @Override + public void writeNull() { + this.writer.writeNull(); + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java index 3bc02c6029..738e8905e3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java @@ -20,6 +20,7 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; +import java.util.UUID; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.DecimalVector; @@ -30,12 +31,14 @@ import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.complex.writer.BaseWriter; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; import org.apache.arrow.vector.holders.DecimalHolder; import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.DecimalUtility; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -845,4 +848,115 @@ public void testCopyMapVectorWithMapValue() { assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); } } + + @Test + public void testCopyListVectorWithExtensionType() { + try (ListVector from 
= ListVector.empty("v", allocator); + ListVector to = ListVector.empty("v", allocator)) { + + UnionListWriter listWriter = from.getWriter(); + listWriter.allocate(); + + for (int i = 0; i < COUNT; i++) { + listWriter.setPosition(i); + listWriter.startList(); + ExtensionWriter extensionWriter = listWriter.extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(UUID.randomUUID()); + extensionWriter.writeExtension(UUID.randomUUID()); + listWriter.endList(); + } + from.setValueCount(COUNT); + + // copy values + FieldReader in = from.getReader(); + FieldWriter out = to.getWriter(); + for (int i = 0; i < COUNT; i++) { + in.setPosition(i); + out.setPosition(i); + ComplexCopier.copy(in, out, new UuidWriterFactory()); + } + + to.setValueCount(COUNT); + + // validate equals + assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); + } + } + + @Test + public void testCopyMapVectorWithExtensionType() { + try (final MapVector from = MapVector.empty("v", allocator, false); + final MapVector to = MapVector.empty("v", allocator, false)) { + + from.allocateNew(); + + UnionMapWriter mapWriter = from.getWriter(); + for (int i = 0; i < COUNT; i++) { + mapWriter.setPosition(i); + mapWriter.startMap(); + mapWriter.startEntry(); + ExtensionWriter extensionKeyWriter = mapWriter.key().extension(new UuidType()); + extensionKeyWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionKeyWriter.writeExtension(UUID.randomUUID()); + ExtensionWriter extensionValueWriter = mapWriter.value().extension(new UuidType()); + extensionValueWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionValueWriter.writeExtension(UUID.randomUUID()); + mapWriter.endEntry(); + mapWriter.endMap(); + } + + from.setValueCount(COUNT); + + // copy values + FieldReader in = from.getReader(); + FieldWriter out = to.getWriter(); + for (int i = 0; i < COUNT; i++) { + in.setPosition(i); + out.setPosition(i); + 
ComplexCopier.copy(in, out, new UuidWriterFactory()); + } + to.setValueCount(COUNT); + + // validate equals + assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); + } + } + + @Test + public void testCopyStructVectorWithExtensionType() { + try (final StructVector from = StructVector.empty("v", allocator); + final StructVector to = StructVector.empty("v", allocator)) { + + from.allocateNewSafe(); + + NullableStructWriter structWriter = from.getWriter(); + for (int i = 0; i < COUNT; i++) { + structWriter.setPosition(i); + structWriter.start(); + ExtensionWriter extensionWriter1 = structWriter.extension("timestamp1", new UuidType()); + extensionWriter1.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter1.writeExtension(UUID.randomUUID()); + ExtensionWriter extensionWriter2 = structWriter.extension("timestamp2", new UuidType()); + extensionWriter2.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter2.writeExtension(UUID.randomUUID()); + structWriter.end(); + } + + from.setValueCount(COUNT); + + // copy values + FieldReader in = from.getReader(); + FieldWriter out = to.getWriter(); + for (int i = 0; i < COUNT; i++) { + in.setPosition(i); + out.setPosition(i); + ComplexCopier.copy(in, out, new UuidWriterFactory()); + } + to.setValueCount(COUNT); + + // validate equals + assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); + } + } } diff --git a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java index 16dd734de8..6b98d3b340 100644 --- a/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java +++ b/vector/src/test/java/org/apache/arrow/vector/complex/impl/UuidReaderImpl.java @@ -61,4 +61,9 @@ public void copyAsValue(AbstractExtensionTypeWriter writer) { UuidWriterImpl impl = (UuidWriterImpl) writer; impl.vector.copyFromSafe(idx(), impl.idx(), vector); } + + @Override + public Object readObject() { + 
return vector.getObject(idx()); + } } From 8835842552d11def1ccf7e1ce7c8811315e34005 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 3 Sep 2025 18:46:16 +0300 Subject: [PATCH 2/6] GH-836: updated usages of ComplexCopier.copy --- .../templates/AbstractFieldReader.java | 4 +++ .../main/codegen/templates/BaseReader.java | 3 ++ .../main/codegen/templates/NullReader.java | 2 ++ .../apache/arrow/vector/BaseValueVector.java | 13 ++++++++ .../org/apache/arrow/vector/NullVector.java | 13 ++++++++ .../org/apache/arrow/vector/ValueVector.java | 25 ++++++++++++++ .../complex/AbstractContainerVector.java | 13 ++++++++ .../arrow/vector/complex/LargeListVector.java | 33 ++++++++++++++++++- .../vector/complex/LargeListViewVector.java | 15 +++++++++ .../arrow/vector/complex/ListVector.java | 33 ++++++++++++++++++- .../arrow/vector/complex/ListViewVector.java | 15 ++++++++- .../complex/impl/AbstractBaseReader.java | 10 ++++++ .../complex/impl/UnionLargeListReader.java | 4 +++ 13 files changed, 180 insertions(+), 3 deletions(-) diff --git a/vector/src/main/codegen/templates/AbstractFieldReader.java b/vector/src/main/codegen/templates/AbstractFieldReader.java index 7e84323b64..c7c5b4d78d 100644 --- a/vector/src/main/codegen/templates/AbstractFieldReader.java +++ b/vector/src/main/codegen/templates/AbstractFieldReader.java @@ -109,6 +109,10 @@ public void copyAsField(String name, ${name}Writer writer) { + public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory) { + fail("CopyAsValue StructWriter"); + } + public void read(ExtensionHolder holder) { fail("Extension"); } diff --git a/vector/src/main/codegen/templates/BaseReader.java b/vector/src/main/codegen/templates/BaseReader.java index c52345af21..4c6f49ab9b 100644 --- a/vector/src/main/codegen/templates/BaseReader.java +++ b/vector/src/main/codegen/templates/BaseReader.java @@ -49,6 +49,7 @@ public interface RepeatedStructReader extends StructReader{ boolean next(); int size(); void 
copyAsValue(StructWriter writer); + void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory); } public interface ListReader extends BaseReader{ @@ -59,6 +60,7 @@ public interface RepeatedListReader extends ListReader{ boolean next(); int size(); void copyAsValue(ListWriter writer); + void copyAsValue(ListWriter writer, ExtensionTypeWriterFactory writerFactory); } public interface MapReader extends BaseReader{ @@ -69,6 +71,7 @@ public interface RepeatedMapReader extends MapReader{ boolean next(); int size(); void copyAsValue(MapWriter writer); + void copyAsValue(MapWriter writer, ExtensionTypeWriterFactory writerFactory); } public interface ScalarReader extends diff --git a/vector/src/main/codegen/templates/NullReader.java b/vector/src/main/codegen/templates/NullReader.java index 88e6ea98ea..347ed62fd4 100644 --- a/vector/src/main/codegen/templates/NullReader.java +++ b/vector/src/main/codegen/templates/NullReader.java @@ -86,6 +86,8 @@ public void read(int arrayIndex, Nullable${name}Holder holder){ } + public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory){} + public void read(ExtensionHolder holder) { holder.isSet = 0; } diff --git a/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java b/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java index 37dfa20616..cc57cde29e 100644 --- a/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/BaseValueVector.java @@ -22,6 +22,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.ReferenceManager; import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.util.DataSizeRoundingUtil; import org.apache.arrow.vector.util.TransferPair; @@ -260,6 +261,18 @@ public void copyFromSafe(int fromIndex, int thisIndex, 
ValueVector from) { throw new UnsupportedOperationException(); } + @Override + public void copyFrom( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + + @Override + public void copyFromSafe( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + /** * Transfer the validity buffer from `validityBuffer` to the target vector's `validityBuffer`. * Start at `startIndex` and copy `length` number of elements. If the starting index is 8 byte diff --git a/vector/src/main/java/org/apache/arrow/vector/NullVector.java b/vector/src/main/java/org/apache/arrow/vector/NullVector.java index 6bfe540d23..0d6dab2837 100644 --- a/vector/src/main/java/org/apache/arrow/vector/NullVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/NullVector.java @@ -27,6 +27,7 @@ import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.NullReader; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; @@ -329,6 +330,18 @@ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } + @Override + public void copyFrom( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + + @Override + public void copyFromSafe( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + @Override public String getName() { return this.getField().getName(); diff --git 
a/vector/src/main/java/org/apache/arrow/vector/ValueVector.java b/vector/src/main/java/org/apache/arrow/vector/ValueVector.java index 3a5058256c..e0628c2ee1 100644 --- a/vector/src/main/java/org/apache/arrow/vector/ValueVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/ValueVector.java @@ -22,6 +22,7 @@ import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.Field; @@ -309,6 +310,30 @@ public interface ValueVector extends Closeable, Iterable { */ void copyFromSafe(int fromIndex, int thisIndex, ValueVector from); + /** + * Copy a cell value from a particular index in source vector to a particular position in this + * vector. + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + void copyFrom( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory); + + /** + * Same as {@link #copyFrom(int, int, ValueVector, ExtensionTypeWriterFactory)} except that it + * handles the case when the capacity of the vector needs to be expanded before copy. + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + void copyFromSafe( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory); + /** * Accept a generic {@link VectorVisitor} and return the result. 
* diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java index a6a71cf1a4..429f9884bb 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/AbstractContainerVector.java @@ -21,6 +21,7 @@ import org.apache.arrow.vector.DensityAwareVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; @@ -151,6 +152,18 @@ public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { throw new UnsupportedOperationException(); } + @Override + public void copyFrom( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + + @Override + public void copyFromSafe( + int fromIndex, int thisIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException(); + } + @Override public String getName() { return name; diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 997b5a8b78..48c8127e23 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -49,6 +49,7 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import 
org.apache.arrow.vector.complex.impl.UnionLargeListReader; import org.apache.arrow.vector.complex.impl.UnionLargeListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -482,12 +483,42 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { */ @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { + copyFrom(inIndex, outIndex, from, null); + } + + /** + * Copy a cell value from a particular index in source vector to a particular position in this + * vector. + * + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + @Override + public void copyFrom( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); UnionLargeListWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out); + ComplexCopier.copy(in, out, writerFactory); + } + + /** + * Same as {@link #copyFrom(int, int, ValueVector, ExtensionTypeWriterFactory)} except that it + * handles the case when the capacity of the vector needs to be expanded before copy. 
+ * + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + @Override + public void copyFromSafe( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + copyFrom(inIndex, outIndex, from, writerFactory); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java index 2da7eb057e..992a664449 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListViewVector.java @@ -41,6 +41,7 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionLargeListViewReader; import org.apache.arrow.vector.complex.impl.UnionLargeListViewWriter; import org.apache.arrow.vector.complex.impl.UnionListReader; @@ -346,6 +347,20 @@ public void copyFrom(int inIndex, int outIndex, ValueVector from) { "LargeListViewVector does not support copyFrom operation yet."); } + @Override + public void copyFromSafe( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException( + "LargeListViewVector does not support copyFromSafe operation yet."); + } + + @Override + public void copyFrom( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + throw new UnsupportedOperationException( + "LargeListViewVector does not support copyFrom operation yet."); + } + @Override public FieldVector getDataVector() { return vector; diff --git 
a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java index 93a313ef4f..89549257c4 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListVector.java @@ -42,6 +42,7 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -400,12 +401,42 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { */ @Override public void copyFrom(int inIndex, int outIndex, ValueVector from) { + copyFrom(inIndex, outIndex, from, null); + } + + /** + * Same as {@link #copyFrom(int, int, ValueVector, ExtensionTypeWriterFactory)} except that it + * handles the case when the capacity of the vector needs to be expanded before copy. + * + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + @Override + public void copyFromSafe( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + copyFrom(inIndex, outIndex, from, writerFactory); + } + + /** + * Copy a cell value from a particular index in source vector to a particular position in this + * vector. 
+ * + * @param inIndex position to copy from in source vector + * @param outIndex position to copy to in this vector + * @param from source vector + * @param writerFactory the extension type writer factory to use for copying extension type values + */ + @Override + public void copyFrom( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); FieldWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out); + ComplexCopier.copy(in, out, writerFactory); } /** diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java index 8711db5e0f..2784240429 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -42,6 +42,7 @@ import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.complex.impl.ComplexCopier; +import org.apache.arrow.vector.complex.impl.ExtensionTypeWriterFactory; import org.apache.arrow.vector.complex.impl.UnionListViewReader; import org.apache.arrow.vector.complex.impl.UnionListViewWriter; import org.apache.arrow.vector.complex.reader.FieldReader; @@ -338,6 +339,12 @@ public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { copyFrom(inIndex, outIndex, from); } + @Override + public void copyFromSafe( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { + copyFrom(inIndex, outIndex, from, writerFactory); + } + @Override public OUT accept(VectorVisitor visitor, IN value) { return visitor.visit(this, value); @@ -345,12 +352,18 @@ public OUT accept(VectorVisitor visitor, IN value) { @Override public void copyFrom(int inIndex, int outIndex, 
ValueVector from) { + copyFrom(inIndex, outIndex, from, null); + } + + @Override + public void copyFrom( + int inIndex, int outIndex, ValueVector from, ExtensionTypeWriterFactory writerFactory) { Preconditions.checkArgument(this.getMinorType() == from.getMinorType()); FieldReader in = from.getReader(); in.setPosition(inIndex); FieldWriter out = getWriter(); out.setPosition(outIndex); - ComplexCopier.copy(in, out); + ComplexCopier.copy(in, out, writerFactory); } @Override diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java index b2e95663f7..bf074ecb90 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/AbstractBaseReader.java @@ -115,4 +115,14 @@ public void copyAsValue(ListWriter writer) { public void copyAsValue(MapWriter writer) { ComplexCopier.copy(this, (FieldWriter) writer); } + + @Override + public void copyAsValue(ListWriter writer, ExtensionTypeWriterFactory writerFactory) { + ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); + } + + @Override + public void copyAsValue(MapWriter writer, ExtensionTypeWriterFactory writerFactory) { + ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); + } } diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java index be236c3166..a9104cb0d2 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/impl/UnionLargeListReader.java @@ -105,4 +105,8 @@ public boolean next() { public void copyAsValue(UnionLargeListWriter writer) { ComplexCopier.copy(this, (FieldWriter) writer); } + + public void copyAsValue(UnionLargeListWriter writer, 
ExtensionTypeWriterFactory writerFactory) { + ComplexCopier.copy(this, (FieldWriter) writer, writerFactory); + } } From 12a6d76447d0ac8c1f5ca3cebb21fa723f4eceb6 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Thu, 4 Sep 2025 13:16:01 +0300 Subject: [PATCH 3/6] GH-836: more tests --- .../apache/arrow/vector/TestListVector.java | 43 +++++++++ .../apache/arrow/vector/TestMapVector.java | 96 +++++++++++++++++++ 2 files changed, 139 insertions(+) diff --git a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index d58b7cc941..c6c7c5c862 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -1271,6 +1271,49 @@ public void testListVectorReaderForExtensionType() throws Exception { } } + @Test + public void testCopyFromForExtensionType() throws Exception { + try (ListVector inVector = ListVector.empty("input", allocator); + ListVector outVector = ListVector.empty("output", allocator)) { + UnionListWriter writer = inVector.getWriter(); + writer.allocate(); + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startList(); + ExtensionWriter extensionWriter = writer.extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + extensionWriter.writeExtension(u2); + extensionWriter.writeNull(); + writer.endList(); + + writer.setValueCount(1); + + // copy values from input to output + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector, new UuidWriterFactory()); + outVector.setValueCount(1); + + UnionListReader reader = outVector.getReader(); + assertTrue(reader.isSet(), "shouldn't be null"); + reader.setPosition(0); + reader.next(); + FieldReader uuidReader = reader.reader(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + ByteBuffer bb = 
ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + reader.next(); + uuidReader = reader.reader(); + uuidReader.read(holder); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u2, actualUuid); + } + } + private void writeIntValues(UnionListWriter writer, int[] values) { writer.startList(); for (int v : values) { diff --git a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 313d83ec91..1a1810d0f7 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -22,24 +22,30 @@ import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.complex.impl.UnionMapWriter; +import org.apache.arrow.vector.complex.impl.UuidWriterFactory; import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.complex.writer.BaseWriter.ExtensionWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.ListWriter; import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.complex.writer.FieldWriter; +import org.apache.arrow.vector.holder.UuidHolder; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import 
org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.UuidType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; @@ -1263,4 +1269,94 @@ public void testMapTypeReturnsSupportedMapWriter() { assertEquals(11, getResultValue(resultStruct)); } } + + @Test + public void testMapVectorWithExtensionType() throws Exception { + try (final MapVector inVector = MapVector.empty("map", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + ExtensionWriter extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u2); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + + UnionMapReader mapReader = inVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader uuidReader = mapReader.value(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + mapReader.next(); + uuidReader = mapReader.value(); + uuidReader.read(holder); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u2, actualUuid); + } + } + + @Test + public void testCopyFromForExtensionType() throws Exception { + try (final MapVector inVector = MapVector.empty("in", allocator, 
false); + final MapVector outVector = MapVector.empty("out", allocator, false)) { + inVector.allocateNew(); + UnionMapWriter writer = inVector.getWriter(); + writer.setPosition(0); + UUID u1 = UUID.randomUUID(); + UUID u2 = UUID.randomUUID(); + writer.startMap(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(0); + ExtensionWriter extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u1); + writer.endEntry(); + writer.startEntry(); + writer.key().bigInt().writeBigInt(1); + extensionWriter = writer.value().extension(new UuidType()); + extensionWriter.addExtensionTypeWriterFactory(new UuidWriterFactory()); + extensionWriter.writeExtension(u2); + writer.endEntry(); + writer.endMap(); + + writer.setValueCount(1); + outVector.allocateNew(); + outVector.copyFrom(0, 0, inVector, new UuidWriterFactory()); + outVector.setValueCount(1); + + UnionMapReader mapReader = outVector.getReader(); + mapReader.setPosition(0); + mapReader.next(); + FieldReader uuidReader = mapReader.value(); + UuidHolder holder = new UuidHolder(); + uuidReader.read(holder); + ByteBuffer bb = ByteBuffer.wrap(holder.value); + UUID actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u1, actualUuid); + mapReader.next(); + uuidReader = mapReader.value(); + uuidReader.read(holder); + bb = ByteBuffer.wrap(holder.value); + actualUuid = new UUID(bb.getLong(), bb.getLong()); + assertEquals(u2, actualUuid); + } + } } From d4d8e1d86d544a141e78d9d4876e92443d51bad5 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Wed, 10 Sep 2025 14:44:55 +0300 Subject: [PATCH 4/6] GH-836: fixed comment --- vector/src/main/codegen/templates/ComplexCopier.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vector/src/main/codegen/templates/ComplexCopier.java b/vector/src/main/codegen/templates/ComplexCopier.java index efa8dd883c..509d0dee9b 100644 --- 
a/vector/src/main/codegen/templates/ComplexCopier.java +++ b/vector/src/main/codegen/templates/ComplexCopier.java @@ -116,8 +116,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer, Extension break; case EXTENSIONTYPE: if (extensionTypeWriterFactory == null) { - throw new UnsupportedOperationException( - "EXTENSIONTYPE are not supported yet. Please provide an ExtensionTypeWriterFactory." ); + throw new IllegalArgumentException("Must provide ExtensionTypeWriterFactory"); } if (reader.isSet()) { Object value = reader.readObject(); From f8c6490c3be5261855e446973d09d57d1b442551 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Sat, 27 Sep 2025 09:12:36 +0300 Subject: [PATCH 5/6] GH-836: fixed format --- vector/src/main/codegen/templates/ComplexCopier.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vector/src/main/codegen/templates/ComplexCopier.java b/vector/src/main/codegen/templates/ComplexCopier.java index 509d0dee9b..4df5478f48 100644 --- a/vector/src/main/codegen/templates/ComplexCopier.java +++ b/vector/src/main/codegen/templates/ComplexCopier.java @@ -206,7 +206,7 @@ private static FieldWriter getListWriterForReader(FieldReader reader, ListWriter case NULL: return (FieldWriter) writer.list(); case LISTVIEW: - return (FieldWriter) writer.listView(); + return (FieldWriter) writer.listView(); case EXTENSIONTYPE: ExtensionWriter extensionWriter = writer.extension(reader.getField().getType()); return (FieldWriter) extensionWriter; From 6dcd7ee98365e1825fe4309738f00fc1e39d7686 Mon Sep 17 00:00:00 2001 From: Ivan Chesnov Date: Sat, 27 Sep 2025 09:27:01 +0300 Subject: [PATCH 6/6] GH-836: fixed format --- vector/src/main/codegen/templates/NullReader.java | 1 - 1 file changed, 1 deletion(-) diff --git a/vector/src/main/codegen/templates/NullReader.java b/vector/src/main/codegen/templates/NullReader.java index 347ed62fd4..0529633478 100644 --- a/vector/src/main/codegen/templates/NullReader.java +++ 
b/vector/src/main/codegen/templates/NullReader.java @@ -87,7 +87,6 @@ public void read(int arrayIndex, Nullable${name}Holder holder){ public void copyAsValue(StructWriter writer, ExtensionTypeWriterFactory writerFactory){} - public void read(ExtensionHolder holder) { holder.isSet = 0; }