Skip to content

GH-765: Do not close/free imported BaseStruct objects #766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion c/src/main/java/org/apache/arrow/c/ArrayImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ void importArray(ArrowArray src) {
ArrowArray ownedArray = ArrowArray.allocateNew(allocator);
ownedArray.save(snapshot);
src.markReleased();
src.close();

recursionLevel = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ final class ArrowArrayStreamReader extends ArrowReader {
this.ownedStream = ArrowArrayStream.allocateNew(allocator);
this.ownedStream.save(snapshot);
stream.markReleased();
stream.close();
}

@Override
Expand Down
219 changes: 200 additions & 19 deletions c/src/main/java/org/apache/arrow/c/Data.java
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,22 @@ public static void exportArrayStream(
new ArrayStreamExporter(allocator).export(out, reader);
}

/**
* Equivalent to calling {@link #importField(BufferAllocator, ArrowSchema,
* CDataDictionaryProvider, boolean) importField(allocator, schema, provider, true)}.
*
* @param allocator Buffer allocator for allocating dictionary vectors
* @param schema C data interface struct representing the field [inout]
* @param provider Dictionary provider that will be initialized with empty dictionary vectors
* (optional)
* @return Imported field object
* @see #importField(BufferAllocator, ArrowSchema, CDataDictionaryProvider, boolean)
*/
public static Field importField(
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
return importField(allocator, schema, provider, true);
}

/**
* Import Java Field from the C data interface.
*
Expand All @@ -241,19 +257,42 @@ public static void exportArrayStream(
* @param schema C data interface struct representing the field [inout]
* @param provider Dictionary provider that will be initialized with empty dictionary vectors
* (optional)
* @param closeImportedStructs if true, the ArrowSchema struct will be closed when this method
* completes.
* @return Imported field object
*/
public static Field importField(
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
BufferAllocator allocator,
ArrowSchema schema,
CDataDictionaryProvider provider,
boolean closeImportedStructs) {
try {
SchemaImporter importer = new SchemaImporter(allocator);
return importer.importField(schema, provider);
} finally {
schema.release();
schema.close();
if (closeImportedStructs) {
schema.close();
}
}
}

/**
* Equivalent to calling {@link #importSchema(BufferAllocator, ArrowSchema,
* CDataDictionaryProvider, boolean) importSchema(allocator, schema, provider, true)}.
*
* @param allocator Buffer allocator for allocating dictionary vectors
* @param schema C data interface struct representing the field
* @param provider Dictionary provider that will be initialized with empty dictionary vectors
* (optional)
* @return Imported schema object
* @see #importSchema(BufferAllocator, ArrowSchema, CDataDictionaryProvider, boolean)
*/
public static Schema importSchema(
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
return importSchema(allocator, schema, provider, true);
}

/**
* Import Java Schema from the C data interface.
*
Expand All @@ -264,36 +303,84 @@ public static Field importField(
* @param schema C data interface struct representing the field
* @param provider Dictionary provider that will be initialized with empty dictionary vectors
* (optional)
* @param closeImportedStructs if true, the ArrowSchema struct will be closed when this method
* completes.
* @return Imported schema object
*/
public static Schema importSchema(
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
Field structField = importField(allocator, schema, provider);
BufferAllocator allocator,
ArrowSchema schema,
CDataDictionaryProvider provider,
boolean closeImportedStructs) {
Field structField = importField(allocator, schema, provider, closeImportedStructs);
if (structField.getType().getTypeID() != ArrowTypeID.Struct) {
throw new IllegalArgumentException(
"Cannot import schema: ArrowSchema describes non-struct type");
}
return new Schema(structField.getChildren(), structField.getMetadata());
}

/**
* Equivalent to calling {@link #importIntoVector(BufferAllocator, ArrowArray, FieldVector,
* DictionaryProvider, boolean) importIntoVector(allocator, array, vector, provider, true)}.
*
* @param allocator Buffer allocator
* @param array C data interface struct holding the array data
* @param vector Imported vector object [out]
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @see #importIntoVector(BufferAllocator, ArrowArray, FieldVector, DictionaryProvider, boolean)
*/
public static void importIntoVector(
BufferAllocator allocator,
ArrowArray array,
FieldVector vector,
DictionaryProvider provider) {
importIntoVector(allocator, array, vector, provider, true);
}

/**
* Import Java vector from the C data interface.
*
* <p>The ArrowArray struct has its contents moved (as per the C data interface specification) to
* a private object held alive by the resulting array.
* <p>On successful completion, the ArrowArray struct will have been moved (as per the C data
* interface specification) to a private object held alive by the resulting array.
*
* @param allocator Buffer allocator
* @param array C data interface struct holding the array data
* @param vector Imported vector object [out]
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @param closeImportedStructs if true, the ArrowArray struct will be closed when this method
* completes successfully.
*/
public static void importIntoVector(
BufferAllocator allocator,
ArrowArray array,
FieldVector vector,
DictionaryProvider provider) {
DictionaryProvider provider,
boolean closeImportedStructs) {
ArrayImporter importer = new ArrayImporter(allocator, vector, provider);
importer.importArray(array);
if (closeImportedStructs) {
array.close();
}
}

/**
* Equivalent to calling {@link #importVector(BufferAllocator, ArrowArray, ArrowSchema,
* CDataDictionaryProvider, boolean) importVector(allocator, array, schema, provider, true)}.
*
* @param allocator Buffer allocator for allocating the output FieldVector
* @param array C data interface struct holding the array data
* @param schema C data interface struct holding the array type
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @return Imported vector object
* @see #importVector(BufferAllocator, ArrowArray, ArrowSchema, CDataDictionaryProvider, boolean)
*/
public static FieldVector importVector(
BufferAllocator allocator,
ArrowArray array,
ArrowSchema schema,
CDataDictionaryProvider provider) {
return importVector(allocator, array, schema, provider, true);
}

/**
Expand All @@ -307,19 +394,42 @@ public static void importIntoVector(
* @param array C data interface struct holding the array data
* @param schema C data interface struct holding the array type
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @param closeImportedStructs if true, the ArrowArray struct will be closed when this method
* completes successfully and the ArrowSchema struct will always be closed.
* @return Imported vector object
*/
public static FieldVector importVector(
BufferAllocator allocator,
ArrowArray array,
ArrowSchema schema,
CDataDictionaryProvider provider) {
Field field = importField(allocator, schema, provider);
CDataDictionaryProvider provider,
boolean closeImportedStructs) {
Field field = importField(allocator, schema, provider, closeImportedStructs);
FieldVector vector = field.createVector(allocator);
importIntoVector(allocator, array, vector, provider);
importIntoVector(allocator, array, vector, provider, closeImportedStructs);
return vector;
}

/**
* Equivalent to calling {@link #importIntoVectorSchemaRoot(BufferAllocator, ArrowArray,
* VectorSchemaRoot, DictionaryProvider, boolean) importIntoVectorSchemaRoot(allocator, array,
* root, provider, true)}.
*
* @param allocator Buffer allocator
* @param array C data interface struct holding the record batch data
* @param root vector schema root to load into
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @see #importIntoVectorSchemaRoot(BufferAllocator, ArrowArray, VectorSchemaRoot,
* DictionaryProvider, boolean)
*/
public static void importIntoVectorSchemaRoot(
BufferAllocator allocator,
ArrowArray array,
VectorSchemaRoot root,
DictionaryProvider provider) {
importIntoVectorSchemaRoot(allocator, array, root, provider, true);
}

/**
* Import record batch from the C data interface into vector schema root.
*
Expand All @@ -333,15 +443,18 @@ public static FieldVector importVector(
* @param array C data interface struct holding the record batch data
* @param root vector schema root to load into
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @param closeImportedStructs if true, the ArrowArray struct will be closed when this method
* completes successfully
*/
public static void importIntoVectorSchemaRoot(
BufferAllocator allocator,
ArrowArray array,
VectorSchemaRoot root,
DictionaryProvider provider) {
DictionaryProvider provider,
boolean closeImportedStructs) {
try (StructVector structVector = StructVector.emptyWithDuplicates("", allocator)) {
structVector.initializeChildrenFromFields(root.getSchema().getFields());
importIntoVector(allocator, array, structVector, provider);
importIntoVector(allocator, array, structVector, provider, closeImportedStructs);
StructVectorUnloader unloader = new StructVectorUnloader(structVector);
VectorLoader loader = new VectorLoader(root);
try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) {
Expand All @@ -350,6 +463,21 @@ public static void importIntoVectorSchemaRoot(
}
}

/**
* Equivalent to calling {@link #importVectorSchemaRoot(BufferAllocator, ArrowSchema,
* CDataDictionaryProvider, boolean) importVectorSchemaRoot(allocator, schema, provider, true)}.
*
* @param allocator Buffer allocator for allocating the output VectorSchemaRoot
* @param schema C data interface struct holding the record batch schema
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @return Imported vector schema root
* @see #importVectorSchemaRoot(BufferAllocator, ArrowSchema, CDataDictionaryProvider, boolean)
*/
public static VectorSchemaRoot importVectorSchemaRoot(
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
return importVectorSchemaRoot(allocator, schema, provider, true);
}

/**
* Import Java vector schema root from a C data interface Schema.
*
Expand All @@ -360,11 +488,37 @@ public static void importIntoVectorSchemaRoot(
* @param allocator Buffer allocator for allocating the output VectorSchemaRoot
* @param schema C data interface struct holding the record batch schema
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @param closeImportedStructs if true, the ArrowSchema struct will be closed when this method
* completes
* @return Imported vector schema root
*/
public static VectorSchemaRoot importVectorSchemaRoot(
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
return importVectorSchemaRoot(allocator, null, schema, provider);
BufferAllocator allocator,
ArrowSchema schema,
CDataDictionaryProvider provider,
boolean closeImportedStructs) {
return importVectorSchemaRoot(allocator, null, schema, provider, closeImportedStructs);
}

/**
* Equivalent to calling {@link #importVectorSchemaRoot(BufferAllocator, ArrowArray, ArrowSchema,
* CDataDictionaryProvider, boolean) importVectorSchemaRoot(allocator, array, schema, provider,
* true)}.
*
* @param allocator Buffer allocator for allocating the output VectorSchemaRoot
* @param array C data interface struct holding the record batch data (optional)
* @param schema C data interface struct holding the record batch schema
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @return Imported vector schema root
* @see #importVectorSchemaRoot(BufferAllocator, ArrowArray, ArrowSchema, CDataDictionaryProvider,
* boolean)
*/
public static VectorSchemaRoot importVectorSchemaRoot(
BufferAllocator allocator,
ArrowArray array,
ArrowSchema schema,
CDataDictionaryProvider provider) {
return importVectorSchemaRoot(allocator, array, schema, provider, true);
}

/**
Expand All @@ -383,29 +537,56 @@ public static VectorSchemaRoot importVectorSchemaRoot(
* @param array C data interface struct holding the record batch data (optional)
* @param schema C data interface struct holding the record batch schema
* @param provider Dictionary provider to load dictionary vectors to (optional)
* @param closeImportedStructs if true, the ArrowArray struct will be closed when this method
* completes successfully and the ArrowSchema struct will always be closed.
* @return Imported vector schema root
*/
public static VectorSchemaRoot importVectorSchemaRoot(
BufferAllocator allocator,
ArrowArray array,
ArrowSchema schema,
CDataDictionaryProvider provider) {
CDataDictionaryProvider provider,
boolean closeImportedStructs) {
VectorSchemaRoot vsr =
VectorSchemaRoot.create(importSchema(allocator, schema, provider), allocator);
VectorSchemaRoot.create(
importSchema(allocator, schema, provider, closeImportedStructs), allocator);
if (array != null) {
importIntoVectorSchemaRoot(allocator, array, vsr, provider);
importIntoVectorSchemaRoot(allocator, array, vsr, provider, closeImportedStructs);
}
return vsr;
}

/**
* Import an ArrowArrayStream as an {@link ArrowReader}.
* Equivalent to calling {@link #importArrayStream(BufferAllocator, ArrowArrayStream, boolean)
* importArrayStream(allocator, stream, true)}.
*
* @param allocator Buffer allocator for allocating the output data.
* @param stream C stream interface struct to import.
* @return Imported reader
* @see #importArrayStream(BufferAllocator, ArrowArrayStream, boolean)
*/
public static ArrowReader importArrayStream(BufferAllocator allocator, ArrowArrayStream stream) {
return new ArrowArrayStreamReader(allocator, stream);
return importArrayStream(allocator, stream, true);
}

/**
* Import an ArrowArrayStream as an {@link ArrowReader}.
*
* <p>On successful completion, the ArrowArrayStream struct will have been moved (as per the C
* data interface specification) to a private object held alive by the resulting ArrowReader.
*
* @param allocator Buffer allocator for allocating the output data.
* @param stream C stream interface struct to import.
* @param closeImportedStructs if true, the ArrowArrayStream struct will be closed when this
* method completes successfully
* @return Imported reader
*/
public static ArrowReader importArrayStream(
BufferAllocator allocator, ArrowArrayStream stream, boolean closeImportedStructs) {
ArrowArrayStreamReader reader = new ArrowArrayStreamReader(allocator, stream);
if (closeImportedStructs) {
stream.close();
}
return reader;
}
}
Loading
Loading