Skip to content

Commit 18b3aff

Browse files
committed
apacheGH-765: Add Data#import... overloads that do not close/free imported BaseStruct objects
1 parent 7c25ce5 commit 18b3aff

File tree

3 files changed

+200
-21
lines changed

3 files changed

+200
-21
lines changed

c/src/main/java/org/apache/arrow/c/ArrayImporter.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@ void importArray(ArrowArray src) {
5858
ArrowArray ownedArray = ArrowArray.allocateNew(allocator);
5959
ownedArray.save(snapshot);
6060
src.markReleased();
61-
src.close();
6261

6362
recursionLevel = 0;
6463

c/src/main/java/org/apache/arrow/c/ArrowArrayStreamReader.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ final class ArrowArrayStreamReader extends ArrowReader {
4444
this.ownedStream = ArrowArrayStream.allocateNew(allocator);
4545
this.ownedStream.save(snapshot);
4646
stream.markReleased();
47-
stream.close();
4847
}
4948

5049
@Override

c/src/main/java/org/apache/arrow/c/Data.java

Lines changed: 200 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,22 @@ public static void exportArrayStream(
231231
new ArrayStreamExporter(allocator).export(out, reader);
232232
}
233233

234+
/**
235+
* Equivalent to calling {@link #importField(BufferAllocator, ArrowSchema,
236+
* CDataDictionaryProvider, boolean) importField(allocator, schema, provider, true)}.
237+
*
238+
* @param allocator Buffer allocator for allocating dictionary vectors
239+
* @param schema C data interface struct representing the field [inout]
240+
* @param provider A dictionary provider that will be initialized with empty dictionary vectors
241+
* (optional)
242+
* @return Imported field object
243+
* @see #importField(BufferAllocator, ArrowSchema, CDataDictionaryProvider, boolean)
244+
*/
245+
public static Field importField(
246+
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
247+
return importField(allocator, schema, provider, true);
248+
}
249+
234250
/**
235251
* Import Java Field from the C data interface.
236252
*
@@ -241,19 +257,42 @@ public static void exportArrayStream(
241257
* @param schema C data interface struct representing the field [inout]
242258
* @param provider A dictionary provider that will be initialized with empty dictionary vectors
243259
* (optional)
260+
* @param closeImportedStructs if true, the ArrowSchema struct will be closed when this method
261+
* completes.
244262
* @return Imported field object
245263
*/
246264
public static Field importField(
247-
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
265+
BufferAllocator allocator,
266+
ArrowSchema schema,
267+
CDataDictionaryProvider provider,
268+
boolean closeImportedStructs) {
248269
try {
249270
SchemaImporter importer = new SchemaImporter(allocator);
250271
return importer.importField(schema, provider);
251272
} finally {
252273
schema.release();
253-
schema.close();
274+
if (closeImportedStructs) {
275+
schema.close();
276+
}
254277
}
255278
}
256279

280+
/**
281+
* Equivalent to calling {@link #importSchema(BufferAllocator, ArrowSchema,
282+
* CDataDictionaryProvider, boolean) importSchema(allocator, schema, provider, true)}.
283+
*
284+
* @param allocator Buffer allocator for allocating dictionary vectors
285+
* @param schema C data interface struct representing the field
286+
* @param provider A dictionary provider that will be initialized with empty dictionary vectors
287+
* (optional)
288+
* @return Imported schema object
289+
* @see #importSchema(BufferAllocator, ArrowSchema, CDataDictionaryProvider, boolean)
290+
*/
291+
public static Schema importSchema(
292+
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
293+
return importSchema(allocator, schema, provider, true);
294+
}
295+
257296
/**
258297
* Import Java Schema from the C data interface.
259298
*
@@ -264,36 +303,84 @@ public static Field importField(
264303
* @param schema C data interface struct representing the field
265304
* @param provider A dictionary provider that will be initialized with empty dictionary vectors
266305
* (optional)
306+
* @param closeImportedStructs if true, the ArrowSchema struct will be closed when this method
307+
* completes.
267308
* @return Imported schema object
268309
*/
269310
public static Schema importSchema(
270-
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
271-
Field structField = importField(allocator, schema, provider);
311+
BufferAllocator allocator,
312+
ArrowSchema schema,
313+
CDataDictionaryProvider provider,
314+
boolean closeImportedStructs) {
315+
Field structField = importField(allocator, schema, provider, closeImportedStructs);
272316
if (structField.getType().getTypeID() != ArrowTypeID.Struct) {
273317
throw new IllegalArgumentException(
274318
"Cannot import schema: ArrowSchema describes non-struct type");
275319
}
276320
return new Schema(structField.getChildren(), structField.getMetadata());
277321
}
278322

323+
/**
324+
* Equivalent to calling {@link #importIntoVector(BufferAllocator, ArrowArray, FieldVector,
325+
* DictionaryProvider, boolean) importIntoVector(allocator, array, vector, provider, true)}.
326+
*
327+
* @param allocator Buffer allocator
328+
* @param array C data interface struct holding the array data
329+
* @param vector Imported vector object [out]
330+
* @param provider Dictionary provider to load dictionary vectors to (optional)
331+
* @see #importIntoVector(BufferAllocator, ArrowArray, FieldVector, DictionaryProvider, boolean)
332+
*/
333+
public static void importIntoVector(
334+
BufferAllocator allocator,
335+
ArrowArray array,
336+
FieldVector vector,
337+
DictionaryProvider provider) {
338+
importIntoVector(allocator, array, vector, provider, true);
339+
}
340+
279341
/**
280342
* Import Java vector from the C data interface.
281343
*
282-
* <p>The ArrowArray struct has its contents moved (as per the C data interface specification) to
283-
* a private object held alive by the resulting array.
344+
* <p>On successful completion, the ArrowArray struct will have been moved (as per the C data
345+
* interface specification) to a private object held alive by the resulting array.
284346
*
285347
* @param allocator Buffer allocator
286348
* @param array C data interface struct holding the array data
287349
* @param vector Imported vector object [out]
288350
* @param provider Dictionary provider to load dictionary vectors to (optional)
351+
* @param closeImportedStructs if true, the ArrowArray struct will be closed when this method
352+
* completes successfully.
289353
*/
290354
public static void importIntoVector(
291355
BufferAllocator allocator,
292356
ArrowArray array,
293357
FieldVector vector,
294-
DictionaryProvider provider) {
358+
DictionaryProvider provider,
359+
boolean closeImportedStructs) {
295360
ArrayImporter importer = new ArrayImporter(allocator, vector, provider);
296361
importer.importArray(array);
362+
if (closeImportedStructs) {
363+
array.close();
364+
}
365+
}
366+
367+
/**
368+
* Equivalent to calling {@link #importVector(BufferAllocator, ArrowArray, ArrowSchema,
369+
* CDataDictionaryProvider, boolean) importVector(allocator, array, schema, provider, true)}.
370+
*
371+
* @param allocator Buffer allocator for allocating the output FieldVector
372+
* @param array C data interface struct holding the array data
373+
* @param schema C data interface struct holding the array type
374+
* @param provider Dictionary provider to load dictionary vectors to (optional)
375+
* @return Imported vector object
376+
* @see #importVector(BufferAllocator, ArrowArray, ArrowSchema, CDataDictionaryProvider, boolean)
377+
*/
378+
public static FieldVector importVector(
379+
BufferAllocator allocator,
380+
ArrowArray array,
381+
ArrowSchema schema,
382+
CDataDictionaryProvider provider) {
383+
return importVector(allocator, array, schema, provider, true);
297384
}
298385

299386
/**
@@ -307,19 +394,42 @@ public static void importIntoVector(
307394
* @param array C data interface struct holding the array data
308395
* @param schema C data interface struct holding the array type
309396
* @param provider Dictionary provider to load dictionary vectors to (optional)
397+
* @param closeImportedStructs if true, the ArrowArray struct will be closed when this method
398+
* completes successfully and the ArrowSchema struct will always be closed.
310399
* @return Imported vector object
311400
*/
312401
public static FieldVector importVector(
313402
BufferAllocator allocator,
314403
ArrowArray array,
315404
ArrowSchema schema,
316-
CDataDictionaryProvider provider) {
317-
Field field = importField(allocator, schema, provider);
405+
CDataDictionaryProvider provider,
406+
boolean closeImportedStructs) {
407+
Field field = importField(allocator, schema, provider, closeImportedStructs);
318408
FieldVector vector = field.createVector(allocator);
319-
importIntoVector(allocator, array, vector, provider);
409+
importIntoVector(allocator, array, vector, provider, closeImportedStructs);
320410
return vector;
321411
}
322412

413+
/**
414+
* Equivalent to calling {@link #importIntoVectorSchemaRoot(BufferAllocator, ArrowArray,
415+
* VectorSchemaRoot, DictionaryProvider, boolean) importIntoVectorSchemaRoot(allocator, array,
416+
* root, provider, true)}.
417+
*
418+
* @param allocator Buffer allocator
419+
* @param array C data interface struct holding the record batch data
420+
* @param root vector schema root to load into
421+
* @param provider Dictionary provider to load dictionary vectors to (optional)
422+
* @see #importIntoVectorSchemaRoot(BufferAllocator, ArrowArray, VectorSchemaRoot,
423+
* DictionaryProvider, boolean)
424+
*/
425+
public static void importIntoVectorSchemaRoot(
426+
BufferAllocator allocator,
427+
ArrowArray array,
428+
VectorSchemaRoot root,
429+
DictionaryProvider provider) {
430+
importIntoVectorSchemaRoot(allocator, array, root, provider, true);
431+
}
432+
323433
/**
324434
* Import record batch from the C data interface into vector schema root.
325435
*
@@ -333,15 +443,18 @@ public static FieldVector importVector(
333443
* @param array C data interface struct holding the record batch data
334444
* @param root vector schema root to load into
335445
* @param provider Dictionary provider to load dictionary vectors to (optional)
446+
* @param closeImportedStructs if true, the ArrowArray struct will be closed when this method
447+
* completes successfully
336448
*/
337449
public static void importIntoVectorSchemaRoot(
338450
BufferAllocator allocator,
339451
ArrowArray array,
340452
VectorSchemaRoot root,
341-
DictionaryProvider provider) {
453+
DictionaryProvider provider,
454+
boolean closeImportedStructs) {
342455
try (StructVector structVector = StructVector.emptyWithDuplicates("", allocator)) {
343456
structVector.initializeChildrenFromFields(root.getSchema().getFields());
344-
importIntoVector(allocator, array, structVector, provider);
457+
importIntoVector(allocator, array, structVector, provider, closeImportedStructs);
345458
StructVectorUnloader unloader = new StructVectorUnloader(structVector);
346459
VectorLoader loader = new VectorLoader(root);
347460
try (ArrowRecordBatch recordBatch = unloader.getRecordBatch()) {
@@ -350,6 +463,21 @@ public static void importIntoVectorSchemaRoot(
350463
}
351464
}
352465

466+
/**
467+
* Equivalent to calling {@link #importVectorSchemaRoot(BufferAllocator, ArrowSchema,
468+
* CDataDictionaryProvider, boolean) importVectorSchemaRoot(allocator, schema, provider, true)}.
469+
*
470+
* @param allocator Buffer allocator for allocating the output VectorSchemaRoot
471+
* @param schema C data interface struct holding the record batch schema
472+
* @param provider Dictionary provider to load dictionary vectors to (optional)
473+
* @return Imported vector schema root
474+
* @see #importVectorSchemaRoot(BufferAllocator, ArrowSchema, CDataDictionaryProvider, boolean)
475+
*/
476+
public static VectorSchemaRoot importVectorSchemaRoot(
477+
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
478+
return importVectorSchemaRoot(allocator, schema, provider, true);
479+
}
480+
353481
/**
354482
* Import Java vector schema root from a C data interface Schema.
355483
*
@@ -360,11 +488,37 @@ public static void importIntoVectorSchemaRoot(
360488
* @param allocator Buffer allocator for allocating the output VectorSchemaRoot
361489
* @param schema C data interface struct holding the record batch schema
362490
* @param provider Dictionary provider to load dictionary vectors to (optional)
491+
* @param closeImportedStructs if true, the ArrowSchema struct will be closed when this method
492+
* completes
363493
* @return Imported vector schema root
364494
*/
365495
public static VectorSchemaRoot importVectorSchemaRoot(
366-
BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
367-
return importVectorSchemaRoot(allocator, null, schema, provider);
496+
BufferAllocator allocator,
497+
ArrowSchema schema,
498+
CDataDictionaryProvider provider,
499+
boolean closeImportedStructs) {
500+
return importVectorSchemaRoot(allocator, null, schema, provider, closeImportedStructs);
501+
}
502+
503+
/**
504+
* Equivalent to calling {@link #importVectorSchemaRoot(BufferAllocator, ArrowArray, ArrowSchema,
505+
* CDataDictionaryProvider, boolean) importVectorSchemaRoot(allocator, array, schema, provider,
506+
* true)}.
507+
*
508+
* @param allocator Buffer allocator for allocating the output VectorSchemaRoot
509+
* @param array C data interface struct holding the record batch data (optional)
510+
* @param schema C data interface struct holding the record batch schema
511+
* @param provider Dictionary provider to load dictionary vectors to (optional)
512+
* @return Imported vector schema root
513+
* @see #importVectorSchemaRoot(BufferAllocator, ArrowArray, ArrowSchema, CDataDictionaryProvider,
514+
* boolean)
515+
*/
516+
public static VectorSchemaRoot importVectorSchemaRoot(
517+
BufferAllocator allocator,
518+
ArrowArray array,
519+
ArrowSchema schema,
520+
CDataDictionaryProvider provider) {
521+
return importVectorSchemaRoot(allocator, array, schema, provider, true);
368522
}
369523

370524
/**
@@ -383,29 +537,56 @@ public static VectorSchemaRoot importVectorSchemaRoot(
383537
* @param array C data interface struct holding the record batch data (optional)
384538
* @param schema C data interface struct holding the record batch schema
385539
* @param provider Dictionary provider to load dictionary vectors to (optional)
540+
* @param closeImportedStructs if true, the ArrowArray struct will be closed when this method
541+
* completes successfully and the ArrowSchema struct will always be closed.
386542
* @return Imported vector schema root
387543
*/
388544
public static VectorSchemaRoot importVectorSchemaRoot(
389545
BufferAllocator allocator,
390546
ArrowArray array,
391547
ArrowSchema schema,
392-
CDataDictionaryProvider provider) {
548+
CDataDictionaryProvider provider,
549+
boolean closeImportedStructs) {
393550
VectorSchemaRoot vsr =
394-
VectorSchemaRoot.create(importSchema(allocator, schema, provider), allocator);
551+
VectorSchemaRoot.create(
552+
importSchema(allocator, schema, provider, closeImportedStructs), allocator);
395553
if (array != null) {
396-
importIntoVectorSchemaRoot(allocator, array, vsr, provider);
554+
importIntoVectorSchemaRoot(allocator, array, vsr, provider, closeImportedStructs);
397555
}
398556
return vsr;
399557
}
400558

401559
/**
402-
* Import an ArrowArrayStream as an {@link ArrowReader}.
560+
* Equivalent to calling {@link #importArrayStream(BufferAllocator, ArrowArrayStream, boolean)
561+
* importArrayStream(allocator, stream, true)}.
403562
*
404563
* @param allocator Buffer allocator for allocating the output data.
405564
* @param stream C stream interface struct to import.
406565
* @return Imported reader
566+
* @see #importArrayStream(BufferAllocator, ArrowArrayStream, boolean)
407567
*/
408568
public static ArrowReader importArrayStream(BufferAllocator allocator, ArrowArrayStream stream) {
409-
return new ArrowArrayStreamReader(allocator, stream);
569+
return importArrayStream(allocator, stream, true);
570+
}
571+
572+
/**
573+
* Import an ArrowArrayStream as an {@link ArrowReader}.
574+
*
575+
* <p>On successful completion, the ArrowArrayStream struct will have been moved (as per the C
576+
* data interface specification) to a private object held alive by the resulting ArrowReader.
577+
*
578+
* @param allocator Buffer allocator for allocating the output data.
579+
* @param stream C stream interface struct to import.
580+
* @param closeImportedStructs if true, the ArrowArrayStream struct will be closed when this
581+
* method completes successfully
582+
* @return Imported reader
583+
*/
584+
public static ArrowReader importArrayStream(
585+
BufferAllocator allocator, ArrowArrayStream stream, boolean closeImportedStructs) {
586+
ArrowArrayStreamReader reader = new ArrowArrayStreamReader(allocator, stream);
587+
if (closeImportedStructs) {
588+
stream.close();
589+
}
590+
return reader;
410591
}
411592
}

0 commit comments

Comments
 (0)