Skip to content

Commit 44efeaa

Browse files
committed
Appender: fix buffer size for nested STRUCTs
This change fixes the buffer size calculation for `STRUCT`s nested inside `LIST`s and for `MAP`s. It also fixes the validity mask size calculation for these cases. Testing: additional tests added with longer `MAP`s (to check nested `STRUCT`s) and with `NULL` values (to check validity masks). Fix: #437
1 parent 3c6fb0a commit 44efeaa

File tree

6 files changed

+286
-96
lines changed

6 files changed

+286
-96
lines changed

src/jni/bindings_vector.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,24 @@ JNIEXPORT jobject JNICALL Java_org_duckdb_DuckDBBindings_duckdb_1vector_1get_1da
107107
*/
108108
JNIEXPORT jobject JNICALL Java_org_duckdb_DuckDBBindings_duckdb_1vector_1get_1validity(JNIEnv *env, jclass,
109109
jobject vector,
110-
jlong array_size) {
110+
jlong vector_size_elems) {
111111

112112
duckdb_vector vec = vector_buf_to_vector(env, vector);
113113
if (env->ExceptionCheck()) {
114114
return nullptr;
115115
}
116116

117+
idx_t vector_size = jlong_to_idx(env, vector_size_elems);
118+
if (env->ExceptionCheck()) {
119+
return nullptr;
120+
}
121+
117122
uint64_t *mask = duckdb_vector_get_validity(vec);
118-
idx_t vec_len = duckdb_vector_size();
119-
idx_t mask_len = vec_len * sizeof(uint64_t) * array_size / 64;
123+
idx_t vector_size_rounded = vector_size;
124+
if (vector_size % 64 != 0) {
125+
vector_size_rounded += 64 - (vector_size % 64);
126+
}
127+
idx_t mask_len = vector_size_rounded * sizeof(uint64_t) / 64;
120128

121129
return make_data_buf(env, mask, mask_len);
122130
}

src/main/java/org/duckdb/DuckDBAppender.java

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,14 @@ public class DuckDBAppender implements AutoCloseable {
7474

7575
private static final LocalDateTime EPOCH_DATE_TIME = LocalDateTime.ofEpochSecond(0, 0, UTC);
7676

77+
private static final long MAX_TOP_LEVEL_ROWS = duckdb_vector_size();
78+
7779
private final DuckDBConnection conn;
7880

7981
private final String catalog;
8082
private final String schema;
8183
private final String table;
8284

83-
private final long maxRows;
84-
8585
private ByteBuffer appenderRef;
8686
private final Lock appenderRefLock = new ReentrantLock();
8787

@@ -101,8 +101,6 @@ public class DuckDBAppender implements AutoCloseable {
101101
this.schema = schema;
102102
this.table = table;
103103

104-
this.maxRows = duckdb_vector_size();
105-
106104
ByteBuffer appenderRef = null;
107105
ByteBuffer[] colTypes = null;
108106
ByteBuffer chunkRef = null;
@@ -163,7 +161,7 @@ public DuckDBAppender endRow() throws SQLException {
163161
rowIdx++;
164162
Column prev = prevColumn;
165163
this.prevColumn = null;
166-
if (rowIdx >= maxRows) {
164+
if (rowIdx >= MAX_TOP_LEVEL_ROWS) {
167165
try {
168166
flush();
169167
} catch (SQLException e) {
@@ -2325,8 +2323,10 @@ private Column(Column parent, int idx, ByteBuffer colTypeRef, ByteBuffer vector,
23252323
this.arraySize = duckdb_array_type_array_size(parent.colTypeRef);
23262324
}
23272325

2326+
long maxElems = maxElementsCount();
23282327
if (colType.widthBytes > 0 || colType == DUCKDB_TYPE_DECIMAL) {
2329-
this.data = duckdb_vector_get_data(vectorRef, vectorSize());
2328+
long vectorSizeBytes = maxElems * widthBytes();
2329+
this.data = duckdb_vector_get_data(vectorRef, vectorSizeBytes);
23302330
if (null == this.data) {
23312331
throw new SQLException("cannot initialize data chunk vector data");
23322332
}
@@ -2335,7 +2335,7 @@ private Column(Column parent, int idx, ByteBuffer colTypeRef, ByteBuffer vector,
23352335
}
23362336

23372337
duckdb_vector_ensure_validity_writable(vectorRef);
2338-
this.validity = duckdb_vector_get_validity(vectorRef, arraySize * parentArraySize());
2338+
this.validity = duckdb_vector_get_validity(vectorRef, maxElems);
23392339
if (null == this.validity) {
23402340
throw new SQLException("cannot initialize data chunk vector validity");
23412341
}
@@ -2353,15 +2353,18 @@ void reset(long listSize) throws SQLException {
23532353
}
23542354

23552355
void reset() throws SQLException {
2356+
long maxElems = maxElementsCount();
2357+
23562358
if (null != this.data) {
2357-
this.data = duckdb_vector_get_data(vectorRef, vectorSize());
2359+
long vectorSizeBytes = maxElems * widthBytes();
2360+
this.data = duckdb_vector_get_data(vectorRef, vectorSizeBytes);
23582361
if (null == this.data) {
23592362
throw new SQLException("cannot reset data chunk vector data");
23602363
}
23612364
}
23622365

23632366
duckdb_vector_ensure_validity_writable(vectorRef);
2364-
this.validity = duckdb_vector_get_validity(vectorRef, arraySize * parentArraySize());
2367+
this.validity = duckdb_vector_get_validity(vectorRef, maxElems);
23652368
if (null == this.validity) {
23662369
throw new SQLException("cannot reset data chunk vector validity");
23672370
}
@@ -2432,12 +2435,17 @@ long parentArraySize() {
24322435
return parent.arraySize;
24332436
}
24342437

2435-
long vectorSize() {
2436-
if (null != parent && (parent.colType == DUCKDB_TYPE_LIST || parent.colType == DUCKDB_TYPE_MAP)) {
2437-
return listSize * widthBytes();
2438-
} else {
2439-
return duckdb_vector_size() * widthBytes() * arraySize * parentArraySize();
2438+
long maxElementsCount() {
2439+
Column ancestor = this;
2440+
while (null != ancestor) {
2441+
if (null != ancestor.parent &&
2442+
(ancestor.parent.colType == DUCKDB_TYPE_LIST || ancestor.parent.colType == DUCKDB_TYPE_MAP)) {
2443+
break;
2444+
}
2445+
ancestor = ancestor.parent;
24402446
}
2447+
long maxEntries = null != ancestor ? ancestor.listSize : DuckDBAppender.MAX_TOP_LEVEL_ROWS;
2448+
return maxEntries * arraySize * parentArraySize();
24412449
}
24422450
}
24432451
}

src/main/java/org/duckdb/DuckDBBindings.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public class DuckDBBindings {
5353

5454
static native ByteBuffer duckdb_vector_get_data(ByteBuffer vector, long size_bytes);
5555

56-
static native ByteBuffer duckdb_vector_get_validity(ByteBuffer vector, long array_size);
56+
static native ByteBuffer duckdb_vector_get_validity(ByteBuffer vector, long vector_size_elems);
5757

5858
static native void duckdb_vector_ensure_validity_writable(ByteBuffer vector);
5959

src/test/java/org/duckdb/TestAppenderCollection.java

Lines changed: 35 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1410,93 +1410,55 @@ public static void test_appender_list_basic_nested_list() throws Exception {
14101410
}
14111411
}
14121412

1413-
private static void assertMapsEqual(Object obj1, Map<?, ?> map2) throws Exception {
1414-
Map<?, ?> map1 = (Map<?, ?>) obj1;
1415-
assertEquals(map1.size(), map2.size());
1416-
List<Map.Entry<?, ?>> list2 = new ArrayList<>(map2.entrySet());
1417-
int i = 0;
1418-
for (Map.Entry<?, ?> en : map1.entrySet()) {
1419-
assertEquals(en.getKey(), list2.get(i).getKey());
1420-
assertEquals(en.getValue(), list2.get(i).getValue());
1421-
i += 1;
1422-
}
1423-
}
1413+
public static void test_appender_list_bigint() throws Exception {
1414+
int count = 1 << 12; // auto flush twice
1415+
int tail = 7; // flushed on close
1416+
int listLen = (1 << 6) + 7; // increase this for stress tests
14241417

1425-
public static void test_appender_map_basic() throws Exception {
1426-
Map<Integer, String> map1 = createMap(41, "foo", 42, "bar");
1427-
Map<Integer, String> map2 = createMap(41, "foo", 42, null, 43, "baz");
14281418
try (DuckDBConnection conn = DriverManager.getConnection(JDBC_URL).unwrap(DuckDBConnection.class);
14291419
Statement stmt = conn.createStatement()) {
1430-
stmt.execute("CREATE TABLE tab1(col1 INTEGER, col2 MAP(INTEGER, VARCHAR))");
1420+
stmt.execute("CREATE TABLE tab1(col1 INTEGER, col2 BIGINT[])");
14311421

14321422
try (DuckDBAppender appender = conn.createAppender("tab1")) {
1433-
appender.beginRow()
1434-
.append(41)
1435-
.append(map1)
1436-
.endRow()
1437-
.beginRow()
1438-
.append(42)
1439-
.append(map2)
1440-
.endRow()
1441-
.flush();
1423+
for (int i = 0; i < count + tail; i++) {
1424+
List<Long> list = new ArrayList<>();
1425+
for (long j = 0; j < Math.min(i, listLen); j++) {
1426+
if (0 == (i + j) % 13) {
1427+
list.add(null);
1428+
} else {
1429+
list.add(i + j);
1430+
}
1431+
}
1432+
appender.beginRow().append(i).append(list).endRow();
1433+
}
14421434
}
14431435

1444-
try (ResultSet rs = stmt.executeQuery("SELECT col2 FROM tab1 ORDER BY col1")) {
1445-
assertTrue(rs.next());
1446-
assertMapsEqual(rs.getObject(1), map1);
1436+
try (ResultSet rs = stmt.executeQuery("SELECT count(*) FROM tab1")) {
14471437
assertTrue(rs.next());
1448-
assertMapsEqual(rs.getObject(1), map2);
1438+
assertEquals(rs.getInt(1), count + tail);
14491439
assertFalse(rs.next());
14501440
}
1451-
}
1452-
}
1453-
1454-
public static void test_appender_list_basic_map() throws Exception {
1455-
Map<Integer, String> map1 = createMap(41, "foo1", 42, "bar1", 43, "baz1");
1456-
Map<Integer, String> map2 = createMap(44, null, 45, "bar2");
1457-
Map<Integer, String> map3 = new LinkedHashMap<>();
1458-
Map<Integer, String> map4 = createMap(46, "foo3");
1459-
try (DuckDBConnection conn = DriverManager.getConnection(JDBC_URL).unwrap(DuckDBConnection.class);
1460-
Statement stmt = conn.createStatement()) {
1461-
stmt.execute("CREATE TABLE tab1(col1 INT, col2 MAP(INTEGER, VARCHAR)[])");
1462-
try (DuckDBAppender appender = conn.createAppender("tab1")) {
1463-
appender.beginRow()
1464-
.append(42)
1465-
.append(asList(map1, map2, map3))
1466-
.endRow()
1467-
.beginRow()
1468-
.append(43)
1469-
.append((List<Object>) null)
1470-
.endRow()
1471-
.beginRow()
1472-
.append(44)
1473-
.append(asList(null, map4))
1474-
.endRow()
1475-
.flush();
1476-
}
14771441

1478-
try (ResultSet rs = stmt.executeQuery("SELECT unnest(col2) from tab1 WHERE col1 = 42")) {
1479-
assertTrue(rs.next());
1480-
assertMapsEqual(rs.getObject(1), map1);
1442+
try (ResultSet rs = stmt.executeQuery(
1443+
"SELECT count(*) FROM (SELECT unnest(col2) FROM tab1 WHERE col1 = " + (listLen - 7) + ")")) {
14811444
assertTrue(rs.next());
1482-
assertMapsEqual(rs.getObject(1), map2);
1483-
assertTrue(rs.next());
1484-
assertMapsEqual(rs.getObject(1), map3);
1485-
assertFalse(rs.next());
1486-
}
1487-
try (ResultSet rs = stmt.executeQuery("SELECT col2 from tab1 WHERE col1 = 43")) {
1488-
assertTrue(rs.next());
1489-
assertNull(rs.getObject(1));
1490-
assertTrue(rs.wasNull());
1445+
assertEquals(rs.getInt(1), listLen - 7);
14911446
assertFalse(rs.next());
14921447
}
1493-
try (ResultSet rs = stmt.executeQuery("SELECT unnest(col2) from tab1 WHERE col1 = 44")) {
1494-
assertTrue(rs.next());
1495-
assertNull(rs.getObject(1));
1496-
assertTrue(rs.wasNull());
1497-
assertTrue(rs.next());
1498-
assertMapsEqual(rs.getObject(1), map4);
1499-
assertFalse(rs.next());
1448+
1449+
try (ResultSet rs = stmt.executeQuery("SELECT col1, unnest(col2) FROM tab1 ORDER BY col1")) {
1450+
for (int i = 0; i < count + tail; i++) {
1451+
for (long j = 0; j < Math.min(i, listLen); j++) {
1452+
assertTrue(rs.next());
1453+
assertEquals(rs.getInt(1), i);
1454+
if (0 == (i + j) % 13) {
1455+
assertNull(rs.getObject(2));
1456+
assertTrue(rs.wasNull());
1457+
} else {
1458+
assertEquals(rs.getLong(2), i + j);
1459+
}
1460+
}
1461+
}
15001462
}
15011463
}
15021464
}

0 commit comments

Comments
 (0)