Skip to content

GH-774: Consolidate BitVectorHelper.getValidityBufferSize and BaseValueVector.getValidityBufferSizeFromCount #775

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.arrow.vector;

import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.nio.ByteBuffer;
import java.util.ArrayList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.arrow.vector;

import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.nio.ByteBuffer;
import java.util.ArrayList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,6 @@ protected ArrowBuf releaseBuffer(ArrowBuf buffer) {
return buffer;
}

/* number of bytes for the validity buffer for the given valueCount */
protected static int getValidityBufferSizeFromCount(final int valueCount) {
return DataSizeRoundingUtil.divideBy8Ceil(valueCount);
}

/* round up bytes for the validity buffer for the given valueCount */
private static long roundUp8ForValidityBuffer(long valueCount) {
return ((valueCount + 63) >> 6) << 3;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.arrow.vector;

import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.nio.ByteBuffer;
import java.util.ArrayList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.arrow.vector;

import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.apache.arrow.vector.util.DataSizeRoundingUtil.roundUpToMultipleOf16;

import java.nio.ByteBuffer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.arrow.vector;

import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;

import org.apache.arrow.memory.ArrowBuf;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ public static void setValidityBit(ArrowBuf validityBuffer, int index, int value)
public static ArrowBuf setValidityBit(
ArrowBuf validityBuffer, BufferAllocator allocator, int valueCount, int index, int value) {
if (validityBuffer == null) {
validityBuffer = allocator.buffer(getValidityBufferSize(valueCount));
validityBuffer = allocator.buffer(getValidityBufferSizeFromCount(valueCount));
}
setValidityBit(validityBuffer, index, value);
if (index == (valueCount - 1)) {
validityBuffer.writerIndex(getValidityBufferSize(valueCount));
validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
}

return validityBuffer;
Expand All @@ -165,7 +165,7 @@ public static int get(final ArrowBuf buffer, int index) {
* @param valueCount number of elements in the vector
* @return buffer size
*/
public static int getValidityBufferSize(int valueCount) {
public static int getValidityBufferSizeFromCount(int valueCount) {
return DataSizeRoundingUtil.divideBy8Ceil(valueCount);
}

Expand All @@ -182,7 +182,7 @@ public static int getNullCount(final ArrowBuf validityBuffer, final int valueCou
return 0;
}
int count = 0;
final int sizeInBytes = getValidityBufferSize(valueCount);
final int sizeInBytes = getValidityBufferSizeFromCount(valueCount);
// If value count is not a multiple of 8, then calculate number of used bits in the last byte
final int remainder = valueCount % 8;
final int fullBytesCount = remainder == 0 ? sizeInBytes : sizeInBytes - 1;
Expand Down Expand Up @@ -233,7 +233,7 @@ public static boolean checkAllBitsEqualTo(
if (valueCount == 0) {
return true;
}
final int sizeInBytes = getValidityBufferSize(valueCount);
final int sizeInBytes = getValidityBufferSizeFromCount(valueCount);

// boundary check
validityBuffer.checkBytes(0, sizeInBytes);
Expand Down Expand Up @@ -325,7 +325,7 @@ public static ArrowBuf loadValidityBuffer(
sourceValidityBuffer == null || sourceValidityBuffer.capacity() == 0;
if (isValidityBufferNull
&& (fieldNode.getNullCount() == 0 || fieldNode.getNullCount() == valueCount)) {
newBuffer = allocator.buffer(getValidityBufferSize(valueCount));
newBuffer = allocator.buffer(getValidityBufferSizeFromCount(valueCount));
newBuffer.setZero(0, newBuffer.capacity());
if (fieldNode.getNullCount() != 0) {
/* all NULLs */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
import static org.apache.arrow.util.Preconditions.checkArgument;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.DATA_VECTOR_NAME;

import java.util.ArrayList;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
import static org.apache.arrow.util.Preconditions.checkArgument;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.util.ArrayList;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
import static org.apache.arrow.util.Preconditions.checkArgument;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.util.ArrayList;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
import static org.apache.arrow.util.Preconditions.checkArgument;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.util.ArrayList;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt;
import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
import static org.apache.arrow.util.Preconditions.checkArgument;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.util.ArrayList;
import java.util.Arrays;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.arrow.vector.complex;

import static org.apache.arrow.util.Preconditions.checkArgument;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.util.List;
import org.apache.arrow.memory.BufferAllocator;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
import static org.apache.arrow.util.Preconditions.checkNotNull;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;

import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -89,7 +90,7 @@ public StructVector(
super(name, checkNotNull(allocator), fieldType, callBack);
this.validityBuffer = allocator.getEmpty();
this.validityAllocationSizeInBytes =
BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
getValidityBufferSizeFromCount(BaseValueVector.INITIAL_VALUE_ALLOCATION);
}

/**
Expand Down Expand Up @@ -118,7 +119,7 @@ public StructVector(
allowConflictPolicyChanges);
this.validityBuffer = allocator.getEmpty();
this.validityAllocationSizeInBytes =
BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
getValidityBufferSizeFromCount(BaseValueVector.INITIAL_VALUE_ALLOCATION);
}

/**
Expand All @@ -132,7 +133,7 @@ public StructVector(Field field, BufferAllocator allocator, CallBack callBack) {
super(field, checkNotNull(allocator), callBack);
this.validityBuffer = allocator.getEmpty();
this.validityAllocationSizeInBytes =
BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
getValidityBufferSizeFromCount(BaseValueVector.INITIAL_VALUE_ALLOCATION);
}

/**
Expand All @@ -153,7 +154,7 @@ public StructVector(
super(field, checkNotNull(allocator), callBack, conflictPolicy, allowConflictPolicyChanges);
this.validityBuffer = allocator.getEmpty();
this.validityAllocationSizeInBytes =
BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION);
getValidityBufferSizeFromCount(BaseValueVector.INITIAL_VALUE_ALLOCATION);
}

@Override
Expand Down Expand Up @@ -182,7 +183,7 @@ public List<ArrowBuf> getFieldBuffers() {

private void setReaderAndWriterIndex() {
validityBuffer.readerIndex(0);
validityBuffer.writerIndex(BitVectorHelper.getValidityBufferSize(valueCount));
validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount));
}

/**
Expand Down Expand Up @@ -318,7 +319,7 @@ public void splitAndTransfer(int startIndex, int length) {
private void splitAndTransferValidityBuffer(int startIndex, int length, StructVector target) {
int firstByteSource = BitVectorHelper.byteIndex(startIndex);
int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1);
int byteSizeTarget = BitVectorHelper.getValidityBufferSize(length);
int byteSizeTarget = getValidityBufferSizeFromCount(length);
int offset = startIndex % 8;

if (length > 0) {
Expand Down Expand Up @@ -464,7 +465,7 @@ public int getBufferSize() {
if (valueCount == 0) {
return 0;
}
return super.getBufferSize() + BitVectorHelper.getValidityBufferSize(valueCount);
return super.getBufferSize() + getValidityBufferSizeFromCount(valueCount);
}

/**
Expand All @@ -478,18 +479,18 @@ public int getBufferSizeFor(final int valueCount) {
if (valueCount == 0) {
return 0;
}
return super.getBufferSizeFor(valueCount) + BitVectorHelper.getValidityBufferSize(valueCount);
return super.getBufferSizeFor(valueCount) + getValidityBufferSizeFromCount(valueCount);
}

@Override
public void setInitialCapacity(int numRecords) {
validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords);
validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
super.setInitialCapacity(numRecords);
}

@Override
public void setInitialCapacity(int numRecords, double density) {
validityAllocationSizeInBytes = BitVectorHelper.getValidityBufferSize(numRecords);
validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords);
super.setInitialCapacity(numRecords, density);
}

Expand Down Expand Up @@ -547,7 +548,7 @@ private long getNewAllocationSize(int currentBufferCapacity) {
newAllocationSize = validityAllocationSizeInBytes;
} else {
newAllocationSize =
BitVectorHelper.getValidityBufferSize(BaseValueVector.INITIAL_VALUE_ALLOCATION) * 2L;
getValidityBufferSizeFromCount(BaseValueVector.INITIAL_VALUE_ALLOCATION) * 2L;
}
}
newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static com.fasterxml.jackson.core.JsonToken.END_OBJECT;
import static com.fasterxml.jackson.core.JsonToken.START_ARRAY;
import static com.fasterxml.jackson.core.JsonToken.START_OBJECT;
import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.apache.arrow.vector.BufferLayout.BufferType.DATA;
import static org.apache.arrow.vector.BufferLayout.BufferType.OFFSET;
import static org.apache.arrow.vector.BufferLayout.BufferType.SIZE;
Expand Down Expand Up @@ -381,7 +382,7 @@ private class BufferHelper {
new BufferReader() {
@Override
protected ArrowBuf read(BufferAllocator allocator, int count) throws IOException {
final int bufferSize = BitVectorHelper.getValidityBufferSize(count);
final int bufferSize = getValidityBufferSizeFromCount(count);
ArrowBuf buf = allocator.buffer(bufferSize);

// C++ integration test fails without this.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.arrow.vector;

import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
Expand Down Expand Up @@ -943,7 +944,7 @@ public void testGetBufferSizeFor() {
int[] indices = new int[] {0, 2, 4, 6, 10, 14};

for (int valueCount = 1; valueCount <= 5; valueCount++) {
int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount);
int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
int offsetBufferSize = (valueCount + 1) * LargeListVector.OFFSET_WIDTH;

int expectedSize =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.arrow.vector;

import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertSame;
Expand Down Expand Up @@ -1062,7 +1063,7 @@ public void testGetBufferSizeFor() {
int[] indices = new int[] {0, 2, 4, 6, 10, 14};

for (int valueCount = 1; valueCount <= 5; valueCount++) {
int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount);
int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
int offsetBufferSize = valueCount * BaseLargeRepeatedValueViewVector.OFFSET_WIDTH;
int sizeBufferSize = valueCount * BaseLargeRepeatedValueViewVector.SIZE_WIDTH;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.arrow.vector;

import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNull;
Expand Down Expand Up @@ -1123,7 +1124,7 @@ public void testGetBufferSizeFor() {
int[] indices = new int[] {0, 2, 4, 6, 10, 14};

for (int valueCount = 1; valueCount <= 5; valueCount++) {
int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount);
int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
int offsetBufferSize = (valueCount + 1) * BaseRepeatedValueVector.OFFSET_WIDTH;

int expectedSize =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.arrow.vector;

import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertThrows;
Expand Down Expand Up @@ -1075,7 +1076,7 @@ public void testGetBufferSizeFor() {
int[] indices = new int[] {0, 2, 4, 6, 10, 14};

for (int valueCount = 1; valueCount <= 5; valueCount++) {
int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount);
int validityBufferSize = getValidityBufferSizeFromCount(valueCount);
int offsetBufferSize = valueCount * BaseRepeatedValueViewVector.OFFSET_WIDTH;
int sizeBufferSize = valueCount * BaseRepeatedValueViewVector.SIZE_WIDTH;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package org.apache.arrow.vector;

import static org.apache.arrow.vector.BitVectorHelper.getValidityBufferSizeFromCount;
import static org.apache.arrow.vector.TestUtils.newVarBinaryVector;
import static org.apache.arrow.vector.TestUtils.newVarCharVector;
import static org.apache.arrow.vector.TestUtils.newVector;
Expand Down Expand Up @@ -1233,7 +1234,7 @@ public void testSplitAndTransfer3() {
// the size needed for the validity buffer
final long validitySize =
DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize(
BaseValueVector.getValidityBufferSizeFromCount(2));
getValidityBufferSizeFromCount(2));
assertEquals(allocatedMem + validitySize, allocator.getAllocatedMemory());
// The validity and offset buffers are sliced from the same buffer. See
// BaseFixedWidthVector#allocateBytes.
Expand Down Expand Up @@ -2464,7 +2465,7 @@ public void testDefaultAllocNewAll() {
assertTrue(intVector.getValueCapacity() >= defaultCapacity);
expectedSize =
(defaultCapacity * IntVector.TYPE_WIDTH)
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ getValidityBufferSizeFromCount(defaultCapacity);
assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);

// verify that the wastage is within bounds for BigIntVector.
Expand All @@ -2473,7 +2474,7 @@ public void testDefaultAllocNewAll() {
assertTrue(bigIntVector.getValueCapacity() >= defaultCapacity);
expectedSize =
(defaultCapacity * bigIntVector.TYPE_WIDTH)
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ getValidityBufferSizeFromCount(defaultCapacity);
assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);

// verify that the wastage is within bounds for DecimalVector.
Expand All @@ -2482,7 +2483,7 @@ public void testDefaultAllocNewAll() {
assertTrue(decimalVector.getValueCapacity() >= defaultCapacity);
expectedSize =
(defaultCapacity * decimalVector.TYPE_WIDTH)
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity);
+ getValidityBufferSizeFromCount(defaultCapacity);
assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);

// verify that the wastage is within bounds for VarCharVector.
Expand All @@ -2492,7 +2493,7 @@ public void testDefaultAllocNewAll() {
assertTrue(varCharVector.getValueCapacity() >= defaultCapacity - 1);
expectedSize =
(defaultCapacity * VarCharVector.OFFSET_WIDTH)
+ BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity)
+ getValidityBufferSizeFromCount(defaultCapacity)
+ defaultCapacity * 8;
// wastage should be less than 5%.
assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
Expand All @@ -2501,7 +2502,7 @@ public void testDefaultAllocNewAll() {
beforeSize = childAllocator.getAllocatedMemory();
bitVector.allocateNew();
assertTrue(bitVector.getValueCapacity() >= defaultCapacity);
expectedSize = BaseFixedWidthVector.getValidityBufferSizeFromCount(defaultCapacity) * 2;
expectedSize = getValidityBufferSizeFromCount(defaultCapacity) * 2;
assertTrue(childAllocator.getAllocatedMemory() - beforeSize <= expectedSize * 1.05);
}
}
Expand Down
Loading
Loading