Skip to content

Commit 6942d8c

Browse files
committed
Support cast from decimal to varchar (6210)
1 parent fe50ba7 commit 6942d8c

File tree

6 files changed

+196
-16
lines changed

6 files changed

+196
-16
lines changed

velox/benchmarks/basic/CastBenchmark.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,27 +27,41 @@ int main(int argc, char** argv) {
2727
folly::Init init(&argc, &argv);
2828

2929
ExpressionBenchmarkBuilder benchmarkBuilder;
30-
30+
const vector_size_t vectorSize = 1000;
3131
auto vectorMaker = benchmarkBuilder.vectorMaker();
3232
auto invalidInput = vectorMaker.flatVector<facebook::velox::StringView>({""});
3333
auto validInput = vectorMaker.flatVector<facebook::velox::StringView>({""});
3434
auto nanInput = vectorMaker.flatVector<facebook::velox::StringView>({""});
35+
auto shortDecimalInput = vectorMaker.flatVector<int64_t>(
36+
vectorSize, [&](auto j) { return 12345 * j; }, nullptr, DECIMAL(18, 6));
37+
auto longDecimalInput = vectorMaker.flatVector<int128_t>(
38+
vectorSize,
39+
[&](auto j) {
40+
return facebook::velox::HugeInt::build(12345 * j, 56789 * j + 12345);
41+
},
42+
nullptr,
43+
DECIMAL(38, 16));
3544

36-
invalidInput->resize(1000);
37-
validInput->resize(1000);
38-
nanInput->resize(1000);
45+
invalidInput->resize(vectorSize);
46+
validInput->resize(vectorSize);
47+
nanInput->resize(vectorSize);
3948

40-
for (int i = 0; i < 1000; i++) {
49+
for (int i = 0; i < vectorSize; i++) {
4150
nanInput->set(i, "$"_sv);
4251
invalidInput->set(i, StringView::makeInline(std::string("")));
4352
validInput->set(i, StringView::makeInline(std::to_string(i)));
4453
}
4554

4655
benchmarkBuilder
4756
.addBenchmarkSet(
48-
"cast_int",
57+
"cast",
4958
vectorMaker.rowVector(
50-
{"valid", "empty", "nan"}, {validInput, invalidInput, nanInput}))
59+
{"valid", "empty", "nan", "short_decimal", "long_decimal"},
60+
{validInput,
61+
invalidInput,
62+
nanInput,
63+
shortDecimalInput,
64+
longDecimalInput}))
5165
.addExpression("try_cast_invalid_empty_input", "try_cast (empty as int) ")
5266
.addExpression(
5367
"tryexpr_cast_invalid_empty_input", "try (cast (empty as int))")
@@ -56,6 +70,8 @@ int main(int argc, char** argv) {
5670
.addExpression("try_cast_valid", "try_cast (valid as int)")
5771
.addExpression("tryexpr_cast_valid", "try (cast (valid as int))")
5872
.addExpression("cast_valid", "cast(valid as int)")
73+
.addExpression("cast_short_decimal", "cast (short_decimal as varchar)")
74+
.addExpression("cast_long_decimal", "cast (long_decimal as varchar)")
5975
.withIterations(100)
6076
.disableTesting();
6177

velox/docs/functions/presto/conversion.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ supported conversions to/from JSON are listed in :doc:`json`.
173173
- Y
174174
- Y
175175
- Y
176-
-
176+
- Y
177177
-
178178
-
179179
- Y
@@ -457,6 +457,7 @@ Valid examples
457457
SELECT cast(infinity() as varchar); -- 'Infinity'
458458
SELECT cast(true as varchar); -- 'true'
459459
SELECT cast(timestamp '1970-01-01 00:00:00' as varchar); -- '1970-01-01T00:00:00.000'
460+
SELECT cast(cast(22.51 as DECIMAL(4, 2)) as varchar); -- '22.51'
460461

461462
Cast to Timestamp
462463
-----------------

velox/expression/CastExpr-inl.h

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
* limitations under the License.
1515
*/
1616
#pragma once
17+
18+
#include <charconv>
19+
1720
#include "velox/common/base/Exceptions.h"
1821
#include "velox/core/CoreTypeSystem.h"
1922
#include "velox/expression/StringWriter.h"
@@ -48,6 +51,27 @@ inline std::exception_ptr makeBadCastException(
4851
false));
4952
}
5053

54+
/// Returns the number of decimal digits in n. Leading zeros are not counted
55+
/// except for n == 0 in which case count_digits returns 1.
56+
inline int countDigits(uint128_t n) {
57+
int count = 1;
58+
for (;;) {
59+
// Integer division is slow so do it for a group of four digits instead
60+
// of for every digit. The idea comes from the talk by Alexandrescu
61+
// "Three Optimization Tips for C++". See speed-test for a comparison.
62+
if (n < 10)
63+
return count;
64+
if (n < 100)
65+
return count + 1;
66+
if (n < 1000)
67+
return count + 2;
68+
if (n < 10000)
69+
return count + 3;
70+
n /= 10000U;
71+
count += 4;
72+
}
73+
}
74+
5175
} // namespace
5276

5377
template <bool adjustForTimeZone>
@@ -366,6 +390,86 @@ VectorPtr CastExpr::applyDecimalToBooleanCast(
366390
return result;
367391
}
368392

393+
template <typename FromNativeType>
394+
VectorPtr CastExpr::applyDecimalToVarcharCast(
395+
const SelectivityVector& rows,
396+
const BaseVector& input,
397+
exec::EvalCtx& context,
398+
const TypePtr& fromType) {
399+
VectorPtr result;
400+
context.ensureWritable(rows, VARCHAR(), result);
401+
(*result).clearNulls(rows);
402+
const auto simpleInput = input.as<SimpleVector<FromNativeType>>();
403+
int precision = getDecimalPrecisionScale(*fromType).first;
404+
int scale = getDecimalPrecisionScale(*fromType).second;
405+
// A varchar's size is estimated with unscaled value digits, dot, leading
406+
// zero, and possible minus sign.
407+
int varcharSize = precision + !!(scale > 0) + !!(precision == scale) + 1;
408+
// Calculate the max total size of the raw string buffer.
409+
size_t maxTotalResultBytes = rows.countSelected() * varcharSize;
410+
411+
auto flatResult = result->asFlatVector<StringView>();
412+
Buffer* buffer = flatResult->getBufferWithSpace(maxTotalResultBytes);
413+
size_t bufferSize = buffer->size();
414+
char* rawBuffer = buffer->asMutable<char>() + bufferSize;
415+
buffer->setSize(bufferSize + maxTotalResultBytes);
416+
size_t offset = 0;
417+
applyToSelectedNoThrowLocal(context, rows, result, [&](vector_size_t row) {
418+
if (simpleInput->isNullAt(row)) {
419+
result->setNull(row, true);
420+
} else {
421+
auto unscaledValue = simpleInput->valueAt(row);
422+
const char zero = '0';
423+
size_t startOffset = offset;
424+
if (unscaledValue == 0) {
425+
memcpy(rawBuffer + offset, &zero, sizeof(char));
426+
offset += sizeof(char);
427+
} else {
428+
if (unscaledValue < 0) {
429+
const char minusSign = '-';
430+
memcpy(rawBuffer + offset, &minusSign, sizeof(char));
431+
offset += sizeof(char);
432+
unscaledValue = ~unscaledValue + 1;
433+
}
434+
auto [position, ec] = std::to_chars(
435+
rawBuffer + offset,
436+
rawBuffer + offset + varcharSize,
437+
unscaledValue / DecimalUtil::kPowersOfTen[scale]);
438+
VELOX_USER_CHECK(
439+
ec == std::errc(),
440+
"Cast from decimal to varchar fails because {}.",
441+
std::make_error_code(ec).message());
442+
offset = position - rawBuffer;
443+
if (scale > 0) {
444+
const char dot = '.';
445+
memcpy(rawBuffer + offset, &dot, sizeof(char));
446+
offset += sizeof(char);
447+
448+
uint128_t fraction = unscaledValue % DecimalUtil::kPowersOfTen[scale];
449+
// Append leading zeros.
450+
for (size_t i = 0; i < std::max(scale - countDigits(fraction), 0);
451+
i++) {
452+
memcpy(rawBuffer + offset, &zero, sizeof(char));
453+
offset += sizeof(char);
454+
}
455+
auto [position, ec] = std::to_chars(
456+
rawBuffer + offset, rawBuffer + offset + varcharSize, fraction);
457+
VELOX_USER_CHECK(
458+
ec == std::errc(),
459+
"Cast from decimal to varchar fails because {}.",
460+
std::make_error_code(ec).message());
461+
offset = position - rawBuffer;
462+
}
463+
}
464+
flatResult->setNoCopy(
465+
row, StringView(rawBuffer + startOffset, offset - startOffset));
466+
}
467+
});
468+
// Update the exact buffer size.
469+
buffer->setSize(bufferSize + offset);
470+
return result;
471+
}
472+
369473
template <typename FromNativeType>
370474
VectorPtr CastExpr::applyDecimalToPrimitiveCast(
371475
const SelectivityVector& rows,

velox/expression/CastExpr.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -526,14 +526,26 @@ void CastExpr::applyPeeled(
526526
} else if (toType->isLongDecimal()) {
527527
result = applyDecimal<int128_t>(rows, input, context, fromType, toType);
528528
} else if (fromType->isDecimal()) {
529-
result = VELOX_DYNAMIC_DECIMAL_TYPE_DISPATCH(
530-
applyDecimalToPrimitiveCast,
531-
fromType,
532-
rows,
533-
input,
534-
context,
535-
fromType,
536-
toType);
529+
switch (toType->kind()) {
530+
case TypeKind::VARCHAR:
531+
result = VELOX_DYNAMIC_DECIMAL_TYPE_DISPATCH(
532+
applyDecimalToVarcharCast,
533+
fromType,
534+
rows,
535+
input,
536+
context,
537+
fromType);
538+
break;
539+
default:
540+
result = VELOX_DYNAMIC_DECIMAL_TYPE_DISPATCH(
541+
applyDecimalToPrimitiveCast,
542+
fromType,
543+
rows,
544+
input,
545+
context,
546+
fromType,
547+
toType);
548+
}
537549
} else {
538550
switch (toType->kind()) {
539551
case TypeKind::MAP:

velox/expression/CastExpr.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,13 @@ class CastExpr : public SpecialForm {
264264
const BaseVector& input,
265265
VectorPtr& result);
266266

267+
template <typename FromNativeType>
268+
VectorPtr applyDecimalToVarcharCast(
269+
const SelectivityVector& rows,
270+
const BaseVector& input,
271+
exec::EvalCtx& context,
272+
const TypePtr& fromType);
273+
267274
template <TypeKind ToKind>
268275
void applyCastPrimitivesDispatch(
269276
const TypePtr& fromType,

velox/expression/tests/CastExprTest.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1431,6 +1431,46 @@ TEST_F(CastExprTest, decimalToBool) {
14311431
"c0", longFlat, makeNullableFlatVector<bool>({1, 0, std::nullopt}));
14321432
}
14331433

1434+
// Checks the string produced by casting short and long decimals to VARCHAR,
// covering the extreme values, zero, small magnitudes that need zero padding
// after the dot, and null.
TEST_F(CastExprTest, decimalToVarchar) {
  // Short decimal with precision == scale: every value is a pure fraction and
  // is printed with a leading "0." followed by 18 fractional digits.
  auto shortDecimals = makeNullableFlatVector<int64_t>(
      {DecimalUtil::kShortDecimalMin,
       -3,
       0,
       55,
       DecimalUtil::kShortDecimalMax,
       std::nullopt},
      DECIMAL(18, 18));
  auto shortExpected = makeNullableFlatVector<StringView>(
      {"-0.999999999999999999",
       "-0.000000000000000003",
       "0",
       "0.000000000000000055",
       "0.999999999999999999",
       std::nullopt});
  testComplexCast("c0", shortDecimals, shortExpected);

  // Long decimal with both integral and fractional digits. The value built
  // from two all-ones words is expected to print as "-0.00001".
  auto longDecimals = makeNullableFlatVector<int128_t>(
      {DecimalUtil::kLongDecimalMin,
       0,
       DecimalUtil::kLongDecimalMax,
       HugeInt::build(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFFFFFFFFFFull),
       HugeInt::build(0xffff, 0xffffffffffffffff),
       std::nullopt},
      DECIMAL(38, 5));
  auto longExpected = makeNullableFlatVector<StringView>(
      {"-999999999999999999999999999999999.99999",
       "0",
       "999999999999999999999999999999999.99999",
       "-0.00001",
       "12089258196146291747.06175",
       std::nullopt});
  testComplexCast("c0", longDecimals, longExpected);
}
1473+
14341474
TEST_F(CastExprTest, decimalToDecimal) {
14351475
// short to short, scale up.
14361476
auto shortFlat =

0 commit comments

Comments
 (0)