Support decimal compare functions on different precisions and scales (6207)

rui-mo · rui-mo · commit c4ceb71fe663 · 2023-08-31T14:07:07.000+08:00
diff --git a/velox/docs/functions/spark/comparison.rst b/velox/docs/functions/spark/comparison.rst
@@ -78,5 +78,27 @@ Comparison Functions
     Returns true if x is not equal to y. Supports all scalar types. The types
     of x and y must be the same. Corresponds to Spark's operator ``!=``.
 
+.. spark:function:: decimal_lt(x, y) -> boolean
 
+    Returns true if x is less than y. Supports decimal types with different precisions and scales.
+    Corresponds to Spark's operator ``<``.
 
+.. spark:function:: decimal_lte(x, y) -> boolean
+
+    Returns true if x is less than or equal to y. Supports decimal types with different precisions and scales.
+    Corresponds to Spark's operator ``<=``.
+
+.. spark:function:: decimal_eq(x, y) -> boolean
+
+    Returns true if x is equal to y. Supports decimal types with different precisions and scales.
+    Corresponds to Spark's operator ``==``.
+
+.. spark:function:: decimal_gt(x, y) -> boolean
+
+    Returns true if x is greater than y. Supports decimal types with different precisions and scales.
+    Corresponds to Spark's operator ``>``.
+
+.. spark:function:: decimal_gte(x, y) -> boolean
+
+    Returns true if x is greater than or equal to y. Supports decimal types with different precisions and scales.
+    Corresponds to Spark's operator ``>=``.
diff --git a/velox/functions/sparksql/DecimalVectorFunctions.cpp b/velox/functions/sparksql/DecimalVectorFunctions.cpp
@@ -247,6 +247,284 @@ class DecimalRoundFunction : public exec::VectorFunction {
     }
   }
 };
+
+// Brings a and b to a common scale and compares them. Returns 0 when a equals
+// b, -1 when a < b and 1 when a > b. deltaScale is aScale - bScale (caller).
+template <typename T>
+int32_t rescaleAndCompare(T a, T b, int32_t deltaScale) {
+  T aScaled = a; // No overflow check here; the caller picks a wide enough T.
+  T bScaled = b;
+  if (deltaScale < 0) { // a has the smaller scale: multiply it up.
+    aScaled = a * velox::DecimalUtil::kPowersOfTen[-deltaScale];
+  } else if (deltaScale > 0) { // b has the smaller scale: multiply it up.
+    bScaled = b * velox::DecimalUtil::kPowersOfTen[deltaScale];
+  }
+  if (aScaled == bScaled) {
+    return 0;
+  } else if (aScaled < bScaled) {
+    return -1;
+  } else {
+    return 1;
+  }
+}
+
+// Compares two decimals, rescaling one when scales differ. Returns -1, 0 or 1.
+int32_t decimalCompare(
+    int128_t a,
+    uint8_t aPrecision,
+    uint8_t aScale,
+    int128_t b,
+    uint8_t bPrecision,
+    uint8_t bScale) {
+  int32_t deltaScale = aScale - bScale; // < 0 rescales a; > 0 rescales b.
+  // Use 256-bit math when the rescaled side would exceed the max precision.
+  bool need256 = (deltaScale < 0 &&
+                  aPrecision - deltaScale > LongDecimalType::kMaxPrecision) ||
+      (bPrecision + deltaScale > LongDecimalType::kMaxPrecision);
+  if (need256) {
+    return rescaleAndCompare<int256_t>(
+        static_cast<int256_t>(a), static_cast<int256_t>(b), deltaScale);
+  }
+  return rescaleAndCompare<int128_t>(a, b, deltaScale);
+}
+
+// Greater-than: apply() is true when decimalCompare reports a > b.
+class Gt {
+ public:
+  inline static bool apply(
+      int128_t a,
+      uint8_t aPrecision,
+      uint8_t aScale,
+      int128_t b,
+      uint8_t bPrecision,
+      uint8_t bScale) {
+    return decimalCompare(a, aPrecision, aScale, b, bPrecision, bScale) > 0;
+  }
+};
+
+// Greater-than-or-equal: apply() is true when decimalCompare reports a >= b.
+class Gte {
+ public:
+  inline static bool apply(
+      int128_t a,
+      uint8_t aPrecision,
+      uint8_t aScale,
+      int128_t b,
+      uint8_t bPrecision,
+      uint8_t bScale) {
+    return decimalCompare(a, aPrecision, aScale, b, bPrecision, bScale) >= 0;
+  }
+};
+
+// Less-than: apply() is true when decimalCompare reports a < b.
+class Lt {
+ public:
+  inline static bool apply(
+      int128_t a,
+      uint8_t aPrecision,
+      uint8_t aScale,
+      int128_t b,
+      uint8_t bPrecision,
+      uint8_t bScale) {
+    return decimalCompare(a, aPrecision, aScale, b, bPrecision, bScale) < 0;
+  }
+};
+
+// Less-than-or-equal: apply() is true when decimalCompare reports a <= b.
+class Lte {
+ public:
+  inline static bool apply(
+      int128_t a,
+      uint8_t aPrecision,
+      uint8_t aScale,
+      int128_t b,
+      uint8_t bPrecision,
+      uint8_t bScale) {
+    return decimalCompare(a, aPrecision, aScale, b, bPrecision, bScale) <= 0;
+  }
+};
+
+// Equality: apply() is true when decimalCompare reports a == b after rescaling.
+class Eq {
+ public:
+  inline static bool apply(
+      int128_t a,
+      uint8_t aPrecision,
+      uint8_t aScale,
+      int128_t b,
+      uint8_t bPrecision,
+      uint8_t bScale) {
+    return decimalCompare(a, aPrecision, aScale, b, bPrecision, bScale) == 0;
+  }
+};
+
+// Compares decimal vectors with native types A and B using Operation::apply.
+template <typename A, typename B, typename Operation /* Comparison operation */>
+class DecimalCompareFunction : public exec::VectorFunction {
+ public:
+  DecimalCompareFunction(
+      uint8_t aPrecision,
+      uint8_t aScale,
+      uint8_t bPrecision,
+      uint8_t bScale)
+      : aPrecision_(aPrecision),
+        aScale_(aScale),
+        bPrecision_(bPrecision),
+        bScale_(bScale) {}
+
+  void apply(
+      const SelectivityVector& rows,
+      std::vector<VectorPtr>& args,
+      const TypePtr& resultType,
+      exec::EvalCtx& context,
+      VectorPtr& result) const override {
+    prepareResults(rows, resultType, context, result);
+
+    // Fast path when the first argument is flat; inputs are only read.
+    if (args[0]->isFlatEncoding()) {
+      auto rawA = args[0]->asUnchecked<FlatVector<A>>()->rawValues();
+
+      if (args[1]->isConstantEncoding()) {
+        auto constantB = args[1]->asUnchecked<SimpleVector<B>>()->valueAt(0);
+        context.applyToSelectedNoThrow(rows, [&](auto row) {
+          result->asUnchecked<FlatVector<bool>>()->set(
+              row,
+              Operation::apply(
+                  (int128_t)rawA[row],
+                  aPrecision_,
+                  aScale_,
+                  (int128_t)constantB,
+                  bPrecision_,
+                  bScale_));
+        });
+        return;
+      }
+
+      if (args[1]->isFlatEncoding()) {
+        auto rawB = args[1]->asUnchecked<FlatVector<B>>()->rawValues();
+        context.applyToSelectedNoThrow(rows, [&](auto row) {
+          result->asUnchecked<FlatVector<bool>>()->set(
+              row,
+              Operation::apply(
+                  (int128_t)rawA[row],
+                  aPrecision_,
+                  aScale_,
+                  (int128_t)rawB[row],
+                  bPrecision_,
+                  bScale_));
+        });
+        return;
+      }
+    } else {
+      // Fast path when the first argument is encoded but the second is
+      // constant. Other encodings fall through and are decoded again below.
+      exec::DecodedArgs decodedArgs(rows, args, context);
+      auto aDecoded = decodedArgs.at(0);
+      auto aDecodedData = aDecoded->data<A>();
+
+      if (args[1]->isConstantEncoding()) {
+        auto constantB = args[1]->asUnchecked<SimpleVector<B>>()->valueAt(0);
+        context.applyToSelectedNoThrow(rows, [&](auto row) {
+          auto value = aDecodedData[aDecoded->index(row)];
+          result->asUnchecked<FlatVector<bool>>()->set(
+              row,
+              Operation::apply(
+                  (int128_t)value,
+                  aPrecision_,
+                  aScale_,
+                  (int128_t)constantB,
+                  bPrecision_,
+                  bScale_));
+        });
+        return;
+      }
+    }
+
+    // General path: decode both inputs, whatever their encodings.
+    exec::DecodedArgs decodedArgs(rows, args, context);
+    auto aDecoded = decodedArgs.at(0);
+    auto bDecoded = decodedArgs.at(1);
+
+    auto aDecodedData = aDecoded->data<A>();
+    auto bDecodedData = bDecoded->data<B>();
+
+    context.applyToSelectedNoThrow(rows, [&](auto row) {
+      auto aValue = aDecodedData[aDecoded->index(row)];
+      auto bValue = bDecodedData[bDecoded->index(row)];
+      result->asUnchecked<FlatVector<bool>>()->set(
+          row,
+          Operation::apply(
+              (int128_t)aValue,
+              aPrecision_,
+              aScale_,
+              (int128_t)bValue,
+              bPrecision_,
+              bScale_));
+    });
+  }
+
+ private:
+  void prepareResults( // Makes result writable for the selected rows.
+      const SelectivityVector& rows,
+      const TypePtr& resultType,
+      exec::EvalCtx& context,
+      VectorPtr& result) const {
+    context.ensureWritable(rows, resultType, result);
+    result->clearNulls(rows); // Start with no nulls in the selected rows.
+  }
+
+  const uint8_t aPrecision_; // Precision and scale of the first input type.
+  const uint8_t aScale_;
+  const uint8_t bPrecision_; // Precision and scale of the second input type.
+  const uint8_t bScale_;
+};
+
+template <typename Operation> // Operation is one of Gt, Gte, Lt, Lte, Eq.
+std::shared_ptr<exec::VectorFunction> createDecimalCompareFunction(
+    const std::string& name, // Not used in the body.
+    const std::vector<exec::VectorFunctionArg>& inputArgs,
+    const core::QueryConfig& /*config*/) {
+  const auto& aType = inputArgs[0].type; // Input types pick the instantiation.
+  const auto& bType = inputArgs[1].type;
+  auto [aPrecision, aScale] = getDecimalPrecisionScale(*aType);
+  auto [bPrecision, bScale] = getDecimalPrecisionScale(*bType);
+  if (aType->isShortDecimal()) { // Short decimal values are read as int64_t.
+    if (bType->isShortDecimal()) {
+      return std::make_shared<
+          DecimalCompareFunction<int64_t, int64_t, Operation>>(
+          aPrecision, aScale, bPrecision, bScale);
+    } else if (bType->isLongDecimal()) {
+      return std::make_shared<
+          DecimalCompareFunction<int64_t, int128_t, Operation>>(
+          aPrecision, aScale, bPrecision, bScale);
+    }
+  }
+  if (aType->isLongDecimal()) { // Long decimal values are read as int128_t.
+    if (bType->isShortDecimal()) {
+      return std::make_shared<
+          DecimalCompareFunction<int128_t, int64_t, Operation>>(
+          aPrecision, aScale, bPrecision, bScale);
+    } else if (bType->isLongDecimal()) {
+      return std::make_shared<
+          DecimalCompareFunction<int128_t, int128_t, Operation>>(
+          aPrecision, aScale, bPrecision, bScale);
+    }
+  }
+  VELOX_UNREACHABLE(); // An input is neither a short nor a long decimal.
+}
+
+std::vector<std::shared_ptr<exec::FunctionSignature>>
+decimalCompareSignature() { // (DECIMAL(p1, s1), DECIMAL(p2, s2)) -> boolean.
+  return {exec::FunctionSignatureBuilder()
+              .integerVariable("a_precision") // Each argument has its own
+              .integerVariable("a_scale") // precision and scale variables, so
+              .integerVariable("b_precision") // the two decimal types may
+              .integerVariable("b_scale") // differ.
+              .returnType("boolean")
+              .argumentType("DECIMAL(a_precision, a_scale)")
+              .argumentType("DECIMAL(b_precision, b_scale)")
+              .build()};
+}
 } // namespace
 
 std::vector<std::shared_ptr<exec::FunctionSignature>>
@@ -308,4 +586,29 @@ VELOX_DECLARE_VECTOR_FUNCTION(
     udf_decimal_round,
     std::vector<std::shared_ptr<exec::FunctionSignature>>{},
     std::make_unique<DecimalRoundFunction>());
+
+VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
+    udf_decimal_gt, // Registered as "decimal_gt" in RegisterCompare.cpp.
+    decimalCompareSignature(),
+    createDecimalCompareFunction<Gt>);
+
+VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
+    udf_decimal_gte, // Registered as "decimal_gte" in RegisterCompare.cpp.
+    decimalCompareSignature(),
+    createDecimalCompareFunction<Gte>);
+
+VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
+    udf_decimal_lt, // Registered as "decimal_lt" in RegisterCompare.cpp.
+    decimalCompareSignature(),
+    createDecimalCompareFunction<Lt>);
+
+VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
+    udf_decimal_lte, // Registered as "decimal_lte" in RegisterCompare.cpp.
+    decimalCompareSignature(),
+    createDecimalCompareFunction<Lte>);
+
+VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
+    udf_decimal_eq, // Registered as "decimal_eq" in RegisterCompare.cpp.
+    decimalCompareSignature(),
+    createDecimalCompareFunction<Eq>);
 } // namespace facebook::velox::functions::sparksql
diff --git a/velox/functions/sparksql/RegisterCompare.cpp b/velox/functions/sparksql/RegisterCompare.cpp
@@ -50,6 +50,12 @@ void registerCompareFunctions(const std::string& prefix) {
       {prefix + "between"});
   registerFunction<BetweenFunction, bool, double, double, double>(
       {prefix + "between"});
+  // Decimal compare functions.
+  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_gt, "decimal_gt");
+  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_gte, "decimal_gte");
+  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_lt, "decimal_lt");
+  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_lte, "decimal_lte");
+  VELOX_REGISTER_VECTOR_FUNCTION(udf_decimal_eq, "decimal_eq");
 }
 
 } // namespace facebook::velox::functions::sparksql
diff --git a/velox/type/Type.h b/velox/type/Type.h
@@ -15,6 +15,7 @@
  */
 #pragma once
 
+#include <boost/multiprecision/cpp_int.hpp>
 #include <fmt/core.h>
 #include <fmt/format.h>
 #include <folly/Format.h>
@@ -44,6 +45,7 @@
 namespace facebook::velox {
 
 using int128_t = __int128_t;
+using int256_t = boost::multiprecision::int256_t; // 256-bit signed integer.
 
 /// Velox type system supports a small set of SQL-compatible composeable types:
 /// BOOLEAN, TINYINT, SMALLINT, INTEGER, BIGINT, HUGEINT, REAL, DOUBLE, VARCHAR,