Commit e8cf5e2

Support Decimal32/64 types
1 parent 5bb476f commit e8cf5e2

File tree

9 files changed (+560, -67 lines)

datafusion/common/src/cast.rs

Lines changed: 14 additions & 3 deletions
@@ -22,9 +22,10 @@
 use crate::{downcast_value, Result};
 use arrow::array::{
-    BinaryViewArray, DurationMicrosecondArray, DurationMillisecondArray,
-    DurationNanosecondArray, DurationSecondArray, Float16Array, Int16Array, Int8Array,
-    LargeBinaryArray, LargeStringArray, StringViewArray, UInt16Array,
+    BinaryViewArray, Decimal32Array, Decimal64Array, DurationMicrosecondArray,
+    DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array,
+    Int16Array, Int8Array, LargeBinaryArray, LargeStringArray, StringViewArray,
+    UInt16Array,
 };
 use arrow::{
     array::{
@@ -97,6 +98,16 @@ pub fn as_uint64_array(array: &dyn Array) -> Result<&UInt64Array> {
     Ok(downcast_value!(array, UInt64Array))
 }
 
+// Downcast Array to Decimal32Array
+pub fn as_decimal32_array(array: &dyn Array) -> Result<&Decimal32Array> {
+    Ok(downcast_value!(array, Decimal32Array))
+}
+
+// Downcast Array to Decimal64Array
+pub fn as_decimal64_array(array: &dyn Array) -> Result<&Decimal64Array> {
+    Ok(downcast_value!(array, Decimal64Array))
+}
+
 // Downcast Array to Decimal128Array
 pub fn as_decimal128_array(array: &dyn Array) -> Result<&Decimal128Array> {
     Ok(downcast_value!(array, Decimal128Array))
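For context, a minimal sketch of how the new downcast helpers can be used; the values and the small driver function are illustrative, not part of the commit:

    use std::sync::Arc;

    use arrow::array::{ArrayRef, Decimal32Array};
    use datafusion_common::cast::as_decimal32_array;
    use datafusion_common::Result;

    fn demo() -> Result<()> {
        // Hypothetical unscaled values; 12345 at scale 2 would read as 123.45.
        let array: ArrayRef = Arc::new(Decimal32Array::from(vec![12345_i32, 67890]));
        // The helper downcasts a `&dyn Array` back to the concrete decimal type,
        // returning a DataFusion error instead of panicking on a type mismatch.
        let decimals = as_decimal32_array(array.as_ref())?;
        assert_eq!(decimals.value(0), 12345);
        Ok(())
    }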

datafusion/common/src/scalar/mod.rs

Lines changed: 338 additions & 36 deletions
Large diffs are not rendered by default.
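Most of this file's diff (collapsed above) adds `Decimal32`/`Decimal64` variants to `ScalarValue`. A rough sketch of the shape of the new variants, with made-up precision and scale:

    use arrow::datatypes::DataType;
    use datafusion_common::ScalarValue;

    fn demo() {
        // (value, precision, scale), mirroring the existing Decimal128/256 variants.
        let v = ScalarValue::Decimal32(Some(12345), 7, 2); // represents 123.45
        assert_eq!(v.data_type(), DataType::Decimal32(7, 2));
    }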

datafusion/expr-common/src/type_coercion/aggregates.rs

Lines changed: 28 additions & 3 deletions
@@ -18,7 +18,8 @@
 use crate::signature::TypeSignature;
 use arrow::datatypes::{
     DataType, FieldRef, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
-    DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
+    DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, DECIMAL32_MAX_PRECISION,
+    DECIMAL64_MAX_PRECISION,
 };
 
 use datafusion_common::{internal_err, plan_err, Result};
@@ -150,6 +151,18 @@ pub fn sum_return_type(arg_type: &DataType) -> Result<DataType> {
         DataType::Int64 => Ok(DataType::Int64),
         DataType::UInt64 => Ok(DataType::UInt64),
         DataType::Float64 => Ok(DataType::Float64),
+        DataType::Decimal32(precision, scale) => {
+            // in the spark, the result type is DECIMAL(min(38,precision+10), s)
+            // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+            let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 10);
+            Ok(DataType::Decimal128(new_precision, *scale))
+        }
+        DataType::Decimal64(precision, scale) => {
+            // in the spark, the result type is DECIMAL(min(38,precision+10), s)
+            // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+            let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 10);
+            Ok(DataType::Decimal128(new_precision, *scale))
+        }
         DataType::Decimal128(precision, scale) => {
             // In the spark, the result type is DECIMAL(min(38,precision+10), s)
             // Ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
@@ -222,6 +235,16 @@ pub fn avg_return_type(func_name: &str, arg_type: &DataType) -> Result<DataType>
 /// Internal sum type of an average
 pub fn avg_sum_type(arg_type: &DataType) -> Result<DataType> {
     match arg_type {
+        DataType::Decimal32(precision, scale) => {
+            // In the spark, the sum type of avg is DECIMAL(min(38,precision+10), s)
+            let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 10);
+            Ok(DataType::Decimal32(new_precision, *scale))
+        }
+        DataType::Decimal64(precision, scale) => {
+            // In the spark, the sum type of avg is DECIMAL(min(38,precision+10), s)
+            let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 10);
+            Ok(DataType::Decimal64(new_precision, *scale))
+        }
         DataType::Decimal128(precision, scale) => {
             // In the spark, the sum type of avg is DECIMAL(min(38,precision+10), s)
             let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 10);
@@ -249,7 +272,7 @@ pub fn is_sum_support_arg_type(arg_type: &DataType) -> bool {
         _ => matches!(
             arg_type,
             arg_type if NUMERICS.contains(arg_type)
-                || matches!(arg_type, DataType::Decimal128(_, _) | DataType::Decimal256(_, _))
+                || matches!(arg_type, DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _))
         ),
     }
 }
@@ -262,7 +285,7 @@ pub fn is_avg_support_arg_type(arg_type: &DataType) -> bool {
         _ => matches!(
             arg_type,
             arg_type if NUMERICS.contains(arg_type)
-                || matches!(arg_type, DataType::Decimal128(_, _)| DataType::Decimal256(_, _))
+                || matches!(arg_type, DataType::Decimal32(_, _) | DataType::Decimal64(_, _) | DataType::Decimal128(_, _) | DataType::Decimal256(_, _))
         ),
     }
 }
@@ -297,6 +320,8 @@ pub fn coerce_avg_type(func_name: &str, arg_types: &[DataType]) -> Result<Vec<Da
     // Refer to https://www.postgresql.org/docs/8.2/functions-aggregate.html doc
     fn coerced_type(func_name: &str, data_type: &DataType) -> Result<DataType> {
         match &data_type {
+            DataType::Decimal32(p, s) => Ok(DataType::Decimal32(*p, *s)),
+            DataType::Decimal64(p, s) => Ok(DataType::Decimal64(*p, *s)),
            DataType::Decimal128(p, s) => Ok(DataType::Decimal128(*p, *s)),
            DataType::Decimal256(p, s) => Ok(DataType::Decimal256(*p, *s)),
            d if d.is_numeric() => Ok(DataType::Float64),
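A small illustrative check of the new coercion arms, assuming the arrow-rs constants DECIMAL32_MAX_PRECISION = 9 and DECIMAL64_MAX_PRECISION = 18, and assuming the helpers are reachable at `datafusion_expr_common::type_coercion::aggregates`:

    use arrow::datatypes::DataType;
    use datafusion_common::Result;
    use datafusion_expr_common::type_coercion::aggregates::{avg_sum_type, sum_return_type};

    fn demo() -> Result<()> {
        // SUM over Decimal32(7, 2): precision widens to min(9, 7 + 10) = 9 and the
        // result is promoted to Decimal128, matching the arm added above.
        assert_eq!(
            sum_return_type(&DataType::Decimal32(7, 2))?,
            DataType::Decimal128(9, 2)
        );
        // The internal sum type of AVG stays at the original decimal width.
        assert_eq!(
            avg_sum_type(&DataType::Decimal64(10, 4))?,
            DataType::Decimal64(18, 4)
        );
        Ok(())
    }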

datafusion/expr/src/type_coercion/mod.rs

Lines changed: 9 additions & 1 deletion
@@ -51,6 +51,8 @@ pub fn is_signed_numeric(dt: &DataType) -> bool {
             | DataType::Float16
             | DataType::Float32
             | DataType::Float64
+            | DataType::Decimal32(_, _)
+            | DataType::Decimal64(_, _)
             | DataType::Decimal128(_, _)
             | DataType::Decimal256(_, _),
     )
@@ -89,5 +91,11 @@ pub fn is_utf8_or_utf8view_or_large_utf8(dt: &DataType) -> bool {
 
 /// Determine whether the given data type `dt` is a `Decimal`.
 pub fn is_decimal(dt: &DataType) -> bool {
-    matches!(dt, DataType::Decimal128(_, _) | DataType::Decimal256(_, _))
+    matches!(
+        dt,
+        DataType::Decimal32(_, _)
+            | DataType::Decimal64(_, _)
+            | DataType::Decimal128(_, _)
+            | DataType::Decimal256(_, _)
+    )
 }
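A quick illustration of the widened predicates; the precision and scale values are arbitrary:

    use arrow::datatypes::DataType;
    use datafusion_expr::type_coercion::{is_decimal, is_signed_numeric};

    fn demo() {
        // Both predicates now cover all four decimal widths.
        assert!(is_decimal(&DataType::Decimal32(5, 2)));
        assert!(is_signed_numeric(&DataType::Decimal64(12, 3)));
    }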

datafusion/functions-aggregate/src/average.rs

Lines changed: 69 additions & 4 deletions
@@ -24,9 +24,10 @@ use arrow::array::{
 
 use arrow::compute::sum;
 use arrow::datatypes::{
-    i256, ArrowNativeType, DataType, Decimal128Type, Decimal256Type, DecimalType,
-    DurationMicrosecondType, DurationMillisecondType, DurationNanosecondType,
-    DurationSecondType, Field, FieldRef, Float64Type, TimeUnit, UInt64Type,
+    i256, ArrowNativeType, DataType, Decimal128Type, Decimal256Type, Decimal32Type,
+    Decimal64Type, DecimalType, DurationMicrosecondType, DurationMillisecondType,
+    DurationNanosecondType, DurationSecondType, Field, FieldRef, Float64Type, TimeUnit,
+    UInt64Type,
 };
 use datafusion_common::{
     exec_err, not_impl_err, utils::take_function_args, Result, ScalarValue,
@@ -128,6 +129,28 @@ impl AggregateUDFImpl for Avg {
         } else {
             match (&data_type, acc_args.return_field.data_type()) {
                 (Float64, Float64) => Ok(Box::<AvgAccumulator>::default()),
+                (
+                    Decimal32(sum_precision, sum_scale),
+                    Decimal32(target_precision, target_scale),
+                ) => Ok(Box::new(DecimalAvgAccumulator::<Decimal32Type> {
+                    sum: None,
+                    count: 0,
+                    sum_scale: *sum_scale,
+                    sum_precision: *sum_precision,
+                    target_precision: *target_precision,
+                    target_scale: *target_scale,
+                })),
+                (
+                    Decimal64(sum_precision, sum_scale),
+                    Decimal64(target_precision, target_scale),
+                ) => Ok(Box::new(DecimalAvgAccumulator::<Decimal64Type> {
+                    sum: None,
+                    count: 0,
+                    sum_scale: *sum_scale,
+                    sum_precision: *sum_precision,
+                    target_precision: *target_precision,
+                    target_scale: *target_scale,
+                })),
                 (
                     Decimal128(sum_precision, sum_scale),
                     Decimal128(target_precision, target_scale),
@@ -202,7 +225,11 @@ impl AggregateUDFImpl for Avg {
     fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
         matches!(
             args.return_field.data_type(),
-            DataType::Float64 | DataType::Decimal128(_, _) | DataType::Duration(_)
+            DataType::Float64
+                | DataType::Decimal32(_, _)
+                | DataType::Decimal64(_, _)
+                | DataType::Decimal128(_, _)
+                | DataType::Duration(_)
         ) && !args.is_distinct
     }
 
@@ -222,6 +249,44 @@ impl AggregateUDFImpl for Avg {
                     |sum: f64, count: u64| Ok(sum / count as f64),
                 )))
             }
+            (
+                Decimal32(_sum_precision, sum_scale),
+                Decimal32(target_precision, target_scale),
+            ) => {
+                let decimal_averager = DecimalAverager::<Decimal32Type>::try_new(
+                    *sum_scale,
+                    *target_precision,
+                    *target_scale,
+                )?;
+
+                let avg_fn =
+                    move |sum: i32, count: u64| decimal_averager.avg(sum, count as i32);
+
+                Ok(Box::new(AvgGroupsAccumulator::<Decimal32Type, _>::new(
+                    &data_type,
+                    args.return_field.data_type(),
+                    avg_fn,
+                )))
+            }
+            (
+                Decimal64(_sum_precision, sum_scale),
+                Decimal64(target_precision, target_scale),
+            ) => {
+                let decimal_averager = DecimalAverager::<Decimal64Type>::try_new(
+                    *sum_scale,
+                    *target_precision,
+                    *target_scale,
+                )?;
+
+                let avg_fn =
+                    move |sum: i64, count: u64| decimal_averager.avg(sum, count as i64);
+
+                Ok(Box::new(AvgGroupsAccumulator::<Decimal64Type, _>::new(
+                    &data_type,
+                    args.return_field.data_type(),
+                    avg_fn,
+                )))
+            }
             (
                 Decimal128(_sum_precision, sum_scale),
                 Decimal128(target_precision, target_scale),
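A rough sketch of the arithmetic the new narrow-decimal group accumulators delegate to, assuming `DecimalAverager` is the helper exported from `datafusion_functions_aggregate_common::utils`; the scales and values are made up:

    use arrow::datatypes::Decimal32Type;
    use datafusion_common::Result;
    use datafusion_functions_aggregate_common::utils::DecimalAverager;

    fn demo() -> Result<()> {
        // Configured here with sum scale 2 and a hypothetical target of
        // precision 9, scale 6.
        let averager = DecimalAverager::<Decimal32Type>::try_new(2, 9, 6)?;
        // A group sum of 123.45 (unscaled 12345 at scale 2) over 3 rows averages
        // to 41.15, i.e. 41_150_000 at the target scale of 6.
        assert_eq!(averager.avg(12345_i32, 3_i32)?, 41_150_000);
        Ok(())
    }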

datafusion/functions-aggregate/src/sum.rs

Lines changed: 26 additions & 5 deletions
@@ -18,6 +18,8 @@
 //! Defines `SUM` and `SUM DISTINCT` aggregate accumulators
 
 use ahash::RandomState;
+use arrow::datatypes::DECIMAL32_MAX_PRECISION;
+use arrow::datatypes::DECIMAL64_MAX_PRECISION;
 use datafusion_expr::utils::AggregateOrderSensitivity;
 use std::any::Any;
 use std::mem::size_of_val;
@@ -27,8 +29,8 @@ use arrow::array::ArrowNativeTypeOp;
 use arrow::array::{ArrowNumericType, AsArray};
 use arrow::datatypes::{ArrowNativeType, FieldRef};
 use arrow::datatypes::{
-    DataType, Decimal128Type, Decimal256Type, Float64Type, Int64Type, UInt64Type,
-    DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
+    DataType, Decimal128Type, Decimal256Type, Decimal32Type, Decimal64Type, Float64Type,
+    Int64Type, UInt64Type, DECIMAL128_MAX_PRECISION, DECIMAL256_MAX_PRECISION,
 };
 use arrow::{array::ArrayRef, datatypes::Field};
 use datafusion_common::{
@@ -71,6 +73,12 @@ macro_rules! downcast_sum {
        DataType::Float64 => {
            $helper!(Float64Type, $args.return_field.data_type().clone())
        }
+        DataType::Decimal32(_, _) => {
+            $helper!(Decimal32Type, $args.return_field.data_type().clone())
+        }
+        DataType::Decimal64(_, _) => {
+            $helper!(Decimal64Type, $args.return_field.data_type().clone())
+        }
        DataType::Decimal128(_, _) => {
            $helper!(Decimal128Type, $args.return_field.data_type().clone())
        }
@@ -145,9 +153,10 @@ impl AggregateUDFImpl for Sum {
             DataType::Dictionary(_, v) => coerced_type(v),
             // in the spark, the result type is DECIMAL(min(38,precision+10), s)
             // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
-            DataType::Decimal128(_, _) | DataType::Decimal256(_, _) => {
-                Ok(data_type.clone())
-            }
+            DataType::Decimal32(_, _)
+            | DataType::Decimal64(_, _)
+            | DataType::Decimal128(_, _)
+            | DataType::Decimal256(_, _) => Ok(data_type.clone()),
             dt if dt.is_signed_integer() => Ok(DataType::Int64),
             dt if dt.is_unsigned_integer() => Ok(DataType::UInt64),
             dt if dt.is_floating() => Ok(DataType::Float64),
@@ -163,6 +172,18 @@ impl AggregateUDFImpl for Sum {
             DataType::Int64 => Ok(DataType::Int64),
             DataType::UInt64 => Ok(DataType::UInt64),
             DataType::Float64 => Ok(DataType::Float64),
+            DataType::Decimal32(precision, scale) => {
+                // in the spark, the result type is DECIMAL(min(38,precision+10), s)
+                // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+                let new_precision = DECIMAL32_MAX_PRECISION.min(*precision + 10);
+                Ok(DataType::Decimal128(new_precision, *scale))
+            }
+            DataType::Decimal64(precision, scale) => {
+                // in the spark, the result type is DECIMAL(min(38,precision+10), s)
+                // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
+                let new_precision = DECIMAL64_MAX_PRECISION.min(*precision + 10);
+                Ok(DataType::Decimal128(new_precision, *scale))
+            }
             DataType::Decimal128(precision, scale) => {
                 // in the spark, the result type is DECIMAL(min(38,precision+10), s)
                 // ref: https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
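Taken together with the coercion changes above, SUM and AVG over a narrow decimal column should now plan and execute. A hedged end-to-end sketch; the table name, values, and the tokio runtime are illustrative, not from the commit:

    use std::sync::Arc;

    use arrow::array::{ArrayRef, Decimal32Array, RecordBatch};
    use arrow::datatypes::{DataType, Field, Schema};
    use datafusion::prelude::SessionContext;

    #[tokio::main]
    async fn main() -> datafusion::error::Result<()> {
        // A single Decimal32(7, 2) column holding 1.00, 2.50 and 3.25.
        let schema = Arc::new(Schema::new(vec![Field::new(
            "price",
            DataType::Decimal32(7, 2),
            false,
        )]));
        let prices = Decimal32Array::from(vec![100_i32, 250, 325])
            .with_precision_and_scale(7, 2)?;
        let batch = RecordBatch::try_new(schema, vec![Arc::new(prices) as ArrayRef])?;

        let ctx = SessionContext::new();
        ctx.register_batch("t", batch)?;
        // Per the return_type arm above, SUM should be reported as Decimal128(9, 2).
        ctx.sql("SELECT sum(price), avg(price) FROM t").await?.show().await?;
        Ok(())
    }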

datafusion/proto-common/src/from_proto/mod.rs

Lines changed: 6 additions & 7 deletions
@@ -37,7 +37,6 @@ use datafusion_common::{
         TableParquetOptions,
     },
     file_options::{csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions},
-    not_impl_err,
     parsers::CompressionTypeVariant,
     plan_datafusion_err,
     stats::Precision,
@@ -478,13 +477,13 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
                 let null_type: DataType = v.try_into()?;
                 null_type.try_into().map_err(Error::DataFusionError)?
             }
-            Value::Decimal32Value(_val) => {
-                return not_impl_err!("Decimal32 protobuf deserialization")
-                    .map_err(Error::DataFusionError)
+            Value::Decimal32Value(val) => {
+                let array = vec_to_array(val.value.clone());
+                Self::Decimal32(Some(i32::from_be_bytes(array)), val.p as u8, val.s as i8)
             }
-            Value::Decimal64Value(_val) => {
-                return not_impl_err!("Decimal64 protobuf deserialization")
-                    .map_err(Error::DataFusionError)
+            Value::Decimal64Value(val) => {
+                let array = vec_to_array(val.value.clone());
+                Self::Decimal64(Some(i64::from_be_bytes(array)), val.p as u8, val.s as i8)
             }
             Value::Decimal128Value(val) => {
                 let array = vec_to_array(val.value.clone());

datafusion/proto-common/src/to_proto/mod.rs

Lines changed: 36 additions & 0 deletions
@@ -405,6 +405,42 @@ impl TryFrom<&ScalarValue> for protobuf::ScalarValue {
                     })
                 })
             }
+            ScalarValue::Decimal32(val, p, s) => match *val {
+                Some(v) => {
+                    let array = v.to_be_bytes();
+                    let vec_val: Vec<u8> = array.to_vec();
+                    Ok(protobuf::ScalarValue {
+                        value: Some(Value::Decimal32Value(protobuf::Decimal32 {
+                            value: vec_val,
+                            p: *p as i64,
+                            s: *s as i64,
+                        })),
+                    })
+                }
+                None => Ok(protobuf::ScalarValue {
+                    value: Some(protobuf::scalar_value::Value::NullValue(
+                        (&data_type).try_into()?,
+                    )),
+                }),
+            },
+            ScalarValue::Decimal64(val, p, s) => match *val {
+                Some(v) => {
+                    let array = v.to_be_bytes();
+                    let vec_val: Vec<u8> = array.to_vec();
+                    Ok(protobuf::ScalarValue {
+                        value: Some(Value::Decimal64Value(protobuf::Decimal64 {
+                            value: vec_val,
+                            p: *p as i64,
+                            s: *s as i64,
+                        })),
+                    })
+                }
+                None => Ok(protobuf::ScalarValue {
+                    value: Some(protobuf::scalar_value::Value::NullValue(
+                        (&data_type).try_into()?,
+                    )),
+                }),
+            },
             ScalarValue::Decimal128(val, p, s) => match *val {
                 Some(v) => {
                     let array = v.to_be_bytes();
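Both proto arms reuse the big-endian encoding already used for Decimal128/256: the unscaled integer is written with `to_be_bytes` and read back with `from_be_bytes` (the existing `vec_to_array` helper handles the slice-to-fixed-array step). A minimal standalone illustration of the round trip:

    fn demo() {
        // Serialize: the unscaled value of a hypothetical Decimal32(7, 2) scalar,
        // -123.45, is stored as its big-endian bytes.
        let unscaled: i32 = -12345;
        let wire: Vec<u8> = unscaled.to_be_bytes().to_vec();

        // Deserialize: copy the bytes into a fixed-size array and decode.
        let mut buf = [0u8; 4];
        buf.copy_from_slice(&wire);
        assert_eq!(i32::from_be_bytes(buf), unscaled);
    }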
