diff --git a/datafusion/expr/src/udaf.rs b/datafusion/expr/src/udaf.rs index b593f8411d24..42a5f9b26239 100644 --- a/datafusion/expr/src/udaf.rs +++ b/datafusion/expr/src/udaf.rs @@ -329,9 +329,9 @@ impl AggregateUDF { self.inner.supports_null_handling_clause() } - /// See [`AggregateUDFImpl::is_ordered_set_aggregate`] for more details. - pub fn is_ordered_set_aggregate(&self) -> bool { - self.inner.is_ordered_set_aggregate() + /// See [`AggregateUDFImpl::supports_within_group_clause`] for more details. + pub fn supports_within_group_clause(&self) -> bool { + self.inner.supports_within_group_clause() } /// Returns the documentation for this Aggregate UDF. @@ -746,18 +746,25 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { true } - /// If this function is an ordered-set aggregate function, return `true`. - /// Otherwise, return `false` (default). + /// If this function supports the `WITHIN GROUP (ORDER BY column [ASC|DESC])` + /// SQL syntax, return `true`. Otherwise, return `false` (default) which will + /// cause an error when parsing SQL where this syntax is detected for this + /// function. + /// + /// This function should return `true` for ordered-set aggregate functions + /// only. + /// + /// # Ordered-set aggregate functions /// /// Ordered-set aggregate functions allow specifying a sort order that affects /// how the function calculates its result, unlike other aggregate functions - /// like `SUM` or `COUNT`. For example, `percentile_cont` is an ordered-set + /// like `sum` or `count`. For example, `percentile_cont` is an ordered-set /// aggregate function that calculates the exact percentile value from a list /// of values; the output of calculating the `0.75` percentile depends on if /// you're calculating on an ascending or descending list of values. /// - /// Setting this to return `true` affects only SQL parsing & planning; it allows - /// use of the `WITHIN GROUP` clause to specify this order, for example: + /// An example of how an ordered-set aggregate function is called with the + /// `WITHIN GROUP` SQL syntax: /// /// ```sql /// -- Ascending @@ -784,15 +791,11 @@ pub trait AggregateUDFImpl: Debug + DynEq + DynHash + Send + Sync { /// without the `WITHIN GROUP` clause, though a default of ascending is the /// standard practice. /// - /// Note that setting this to `true` does not guarantee input sort order to - /// the aggregate function; it expects the function to handle ordering the - /// input values themselves (e.g. `percentile_cont` must buffer and sort - /// the values internally). That is, DataFusion does not introduce any kind - /// of sort into the plan for these functions. - /// - /// Setting this to `false` disallows calling this function with the `WITHIN GROUP` - /// clause. - fn is_ordered_set_aggregate(&self) -> bool { + /// Ordered-set aggregate function implementations are responsible for handling + /// the input sort order themselves (e.g. `percentile_cont` must buffer and + /// sort the values internally). That is, DataFusion does not introduce any + /// kind of sort into the plan for these functions with this syntax. + fn supports_within_group_clause(&self) -> bool { false } @@ -843,7 +846,7 @@ pub fn udaf_default_schema_name( // exclude the first function argument(= column) in ordered set aggregate function, // because it is duplicated with the WITHIN GROUP clause in schema name. - let args = if func.is_ordered_set_aggregate() && !order_by.is_empty() { + let args = if func.supports_within_group_clause() && !order_by.is_empty() { &args[1..] } else { &args[..] @@ -867,7 +870,7 @@ pub fn udaf_default_schema_name( }; if !order_by.is_empty() { - let clause = match func.is_ordered_set_aggregate() { + let clause = match func.supports_within_group_clause() { true => "WITHIN GROUP", false => "ORDER BY", }; @@ -1259,8 +1262,8 @@ impl AggregateUDFImpl for AliasedAggregateUDFImpl { self.inner.supports_null_handling_clause() } - fn is_ordered_set_aggregate(&self) -> bool { - self.inner.is_ordered_set_aggregate() + fn supports_within_group_clause(&self) -> bool { + self.inner.supports_within_group_clause() } fn set_monotonicity(&self, data_type: &DataType) -> SetMonotonicity { diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont.rs b/datafusion/functions-aggregate/src/approx_percentile_cont.rs index 6513504b30b0..4015abc6adf7 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont.rs @@ -319,7 +319,7 @@ impl AggregateUDFImpl for ApproxPercentileCont { false } - fn is_ordered_set_aggregate(&self) -> bool { + fn supports_within_group_clause(&self) -> bool { true } diff --git a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs index 215341b507af..51891ce7f277 100644 --- a/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs +++ b/datafusion/functions-aggregate/src/approx_percentile_cont_with_weight.rs @@ -262,7 +262,7 @@ impl AggregateUDFImpl for ApproxPercentileContWithWeight { false } - fn is_ordered_set_aggregate(&self) -> bool { + fn supports_within_group_clause(&self) -> bool { true } diff --git a/datafusion/functions-aggregate/src/percentile_cont.rs b/datafusion/functions-aggregate/src/percentile_cont.rs index 8e9e9a3144d4..545d13b4014b 100644 --- a/datafusion/functions-aggregate/src/percentile_cont.rs +++ b/datafusion/functions-aggregate/src/percentile_cont.rs @@ -360,7 +360,7 @@ impl AggregateUDFImpl for PercentileCont { false } - fn is_ordered_set_aggregate(&self) -> bool { + fn supports_within_group_clause(&self) -> bool { true } diff --git a/datafusion/sql/src/expr/function.rs b/datafusion/sql/src/expr/function.rs index cb34bb0f7eb7..2d20aaf52358 100644 --- a/datafusion/sql/src/expr/function.rs +++ b/datafusion/sql/src/expr/function.rs @@ -467,7 +467,7 @@ impl SqlToRel<'_, S> { let mut args = self.function_args_to_expr(args, schema, planner_context)?; - let order_by = if fm.is_ordered_set_aggregate() { + let order_by = if fm.supports_within_group_clause() { let within_group = self.order_by_to_sort_expr( within_group, schema, diff --git a/datafusion/sql/src/unparser/expr.rs b/datafusion/sql/src/unparser/expr.rs index a7fe8efa153c..97f2b58bf840 100644 --- a/datafusion/sql/src/unparser/expr.rs +++ b/datafusion/sql/src/unparser/expr.rs @@ -336,7 +336,7 @@ impl Unparser<'_> { None => None, }; let within_group: Vec = - if agg.func.is_ordered_set_aggregate() { + if agg.func.supports_within_group_clause() { order_by .iter() .map(|sort_expr| self.sort_to_sql(sort_expr)) diff --git a/docs/source/library-user-guide/upgrading.md b/docs/source/library-user-guide/upgrading.md index f34b8b2a5cf0..6cc3af528555 100644 --- a/docs/source/library-user-guide/upgrading.md +++ b/docs/source/library-user-guide/upgrading.md @@ -133,20 +133,16 @@ The `projection` field in `FileScanConfig` has been renamed to `projection_exprs If you directly access the `projection` field: -```rust -# /* comment to avoid running +```rust,ignore let config: FileScanConfig = ...; let projection = config.projection; -# */ ``` You should update to: -```rust -# /* comment to avoid running +```rust,ignore let config: FileScanConfig = ...; let projection_exprs = config.projection_exprs; -# */ ``` **Impact on builders:** @@ -168,12 +164,10 @@ Note: `with_projection()` still works but is deprecated and will be removed in a You can access column indices from `ProjectionExprs` using its methods if needed: -```rust -# /* comment to avoid running +```rust,ignore let projection_exprs: ProjectionExprs = ...; // Get the column indices if the projection only contains simple column references let indices = projection_exprs.column_indices(); -# */ ``` ### `DESCRIBE query` support @@ -260,6 +254,13 @@ let full_schema = table_schema.table_schema(); // Complete schema with let partition_cols_ref = table_schema.table_partition_cols(); // Just the partition columns ``` +### `AggregateUDFImpl::is_ordered_set_aggregate` has been renamed to `AggregateUDFImpl::supports_within_group_clause` + +This method has been renamed to better reflect the actual impact it has for aggregate UDF implementations. +The accompanying `AggregateUDF::is_ordered_set_aggregate` has also been renamed to `AggregateUDF::supports_within_group_clause`. +No functionality has been changed with regards to this method; it still refers only to permitting use of `WITHIN GROUP` +SQL syntax for the aggregate function. + ## DataFusion `50.0.0` ### ListingTable automatically detects Hive Partitioned tables