Skip to content

Commit 7f71eae

Browse files
committed
Add rank and dense_rank Spark window function (6289)
1 parent 74f7dc9 commit 7f71eae

File tree

8 files changed

+50
-15
lines changed

8 files changed

+50
-15
lines changed

velox/docs/functions/spark/window.rst

+10-1
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,13 @@ Rank functions
1919

2020
.. spark:function:: row_number() -> integer
2121
22-
Returns a unique, sequential number to each row, starting with one, according to the ordering of rows within the window partition.
22+
Returns a unique, sequential number for each row, starting with one, according to the ordering of rows within the window partition.
23+
24+
.. spark:function:: rank() -> integer
25+
26+
Returns the rank of a value in a group of values. The rank is one plus the number of rows preceding the row that are not peer with the row. Thus, tie values in the ordering will produce gaps in the sequence. The ranking is performed for each window partition.
27+
28+
.. spark:function:: dense_rank() -> integer
29+
30+
Returns the rank of a value in a group of values. This is similar to rank(), except that tie values do not produce gaps in the sequence.
31+

velox/functions/lib/window/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
add_library(velox_functions_window NthValue.cpp RowNumber.cpp)
15+
add_library(velox_functions_window NthValue.cpp Rank.cpp RowNumber.cpp)
1616

1717
target_link_libraries(velox_functions_window velox_buffer velox_exec
1818
Folly::folly)

velox/functions/prestosql/window/Rank.cpp renamed to velox/functions/lib/window/Rank.cpp

+10-4
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#include "velox/expression/FunctionSignature.h"
2020
#include "velox/vector/FlatVector.h"
2121

22-
namespace facebook::velox::window::prestosql {
22+
namespace facebook::velox::functions::window {
2323

2424
// Types of rank functions.
2525
enum class RankType {
@@ -112,14 +112,20 @@ void registerRankInternal(
112112
});
113113
}
114114

115-
void registerRank(const std::string& name) {
115+
void registerRankBigint(const std::string& name) {
116116
registerRankInternal<RankType::kRank, int64_t>(name, "bigint");
117117
}
118-
void registerDenseRank(const std::string& name) {
118+
void registerRankInteger(const std::string& name) {
119+
registerRankInternal<RankType::kRank, int32_t>(name, "integer");
120+
}
121+
void registerDenseRankBigint(const std::string& name) {
119122
registerRankInternal<RankType::kDenseRank, int64_t>(name, "bigint");
120123
}
124+
void registerDenseRankInteger(const std::string& name) {
125+
registerRankInternal<RankType::kDenseRank, int32_t>(name, "integer");
126+
}
121127
void registerPercentRank(const std::string& name) {
122128
registerRankInternal<RankType::kPercentRank, double>(name, "double");
123129
}
124130

125-
} // namespace facebook::velox::window::prestosql
131+
} // namespace facebook::velox::functions::window

velox/functions/lib/window/RegistrationFunctions.h

+20
Original file line numberDiff line numberDiff line change
@@ -34,4 +34,24 @@ void registerRowNumberBigint(const std::string& name);
3434
// Register the Spark function row_number() with the integer data type
3535
// for the return value.
3636
void registerRowNumberInteger(const std::string& name);
37+
38+
// Register the Presto function rank() with the bigint data type
39+
// for the return value.
40+
void registerRankBigint(const std::string& name);
41+
42+
// Register the Spark function rank() with the integer data type
43+
// for the return value.
44+
void registerRankInteger(const std::string& name);
45+
46+
// Register the Presto function dense_rank() with the bigint data type
47+
// for the return value.
48+
void registerDenseRankBigint(const std::string& name);
49+
50+
// Register the Spark function dense_rank() with the integer data type
51+
// for the return value.
52+
void registerDenseRankInteger(const std::string& name);
53+
54+
// Register the Presto function percent_rank() with the double return type.
55+
void registerPercentRank(const std::string& name);
56+
3757
} // namespace facebook::velox::functions::window

velox/functions/prestosql/window/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ if(${VELOX_BUILD_TESTING})
1616
endif()
1717

1818
add_library(velox_window CumeDist.cpp FirstLastValue.cpp LeadLag.cpp Ntile.cpp
19-
Rank.cpp WindowFunctionsRegistration.cpp)
19+
WindowFunctionsRegistration.cpp)
2020

2121
target_link_libraries(velox_window velox_buffer velox_exec
2222
velox_functions_window Folly::folly)

velox/functions/prestosql/window/WindowFunctionsRegistration.cpp

+3-7
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,6 @@ namespace facebook::velox::window {
2020

2121
namespace prestosql {
2222

23-
extern void registerRowNumber(const std::string& name);
24-
extern void registerRank(const std::string& name);
25-
extern void registerDenseRank(const std::string& name);
26-
extern void registerPercentRank(const std::string& name);
2723
extern void registerCumeDist(const std::string& name);
2824
extern void registerNtile(const std::string& name);
2925
extern void registerFirstValue(const std::string& name);
@@ -33,9 +29,9 @@ extern void registerLead(const std::string& name);
3329

3430
void registerAllWindowFunctions(const std::string& prefix) {
3531
functions::window::registerRowNumberBigint(prefix + "row_number");
36-
registerRank(prefix + "rank");
37-
registerDenseRank(prefix + "dense_rank");
38-
registerPercentRank(prefix + "percent_rank");
32+
functions::window::registerRankBigint(prefix + "rank");
33+
functions::window::registerDenseRankBigint(prefix + "dense_rank");
34+
functions::window::registerPercentRank(prefix + "percent_rank");
3935
registerCumeDist(prefix + "cume_dist");
4036
registerNtile(prefix + "ntile");
4137
functions::window::registerNthValueBigint(prefix + "nth_value");

velox/functions/sparksql/window/WindowFunctionsRegistration.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ namespace facebook::velox::functions::window::sparksql {
2121
void registerWindowFunctions(const std::string& prefix) {
2222
functions::window::registerNthValueInteger(prefix + "nth_value");
2323
functions::window::registerRowNumberInteger(prefix + "row_number");
24+
functions::window::registerRankInteger(prefix + "rank");
25+
functions::window::registerDenseRankInteger(prefix + "dense_rank");
2426
}
2527

2628
} // namespace facebook::velox::functions::window::sparksql

velox/functions/sparksql/window/tests/SparkWindowTest.cpp

+3-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,9 @@ namespace {
2525

2626
static const std::vector<std::string> kSparkWindowFunctions = {
2727
std::string("nth_value(c0, 1)"),
28-
std::string("row_number()")};
28+
std::string("row_number()"),
29+
std::string("rank()"),
30+
std::string("dense_rank()")};
2931

3032
struct SparkWindowTestParam {
3133
const std::string function;

0 commit comments

Comments
 (0)