Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
83 commits
Select commit Hold shift + click to select a range
30d3b6e
expect the table in-out function to produce a tuple mapping vector
Tishj Mar 7, 2023
69606cc
move some logic to separate functions to reduce the size and nesting
Tishj Mar 7, 2023
caf58ac
fixed some bugs, 'unnest' now works with the new path
Tishj Mar 7, 2023
b030a44
add the ability for in-out table functions to produce a in-out row ma…
Tishj Mar 8, 2023
3fa49ff
optimization: directly use the column as the selection vector + small…
Tishj Mar 8, 2023
c0dee98
fix tidy-check
Tishj Mar 8, 2023
6bab159
implement 'in_out_mapping' for summary + optimize the implementation …
Tishj Mar 8, 2023
980eae6
move range in-out table function code to separate file
Tishj Mar 8, 2023
ffd2e30
progress
Tishj Mar 8, 2023
b1b4848
revert change - we can't know the input types beforehand
Tishj Mar 8, 2023
5228e3b
'functional' in-out range function
Tishj Mar 9, 2023
c667408
fixed issue with the mapping vector -> selection vector, also changed…
Tishj Mar 9, 2023
9cf1230
Merge branch 'master' into scalar_table_function
Tishj Mar 10, 2023
5eace1f
Merge branch 'scalar_table_function' into scalar_table_function_step2
Tishj Mar 10, 2023
d2a6e52
added increment_t to prepare for adding timestamps, which have differ…
Tishj Mar 10, 2023
3b342fa
move to virtual methods for range implementation, since we can't know…
Tishj Mar 10, 2023
1a4e014
made the int range executor generic
Tishj Mar 10, 2023
c1ee593
add missing file
Tishj Mar 10, 2023
cf24bd1
add start of timestamp executor
Tishj Mar 10, 2023
dc55811
working on timestamp version of range table in-out
Tishj Mar 13, 2023
c69a07a
move a long condition to a separate function for readability
Tishj Mar 13, 2023
3fdc6d7
Merge branch 'master' into scalar_table_function_step2
Tishj Mar 13, 2023
3e1bd83
add lateraljoin + unnest benchmark
Tishj Mar 14, 2023
077829d
apply feedback
Tishj Mar 14, 2023
e12efeb
Merge branch 'scalar_table_function' into scalar_table_function_step2
Tishj Mar 14, 2023
dfe4683
Merge branch 'master' into scalar_table_function
Tishj Mar 14, 2023
8db765c
Merge branch 'master' into scalar_table_function_step2
Tishj Mar 14, 2023
3190942
Merge branch 'scalar_table_function' into scalar_table_function_step2
Tishj Mar 14, 2023
8ed52b8
get rid of TABLE, expand to the types of the subquery instead, and ig…
Tishj Mar 14, 2023
a14a324
semi-functional state
Tishj Mar 15, 2023
823a7b2
casting subquery expressions successfully
Tishj Mar 15, 2023
0c74f76
add missing conversion when chosen function is table in-out, and the …
Tishj Mar 15, 2023
eb895dc
might be a good idea to actually update the state :)
Tishj Mar 15, 2023
d80cd71
add support for NULLs in the input
Tishj Mar 15, 2023
8d9912b
change all TABLE taking functions to use varargs instead, fix random …
Tishj Mar 16, 2023
750bcc3
no longer clear the parameters, to preserve the old behavior when con…
Tishj Mar 16, 2023
e8ded11
remove dead code
Tishj Mar 16, 2023
e2268e9
fix formatting with clang-format 11
Tishj Mar 16, 2023
737a074
reformat
Tishj Mar 16, 2023
98b9a86
switched the conditions accidentally
Tishj Mar 27, 2023
e86f688
Merge branch 'master' into scalar_table_function_step2
Tishj Mar 30, 2023
3a40ba3
logical get can sit on logical projection, probably logical compariso…
Tmonster Apr 7, 2023
d3890f0
Merge branch 'master' into scalar_table_function_step2
Tishj Apr 7, 2023
59146cf
Merge remote-tracking branch 'tom/thijs_issue' into scalar_table_func…
Tishj Apr 7, 2023
92aac78
Merge branch 'master' into scalar_table_function
Tishj Apr 7, 2023
523cd79
Merge branch 'scalar_table_function' into scalar_table_function_step2
Tishj Apr 7, 2023
0b331ff
Merge branch 'master' into scalar_table_function_step2
Tishj Apr 11, 2023
26abcec
format
Tishj Apr 11, 2023
58dde0b
replace the polymorphic behavior with templating
Tishj Apr 11, 2023
626fbc0
remove duplicate include
Tishj Apr 12, 2023
bf1589c
double include
Tishj Apr 14, 2023
5b1593a
Merge branch 'master' into scalar_table_function_step2
Tishj Apr 14, 2023
4711205
fix tidy issue
Tishj Apr 14, 2023
03a12fc
Merge branch 'master' into scalar_table_function
Tishj Apr 25, 2023
0d74150
Merge branch 'scalar_table_function' into scalar_table_function_step2
Tishj Apr 25, 2023
2258253
added generate series, almost working correctly in all cases now
Tishj Apr 25, 2023
26043d9
generate series fix
Tishj Apr 26, 2023
c10f9aa
Merge branch 'master' into scalar_table_function_step2
Tishj Apr 26, 2023
05ca641
remove dead code
Tishj Apr 30, 2023
e980108
format
Tishj May 15, 2023
452b509
Merge branch 'master' into scalar_table_function_step2
Tishj May 15, 2023
1d1b601
Merge branch 'master' into scalar_table_function_step2
Tishj May 16, 2023
eaeaab6
Merge branch 'master' into scalar_table_function
Tishj May 16, 2023
d387b53
Merge branch 'master' into scalar_table_function_step2
Tishj May 17, 2023
d793369
Merge branch 'master' into scalar_table_function
Tishj Aug 3, 2023
849434a
small tidy issues
Tishj Aug 3, 2023
fc509dd
remove code that can not be tested for now
Tishj Aug 4, 2023
2e38141
add test with summary to get coverage for ExecuteWithoutMapping
Tishj Aug 4, 2023
aa45c6c
add [filter] to the tests contributing to coverage, to cover Physical…
Tishj Aug 4, 2023
a53aa40
update coverage
Tishj Aug 4, 2023
f75b9c3
Merge branch 'master' into scalar_table_function
Tishj Aug 14, 2023
8fcacce
this breaks verification, removing it for now
Tishj Aug 14, 2023
b3475e5
Merge branch 'master' into scalar_table_function_step2
Tishj Aug 14, 2023
2713918
Merge branch 'scalar_table_function' into scalar_table_function_step2
Tishj Aug 14, 2023
b47f131
add an extra step to extract the required arguments from the input chunk
Tishj Aug 14, 2023
adf096d
fix compilation issues + fix discrepancy between input types and subq…
Tishj Aug 14, 2023
2dc4082
correlated subquery test
Tishj Aug 15, 2023
1653d5b
wrapping the given subquery in a 'SELECT * FROM <subquery>' so we ens…
Tishj Aug 16, 2023
7b9887f
split this branch from 'scalar_table_function'
Tishj Aug 16, 2023
e6ceec7
put back some accidentally removed changes
Tishj Aug 16, 2023
eac90cc
in 'map' we send POINTER constant expressions along with the subquery…
Tishj Aug 16, 2023
d098ae0
the ICU range function is not a table in-out function, so we need to …
Tishj Aug 17, 2023
e12bbda
Merge branch 'feature' into scalar_table_function_step2
Tishj Sep 29, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/coverage_check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ build/coverage/test/unittest "[coverage]"
build/coverage/test/unittest "[intraquery]"
build/coverage/test/unittest "[interquery]"
build/coverage/test/unittest "[detailed_profiler]"
build/coverage/test/unittest "[filter]"
build/coverage/test/unittest test/sql/tpch/tpch_sf01.test_slow
python3 -m pytest --shell-binary build/coverage/duckdb tools/shell/tests/

Expand Down
2 changes: 2 additions & 0 deletions src/execution/physical_plan/plan_get.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalGet &op) {
}
// create the table scan node
if (!op.function.projection_pushdown) {
// Verify that the function has a regular 'function' variable set to execute the table scan
D_ASSERT(op.function.function);
// function does not support projection pushdown
auto node = make_uniq<PhysicalTableScan>(op.returned_types, op.function, std::move(op.bind_data),
op.returned_types, op.column_ids, vector<column_t>(), op.names,
Expand Down
5 changes: 1 addition & 4 deletions src/function/function_binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,13 @@ idx_t FunctionBinder::BindFunctionFromArguments(const string &name, FunctionSet<
}
if (candidate_functions.size() > 1) {
// multiple candidates, check if there are any unknown arguments
bool has_parameters = false;
for (auto &arg_type : arguments) {
if (arg_type.id() == LogicalTypeId::UNKNOWN) {
//! there are! we could not resolve parameters in this case
throw ParameterNotResolvedException();
}
}
if (!has_parameters) {
return MultipleCandidateException(name, functions, candidate_functions, arguments, error);
}
return MultipleCandidateException(name, functions, candidate_functions, arguments, error);
}
return candidate_functions[0];
}
Expand Down
1 change: 1 addition & 0 deletions src/function/table/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
add_subdirectory(system)
add_subdirectory(range)
add_subdirectory(version)
add_subdirectory(arrow)
add_library_unity(
Expand Down
254 changes: 3 additions & 251 deletions src/function/table/range.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,261 +8,13 @@

namespace duckdb {

//===--------------------------------------------------------------------===//
// Range (integers)
//===--------------------------------------------------------------------===//
//! Bind data of the integer range / generate_series table function: the resolved sequence parameters
struct RangeFunctionBindData : public TableFunctionData {
	//! First value of the sequence
	hugeint_t start;
	//! Bound at which the sequence stops
	hugeint_t end;
	//! Step between two consecutive values
	hugeint_t increment;

public:
	//! Two bind data objects are equal when start, end and increment all match
	bool Equals(const FunctionData &other_p) const override {
		auto &rhs = other_p.Cast<RangeFunctionBindData>();
		if (!(rhs.start == start)) {
			return false;
		}
		if (!(rhs.end == end)) {
			return false;
		}
		return rhs.increment == increment;
	}
};

//! Derives start/end/increment from the constant arguments and stores them in `result`.
//!  - one argument:    (end), with start = 0 and increment = 1
//!  - two arguments:   (start, end), with increment = 1
//!  - three arguments: (start, end, increment)
//! If any argument is NULL the parameters are set to fixed defaults and no validation is performed.
//! Throws a BinderException when the increment is 0 or when its sign points away from the end bound.
template <bool GENERATE_SERIES>
static void GenerateRangeParameters(const vector<Value> &inputs, RangeFunctionBindData &result) {
	for (auto &argument : inputs) {
		if (!argument.IsNull()) {
			continue;
		}
		// a NULL argument: fall back to fixed parameters and stop here
		result.start = GENERATE_SERIES ? 1 : 0;
		result.end = 0;
		result.increment = 1;
		return;
	}

	const auto argument_count = inputs.size();
	if (argument_count >= 2) {
		// the first two arguments are start and end
		result.start = inputs[0].GetValue<int64_t>();
		result.end = inputs[1].GetValue<int64_t>();
	} else {
		// only the end is specified
		result.start = 0;
		result.end = inputs[0].GetValue<int64_t>();
	}
	if (argument_count >= 3) {
		result.increment = inputs[2].GetValue<int64_t>();
	} else {
		result.increment = 1;
	}

	if (result.increment == 0) {
		throw BinderException("interval cannot be 0!");
	}
	// the increment is non-zero from here on, so it is either ascending or descending
	const bool ascending = result.increment > 0;
	if (ascending && result.start > result.end) {
		throw BinderException("start is bigger than end, but increment is positive: cannot generate infinite series");
	}
	if (!ascending && result.start < result.end) {
		throw BinderException("start is smaller than end, but increment is negative: cannot generate infinite series");
	}
}

//! Bind callback of the integer range / generate_series table function.
//! Resolves the sequence parameters, registers a single BIGINT result column named after the function
//! and returns the bind data. For generate_series the end bound is moved one step outward so that it
//! becomes inclusive.
template <bool GENERATE_SERIES>
static unique_ptr<FunctionData> RangeFunctionBind(ClientContext &context, TableFunctionBindInput &input,
                                                  vector<LogicalType> &return_types, vector<string> &names) {
	auto bind_data = make_uniq<RangeFunctionBindData>();
	GenerateRangeParameters<GENERATE_SERIES>(input.inputs, *bind_data);

	return_types.emplace_back(LogicalType::BIGINT);
	if (!GENERATE_SERIES) {
		names.emplace_back("range");
		return std::move(bind_data);
	}
	// generate_series has inclusive bounds on the RHS: shift the end bound in the direction of travel
	const bool descending = bind_data->increment < 0;
	bind_data->end = descending ? bind_data->end - 1 : bind_data->end + 1;
	names.emplace_back("generate_series");
	return std::move(bind_data);
}

//! Global scan state of the integer range function
struct RangeFunctionState : public GlobalTableFunctionState {
	RangeFunctionState() {
	}

	//! Number of values emitted so far; the next value is start + increment * current_idx
	int64_t current_idx = 0;
};

//! Init callback of the integer range function: creates a fresh scan state positioned at index 0
static unique_ptr<GlobalTableFunctionState> RangeFunctionInit(ClientContext &context, TableFunctionInitInput &input) {
	auto state = make_uniq<RangeFunctionState>();
	return std::move(state);
}

//! Scan callback of the integer range function.
//! Emits the next (at most STANDARD_VECTOR_SIZE) values as a sequence vector and advances the scan state.
//! Returns without touching the output when the next value no longer fits in an int64_t.
static void RangeFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
	auto &range_data = data_p.bind_data->Cast<RangeFunctionBindData>();
	auto &scan_state = data_p.global_state->Cast<RangeFunctionState>();

	auto step = range_data.increment;
	auto bound = range_data.end;
	// first value of this chunk, computed in hugeint_t so the multiplication cannot overflow an int64_t
	hugeint_t next_value = range_data.start + step * scan_state.current_idx;
	int64_t next_value_i64;
	if (!Hugeint::TryCast<int64_t>(next_value, next_value_i64)) {
		return;
	}
	// (step + rounding) turns the truncating division below into a division rounding away from zero,
	// so a partial last step still counts as one value
	int64_t rounding;
	if (step < 0) {
		rounding = 1;
	} else {
		rounding = -1;
	}
	const hugeint_t values_left = (bound - next_value + (step + rounding)) / step;
	idx_t count = MinValue<idx_t>(Hugeint::Cast<idx_t>(values_left), STANDARD_VECTOR_SIZE);
	// set the result vector as a sequence vector
	output.data[0].Sequence(next_value_i64, Hugeint::Cast<int64_t>(step), count);
	// advance the index pointer by the number of emitted values
	scan_state.current_idx += count;
	output.SetCardinality(count);
}

//! Cardinality callback of the integer range function.
//! Reports the exact number of values the sequence produces as both the estimated and the maximum
//! cardinality. The count is rounded up (same formula as RangeFunction uses per chunk): a plain
//! truncating (end - start) / increment under-reports by one whenever the span is not a multiple of the
//! increment, e.g. range(0, 10, 3) yields 0, 3, 6, 9 (4 values) but 10 / 3 == 3.
unique_ptr<NodeStatistics> RangeCardinality(ClientContext &context, const FunctionData *bind_data_p) {
	auto &bind_data = bind_data_p->Cast<RangeFunctionBindData>();
	auto increment = bind_data.increment;
	// (increment + offset) makes the truncating division round away from zero
	int64_t offset = increment < 0 ? 1 : -1;
	idx_t cardinality =
	    Hugeint::Cast<idx_t>((bind_data.end - bind_data.start + (increment + offset)) / increment);
	return make_uniq<NodeStatistics>(cardinality, cardinality);
}

//===--------------------------------------------------------------------===//
// Range (timestamp)
//===--------------------------------------------------------------------===//
//! Bind data of the timestamp range / generate_series table function
struct RangeDateTimeBindData : public TableFunctionData {
	//! First timestamp of the sequence
	timestamp_t start;
	//! Bound at which the sequence stops
	timestamp_t end;
	//! Step between two consecutive timestamps
	interval_t increment;
	//! Whether `end` itself is still part of the sequence
	bool inclusive_bound;
	//! Whether the sequence ends once the current value exceeds `end` (true) or drops below it (false)
	bool greater_than_check;

public:
	//! Two bind data objects are equal when all five fields match
	bool Equals(const FunctionData &other_p) const override {
		auto &rhs = other_p.Cast<RangeDateTimeBindData>();
		if (!(rhs.start == start) || !(rhs.end == end) || !(rhs.increment == increment)) {
			return false;
		}
		return rhs.inclusive_bound == inclusive_bound && rhs.greater_than_check == greater_than_check;
	}

	//! Returns true when `current_value` lies past the end bound and must not be emitted
	bool Finished(timestamp_t current_value) const {
		if (greater_than_check) {
			return inclusive_bound ? current_value > end : current_value >= end;
		}
		return inclusive_bound ? current_value < end : current_value <= end;
	}
};

//! Bind callback of the timestamp range / generate_series table function.
//! Expects exactly three constant arguments (start, end, increment), validates them and registers a
//! single result column that has the type of the first argument.
//! Throws a BinderException for non-finite bounds, a zero interval, an interval whose components have
//! mixed signs, or an interval that points away from the end bound.
template <bool GENERATE_SERIES>
static unique_ptr<FunctionData> RangeDateTimeBind(ClientContext &context, TableFunctionBindInput &input,
                                                  vector<LogicalType> &return_types, vector<string> &names) {
	auto &arguments = input.inputs;
	D_ASSERT(arguments.size() == 3);

	auto bind_data = make_uniq<RangeDateTimeBindData>();
	bind_data->start = arguments[0].GetValue<timestamp_t>();
	bind_data->end = arguments[1].GetValue<timestamp_t>();
	bind_data->increment = arguments[2].GetValue<interval_t>();

	// Infinities either cause errors or infinite loops, so just ban them
	if (!(Timestamp::IsFinite(bind_data->start) && Timestamp::IsFinite(bind_data->end))) {
		throw BinderException("RANGE with infinite bounds is not supported");
	}

	auto &step = bind_data->increment;
	const bool any_positive = step.months > 0 || step.days > 0 || step.micros > 0;
	const bool any_negative = step.months < 0 || step.days < 0 || step.micros < 0;
	if (!any_positive && !any_negative) {
		throw BinderException("interval cannot be 0!");
	}
	// all components of the interval should point in the same direction
	if (any_positive && any_negative) {
		throw BinderException("RANGE with composite interval that has mixed signs is not supported");
	}
	bind_data->greater_than_check = any_positive;
	if (any_positive) {
		if (bind_data->start > bind_data->end) {
			throw BinderException(
			    "start is bigger than end, but increment is positive: cannot generate infinite series");
		}
	} else if (bind_data->start < bind_data->end) {
		throw BinderException(
		    "start is smaller than end, but increment is negative: cannot generate infinite series");
	}

	return_types.push_back(arguments[0].type());
	// generate_series has inclusive bounds on the RHS, range has exclusive bounds
	bind_data->inclusive_bound = GENERATE_SERIES;
	names.emplace_back(GENERATE_SERIES ? "generate_series" : "range");
	return std::move(bind_data);
}

//! Global scan state of the timestamp range function
struct RangeDateTimeState : public GlobalTableFunctionState {
	explicit RangeDateTimeState(timestamp_t start_p) : current_state(start_p), finished(false) {
	}

	//! The next timestamp to be emitted
	timestamp_t current_state;
	//! Set once the end bound has been passed; later scans return immediately
	bool finished;
};

//! Init callback of the timestamp range function: the scan starts at the bound start timestamp
static unique_ptr<GlobalTableFunctionState> RangeDateTimeInit(ClientContext &context, TableFunctionInitInput &input) {
	return make_uniq<RangeDateTimeState>(input.bind_data->Cast<RangeDateTimeBindData>().start);
}

//! Scan callback of the timestamp range function.
//! Writes up to STANDARD_VECTOR_SIZE timestamps into the first output column, stepping by the bound
//! interval, and marks the scan as finished once the end bound has been passed.
//!
//! The termination check is performed BEFORE a value is written. Emitting first and checking afterwards
//! always produces the start value, which is wrong for an empty exclusive range: range(t, t, step) is
//! accepted by the bind step (start == end is not rejected) and must yield no rows.
static void RangeDateTimeFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) {
	auto &bind_data = data_p.bind_data->Cast<RangeDateTimeBindData>();
	auto &state = data_p.global_state->Cast<RangeDateTimeState>();
	if (state.finished) {
		return;
	}

	idx_t size = 0;
	auto data = FlatVector::GetData<timestamp_t>(output.data[0]);
	while (true) {
		// never emit a value that already lies past the end bound
		if (bind_data.Finished(state.current_state)) {
			state.finished = true;
			break;
		}
		// chunk is full: the scan continues from state.current_state on the next call
		if (size >= STANDARD_VECTOR_SIZE) {
			break;
		}
		data[size++] = state.current_state;
		state.current_state =
		    AddOperator::Operation<timestamp_t, interval_t, timestamp_t>(state.current_state, bind_data.increment);
	}
	output.SetCardinality(size);
}

//! Registers the "range" and "generate_series" table function sets in the builtin function set.
//! Both sets receive the integer overloads with one, two and three BIGINT arguments and the
//! (TIMESTAMP, TIMESTAMP, INTERVAL) overload; the in-out registration helpers are then invoked on each set.
//! NOTE: a single `range_function` object is mutated and re-added for every integer overload, so the
//! order of the statements below matters (AddFunction is handed the object's state at that point).
void RangeTableFunction::RegisterFunction(BuiltinFunctions &set) {
TableFunctionSet range("range");

// template for all integer overloads; starts out as the single-argument (BIGINT) variant
TableFunction range_function({LogicalType::BIGINT}, RangeFunction, RangeFunctionBind<false>, RangeFunctionInit);
range_function.cardinality = RangeCardinality;

// single argument range: (end) - implicit start = 0 and increment = 1
range.AddFunction(range_function);
// two arguments range: (start, end) - implicit increment = 1
range_function.arguments = {LogicalType::BIGINT, LogicalType::BIGINT};
range.AddFunction(range_function);
// three arguments range: (start, end, increment)
range_function.arguments = {LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT};
range.AddFunction(range_function);
// timestamp range: (start, end, increment) with exclusive end bound
range.AddFunction(TableFunction({LogicalType::TIMESTAMP, LogicalType::TIMESTAMP, LogicalType::INTERVAL},
RangeDateTimeFunction, RangeDateTimeBind<false>, RangeDateTimeInit));
// NOTE(review): presumably adds the table in-out overloads to the set - defined outside this file, confirm
RangeInOutTableFunction::RegisterFunction(range);
set.AddFunction(range);
// generate_series: similar to range, but inclusive instead of exclusive bounds on the RHS

TableFunctionSet generate_series("generate_series");
// reuse the integer template: switch to the inclusive bind and reset to the single-argument signature
range_function.bind = RangeFunctionBind<true>;
range_function.arguments = {LogicalType::BIGINT};
generate_series.AddFunction(range_function);
range_function.arguments = {LogicalType::BIGINT, LogicalType::BIGINT};
generate_series.AddFunction(range_function);
range_function.arguments = {LogicalType::BIGINT, LogicalType::BIGINT, LogicalType::BIGINT};
generate_series.AddFunction(range_function);
// timestamp generate_series: (start, end, increment) with inclusive end bound
generate_series.AddFunction(TableFunction({LogicalType::TIMESTAMP, LogicalType::TIMESTAMP, LogicalType::INTERVAL},
RangeDateTimeFunction, RangeDateTimeBind<true>, RangeDateTimeInit));
// NOTE(review): presumably adds the table in-out overloads to the set - defined outside this file, confirm
GenerateSeriesInOutTableFunction::RegisterFunction(generate_series);
set.AddFunction(generate_series);
}

Expand Down
5 changes: 5 additions & 0 deletions src/function/table/range/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Build range.cpp of this directory as the object library duckdb_func_table_range.
# NOTE(review): add_library_unity is a project-defined command (not visible here) - presumably it
# sets up a unity build for the listed sources; confirm against the top-level CMake files.
add_library_unity(duckdb_func_table_range OBJECT range.cpp)

# Append this library's object files to ALL_OBJECT_FILES and publish the extended list to the
# parent directory's scope, so the including CMakeLists.txt sees them.
set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:duckdb_func_table_range>
PARENT_SCOPE)
Loading