diff --git a/.clang-tidy b/.clang-tidy index aa8e995d6c24..ee2b06b745d7 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,4 +1,4 @@ -Checks: '-*,clang-diagnostic-*,bugprone-*,performance-*,google-explicit-constructor,google-build-using-namespace,google-runtime-int,misc-definitions-in-headers,modernize-use-nullptr,modernize-use-override,-bugprone-macro-parentheses,readability-braces-around-statements,-bugprone-branch-clone,readability-identifier-naming,hicpp-exception-baseclass,misc-throw-by-value-catch-by-reference,-bugprone-signed-char-misuse,-bugprone-misplaced-widening-cast,-bugprone-sizeof-expression,-bugprone-narrowing-conversions,-bugprone-easily-swappable-parameters,google-global-names-in-headers,llvm-header-guard,misc-definitions-in-headers,modernize-use-emplace,modernize-use-bool-literals,-performance-inefficient-string-concatenation,-performance-no-int-to-ptr,readability-container-size-empty,cppcoreguidelines-pro-type-cstyle-cast,-llvm-header-guard,-performance-enum-size' +Checks: '-*,clang-diagnostic-*,bugprone-*,performance-*,google-explicit-constructor,google-build-using-namespace,google-runtime-int,misc-definitions-in-headers,modernize-use-nullptr,modernize-use-override,-bugprone-macro-parentheses,readability-braces-around-statements,-bugprone-branch-clone,readability-identifier-naming,hicpp-exception-baseclass,misc-throw-by-value-catch-by-reference,-bugprone-signed-char-misuse,-bugprone-misplaced-widening-cast,-bugprone-sizeof-expression,-bugprone-narrowing-conversions,-bugprone-easily-swappable-parameters,google-global-names-in-headers,llvm-header-guard,misc-definitions-in-headers,modernize-use-emplace,modernize-use-bool-literals,-performance-inefficient-string-concatenation,-performance-no-int-to-ptr,readability-container-size-empty,cppcoreguidelines-pro-type-cstyle-cast,-llvm-header-guard,-performance-enum-size,cppcoreguidelines-pro-type-const-cast,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-interfaces-global-init,cppcoreguidelines-slicing,cppcoreguidelines-rvalue-reference-param-not-moved,cppcoreguidelines-virtual-class-destructor' WarningsAsErrors: '*' HeaderFilterRegex: 'src/include/duckdb/.*' FormatStyle: none @@ -47,4 +47,6 @@ CheckOptions: value: lower_case - key: modernize-use-emplace.SmartPointers value: '::std::shared_ptr;::duckdb::unique_ptr;::std::auto_ptr;::std::weak_ptr' + - key: cppcoreguidelines-rvalue-reference-param-not-moved.IgnoreUnnamedParams + value: true diff --git a/.github/config/out_of_tree_extensions.cmake b/.github/config/out_of_tree_extensions.cmake index 11fc11c1c8d1..181a2338feff 100644 --- a/.github/config/out_of_tree_extensions.cmake +++ b/.github/config/out_of_tree_extensions.cmake @@ -38,7 +38,7 @@ if (NOT MINGW) duckdb_extension_load(azure LOAD_TESTS GIT_URL https://github.com/duckdb/duckdb_azure - GIT_TAG 86f39d76157de970d16d6d6537bc90c0ee1c7d35 + GIT_TAG 6620a32454c1eb2e455104d87262061d2464aad0 APPLY_PATCHES ) endif() diff --git a/.github/config/uncovered_files.csv b/.github/config/uncovered_files.csv index 4bff1a13c06c..e29afac74c4d 100644 --- a/.github/config/uncovered_files.csv +++ b/.github/config/uncovered_files.csv @@ -1,5 +1,6 @@ catalog/catalog.cpp 49 catalog/catalog_entry.cpp 11 +catalog/catalog_entry_retriever.cpp 14 catalog/catalog_entry/duck_schema_entry.cpp 10 catalog/catalog_entry/duck_table_entry.cpp 7 catalog/catalog_entry/index_catalog_entry.cpp 2 @@ -12,7 +13,8 @@ catalog/catalog_search_path.cpp 9 catalog/catalog_set.cpp 20 catalog/catalog_transaction.cpp 3 catalog/default/default_functions.cpp 
9 -catalog/dependency_manager.cpp 5 +catalog/dependency_manager.cpp 6 +catalog/dependency_list.cpp 4 common/allocator.cpp 20 common/arrow/arrow_appender.cpp 23 common/arrow/appender/map_data.cpp 7 @@ -213,7 +215,7 @@ execution/operator/csv_scanner/parallel_csv_reader.cpp 54 execution/operator/persistent/physical_batch_copy_to_file.cpp 7 execution/operator/persistent/physical_batch_insert.cpp 43 execution/operator/persistent/physical_copy_to_file.cpp 2 -execution/operator/persistent/physical_export.cpp 2 +execution/operator/persistent/physical_export.cpp 53 execution/operator/persistent/physical_fixed_batch_copy.cpp 7 execution/operator/persistent/physical_insert.cpp 5 execution/operator/projection/physical_projection.cpp 16 @@ -342,6 +344,7 @@ extension/parquet/parquet_timestamp.cpp 12 extension/parquet/parquet_writer.cpp 9 extension/parquet/zstd_file_system.cpp 6 include/duckdb/catalog/catalog.hpp 3 +include/duckdb/catalog/dependency_manager.hpp 2 include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp 3 include/duckdb/catalog/catalog_set.hpp 2 include/duckdb/catalog/mapping_value.hpp 2 @@ -686,13 +689,13 @@ planner/binder/statement/bind_copy.cpp 3 planner/binder/statement/bind_create.cpp 21 planner/binder/statement/bind_create_table.cpp 2 planner/binder/statement/bind_drop.cpp 20 -planner/binder/statement/bind_export.cpp 3 +planner/binder/statement/bind_export.cpp 17 planner/binder/statement/bind_insert.cpp 19 planner/binder/statement/bind_logical_plan.cpp 21 planner/binder/statement/bind_pragma.cpp 3 planner/binder/statement/bind_update.cpp 4 planner/binder/statement/bind_vacuum.cpp 3 -planner/binder/tableref/bind_basetableref.cpp 15 +planner/binder/tableref/bind_basetableref.cpp 17 planner/binder/tableref/bind_joinref.cpp 3 planner/binder/tableref/bind_pivot.cpp 6 planner/binder/tableref/bind_table_function.cpp 18 diff --git a/.github/workflows/IssuesCloseStale.yml b/.github/workflows/IssuesCloseStale.yml index c94b4e3f5a2e..0dcce9ac7edd 100644 --- a/.github/workflows/IssuesCloseStale.yml +++ b/.github/workflows/IssuesCloseStale.yml @@ -11,8 +11,11 @@ jobs: uses: actions/stale@v8 with: stale-issue-message: 'This issue is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 30 days.' + stale-pr-message: 'This pull request is stale because it has been open 90 days with no activity. Remove stale label or comment or this will be closed in 30 days.' close-issue-message: 'This issue was closed because it has been stale for 30 days with no activity.' + close-pr-message: 'This pull request was closed because it has been stale for 30 days with no activity.' 
days-before-stale: 90 days-before-close: 30 operations-per-run: 500 stale-issue-label: stale + stale-pr-label: stale diff --git a/.github/workflows/R.yml b/.github/workflows/R.yml index 409840fe68e3..151b3c0c8489 100644 --- a/.github/workflows/R.yml +++ b/.github/workflows/R.yml @@ -60,12 +60,6 @@ jobs: update-rtools: true rtools-version: '42' # linker bug in 43 ^^ - - name: Setup Ccache - uses: hendrikmuhs/ccache-action@main - with: - key: ${{ github.job }} - save: ${{ github.ref == 'refs/heads/main' || github.repository != 'duckdb/duckdb' }} - - uses: ./.github/actions/build_extensions with: deploy_as: windows_amd64_rtools diff --git a/data/csv/quoted_newline.csv b/data/csv/quoted_newline.csv new file mode 100644 index 000000000000..a5451c270972 --- /dev/null +++ b/data/csv/quoted_newline.csv @@ -0,0 +1,3 @@ +col1 +"cell with +newline" \ No newline at end of file diff --git a/data/json/11407.json b/data/json/11407.json new file mode 100644 index 000000000000..1e513416a7ce --- /dev/null +++ b/data/json/11407.json @@ -0,0 +1,8 @@ +[ + { + "k": "v" + }, + { + "k": "v2" + } +] diff --git a/extension/httpfs/httpfs_extension.cpp b/extension/httpfs/httpfs_extension.cpp index 7d6286788dc2..685062c7c7a7 100644 --- a/extension/httpfs/httpfs_extension.cpp +++ b/extension/httpfs/httpfs_extension.cpp @@ -19,48 +19,41 @@ static void LoadInternal(DatabaseInstance &instance) { // Global HTTP config // Single timeout value is used for all 4 types of timeouts, we could split it into 4 if users need that - config.AddExtensionOption("http_timeout", "HTTP timeout read/write/connection/retry (default 30000ms)", - LogicalType::UBIGINT, Value(30000)); - config.AddExtensionOption("http_retries", "HTTP retries on I/O error (default 3)", LogicalType::UBIGINT, Value(3)); - config.AddExtensionOption("http_retry_wait_ms", "Time between retries (default 100ms)", LogicalType::UBIGINT, - Value(100)); + config.AddExtensionOption("http_timeout", "HTTP timeout read/write/connection/retry", LogicalType::UBIGINT, + Value(30000)); + config.AddExtensionOption("http_retries", "HTTP retries on I/O error", LogicalType::UBIGINT, Value(3)); + config.AddExtensionOption("http_retry_wait_ms", "Time between retries", LogicalType::UBIGINT, Value(100)); config.AddExtensionOption("force_download", "Forces upfront download of file", LogicalType::BOOLEAN, Value(false)); // Reduces the number of requests made while waiting, for example retry_wait_ms of 50 and backoff factor of 2 will // result in wait times of 0 50 100 200 400...etc. - config.AddExtensionOption("http_retry_backoff", - "Backoff factor for exponentially increasing retry wait time (default 4)", + config.AddExtensionOption("http_retry_backoff", "Backoff factor for exponentially increasing retry wait time", LogicalType::FLOAT, Value(4)); config.AddExtensionOption( "http_keep_alive", "Keep alive connections. Setting this to false can help when running into connection failures", LogicalType::BOOLEAN, Value(true)); - config.AddExtensionOption("enable_server_cert_verification", - "Enable server side certificate verification, defaults to False.", LogicalType::BOOLEAN, - Value(false)); - config.AddExtensionOption("ca_cert_file", - "Path to a custom certificate file for self-signed certificates. 
By default not set.", + config.AddExtensionOption("enable_server_cert_verification", "Enable server side certificate verification.", + LogicalType::BOOLEAN, Value(false)); + config.AddExtensionOption("ca_cert_file", "Path to a custom certificate file for self-signed certificates.", LogicalType::VARCHAR, Value("")); // Global S3 config - config.AddExtensionOption("s3_region", "S3 Region (default us-east-1)", LogicalType::VARCHAR, Value("us-east-1")); + config.AddExtensionOption("s3_region", "S3 Region", LogicalType::VARCHAR, Value("us-east-1")); config.AddExtensionOption("s3_access_key_id", "S3 Access Key ID", LogicalType::VARCHAR); config.AddExtensionOption("s3_secret_access_key", "S3 Access Key", LogicalType::VARCHAR); config.AddExtensionOption("s3_session_token", "S3 Session Token", LogicalType::VARCHAR); - config.AddExtensionOption("s3_endpoint", "S3 Endpoint (empty for default endpoint)", LogicalType::VARCHAR); - config.AddExtensionOption("s3_url_style", "S3 URL style ('vhost' (default) or 'path')", LogicalType::VARCHAR, - Value("vhost")); - config.AddExtensionOption("s3_use_ssl", "S3 use SSL (default true)", LogicalType::BOOLEAN, Value(true)); + config.AddExtensionOption("s3_endpoint", "S3 Endpoint", LogicalType::VARCHAR); + config.AddExtensionOption("s3_url_style", "S3 URL style", LogicalType::VARCHAR, Value("vhost")); + config.AddExtensionOption("s3_use_ssl", "S3 use SSL", LogicalType::BOOLEAN, Value(true)); config.AddExtensionOption("s3_url_compatibility_mode", "Disable Globs and Query Parameters on S3 URLs", LogicalType::BOOLEAN, Value(false)); // S3 Uploader config - config.AddExtensionOption("s3_uploader_max_filesize", - "S3 Uploader max filesize (between 50GB and 5TB, default 800GB)", LogicalType::VARCHAR, - "800GB"); - config.AddExtensionOption("s3_uploader_max_parts_per_file", - "S3 Uploader max parts per file (between 1 and 10000, default 10000)", + config.AddExtensionOption("s3_uploader_max_filesize", "S3 Uploader max filesize (between 50GB and 5TB)", + LogicalType::VARCHAR, "800GB"); + config.AddExtensionOption("s3_uploader_max_parts_per_file", "S3 Uploader max parts per file (between 1 and 10000)", LogicalType::UBIGINT, Value(10000)); - config.AddExtensionOption("s3_uploader_thread_limit", "S3 Uploader global thread limit (default 50)", - LogicalType::UBIGINT, Value(50)); + config.AddExtensionOption("s3_uploader_thread_limit", "S3 Uploader global thread limit", LogicalType::UBIGINT, + Value(50)); auto provider = make_uniq(config); provider->SetAll(); diff --git a/extension/json/buffered_json_reader.cpp b/extension/json/buffered_json_reader.cpp index f36db96c6de6..c5dc68f54bfa 100644 --- a/extension/json/buffered_json_reader.cpp +++ b/extension/json/buffered_json_reader.cpp @@ -44,6 +44,10 @@ bool JSONFileHandle::RequestedReadsComplete() { return requested_reads == actual_reads; } +bool JSONFileHandle::LastReadRequested() const { + return last_read_requested; +} + idx_t JSONFileHandle::FileSize() const { return file_size; } @@ -56,6 +60,10 @@ bool JSONFileHandle::CanSeek() const { return can_seek; } +bool JSONFileHandle::IsPipe() const { + return file_handle->IsPipe(); +} + FileHandle &JSONFileHandle::GetHandle() { return *file_handle; } diff --git a/extension/json/include/buffered_json_reader.hpp b/extension/json/include/buffered_json_reader.hpp index b4a894a53529..6fddc78a0c45 100644 --- a/extension/json/include/buffered_json_reader.hpp +++ b/extension/json/include/buffered_json_reader.hpp @@ -60,11 +60,13 @@ struct JSONFileHandle { void Reset(); bool 
RequestedReadsComplete(); + bool LastReadRequested() const; idx_t FileSize() const; idx_t Remaining() const; bool CanSeek() const; + bool IsPipe() const; FileHandle &GetHandle(); diff --git a/extension/json/include/json_scan.hpp b/extension/json/include/json_scan.hpp index 959c3e14a72a..1e0b9dc2d9b8 100644 --- a/extension/json/include/json_scan.hpp +++ b/extension/json/include/json_scan.hpp @@ -9,13 +9,13 @@ #pragma once #include "buffered_json_reader.hpp" -#include "json_enums.hpp" #include "duckdb/common/multi_file_reader.hpp" #include "duckdb/common/mutex.hpp" #include "duckdb/common/pair.hpp" #include "duckdb/common/types/type_map.hpp" #include "duckdb/function/scalar/strftime_format.hpp" #include "duckdb/function/table_function.hpp" +#include "json_enums.hpp" #include "json_transform.hpp" namespace duckdb { @@ -226,14 +226,20 @@ struct JSONScanLocalState { private: bool ReadNextBuffer(JSONScanGlobalState &gstate); - bool ReadNextBufferInternal(JSONScanGlobalState &gstate, optional_idx &buffer_index, bool &file_done); - bool ReadNextBufferSeek(JSONScanGlobalState &gstate, optional_idx &buffer_index, bool &file_done); - bool ReadNextBufferNoSeek(JSONScanGlobalState &gstate, optional_idx &buffer_index, bool &file_done); + bool ReadNextBufferInternal(JSONScanGlobalState &gstate, AllocatedData &buffer, optional_idx &buffer_index, + bool &file_done); + bool ReadNextBufferSeek(JSONScanGlobalState &gstate, AllocatedData &buffer, optional_idx &buffer_index, + bool &file_done); + bool ReadNextBufferNoSeek(JSONScanGlobalState &gstate, AllocatedData &buffer, optional_idx &buffer_index, + bool &file_done); + AllocatedData AllocateBuffer(JSONScanGlobalState &gstate); + data_ptr_t GetReconstructBuffer(JSONScanGlobalState &gstate); + void SkipOverArrayStart(); - void ReadAndAutoDetect(JSONScanGlobalState &gstate, optional_idx &buffer_index); - bool ReconstructFirstObject(); - void ParseNextChunk(); + void ReadAndAutoDetect(JSONScanGlobalState &gstate, AllocatedData &buffer, optional_idx &buffer_index); + bool ReconstructFirstObject(JSONScanGlobalState &gstate); + void ParseNextChunk(JSONScanGlobalState &gstate); void ParseJSON(char *const json_start, const idx_t json_size, const idx_t remaining); void ThrowObjectSizeError(const idx_t object_size); diff --git a/extension/json/include/json_serializer.hpp b/extension/json/include/json_serializer.hpp index 17f5f53d2ca2..d8f80a50554f 100644 --- a/extension/json/include/json_serializer.hpp +++ b/extension/json/include/json_serializer.hpp @@ -25,15 +25,16 @@ struct JsonSerializer : Serializer { void PushValue(yyjson_mut_val *val); public: - explicit JsonSerializer(yyjson_mut_doc *doc, bool skip_if_null, bool skip_if_empty) + explicit JsonSerializer(yyjson_mut_doc *doc, bool skip_if_null, bool skip_if_empty, bool skip_if_default) : doc(doc), stack({yyjson_mut_obj(doc)}), skip_if_null(skip_if_null), skip_if_empty(skip_if_empty) { serialize_enum_as_string = true; - serialize_default_values = true; + serialize_default_values = !skip_if_default; } template - static yyjson_mut_val *Serialize(T &value, yyjson_mut_doc *doc, bool skip_if_null, bool skip_if_empty) { - JsonSerializer serializer(doc, skip_if_null, skip_if_empty); + static yyjson_mut_val *Serialize(T &value, yyjson_mut_doc *doc, bool skip_if_null, bool skip_if_empty, + bool skip_if_default) { + JsonSerializer serializer(doc, skip_if_null, skip_if_empty, skip_if_default); value.Serialize(serializer); return serializer.GetRootObject(); } diff --git a/extension/json/json_functions/json_serialize_plan.cpp 
b/extension/json/json_functions/json_serialize_plan.cpp index bfe34b1f7238..1fe6c8cf721c 100644 --- a/extension/json/json_functions/json_serialize_plan.cpp +++ b/extension/json/json_functions/json_serialize_plan.cpp @@ -19,16 +19,19 @@ namespace duckdb { struct JsonSerializePlanBindData : public FunctionData { bool skip_if_null = false; bool skip_if_empty = false; + bool skip_if_default = false; bool format = false; bool optimize = false; - JsonSerializePlanBindData(bool skip_if_null_p, bool skip_if_empty_p, bool format_p, bool optimize_p) - : skip_if_null(skip_if_null_p), skip_if_empty(skip_if_empty_p), format(format_p), optimize(optimize_p) { + JsonSerializePlanBindData(bool skip_if_null_p, bool skip_if_empty_p, bool skip_if_default_p, bool format_p, + bool optimize_p) + : skip_if_null(skip_if_null_p), skip_if_empty(skip_if_empty_p), skip_if_default(skip_if_default_p), + format(format_p), optimize(optimize_p) { } public: unique_ptr Copy() const override { - return make_uniq(skip_if_null, skip_if_empty, format, optimize); + return make_uniq(skip_if_null, skip_if_empty, skip_if_default, format, optimize); } bool Equals(const FunctionData &other_p) const override { return true; @@ -48,6 +51,7 @@ static unique_ptr JsonSerializePlanBind(ClientContext &context, Sc // Optional arguments bool skip_if_null = false; bool skip_if_empty = false; + bool skip_if_default = false; bool format = false; bool optimize = false; @@ -69,6 +73,11 @@ static unique_ptr JsonSerializePlanBind(ClientContext &context, Sc throw BinderException("json_serialize_plan: 'skip_empty' argument must be a boolean"); } skip_if_empty = BooleanValue::Get(ExpressionExecutor::EvaluateScalar(context, *arg)); + } else if (arg->alias == "skip_default") { + if (arg->return_type.id() != LogicalTypeId::BOOLEAN) { + throw BinderException("json_serialize_plan: 'skip_default' argument must be a boolean"); + } + skip_if_default = BooleanValue::Get(ExpressionExecutor::EvaluateScalar(context, *arg)); } else if (arg->alias == "format") { if (arg->return_type.id() != LogicalTypeId::BOOLEAN) { throw BinderException("json_serialize_plan: 'format' argument must be a boolean"); @@ -83,7 +92,7 @@ static unique_ptr JsonSerializePlanBind(ClientContext &context, Sc throw BinderException(StringUtil::Format("json_serialize_plan: Unknown argument '%s'", arg->alias.c_str())); } } - return make_uniq(skip_if_null, skip_if_empty, format, optimize); + return make_uniq(skip_if_null, skip_if_empty, skip_if_default, format, optimize); } static bool OperatorSupportsSerialization(LogicalOperator &op, string &operator_name) { @@ -144,7 +153,8 @@ static void JsonSerializePlanFunction(DataChunk &args, ExpressionState &state, V throw InvalidInputException("Operator '%s' does not support serialization", operator_name); } - auto plan_json = JsonSerializer::Serialize(*plan, doc, info.skip_if_null, info.skip_if_empty); + auto plan_json = + JsonSerializer::Serialize(*plan, doc, info.skip_if_null, info.skip_if_empty, info.skip_if_default); yyjson_mut_arr_append(plans_arr, plan_json); } diff --git a/extension/json/json_functions/json_serialize_sql.cpp b/extension/json/json_functions/json_serialize_sql.cpp index 3d3adb67ccd9..17fd333647a3 100644 --- a/extension/json/json_functions/json_serialize_sql.cpp +++ b/extension/json/json_functions/json_serialize_sql.cpp @@ -12,15 +12,17 @@ namespace duckdb { struct JsonSerializeBindData : public FunctionData { bool skip_if_null = false; bool skip_if_empty = false; + bool skip_if_default = false; bool format = false; - 
JsonSerializeBindData(bool skip_if_null_p, bool skip_if_empty_p, bool format_p) - : skip_if_null(skip_if_null_p), skip_if_empty(skip_if_empty_p), format(format_p) { + JsonSerializeBindData(bool skip_if_null_p, bool skip_if_empty_p, bool skip_if_default_p, bool format_p) + : skip_if_null(skip_if_null_p), skip_if_empty(skip_if_empty_p), skip_if_default(skip_if_default_p), + format(format_p) { } public: unique_ptr Copy() const override { - return make_uniq(skip_if_null, skip_if_empty, format); + return make_uniq(skip_if_null, skip_if_empty, skip_if_default, format); } bool Equals(const FunctionData &other_p) const override { return true; @@ -41,6 +43,7 @@ static unique_ptr JsonSerializeBind(ClientContext &context, Scalar bool skip_if_null = false; bool skip_if_empty = false; + bool skip_if_default = false; bool format = false; for (idx_t i = 1; i < arguments.size(); i++) { @@ -66,11 +69,16 @@ static unique_ptr JsonSerializeBind(ClientContext &context, Scalar throw BinderException("json_serialize_sql: 'format' argument must be a boolean"); } format = BooleanValue::Get(ExpressionExecutor::EvaluateScalar(context, *arg)); + } else if (arg->alias == "skip_default") { + if (arg->return_type.id() != LogicalTypeId::BOOLEAN) { + throw BinderException("json_serialize_sql: 'skip_default' argument must be a boolean"); + } + skip_if_default = BooleanValue::Get(ExpressionExecutor::EvaluateScalar(context, *arg)); } else { - throw BinderException(StringUtil::Format("json_serialize_sql: Unknown argument '%s'", arg->alias.c_str())); + throw BinderException(StringUtil::Format("json_serialize_sql: Unknown argument '%s'", arg->alias)); } } - return make_uniq(skip_if_null, skip_if_empty, format); + return make_uniq(skip_if_null, skip_if_empty, skip_if_default, format); } static void JsonSerializeFunction(DataChunk &args, ExpressionState &state, Vector &result) { @@ -97,7 +105,8 @@ static void JsonSerializeFunction(DataChunk &args, ExpressionState &state, Vecto throw NotImplementedException("Only SELECT statements can be serialized to json!"); } auto &select = statement->Cast(); - auto json = JsonSerializer::Serialize(select, doc, info.skip_if_null, info.skip_if_empty); + auto json = + JsonSerializer::Serialize(select, doc, info.skip_if_null, info.skip_if_empty, info.skip_if_default); yyjson_mut_arr_append(statements_arr, json); } diff --git a/extension/json/json_scan.cpp b/extension/json/json_scan.cpp index 1c2578cb3b73..332603a7fdfa 100644 --- a/extension/json/json_scan.cpp +++ b/extension/json/json_scan.cpp @@ -148,9 +148,6 @@ JSONScanLocalState::JSONScanLocalState(ClientContext &context, JSONScanGlobalSta : scan_count(0), batch_index(DConstants::INVALID_INDEX), total_read_size(0), total_tuple_count(0), bind_data(gstate.bind_data), allocator(BufferAllocator::Get(context)), is_last(false), fs(FileSystem::GetFileSystem(context)), buffer_size(0), buffer_offset(0), prev_buffer_remainder(0) { - - // Buffer to reconstruct JSON values when they cross a buffer boundary - reconstruct_buffer = gstate.allocator.Allocate(gstate.buffer_capacity); } JSONGlobalTableFunctionState::JSONGlobalTableFunctionState(ClientContext &context, TableFunctionInitInput &input) @@ -276,13 +273,13 @@ idx_t JSONScanLocalState::ReadNext(JSONScanGlobalState &gstate) { } if (current_buffer_handle->buffer_index != 0 && current_reader->GetFormat() == JSONFormat::NEWLINE_DELIMITED) { - if (ReconstructFirstObject()) { + if (ReconstructFirstObject(gstate)) { scan_count++; } } } - ParseNextChunk(); + ParseNextChunk(gstate); } return scan_count; @@ 
-540,16 +537,12 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) { } } - // If we cannot re-use a buffer we create a new one - if (!buffer.IsSet()) { - buffer = gstate.allocator.Allocate(gstate.buffer_capacity); - } - - buffer_ptr = char_ptr_cast(buffer.get()); - // Copy last bit of previous buffer if (current_reader && current_reader->GetFormat() != JSONFormat::NEWLINE_DELIMITED && !is_last) { - memcpy(buffer_ptr, reconstruct_buffer.get(), prev_buffer_remainder); + if (!buffer.IsSet()) { + buffer = AllocateBuffer(gstate); + } + memcpy(buffer_ptr, GetReconstructBuffer(gstate), prev_buffer_remainder); } optional_idx buffer_index; @@ -558,7 +551,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) { if (current_reader) { // Try to read (if we were not the last read in the previous iteration) bool file_done = false; - bool read_success = ReadNextBufferInternal(gstate, buffer_index, file_done); + bool read_success = ReadNextBufferInternal(gstate, buffer, buffer_index, file_done); if (!is_last && read_success) { // We read something if (buffer_index.GetIndex() == 0 && current_reader->GetFormat() == JSONFormat::ARRAY) { @@ -603,7 +596,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) { // Open the file if it is not yet open if (!current_reader->IsOpen()) { current_reader->OpenJSONFile(); - if (current_reader->GetFileHandle().FileSize() == 0) { + if (current_reader->GetFileHandle().FileSize() == 0 && !current_reader->GetFileHandle().IsPipe()) { current_reader->GetFileHandle().Close(); // Skip over empty files if (gstate.enable_parallel_scans) { @@ -616,7 +609,7 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) { // Auto-detect if we haven't yet done this during the bind if (gstate.bind_data.options.record_type == JSONRecordType::AUTO_DETECT || current_reader->GetFormat() == JSONFormat::AUTO_DETECT) { - ReadAndAutoDetect(gstate, buffer_index); + ReadAndAutoDetect(gstate, buffer, buffer_index); } if (gstate.enable_parallel_scans) { @@ -655,10 +648,11 @@ bool JSONScanLocalState::ReadNextBuffer(JSONScanGlobalState &gstate) { return true; } -void JSONScanLocalState::ReadAndAutoDetect(JSONScanGlobalState &gstate, optional_idx &buffer_index) { +void JSONScanLocalState::ReadAndAutoDetect(JSONScanGlobalState &gstate, AllocatedData &buffer, + optional_idx &buffer_index) { // We have to detect the JSON format - hold the gstate lock while we do this bool file_done = false; - if (!ReadNextBufferInternal(gstate, buffer_index, file_done)) { + if (!ReadNextBufferInternal(gstate, buffer, buffer_index, file_done)) { return; } if (buffer_size == 0) { @@ -683,14 +677,14 @@ void JSONScanLocalState::ReadAndAutoDetect(JSONScanGlobalState &gstate, optional } } -bool JSONScanLocalState::ReadNextBufferInternal(JSONScanGlobalState &gstate, optional_idx &buffer_index, - bool &file_done) { +bool JSONScanLocalState::ReadNextBufferInternal(JSONScanGlobalState &gstate, AllocatedData &buffer, + optional_idx &buffer_index, bool &file_done) { if (current_reader->GetFileHandle().CanSeek()) { - if (!ReadNextBufferSeek(gstate, buffer_index, file_done)) { + if (!ReadNextBufferSeek(gstate, buffer, buffer_index, file_done)) { return false; } } else { - if (!ReadNextBufferNoSeek(gstate, buffer_index, file_done)) { + if (!ReadNextBufferNoSeek(gstate, buffer, buffer_index, file_done)) { return false; } } @@ -700,7 +694,8 @@ bool JSONScanLocalState::ReadNextBufferInternal(JSONScanGlobalState &gstate, opt return true; } -bool 
JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, optional_idx &buffer_index, bool &file_done) { +bool JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, AllocatedData &buffer, + optional_idx &buffer_index, bool &file_done) { auto &file_handle = current_reader->GetFileHandle(); idx_t request_size = gstate.buffer_capacity - prev_buffer_remainder - YYJSON_PADDING_SIZE; @@ -709,6 +704,12 @@ bool JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, optiona { lock_guard reader_guard(current_reader->lock); + if (file_handle.LastReadRequested()) { + return false; + } + if (!buffer.IsSet()) { + buffer = AllocateBuffer(gstate); + } if (!file_handle.GetPositionAndSize(read_position, read_size, request_size)) { return false; // We weren't able to read } @@ -742,16 +743,25 @@ bool JSONScanLocalState::ReadNextBufferSeek(JSONScanGlobalState &gstate, optiona return true; } -bool JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, optional_idx &buffer_index, - bool &file_done) { +bool JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, AllocatedData &buffer, + optional_idx &buffer_index, bool &file_done) { idx_t request_size = gstate.buffer_capacity - prev_buffer_remainder - YYJSON_PADDING_SIZE; idx_t read_size; { lock_guard reader_guard(current_reader->lock); - if (!current_reader->HasFileHandle() || !current_reader->IsOpen() || - !current_reader->GetFileHandle().Read(buffer_ptr + prev_buffer_remainder, read_size, request_size, - file_done, gstate.bind_data.type == JSONScanType::SAMPLE)) { + if (!current_reader->HasFileHandle() || !current_reader->IsOpen()) { + return false; // Couldn't read anything + } + auto &file_handle = current_reader->GetFileHandle(); + if (file_handle.LastReadRequested()) { + return false; + } + if (!buffer.IsSet()) { + buffer = AllocateBuffer(gstate); + } + if (!file_handle.Read(buffer_ptr + prev_buffer_remainder, read_size, request_size, file_done, + gstate.bind_data.type == JSONScanType::SAMPLE)) { return false; // Couldn't read anything } buffer_index = current_reader->GetBufferIndex(); @@ -766,6 +776,19 @@ bool JSONScanLocalState::ReadNextBufferNoSeek(JSONScanGlobalState &gstate, optio return true; } +AllocatedData JSONScanLocalState::AllocateBuffer(JSONScanGlobalState &gstate) { + auto buffer = gstate.allocator.Allocate(gstate.buffer_capacity); + buffer_ptr = char_ptr_cast(buffer.get()); + return buffer; +} + +data_ptr_t JSONScanLocalState::GetReconstructBuffer(JSONScanGlobalState &gstate) { + if (!reconstruct_buffer.IsSet()) { + reconstruct_buffer = gstate.allocator.Allocate(gstate.buffer_capacity); + } + return reconstruct_buffer.get(); +} + void JSONScanLocalState::SkipOverArrayStart() { // First read of this buffer, check if it's actually an array and skip over the bytes SkipWhitespace(buffer_ptr, buffer_offset, buffer_size); @@ -795,7 +818,7 @@ void JSONScanLocalState::SkipOverArrayStart() { } } -bool JSONScanLocalState::ReconstructFirstObject() { +bool JSONScanLocalState::ReconstructFirstObject(JSONScanGlobalState &gstate) { D_ASSERT(current_buffer_handle->buffer_index != 0); D_ASSERT(current_reader->GetFormat() == JSONFormat::NEWLINE_DELIMITED); @@ -811,7 +834,7 @@ bool JSONScanLocalState::ReconstructFirstObject() { auto part1_size = prev_buffer_ptr - part1_ptr; // Now copy the data to our reconstruct buffer - const auto reconstruct_ptr = reconstruct_buffer.get(); + const auto reconstruct_ptr = GetReconstructBuffer(gstate); memcpy(reconstruct_ptr, part1_ptr, part1_size); // We 
copied the object, so we are no longer reading the previous buffer @@ -851,10 +874,11 @@ bool JSONScanLocalState::ReconstructFirstObject() { return true; } -void JSONScanLocalState::ParseNextChunk() { +void JSONScanLocalState::ParseNextChunk(JSONScanGlobalState &gstate) { auto buffer_offset_before = buffer_offset; const auto format = current_reader->GetFormat(); + D_ASSERT(format != JSONFormat::AUTO_DETECT); for (; scan_count < STANDARD_VECTOR_SIZE; scan_count++) { SkipWhitespace(buffer_ptr, buffer_offset, buffer_size); auto json_start = buffer_ptr + buffer_offset; @@ -872,7 +896,7 @@ void JSONScanLocalState::ParseNextChunk() { if (remaining > bind_data.maximum_object_size) { ThrowObjectSizeError(remaining); } - memcpy(reconstruct_buffer.get(), json_start, remaining); + memcpy(GetReconstructBuffer(gstate), json_start, remaining); prev_buffer_remainder = remaining; } buffer_offset = buffer_size; diff --git a/scripts/generate_functions.py b/scripts/generate_functions.py index 91b0981852a3..572d7b703221 100644 --- a/scripts/generate_functions.py +++ b/scripts/generate_functions.py @@ -166,7 +166,7 @@ def sanitize_string(text): with open(function_list_file, 'r') as f: text = f.read() -static_function = 'static StaticFunctionDefinition internal_functions[] = {' +static_function = 'static const StaticFunctionDefinition internal_functions[] = {' pos = text.find(static_function) header = text[:pos] footer_lines = text[pos:].split('\n') diff --git a/src/catalog/CMakeLists.txt b/src/catalog/CMakeLists.txt index 0536fae3bddc..4a8a8e3e1a69 100644 --- a/src/catalog/CMakeLists.txt +++ b/src/catalog/CMakeLists.txt @@ -10,6 +10,7 @@ add_library_unity( duckdb_catalog OBJECT catalog_entry.cpp + catalog_entry_retriever.cpp catalog.cpp catalog_search_path.cpp catalog_set.cpp diff --git a/src/catalog/catalog.cpp b/src/catalog/catalog.cpp index 775af179588d..7d9aca2d69ee 100644 --- a/src/catalog/catalog.cpp +++ b/src/catalog/catalog.cpp @@ -301,17 +301,6 @@ struct CatalogLookup { string schema; }; -//! 
Return value of Catalog::LookupEntry -struct CatalogEntryLookup { - optional_ptr schema; - optional_ptr entry; - ErrorData error; - - DUCKDB_API bool Found() const { - return entry; - } -}; - //===--------------------------------------------------------------------===// // Generic //===--------------------------------------------------------------------===// diff --git a/src/catalog/catalog_entry/duck_schema_entry.cpp b/src/catalog/catalog_entry/duck_schema_entry.cpp index 105270d49b41..7780d2e14a2c 100644 --- a/src/catalog/catalog_entry/duck_schema_entry.cpp +++ b/src/catalog/catalog_entry/duck_schema_entry.cpp @@ -36,13 +36,11 @@ namespace duckdb { -void FindForeignKeyInformation(CatalogEntry &entry, AlterForeignKeyType alter_fk_type, +void FindForeignKeyInformation(TableCatalogEntry &table, AlterForeignKeyType alter_fk_type, vector> &fk_arrays) { - if (entry.type != CatalogType::TABLE_ENTRY) { - return; - } - auto &table_entry = entry.Cast(); - auto &constraints = table_entry.GetConstraints(); + auto &constraints = table.GetConstraints(); + auto &catalog = table.ParentCatalog(); + auto &name = table.name; for (idx_t i = 0; i < constraints.size(); i++) { auto &cond = constraints[i]; if (cond->type != ConstraintType::FOREIGN_KEY) { @@ -50,9 +48,9 @@ void FindForeignKeyInformation(CatalogEntry &entry, AlterForeignKeyType alter_fk } auto &fk = cond->Cast(); if (fk.info.type == ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE) { - AlterEntryData alter_data(entry.ParentCatalog().GetName(), fk.info.schema, fk.info.table, + AlterEntryData alter_data(catalog.GetName(), fk.info.schema, fk.info.table, OnEntryNotFound::THROW_EXCEPTION); - fk_arrays.push_back(make_uniq(std::move(alter_data), entry.name, fk.pk_columns, + fk_arrays.push_back(make_uniq(std::move(alter_data), name, fk.pk_columns, fk.fk_columns, fk.info.pk_keys, fk.info.fk_keys, alter_fk_type)); } else if (fk.info.type == ForeignKeyType::FK_TYPE_PRIMARY_KEY_TABLE && @@ -94,6 +92,9 @@ optional_ptr DuckSchemaEntry::AddEntryInternal(CatalogTransaction // CREATE OR REPLACE: first try to drop the entry auto old_entry = set.GetEntry(transaction, entry_name); if (old_entry) { + if (dependencies.Contains(*old_entry)) { + throw CatalogException("CREATE OR REPLACE is not allowed to depend on itself"); + } if (old_entry->type != entry_type) { throw CatalogException("Existing object %s is of type %s, trying to replace with type %s", entry_name, CatalogTypeToString(old_entry->type), CatalogTypeToString(entry_type)); @@ -101,6 +102,7 @@ optional_ptr DuckSchemaEntry::AddEntryInternal(CatalogTransaction (void)set.DropEntry(transaction, entry_name, false, entry->internal); } } + // now try to add the entry if (!set.CreateEntry(transaction, entry_name, std::move(entry), dependencies)) { // entry already exists! 
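Why the new dependencies.Contains(*old_entry) check above matters: CREATE OR REPLACE first drops the existing entry and then creates its replacement, so a replacement whose own dependency list includes the entry being replaced would be left referencing a just-dropped object. A minimal self-contained sketch of the idea follows; DependencySet, CreateOrReplace, and the std::set-of-names representation are illustrative stand-ins, not DuckDB's actual catalog API.

#include <iostream>
#include <set>
#include <stdexcept>
#include <string>

struct DependencySet {
	std::set<std::string> names;
	bool Contains(const std::string &name) const {
		return names.count(name) > 0;
	}
};

void CreateOrReplace(const std::string &entry_name, const DependencySet &dependencies) {
	// The drop-then-create sequence would leave a dangling dependency if the
	// new definition depends on the very entry it replaces; reject it upfront.
	if (dependencies.Contains(entry_name)) {
		throw std::runtime_error("CREATE OR REPLACE is not allowed to depend on itself");
	}
	// ...drop the old entry, then create the replacement...
}

int main() {
	DependencySet deps;
	deps.names.insert("my_view"); // the replacement's definition references "my_view" itself
	try {
		CreateOrReplace("my_view", deps);
	} catch (std::exception &ex) {
		std::cout << ex.what() << '\n'; // rejected before anything is dropped
	}
}

The real check compares catalog entries rather than names, but the ordering is the point: the self-dependency is detected before DropEntry runs, so the existing object survives the failed statement.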
@@ -118,14 +120,9 @@ optional_ptr DuckSchemaEntry::CreateTable(CatalogTransaction trans auto &storage = table->GetStorage(); storage.info->cardinality = storage.GetTotalRows(); - auto entry = AddEntryInternal(transaction, std::move(table), info.Base().on_conflict, info.dependencies); - if (!entry) { - return nullptr; - } - // add a foreign key constraint in main key table if there is a foreign key constraint vector> fk_arrays; - FindForeignKeyInformation(*entry, AlterForeignKeyType::AFT_ADD, fk_arrays); + FindForeignKeyInformation(*table, AlterForeignKeyType::AFT_ADD, fk_arrays); for (idx_t i = 0; i < fk_arrays.size(); i++) { // alter primary key table auto &fk_info = *fk_arrays[i]; @@ -135,6 +132,12 @@ optional_ptr DuckSchemaEntry::CreateTable(CatalogTransaction trans auto &set = GetCatalogSet(CatalogType::TABLE_ENTRY); info.dependencies.AddDependency(*set.GetEntry(transaction, fk_info.name)); } + + auto entry = AddEntryInternal(transaction, std::move(table), info.Base().on_conflict, info.dependencies); + if (!entry) { + return nullptr; + } + return entry; } @@ -184,7 +187,7 @@ optional_ptr DuckSchemaEntry::CreateFunction(CatalogTransaction tr optional_ptr DuckSchemaEntry::AddEntry(CatalogTransaction transaction, unique_ptr entry, OnCreateConflict on_conflict) { - LogicalDependencyList dependencies; + LogicalDependencyList dependencies = entry->dependencies; return AddEntryInternal(transaction, std::move(entry), on_conflict, dependencies); } @@ -205,8 +208,7 @@ optional_ptr DuckSchemaEntry::CreateView(CatalogTransaction transa optional_ptr DuckSchemaEntry::CreateIndex(ClientContext &context, CreateIndexInfo &info, TableCatalogEntry &table) { - LogicalDependencyList dependencies; - dependencies.AddDependency(table); + info.dependencies.AddDependency(table); // currently, we can not alter PK/FK/UNIQUE constraints // concurrency-safe name checks against other INDEX catalog entries happens in the catalog @@ -215,6 +217,8 @@ optional_ptr DuckSchemaEntry::CreateIndex(ClientContext &context, } auto index = make_uniq(catalog, *this, info); + + LogicalDependencyList dependencies = index->dependencies; return AddEntryInternal(GetCatalogTransaction(context), std::move(index), info.on_conflict, dependencies); } @@ -273,6 +277,16 @@ void DuckSchemaEntry::Scan(CatalogType type, const std::function &callback) { + static const CatalogType ALL_SETS[] = { + CatalogType::TABLE_ENTRY, CatalogType::INDEX_ENTRY, CatalogType::TABLE_MACRO_ENTRY, + CatalogType::COPY_FUNCTION_ENTRY, CatalogType::PRAGMA_FUNCTION_ENTRY, CatalogType::MACRO_ENTRY, + CatalogType::SEQUENCE_ENTRY, CatalogType::COLLATION_ENTRY, CatalogType::TYPE_ENTRY}; + for (auto &set : ALL_SETS) { + GetCatalogSet(set).Scan(transaction, callback); + } +} + void DuckSchemaEntry::DropEntry(ClientContext &context, DropInfo &info) { auto &set = GetCatalogSet(info.type); @@ -289,7 +303,10 @@ void DuckSchemaEntry::DropEntry(ClientContext &context, DropInfo &info) { // if there is a foreign key constraint, get that information vector> fk_arrays; - FindForeignKeyInformation(*existing_entry, AlterForeignKeyType::AFT_DELETE, fk_arrays); + if (existing_entry->type == CatalogType::TABLE_ENTRY) { + FindForeignKeyInformation(existing_entry->Cast(), AlterForeignKeyType::AFT_DELETE, + fk_arrays); + } if (!set.DropEntry(transaction, info.name, info.cascade, info.allow_drop_internal)) { throw InternalException("Could not drop element because of an internal error"); diff --git a/src/catalog/catalog_entry/duck_table_entry.cpp 
b/src/catalog/catalog_entry/duck_table_entry.cpp index 994b98b26a82..d9e710d7b69c 100644 --- a/src/catalog/catalog_entry/duck_table_entry.cpp +++ b/src/catalog/catalog_entry/duck_table_entry.cpp @@ -334,14 +334,14 @@ unique_ptr DuckTableEntry::AddColumn(ClientContext &context, AddCo for (auto &constraint : constraints) { create_info->constraints.push_back(constraint->Copy()); } - Binder::BindLogicalType(context, info.new_column.TypeMutable(), &catalog, schema.name); + auto binder = Binder::CreateBinder(context); + binder->BindLogicalType(info.new_column.TypeMutable(), &catalog, schema.name); info.new_column.SetOid(columns.LogicalColumnCount()); info.new_column.SetStorageOid(columns.PhysicalColumnCount()); auto col = info.new_column.Copy(); create_info->columns.AddColumn(std::move(col)); - auto binder = Binder::CreateBinder(context); auto bound_create_info = binder->BindCreateTableInfo(std::move(create_info), schema); auto new_storage = make_shared(context, *storage, info.new_column, *bound_create_info->bound_defaults.back()); @@ -577,7 +577,8 @@ unique_ptr DuckTableEntry::DropNotNull(ClientContext &context, Dro } unique_ptr DuckTableEntry::ChangeColumnType(ClientContext &context, ChangeColumnTypeInfo &info) { - Binder::BindLogicalType(context, info.target_type, &catalog, schema.name); + auto binder = Binder::CreateBinder(context); + binder->BindLogicalType(info.target_type, &catalog, schema.name); auto change_idx = GetColumnIndex(info.column_name); auto create_info = make_uniq(schema, name); create_info->temporary = temporary; @@ -643,7 +644,6 @@ unique_ptr DuckTableEntry::ChangeColumnType(ClientContext &context create_info->constraints.push_back(std::move(constraint)); } - auto binder = Binder::CreateBinder(context); // bind the specified expression vector bound_columns; AlterBinder expr_binder(*binder, context, *this, bound_columns, info.target_type); @@ -763,7 +763,7 @@ void DuckTableEntry::SetAsRoot() { void DuckTableEntry::CommitAlter(string &column_name) { D_ASSERT(!column_name.empty()); - idx_t removed_index = DConstants::INVALID_INDEX; + optional_idx removed_index; for (auto &col : columns.Logical()) { if (col.Name() == column_name) { // No need to alter storage, removed column is generated column @@ -774,8 +774,7 @@ void DuckTableEntry::CommitAlter(string &column_name) { break; } } - D_ASSERT(removed_index != DConstants::INVALID_INDEX); - storage->CommitDropColumn(columns.LogicalToPhysical(LogicalIndex(removed_index)).index); + storage->CommitDropColumn(columns.LogicalToPhysical(LogicalIndex(removed_index.GetIndex())).index); } void DuckTableEntry::CommitDrop() { diff --git a/src/catalog/catalog_entry/index_catalog_entry.cpp b/src/catalog/catalog_entry/index_catalog_entry.cpp index b6da579bc123..9f762932f63f 100644 --- a/src/catalog/catalog_entry/index_catalog_entry.cpp +++ b/src/catalog/catalog_entry/index_catalog_entry.cpp @@ -7,6 +7,7 @@ IndexCatalogEntry::IndexCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schem index_type(info.index_type), index_constraint_type(info.constraint_type), column_ids(info.column_ids) { this->temporary = info.temporary; + this->dependencies = info.dependencies; this->comment = info.comment; } @@ -21,6 +22,7 @@ unique_ptr IndexCatalogEntry::GetInfo() const { result->index_type = index_type; result->constraint_type = index_constraint_type; result->column_ids = column_ids; + result->dependencies = dependencies; for (auto &expr : expressions) { result->expressions.push_back(expr->Copy()); diff --git 
a/src/catalog/catalog_entry/macro_catalog_entry.cpp b/src/catalog/catalog_entry/macro_catalog_entry.cpp index 3ac57460b62d..a1657bc2ab3e 100644 --- a/src/catalog/catalog_entry/macro_catalog_entry.cpp +++ b/src/catalog/catalog_entry/macro_catalog_entry.cpp @@ -42,6 +42,7 @@ unique_ptr MacroCatalogEntry::GetInfo() const { info->schema = schema.name; info->name = name; info->function = function->Copy(); + info->dependencies = dependencies; info->comment = comment; return std::move(info); } diff --git a/src/catalog/catalog_entry/sequence_catalog_entry.cpp b/src/catalog/catalog_entry/sequence_catalog_entry.cpp index 936fae03854f..a104799d68e0 100644 --- a/src/catalog/catalog_entry/sequence_catalog_entry.cpp +++ b/src/catalog/catalog_entry/sequence_catalog_entry.cpp @@ -98,6 +98,7 @@ unique_ptr SequenceCatalogEntry::GetInfo() const { result->max_value = seq_data.max_value; result->start_value = seq_data.counter; result->cycle = seq_data.cycle; + result->dependencies = dependencies; result->comment = comment; return std::move(result); } diff --git a/src/catalog/catalog_entry/table_catalog_entry.cpp b/src/catalog/catalog_entry/table_catalog_entry.cpp index f89d214f5465..ac69bd09e8af 100644 --- a/src/catalog/catalog_entry/table_catalog_entry.cpp +++ b/src/catalog/catalog_entry/table_catalog_entry.cpp @@ -21,6 +21,7 @@ TableCatalogEntry::TableCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schem : StandardEntry(CatalogType::TABLE_ENTRY, schema, catalog, info.table), columns(std::move(info.columns)), constraints(std::move(info.constraints)) { this->temporary = info.temporary; + this->dependencies = info.dependencies; this->comment = info.comment; } @@ -62,6 +63,7 @@ unique_ptr TableCatalogEntry::GetInfo() const { result->table = name; result->columns = columns.Copy(); result->constraints.reserve(constraints.size()); + result->dependencies = dependencies; std::for_each(constraints.begin(), constraints.end(), [&result](const unique_ptr &c) { result->constraints.emplace_back(c->Copy()); }); result->comment = comment; diff --git a/src/catalog/catalog_entry/type_catalog_entry.cpp b/src/catalog/catalog_entry/type_catalog_entry.cpp index 4e04ee26b1bd..1e5b31a523c5 100644 --- a/src/catalog/catalog_entry/type_catalog_entry.cpp +++ b/src/catalog/catalog_entry/type_catalog_entry.cpp @@ -13,6 +13,7 @@ TypeCatalogEntry::TypeCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, : StandardEntry(CatalogType::TYPE_ENTRY, schema, catalog, info.name), user_type(info.type) { this->temporary = info.temporary; this->internal = info.internal; + this->dependencies = info.dependencies; this->comment = info.comment; } @@ -29,6 +30,7 @@ unique_ptr TypeCatalogEntry::GetInfo() const { result->schema = schema.name; result->name = name; result->type = user_type; + result->dependencies = dependencies; result->comment = comment; return std::move(result); } diff --git a/src/catalog/catalog_entry/view_catalog_entry.cpp b/src/catalog/catalog_entry/view_catalog_entry.cpp index 8d3a0debb8ad..7cddf913b868 100644 --- a/src/catalog/catalog_entry/view_catalog_entry.cpp +++ b/src/catalog/catalog_entry/view_catalog_entry.cpp @@ -20,6 +20,7 @@ void ViewCatalogEntry::Initialize(CreateViewInfo &info) { this->temporary = info.temporary; this->sql = info.sql; this->internal = info.internal; + this->dependencies = info.dependencies; this->comment = info.comment; this->column_comments = info.column_comments; } @@ -38,6 +39,7 @@ unique_ptr ViewCatalogEntry::GetInfo() const { result->aliases = aliases; result->names = names; result->types = 
types; + result->dependencies = dependencies; result->temporary = temporary; result->comment = comment; result->column_comments = column_comments; diff --git a/src/catalog/catalog_entry_retriever.cpp b/src/catalog/catalog_entry_retriever.cpp new file mode 100644 index 000000000000..db0d0d423984 --- /dev/null +++ b/src/catalog/catalog_entry_retriever.cpp @@ -0,0 +1,64 @@ +#include "duckdb/catalog/catalog_entry_retriever.hpp" +#include "duckdb/catalog/catalog.hpp" +#include "duckdb/catalog/catalog_entry.hpp" +#include "duckdb/parser/query_error_context.hpp" +#include "duckdb/main/client_context.hpp" +#include "duckdb/common/enums/on_entry_not_found.hpp" +#include "duckdb/common/enums/catalog_type.hpp" +#include "duckdb/common/optional_ptr.hpp" +#include "duckdb/catalog/catalog_entry/type_catalog_entry.hpp" + +namespace duckdb { + +LogicalType CatalogEntryRetriever::GetType(Catalog &catalog, const string &schema, const string &name, + OnEntryNotFound on_entry_not_found) { + QueryErrorContext error_context; + auto result = GetEntry(CatalogType::TYPE_ENTRY, catalog, schema, name, on_entry_not_found, error_context); + if (!result) { + return LogicalType::INVALID; + } + auto &type_entry = result->Cast(); + return type_entry.user_type; +} + +LogicalType CatalogEntryRetriever::GetType(const string &catalog, const string &schema, const string &name, + OnEntryNotFound on_entry_not_found) { + QueryErrorContext error_context; + auto result = GetEntry(CatalogType::TYPE_ENTRY, catalog, schema, name, on_entry_not_found, error_context); + if (!result) { + return LogicalType::INVALID; + } + auto &type_entry = result->Cast(); + return type_entry.user_type; +} + +optional_ptr CatalogEntryRetriever::GetEntry(CatalogType type, const string &catalog, + const string &schema, const string &name, + OnEntryNotFound on_entry_not_found, + QueryErrorContext error_context) { + return GetEntryInternal( + [&]() { return Catalog::GetEntry(context, type, catalog, schema, name, on_entry_not_found, error_context); }); +} + +optional_ptr CatalogEntryRetriever::GetSchema(const string &catalog, const string &name, + OnEntryNotFound on_entry_not_found, + QueryErrorContext error_context) { + auto result = Catalog::GetSchema(context, catalog, name, on_entry_not_found, error_context); + if (!result) { + return result; + } + if (callback) { + // Call the callback if it's set + callback(*result); + } + return result; +} + +optional_ptr CatalogEntryRetriever::GetEntry(CatalogType type, Catalog &catalog, const string &schema, + const string &name, OnEntryNotFound on_entry_not_found, + QueryErrorContext error_context) { + return GetEntryInternal( + [&]() { return catalog.GetEntry(context, type, schema, name, on_entry_not_found, error_context); }); +} + +} // namespace duckdb diff --git a/src/catalog/catalog_set.cpp b/src/catalog/catalog_set.cpp index d167fd90d31e..1a9f533105a6 100644 --- a/src/catalog/catalog_set.cpp +++ b/src/catalog/catalog_set.cpp @@ -565,6 +565,20 @@ optional_ptr CatalogSet::GetEntry(ClientContext &context, const st return GetEntry(catalog.GetCatalogTransaction(context), name); } +optional_ptr CatalogSet::GetEntry(const string &name) { + unique_lock read_lock(catalog_lock); + auto entry_value = map.GetEntry(name); + if (!entry_value) { + return nullptr; + } + auto &entry = *entry_value; + auto &committed_entry = GetCommittedEntry(entry); + if (committed_entry.deleted) { + return nullptr; + } + return &committed_entry; +} + void CatalogSet::UpdateTimestamp(CatalogEntry &entry, transaction_t timestamp) { 
entry.timestamp = timestamp; } diff --git a/src/catalog/default/default_functions.cpp b/src/catalog/default/default_functions.cpp index 9e5adcae23d6..ee8ba09bb32c 100644 --- a/src/catalog/default/default_functions.cpp +++ b/src/catalog/default/default_functions.cpp @@ -9,7 +9,7 @@ namespace duckdb { -static DefaultMacro internal_macros[] = { +static const DefaultMacro internal_macros[] = { {DEFAULT_SCHEMA, "current_role", {nullptr}, "'duckdb'"}, // user name of current execution context {DEFAULT_SCHEMA, "current_user", {nullptr}, "'duckdb'"}, // user name of current execution context {DEFAULT_SCHEMA, "current_catalog", {nullptr}, "current_database()"}, // name of current database (called "catalog" in the SQL standard) @@ -171,7 +171,7 @@ static DefaultMacro internal_macros[] = { {nullptr, nullptr, {nullptr}, nullptr} }; -unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalTableMacroInfo(DefaultMacro &default_macro, unique_ptr<MacroFunction> function) { +unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalTableMacroInfo(const DefaultMacro &default_macro, unique_ptr<MacroFunction> function) { for (idx_t param_idx = 0; default_macro.parameters[param_idx] != nullptr; param_idx++) { function->parameters.push_back( make_uniq<ColumnRefExpression>(default_macro.parameters[param_idx])); @@ -188,7 +188,7 @@ unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalTableMacroIn } -unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(DefaultMacro &default_macro) { +unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(const DefaultMacro &default_macro) { // parse the expression auto expressions = Parser::ParseExpressionList(default_macro.macro); D_ASSERT(expressions.size() == 1); @@ -197,7 +197,7 @@ unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(De return CreateInternalTableMacroInfo(default_macro, std::move(result)); } -unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalTableMacroInfo(DefaultMacro &default_macro) { +unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalTableMacroInfo(const DefaultMacro &default_macro) { Parser parser; parser.ParseQuery(default_macro.macro); D_ASSERT(parser.statements.size() == 1); diff --git a/src/catalog/default/default_schemas.cpp b/src/catalog/default/default_schemas.cpp index bda6f6abc9f5..72a95da7b5ec 100644 --- a/src/catalog/default/default_schemas.cpp +++ b/src/catalog/default/default_schemas.cpp @@ -9,7 +9,7 @@ struct DefaultSchema { const char *name; }; -static DefaultSchema internal_schemas[] = {{"information_schema"}, {"pg_catalog"}, {nullptr}}; +static const DefaultSchema internal_schemas[] = {{"information_schema"}, {"pg_catalog"}, {nullptr}}; static bool GetDefaultSchema(const string &input_schema) { auto schema = StringUtil::Lower(input_schema); diff --git a/src/catalog/default/default_views.cpp b/src/catalog/default/default_views.cpp index 1e48e1b48edd..967d1d576ebc 100644 --- a/src/catalog/default/default_views.cpp +++ b/src/catalog/default/default_views.cpp @@ -12,7 +12,7 @@ struct DefaultView { const char *sql; }; -static DefaultView internal_views[] = { +static const DefaultView internal_views[] = { {DEFAULT_SCHEMA, "pragma_database_list", "SELECT database_oid AS seq, database_name AS name, path AS file FROM duckdb_databases() WHERE NOT internal ORDER BY 1"}, {DEFAULT_SCHEMA, "sqlite_master", "select 'table' \"type\", table_name \"name\", table_name \"tbl_name\", 0 rootpage, sql from duckdb_tables union all select 'view' \"type\", view_name \"name\", view_name \"tbl_name\", 0 rootpage, sql from duckdb_views union all select 'index' \"type\", index_name \"name\", table_name
\"tbl_name\", 0 rootpage, sql from duckdb_indexes;"}, {DEFAULT_SCHEMA, "sqlite_schema", "SELECT * FROM sqlite_master"}, diff --git a/src/catalog/dependency_list.cpp b/src/catalog/dependency_list.cpp index 1ef3be15fbc4..d0cb3bed4386 100644 --- a/src/catalog/dependency_list.cpp +++ b/src/catalog/dependency_list.cpp @@ -61,6 +61,22 @@ LogicalDependency::LogicalDependency(CatalogEntry &entry) { } } +void LogicalDependency::Serialize(Serializer &serializer) const { + serializer.WriteProperty(0, "name", entry.name); + serializer.WriteProperty(1, "schema", entry.schema); + serializer.WriteProperty(2, "catalog", catalog); + serializer.WriteProperty(3, "type", entry.type); +} + +LogicalDependency LogicalDependency::Deserialize(Deserializer &deserializer) { + LogicalDependency dependency; + dependency.entry.name = deserializer.ReadProperty(0, "name"); + dependency.entry.schema = deserializer.ReadProperty(1, "schema"); + dependency.catalog = deserializer.ReadProperty(2, "catalog"); + dependency.entry.type = deserializer.ReadProperty(3, "type"); + return dependency; +} + bool LogicalDependency::operator==(const LogicalDependency &other) const { return other.entry.name == entry.name && other.entry.schema == entry.schema && other.entry.type == entry.type; } @@ -90,10 +106,20 @@ void LogicalDependencyList::VerifyDependencies(Catalog &catalog, const string &n } } +void LogicalDependencyList::Serialize(Serializer &serializer) const { + serializer.WriteProperty(0, "logical_dependencies", set); +} + const LogicalDependencyList::create_info_set_t &LogicalDependencyList::Set() const { return set; } +LogicalDependencyList LogicalDependencyList::Deserialize(Deserializer &deserializer) { + LogicalDependencyList dependency; + dependency.set = deserializer.ReadProperty(0, "logical_dependencies"); + return dependency; +} + bool LogicalDependencyList::operator==(const LogicalDependencyList &other) const { if (set.size() != other.set.size()) { return false; diff --git a/src/catalog/dependency_manager.cpp b/src/catalog/dependency_manager.cpp index 86b1fb251b1e..e293ef7e5210 100644 --- a/src/catalog/dependency_manager.cpp +++ b/src/catalog/dependency_manager.cpp @@ -255,7 +255,7 @@ void DependencyManager::CreateDependency(CatalogTransaction transaction, Depende } void DependencyManager::CreateDependencies(CatalogTransaction transaction, const CatalogEntry &object, - const LogicalDependencyList &dependencies) { + const LogicalDependencyList &unfiltered_dependencies) { DependencyDependentFlags dependency_flags; if (object.type != CatalogType::INDEX_ENTRY) { // indexes do not require CASCADE to be dropped, they are simply always dropped along with the table @@ -263,14 +263,16 @@ void DependencyManager::CreateDependencies(CatalogTransaction transaction, const } const auto object_info = GetLookupProperties(object); + LogicalDependencyList dependencies; // check for each object in the sources if they were not deleted yet - for (auto &dependency : dependencies.Set()) { + for (auto &dependency : unfiltered_dependencies.Set()) { if (dependency.catalog != object.ParentCatalog().GetName()) { - throw DependencyException( - "Error adding dependency for object \"%s\" - dependency \"%s\" is in catalog " - "\"%s\", which does not match the catalog \"%s\".\nCross catalog dependencies are not supported.", - object.name, dependency.entry.name, dependency.catalog, object.ParentCatalog().GetName()); + continue; + } + if (object_info == dependency.entry) { + continue; } + dependencies.AddDependency(dependency); } // add the object to the 
dependents_map of each object that it depends on @@ -408,9 +410,16 @@ void DependencyManager::AlterObject(CatalogTransaction transaction, CatalogEntry // It makes no sense to have a schema depend on anything D_ASSERT(dep.EntryInfo().type != CatalogType::SCHEMA_ENTRY); - throw DependencyException("Cannot alter entry \"%s\" because there are entries that " - "depend on it.", - old_obj.name); + if (dep.EntryInfo().type == CatalogType::INDEX_ENTRY) { + // FIXME: this is only done because the table name is baked into the SQL of the Index Entry. + // If we update that, then there is no reason this has to throw an exception. + + // conflict: attempting to alter this object but the dependent object still exists + // no cascade and there are objects that depend on this object: throw error + throw DependencyException("Cannot alter entry \"%s\" because there are entries that " + "depend on it.", + old_obj.name); + } auto dep_info = DependencyInfo::FromDependent(dep); dep_info.subject.entry = new_info; @@ -444,6 +453,99 @@ void DependencyManager::AlterObject(CatalogTransaction transaction, CatalogEntry } } +bool AllExportDependenciesWritten(const catalog_entry_vector_t &dependencies, catalog_entry_set_t &exported) { + for (auto &entry : dependencies) { + auto &dep = entry.get().Cast<DependencyEntry>(); + // This is an entry that needs to be written before 'object' can be written + bool contains = false; + for (auto &to_check : exported) { + LogicalDependency a(entry); + LogicalDependency b(to_check); + + if (a == b) { + contains = true; + break; + } + auto &flags = dep.Subject().flags; + if (flags.IsOwnership()) { + // 'object' is owned by this entry + // it needs to be written first + contains = true; + break; + } + } + if (!contains) { + return false; + } + // We do not need to check recursively: if the object is written, + // that means that the objects it depends on have also been written + } + return true; +} + +void AddDependentsToBacklog(stack<reference<CatalogEntry>> &backlog, const catalog_entry_vector_t &dependents) { + for (auto &dependent : dependents) { + backlog.push(dependent); + } +} + +catalog_entry_vector_t DependencyManager::GetExportOrder(CatalogTransaction &transaction) { + auto all_entries = catalog.GetNonSystemEntries(transaction); + CatalogEntryOrdering ordering; + auto &entries = ordering.ordered_set; + auto &export_order = ordering.ordered_vector; + + stack<reference<CatalogEntry>> backlog; + for (auto &obj : all_entries) { + if (obj.get().type == CatalogType::SCHEMA_ENTRY) { + export_order.push_back(obj); + entries.insert(obj); + continue; + } + backlog.push(obj); + } + + while (!backlog.empty()) { + // As long as we still have unordered entries + auto &object = backlog.top(); + backlog.pop(); + const auto info = GetLookupProperties(object); + auto it = std::find_if(entries.begin(), entries.end(), [&](CatalogEntry &to_check) { + const auto other_info = GetLookupProperties(to_check); + return info == other_info; + }); + if (it != entries.end()) { + // This entry has already been written + continue; + } + + catalog_entry_vector_t dependencies; + DependencyCatalogSet subjects_map(Subjects(), info); + subjects_map.Scan(transaction, [&dependencies](CatalogEntry &entry) { dependencies.push_back(entry); }); + + bool is_ordered = AllExportDependenciesWritten(dependencies, entries); + if (!is_ordered) { + for (auto &dependency : dependencies) { + backlog.emplace(dependency); + } + continue; + } + + // All dependencies written, we can write this now + auto insert_result = entries.insert(object);
(void)insert_result; + D_ASSERT(insert_result.second); + auto entry = LookupEntry(transaction, object); + export_order.push_back(*entry); + catalog_entry_vector_t dependents; + DependencyCatalogSet dependents_map(Dependents(), info); + dependents_map.Scan(transaction, [&dependents](CatalogEntry &entry) { dependents.push_back(entry); }); + AddDependentsToBacklog(backlog, dependents); + } + + return std::move(ordering.ordered_vector); +} + void DependencyManager::Scan( ClientContext &context, const std::function &callback) { diff --git a/src/catalog/duck_catalog.cpp b/src/catalog/duck_catalog.cpp index ebf1bb53b72c..fc7b5ba062fc 100644 --- a/src/catalog/duck_catalog.cpp +++ b/src/catalog/duck_catalog.cpp @@ -106,9 +106,32 @@ void DuckCatalog::DropSchema(ClientContext &context, DropInfo &info) { DropSchema(GetCatalogTransaction(context), info); } +catalog_entry_vector_t DuckCatalog::GetNonSystemEntries(CatalogTransaction transaction) { + catalog_entry_vector_t all_entries; + catalog_entry_vector_t schema_entries; + ScanSchemas(transaction, [&schema_entries, &all_entries](SchemaCatalogEntry &schema) { + all_entries.push_back(schema); + schema_entries.push_back(schema); + }); + + for (auto &schema_p : schema_entries) { + auto &schema = schema_p.get().Cast(); + schema.ScanAll(transaction, [&all_entries](CatalogEntry &entry) { + if (entry.internal || entry.temporary) { + return; + } + all_entries.push_back(entry); + }); + } + return all_entries; +} + +void DuckCatalog::ScanSchemas(CatalogTransaction transaction, std::function callback) { + schemas->Scan(transaction, [&](CatalogEntry &entry) { callback(entry.Cast()); }); +} + void DuckCatalog::ScanSchemas(ClientContext &context, std::function callback) { - schemas->Scan(GetCatalogTransaction(context), - [&](CatalogEntry &entry) { callback(entry.Cast()); }); + ScanSchemas(GetCatalogTransaction(context), callback); } void DuckCatalog::ScanSchemas(std::function callback) { diff --git a/src/common/adbc/adbc.cpp b/src/common/adbc/adbc.cpp index 47d2ff88e9e0..d1489139465c 100644 --- a/src/common/adbc/adbc.cpp +++ b/src/common/adbc/adbc.cpp @@ -67,6 +67,8 @@ struct DuckDBAdbcStatementWrapper { char *ingestion_table_name; ArrowArrayStream ingestion_stream; IngestionMode ingestion_mode = IngestionMode::CREATE; + uint8_t *substrait_plan; + uint64_t plan_length; }; static AdbcStatusCode QueryInternal(struct AdbcConnection *connection, struct ArrowArrayStream *out, const char *query, @@ -168,11 +170,10 @@ AdbcStatusCode StatementSetSubstraitPlan(struct AdbcStatement *statement, const return ADBC_STATUS_INVALID_ARGUMENT; } auto wrapper = reinterpret_cast(statement->private_data); - auto plan_str = std::string(reinterpret_cast(plan), length); - auto query = "CALL from_substrait('" + plan_str + "'::BLOB)"; - auto res = duckdb_prepare(wrapper->connection, query.c_str(), &wrapper->statement); - auto error_msg = duckdb_prepare_error(wrapper->statement); - return CheckResult(res, error, error_msg); + wrapper->substrait_plan = (uint8_t *)malloc(sizeof(uint8_t) * (length)); + wrapper->plan_length = length; + memcpy(wrapper->substrait_plan, plan, length); + return ADBC_STATUS_OK; } AdbcStatusCode DatabaseSetOption(struct AdbcDatabase *database, const char *key, const char *value, @@ -643,6 +644,8 @@ AdbcStatusCode StatementNew(struct AdbcConnection *connection, struct AdbcStatem statement_wrapper->result = nullptr; statement_wrapper->ingestion_stream.release = nullptr; statement_wrapper->ingestion_table_name = nullptr; + statement_wrapper->substrait_plan = nullptr; + 
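GetExportOrder above is effectively a dependency-first traversal: an entry is appended to the export order only once everything it depends on has been written, and entries that are not ready yet are pushed back onto the backlog behind their missing dependencies. The same idea over a plain string-keyed map (names and the acyclicity assumption are illustrative; in the catalog itself the dependency manager keeps the graph acyclic):

#include <stack>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// deps maps every entry to the entries it depends on; the graph is assumed
// to be acyclic and every referenced name is assumed to be a key in deps.
std::vector<std::string> ExportOrder(const std::unordered_map<std::string, std::vector<std::string>> &deps) {
	std::vector<std::string> order;
	std::unordered_set<std::string> written;
	std::stack<std::string> backlog;
	for (auto &entry : deps) {
		backlog.push(entry.first);
	}
	while (!backlog.empty()) {
		auto current = backlog.top();
		backlog.pop();
		if (written.count(current)) {
			continue; // already exported
		}
		std::vector<std::string> missing;
		for (auto &dep : deps.at(current)) {
			if (!written.count(dep)) {
				missing.push_back(dep);
			}
		}
		if (!missing.empty()) {
			backlog.push(current); // revisit once the dependencies are written
			for (auto &dep : missing) {
				backlog.push(dep);
			}
			continue;
		}
		written.insert(current);
		order.push_back(current); // all dependencies written, safe to emit
	}
	return order;
}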
statement_wrapper->ingestion_mode = IngestionMode::CREATE; return ADBC_STATUS_OK; } @@ -668,6 +671,10 @@ AdbcStatusCode StatementRelease(struct AdbcStatement *statement, struct AdbcErro free(wrapper->ingestion_table_name); wrapper->ingestion_table_name = nullptr; } + if (wrapper->substrait_plan) { + free(wrapper->substrait_plan); + wrapper->substrait_plan = nullptr; + } free(statement->private_data); statement->private_data = nullptr; return ADBC_STATUS_OK; @@ -758,8 +765,24 @@ AdbcStatusCode StatementExecuteQuery(struct AdbcStatement *statement, struct Arr if (has_stream && to_table) { return IngestToTableFromBoundStream(wrapper, error); } - - if (has_stream) { + if (wrapper->substrait_plan != nullptr) { + auto plan_str = std::string(reinterpret_cast(wrapper->substrait_plan), wrapper->plan_length); + duckdb::vector params; + params.emplace_back(duckdb::Value::BLOB_RAW(plan_str)); + duckdb::unique_ptr query_result; + try { + query_result = + ((duckdb::Connection *)wrapper->connection)->TableFunction("from_substrait", params)->Execute(); + } catch (duckdb::Exception &e) { + std::string error_msg = "It was not possible to execute substrait query. " + std::string(e.what()); + SetError(error, error_msg); + return ADBC_STATUS_INVALID_ARGUMENT; + } + auto arrow_wrapper = new duckdb::ArrowResultWrapper(); + arrow_wrapper->result = + duckdb::unique_ptr_cast(std::move(query_result)); + wrapper->result = reinterpret_cast(arrow_wrapper); + } else if (has_stream) { // A stream was bound to the statement, use that to bind parameters duckdb::unique_ptr result; ArrowArrayStream stream = wrapper->ingestion_stream; diff --git a/src/common/enums/optimizer_type.cpp b/src/common/enums/optimizer_type.cpp index 828d7eac8db6..d3f13108e417 100644 --- a/src/common/enums/optimizer_type.cpp +++ b/src/common/enums/optimizer_type.cpp @@ -10,7 +10,7 @@ struct DefaultOptimizerType { OptimizerType type; }; -static DefaultOptimizerType internal_optimizer_types[] = { +static const DefaultOptimizerType internal_optimizer_types[] = { {"expression_rewriter", OptimizerType::EXPRESSION_REWRITER}, {"filter_pullup", OptimizerType::FILTER_PULLUP}, {"filter_pushdown", OptimizerType::FILTER_PUSHDOWN}, diff --git a/src/common/file_system.cpp b/src/common/file_system.cpp index baec6b136872..d2288b42f609 100644 --- a/src/common/file_system.cpp +++ b/src/common/file_system.cpp @@ -117,7 +117,7 @@ void FileSystem::SetWorkingDirectory(const string &path) { } } -idx_t FileSystem::GetAvailableMemory() { +optional_idx FileSystem::GetAvailableMemory() { errno = 0; #ifdef __MVS__ @@ -128,7 +128,7 @@ idx_t FileSystem::GetAvailableMemory() { idx_t max_memory = MinValue((idx_t)sysconf(_SC_PHYS_PAGES) * (idx_t)sysconf(_SC_PAGESIZE), UINTPTR_MAX); #endif if (errno != 0) { - return DConstants::INVALID_INDEX; + return optional_idx(); } return max_memory; } @@ -218,7 +218,7 @@ void FileSystem::SetWorkingDirectory(const string &path) { } } -idx_t FileSystem::GetAvailableMemory() { +optional_idx FileSystem::GetAvailableMemory() { ULONGLONG available_memory_kb; if (GetPhysicallyInstalledSystemMemory(&available_memory_kb)) { return MinValue(available_memory_kb * 1000, UINTPTR_MAX); @@ -230,7 +230,7 @@ idx_t FileSystem::GetAvailableMemory() { if (GlobalMemoryStatusEx(&mem_state)) { return MinValue(mem_state.ullTotalPhys, UINTPTR_MAX); } - return DConstants::INVALID_INDEX; + return optional_idx(); } string FileSystem::GetWorkingDirectory() { @@ -497,6 +497,10 @@ bool FileSystem::CanSeek() { throw NotImplementedException("%s: CanSeek is not implemented!", 
GetName()); } +bool FileSystem::IsManuallySet() { + return false; +} + unique_ptr<FileHandle> FileSystem::OpenCompressedFile(unique_ptr<FileHandle> handle, bool write) { throw NotImplementedException("%s: OpenCompressedFile is not implemented!", GetName()); } diff --git a/src/common/local_file_system.cpp b/src/common/local_file_system.cpp index 908498f77fe9..b05189b4ea0f 100644 --- a/src/common/local_file_system.cpp +++ b/src/common/local_file_system.cpp @@ -212,7 +212,7 @@ static string AdditionalProcessInfo(FileSystem &fs, pid_t pid) { try { auto cmdline_file = fs.OpenFile(StringUtil::Format("/proc/%d/cmdline", pid), FileFlags::FILE_FLAGS_READ); auto cmdline = cmdline_file->ReadLine(); - process_name = basename(const_cast<char *>(cmdline.c_str())); + process_name = basename(const_cast<char *>(cmdline.c_str())); // NOLINT: old C API does not take const } catch (std::exception &) { // ignore } diff --git a/src/common/multi_file_reader.cpp b/src/common/multi_file_reader.cpp index bbb59705e29f..5ba38299ef32 100644 --- a/src/common/multi_file_reader.cpp +++ b/src/common/multi_file_reader.cpp @@ -172,7 +172,7 @@ MultiFileReaderBindData MultiFileReader::BindOptions(MultiFileReaderOptions &opt } for (auto &part : partitions) { - idx_t hive_partitioning_index = DConstants::INVALID_INDEX; + idx_t hive_partitioning_index; auto lookup = std::find(names.begin(), names.end(), part.first); if (lookup != names.end()) { // hive partitioning column also exists in file - override diff --git a/src/common/serializer/buffered_file_writer.cpp b/src/common/serializer/buffered_file_writer.cpp index 96be0bb04443..dcbe7d4f0a45 100644 --- a/src/common/serializer/buffered_file_writer.cpp +++ b/src/common/serializer/buffered_file_writer.cpp @@ -37,7 +37,7 @@ void BufferedFileWriter::WriteData(const_data_ptr_t buffer, idx_t write_size) { Flush(); // Flush the buffer before writing everything else } idx_t remaining_to_write = write_size - to_copy; - fs.Write(*handle, const_cast<data_ptr_t>(buffer + to_copy), remaining_to_write); + fs.Write(*handle, const_cast<data_ptr_t>(buffer + to_copy), remaining_to_write); // NOLINT: wrong API in Write total_written += remaining_to_write; } else { // first copy anything we can from the buffer diff --git a/src/common/string_util.cpp b/src/common/string_util.cpp index e88ab38f8716..7898a343a927 100644 --- a/src/common/string_util.cpp +++ b/src/common/string_util.cpp @@ -229,7 +229,7 @@ bool StringUtil::CIEquals(const string &l1, const string &l2) { if (l1.size() != l2.size()) { return false; } - const auto charmap = LowerFun::ascii_to_lower_map; + const auto charmap = LowerFun::ASCII_TO_LOWER_MAP; for (idx_t c = 0; c < l1.size(); c++) { if (charmap[(uint8_t)l1[c]] != charmap[(uint8_t)l2[c]]) { return false; @@ -239,7 +239,7 @@ bool StringUtil::CILessThan(const string &s1, const string &s2) { - const auto charmap = UpperFun::ascii_to_upper_map; + const auto charmap = UpperFun::ASCII_TO_UPPER_MAP; unsigned char u1, u2; diff --git a/src/common/types/data_chunk.cpp b/src/common/types/data_chunk.cpp index 6e1b5ed89ef5..114beb1f49ef 100644 --- a/src/common/types/data_chunk.cpp +++ b/src/common/types/data_chunk.cpp @@ -218,7 +218,7 @@ void DataChunk::Flatten() { } } -vector<LogicalType> DataChunk::GetTypes() { +vector<LogicalType> DataChunk::GetTypes() const { vector<LogicalType> types; for (idx_t i = 0; i < ColumnCount(); i++) { types.push_back(data[i].GetType()); @@ -290,7 +290,7 @@ void DataChunk::Slice(const SelectionVector &sel_vector, idx_t count_p) { } } -void DataChunk::Slice(DataChunk &other, const SelectionVector &sel,
idx_t count_p, idx_t col_offset) { +void DataChunk::Slice(const DataChunk &other, const SelectionVector &sel, idx_t count_p, idx_t col_offset) { D_ASSERT(other.ColumnCount() <= col_offset + ColumnCount()); this->count = count_p; SelCache merge_cache; diff --git a/src/common/types/vector.cpp b/src/common/types/vector.cpp index a3039f772320..b177911278b0 100644 --- a/src/common/types/vector.cpp +++ b/src/common/types/vector.cpp @@ -78,11 +78,11 @@ Vector::Vector(Vector &other) : type(other.type) { Reference(other); } -Vector::Vector(Vector &other, const SelectionVector &sel, idx_t count) : type(other.type) { +Vector::Vector(const Vector &other, const SelectionVector &sel, idx_t count) : type(other.type) { Slice(other, sel, count); } -Vector::Vector(Vector &other, idx_t offset, idx_t end) : type(other.type) { +Vector::Vector(const Vector &other, idx_t offset, idx_t end) : type(other.type) { Slice(other, offset, end); } @@ -574,8 +574,8 @@ Value Vector::GetValueInternal(const Vector &v_p, idx_t index_p) { throw InternalException("FSST Vector with non-string datatype found!"); } auto str_compressed = reinterpret_cast(data)[index]; - Value result = FSSTPrimitives::DecompressValue(FSSTVector::GetDecoder(const_cast(*vector)), - str_compressed.GetData(), str_compressed.GetSize()); + Value result = FSSTPrimitives::DecompressValue(FSSTVector::GetDecoder(*vector), str_compressed.GetData(), + str_compressed.GetSize()); return result; } @@ -776,8 +776,8 @@ string Vector::ToString(idx_t count) const { case VectorType::FSST_VECTOR: { for (idx_t i = 0; i < count; i++) { string_t compressed_string = reinterpret_cast(data)[i]; - Value val = FSSTPrimitives::DecompressValue(FSSTVector::GetDecoder(const_cast(*this)), - compressed_string.GetData(), compressed_string.GetSize()); + Value val = FSSTPrimitives::DecompressValue(FSSTVector::GetDecoder(*this), compressed_string.GetData(), + compressed_string.GetSize()); retval += GetValue(i).ToString() + (i == count - 1 ? 
"" : ", "); } } break; @@ -2127,7 +2127,8 @@ const vector> &StructVector::GetEntries(const Vector &vector) //===--------------------------------------------------------------------===// // ListVector //===--------------------------------------------------------------------===// -const Vector &ListVector::GetEntry(const Vector &vector) { +template +T &ListVector::GetEntryInternal(T &vector) { D_ASSERT(vector.GetType().id() == LogicalTypeId::LIST || vector.GetType().id() == LogicalTypeId::MAP); if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) { auto &child = DictionaryVector::Child(vector); @@ -2137,12 +2138,15 @@ const Vector &ListVector::GetEntry(const Vector &vector) { vector.GetVectorType() == VectorType::CONSTANT_VECTOR); D_ASSERT(vector.auxiliary); D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::LIST_BUFFER); - return vector.auxiliary->Cast().GetChild(); + return vector.auxiliary->template Cast().GetChild(); +} + +const Vector &ListVector::GetEntry(const Vector &vector) { + return GetEntryInternal(vector); } Vector &ListVector::GetEntry(Vector &vector) { - const Vector &cvector = vector; - return const_cast(ListVector::GetEntry(cvector)); + return GetEntryInternal(vector); } void ListVector::Reserve(Vector &vector, idx_t required_capacity) { @@ -2496,7 +2500,8 @@ UnionInvalidReason UnionVector::CheckUnionValidity(Vector &vector_p, idx_t count //===--------------------------------------------------------------------===// // ArrayVector //===--------------------------------------------------------------------===// -const Vector &ArrayVector::GetEntry(const Vector &vector) { +template +T &ArrayVector::GetEntryInternal(T &vector) { D_ASSERT(vector.GetType().id() == LogicalTypeId::ARRAY); if (vector.GetVectorType() == VectorType::DICTIONARY_VECTOR) { auto &child = DictionaryVector::Child(vector); @@ -2506,12 +2511,15 @@ const Vector &ArrayVector::GetEntry(const Vector &vector) { vector.GetVectorType() == VectorType::CONSTANT_VECTOR); D_ASSERT(vector.auxiliary); D_ASSERT(vector.auxiliary->GetBufferType() == VectorBufferType::ARRAY_BUFFER); - return vector.auxiliary->Cast().GetChild(); + return vector.auxiliary->template Cast().GetChild(); +} + +const Vector &ArrayVector::GetEntry(const Vector &vector) { + return GetEntryInternal(vector); } Vector &ArrayVector::GetEntry(Vector &vector) { - const Vector &cvector = vector; - return const_cast(ArrayVector::GetEntry(cvector)); + return GetEntryInternal(vector); } idx_t ArrayVector::GetTotalSize(const Vector &vector) { diff --git a/src/common/virtual_file_system.cpp b/src/common/virtual_file_system.cpp index beaeeff59078..3bc099a2bd01 100644 --- a/src/common/virtual_file_system.cpp +++ b/src/common/virtual_file_system.cpp @@ -180,11 +180,18 @@ FileSystem &VirtualFileSystem::FindFileSystem(const string &path) { } FileSystem &VirtualFileSystem::FindFileSystemInternal(const string &path) { + FileSystem *fs = nullptr; for (auto &sub_system : sub_systems) { if (sub_system->CanHandleFile(path)) { - return *sub_system; + if (sub_system->IsManuallySet()) { + return *sub_system; + } + fs = sub_system.get(); } } + if (fs) { + return *fs; + } return *default_fs; } diff --git a/src/core_functions/aggregate/distributive/minmax.cpp b/src/core_functions/aggregate/distributive/minmax.cpp index aa324b23f310..d3a5dd49c682 100644 --- a/src/core_functions/aggregate/distributive/minmax.cpp +++ b/src/core_functions/aggregate/distributive/minmax.cpp @@ -607,12 +607,12 @@ unique_ptr BindMinMax(ClientContext &context, AggregateFunction &f 
FunctionBinder function_binder(context); vector types {arguments[0]->return_type, arguments[0]->return_type}; ErrorData error; - idx_t best_function = function_binder.BindFunction(func_entry.name, func_entry.functions, types, error); - if (best_function == DConstants::INVALID_INDEX) { + auto best_function = function_binder.BindFunction(func_entry.name, func_entry.functions, types, error); + if (!best_function.IsValid()) { throw BinderException(string("Fail to find corresponding function for collation min/max: ") + error.Message()); } - function = func_entry.functions.GetFunctionByOffset(best_function); + function = func_entry.functions.GetFunctionByOffset(best_function.GetIndex()); // Create a copied child and PushCollation for it. arguments.push_back(arguments[0]->Copy()); diff --git a/src/core_functions/core_functions.cpp b/src/core_functions/core_functions.cpp index 25a57f59c713..ef1687dc65af 100644 --- a/src/core_functions/core_functions.cpp +++ b/src/core_functions/core_functions.cpp @@ -6,7 +6,7 @@ namespace duckdb { template -void FillExtraInfo(StaticFunctionDefinition &function, T &info) { +void FillExtraInfo(const StaticFunctionDefinition &function, T &info) { info.internal = true; info.description = function.description; info.parameter_names = StringUtil::Split(function.parameters, ","); diff --git a/src/core_functions/function_list.cpp b/src/core_functions/function_list.cpp index 0652abbe061d..540752a4f48c 100644 --- a/src/core_functions/function_list.cpp +++ b/src/core_functions/function_list.cpp @@ -47,7 +47,7 @@ namespace duckdb { { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr } // this list is generated by scripts/generate_functions.py -static StaticFunctionDefinition internal_functions[] = { +static const StaticFunctionDefinition internal_functions[] = { DUCKDB_SCALAR_FUNCTION(FactorialOperatorFun), DUCKDB_SCALAR_FUNCTION_SET(BitwiseAndFun), DUCKDB_SCALAR_FUNCTION(PowOperatorFun), @@ -394,7 +394,7 @@ static StaticFunctionDefinition internal_functions[] = { FINAL_FUNCTION }; -StaticFunctionDefinition *StaticFunctionDefinition::GetFunctionList() { +const StaticFunctionDefinition *StaticFunctionDefinition::GetFunctionList() { return internal_functions; } diff --git a/src/core_functions/scalar/list/list_aggregates.cpp b/src/core_functions/scalar/list/list_aggregates.cpp index 7dabeb86acc9..fe7a95fcb11b 100644 --- a/src/core_functions/scalar/list/list_aggregates.cpp +++ b/src/core_functions/scalar/list/list_aggregates.cpp @@ -474,12 +474,12 @@ static unique_ptr ListAggregatesBind(ClientContext &context, Scala FunctionBinder function_binder(context); auto best_function_idx = function_binder.BindFunction(func.name, func.functions, types, error); - if (best_function_idx == DConstants::INVALID_INDEX) { + if (!best_function_idx.IsValid()) { throw BinderException("No matching aggregate function\n%s", error.Message()); } // found a matching function, bind it as an aggregate - auto best_function = func.functions.GetFunctionByOffset(best_function_idx); + auto best_function = func.functions.GetFunctionByOffset(best_function_idx.GetIndex()); if (IS_AGGR) { return ListAggregatesBindFunction(context, bound_function, child_type, best_function, arguments); } diff --git a/src/execution/index/art/art.cpp b/src/execution/index/art/art.cpp index 17fa823012c6..9fa44deafedf 100644 --- a/src/execution/index/art/art.cpp +++ b/src/execution/index/art/art.cpp @@ -133,13 +133,13 @@ unique_ptr ART::TryInitializeScan(const Transaction &transaction // match on a comparison type 
matcher.expr_type = make_uniq<ComparisonExpressionTypeMatcher>(); // match on a constant comparison with the indexed expression - matcher.matchers.push_back(make_uniq<ExpressionEqualityMatcher>(const_cast<Expression &>(index_expr))); + matcher.matchers.push_back(make_uniq<ExpressionEqualityMatcher>(index_expr)); matcher.matchers.push_back(make_uniq<ConstantExpressionMatcher>()); matcher.policy = SetMatcher::Policy::UNORDERED; vector<reference_wrapper<Expression>> bindings; - if (matcher.Match(const_cast<Expression &>(filter_expr), bindings)) { + if (matcher.Match(const_cast<Expression &>(filter_expr), bindings)) { // NOLINT: Match does not alter the expr // range or equality comparison with constant value // we can use our index here // bindings[0] = the expression diff --git a/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp b/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp index 08111c60f277..80bddc9d158f 100644 --- a/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +++ b/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp @@ -176,6 +176,11 @@ void CSVSniffer::AnalyzeDialectCandidate(unique_ptr scanner, } auto &sniffing_state_machine = scanner->GetStateMachine(); + if (!candidates.empty() && candidates.front()->ever_quoted && !scanner->ever_quoted) { + // Give preference to quoted candidates. + return; + } + best_consistent_rows = consistent_rows; max_columns_found = num_cols; prev_padding_count = padding_count; diff --git a/src/execution/operator/persistent/physical_export.cpp b/src/execution/operator/persistent/physical_export.cpp index 3979a88eeb59..f04567b91fca 100644 --- a/src/execution/operator/persistent/physical_export.cpp +++ b/src/execution/operator/persistent/physical_export.cpp @@ -9,15 +9,19 @@ #include "duckdb/parallel/pipeline.hpp" #include "duckdb/parser/keyword_helper.hpp" #include "duckdb/transaction/transaction.hpp" +#include "duckdb/catalog/duck_catalog.hpp" +#include "duckdb/catalog/dependency_manager.hpp" #include <algorithm> #include <sstream> namespace duckdb { +void ReorderTableEntries(catalog_entry_vector_t &tables); + using std::stringstream; -static void WriteCatalogEntries(stringstream &ss, vector<reference_wrapper<CatalogEntry>> &entries) { +static void WriteCatalogEntries(stringstream &ss, catalog_entry_vector_t &entries) { for (auto &entry : entries) { if (entry.get().internal) { continue; @@ -131,27 +135,20 @@ void PhysicalExport::ExtractEntries(ClientContext &context, vector(); - if (state.finished) { - return SourceResultType::FINISHED; +static void AddEntries(catalog_entry_vector_t &all_entries, catalog_entry_vector_t &to_add) { + for (auto &entry : to_add) { + all_entries.push_back(entry); } + to_add.clear(); +} - auto &ccontext = context.client; - auto &fs = FileSystem::GetFileSystem(ccontext); - +catalog_entry_vector_t PhysicalExport::GetNaiveExportOrder(ClientContext &context, Catalog &catalog) { // gather all catalog types to export ExportEntries entries; + auto schema_list = catalog.GetSchemas(context); + PhysicalExport::ExtractEntries(context, schema_list, entries); - auto schema_list = Catalog::GetSchemas(ccontext, info->catalog); - ExtractEntries(context.client, schema_list, entries); - - // consider the order of tables because of foreign key constraint - entries.tables.clear(); - for (idx_t i = 0; i < exported_tables.data.size(); i++) { - entries.tables.push_back(exported_tables.data[i].entry); - } + ReorderTableEntries(entries.tables); // order macros by timestamp so nested macros are imported nicely sort(entries.macros.begin(), entries.macros.end(), @@ -159,18 +156,52 @@ SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &c return lhs.get().oid < rhs.get().oid; }); - // write the schema.sql
file + catalog_entry_vector_t catalog_entries; + idx_t size = 0; + size += entries.schemas.size(); + size += entries.custom_types.size(); + size += entries.sequences.size(); + size += entries.tables.size(); + size += entries.views.size(); + size += entries.indexes.size(); + size += entries.macros.size(); + catalog_entries.reserve(size); + AddEntries(catalog_entries, entries.schemas); + AddEntries(catalog_entries, entries.custom_types); + AddEntries(catalog_entries, entries.sequences); + AddEntries(catalog_entries, entries.tables); + AddEntries(catalog_entries, entries.views); + AddEntries(catalog_entries, entries.indexes); + AddEntries(catalog_entries, entries.macros); + return catalog_entries; +} + +SourceResultType PhysicalExport::GetData(ExecutionContext &context, DataChunk &chunk, + OperatorSourceInput &input) const { + auto &state = input.global_state.Cast(); + if (state.finished) { + return SourceResultType::FINISHED; + } + + auto &ccontext = context.client; + auto &fs = FileSystem::GetFileSystem(ccontext); + + auto &catalog = Catalog::GetCatalog(ccontext, info->catalog); + // export order is SCHEMA -> SEQUENCE -> TABLE -> VIEW -> INDEX + catalog_entry_vector_t catalog_entries; + if (catalog.IsDuckCatalog()) { + auto &duck_catalog = catalog.Cast(); + auto &dependency_manager = duck_catalog.GetDependencyManager(); + auto transaction = catalog.GetCatalogTransaction(context.client); + catalog_entries = dependency_manager.GetExportOrder(transaction); + } else { + catalog_entries = GetNaiveExportOrder(context.client, catalog); + } + // write the schema.sql file stringstream ss; - WriteCatalogEntries(ss, entries.schemas); - WriteCatalogEntries(ss, entries.custom_types); - WriteCatalogEntries(ss, entries.sequences); - WriteCatalogEntries(ss, entries.tables); - WriteCatalogEntries(ss, entries.views); - WriteCatalogEntries(ss, entries.indexes); - WriteCatalogEntries(ss, entries.macros); - + WriteCatalogEntries(ss, catalog_entries); WriteStringStreamToFile(fs, ss, fs.JoinPath(info->file_path, "schema.sql")); // write the load.sql file diff --git a/src/execution/window_segment_tree.cpp b/src/execution/window_segment_tree.cpp index c6ec899898fe..5ea8429a2998 100644 --- a/src/execution/window_segment_tree.cpp +++ b/src/execution/window_segment_tree.cpp @@ -470,7 +470,7 @@ WindowNaiveState::WindowNaiveState(const WindowNaiveAggregator &gstate) row_set(STANDARD_VECTOR_SIZE, hash_row, equal_row) { InitSubFrames(frames, gstate.exclude_mode); - auto &inputs = const_cast(gstate.GetInputs()); + auto &inputs = gstate.GetInputs(); if (inputs.ColumnCount() > 0) { leaves.Initialize(Allocator::DefaultAllocator(), inputs.GetTypes()); } @@ -494,9 +494,8 @@ void WindowNaiveState::FlushStates() { return; } - auto &inputs = const_cast(gstate.GetInputs()); - leaves.Reference(inputs); - leaves.Slice(update_sel, flush_count); + auto &inputs = gstate.GetInputs(); + leaves.Slice(inputs, update_sel, flush_count); auto &aggr = gstate.aggr; AggregateInputData aggr_input_data(aggr.GetFunctionData(), allocator); @@ -506,19 +505,18 @@ void WindowNaiveState::FlushStates() { } size_t WindowNaiveState::Hash(idx_t rid) { - auto &inputs = const_cast(gstate.GetInputs()); - leaves.Reference(inputs); + auto &inputs = gstate.GetInputs(); auto s = UnsafeNumericCast(rid); SelectionVector sel(&s); - leaves.Slice(sel, 1); + leaves.Slice(inputs, sel, 1); leaves.Hash(hashes); return *FlatVector::GetData(hashes); } bool WindowNaiveState::KeyEqual(const idx_t &lhs, const idx_t &rhs) { - auto &inputs = const_cast(gstate.GetInputs()); + auto 
&inputs = gstate.GetInputs(); auto l = UnsafeNumericCast(lhs); SelectionVector lsel(&l); @@ -644,7 +642,7 @@ class WindowSegmentTreePart { enum FramePart : uint8_t { FULL = 0, LEFT = 1, RIGHT = 2 }; - WindowSegmentTreePart(ArenaAllocator &allocator, const AggregateObject &aggr, DataChunk &inputs, + WindowSegmentTreePart(ArenaAllocator &allocator, const AggregateObject &aggr, const DataChunk &inputs, const ValidityMask &filter_mask); ~WindowSegmentTreePart(); @@ -681,7 +679,7 @@ class WindowSegmentTreePart { //! Order insensitive aggregate (we can optimise internal combines) const bool order_insensitive; //! The partition arguments - DataChunk &inputs; + const DataChunk &inputs; //! The filtered rows in inputs const ValidityMask &filter_mask; //! The size of a single aggregate state @@ -706,14 +704,14 @@ class WindowSegmentTreePart { class WindowSegmentTreeState : public WindowAggregatorState { public: - WindowSegmentTreeState(const AggregateObject &aggr, DataChunk &inputs, const ValidityMask &filter_mask) + WindowSegmentTreeState(const AggregateObject &aggr, const DataChunk &inputs, const ValidityMask &filter_mask) : aggr(aggr), inputs(inputs), filter_mask(filter_mask), part(allocator, aggr, inputs, filter_mask) { } //! The aggregate function const AggregateObject &aggr; //! The aggregate function - DataChunk &inputs; + const DataChunk &inputs; //! The filtered rows in inputs const ValidityMask &filter_mask; //! The left (default) segment tree part @@ -722,8 +720,8 @@ class WindowSegmentTreeState : public WindowAggregatorState { unique_ptr right_part; }; -WindowSegmentTreePart::WindowSegmentTreePart(ArenaAllocator &allocator, const AggregateObject &aggr, DataChunk &inputs, - const ValidityMask &filter_mask) +WindowSegmentTreePart::WindowSegmentTreePart(ArenaAllocator &allocator, const AggregateObject &aggr, + const DataChunk &inputs, const ValidityMask &filter_mask) : allocator(allocator), aggr(aggr), order_insensitive(aggr.function.order_dependent == AggregateOrderDependent::NOT_ORDER_DEPENDENT), inputs(inputs), filter_mask(filter_mask), state_size(aggr.function.state_size()), state(state_size * STANDARD_VECTOR_SIZE), @@ -749,7 +747,7 @@ WindowSegmentTreePart::~WindowSegmentTreePart() { } unique_ptr WindowSegmentTree::GetLocalState() const { - return make_uniq(aggr, const_cast(inputs), filter_mask); + return make_uniq(aggr, inputs, filter_mask); } void WindowSegmentTreePart::FlushStates(bool combining) { @@ -762,8 +760,7 @@ void WindowSegmentTreePart::FlushStates(bool combining) { statel.Verify(flush_count); aggr.function.combine(statel, statep, aggr_input_data, flush_count); } else { - leaves.Reference(inputs); - leaves.Slice(filter_sel, flush_count); + leaves.Slice(inputs, filter_sel, flush_count); aggr.function.update(&leaves.data[0], aggr_input_data, leaves.ColumnCount(), statep, flush_count); } @@ -1382,7 +1379,7 @@ WindowDistinctAggregator::DistinctSortTree::DistinctSortTree(ZippedElements &&pr class WindowDistinctState : public WindowAggregatorState { public: - WindowDistinctState(const AggregateObject &aggr, DataChunk &inputs, const WindowDistinctAggregator &tree); + WindowDistinctState(const AggregateObject &aggr, const DataChunk &inputs, const WindowDistinctAggregator &tree); void Evaluate(const DataChunk &bounds, Vector &result, idx_t count, idx_t row_idx); @@ -1393,7 +1390,7 @@ class WindowDistinctState : public WindowAggregatorState { //! The aggregate function const AggregateObject &aggr; //! The aggregate function - DataChunk &inputs; + const DataChunk &inputs; //! 
The merge sort tree data const WindowDistinctAggregator &tree; //! The size of a single aggregate state @@ -1412,7 +1409,7 @@ class WindowDistinctState : public WindowAggregatorState { SubFrames frames; }; -WindowDistinctState::WindowDistinctState(const AggregateObject &aggr, DataChunk &inputs, +WindowDistinctState::WindowDistinctState(const AggregateObject &aggr, const DataChunk &inputs, const WindowDistinctAggregator &tree) : aggr(aggr), inputs(inputs), tree(tree), state_size(aggr.function.state_size()), state((state_size * STANDARD_VECTOR_SIZE)), statef(LogicalType::POINTER), statep(LogicalType::POINTER), @@ -1488,7 +1485,7 @@ void WindowDistinctState::Evaluate(const DataChunk &bounds, Vector &result, idx_ } unique_ptr WindowDistinctAggregator::GetLocalState() const { - return make_uniq(aggr, const_cast(inputs), *this); + return make_uniq(aggr, inputs, *this); } void WindowDistinctAggregator::Evaluate(WindowAggregatorState &lstate, const DataChunk &bounds, Vector &result, diff --git a/src/function/aggregate/sorted_aggregate_function.cpp b/src/function/aggregate/sorted_aggregate_function.cpp index 9011b4cf1002..78964dca3489 100644 --- a/src/function/aggregate/sorted_aggregate_function.cpp +++ b/src/function/aggregate/sorted_aggregate_function.cpp @@ -538,7 +538,7 @@ struct SortedAggregateFunction { template static void Combine(const STATE &source, STATE &target, AggregateInputData &aggr_input_data) { auto &order_bind = aggr_input_data.bind_data->Cast(); - auto &other = const_cast(source); + auto &other = const_cast(source); // NOLINT: absorb explicitly allows destruction target.Absorb(order_bind, other); } diff --git a/src/function/compression_config.cpp b/src/function/compression_config.cpp index 177a2d86a767..62ba1ce6ada3 100644 --- a/src/function/compression_config.cpp +++ b/src/function/compression_config.cpp @@ -14,7 +14,7 @@ struct DefaultCompressionMethod { compression_supports_type_t supports_type; }; -static DefaultCompressionMethod internal_compression_methods[] = { +static const DefaultCompressionMethod internal_compression_methods[] = { {CompressionType::COMPRESSION_CONSTANT, ConstantFun::GetFunction, ConstantFun::TypeIsSupported}, {CompressionType::COMPRESSION_UNCOMPRESSED, UncompressedFun::GetFunction, UncompressedFun::TypeIsSupported}, {CompressionType::COMPRESSION_RLE, RLEFun::GetFunction, RLEFun::TypeIsSupported}, diff --git a/src/function/function_binder.cpp b/src/function/function_binder.cpp index 3a70befdf9d7..267f589d8ff3 100644 --- a/src/function/function_binder.cpp +++ b/src/function/function_binder.cpp @@ -18,12 +18,12 @@ namespace duckdb { FunctionBinder::FunctionBinder(ClientContext &context) : context(context) { } -int64_t FunctionBinder::BindVarArgsFunctionCost(const SimpleFunction &func, const vector &arguments) { +optional_idx FunctionBinder::BindVarArgsFunctionCost(const SimpleFunction &func, const vector &arguments) { if (arguments.size() < func.arguments.size()) { // not enough arguments to fulfill the non-vararg part of the function - return -1; + return optional_idx(); } - int64_t cost = 0; + idx_t cost = 0; for (idx_t i = 0; i < arguments.size(); i++) { LogicalType arg_type = i < func.arguments.size() ? 
func.arguments[i] : func.varargs; if (arguments[i] == arg_type) { @@ -33,25 +33,25 @@ int64_t FunctionBinder::BindVarArgsFunctionCost(const SimpleFunction &func, cons int64_t cast_cost = CastFunctionSet::Get(context).ImplicitCastCost(arguments[i], arg_type); if (cast_cost >= 0) { // we can implicitly cast, add the cost to the total cost - cost += cast_cost; + cost += idx_t(cast_cost); } else { // we can't implicitly cast: throw an error - return -1; + return optional_idx(); } } return cost; } -int64_t FunctionBinder::BindFunctionCost(const SimpleFunction &func, const vector &arguments) { +optional_idx FunctionBinder::BindFunctionCost(const SimpleFunction &func, const vector &arguments) { if (func.HasVarArgs()) { // special case varargs function return BindVarArgsFunctionCost(func, arguments); } if (func.arguments.size() != arguments.size()) { // invalid argument count: check the next function - return -1; + return optional_idx(); } - int64_t cost = 0; + idx_t cost = 0; bool has_parameter = false; for (idx_t i = 0; i < arguments.size(); i++) { if (arguments[i].id() == LogicalTypeId::UNKNOWN) { @@ -61,10 +61,10 @@ int64_t FunctionBinder::BindFunctionCost(const SimpleFunction &func, const vecto int64_t cast_cost = CastFunctionSet::Get(context).ImplicitCastCost(arguments[i], func.arguments[i]); if (cast_cost >= 0) { // we can implicitly cast, add the cost to the total cost - cost += cast_cost; + cost += idx_t(cast_cost); } else { // we can't implicitly cast: throw an error - return -1; + return optional_idx(); } } if (has_parameter) { @@ -77,17 +77,18 @@ int64_t FunctionBinder::BindFunctionCost(const SimpleFunction &func, const vecto template vector FunctionBinder::BindFunctionsFromArguments(const string &name, FunctionSet &functions, const vector &arguments, ErrorData &error) { - idx_t best_function = DConstants::INVALID_INDEX; - int64_t lowest_cost = NumericLimits::Maximum(); + optional_idx best_function; + idx_t lowest_cost = NumericLimits::Maximum(); vector candidate_functions; for (idx_t f_idx = 0; f_idx < functions.functions.size(); f_idx++) { auto &func = functions.functions[f_idx]; // check the arguments of the function - int64_t cost = BindFunctionCost(func, arguments); - if (cost < 0) { + auto bind_cost = BindFunctionCost(func, arguments); + if (!bind_cost.IsValid()) { // auto casting was not possible continue; } + auto cost = bind_cost.GetIndex(); if (cost == lowest_cost) { candidate_functions.push_back(f_idx); continue; @@ -99,7 +100,7 @@ vector FunctionBinder::BindFunctionsFromArguments(const string &name, Fun lowest_cost = cost; best_function = f_idx; } - if (best_function == DConstants::INVALID_INDEX) { + if (!best_function.IsValid()) { // no matching function was found, throw an error vector candidates; for (auto &f : functions.functions) { @@ -108,14 +109,14 @@ vector FunctionBinder::BindFunctionsFromArguments(const string &name, Fun error = ErrorData(BinderException::NoMatchingFunction(name, arguments, candidates)); return candidate_functions; } - candidate_functions.push_back(best_function); + candidate_functions.push_back(best_function.GetIndex()); return candidate_functions; } template -idx_t FunctionBinder::MultipleCandidateException(const string &name, FunctionSet &functions, - vector &candidate_functions, - const vector &arguments, ErrorData &error) { +optional_idx FunctionBinder::MultipleCandidateException(const string &name, FunctionSet &functions, + vector &candidate_functions, + const vector &arguments, ErrorData &error) { D_ASSERT(functions.functions.size() > 1); 
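The binder refactor in this hunk replaces the old "-1 / DConstants::INVALID_INDEX means no match" convention with optional_idx, so the no-match state can no longer be confused with a real offset. A minimal sketch of the pattern with a hand-rolled optional index (duckdb::optional_idx exposes a similar IsValid/GetIndex surface):

#include <cassert>
#include <cstdint>
#include <limits>
#include <vector>

// Hand-rolled stand-in for duckdb::optional_idx, for illustration only.
struct OptionalIdx {
	static constexpr uint64_t INVALID = std::numeric_limits<uint64_t>::max();
	uint64_t index = INVALID;

	OptionalIdx() = default;
	explicit OptionalIdx(uint64_t index_p) : index(index_p) {
	}
	bool IsValid() const {
		return index != INVALID;
	}
	uint64_t GetIndex() const {
		assert(IsValid()); // callers must check IsValid() first
		return index;
	}
};

// Pick the candidate with the lowest non-negative cost; an invalid result
// replaces the old "-1 means no viable candidate" sentinel.
OptionalIdx BindLowestCost(const std::vector<int64_t> &cast_costs) {
	OptionalIdx best;
	int64_t lowest = std::numeric_limits<int64_t>::max();
	for (uint64_t i = 0; i < cast_costs.size(); i++) {
		if (cast_costs[i] < 0) {
			continue; // negative cost: no implicit cast exists
		}
		if (cast_costs[i] < lowest) {
			lowest = cast_costs[i];
			best = OptionalIdx(i);
		}
	}
	return best;
}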
// there are multiple possible function definitions // throw an exception explaining which overloads are there @@ -130,16 +131,16 @@ idx_t FunctionBinder::MultipleCandidateException(const string &name, FunctionSet StringUtil::Format("Could not choose a best candidate function for the function call \"%s\". In order to " "select one, please add explicit type casts.\n\tCandidate functions:\n%s", call_str, candidate_str)); - return DConstants::INVALID_INDEX; + return optional_idx(); } template -idx_t FunctionBinder::BindFunctionFromArguments(const string &name, FunctionSet &functions, - const vector &arguments, ErrorData &error) { +optional_idx FunctionBinder::BindFunctionFromArguments(const string &name, FunctionSet &functions, + const vector &arguments, ErrorData &error) { auto candidate_functions = BindFunctionsFromArguments(name, functions, arguments, error); if (candidate_functions.empty()) { // no candidates - return DConstants::INVALID_INDEX; + return optional_idx(); } if (candidate_functions.size() > 1) { // multiple candidates, check if there are any unknown arguments @@ -157,32 +158,32 @@ idx_t FunctionBinder::BindFunctionFromArguments(const string &name, FunctionSet< return candidate_functions[0]; } -idx_t FunctionBinder::BindFunction(const string &name, ScalarFunctionSet &functions, - const vector &arguments, ErrorData &error) { +optional_idx FunctionBinder::BindFunction(const string &name, ScalarFunctionSet &functions, + const vector &arguments, ErrorData &error) { return BindFunctionFromArguments(name, functions, arguments, error); } -idx_t FunctionBinder::BindFunction(const string &name, AggregateFunctionSet &functions, - const vector &arguments, ErrorData &error) { +optional_idx FunctionBinder::BindFunction(const string &name, AggregateFunctionSet &functions, + const vector &arguments, ErrorData &error) { return BindFunctionFromArguments(name, functions, arguments, error); } -idx_t FunctionBinder::BindFunction(const string &name, TableFunctionSet &functions, - const vector &arguments, ErrorData &error) { +optional_idx FunctionBinder::BindFunction(const string &name, TableFunctionSet &functions, + const vector &arguments, ErrorData &error) { return BindFunctionFromArguments(name, functions, arguments, error); } -idx_t FunctionBinder::BindFunction(const string &name, PragmaFunctionSet &functions, vector ¶meters, - ErrorData &error) { +optional_idx FunctionBinder::BindFunction(const string &name, PragmaFunctionSet &functions, vector ¶meters, + ErrorData &error) { vector types; for (auto &value : parameters) { types.push_back(value.type()); } - idx_t entry = BindFunctionFromArguments(name, functions, types, error); - if (entry == DConstants::INVALID_INDEX) { + auto entry = BindFunctionFromArguments(name, functions, types, error); + if (!entry.IsValid()) { error.Throw(); } - auto candidate_function = functions.GetFunctionByOffset(entry); + auto candidate_function = functions.GetFunctionByOffset(entry.GetIndex()); // cast the input parameters for (idx_t i = 0; i < parameters.size(); i++) { auto target_type = @@ -201,20 +202,20 @@ vector FunctionBinder::GetLogicalTypesFromExpressions(vector> &arguments, ErrorData &error) { +optional_idx FunctionBinder::BindFunction(const string &name, ScalarFunctionSet &functions, + vector> &arguments, ErrorData &error) { auto types = GetLogicalTypesFromExpressions(arguments); return BindFunction(name, functions, types, error); } -idx_t FunctionBinder::BindFunction(const string &name, AggregateFunctionSet &functions, - vector> &arguments, ErrorData 
&error) { +optional_idx FunctionBinder::BindFunction(const string &name, AggregateFunctionSet &functions, + vector> &arguments, ErrorData &error) { auto types = GetLogicalTypesFromExpressions(arguments); return BindFunction(name, functions, types, error); } -idx_t FunctionBinder::BindFunction(const string &name, TableFunctionSet &functions, - vector> &arguments, ErrorData &error) { +optional_idx FunctionBinder::BindFunction(const string &name, TableFunctionSet &functions, + vector> &arguments, ErrorData &error) { auto types = GetLogicalTypesFromExpressions(arguments); return BindFunction(name, functions, types, error); } @@ -291,13 +292,13 @@ unique_ptr FunctionBinder::BindScalarFunction(ScalarFunctionCatalogE vector> children, ErrorData &error, bool is_operator, Binder *binder) { // bind the function - idx_t best_function = BindFunction(func.name, func.functions, children, error); - if (best_function == DConstants::INVALID_INDEX) { + auto best_function = BindFunction(func.name, func.functions, children, error); + if (!best_function.IsValid()) { return nullptr; } // found a matching function! - auto bound_function = func.functions.GetFunctionByOffset(best_function); + auto bound_function = func.functions.GetFunctionByOffset(best_function.GetIndex()); // If any of the parameters are NULL, the function will just be replaced with a NULL constant // But this NULL constant needs to have to correct type, because we use LogicalType::SQLNULL for binding macro's diff --git a/src/function/function_set.cpp b/src/function/function_set.cpp index fd4cf3e702bb..cf48c14e4f18 100644 --- a/src/function/function_set.cpp +++ b/src/function/function_set.cpp @@ -16,12 +16,12 @@ ScalarFunctionSet::ScalarFunctionSet(ScalarFunction fun) : FunctionSet(std::move ScalarFunction ScalarFunctionSet::GetFunctionByArguments(ClientContext &context, const vector &arguments) { ErrorData error; FunctionBinder binder(context); - idx_t index = binder.BindFunction(name, *this, arguments, error); - if (index == DConstants::INVALID_INDEX) { + auto index = binder.BindFunction(name, *this, arguments, error); + if (!index.IsValid()) { throw InternalException("Failed to find function %s(%s)\n%s", name, StringUtil::ToString(arguments, ","), error.Message()); } - return GetFunctionByOffset(index); + return GetFunctionByOffset(index.GetIndex()); } AggregateFunctionSet::AggregateFunctionSet() : FunctionSet("") { @@ -38,8 +38,8 @@ AggregateFunction AggregateFunctionSet::GetFunctionByArguments(ClientContext &co const vector &arguments) { ErrorData error; FunctionBinder binder(context); - idx_t index = binder.BindFunction(name, *this, arguments, error); - if (index == DConstants::INVALID_INDEX) { + auto index = binder.BindFunction(name, *this, arguments, error); + if (!index.IsValid()) { // check if the arguments are a prefix of any of the arguments // this is used for functions such as quantile or string_agg that delete part of their arguments during bind // FIXME: we should come up with a better solution here @@ -61,7 +61,7 @@ AggregateFunction AggregateFunctionSet::GetFunctionByArguments(ClientContext &co throw InternalException("Failed to find function %s(%s)\n%s", name, StringUtil::ToString(arguments, ","), error.Message()); } - return GetFunctionByOffset(index); + return GetFunctionByOffset(index.GetIndex()); } TableFunctionSet::TableFunctionSet(string name) : FunctionSet(std::move(name)) { @@ -74,12 +74,12 @@ TableFunctionSet::TableFunctionSet(TableFunction fun) : FunctionSet(std::move(fu TableFunction 
TableFunctionSet::GetFunctionByArguments(ClientContext &context, const vector &arguments) { ErrorData error; FunctionBinder binder(context); - idx_t index = binder.BindFunction(name, *this, arguments, error); - if (index == DConstants::INVALID_INDEX) { + auto index = binder.BindFunction(name, *this, arguments, error); + if (!index.IsValid()) { throw InternalException("Failed to find function %s(%s)\n%s", name, StringUtil::ToString(arguments, ","), error.Message()); } - return GetFunctionByOffset(index); + return GetFunctionByOffset(index.GetIndex()); } PragmaFunctionSet::PragmaFunctionSet(string name) : FunctionSet(std::move(name)) { diff --git a/src/function/scalar/sequence/nextval.cpp b/src/function/scalar/sequence/nextval.cpp index 2167e8e4a49a..b8b8c98e3665 100644 --- a/src/function/scalar/sequence/nextval.cpp +++ b/src/function/scalar/sequence/nextval.cpp @@ -30,7 +30,8 @@ struct NextSequenceValueOperator { SequenceCatalogEntry &BindSequence(ClientContext &context, const string &name) { auto qname = QualifiedName::Parse(name); // fetch the sequence from the catalog - Binder::BindSchemaOrCatalog(context, qname.catalog, qname.schema); + auto binder = Binder::CreateBinder(context); + binder->BindSchemaOrCatalog(qname.catalog, qname.schema); return Catalog::GetEntry(context, qname.catalog, qname.schema, qname.name); } diff --git a/src/function/scalar/strftime_format.cpp b/src/function/scalar/strftime_format.cpp index f50f3dac4b7f..5181b005ed45 100644 --- a/src/function/scalar/strftime_format.cpp +++ b/src/function/scalar/strftime_format.cpp @@ -1342,11 +1342,11 @@ bool StrTimeFormat::Empty() const { return format_specifier.empty(); } -string StrpTimeFormat::FormatStrpTimeError(const string &input, idx_t position) { - if (position == DConstants::INVALID_INDEX) { +string StrpTimeFormat::FormatStrpTimeError(const string &input, optional_idx position) { + if (!position.IsValid()) { return string(); } - return input + "\n" + string(position, ' ') + "^"; + return input + "\n" + string(position.GetIndex(), ' ') + "^"; } date_t StrpTimeFormat::ParseResult::ToDate() { diff --git a/src/function/scalar/string/caseconvert.cpp b/src/function/scalar/string/caseconvert.cpp index 4193ee600bad..100ed9765a35 100644 --- a/src/function/scalar/string/caseconvert.cpp +++ b/src/function/scalar/string/caseconvert.cpp @@ -11,7 +11,7 @@ namespace duckdb { -uint8_t UpperFun::ascii_to_upper_map[] = { +const uint8_t UpperFun::ASCII_TO_UPPER_MAP[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, @@ -24,7 +24,7 @@ uint8_t UpperFun::ascii_to_upper_map[] = { 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255}; -uint8_t LowerFun::ascii_to_lower_map[] = { +const uint8_t LowerFun::ASCII_TO_LOWER_MAP[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, @@ -44,8 +44,8 @@ static string_t ASCIICaseConvert(Vector &result, const char *input_data, idx_t i auto result_str = 
StringVector::EmptyString(result, output_length); auto result_data = result_str.GetDataWriteable(); for (idx_t i = 0; i < input_length; i++) { - result_data[i] = IS_UPPER ? UpperFun::ascii_to_upper_map[uint8_t(input_data[i])] - : LowerFun::ascii_to_lower_map[uint8_t(input_data[i])]; + result_data[i] = IS_UPPER ? UpperFun::ASCII_TO_UPPER_MAP[uint8_t(input_data[i])] + : LowerFun::ASCII_TO_LOWER_MAP[uint8_t(input_data[i])]; } result_str.Finalize(); return result_str; @@ -88,8 +88,8 @@ static void CaseConvert(const char *input_data, idx_t input_length, char *result i += sz; } else { // ascii - *result_data = IS_UPPER ? UpperFun::ascii_to_upper_map[uint8_t(input_data[i])] - : LowerFun::ascii_to_lower_map[uint8_t(input_data[i])]; + *result_data = IS_UPPER ? UpperFun::ASCII_TO_UPPER_MAP[uint8_t(input_data[i])] + : LowerFun::ASCII_TO_LOWER_MAP[uint8_t(input_data[i])]; result_data++; i++; } diff --git a/src/function/scalar/string/like.cpp b/src/function/scalar/string/like.cpp index 2a45e7d92c77..edca8d8a0d75 100644 --- a/src/function/scalar/string/like.cpp +++ b/src/function/scalar/string/like.cpp @@ -26,7 +26,7 @@ struct ASCIILCaseReader { } static char Operation(const char *data, idx_t pos) { - return (char)LowerFun::ascii_to_lower_map[(uint8_t)data[pos]]; + return (char)LowerFun::ASCII_TO_LOWER_MAP[(uint8_t)data[pos]]; } }; diff --git a/src/function/scalar/system/aggregate_export.cpp b/src/function/scalar/system/aggregate_export.cpp index e1e068b23ee4..035a1e33e702 100644 --- a/src/function/scalar/system/aggregate_export.cpp +++ b/src/function/scalar/system/aggregate_export.cpp @@ -216,13 +216,13 @@ static unique_ptr BindAggregateState(ClientContext &context, Scala ErrorData error; FunctionBinder function_binder(context); - idx_t best_function = + auto best_function = function_binder.BindFunction(aggr.name, aggr.functions, state_type.bound_argument_types, error); - if (best_function == DConstants::INVALID_INDEX) { + if (!best_function.IsValid()) { throw InternalException("Could not re-bind exported aggregate %s: %s", state_type.function_name, error.Message()); } - auto bound_aggr = aggr.functions.GetFunctionByOffset(best_function); + auto bound_aggr = aggr.functions.GetFunctionByOffset(best_function.GetIndex()); if (bound_aggr.bind) { // FIXME: this is really hacky // but the aggregate state export needs a rework around how it handles more complex aggregates anyway diff --git a/src/function/table/arrow.cpp b/src/function/table/arrow.cpp index a65257a8363b..dd742b78f14e 100644 --- a/src/function/table/arrow.cpp +++ b/src/function/table/arrow.cpp @@ -125,6 +125,10 @@ static unique_ptr GetArrowLogicalTypeNoDictionary(ArrowSchema &schema } else if (format == "+s") { child_list_t child_types; vector> children; + if (schema.n_children == 0) { + throw InvalidInputException( + "Attempted to convert a STRUCT with no fields to DuckDB which is not supported"); + } for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) { children.emplace_back(ArrowTableFunction::GetArrowLogicalType(*schema.children[type_idx])); child_types.emplace_back(schema.children[type_idx]->name, children.back()->GetDuckType()); @@ -144,6 +148,9 @@ static unique_ptr GetArrowLogicalTypeNoDictionary(ArrowSchema &schema child_list_t members; vector> children; + if (schema.n_children == 0) { + throw InvalidInputException("Attempted to convert a UNION with no fields to DuckDB which is not supported"); + } for (idx_t type_idx = 0; type_idx < (idx_t)schema.n_children; type_idx++) { auto type = schema.children[type_idx]; 
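The two new guards in arrow.cpp reject Arrow STRUCT ("+s") and UNION schemas that declare zero children, because DuckDB has no corresponding empty composite type. The shape of the check on a simplified schema struct (the real code walks the ArrowSchema type from the Arrow C data interface):

#include <stdexcept>
#include <string>
#include <vector>

// Simplified stand-in for an Arrow C data interface schema node.
struct ToyArrowSchema {
	std::string format;                   // e.g. "+s" for STRUCT
	std::vector<ToyArrowSchema> children; // child fields
};

void ValidateComposite(const ToyArrowSchema &schema) {
	if (schema.format == "+s" && schema.children.empty()) {
		throw std::invalid_argument("Attempted to convert a STRUCT with no fields to DuckDB which is not supported");
	}
	// recurse so nested empty composites are caught as well
	for (auto &child : schema.children) {
		ValidateComposite(child);
	}
}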
diff --git a/src/function/table/system/duckdb_databases.cpp b/src/function/table/system/duckdb_databases.cpp index 79a8e4325b64..f0a1fc99b5b8 100644 --- a/src/function/table/system/duckdb_databases.cpp +++ b/src/function/table/system/duckdb_databases.cpp @@ -32,6 +32,9 @@ static unique_ptr DuckDBDatabasesBind(ClientContext &context, Tabl names.emplace_back("type"); return_types.emplace_back(LogicalType::VARCHAR); + names.emplace_back("readonly"); + return_types.emplace_back(LogicalType::BOOLEAN); + return nullptr; } @@ -64,8 +67,9 @@ void DuckDBDatabasesFunction(ClientContext &context, TableFunctionInput &data_p, output.SetValue(col++, count, attached.GetName()); // database_oid, BIGINT output.SetValue(col++, count, Value::BIGINT(attached.oid)); - // path, VARCHAR bool is_internal = attached.IsSystem() || attached.IsTemporary(); + bool is_readonly = attached.IsReadOnly(); + // path, VARCHAR Value db_path; if (!is_internal) { bool in_memory = attached.GetCatalog().InMemory(); @@ -80,6 +84,8 @@ void DuckDBDatabasesFunction(ClientContext &context, TableFunctionInput &data_p, output.SetValue(col++, count, Value::BOOLEAN(is_internal)); // type, VARCHAR output.SetValue(col++, count, Value(attached.GetCatalog().GetCatalogType())); + // readonly, BOOLEAN + output.SetValue(col++, count, Value::BOOLEAN(is_readonly)); count++; } diff --git a/src/function/table/system/duckdb_tables.cpp b/src/function/table/system/duckdb_tables.cpp index 2503cfbf80c9..e96c8c50b15f 100644 --- a/src/function/table/system/duckdb_tables.cpp +++ b/src/function/table/system/duckdb_tables.cpp @@ -146,7 +146,7 @@ void DuckDBTablesFunction(ClientContext &context, TableFunctionInput &data_p, Da output.SetValue(col++, count, Value::BOOLEAN(TableHasPrimaryKey(table))); // estimated_size, LogicalType::BIGINT Value card_val = - storage_info.cardinality == DConstants::INVALID_INDEX ? Value() : Value::BIGINT(storage_info.cardinality); + !storage_info.cardinality.IsValid() ? 
Value() : Value::BIGINT(storage_info.cardinality.GetIndex()); output.SetValue(col++, count, card_val); // column_count, LogicalType::BIGINT output.SetValue(col++, count, Value::BIGINT(table.GetColumns().LogicalColumnCount())); diff --git a/src/function/table/system/pragma_storage_info.cpp b/src/function/table/system/pragma_storage_info.cpp index 90c60d15c040..488528fb7aac 100644 --- a/src/function/table/system/pragma_storage_info.cpp +++ b/src/function/table/system/pragma_storage_info.cpp @@ -82,7 +82,8 @@ static unique_ptr PragmaStorageInfoBind(ClientContext &context, Ta auto qname = QualifiedName::Parse(input.inputs[0].GetValue()); // look up the table name in the catalog - Binder::BindSchemaOrCatalog(context, qname.catalog, qname.schema); + auto binder = Binder::CreateBinder(context); + binder->BindSchemaOrCatalog(qname.catalog, qname.schema); auto &table_entry = Catalog::GetEntry(context, qname.catalog, qname.schema, qname.name); auto result = make_uniq(table_entry); result->column_segments_info = table_entry.GetColumnSegmentInfo(); diff --git a/src/function/table/system/pragma_table_info.cpp b/src/function/table/system/pragma_table_info.cpp index 3e47ff398bd6..776266e9fdb3 100644 --- a/src/function/table/system/pragma_table_info.cpp +++ b/src/function/table/system/pragma_table_info.cpp @@ -172,7 +172,8 @@ static unique_ptr PragmaTableInfoBind(ClientContext &context, Tabl auto qname = QualifiedName::Parse(input.inputs[0].GetValue()); // look up the table name in the catalog - Binder::BindSchemaOrCatalog(context, qname.catalog, qname.schema); + auto binder = Binder::CreateBinder(context); + binder->BindSchemaOrCatalog(qname.catalog, qname.schema); auto &entry = Catalog::GetEntry(context, CatalogType::TABLE_ENTRY, qname.catalog, qname.schema, qname.name); return make_uniq(entry, IS_PRAGMA_TABLE_INFO); } diff --git a/src/include/duckdb/catalog/catalog.hpp b/src/include/duckdb/catalog/catalog.hpp index 871738a975de..d81a73395e70 100644 --- a/src/include/duckdb/catalog/catalog.hpp +++ b/src/include/duckdb/catalog/catalog.hpp @@ -70,6 +70,17 @@ class LogicalDelete; class LogicalUpdate; class CreateStatement; +//! Return value of Catalog::LookupEntry +struct CatalogEntryLookup { + optional_ptr schema; + optional_ptr entry; + ErrorData error; + + DUCKDB_API bool Found() const { + return entry; + } +}; + //! The Catalog object represents the catalog of the database. class Catalog { public: @@ -328,6 +339,10 @@ class Catalog { AttachedDatabase &db; public: + //! Lookup an entry using TryLookupEntry, throws if entry not found and if_not_found == THROW_EXCEPTION + CatalogEntryLookup LookupEntry(ClientContext &context, CatalogType type, const string &schema, const string &name, + OnEntryNotFound if_not_found, QueryErrorContext error_context = QueryErrorContext()); + private: //! Lookup an entry in the schema, returning a lookup with the entry and schema if they exist CatalogEntryLookup TryLookupEntryInternal(CatalogTransaction transaction, CatalogType type, const string &schema, @@ -337,9 +352,6 @@ class Catalog { CatalogEntryLookup TryLookupEntry(ClientContext &context, CatalogType type, const string &schema, const string &name, OnEntryNotFound if_not_found, QueryErrorContext error_context = QueryErrorContext()); - //! 
Lookup an entry using TryLookupEntry, throws if entry not found and if_not_found == THROW_EXCEPTION - CatalogEntryLookup LookupEntry(ClientContext &context, CatalogType type, const string &schema, const string &name, - OnEntryNotFound if_not_found, QueryErrorContext error_context = QueryErrorContext()); static CatalogEntryLookup TryLookupEntry(ClientContext &context, vector &lookups, CatalogType type, const string &name, OnEntryNotFound if_not_found, QueryErrorContext error_context = QueryErrorContext()); diff --git a/src/include/duckdb/catalog/catalog_entry/dependency/dependency_entry.hpp b/src/include/duckdb/catalog/catalog_entry/dependency/dependency_entry.hpp index b90a5c5e0b84..0342af5b00f5 100644 --- a/src/include/duckdb/catalog/catalog_entry/dependency/dependency_entry.hpp +++ b/src/include/duckdb/catalog/catalog_entry/dependency/dependency_entry.hpp @@ -23,8 +23,6 @@ namespace duckdb { class DependencyManager; -class DependencySetCatalogEntry; - //! Resembles a connection between an object and the CatalogEntry that can be retrieved from the Catalog using the //! identifiers listed here diff --git a/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp b/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp index 33bbd0106859..173df9edf91c 100644 --- a/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +++ b/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp @@ -63,6 +63,7 @@ class DuckSchemaEntry : public SchemaCatalogEntry { void DropEntry(ClientContext &context, DropInfo &info) override; optional_ptr GetEntry(CatalogTransaction transaction, CatalogType type, const string &name) override; SimilarCatalogEntry GetSimilarEntry(CatalogTransaction transaction, CatalogType type, const string &name) override; + void ScanAll(CatalogTransaction transaction, const std::function &callback); unique_ptr Copy(ClientContext &context) const override; diff --git a/src/include/duckdb/catalog/catalog_entry/function_entry.hpp b/src/include/duckdb/catalog/catalog_entry/function_entry.hpp index 69f4918c33bd..788c6561f0a8 100644 --- a/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +++ b/src/include/duckdb/catalog/catalog_entry/function_entry.hpp @@ -21,6 +21,7 @@ class FunctionEntry : public StandardEntry { description = std::move(info.description); parameter_names = std::move(info.parameter_names); example = std::move(info.example); + this->dependencies = info.dependencies; } //! 
The description (if any) diff --git a/src/include/duckdb/catalog/catalog_entry_map.hpp b/src/include/duckdb/catalog/catalog_entry_map.hpp index 7557b3b7dbea..8ce69d3c5131 100644 --- a/src/include/duckdb/catalog/catalog_entry_map.hpp +++ b/src/include/duckdb/catalog/catalog_entry_map.hpp @@ -35,4 +35,9 @@ using catalog_entry_map_t = unordered_map, T, CatalogEnt using catalog_entry_vector_t = vector>; +struct CatalogEntryOrdering { + catalog_entry_set_t ordered_set; + catalog_entry_vector_t ordered_vector; +}; + } // namespace duckdb diff --git a/src/include/duckdb/catalog/catalog_entry_retriever.hpp b/src/include/duckdb/catalog/catalog_entry_retriever.hpp new file mode 100644 index 000000000000..3369a22a0f9c --- /dev/null +++ b/src/include/duckdb/catalog/catalog_entry_retriever.hpp @@ -0,0 +1,72 @@ +#pragma once + +#include +#include "duckdb/common/enums/catalog_type.hpp" +#include "duckdb/common/enums/on_entry_not_found.hpp" +#include "duckdb/common/string.hpp" +#include "duckdb/parser/query_error_context.hpp" +#include "duckdb/catalog/catalog_entry/schema_catalog_entry.hpp" + +namespace duckdb { + +class ClientContext; +class Catalog; +class CatalogEntry; + +using catalog_entry_callback_t = std::function; + +// Wraps the Catalog::GetEntry method +class CatalogEntryRetriever { +public: + CatalogEntryRetriever(ClientContext &context) : context(context) { + } + CatalogEntryRetriever(const CatalogEntryRetriever &other) : callback(other.callback), context(other.context) { + } + +public: + optional_ptr GetEntry(CatalogType type, const string &catalog, const string &schema, + const string &name, + OnEntryNotFound on_entry_not_found = OnEntryNotFound::THROW_EXCEPTION, + QueryErrorContext error_context = QueryErrorContext()); + + optional_ptr GetEntry(CatalogType type, Catalog &catalog, const string &schema, const string &name, + OnEntryNotFound on_entry_not_found = OnEntryNotFound::THROW_EXCEPTION, + QueryErrorContext error_context = QueryErrorContext()); + + LogicalType GetType(const string &catalog, const string &schema, const string &name, + OnEntryNotFound on_entry_not_found = OnEntryNotFound::RETURN_NULL); + LogicalType GetType(Catalog &catalog, const string &schema, const string &name, + OnEntryNotFound on_entry_not_found = OnEntryNotFound::RETURN_NULL); + + optional_ptr GetSchema(const string &catalog, const string &name, + OnEntryNotFound on_entry_not_found = OnEntryNotFound::THROW_EXCEPTION, + QueryErrorContext error_context = QueryErrorContext()); + + void SetCallback(catalog_entry_callback_t callback) { + this->callback = callback; + } + catalog_entry_callback_t GetCallback() { + return callback; + } + +private: + using catalog_entry_retrieve_func_t = std::function()>; + optional_ptr GetEntryInternal(catalog_entry_retrieve_func_t retriever) { + auto result = retriever(); + if (!result) { + return result; + } + if (callback) { + // Call the callback if it's set + callback(*result); + } + return result; + } + +private: + //! 
(optional) callback, called on every successful entry retrieval + catalog_entry_callback_t callback = nullptr; + ClientContext &context; +}; + +} // namespace duckdb diff --git a/src/include/duckdb/catalog/catalog_set.hpp b/src/include/duckdb/catalog/catalog_set.hpp index aaef6af9353b..3da7d2e310cd 100644 --- a/src/include/duckdb/catalog/catalog_set.hpp +++ b/src/include/duckdb/catalog/catalog_set.hpp @@ -86,6 +86,7 @@ class CatalogSet { DUCKDB_API EntryLookup GetEntryDetailed(CatalogTransaction transaction, const string &name); DUCKDB_API optional_ptr GetEntry(CatalogTransaction transaction, const string &name); DUCKDB_API optional_ptr GetEntry(ClientContext &context, const string &name); + DUCKDB_API optional_ptr GetEntry(const string &name); //! Gets the entry that is most similar to the given name (i.e. smallest levenshtein distance), or empty string if //! none is found. The returned pair consists of the entry name and the distance (smaller means closer). diff --git a/src/include/duckdb/catalog/default/default_functions.hpp b/src/include/duckdb/catalog/default/default_functions.hpp index c35d438f65e9..bc05d2061830 100644 --- a/src/include/duckdb/catalog/default/default_functions.hpp +++ b/src/include/duckdb/catalog/default/default_functions.hpp @@ -27,15 +27,15 @@ class DefaultFunctionGenerator : public DefaultGenerator { SchemaCatalogEntry &schema; - DUCKDB_API static unique_ptr CreateInternalMacroInfo(DefaultMacro &default_macro); - DUCKDB_API static unique_ptr CreateInternalTableMacroInfo(DefaultMacro &default_macro); + DUCKDB_API static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro); + DUCKDB_API static unique_ptr CreateInternalTableMacroInfo(const DefaultMacro &default_macro); public: unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; vector GetDefaultEntries() override; private: - static unique_ptr CreateInternalTableMacroInfo(DefaultMacro &default_macro, + static unique_ptr CreateInternalTableMacroInfo(const DefaultMacro &default_macro, unique_ptr function); }; diff --git a/src/include/duckdb/catalog/dependency.hpp b/src/include/duckdb/catalog/dependency.hpp index 4d7f97778151..120b5fd7d156 100644 --- a/src/include/duckdb/catalog/dependency.hpp +++ b/src/include/duckdb/catalog/dependency.hpp @@ -16,12 +16,12 @@ namespace duckdb { class CatalogEntry; struct DependencyFlags { -private: public: DependencyFlags() : value(0) { } DependencyFlags(const DependencyFlags &other) : value(other.value) { } + virtual ~DependencyFlags() = default; DependencyFlags &operator=(const DependencyFlags &other) { value = other.value; return *this; diff --git a/src/include/duckdb/catalog/dependency_list.hpp b/src/include/duckdb/catalog/dependency_list.hpp index ed08f01ac945..d3dcf6aff5d3 100644 --- a/src/include/duckdb/catalog/dependency_list.hpp +++ b/src/include/duckdb/catalog/dependency_list.hpp @@ -32,6 +32,10 @@ struct LogicalDependency { explicit LogicalDependency(CatalogEntry &entry); LogicalDependency(); bool operator==(const LogicalDependency &other) const; + +public: + void Serialize(Serializer &serializer) const; + static LogicalDependency Deserialize(Deserializer &deserializer); }; struct LogicalDependencyHashFunction { @@ -54,6 +58,8 @@ class LogicalDependencyList { public: DUCKDB_API void VerifyDependencies(Catalog &catalog, const string &name); + void Serialize(Serializer &serializer) const; + static LogicalDependencyList Deserialize(Deserializer &deserializer); bool operator==(const LogicalDependencyList &other) const;
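Editorial note on CatalogEntryRetriever above: the optional callback fires on every successful retrieval, which is exactly the hook dependency tracking needs. A hypothetical sketch (variable names invented for illustration):

// Sketch: record every catalog entry resolved through the retriever.
CatalogEntryRetriever retriever(context);
catalog_entry_vector_t touched;
retriever.SetCallback([&touched](CatalogEntry &entry) { touched.push_back(entry); });
auto entry = retriever.GetEntry(CatalogType::TABLE_ENTRY, "memory", "main", "my_table",
                                OnEntryNotFound::RETURN_NULL);
// "touched" now holds each entry the lookup resolved

const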
create_info_set_t &Set() const; diff --git a/src/include/duckdb/catalog/dependency_manager.hpp b/src/include/duckdb/catalog/dependency_manager.hpp index e580bfccbae0..a87a54d81685 100644 --- a/src/include/duckdb/catalog/dependency_manager.hpp +++ b/src/include/duckdb/catalog/dependency_manager.hpp @@ -91,6 +91,9 @@ class DependencyManager { void AddOwnership(CatalogTransaction transaction, CatalogEntry &owner, CatalogEntry &entry); + //! Get the order of entries needed by EXPORT; objects with no dependencies are exported first + catalog_entry_vector_t GetExportOrder(CatalogTransaction &transaction); + private: DuckCatalog &catalog; CatalogSet subjects; diff --git a/src/include/duckdb/catalog/duck_catalog.hpp b/src/include/duckdb/catalog/duck_catalog.hpp index 3685f7ae219d..f014b3f26aa6 100644 --- a/src/include/duckdb/catalog/duck_catalog.hpp +++ b/src/include/duckdb/catalog/duck_catalog.hpp @@ -9,6 +9,7 @@ #pragma once #include "duckdb/catalog/catalog.hpp" +#include "duckdb/catalog/catalog_entry_map.hpp" namespace duckdb { @@ -33,7 +34,9 @@ class DuckCatalog : public Catalog { } public: + DUCKDB_API catalog_entry_vector_t GetNonSystemEntries(CatalogTransaction transaction); DUCKDB_API optional_ptr CreateSchema(CatalogTransaction transaction, CreateSchemaInfo &info) override; + DUCKDB_API void ScanSchemas(CatalogTransaction transaction, std::function callback); DUCKDB_API void ScanSchemas(ClientContext &context, std::function callback) override; DUCKDB_API void ScanSchemas(std::function callback); diff --git a/src/include/duckdb/catalog/standard_entry.hpp b/src/include/duckdb/catalog/standard_entry.hpp index cab74168070c..a3fa83d88c4a 100644 --- a/src/include/duckdb/catalog/standard_entry.hpp +++ b/src/include/duckdb/catalog/standard_entry.hpp @@ -25,6 +25,8 @@ class StandardEntry : public InCatalogEntry { //! The schema the entry belongs to SchemaCatalogEntry &schema; + //! The dependencies of the entry, can be empty + LogicalDependencyList dependencies; public: SchemaCatalogEntry &ParentSchema() override { diff --git a/src/include/duckdb/common/bit_utils.hpp b/src/include/duckdb/common/bit_utils.hpp index 28bc4a975e23..3c4f4a6bfcb8 100644 --- a/src/include/duckdb/common/bit_utils.hpp +++ b/src/include/duckdb/common/bit_utils.hpp @@ -11,7 +11,7 @@ #include "duckdb/common/hugeint.hpp" #include "duckdb/common/uhugeint.hpp" -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) #define __restrict__ #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ #define __ORDER_LITTLE_ENDIAN__ 2 diff --git a/src/include/duckdb/common/file_system.hpp b/src/include/duckdb/common/file_system.hpp index 149a393f3d09..0c99490997c6 100644 --- a/src/include/duckdb/common/file_system.hpp +++ b/src/include/duckdb/common/file_system.hpp @@ -171,7 +171,7 @@ class FileSystem { //! Expands a given path, including e.g. expanding the home directory of the user DUCKDB_API virtual string ExpandPath(const string &path); //! Returns the system-available memory in bytes. Returns DConstants::INVALID_INDEX if the system function fails. - DUCKDB_API static idx_t GetAvailableMemory(); + DUCKDB_API static optional_idx GetAvailableMemory(); //! Path separator for path DUCKDB_API virtual string PathSeparator(const string &path); //! Checks if path starts with separator (i.e., '/' on UNIX '\\' on Windows) @@ -217,6 +217,8 @@ class FileSystem { DUCKDB_API virtual void Reset(FileHandle &handle); DUCKDB_API virtual idx_t SeekPosition(FileHandle &handle);
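Editorial note: GetAvailableMemory now reports failure through optional_idx instead of the DConstants::INVALID_INDEX sentinel, so callers switch to a validity check, exactly as the DBConfig::SetDefaultMaxMemory hunk further down does. The pattern:

// Pattern sketch (max_memory is a hypothetical caller-side variable):
auto memory = FileSystem::GetAvailableMemory();
if (memory.IsValid()) {
	// e.g. cap usage at 80% of what the OS reports
	max_memory = memory.GetIndex() * 8 / 10;
}

+ //!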
If FS was manually set by the user + DUCKDB_API virtual bool IsManuallySet(); //! Whether or not we can seek into the file DUCKDB_API virtual bool CanSeek(); //! Whether or not the FS handles plain files on disk. This is relevant for certain optimizations, as random reads diff --git a/src/include/duckdb/common/opener_file_system.hpp b/src/include/duckdb/common/opener_file_system.hpp index 89c8746e8b2e..2d35512b21f4 100644 --- a/src/include/duckdb/common/opener_file_system.hpp +++ b/src/include/duckdb/common/opener_file_system.hpp @@ -27,7 +27,7 @@ class OpenerFileSystem : public FileSystem { unique_ptr OpenFile(const string &path, FileOpenFlags flags, optional_ptr opener = nullptr) override { VerifyNoOpener(opener); - return GetFileSystem().OpenFile(path, flags, GetOpener().get()); + return GetFileSystem().OpenFile(path, flags, GetOpener()); } void Read(FileHandle &handle, void *buffer, int64_t nr_bytes, idx_t location) override { @@ -66,28 +66,27 @@ class OpenerFileSystem : public FileSystem { bool DirectoryExists(const string &directory, optional_ptr opener) override { VerifyNoOpener(opener); - return GetFileSystem().DirectoryExists(directory); + return GetFileSystem().DirectoryExists(directory, GetOpener()); } void CreateDirectory(const string &directory, optional_ptr opener) override { VerifyNoOpener(opener); - return GetFileSystem().CreateDirectory(directory); + return GetFileSystem().CreateDirectory(directory, GetOpener()); } void RemoveDirectory(const string &directory, optional_ptr opener) override { VerifyNoOpener(opener); - return GetFileSystem().RemoveDirectory(directory); + return GetFileSystem().RemoveDirectory(directory, GetOpener()); } bool ListFiles(const string &directory, const std::function &callback, FileOpener *opener = nullptr) override { - if (opener) { - throw InternalException("OpenerFileSystem cannot take an opener - the opener is pushed automatically"); - } + VerifyNoOpener(opener); return GetFileSystem().ListFiles(directory, callback, GetOpener().get()); } void MoveFile(const string &source, const string &target, optional_ptr opener) override { - GetFileSystem().MoveFile(source, target); + VerifyNoOpener(opener); + GetFileSystem().MoveFile(source, target, GetOpener()); } string GetHomeDirectory() override { @@ -117,9 +116,7 @@ class OpenerFileSystem : public FileSystem { } vector Glob(const string &path, FileOpener *opener = nullptr) override { - if (opener) { - throw InternalException("OpenerFileSystem cannot take an opener - the opener is pushed automatically"); - } + VerifyNoOpener(opener); return GetFileSystem().Glob(path, GetOpener().get()); } diff --git a/src/include/duckdb/common/serializer/deserialization_data.hpp b/src/include/duckdb/common/serializer/deserialization_data.hpp index deaf48073ee8..d092229b09e7 100644 --- a/src/include/duckdb/common/serializer/deserialization_data.hpp +++ b/src/include/duckdb/common/serializer/deserialization_data.hpp @@ -23,7 +23,7 @@ struct DeserializationData { stack> databases; stack enums; stack> parameter_data; - stack> types; + stack> types; template void Set(T entry) = delete; @@ -167,13 +167,24 @@ inline void DeserializationData::Set(LogicalType &type) { } template <> -inline LogicalType &DeserializationData::Get() { +inline void DeserializationData::Unset() { + AssertNotEmpty(types); + types.pop(); +} + +template <> +inline void DeserializationData::Set(const LogicalType &type) { + types.emplace(type); +} + +template <> +inline const LogicalType &DeserializationData::Get() { AssertNotEmpty(types); return 
types.top(); } template <> -inline void DeserializationData::Unset() { +inline void DeserializationData::Unset() { AssertNotEmpty(types); types.pop(); } diff --git a/src/include/duckdb/common/serializer/serialization_traits.hpp b/src/include/duckdb/common/serializer/serialization_traits.hpp index 95e9bf064a19..5dc3e60706da 100644 --- a/src/include/duckdb/common/serializer/serialization_traits.hpp +++ b/src/include/duckdb/common/serializer/serialization_traits.hpp @@ -11,6 +11,7 @@ #include "duckdb/common/shared_ptr.hpp" #include "duckdb/common/unique_ptr.hpp" #include "duckdb/common/optional_ptr.hpp" +#include "duckdb/common/optional_idx.hpp" namespace duckdb { @@ -28,6 +29,7 @@ using void_t = void; // Check for anything implementing a `void Serialize(Serializer &Serializer)` method template struct has_serialize : std::false_type {}; + template struct has_serialize< T, typename std::enable_if< @@ -261,6 +263,16 @@ struct SerializationDefaultValue { static inline bool IsDefault(const typename std::enable_if::value, T>::type &value) { return value.empty(); } + + template + static inline typename std::enable_if::value, T>::type GetDefault() { + return optional_idx(); + } + + template + static inline bool IsDefault(const typename std::enable_if::value, T>::type &value) { + return !value.IsValid(); + } }; } // namespace duckdb diff --git a/src/include/duckdb/common/sort/partition_state.hpp b/src/include/duckdb/common/sort/partition_state.hpp index e4e9edcbe09e..ef12c69e4db6 100644 --- a/src/include/duckdb/common/sort/partition_state.hpp +++ b/src/include/duckdb/common/sort/partition_state.hpp @@ -209,6 +209,8 @@ class PartitionLocalMergeState { class PartitionGlobalMergeStates { public: struct Callback { + virtual ~Callback() = default; + virtual bool HasError() const { return false; } diff --git a/src/include/duckdb/common/types/data_chunk.hpp b/src/include/duckdb/common/types/data_chunk.hpp index 141dee911a18..353fdf582c02 100644 --- a/src/include/duckdb/common/types/data_chunk.hpp +++ b/src/include/duckdb/common/types/data_chunk.hpp @@ -131,7 +131,7 @@ class DataChunk { //! Slice all Vectors from other.data[i] to data[i + 'col_offset'] //! Turning all Vectors into Dictionary Vectors, using 'sel' - DUCKDB_API void Slice(DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset = 0); + DUCKDB_API void Slice(const DataChunk &other, const SelectionVector &sel, idx_t count, idx_t col_offset = 0); //! Slice a DataChunk from "offset" to "offset + count" DUCKDB_API void Slice(idx_t offset, idx_t count); @@ -150,7 +150,7 @@ class DataChunk { DUCKDB_API void Hash(vector &column_ids, Vector &result); //! Returns a list of types of the vectors of this data chunk - DUCKDB_API vector GetTypes(); + DUCKDB_API vector GetTypes() const; //! Converts this DataChunk to a printable string representation DUCKDB_API string ToString() const; diff --git a/src/include/duckdb/common/types/vector.hpp b/src/include/duckdb/common/types/vector.hpp index 80d2c2ce9654..b0786597a662 100644 --- a/src/include/duckdb/common/types/vector.hpp +++ b/src/include/duckdb/common/types/vector.hpp @@ -83,9 +83,9 @@ class Vector { //! Create a vector that references the other vector DUCKDB_API Vector(Vector &other); //! Create a vector that slices another vector - DUCKDB_API explicit Vector(Vector &other, const SelectionVector &sel, idx_t count); + DUCKDB_API explicit Vector(const Vector &other, const SelectionVector &sel, idx_t count);
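Editorial note on the SerializationDefaultValue specializations above: an unset optional_idx is treated as the serialization default, so fields such as the new TableStorageInfo::cardinality below can simply be skipped when invalid. Assumption-level sketch:

// Sketch: the invalid state is the serialization default for optional_idx.
optional_idx cardinality;                                                       // !IsValid()
bool skipped = SerializationDefaultValue::IsDefault<optional_idx>(cardinality); // true
cardinality = 42;
skipped = SerializationDefaultValue::IsDefault<optional_idx>(cardinality);      // false

//!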
Create a vector that slices another vector between a pair of offsets - DUCKDB_API explicit Vector(Vector &other, idx_t offset, idx_t end); + DUCKDB_API explicit Vector(const Vector &other, idx_t offset, idx_t end); //! Create a vector of size one holding the passed on value DUCKDB_API explicit Vector(const Value &value); //! Create a vector of size tuple_count (non-standard) @@ -390,6 +390,10 @@ struct ListVector { DUCKDB_API static void GetConsecutiveChildSelVector(Vector &list, SelectionVector &sel, idx_t offset, idx_t count); //! Share the entry of the other list vector DUCKDB_API static void ReferenceEntry(Vector &vector, Vector &other); + +private: + template + static T &GetEntryInternal(T &vector); }; struct StringVector { @@ -493,6 +497,10 @@ struct ArrayVector { DUCKDB_API static Vector &GetEntry(Vector &vector); //! Gets the total size of the underlying child-vector of an array DUCKDB_API static idx_t GetTotalSize(const Vector &vector); + +private: + template + static T &GetEntryInternal(T &vector); }; enum class UnionInvalidReason : uint8_t { diff --git a/src/include/duckdb/common/unique_ptr.hpp b/src/include/duckdb/common/unique_ptr.hpp index 328bf7e99ecb..d9f0b835832c 100644 --- a/src/include/duckdb/common/unique_ptr.hpp +++ b/src/include/duckdb/common/unique_ptr.hpp @@ -13,7 +13,7 @@ template class unique_ptr : public std::unique_ptr { // NOLINT: naming public: using original = std::unique_ptr; - using original::original; + using original::original; // NOLINT private: static inline void AssertNotNull(const bool null) { diff --git a/src/include/duckdb/core_functions/function_list.hpp b/src/include/duckdb/core_functions/function_list.hpp index 87eb5f8c4622..024ca49f864e 100644 --- a/src/include/duckdb/core_functions/function_list.hpp +++ b/src/include/duckdb/core_functions/function_list.hpp @@ -27,7 +27,7 @@ struct StaticFunctionDefinition { get_aggregate_function_t get_aggregate_function; get_aggregate_function_set_t get_aggregate_function_set; - static StaticFunctionDefinition *GetFunctionList(); + static const StaticFunctionDefinition *GetFunctionList(); }; } // namespace duckdb diff --git a/src/include/duckdb/execution/merge_sort_tree.hpp b/src/include/duckdb/execution/merge_sort_tree.hpp index 39e220656c10..6ce3048b0341 100644 --- a/src/include/duckdb/execution/merge_sort_tree.hpp +++ b/src/include/duckdb/execution/merge_sort_tree.hpp @@ -254,7 +254,7 @@ MergeSortTree::MergeSortTree(Elements &&lowest_level, const CMP const auto fanout = F; const auto cascading = C; const auto count = lowest_level.size(); - tree.emplace_back(Level(lowest_level, Offsets())); + tree.emplace_back(Level(std::move(lowest_level), Offsets())); const RunElement SENTINEL(MergeSortTraits::SENTINEL(), MergeSortTraits::SENTINEL()); diff --git a/src/include/duckdb/execution/operator/persistent/physical_export.hpp b/src/include/duckdb/execution/operator/persistent/physical_export.hpp index a74b270fc555..4032aea65c3d 100644 --- a/src/include/duckdb/execution/operator/persistent/physical_export.hpp +++ b/src/include/duckdb/execution/operator/persistent/physical_export.hpp @@ -57,6 +57,7 @@ class PhysicalExport : public PhysicalOperator { static void ExtractEntries(ClientContext &context, vector> &schemas, ExportEntries &result); + static catalog_entry_vector_t GetNaiveExportOrder(ClientContext &context, Catalog &catalog); public: // Sink interface diff --git a/src/include/duckdb/function/function.hpp b/src/include/duckdb/function/function.hpp index 8573ea297fbe..65a186f0525d 100644 --- 
a/src/include/duckdb/function/function.hpp +++ b/src/include/duckdb/function/function.hpp @@ -64,7 +64,7 @@ struct FunctionData { // FIXME: this function should be removed in the future template TARGET &CastNoConst() const { - return const_cast(reinterpret_cast(*this)); + return const_cast(reinterpret_cast(*this)); // NOLINT: FIXME } }; diff --git a/src/include/duckdb/function/function_binder.hpp b/src/include/duckdb/function/function_binder.hpp index 2c0929576f82..bddc1ffbb7a8 100644 --- a/src/include/duckdb/function/function_binder.hpp +++ b/src/include/duckdb/function/function_binder.hpp @@ -27,26 +27,26 @@ class FunctionBinder { public: //! Bind a scalar function from the set of functions and input arguments. Returns the index of the chosen function, - //! returns DConstants::INVALID_INDEX and sets error if none could be found - DUCKDB_API idx_t BindFunction(const string &name, ScalarFunctionSet &functions, - const vector &arguments, ErrorData &error); - DUCKDB_API idx_t BindFunction(const string &name, ScalarFunctionSet &functions, - vector> &arguments, ErrorData &error); + //! returns optional_idx() and sets error if none could be found + DUCKDB_API optional_idx BindFunction(const string &name, ScalarFunctionSet &functions, + const vector &arguments, ErrorData &error); + DUCKDB_API optional_idx BindFunction(const string &name, ScalarFunctionSet &functions, + vector> &arguments, ErrorData &error); //! Bind an aggregate function from the set of functions and input arguments. Returns the index of the chosen - //! function, returns DConstants::INVALID_INDEX and sets error if none could be found - DUCKDB_API idx_t BindFunction(const string &name, AggregateFunctionSet &functions, - const vector &arguments, ErrorData &error); - DUCKDB_API idx_t BindFunction(const string &name, AggregateFunctionSet &functions, - vector> &arguments, ErrorData &error); + //! function, returns optional_idx() and sets error if none could be found + DUCKDB_API optional_idx BindFunction(const string &name, AggregateFunctionSet &functions, + const vector &arguments, ErrorData &error); + DUCKDB_API optional_idx BindFunction(const string &name, AggregateFunctionSet &functions, + vector> &arguments, ErrorData &error); //! Bind a table function from the set of functions and input arguments. Returns the index of the chosen - //! function, returns DConstants::INVALID_INDEX and sets error if none could be found - DUCKDB_API idx_t BindFunction(const string &name, TableFunctionSet &functions, const vector &arguments, - ErrorData &error); - DUCKDB_API idx_t BindFunction(const string &name, TableFunctionSet &functions, - vector> &arguments, ErrorData &error); + //! function, returns optional_idx() and sets error if none could be found + DUCKDB_API optional_idx BindFunction(const string &name, TableFunctionSet &functions, + const vector &arguments, ErrorData &error); + DUCKDB_API optional_idx BindFunction(const string &name, TableFunctionSet &functions, + vector> &arguments, ErrorData &error); //! Bind a pragma function from the set of functions and input arguments - DUCKDB_API idx_t BindFunction(const string &name, PragmaFunctionSet &functions, vector &parameters, - ErrorData &error); + DUCKDB_API optional_idx BindFunction(const string &name, PragmaFunctionSet &functions, vector &parameters, + ErrorData &error); DUCKDB_API unique_ptr BindScalarFunction(const string &schema, const string &name, vector> children, ErrorData &error, @@ -70,20 +70,21 @@ class FunctionBinder { private:
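Editorial note: every FunctionBinder::BindFunction overload above now returns optional_idx, so the DConstants::INVALID_INDEX comparison at call sites becomes a validity check. Hypothetical call site after the change (name, function_set and arguments assumed in scope):

// Sketch only: bind and, on failure, raise the ErrorData the binder filled in.
ErrorData error;
FunctionBinder function_binder(context);
auto best_function = function_binder.BindFunction(name, function_set, arguments, error);
if (!best_function.IsValid()) {
	error.Throw();
}
auto bound_function = function_set.GetFunctionByOffset(best_function.GetIndex());

//!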
Cast a set of expressions to the arguments of this function void CastToFunctionArguments(SimpleFunction &function, vector> &children); - int64_t BindVarArgsFunctionCost(const SimpleFunction &func, const vector &arguments); - int64_t BindFunctionCost(const SimpleFunction &func, const vector &arguments); + optional_idx BindVarArgsFunctionCost(const SimpleFunction &func, const vector &arguments); + optional_idx BindFunctionCost(const SimpleFunction &func, const vector &arguments); template vector BindFunctionsFromArguments(const string &name, FunctionSet &functions, const vector &arguments, ErrorData &error); template - idx_t MultipleCandidateException(const string &name, FunctionSet &functions, vector &candidate_functions, - const vector &arguments, ErrorData &error); + optional_idx MultipleCandidateException(const string &name, FunctionSet &functions, + vector &candidate_functions, const vector &arguments, + ErrorData &error); template - idx_t BindFunctionFromArguments(const string &name, FunctionSet &functions, const vector &arguments, - ErrorData &error); + optional_idx BindFunctionFromArguments(const string &name, FunctionSet &functions, + const vector &arguments, ErrorData &error); vector GetLogicalTypesFromExpressions(vector> &arguments); }; diff --git a/src/include/duckdb/function/scalar/strftime_format.hpp b/src/include/duckdb/function/scalar/strftime_format.hpp index b371d370ad8b..466c3fe85bbc 100644 --- a/src/include/duckdb/function/scalar/strftime_format.hpp +++ b/src/include/duckdb/function/scalar/strftime_format.hpp @@ -137,7 +137,7 @@ struct StrpTimeFormat : public StrTimeFormat { // NOLINT: work-around bug in cla int32_t data[8]; // year, month, day, hour, min, sec, µs, offset string tz; string error_message; - idx_t error_position = DConstants::INVALID_INDEX; + optional_idx error_position; bool is_special; date_t special; @@ -169,7 +169,7 @@ struct StrpTimeFormat : public StrTimeFormat { // NOLINT: work-around bug in cla static StrpTimeFormat Deserialize(Deserializer &deserializer); protected: - static string FormatStrpTimeError(const string &input, idx_t position); + static string FormatStrpTimeError(const string &input, optional_idx position); DUCKDB_API void AddFormatSpecifier(string preceding_literal, StrTimeSpecifier specifier) override; int NumericSpecifierWidth(StrTimeSpecifier specifier); int32_t TryParseCollection(const char *data, idx_t &pos, idx_t size, const string_t collection[], diff --git a/src/include/duckdb/function/scalar/string_functions.hpp b/src/include/duckdb/function/scalar/string_functions.hpp index 100fcb5cdbc2..3fc6cf62978d 100644 --- a/src/include/duckdb/function/scalar/string_functions.hpp +++ b/src/include/duckdb/function/scalar/string_functions.hpp @@ -19,7 +19,7 @@ class RE2; namespace duckdb { struct LowerFun { - static uint8_t ascii_to_lower_map[]; + static const uint8_t ASCII_TO_LOWER_MAP[]; //! Returns the length of the result string obtained from lowercasing the given input (in bytes) static idx_t LowerLength(const char *input_data, idx_t input_length); @@ -31,7 +31,7 @@ struct LowerFun { }; struct UpperFun { - static uint8_t ascii_to_upper_map[]; + static const uint8_t ASCII_TO_UPPER_MAP[]; static void RegisterFunction(BuiltinFunctions &set); }; diff --git a/src/include/duckdb/main/config.hpp b/src/include/duckdb/main/config.hpp index 7b6e9c77c7f5..c90d43a7b347 100644 --- a/src/include/duckdb/main/config.hpp +++ b/src/include/duckdb/main/config.hpp @@ -186,7 +186,7 @@ struct DBConfigOptions { //! 
Whether or not the configuration settings can be altered bool lock_configuration = false; //! Whether to print bindings when printing the plan (debug mode only) - static bool debug_print_bindings; + static bool debug_print_bindings; // NOLINT: debug setting //! The peak allocation threshold at which to flush the allocator after completing a task (1 << 27, ~128MB) idx_t allocator_flush_threshold = 134217728; //! DuckDB API surface @@ -256,9 +256,9 @@ struct DBConfig { DUCKDB_API void AddExtensionOption(const string &name, string description, LogicalType parameter, const Value &default_value = Value(), set_option_callback_t function = nullptr); //! Fetch an option by index. Returns a pointer to the option, or nullptr if out of range - DUCKDB_API static ConfigurationOption *GetOptionByIndex(idx_t index); + DUCKDB_API static optional_ptr GetOptionByIndex(idx_t index); //! Fetch an option by name. Returns a pointer to the option, or nullptr if none exists. - DUCKDB_API static ConfigurationOption *GetOptionByName(const string &name); + DUCKDB_API static optional_ptr GetOptionByName(const string &name); DUCKDB_API void SetOption(const ConfigurationOption &option, const Value &value); DUCKDB_API void SetOption(DatabaseInstance *db, const ConfigurationOption &option, const Value &value); DUCKDB_API void SetOptionByName(const string &name, const Value &value); diff --git a/src/include/duckdb/main/settings.hpp b/src/include/duckdb/main/settings.hpp index 9cf9655f8d67..a1c6560b2a39 100644 --- a/src/include/duckdb/main/settings.hpp +++ b/src/include/duckdb/main/settings.hpp @@ -416,7 +416,7 @@ struct IntegerDivisionSetting { struct LogQueryPathSetting { static constexpr const char *Name = "log_query_path"; static constexpr const char *Description = - "Specifies the path to which queries should be logged (default: empty string, queries are not logged)"; + "Specifies the path to which queries should be logged (default: NULL, queries are not logged)"; static constexpr const LogicalTypeId InputType = LogicalTypeId::VARCHAR; static void SetLocal(ClientContext &context, const Value &parameter); static void ResetLocal(ClientContext &context); @@ -492,7 +492,7 @@ struct PasswordSetting { struct PerfectHashThresholdSetting { static constexpr const char *Name = "perfect_ht_threshold"; - static constexpr const char *Description = "Threshold in bytes for when to use a perfect hash table (default: 12)"; + static constexpr const char *Description = "Threshold in bytes for when to use a perfect hash table"; static constexpr const LogicalTypeId InputType = LogicalTypeId::BIGINT; static void SetLocal(ClientContext &context, const Value &parameter); static void ResetLocal(ClientContext &context); @@ -511,8 +511,7 @@ struct PivotFilterThreshold { struct PivotLimitSetting { static constexpr const char *Name = "pivot_limit"; - static constexpr const char *Description = - "The maximum number of pivot columns in a pivot statement (default: 100000)"; + static constexpr const char *Description = "The maximum number of pivot columns in a pivot statement"; static constexpr const LogicalTypeId InputType = LogicalTypeId::BIGINT; static void SetLocal(ClientContext &context, const Value &parameter); static void ResetLocal(ClientContext &context); diff --git a/src/include/duckdb/optimizer/compressed_materialization.hpp b/src/include/duckdb/optimizer/compressed_materialization.hpp index ef9229a3bb2e..86422c9c6001 100644 --- a/src/include/duckdb/optimizer/compressed_materialization.hpp +++
b/src/include/duckdb/optimizer/compressed_materialization.hpp @@ -102,7 +102,7 @@ class CompressedMaterialization { bool TryCompressChild(CompressedMaterializationInfo &info, const CMChildInfo &child_info, vector> &compress_expressions); void CreateCompressProjection(unique_ptr &child_op, - vector> &&compress_exprs, + vector> compress_exprs, CompressedMaterializationInfo &info, CMChildInfo &child_info); void CreateDecompressProjection(unique_ptr &op, CompressedMaterializationInfo &info); diff --git a/src/include/duckdb/optimizer/matcher/expression_matcher.hpp b/src/include/duckdb/optimizer/matcher/expression_matcher.hpp index 30839a0a1fd8..b7d2a3d72c24 100644 --- a/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +++ b/src/include/duckdb/optimizer/matcher/expression_matcher.hpp @@ -40,7 +40,7 @@ class ExpressionMatcher { //! The ExpressionEqualityMatcher matches on equality with another (given) expression class ExpressionEqualityMatcher : public ExpressionMatcher { public: - explicit ExpressionEqualityMatcher(Expression &expr) + explicit ExpressionEqualityMatcher(const Expression &expr) : ExpressionMatcher(ExpressionClass::INVALID), expression(expr) { } diff --git a/src/include/duckdb/optimizer/matcher/function_matcher.hpp b/src/include/duckdb/optimizer/matcher/function_matcher.hpp index 6acf2bbf50ff..162789c3e67b 100644 --- a/src/include/duckdb/optimizer/matcher/function_matcher.hpp +++ b/src/include/duckdb/optimizer/matcher/function_matcher.hpp @@ -20,9 +20,9 @@ class FunctionMatcher { virtual ~FunctionMatcher() { } - virtual bool Match(string &name) = 0; + virtual bool Match(const string &name) = 0; - static bool Match(unique_ptr &matcher, string &name) { + static bool Match(unique_ptr &matcher, const string &name) { if (!matcher) { return true; } @@ -33,11 +33,11 @@ class FunctionMatcher { //! The SpecificFunctionMatcher class matches a single specified function name class SpecificFunctionMatcher : public FunctionMatcher { public: - explicit SpecificFunctionMatcher(string name) : name(std::move(name)) { + explicit SpecificFunctionMatcher(string name_p) : name(std::move(name_p)) { } - bool Match(string &name) override { - return name == this->name; + bool Match(const string &matched_name) override { + return matched_name == this->name; } private: @@ -47,10 +47,10 @@ class SpecificFunctionMatcher : public FunctionMatcher { //! 
The ManyFunctionMatcher class matches a set of functions class ManyFunctionMatcher : public FunctionMatcher { public: - explicit ManyFunctionMatcher(unordered_set names) : names(std::move(names)) { + explicit ManyFunctionMatcher(unordered_set names_p) : names(std::move(names_p)) { } - bool Match(string &name) override { + bool Match(const string &name) override { return names.find(name) != names.end(); } diff --git a/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp b/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp index b13aaacc4740..6b0fb80715ba 100644 --- a/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +++ b/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp @@ -14,7 +14,7 @@ #include "duckdb/parser/qualified_name.hpp" namespace duckdb { -class ClientContext; +class CatalogEntryRetriever; class CatalogEntry; struct SetColumnCommentInfo : public AlterInfo { @@ -35,7 +35,7 @@ struct SetColumnCommentInfo : public AlterInfo { Value comment_value; public: - optional_ptr TryResolveCatalogEntry(ClientContext &context); + optional_ptr TryResolveCatalogEntry(CatalogEntryRetriever &retriever); unique_ptr Copy() const override; CatalogType GetCatalogType() const override; diff --git a/src/include/duckdb/parser/parsed_data/create_info.hpp b/src/include/duckdb/parser/parsed_data/create_info.hpp index 50637103d923..38956c04ece1 100644 --- a/src/include/duckdb/parser/parsed_data/create_info.hpp +++ b/src/include/duckdb/parser/parsed_data/create_info.hpp @@ -10,6 +10,7 @@ #include "duckdb/common/enums/catalog_type.hpp" #include "duckdb/parser/parsed_data/parse_info.hpp" +#include "duckdb/catalog/dependency_list.hpp" #include "duckdb/common/enum_util.hpp" #include "duckdb/common/enums/on_create_conflict.hpp" #include "duckdb/common/types/value.hpp" @@ -44,6 +45,8 @@ struct CreateInfo : public ParseInfo { bool internal; //! The SQL string of the CREATE statement string sql; + //! The inherent dependencies of the created entry + LogicalDependencyList dependencies; //! User provided comment Value comment; diff --git a/src/include/duckdb/planner/binder.hpp b/src/include/duckdb/planner/binder.hpp index 5e54701f97a4..755ad4923742 100644 --- a/src/include/duckdb/planner/binder.hpp +++ b/src/include/duckdb/planner/binder.hpp @@ -119,6 +119,7 @@ class Binder : public std::enable_shared_from_this { unique_ptr BindCreateTableInfo(unique_ptr info); unique_ptr BindCreateTableInfo(unique_ptr info, SchemaCatalogEntry &schema); + void SetCatalogLookupCallback(catalog_entry_callback_t callback); void BindCreateViewInfo(CreateViewInfo &base); SchemaCatalogEntry &BindSchema(CreateInfo &info); SchemaCatalogEntry &BindCreateFunctionInfo(CreateInfo &info); @@ -134,10 +135,14 @@ class Binder : public std::enable_shared_from_this { //! Generates an unused index for a table idx_t GenerateTableIndex(); + optional_ptr GetCatalogEntry(CatalogType type, const string &catalog, const string &schema, + const string &name, OnEntryNotFound on_entry_not_found, + QueryErrorContext &error_context); + //! Add a common table expression to the binder void AddCTE(const string &name, CommonTableExpressionInfo &cte); - //! Find a common table expression by name; returns nullptr if none exists - optional_ptr FindCTE(const string &name, bool skip = false); + //! 
Find all candidate common table expressions by name; returns an empty vector if none exist + vector> FindCTE(const string &name, bool skip = false); bool CTEIsAlreadyBound(CommonTableExpressionInfo &cte); @@ -163,9 +168,8 @@ class Binder : public std::enable_shared_from_this { TableCatalogEntry &table, TableStorageInfo &storage_info); void BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &table, InsertStatement &stmt); - static void BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema); - static void BindLogicalType(ClientContext &context, LogicalType &type, optional_ptr catalog = nullptr, - const string &schema = INVALID_SCHEMA); + void BindLogicalType(LogicalType &type, optional_ptr catalog = nullptr, + const string &schema = INVALID_SCHEMA); bool HasMatchingBinding(const string &table_name, const string &column_name, ErrorData &error); bool HasMatchingBinding(const string &schema_name, const string &table_name, const string &column_name, @@ -207,6 +211,8 @@ class Binder : public std::enable_shared_from_this { unordered_set table_names; //! The set of bound views reference_set_t bound_views; + //! Used to retrieve CatalogEntry objects + CatalogEntryRetriever entry_retriever; //! Unnamed subquery index idx_t unnamed_subquery_index = 1; @@ -361,8 +367,6 @@ class Binder : public std::enable_shared_from_this { void ReplaceStarExpression(unique_ptr &expr, unique_ptr &replacement); void BindWhereStarExpression(unique_ptr &expr); - //! If only a schema name is provided (e.g. "a.b") then figure out if "a" is a schema or a catalog name - void BindSchemaOrCatalog(string &catalog_name, string &schema_name); const string BindCatalog(string &catalog_name); SchemaCatalogEntry &BindCreateSchema(CreateInfo &info); @@ -378,6 +382,8 @@ class Binder : public std::enable_shared_from_this { unique_ptr BindSummarize(ShowRef &ref); public: + //! If only a schema name is provided (e.g. "a.b") then figure out if "a" is a schema or a catalog name + void BindSchemaOrCatalog(string &catalog_name, string &schema_name); // This should really be a private constructor, but make_shared does not allow it... // If you are thinking about calling this, you should probably call Binder::CreateBinder Binder(bool i_know_what_i_am_doing, ClientContext &context, shared_ptr parent, bool inherit_ctes); diff --git a/src/include/duckdb/planner/expression_binder.hpp b/src/include/duckdb/planner/expression_binder.hpp index b74e0d289705..8cf32123eed2 100644 --- a/src/include/duckdb/planner/expression_binder.hpp +++ b/src/include/duckdb/planner/expression_binder.hpp @@ -18,6 +18,7 @@ #include "duckdb/parser/parsed_expression.hpp" #include "duckdb/parser/tokens.hpp" #include "duckdb/planner/expression.hpp" +#include "duckdb/catalog/catalog_entry_retriever.hpp" #include "duckdb/planner/expression/bound_lambda_expression.hpp" #include "duckdb/function/scalar_function.hpp" @@ -89,6 +90,7 @@ class ExpressionBinder { return bound_columns; } + void SetCatalogLookupCallback(catalog_entry_callback_t callback); ErrorData Bind(unique_ptr &expr, idx_t depth, bool root_expression = false);
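Editorial note: FindCTE now returns every candidate instead of the first match, leaving disambiguation of duplicate CTE names to the caller. Illustrative sketch (the selection policy shown is an assumption, not dictated by the patch):

// Sketch: pick the first candidate that has not been bound yet.
auto candidates = FindCTE(cte_name, /* skip = */ false);
for (auto &cte_ref : candidates) {
	auto &cte = cte_ref.get();
	if (!CTEIsAlreadyBound(cte)) {
		// bind this candidate
		break;
	}
}

//!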
Returns the STRUCT_EXTRACT operator expression @@ -193,6 +195,9 @@ class ExpressionBinder { virtual string UnsupportedAggregateMessage(); virtual string UnsupportedUnnestMessage(); + optional_ptr GetCatalogEntry(CatalogType type, const string &catalog, const string &schema, + const string &name, OnEntryNotFound on_entry_not_found, + QueryErrorContext &error_context); Binder &binder; ClientContext &context; diff --git a/src/include/duckdb/planner/expression_binder/table_function_binder.hpp b/src/include/duckdb/planner/expression_binder/table_function_binder.hpp index 46dbd1fddede..9e14a852e255 100644 --- a/src/include/duckdb/planner/expression_binder/table_function_binder.hpp +++ b/src/include/duckdb/planner/expression_binder/table_function_binder.hpp @@ -15,14 +15,17 @@ namespace duckdb { //! The table function binder can bind standard table function parameters (i.e., non-table-in-out functions) class TableFunctionBinder : public ExpressionBinder { public: - TableFunctionBinder(Binder &binder, ClientContext &context); + TableFunctionBinder(Binder &binder, ClientContext &context, string table_function_name = string()); protected: BindResult BindLambdaReference(LambdaRefExpression &expr, idx_t depth); - BindResult BindColumnReference(ColumnRefExpression &expr, idx_t depth, bool root_expression); + BindResult BindColumnReference(unique_ptr &expr, idx_t depth, bool root_expression); BindResult BindExpression(unique_ptr &expr, idx_t depth, bool root_expression = false) override; string UnsupportedAggregateMessage() override; + +private: + string table_function_name; }; } // namespace duckdb diff --git a/src/include/duckdb/planner/expression_iterator.hpp b/src/include/duckdb/planner/expression_iterator.hpp index c07f60e1064e..36825b3acd1b 100644 --- a/src/include/duckdb/planner/expression_iterator.hpp +++ b/src/include/duckdb/planner/expression_iterator.hpp @@ -31,6 +31,8 @@ class ExpressionIterator { class BoundNodeVisitor { public: + virtual ~BoundNodeVisitor() = default; + virtual void VisitBoundQueryNode(BoundQueryNode &op); virtual void VisitBoundTableRef(BoundTableRef &ref); virtual void VisitExpression(unique_ptr &expression); diff --git a/src/include/duckdb/storage/buffer/block_handle.hpp b/src/include/duckdb/storage/buffer/block_handle.hpp index 196fb27c7654..92a646c8acab 100644 --- a/src/include/duckdb/storage/buffer/block_handle.hpp +++ b/src/include/duckdb/storage/buffer/block_handle.hpp @@ -38,7 +38,7 @@ struct BufferPoolReservation { ~BufferPoolReservation(); void Resize(idx_t new_size); - void Merge(BufferPoolReservation &&src); + void Merge(BufferPoolReservation src); }; struct TempBufferPoolReservation : BufferPoolReservation { diff --git a/src/include/duckdb/storage/checkpoint/table_data_writer.hpp b/src/include/duckdb/storage/checkpoint/table_data_writer.hpp index 4c3f42d56b2a..4ab7a90589cd 100644 --- a/src/include/duckdb/storage/checkpoint/table_data_writer.hpp +++ b/src/include/duckdb/storage/checkpoint/table_data_writer.hpp @@ -31,10 +31,10 @@ class TableDataWriter { CompressionType GetColumnCompressionType(idx_t i); - virtual void FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info, Serializer &serializer) = 0; + virtual void FinalizeTable(const TableStatistics &global_stats, DataTableInfo *info, Serializer &serializer) = 0; virtual unique_ptr GetRowGroupWriter(RowGroup &row_group) = 0; - virtual void AddRowGroup(RowGroupPointer &&row_group_pointer, unique_ptr &&writer); + virtual void AddRowGroup(RowGroupPointer &&row_group_pointer, unique_ptr writer); 
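Editorial note on the two signature changes above: FinalizeTable now borrows the statistics (const reference) and AddRowGroup takes the writer by value, the usual sink idiom that forces an explicit ownership hand-off. Hypothetical caller (row_group and pointer assumed in scope):

// Sketch of the sink-style hand-off.
auto rg_writer = table_data_writer.GetRowGroupWriter(row_group);
// ... checkpoint the row group through *rg_writer ...
table_data_writer.AddRowGroup(std::move(pointer), std::move(rg_writer));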
TaskScheduler &GetScheduler(); @@ -50,7 +50,7 @@ class SingleFileTableDataWriter : public TableDataWriter { MetadataWriter &table_data_writer); public: - void FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info, Serializer &serializer) override; + void FinalizeTable(const TableStatistics &global_stats, DataTableInfo *info, Serializer &serializer) override; unique_ptr GetRowGroupWriter(RowGroup &row_group) override; private: diff --git a/src/include/duckdb/storage/compression/chimp/chimp_analyze.hpp b/src/include/duckdb/storage/compression/chimp/chimp_analyze.hpp index 2b56d15baaa3..4976e2ca8732 100644 --- a/src/include/duckdb/storage/compression/chimp/chimp_analyze.hpp +++ b/src/include/duckdb/storage/compression/chimp/chimp_analyze.hpp @@ -33,7 +33,6 @@ bool ChimpAnalyze(AnalyzeState &state, Vector &input, idx_t count) { template idx_t ChimpFinalAnalyze(AnalyzeState &state) { throw InternalException("Chimp has been deprecated, can no longer be used to compress data"); - return DConstants::INVALID_INDEX; } } // namespace duckdb diff --git a/src/include/duckdb/storage/compression/patas/patas_analyze.hpp b/src/include/duckdb/storage/compression/patas/patas_analyze.hpp index 0f9dab421efc..c86d2538d2df 100644 --- a/src/include/duckdb/storage/compression/patas/patas_analyze.hpp +++ b/src/include/duckdb/storage/compression/patas/patas_analyze.hpp @@ -33,7 +33,6 @@ bool PatasAnalyze(AnalyzeState &state, Vector &input, idx_t count) { template idx_t PatasFinalAnalyze(AnalyzeState &state) { throw InternalException("Patas has been deprecated, can no longer be used to compress data"); - return DConstants::INVALID_INDEX; } } // namespace duckdb diff --git a/src/include/duckdb/storage/partial_block_manager.hpp b/src/include/duckdb/storage/partial_block_manager.hpp index 818f2ff6b54b..935f0126993a 100644 --- a/src/include/duckdb/storage/partial_block_manager.hpp +++ b/src/include/duckdb/storage/partial_block_manager.hpp @@ -110,7 +110,7 @@ class PartialBlockManager { PartialBlockAllocation GetBlockAllocation(uint32_t segment_size); //! Register a partially filled block that is filled with "segment_size" entries - void RegisterPartialBlock(PartialBlockAllocation &&allocation); + void RegisterPartialBlock(PartialBlockAllocation allocation); //! Clear remaining blocks without writing them to disk void ClearBlocks(); diff --git a/src/include/duckdb/storage/serialization/create_info.json b/src/include/duckdb/storage/serialization/create_info.json index c850a71e854a..35cc8b9a5815 100644 --- a/src/include/duckdb/storage/serialization/create_info.json +++ b/src/include/duckdb/storage/serialization/create_info.json @@ -49,6 +49,12 @@ "name": "comment", "type": "Value", "default": "Value()" + }, + { + "id": 108, + "name": "dependencies", + "type": "LogicalDependencyList", + "default": "LogicalDependencyList()" } ] }, diff --git a/src/include/duckdb/storage/table/chunk_info.hpp b/src/include/duckdb/storage/table/chunk_info.hpp index 14ed981e3707..bc089eccd5bc 100644 --- a/src/include/duckdb/storage/table/chunk_info.hpp +++ b/src/include/duckdb/storage/table/chunk_info.hpp @@ -17,7 +17,7 @@ class RowGroup; struct SelectionVector; class Transaction; struct TransactionData; - +struct DeleteInfo; class Serializer; class Deserializer; @@ -132,7 +132,7 @@ class ChunkVectorInfo : public ChunkInfo { //! Note that "rows" is written to, to reflect the row ids that were actually deleted //! i.e.
after calling this function, rows will hold [0..actual_delete_count] row ids of the actually deleted tuples idx_t Delete(transaction_t transaction_id, row_t rows[], idx_t count); - void CommitDelete(transaction_t commit_id, row_t rows[], idx_t count); + void CommitDelete(transaction_t commit_id, const DeleteInfo &info); bool HasDeletes() const override; diff --git a/src/include/duckdb/storage/table/row_group.hpp b/src/include/duckdb/storage/table/row_group.hpp index ee413dc9532b..6265279f52cb 100644 --- a/src/include/duckdb/storage/table/row_group.hpp +++ b/src/include/duckdb/storage/table/row_group.hpp @@ -54,7 +54,7 @@ class RowGroup : public SegmentBase { public: RowGroup(RowGroupCollection &collection, idx_t start, idx_t count); - RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer); + RowGroup(RowGroupCollection &collection, RowGroupPointer pointer); ~RowGroup(); private: diff --git a/src/include/duckdb/storage/table/row_version_manager.hpp b/src/include/duckdb/storage/table/row_version_manager.hpp index 0763513b767d..962505a338d6 100644 --- a/src/include/duckdb/storage/table/row_version_manager.hpp +++ b/src/include/duckdb/storage/table/row_version_manager.hpp @@ -15,6 +15,7 @@ namespace duckdb { +struct DeleteInfo; class MetadataManager; struct MetaBlockPointer; @@ -38,7 +39,7 @@ class RowVersionManager { void RevertAppend(idx_t start_row); idx_t DeleteRows(idx_t vector_idx, transaction_t transaction_id, row_t rows[], idx_t count); - void CommitDelete(idx_t vector_idx, transaction_t commit_id, row_t rows[], idx_t count); + void CommitDelete(idx_t vector_idx, transaction_t commit_id, const DeleteInfo &info); vector Checkpoint(MetadataManager &manager); static shared_ptr Deserialize(MetaBlockPointer delete_pointer, MetadataManager &manager, diff --git a/src/include/duckdb/storage/table_storage_info.hpp b/src/include/duckdb/storage/table_storage_info.hpp index 3ab3ea79b5d7..9f1eece71cf8 100644 --- a/src/include/duckdb/storage/table_storage_info.hpp +++ b/src/include/duckdb/storage/table_storage_info.hpp @@ -38,7 +38,7 @@ struct ColumnSegmentInfo { class TableStorageInfo { public: //! The (estimated) cardinality of the table - idx_t cardinality = DConstants::INVALID_INDEX; + optional_idx cardinality; //! Info of the indexes of a table vector index_info; }; diff --git a/src/include/duckdb/transaction/delete_info.hpp b/src/include/duckdb/transaction/delete_info.hpp index 569d12f19c36..99b02320cdf4 100644 --- a/src/include/duckdb/transaction/delete_info.hpp +++ b/src/include/duckdb/transaction/delete_info.hpp @@ -20,7 +20,26 @@ struct DeleteInfo { idx_t vector_idx; idx_t count; idx_t base_row; - row_t rows[1]; + //! Whether or not row ids are consecutive (0, 1, 2, ..., count). + //! If this is true no rows are stored and `rows` should not be accessed. + bool is_consecutive; + + uint16_t *GetRows() { + if (is_consecutive) { + throw InternalException("DeleteInfo is consecutive - rows are not accessible"); + } + return rows; + } + const uint16_t *GetRows() const { + if (is_consecutive) { + throw InternalException("DeleteInfo is consecutive - rows are not accessible"); + } + return rows; + } + +private:
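Editorial note: the reworked DeleteInfo above drops the per-row offset array entirely when a deletion covers rows 0..count consecutively; GetRows() guards against accidental access in that case. A hypothetical consumer honoring both layouts:

// Sketch: reconstruct absolute row ids for consecutive and non-consecutive deletes.
vector<row_t> deleted;
for (idx_t i = 0; i < info.count; i++) {
	idx_t offset = info.is_consecutive ? i : info.GetRows()[i];
	deleted.push_back(row_t(info.base_row + offset));
}

//!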
The per-vector row identifiers (actual row id is base_row + rows[x]) + uint16_t rows[1]; }; } // namespace duckdb diff --git a/src/main/attached_database.cpp b/src/main/attached_database.cpp index ce9ce5f4a82d..2de5865efafb 100644 --- a/src/main/attached_database.cpp +++ b/src/main/attached_database.cpp @@ -3,12 +3,12 @@ #include "duckdb/catalog/duck_catalog.hpp" #include "duckdb/common/constants.hpp" #include "duckdb/common/file_system.hpp" -#include "duckdb/main/database.hpp" #include "duckdb/main/database_manager.hpp" #include "duckdb/parser/parsed_data/attach_info.hpp" #include "duckdb/storage/storage_extension.hpp" #include "duckdb/storage/storage_manager.hpp" #include "duckdb/transaction/duck_transaction_manager.hpp" +#include "duckdb/main/database.hpp" #include "duckdb/main/database_path_and_type.hpp" namespace duckdb { diff --git a/src/main/config.cpp b/src/main/config.cpp index f1313d3b553a..dcf647b094b9 100644 --- a/src/main/config.cpp +++ b/src/main/config.cpp @@ -53,77 +53,78 @@ bool DBConfigOptions::debug_print_bindings = false; #define FINAL_SETTING \ { nullptr, nullptr, LogicalTypeId::INVALID, nullptr, nullptr, nullptr, nullptr, nullptr } -static ConfigurationOption internal_options[] = {DUCKDB_GLOBAL(AccessModeSetting), - DUCKDB_GLOBAL(AllowPersistentSecrets), - DUCKDB_GLOBAL(CheckpointThresholdSetting), - DUCKDB_GLOBAL(DebugCheckpointAbort), - DUCKDB_LOCAL(DebugForceExternal), - DUCKDB_LOCAL(DebugForceNoCrossProduct), - DUCKDB_LOCAL(DebugAsOfIEJoin), - DUCKDB_LOCAL(PreferRangeJoins), - DUCKDB_GLOBAL(DebugWindowMode), - DUCKDB_GLOBAL_LOCAL(DefaultCollationSetting), - DUCKDB_GLOBAL(DefaultOrderSetting), - DUCKDB_GLOBAL(DefaultNullOrderSetting), - DUCKDB_GLOBAL(DisabledFileSystemsSetting), - DUCKDB_GLOBAL(DisabledOptimizersSetting), - DUCKDB_GLOBAL(EnableExternalAccessSetting), - DUCKDB_GLOBAL(EnableFSSTVectors), - DUCKDB_GLOBAL(AllowUnsignedExtensionsSetting), - DUCKDB_GLOBAL(AllowUnredactedSecretsSetting), - DUCKDB_GLOBAL(CustomExtensionRepository), - DUCKDB_GLOBAL(AutoloadExtensionRepository), - DUCKDB_GLOBAL(AutoinstallKnownExtensions), - DUCKDB_GLOBAL(AutoloadKnownExtensions), - DUCKDB_GLOBAL(EnableObjectCacheSetting), - DUCKDB_GLOBAL(EnableHTTPMetadataCacheSetting), - DUCKDB_LOCAL(EnableProfilingSetting), - DUCKDB_LOCAL(EnableProgressBarSetting), - DUCKDB_LOCAL(EnableProgressBarPrintSetting), - DUCKDB_LOCAL(ErrorsAsJsonSetting), - DUCKDB_LOCAL(ExplainOutputSetting), - DUCKDB_GLOBAL(ExtensionDirectorySetting), - DUCKDB_GLOBAL(ExternalThreadsSetting), - DUCKDB_LOCAL(FileSearchPathSetting), - DUCKDB_GLOBAL(ForceCompressionSetting), - DUCKDB_GLOBAL(ForceBitpackingModeSetting), - DUCKDB_LOCAL(HomeDirectorySetting), - DUCKDB_LOCAL(LogQueryPathSetting), - DUCKDB_GLOBAL(LockConfigurationSetting), - DUCKDB_GLOBAL(ImmediateTransactionModeSetting), - DUCKDB_LOCAL(IntegerDivisionSetting), - DUCKDB_LOCAL(MaximumExpressionDepthSetting), - DUCKDB_GLOBAL(MaximumMemorySetting), - DUCKDB_GLOBAL(OldImplicitCasting), - DUCKDB_GLOBAL_ALIAS("memory_limit", MaximumMemorySetting), - DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting), - DUCKDB_LOCAL(OrderedAggregateThreshold), - DUCKDB_GLOBAL(PasswordSetting), - DUCKDB_LOCAL(PerfectHashThresholdSetting), - DUCKDB_LOCAL(PivotFilterThreshold), - DUCKDB_LOCAL(PivotLimitSetting), - DUCKDB_LOCAL(PreserveIdentifierCase), - DUCKDB_GLOBAL(PreserveInsertionOrder), - DUCKDB_LOCAL(ProfileOutputSetting), - DUCKDB_LOCAL(ProfilingModeSetting), - DUCKDB_LOCAL_ALIAS("profiling_output", ProfileOutputSetting), - DUCKDB_LOCAL(ProgressBarTimeSetting), - 
DUCKDB_LOCAL(SchemaSetting), - DUCKDB_LOCAL(SearchPathSetting), - DUCKDB_GLOBAL(SecretDirectorySetting), - DUCKDB_GLOBAL(DefaultSecretStorage), - DUCKDB_GLOBAL(TempDirectorySetting), - DUCKDB_GLOBAL(ThreadsSetting), - DUCKDB_GLOBAL(UsernameSetting), - DUCKDB_GLOBAL(ExportLargeBufferArrow), - DUCKDB_GLOBAL_ALIAS("user", UsernameSetting), - DUCKDB_GLOBAL_ALIAS("wal_autocheckpoint", CheckpointThresholdSetting), - DUCKDB_GLOBAL_ALIAS("worker_threads", ThreadsSetting), - DUCKDB_GLOBAL(FlushAllocatorSetting), - DUCKDB_GLOBAL(DuckDBApiSetting), - DUCKDB_GLOBAL(CustomUserAgentSetting), - DUCKDB_LOCAL(PartitionedWriteFlushThreshold), - FINAL_SETTING}; +static const ConfigurationOption internal_options[] = { + DUCKDB_GLOBAL(AccessModeSetting), + DUCKDB_GLOBAL(AllowPersistentSecrets), + DUCKDB_GLOBAL(CheckpointThresholdSetting), + DUCKDB_GLOBAL(DebugCheckpointAbort), + DUCKDB_LOCAL(DebugForceExternal), + DUCKDB_LOCAL(DebugForceNoCrossProduct), + DUCKDB_LOCAL(DebugAsOfIEJoin), + DUCKDB_LOCAL(PreferRangeJoins), + DUCKDB_GLOBAL(DebugWindowMode), + DUCKDB_GLOBAL_LOCAL(DefaultCollationSetting), + DUCKDB_GLOBAL(DefaultOrderSetting), + DUCKDB_GLOBAL(DefaultNullOrderSetting), + DUCKDB_GLOBAL(DisabledFileSystemsSetting), + DUCKDB_GLOBAL(DisabledOptimizersSetting), + DUCKDB_GLOBAL(EnableExternalAccessSetting), + DUCKDB_GLOBAL(EnableFSSTVectors), + DUCKDB_GLOBAL(AllowUnsignedExtensionsSetting), + DUCKDB_GLOBAL(AllowUnredactedSecretsSetting), + DUCKDB_GLOBAL(CustomExtensionRepository), + DUCKDB_GLOBAL(AutoloadExtensionRepository), + DUCKDB_GLOBAL(AutoinstallKnownExtensions), + DUCKDB_GLOBAL(AutoloadKnownExtensions), + DUCKDB_GLOBAL(EnableObjectCacheSetting), + DUCKDB_GLOBAL(EnableHTTPMetadataCacheSetting), + DUCKDB_LOCAL(EnableProfilingSetting), + DUCKDB_LOCAL(EnableProgressBarSetting), + DUCKDB_LOCAL(EnableProgressBarPrintSetting), + DUCKDB_LOCAL(ErrorsAsJsonSetting), + DUCKDB_LOCAL(ExplainOutputSetting), + DUCKDB_GLOBAL(ExtensionDirectorySetting), + DUCKDB_GLOBAL(ExternalThreadsSetting), + DUCKDB_LOCAL(FileSearchPathSetting), + DUCKDB_GLOBAL(ForceCompressionSetting), + DUCKDB_GLOBAL(ForceBitpackingModeSetting), + DUCKDB_LOCAL(HomeDirectorySetting), + DUCKDB_LOCAL(LogQueryPathSetting), + DUCKDB_GLOBAL(LockConfigurationSetting), + DUCKDB_GLOBAL(ImmediateTransactionModeSetting), + DUCKDB_LOCAL(IntegerDivisionSetting), + DUCKDB_LOCAL(MaximumExpressionDepthSetting), + DUCKDB_GLOBAL(MaximumMemorySetting), + DUCKDB_GLOBAL(OldImplicitCasting), + DUCKDB_GLOBAL_ALIAS("memory_limit", MaximumMemorySetting), + DUCKDB_GLOBAL_ALIAS("null_order", DefaultNullOrderSetting), + DUCKDB_LOCAL(OrderedAggregateThreshold), + DUCKDB_GLOBAL(PasswordSetting), + DUCKDB_LOCAL(PerfectHashThresholdSetting), + DUCKDB_LOCAL(PivotFilterThreshold), + DUCKDB_LOCAL(PivotLimitSetting), + DUCKDB_LOCAL(PreserveIdentifierCase), + DUCKDB_GLOBAL(PreserveInsertionOrder), + DUCKDB_LOCAL(ProfileOutputSetting), + DUCKDB_LOCAL(ProfilingModeSetting), + DUCKDB_LOCAL_ALIAS("profiling_output", ProfileOutputSetting), + DUCKDB_LOCAL(ProgressBarTimeSetting), + DUCKDB_LOCAL(SchemaSetting), + DUCKDB_LOCAL(SearchPathSetting), + DUCKDB_GLOBAL(SecretDirectorySetting), + DUCKDB_GLOBAL(DefaultSecretStorage), + DUCKDB_GLOBAL(TempDirectorySetting), + DUCKDB_GLOBAL(ThreadsSetting), + DUCKDB_GLOBAL(UsernameSetting), + DUCKDB_GLOBAL(ExportLargeBufferArrow), + DUCKDB_GLOBAL_ALIAS("user", UsernameSetting), + DUCKDB_GLOBAL_ALIAS("wal_autocheckpoint", CheckpointThresholdSetting), + DUCKDB_GLOBAL_ALIAS("worker_threads", ThreadsSetting), + DUCKDB_GLOBAL(FlushAllocatorSetting), + 
DUCKDB_GLOBAL(DuckDBApiSetting), + DUCKDB_GLOBAL(CustomUserAgentSetting), + DUCKDB_LOCAL(PartitionedWriteFlushThreshold), + FINAL_SETTING}; vector<ConfigurationOption> DBConfig::GetOptions() { vector<ConfigurationOption> options; @@ -149,7 +150,7 @@ vector<string> DBConfig::GetOptionNames() { return names; } -ConfigurationOption *DBConfig::GetOptionByIndex(idx_t target_index) { +optional_ptr<const ConfigurationOption> DBConfig::GetOptionByIndex(idx_t target_index) { for (idx_t index = 0; internal_options[index].name; index++) { if (index == target_index) { return internal_options + index; @@ -158,7 +159,7 @@ ConfigurationOption *DBConfig::GetOptionByIndex(idx_t target_index) { return nullptr; } -ConfigurationOption *DBConfig::GetOptionByName(const string &name) { +optional_ptr<const ConfigurationOption> DBConfig::GetOptionByName(const string &name) { auto lname = StringUtil::Lower(name); for (idx_t index = 0; internal_options[index].name; index++) { D_ASSERT(StringUtil::Lower(internal_options[index].name) == string(internal_options[index].name)); @@ -255,8 +256,8 @@ IndexTypeSet &DBConfig::GetIndexTypes() { void DBConfig::SetDefaultMaxMemory() { auto memory = FileSystem::GetAvailableMemory(); - if (memory != DConstants::INVALID_INDEX) { - options.maximum_memory = memory * 8 / 10; + if (memory.IsValid()) { + options.maximum_memory = memory.GetIndex() * 8 / 10; } } @@ -339,7 +340,8 @@ idx_t DBConfig::GetSystemMaxThreads(FileSystem &fs) { idx_t DBConfig::ParseMemoryLimit(const string &arg) { if (arg[0] == '-' || arg == "null" || arg == "none") { - return DConstants::INVALID_INDEX; + // infinite + return NumericLimits<idx_t>::Maximum(); } // split based on the number/non-number idx_t idx = 0; diff --git a/src/main/error_manager.cpp b/src/main/error_manager.cpp index ece4ce439343..4ec024c13aa6 100644 --- a/src/main/error_manager.cpp +++ b/src/main/error_manager.cpp @@ -10,7 +10,7 @@ struct DefaultError { const char *error; }; -static DefaultError internal_errors[] = { +static const DefaultError internal_errors[] = { {ErrorType::UNSIGNED_EXTENSION, "Extension \"%s\" could not be loaded because its signature is either missing or invalid and unsigned extensions " "are disabled by configuration (allow_unsigned_extensions)"}, diff --git a/src/main/extension/extension_alias.cpp b/src/main/extension/extension_alias.cpp index 84305552ff66..81d3c1e1b7ae 100644 --- a/src/main/extension/extension_alias.cpp +++ b/src/main/extension/extension_alias.cpp @@ -2,15 +2,15 @@ namespace duckdb { -static ExtensionAlias internal_aliases[] = {{"http", "httpfs"}, // httpfs - {"https", "httpfs"}, - {"md", "motherduck"}, // motherduck - {"mysql", "mysql_scanner"}, // mysql - {"s3", "httpfs"}, - {"postgres", "postgres_scanner"}, // postgres - {"sqlite", "sqlite_scanner"}, // sqlite - {"sqlite3", "sqlite_scanner"}, - {nullptr, nullptr}}; +static const ExtensionAlias internal_aliases[] = {{"http", "httpfs"}, // httpfs + {"https", "httpfs"}, + {"md", "motherduck"}, // motherduck + {"mysql", "mysql_scanner"}, // mysql + {"s3", "httpfs"}, + {"postgres", "postgres_scanner"}, // postgres + {"sqlite", "sqlite_scanner"}, // sqlite + {"sqlite3", "sqlite_scanner"}, + {nullptr, nullptr}}; idx_t ExtensionHelper::ExtensionAliasCount() { idx_t index; diff --git a/src/main/extension/extension_helper.cpp b/src/main/extension/extension_helper.cpp index bc446d141213..818a25850b77 100644 --- a/src/main/extension/extension_helper.cpp +++ b/src/main/extension/extension_helper.cpp @@ -100,7 +100,7 @@ namespace duckdb { //===--------------------------------------------------------------------===// // Default Extensions 
//===--------------------------------------------------------------------===// -static DefaultExtension internal_extensions[] = { +static const DefaultExtension internal_extensions[] = { {"icu", "Adds support for time zones and collations using the ICU library", DUCKDB_EXTENSION_ICU_LINKED}, {"excel", "Adds support for Excel-like format strings", DUCKDB_EXTENSION_EXCEL_LINKED}, {"parquet", "Adds support for reading and writing parquet files", DUCKDB_EXTENSION_PARQUET_LINKED}, @@ -139,7 +139,8 @@ DefaultExtension ExtensionHelper::GetDefaultExtension(idx_t index) { //===--------------------------------------------------------------------===// // Allow Auto-Install Extensions //===--------------------------------------------------------------------===// -static const char *auto_install[] = {"motherduck", "postgres_scanner", "mysql_scanner", "sqlite_scanner", nullptr}; +static const char *const auto_install[] = {"motherduck", "postgres_scanner", "mysql_scanner", "sqlite_scanner", + nullptr}; // TODO: unify with new autoload mechanism bool ExtensionHelper::AllowAutoInstall(const string &extension) { @@ -398,7 +399,7 @@ ExtensionLoadResult ExtensionHelper::LoadExtensionInternal(DuckDB &db, const std return ExtensionLoadResult::LOADED_EXTENSION; } -static vector<string> public_keys = { +static const char *const public_keys[] = { R"( -----BEGIN PUBLIC KEY----- MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA6aZuHUa1cLR9YDDYaEfi @@ -618,10 +619,14 @@ SLWQo0+/ciQ21Zwz5SwimX8ep1YpqYirO04gcyGZzAfGboXRvdUwA+1bZvuUXdKC EMS5gLv50CzQqJXK9mNzPuYXNUIc4Pw4ssVWe0OfN3Od90gl5uFUwk/G9lWSYnBN 3wIDAQAB -----END PUBLIC KEY----- -)"}; +)", nullptr}; const vector<string> ExtensionHelper::GetPublicKeys() { - return public_keys; + vector<string> keys; + for (idx_t i = 0; public_keys[i]; i++) { + keys.emplace_back(public_keys[i]); + } + return keys; } } // namespace duckdb diff --git a/src/optimizer/compressed_materialization.cpp b/src/optimizer/compressed_materialization.cpp index 04699ad169a4..c54918a8479e 100644 --- a/src/optimizer/compressed_materialization.cpp +++ b/src/optimizer/compressed_materialization.cpp @@ -30,7 +30,7 @@ CMBindingInfo::CMBindingInfo(ColumnBinding binding_p, const LogicalType &type_p) CompressedMaterializationInfo::CompressedMaterializationInfo(LogicalOperator &op, vector<idx_t> &&child_idxs_p, const column_binding_set_t &referenced_bindings) - : child_idxs(child_idxs_p) { + : child_idxs(std::move(child_idxs_p)) { child_info.reserve(child_idxs.size()); for (const auto &child_idx : child_idxs) { child_info.emplace_back(*op.children[child_idx], referenced_bindings); @@ -158,7 +158,7 @@ bool CompressedMaterialization::TryCompressChild(CompressedMaterializationInfo & } void CompressedMaterialization::CreateCompressProjection(unique_ptr<LogicalOperator> &child_op, - vector<unique_ptr<Expression>> &&compress_exprs, + vector<unique_ptr<Expression>> compress_exprs, CompressedMaterializationInfo &info, CMChildInfo &child_info) { // Replace child op with a projection vector<unique_ptr<Expression>> projections; diff --git a/src/optimizer/cse_optimizer.cpp b/src/optimizer/cse_optimizer.cpp index 1442a9a3f191..d3f11bdd47ba 100644 --- a/src/optimizer/cse_optimizer.cpp +++ b/src/optimizer/cse_optimizer.cpp @@ -13,9 +13,9 @@ namespace duckdb { //! 
underlying projection struct CSENode { idx_t count; - idx_t column_index; + optional_idx column_index; - CSENode() : count(1), column_index(DConstants::INVALID_INDEX) { + CSENode() : count(1), column_index() { } }; @@ -102,7 +102,7 @@ void CommonSubExpressionOptimizer::PerformCSEReplacement(unique_ptr<Expression> // check if it has already been pushed into the projection auto alias = expr.alias; auto type = expr.return_type; - if (node.column_index == DConstants::INVALID_INDEX) { + if (!node.column_index.IsValid()) { // has not been pushed yet: push it node.column_index = state.expressions.size(); state.expressions.push_back(std::move(expr_ptr)); @@ -110,8 +110,8 @@ void CommonSubExpressionOptimizer::PerformCSEReplacement(unique_ptr<Expression> state.cached_expressions.push_back(std::move(expr_ptr)); } // replace the original expression with a bound column ref - expr_ptr = make_uniq<BoundColumnRefExpression>(alias, type, - ColumnBinding(state.projection_index, node.column_index)); + expr_ptr = make_uniq<BoundColumnRefExpression>( + alias, type, ColumnBinding(state.projection_index, node.column_index.GetIndex())); return; } } diff --git a/src/optimizer/remove_unused_columns.cpp b/src/optimizer/remove_unused_columns.cpp index 73d106558a6c..6597d3e32302 100644 --- a/src/optimizer/remove_unused_columns.cpp +++ b/src/optimizer/remove_unused_columns.cpp @@ -234,17 +234,17 @@ void RemoveUnusedColumns::VisitOperator(LogicalOperator &op) { // for every table filter, push a column binding into the column references map to prevent the column from // being projected out for (auto &filter : get.table_filters.filters) { - idx_t index = DConstants::INVALID_INDEX; + optional_idx index; for (idx_t i = 0; i < get.column_ids.size(); i++) { if (get.column_ids[i] == filter.first) { index = i; break; } } - if (index == DConstants::INVALID_INDEX) { + if (!index.IsValid()) { throw InternalException("Could not find column index for table filter"); } - ColumnBinding filter_binding(get.table_index, index); + ColumnBinding filter_binding(get.table_index, index.GetIndex()); if (column_references.find(filter_binding) == column_references.end()) { column_references.insert(make_pair(filter_binding, vector<reference<BoundColumnRefExpression>>())); } diff --git a/src/optimizer/rule/ordered_aggregate_optimizer.cpp b/src/optimizer/rule/ordered_aggregate_optimizer.cpp index 553c0e30f450..5aa9a4b77351 100644 --- a/src/optimizer/rule/ordered_aggregate_optimizer.cpp +++ b/src/optimizer/rule/ordered_aggregate_optimizer.cpp @@ -84,11 +84,11 @@ unique_ptr<Expression> OrderedAggregateOptimizer::Apply(ClientContext &context, types.emplace_back(child->return_type); } auto best_function = binder.BindFunction(func.name, func.functions, types, error); - if (best_function == DConstants::INVALID_INDEX) { + if (!best_function.IsValid()) { error.Throw(); } // found a matching function! - auto bound_function = func.functions.GetFunctionByOffset(best_function); + auto bound_function = func.functions.GetFunctionByOffset(best_function.GetIndex()); return binder.BindAggregateFunction(bound_function, std::move(children), std::move(aggr.filter), aggr.IsDistinct() ? 
AggregateType::DISTINCT : AggregateType::NON_DISTINCT); } diff --git a/src/parser/parsed_data/comment_on_column_info.cpp b/src/parser/parsed_data/comment_on_column_info.cpp index 909990d69d77..fbce5d585170 100644 --- a/src/parser/parsed_data/comment_on_column_info.cpp +++ b/src/parser/parsed_data/comment_on_column_info.cpp @@ -1,5 +1,6 @@ #include "duckdb/parser/parsed_data/comment_on_column_info.hpp" #include "duckdb/catalog/catalog.hpp" +#include "duckdb/catalog/catalog_entry_retriever.hpp" namespace duckdb { @@ -21,8 +22,8 @@ unique_ptr SetColumnCommentInfo::Copy() const { return std::move(result); } -optional_ptr SetColumnCommentInfo::TryResolveCatalogEntry(ClientContext &context) { - auto entry = Catalog::GetEntry(context, CatalogType::TABLE_ENTRY, catalog, schema, name, if_not_found); +optional_ptr SetColumnCommentInfo::TryResolveCatalogEntry(CatalogEntryRetriever &retriever) { + auto entry = retriever.GetEntry(CatalogType::TABLE_ENTRY, catalog, schema, name, if_not_found); if (entry) { catalog_entry_type = entry->type; diff --git a/src/parser/parsed_data/create_info.cpp b/src/parser/parsed_data/create_info.cpp index a548955b31d9..06f470dd1f27 100644 --- a/src/parser/parsed_data/create_info.cpp +++ b/src/parser/parsed_data/create_info.cpp @@ -19,6 +19,7 @@ void CreateInfo::CopyProperties(CreateInfo &other) const { other.temporary = temporary; other.internal = internal; other.sql = sql; + other.dependencies = dependencies; other.comment = comment; } diff --git a/src/planner/binder.cpp b/src/planner/binder.cpp index f9c737ef1c17..47a382431735 100644 --- a/src/planner/binder.cpp +++ b/src/planner/binder.cpp @@ -50,9 +50,10 @@ shared_ptr Binder::CreateBinder(ClientContext &context, optional_ptr parent_p, bool inherit_ctes_p) - : context(context), bind_context(*this), parent(std::move(parent_p)), bound_tables(0), - inherit_ctes(inherit_ctes_p) { + : context(context), bind_context(*this), parent(std::move(parent_p)), bound_tables(0), inherit_ctes(inherit_ctes_p), + entry_retriever(context) { if (parent) { + entry_retriever.SetCallback(parent->entry_retriever.GetCallback()); // We have to inherit macro and lambda parameter bindings and from the parent binder, if there is a parent. 
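The pattern introduced in this binder.cpp hunk is a catalog-lookup observer: each Binder now owns a CatalogEntryRetriever, and a child binder copies its parent's callback so that every lookup in a bind tree reports to the same place (later hunks use this to collect dependencies for views, macros, types, and indexes). A minimal self-contained sketch of the idea, using hypothetical names (Retriever, Entry) rather than DuckDB's actual classes:

#include <functional>
#include <string>
#include <unordered_map>

// Hypothetical stand-ins for CatalogEntry and CatalogEntryRetriever.
struct Entry {
	std::string name;
};

using entry_callback_t = std::function<void(Entry &)>;

class Retriever {
public:
	// Report every successful lookup to the callback; in the patch, the binder
	// installs one that records the entry as a dependency of the object being bound.
	Entry *GetEntry(const std::string &name) {
		auto it = entries.find(name);
		if (it == entries.end()) {
			return nullptr;
		}
		if (callback) {
			callback(it->second);
		}
		return &it->second;
	}
	void SetCallback(entry_callback_t cb) {
		callback = std::move(cb);
	}
	const entry_callback_t &GetCallback() const {
		return callback;
	}

	std::unordered_map<std::string, Entry> entries;

private:
	entry_callback_t callback;
};

In the hunks for BindCreateViewInfo, BindCreateFunctionInfo, and BindCreateIndex further down, the installed callback does exactly this: it appends each looked-up entry to the statement's dependency list, skipping entries from other catalogs.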
macro_binding = parent->macro_binding; @@ -333,17 +334,19 @@ void Binder::AddCTE(const string &name, CommonTableExpressionInfo &info) { CTE_bindings.insert(make_pair(name, reference(info))); } -optional_ptr Binder::FindCTE(const string &name, bool skip) { +vector> Binder::FindCTE(const string &name, bool skip) { auto entry = CTE_bindings.find(name); + vector> ctes; if (entry != CTE_bindings.end()) { if (!skip || entry->second.get().query->node->type == QueryNodeType::RECURSIVE_CTE_NODE) { - return &entry->second.get(); + ctes.push_back(entry->second); } } if (parent && inherit_ctes) { - return parent->FindCTE(name, name == alias); + auto parent_ctes = parent->FindCTE(name, name == alias); + ctes.insert(ctes.end(), parent_ctes.begin(), parent_ctes.end()); } - return nullptr; + return ctes; } bool Binder::CTEIsAlreadyBound(CommonTableExpressionInfo &cte) { @@ -574,4 +577,10 @@ BoundStatement Binder::BindReturning(vector> return return result; } +optional_ptr Binder::GetCatalogEntry(CatalogType type, const string &catalog, const string &schema, + const string &name, OnEntryNotFound on_entry_not_found, + QueryErrorContext &error_context) { + return entry_retriever.GetEntry(type, catalog, schema, name, on_entry_not_found, error_context); +} + } // namespace duckdb diff --git a/src/planner/binder/expression/bind_aggregate_expression.cpp b/src/planner/binder/expression/bind_aggregate_expression.cpp index 2a2fcb630299..eecd07ea9d14 100644 --- a/src/planner/binder/expression/bind_aggregate_expression.cpp +++ b/src/planner/binder/expression/bind_aggregate_expression.cpp @@ -95,14 +95,17 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu // Handle ordered-set aggregates by moving the single ORDER BY expression to the front of the children. // https://www.postgresql.org/docs/current/functions-aggregate.html#FUNCTIONS-ORDEREDSET-TABLE - bool ordered_set_agg = false; + // We also have to handle ORDER BY in the argument list, so note how many arguments we should have + // and only inject the ordering expression if there are too few. 
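Put differently, ordered_set_agg changes from a boolean into the expected argument count: 1 for mode, 2 for quantile_cont and quantile_disc, as the hunk continues below. A standalone sketch of that decision rule, with hypothetical helper names that are not part of DuckDB's API:

#include <cstddef>
#include <string>

// Argument counts mirror the hunk below: mode expects one argument,
// quantile_cont/quantile_disc expect two; anything else is not ordered-set.
static std::size_t ExpectedArgCount(const std::string &func) {
	if (func == "mode") {
		return 1;
	}
	if (func == "quantile_cont" || func == "quantile_disc") {
		return 2;
	}
	return 0;
}

// Inject the single ORDER BY expression as a trailing argument only when the
// call site passed fewer arguments than the aggregate expects.
static bool ShouldInjectOrderBy(const std::string &func, std::size_t provided) {
	const auto expected = ExpectedArgCount(func);
	return expected > 0 && provided < expected;
}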
+ idx_t ordered_set_agg = 0; bool negate_fractions = false; if (aggr.order_bys && aggr.order_bys->orders.size() == 1) { const auto &func_name = aggr.function_name; - ordered_set_agg = (func_name == "quantile_cont" || func_name == "quantile_disc" || - (func_name == "mode" && aggr.children.empty())); + if (func_name == "mode") { + ordered_set_agg = 1; + } else if (func_name == "quantile_cont" || func_name == "quantile_disc") { + ordered_set_agg = 2; - if (ordered_set_agg) { auto &config = DBConfig::GetConfig(context); const auto &order = aggr.order_bys->orders[0]; const auto sense = @@ -111,10 +114,11 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu } } - for (auto &child : aggr.children) { + for (idx_t i = 0; i < aggr.children.size(); ++i) { + auto &child = aggr.children[i]; aggregate_binder.BindChild(child, 0, error); // We have to negate the fractions for PERCENTILE_XXXX DESC - if (!error.HasError() && ordered_set_agg) { + if (!error.HasError() && ordered_set_agg && i == aggr.children.size() - 1) { NegatePercentileFractions(context, child, negate_fractions); } } @@ -181,14 +185,17 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu if (ordered_set_agg) { const bool order_sensitive = (aggr.function_name == "mode"); - for (auto &order : aggr.order_bys->orders) { - auto &child = BoundExpression::GetExpression(*order.expression); - types.push_back(child->return_type); - arguments.push_back(child->return_type); - if (order_sensitive) { - children.push_back(child->Copy()); - } else { - children.push_back(std::move(child)); + // Inject missing ordering arguments + if (aggr.children.size() < ordered_set_agg) { + for (auto &order : aggr.order_bys->orders) { + auto &child = BoundExpression::GetExpression(*order.expression); + types.push_back(child->return_type); + arguments.push_back(child->return_type); + if (order_sensitive) { + children.push_back(child->Copy()); + } else { + children.push_back(std::move(child)); + } } } if (!order_sensitive) { @@ -205,13 +212,13 @@ BindResult BaseSelectBinder::BindAggregate(FunctionExpression &aggr, AggregateFu // bind the aggregate FunctionBinder function_binder(context); - idx_t best_function = function_binder.BindFunction(func.name, func.functions, types, error); - if (best_function == DConstants::INVALID_INDEX) { + auto best_function = function_binder.BindFunction(func.name, func.functions, types, error); + if (!best_function.IsValid()) { error.AddQueryLocation(aggr); error.Throw(); } // found a matching function! - auto bound_function = func.functions.GetFunctionByOffset(best_function); + auto bound_function = func.functions.GetFunctionByOffset(best_function.GetIndex()); // Bind any sort columns, unless the aggregate is order-insensitive unique_ptr order_bys; diff --git a/src/planner/binder/expression/bind_cast_expression.cpp b/src/planner/binder/expression/bind_cast_expression.cpp index 68272d0a1871..79d9047e1b8f 100644 --- a/src/planner/binder/expression/bind_cast_expression.cpp +++ b/src/planner/binder/expression/bind_cast_expression.cpp @@ -14,7 +14,7 @@ BindResult ExpressionBinder::BindExpression(CastExpression &expr, idx_t depth) { } // FIXME: We can also implement 'hello'::schema.custom_type; and pass by the schema down here. 
// Right now just considering its DEFAULT_SCHEMA always - Binder::BindLogicalType(context, expr.cast_type); + binder.BindLogicalType(expr.cast_type); // the children have been successfully resolved auto &child = BoundExpression::GetExpression(*expr.child); if (expr.try_cast) { diff --git a/src/planner/binder/expression/bind_function_expression.cpp b/src/planner/binder/expression/bind_function_expression.cpp index 0d888e37b581..8eb9a8904595 100644 --- a/src/planner/binder/expression/bind_function_expression.cpp +++ b/src/planner/binder/expression/bind_function_expression.cpp @@ -39,13 +39,12 @@ BindResult ExpressionBinder::BindExpression(FunctionExpression &function, idx_t // lookup the function in the catalog QueryErrorContext error_context(function.query_location); binder.BindSchemaOrCatalog(function.catalog, function.schema); - auto func = Catalog::GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, function.catalog, function.schema, - function.function_name, OnEntryNotFound::RETURN_NULL, error_context); + auto func = GetCatalogEntry(CatalogType::SCALAR_FUNCTION_ENTRY, function.catalog, function.schema, + function.function_name, OnEntryNotFound::RETURN_NULL, error_context); if (!func) { // function was not found - check if we this is a table function - auto table_func = - Catalog::GetEntry(context, CatalogType::TABLE_FUNCTION_ENTRY, function.catalog, function.schema, - function.function_name, OnEntryNotFound::RETURN_NULL, error_context); + auto table_func = GetCatalogEntry(CatalogType::TABLE_FUNCTION_ENTRY, function.catalog, function.schema, + function.function_name, OnEntryNotFound::RETURN_NULL, error_context); if (table_func) { throw BinderException(function, "Function \"%s\" is a table function but it was used as a scalar function. This " @@ -75,8 +74,8 @@ BindResult ExpressionBinder::BindExpression(FunctionExpression &function, idx_t } } // rebind the function - func = Catalog::GetEntry(context, CatalogType::SCALAR_FUNCTION_ENTRY, function.catalog, function.schema, - function.function_name, OnEntryNotFound::THROW_EXCEPTION, error_context); + func = GetCatalogEntry(CatalogType::SCALAR_FUNCTION_ENTRY, function.catalog, function.schema, + function.function_name, OnEntryNotFound::THROW_EXCEPTION, error_context); } if (func->type != CatalogType::AGGREGATE_FUNCTION_ENTRY && @@ -281,4 +280,11 @@ string ExpressionBinder::UnsupportedUnnestMessage() { return "UNNEST not supported here"; } +optional_ptr ExpressionBinder::GetCatalogEntry(CatalogType type, const string &catalog, + const string &schema, const string &name, + OnEntryNotFound on_entry_not_found, + QueryErrorContext &error_context) { + return binder.GetCatalogEntry(type, catalog, schema, name, on_entry_not_found, error_context); +} + } // namespace duckdb diff --git a/src/planner/binder/expression/bind_window_expression.cpp b/src/planner/binder/expression/bind_window_expression.cpp index 7801f5e6b27c..2c64e2e05fe8 100644 --- a/src/planner/binder/expression/bind_window_expression.cpp +++ b/src/planner/binder/expression/bind_window_expression.cpp @@ -252,12 +252,12 @@ BindResult BaseSelectBinder::BindWindow(WindowExpression &window, idx_t depth) { ErrorData error; FunctionBinder function_binder(context); auto best_function = function_binder.BindFunction(func.name, func.functions, types, error); - if (best_function == DConstants::INVALID_INDEX) { + if (!best_function.IsValid()) { error.AddQueryLocation(window); error.Throw(); } // found a matching function! 
bind it as an aggregate - auto bound_function = func.functions.GetFunctionByOffset(best_function); + auto bound_function = func.functions.GetFunctionByOffset(best_function.GetIndex()); auto bound_aggregate = function_binder.BindAggregateFunction(bound_function, std::move(children)); // create the aggregate aggregate = make_uniq(bound_aggregate->function); diff --git a/src/planner/binder/statement/bind_copy_database.cpp b/src/planner/binder/statement/bind_copy_database.cpp index 8a0047da3263..e233130be30d 100644 --- a/src/planner/binder/statement/bind_copy_database.cpp +++ b/src/planner/binder/statement/bind_copy_database.cpp @@ -19,81 +19,31 @@ #include "duckdb/planner/expression/bound_constant_expression.hpp" #include "duckdb/planner/operator/logical_dummy_scan.hpp" #include "duckdb/planner/operator/logical_expression_get.hpp" +#include "duckdb/catalog/duck_catalog.hpp" +#include "duckdb/catalog/dependency_manager.hpp" namespace duckdb { unique_ptr Binder::BindCopyDatabaseSchema(CopyDatabaseStatement &stmt, Catalog &from_database, Catalog &to_database) { - auto from_schemas = from_database.GetSchemas(context); - ExportEntries entries; - PhysicalExport::ExtractEntries(context, from_schemas, entries); + catalog_entry_vector_t catalog_entries; + if (from_database.IsDuckCatalog()) { + auto &duck_catalog = from_database.Cast(); + auto &dependency_manager = duck_catalog.GetDependencyManager(); + auto transaction = from_database.GetCatalogTransaction(context); + catalog_entries = dependency_manager.GetExportOrder(transaction); + } else { + catalog_entries = PhysicalExport::GetNaiveExportOrder(context, from_database); + } auto info = make_uniq(from_database, to_database); - - // get a list of all schemas to copy over - for (auto &schema_ref : from_schemas) { - auto &schema = schema_ref.get().Cast(); - if (schema.internal) { - continue; - } - auto create_info = schema.GetInfo(); - create_info->catalog = to_database.GetName(); - create_info->on_conflict = OnCreateConflict::IGNORE_ON_CONFLICT; - info->entries.push_back(std::move(create_info)); - } - // get a list of all types to copy over - for (auto &seq_ref : entries.sequences) { - auto &seq_entry = seq_ref.get().Cast(); - if (seq_entry.internal) { - continue; - } - auto create_info = seq_entry.GetInfo(); - create_info->catalog = to_database.GetName(); - create_info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; - info->entries.push_back(std::move(create_info)); - } - // get a list of all types to copy over - for (auto &type_ref : entries.custom_types) { - auto &type_entry = type_ref.get().Cast(); - if (type_entry.internal) { - continue; - } - auto create_info = type_entry.GetInfo(); - create_info->catalog = to_database.GetName(); - create_info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; - info->entries.push_back(std::move(create_info)); - } - // get a list of all tables to copy over - for (auto &table_ref : entries.tables) { - auto &table = table_ref.get().Cast(); - if (table.internal) { - continue; - } - auto create_info = table.GetInfo(); - create_info->catalog = to_database.GetName(); - create_info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; - info->entries.push_back(std::move(create_info)); - } - for (auto ¯o_ref : entries.macros) { - auto ¯o = macro_ref.get().Cast(); - if (macro.internal) { - continue; - } - auto create_info = macro.GetInfo(); - create_info->catalog = to_database.GetName(); - create_info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; - info->entries.push_back(std::move(create_info)); - } - // get a 
list of all views to copy over - for (auto &view_ref : entries.views) { - auto &view = view_ref.get().Cast(); - if (view.internal) { - continue; - } - auto create_info = view.GetInfo(); + for (auto &entry : catalog_entries) { + auto create_info = entry.get().GetInfo(); create_info->catalog = to_database.GetName(); - create_info->on_conflict = OnCreateConflict::ERROR_ON_CONFLICT; + auto on_conflict = create_info->type == CatalogType::SCHEMA_ENTRY ? OnCreateConflict::IGNORE_ON_CONFLICT + : OnCreateConflict::ERROR_ON_CONFLICT; + create_info->on_conflict = on_conflict; info->entries.push_back(std::move(create_info)); } @@ -105,6 +55,7 @@ unique_ptr Binder::BindCopyDatabaseData(CopyDatabaseStatement & Catalog &to_database) { auto from_schemas = from_database.GetSchemas(context); + // We can just use ExtractEntries here because the order doesn't matter ExportEntries entries; PhysicalExport::ExtractEntries(context, from_schemas, entries); diff --git a/src/planner/binder/statement/bind_create.cpp b/src/planner/binder/statement/bind_create.cpp index 69fdc4afca7a..263b9a538b4d 100644 --- a/src/planner/binder/statement/bind_create.cpp +++ b/src/planner/binder/statement/bind_create.cpp @@ -45,7 +45,7 @@ namespace duckdb { -void Binder::BindSchemaOrCatalog(ClientContext &context, string &catalog, string &schema) { +void Binder::BindSchemaOrCatalog(string &catalog, string &schema) { if (catalog.empty() && !schema.empty()) { // schema is specified - but catalog is not // try searching for the catalog instead @@ -73,10 +73,6 @@ void Binder::BindSchemaOrCatalog(ClientContext &context, string &catalog, string } } -void Binder::BindSchemaOrCatalog(string &catalog, string &schema) { - BindSchemaOrCatalog(context, catalog, schema); -} - const string Binder::BindCatalog(string &catalog) { auto &db_manager = DatabaseManager::Get(context); optional_ptr database = db_manager.GetDatabase(context, catalog); @@ -133,10 +129,23 @@ SchemaCatalogEntry &Binder::BindCreateSchema(CreateInfo &info) { return schema; } +void Binder::SetCatalogLookupCallback(catalog_entry_callback_t callback) { + entry_retriever.SetCallback(std::move(callback)); +} + void Binder::BindCreateViewInfo(CreateViewInfo &base) { // bind the view as if it were a query so we can catch errors // note that we bind the original, and replace the original with a copy auto view_binder = Binder::CreateBinder(context); + auto &dependencies = base.dependencies; + auto &catalog = Catalog::GetCatalog(context, base.catalog); + view_binder->SetCatalogLookupCallback([&dependencies, &catalog](CatalogEntry &entry) { + if (&catalog != &entry.ParentCatalog()) { + // Don't register dependencies between catalogs + return; + } + dependencies.AddDependency(entry); + }); view_binder->can_contain_nulls = true; auto copy = base.query->Copy(); @@ -187,6 +196,16 @@ SchemaCatalogEntry &Binder::BindCreateFunctionInfo(CreateInfo &info) { auto sel_node = make_uniq(); auto group_info = make_uniq(); SelectBinder binder(*this, context, *sel_node, *group_info); + auto &dependencies = base.dependencies; + auto &catalog = Catalog::GetCatalog(context, info.catalog); + binder.SetCatalogLookupCallback([&dependencies, &catalog](CatalogEntry &entry) { + if (&catalog != &entry.ParentCatalog()) { + // Don't register any cross-catalog dependencies + return; + } + // Register any catalog entry required to bind the macro function + dependencies.AddDependency(entry); + }); error = binder.Bind(expression, 0, false); if (error.HasError()) { @@ -196,11 +215,10 @@ SchemaCatalogEntry 
&Binder::BindCreateFunctionInfo(CreateInfo &info) { return BindCreateSchema(info); } -void Binder::BindLogicalType(ClientContext &context, LogicalType &type, optional_ptr catalog, - const string &schema) { +void Binder::BindLogicalType(LogicalType &type, optional_ptr catalog, const string &schema) { if (type.id() == LogicalTypeId::LIST || type.id() == LogicalTypeId::MAP) { auto child_type = ListType::GetChildType(type); - BindLogicalType(context, child_type, catalog, schema); + BindLogicalType(child_type, catalog, schema); auto alias = type.GetAlias(); if (type.id() == LogicalTypeId::LIST) { type = LogicalType::LIST(child_type); @@ -213,7 +231,7 @@ void Binder::BindLogicalType(ClientContext &context, LogicalType &type, optional } else if (type.id() == LogicalTypeId::STRUCT) { auto child_types = StructType::GetChildTypes(type); for (auto &child_type : child_types) { - BindLogicalType(context, child_type.second, catalog, schema); + BindLogicalType(child_type.second, catalog, schema); } // Generate new Struct Type auto alias = type.GetAlias(); @@ -222,14 +240,14 @@ void Binder::BindLogicalType(ClientContext &context, LogicalType &type, optional } else if (type.id() == LogicalTypeId::ARRAY) { auto child_type = ArrayType::GetChildType(type); auto array_size = ArrayType::GetSize(type); - BindLogicalType(context, child_type, catalog, schema); + BindLogicalType(child_type, catalog, schema); auto alias = type.GetAlias(); type = LogicalType::ARRAY(child_type, array_size); type.SetAlias(alias); } else if (type.id() == LogicalTypeId::UNION) { auto member_types = UnionType::CopyMemberTypes(type); for (auto &member_type : member_types) { - BindLogicalType(context, member_type.second, catalog, schema); + BindLogicalType(member_type.second, catalog, schema); } // Generate new Union Type auto alias = type.GetAlias(); @@ -242,22 +260,41 @@ void Binder::BindLogicalType(ClientContext &context, LogicalType &type, optional // 1) In the same schema as the table // 2) In the same catalog // 3) System catalog - type = catalog->GetType(context, schema, user_type_name, OnEntryNotFound::RETURN_NULL); + auto entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, *catalog, schema, user_type_name, + OnEntryNotFound::RETURN_NULL); + if (!entry) { + type = LogicalType::INVALID; + } else { + auto &type_entry = entry->Cast(); + type = type_entry.user_type; + } if (type.id() == LogicalTypeId::INVALID) { - type = catalog->GetType(context, INVALID_SCHEMA, user_type_name, OnEntryNotFound::RETURN_NULL); + entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, *catalog, INVALID_SCHEMA, user_type_name, + OnEntryNotFound::RETURN_NULL); + if (!entry) { + type = LogicalType::INVALID; + } else { + auto &type_entry = entry->Cast(); + type = type_entry.user_type; + } } if (type.id() == LogicalTypeId::INVALID) { - type = Catalog::GetType(context, INVALID_CATALOG, INVALID_SCHEMA, user_type_name); + auto entry = + entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, INVALID_CATALOG, INVALID_SCHEMA, user_type_name); + auto &type_entry = entry->Cast(); + type = type_entry.user_type; } } else { string type_catalog = UserType::GetCatalog(type); string type_schema = UserType::GetSchema(type); - BindSchemaOrCatalog(context, type_catalog, type_schema); - type = Catalog::GetType(context, type_catalog, type_schema, user_type_name); + BindSchemaOrCatalog(type_catalog, type_schema); + auto entry = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, type_catalog, type_schema, user_type_name); + auto &type_entry = entry->Cast(); + type = 
type_entry.user_type; } - BindLogicalType(context, type, catalog, schema); + BindLogicalType(type, catalog, schema); } } @@ -447,6 +484,15 @@ unique_ptr DuckCatalog::BindCreateIndex(Binder &binder, CreateS auto &get = plan->Cast(); // bind the index expressions IndexBinder index_binder(binder, binder.context); + auto &dependencies = base.dependencies; + auto &catalog = Catalog::GetCatalog(binder.context, base.catalog); + index_binder.SetCatalogLookupCallback([&dependencies, &catalog](CatalogEntry &entry) { + if (&catalog != &entry.ParentCatalog()) { + // Don't register any cross-catalog dependencies + return; + } + dependencies.AddDependency(entry); + }); vector> expressions; expressions.reserve(base.expressions.size()); for (auto &expr : base.expressions) { @@ -509,8 +555,9 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { } case CatalogType::MACRO_ENTRY: { auto &schema = BindCreateFunctionInfo(*stmt.info); - result.plan = + auto logical_create = make_uniq(LogicalOperatorType::LOGICAL_CREATE_MACRO, std::move(stmt.info), &schema); + result.plan = std::move(logical_create); break; } case CatalogType::INDEX_ENTRY: { @@ -566,8 +613,9 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { CheckForeignKeyTypes(create_info.columns, create_info.columns, fk); } else { // have to resolve referenced table - auto &pk_table_entry_ptr = - Catalog::GetEntry(context, INVALID_CATALOG, fk.info.schema, fk.info.table); + auto table_entry = + entry_retriever.GetEntry(CatalogType::TABLE_ENTRY, INVALID_CATALOG, fk.info.schema, fk.info.table); + auto &pk_table_entry_ptr = table_entry->Cast(); fk_schemas.insert(pk_table_entry_ptr.schema); FindMatchingPrimaryKeyColumns(pk_table_entry_ptr.GetColumns(), pk_table_entry_ptr.GetConstraints(), fk); FindForeignKeyIndexes(pk_table_entry_ptr.GetColumns(), fk.pk_columns, fk.info.pk_keys); @@ -612,6 +660,16 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { auto &schema = BindCreateSchema(*stmt.info); auto &create_type_info = stmt.info->Cast(); result.plan = make_uniq(LogicalOperatorType::LOGICAL_CREATE_TYPE, std::move(stmt.info), &schema); + + auto &catalog = Catalog::GetCatalog(context, create_type_info.catalog); + auto &dependencies = create_type_info.dependencies; + auto dependency_callback = [&dependencies, &catalog](CatalogEntry &entry) { + if (&catalog != &entry.ParentCatalog()) { + // Don't register any cross-catalog dependencies + return; + } + dependencies.AddDependency(entry); + }; if (create_type_info.query) { // CREATE TYPE mood AS ENUM (SELECT 'happy') auto query_obj = Bind(*create_type_info.query); @@ -636,16 +694,25 @@ BoundStatement Binder::Bind(CreateStatement &stmt) { result.plan->AddChild(std::move(query)); } else if (create_type_info.type.id() == LogicalTypeId::USER) { + SetCatalogLookupCallback(dependency_callback); // two cases: // 1: create a type with a non-existent type as source, Binder::BindLogicalType(...) will throw exception. // 2: create a type alias with a custom type. // eg. 
CREATE TYPE a AS INT; CREATE TYPE b AS a; // We set b to be an alias for the underlying type of a - create_type_info.type = Catalog::GetType(context, schema.catalog.GetName(), schema.name, - UserType::GetTypeName(create_type_info.type)); + auto type_entry_p = entry_retriever.GetEntry(CatalogType::TYPE_ENTRY, schema.catalog.GetName(), schema.name, + UserType::GetTypeName(create_type_info.type)); + D_ASSERT(type_entry_p); + auto &type_entry = type_entry_p->Cast(); + + auto inner_type = type_entry.user_type; + create_type_info.type = inner_type; } else { + SetCatalogLookupCallback(dependency_callback); + // This is done so that if the type contains a USER type, + // we register this dependency auto preserved_type = create_type_info.type; - BindLogicalType(context, create_type_info.type); + BindLogicalType(create_type_info.type); create_type_info.type = preserved_type; } break; diff --git a/src/planner/binder/statement/bind_create_table.cpp b/src/planner/binder/statement/bind_create_table.cpp index 4564cb0f04c6..00b125c7cc15 100644 --- a/src/planner/binder/statement/bind_create_table.cpp +++ b/src/planner/binder/statement/bind_create_table.cpp @@ -168,6 +168,7 @@ void Binder::BindGeneratedColumns(BoundCreateTableInfo &info) { // Create a new binder because we dont need (or want) these bindings in this scope auto binder = Binder::CreateBinder(context); + binder->SetCatalogLookupCallback(entry_retriever.GetCallback()); binder->bind_context.AddGenericBinding(table_index, base.table, names, types); auto expr_binder = ExpressionBinder(*binder, context); ErrorData ignore; @@ -252,9 +253,11 @@ static void ExtractDependencies(BoundCreateTableInfo &info) { } } } + unique_ptr Binder::BindCreateTableInfo(unique_ptr info, SchemaCatalogEntry &schema) { auto &base = info->Cast(); auto result = make_uniq(schema, std::move(info)); + auto &dependencies = result->dependencies; if (base.query) { // construct the result object auto query_obj = Bind(*base.query); @@ -269,10 +272,14 @@ unique_ptr Binder::BindCreateTableInfo(unique_ptr Binder::BindCreateTableInfo(unique_ptrschema.catalog); + BindLogicalType(column.TypeMutable(), &result->schema.catalog); } result->dependencies.VerifyDependencies(schema.catalog, result->Base().table); properties.allow_stream_result = false; diff --git a/src/planner/binder/statement/bind_export.cpp b/src/planner/binder/statement/bind_export.cpp index f5370a20a915..272effcbedab 100644 --- a/src/planner/binder/statement/bind_export.cpp +++ b/src/planner/binder/statement/bind_export.cpp @@ -345,6 +345,7 @@ BoundStatement Binder::Bind(ExportStatement &stmt) { fs.CreateDirectory(stmt.info->file_path); } + stmt.info->catalog = catalog; // create the export node auto export_node = make_uniq(copy_function.function, std::move(stmt.info), exported_tables); diff --git a/src/planner/binder/statement/bind_insert.cpp b/src/planner/binder/statement/bind_insert.cpp index d0827fde2ebb..1829d77601ac 100644 --- a/src/planner/binder/statement/bind_insert.cpp +++ b/src/planner/binder/statement/bind_insert.cpp @@ -305,25 +305,23 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl } auto bindings = insert.children[0]->GetColumnBindings(); - idx_t projection_index = DConstants::INVALID_INDEX; - vector> *insert_child_operators; - insert_child_operators = &insert.children; - while (projection_index == DConstants::INVALID_INDEX) { - if (insert_child_operators->empty()) { + optional_idx projection_index; + reference>> insert_child_operators = insert.children; + while 
(!projection_index.IsValid()) { + if (insert_child_operators.get().empty()) { // No further children to visit break; } - D_ASSERT(insert_child_operators->size() >= 1); - auto ¤t_child = (*insert_child_operators)[0]; + auto ¤t_child = insert_child_operators.get()[0]; auto table_indices = current_child->GetTableIndex(); if (table_indices.empty()) { // This operator does not have a table index to refer to, we have to visit its children - insert_child_operators = ¤t_child->children; + insert_child_operators = current_child->children; continue; } projection_index = table_indices[0]; } - if (projection_index == DConstants::INVALID_INDEX) { + if (!projection_index.IsValid()) { throw InternalException("Could not locate a table_index from the children of the insert"); } @@ -335,7 +333,7 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl // Replace any column bindings to refer to the projection table_index, rather than the source table if (insert.on_conflict_condition) { - ReplaceColumnBindings(*insert.on_conflict_condition, table_index, projection_index); + ReplaceColumnBindings(*insert.on_conflict_condition, table_index, projection_index.GetIndex()); } if (insert.action_type == OnConflictAction::REPLACE) { @@ -387,11 +385,11 @@ void Binder::BindOnConflictClause(LogicalInsert &insert, TableCatalogEntry &tabl // Replace the column bindings to refer to the child operator for (auto &expr : insert.expressions) { // Change the non-excluded column references to refer to the projection index - ReplaceColumnBindings(*expr, table_index, projection_index); + ReplaceColumnBindings(*expr, table_index, projection_index.GetIndex()); } // Do the same for the (optional) DO UPDATE condition if (insert.do_update_condition) { - ReplaceColumnBindings(*insert.do_update_condition, table_index, projection_index); + ReplaceColumnBindings(*insert.do_update_condition, table_index, projection_index.GetIndex()); } } @@ -442,7 +440,6 @@ BoundStatement Binder::Bind(InsertStatement &stmt) { if (!entry.second) { throw BinderException("Duplicate column name \"%s\" in INSERT", stmt.columns[i]); } - column_name_map[stmt.columns[i]] = i; auto column_index = table.GetColumnIndex(stmt.columns[i]); if (column_index.index == COLUMN_IDENTIFIER_ROW_ID) { throw BinderException("Cannot explicitly insert values into rowid column"); diff --git a/src/planner/binder/statement/bind_pragma.cpp b/src/planner/binder/statement/bind_pragma.cpp index 12f30e92c20a..61bf0672082e 100644 --- a/src/planner/binder/statement/bind_pragma.cpp +++ b/src/planner/binder/statement/bind_pragma.cpp @@ -31,13 +31,13 @@ unique_ptr Binder::BindPragma(PragmaInfo &info, QueryErrorConte auto &entry = Catalog::GetEntry(context, INVALID_CATALOG, DEFAULT_SCHEMA, info.name); FunctionBinder function_binder(context); ErrorData error; - idx_t bound_idx = function_binder.BindFunction(entry.name, entry.functions, params, error); - if (bound_idx == DConstants::INVALID_INDEX) { + auto bound_idx = function_binder.BindFunction(entry.name, entry.functions, params, error); + if (!bound_idx.IsValid()) { D_ASSERT(error.HasError()); error.AddQueryLocation(error_context); error.Throw(); } - auto bound_function = entry.functions.GetFunctionByOffset(bound_idx); + auto bound_function = entry.functions.GetFunctionByOffset(bound_idx.GetIndex()); // bind and check named params BindNamedParameters(bound_function.named_parameters, named_parameters, error_context, bound_function.name); return make_uniq(std::move(bound_function), std::move(params), 
std::move(named_parameters)); diff --git a/src/planner/binder/statement/bind_simple.cpp b/src/planner/binder/statement/bind_simple.cpp index 08c73ef84733..8dd743b77d6f 100644 --- a/src/planner/binder/statement/bind_simple.cpp +++ b/src/planner/binder/statement/bind_simple.cpp @@ -17,18 +17,18 @@ BoundStatement Binder::Bind(AlterStatement &stmt) { BoundStatement result; result.names = {"Success"}; result.types = {LogicalType::BOOLEAN}; + BindSchemaOrCatalog(stmt.info->catalog, stmt.info->schema); optional_ptr entry; - if (stmt.info->type == AlterType::SET_COLUMN_COMMENT) { // for column comments we need to an extra step: they can alter a table or a view, we resolve that here. auto &info = stmt.info->Cast(); - entry = info.TryResolveCatalogEntry(context); + entry = info.TryResolveCatalogEntry(entry_retriever); } else { // All other AlterTypes - entry = Catalog::GetEntry(context, stmt.info->GetCatalogType(), stmt.info->catalog, stmt.info->schema, - stmt.info->name, stmt.info->if_not_found); + entry = entry_retriever.GetEntry(stmt.info->GetCatalogType(), stmt.info->catalog, stmt.info->schema, + stmt.info->name, stmt.info->if_not_found); } if (entry) { diff --git a/src/planner/binder/tableref/bind_basetableref.cpp b/src/planner/binder/tableref/bind_basetableref.cpp index 7b4077ce8f18..b93e51a32ef7 100644 --- a/src/planner/binder/tableref/bind_basetableref.cpp +++ b/src/planner/binder/tableref/bind_basetableref.cpp @@ -80,68 +80,77 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { // check if the table name refers to a CTE // CTE name should never be qualified (i.e. schema_name should be empty) - optional_ptr found_cte = nullptr; + vector> found_ctes; if (ref.schema_name.empty()) { - found_cte = FindCTE(ref.table_name, ref.table_name == alias); + found_ctes = FindCTE(ref.table_name, ref.table_name == alias); } - if (found_cte) { + if (!found_ctes.empty()) { // Check if there is a CTE binding in the BindContext - auto &cte = *found_cte; - auto ctebinding = bind_context.GetCTEBinding(ref.table_name); - if (!ctebinding) { - if (CTEIsAlreadyBound(cte)) { - throw BinderException( - "Circular reference to CTE \"%s\", There are two possible solutions. \n1. use WITH RECURSIVE to " - "use recursive CTEs. \n2. If " - "you want to use the TABLE name \"%s\" the same as the CTE name, please explicitly add " - "\"SCHEMA\" before table name. You can try \"main.%s\" (main is the duckdb default schema)", - ref.table_name, ref.table_name, ref.table_name); - } - // Move CTE to subquery and bind recursively - SubqueryRef subquery(unique_ptr_cast(cte.query->Copy())); - subquery.alias = ref.alias.empty() ? ref.table_name : ref.alias; - subquery.column_name_alias = cte.aliases; - for (idx_t i = 0; i < ref.column_name_alias.size(); i++) { - if (i < subquery.column_name_alias.size()) { - subquery.column_name_alias[i] = ref.column_name_alias[i]; - } else { - subquery.column_name_alias.push_back(ref.column_name_alias[i]); + bool circular_cte = false; + for (auto found_cte : found_ctes) { + auto &cte = found_cte.get(); + auto ctebinding = bind_context.GetCTEBinding(ref.table_name); + if (!ctebinding) { + if (CTEIsAlreadyBound(cte)) { + // remember error state + circular_cte = true; + // retry with next candidate CTE + continue; } - } - return Bind(subquery, found_cte); - } else { - // There is a CTE binding in the BindContext. - // This can only be the case if there is a recursive CTE, - // or a materialized CTE present. 
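The bind_basetableref.cpp restructuring below turns CTE resolution into a candidate loop: FindCTE now returns every same-named CTE from the current and parent binders, and binding tries them innermost-first, remembering a circularity error and falling through to the next candidate before giving up. A condensed sketch of that control flow, with placeholder types, and deliberately omitting the recursive/materialized-CTE branch that the real code handles via the bind context:

#include <stdexcept>
#include <vector>

// Placeholder for a CTE candidate; "already_bound" models CTEIsAlreadyBound.
struct CteCandidate {
	bool already_bound;
};

// Try candidates innermost-first; remember a circularity instead of throwing
// immediately, so an outer CTE with the same name can still win.
CteCandidate *ResolveCte(std::vector<CteCandidate *> &candidates) {
	bool circular = false;
	for (auto *cte : candidates) {
		if (cte->already_bound) {
			circular = true; // error state, but keep trying other candidates
			continue;
		}
		return cte;
	}
	if (circular) {
		throw std::runtime_error("Circular reference to CTE");
	}
	return nullptr; // no candidate matched: fall back to a catalog lookup
}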
- auto index = GenerateTableIndex(); - auto materialized = cte.materialized; - if (materialized == CTEMaterialize::CTE_MATERIALIZE_DEFAULT) { + // Move CTE to subquery and bind recursively + SubqueryRef subquery(unique_ptr_cast(cte.query->Copy())); + subquery.alias = ref.alias.empty() ? ref.table_name : ref.alias; + subquery.column_name_alias = cte.aliases; + for (idx_t i = 0; i < ref.column_name_alias.size(); i++) { + if (i < subquery.column_name_alias.size()) { + subquery.column_name_alias[i] = ref.column_name_alias[i]; + } else { + subquery.column_name_alias.push_back(ref.column_name_alias[i]); + } + } + return Bind(subquery, &found_cte.get()); + } else { + // There is a CTE binding in the BindContext. + // This can only be the case if there is a recursive CTE, + // or a materialized CTE present. + auto index = GenerateTableIndex(); + auto materialized = cte.materialized; + if (materialized == CTEMaterialize::CTE_MATERIALIZE_DEFAULT) { #ifdef DUCKDB_ALTERNATIVE_VERIFY - materialized = CTEMaterialize::CTE_MATERIALIZE_ALWAYS; + materialized = CTEMaterialize::CTE_MATERIALIZE_ALWAYS; #else - materialized = CTEMaterialize::CTE_MATERIALIZE_NEVER; + materialized = CTEMaterialize::CTE_MATERIALIZE_NEVER; #endif - } - auto result = make_uniq(index, ctebinding->index, materialized); - auto alias = ref.alias.empty() ? ref.table_name : ref.alias; - auto names = BindContext::AliasColumnNames(alias, ctebinding->names, ref.column_name_alias); + } + auto result = make_uniq(index, ctebinding->index, materialized); + auto alias = ref.alias.empty() ? ref.table_name : ref.alias; + auto names = BindContext::AliasColumnNames(alias, ctebinding->names, ref.column_name_alias); - bind_context.AddGenericBinding(index, alias, names, ctebinding->types); - // Update references to CTE - auto cteref = bind_context.cte_references[ref.table_name]; - (*cteref)++; + bind_context.AddGenericBinding(index, alias, names, ctebinding->types); + // Update references to CTE + auto cteref = bind_context.cte_references[ref.table_name]; + (*cteref)++; - result->types = ctebinding->types; - result->bound_columns = std::move(names); - return std::move(result); + result->types = ctebinding->types; + result->bound_columns = std::move(names); + return std::move(result); + } + } + if (circular_cte) { + throw BinderException( + "Circular reference to CTE \"%s\", There are two possible solutions. \n1. use WITH RECURSIVE to " + "use recursive CTEs. \n2. If " + "you want to use the TABLE name \"%s\" the same as the CTE name, please explicitly add " + "\"SCHEMA\" before table name. 
You can try \"main.%s\" (main is the duckdb default schema)", + ref.table_name, ref.table_name, ref.table_name); } } // not a CTE // extract a table or view from the catalog BindSchemaOrCatalog(ref.catalog_name, ref.schema_name); - auto table_or_view = Catalog::GetEntry(context, CatalogType::TABLE_ENTRY, ref.catalog_name, ref.schema_name, - ref.table_name, OnEntryNotFound::RETURN_NULL, error_context); + auto table_or_view = entry_retriever.GetEntry(CatalogType::TABLE_ENTRY, ref.catalog_name, ref.schema_name, + ref.table_name, OnEntryNotFound::RETURN_NULL, error_context); // we still didn't find the table if (GetBindingMode() == BindingMode::EXTRACT_NAMES) { if (!table_or_view || table_or_view->type == CatalogType::TABLE_ENTRY) { @@ -180,8 +189,8 @@ unique_ptr Binder::Bind(BaseTableRef &ref) { } // could not find an alternative: bind again to get the error - Catalog::GetEntry(context, CatalogType::TABLE_ENTRY, ref.catalog_name, ref.schema_name, ref.table_name, - OnEntryNotFound::THROW_EXCEPTION, error_context); + (void)entry_retriever.GetEntry(CatalogType::TABLE_ENTRY, ref.catalog_name, ref.schema_name, ref.table_name, + OnEntryNotFound::THROW_EXCEPTION, error_context); throw InternalException("Catalog::GetEntry should have thrown an exception above"); } diff --git a/src/planner/binder/tableref/bind_subqueryref.cpp b/src/planner/binder/tableref/bind_subqueryref.cpp index f674970ffac1..5acbd06b5d03 100644 --- a/src/planner/binder/tableref/bind_subqueryref.cpp +++ b/src/planner/binder/tableref/bind_subqueryref.cpp @@ -10,8 +10,8 @@ unique_ptr Binder::Bind(SubqueryRef &ref, optional_ptrbound_ctes.insert(*cte); } - binder->alias = ref.alias.empty() ? "unnamed_subquery" : ref.alias; auto subquery = binder->BindNode(*ref.subquery->node); + binder->alias = ref.alias.empty() ? 
"unnamed_subquery" : ref.alias; idx_t bind_index = subquery->GetRootIndex(); string subquery_alias; if (ref.alias.empty()) { diff --git a/src/planner/binder/tableref/bind_table_function.cpp b/src/planner/binder/tableref/bind_table_function.cpp index 2596243204a1..1d898bddf257 100644 --- a/src/planner/binder/tableref/bind_table_function.cpp +++ b/src/planner/binder/tableref/bind_table_function.cpp @@ -102,7 +102,7 @@ bool Binder::BindTableFunctionParameters(TableFunctionCatalogEntry &table_functi continue; } - TableFunctionBinder binder(*this, context); + TableFunctionBinder binder(*this, context, table_function.name); LogicalType sql_type; auto expr = binder.Bind(child, &sql_type); if (expr->HasParameter()) { @@ -210,8 +210,8 @@ unique_ptr Binder::Bind(TableFunctionRef &ref) { auto &fexpr = ref.function->Cast(); // fetch the function from the catalog - auto &func_catalog = Catalog::GetEntry(context, CatalogType::TABLE_FUNCTION_ENTRY, fexpr.catalog, fexpr.schema, - fexpr.function_name, error_context); + auto &func_catalog = *GetCatalogEntry(CatalogType::TABLE_FUNCTION_ENTRY, fexpr.catalog, fexpr.schema, + fexpr.function_name, OnEntryNotFound::THROW_EXCEPTION, error_context); if (func_catalog.type == CatalogType::TABLE_MACRO_ENTRY) { auto ¯o_func = func_catalog.Cast(); @@ -251,12 +251,12 @@ unique_ptr Binder::Bind(TableFunctionRef &ref) { // select the function based on the input parameters FunctionBinder function_binder(context); - idx_t best_function_idx = function_binder.BindFunction(function.name, function.functions, arguments, error); - if (best_function_idx == DConstants::INVALID_INDEX) { + auto best_function_idx = function_binder.BindFunction(function.name, function.functions, arguments, error); + if (!best_function_idx.IsValid()) { error.AddQueryLocation(ref); error.Throw(); } - auto table_function = function.functions.GetFunctionByOffset(best_function_idx); + auto table_function = function.functions.GetFunctionByOffset(best_function_idx.GetIndex()); // now check the named parameters BindNamedParameters(table_function.named_parameters, named_parameters, error_context, table_function.name); diff --git a/src/planner/expression_binder.cpp b/src/planner/expression_binder.cpp index fa03fabdf511..649ab82a8b56 100644 --- a/src/planner/expression_binder.cpp +++ b/src/planner/expression_binder.cpp @@ -9,6 +9,10 @@ namespace duckdb { +void ExpressionBinder::SetCatalogLookupCallback(catalog_entry_callback_t callback) { + binder.SetCatalogLookupCallback(std::move(callback)); +} + ExpressionBinder::ExpressionBinder(Binder &binder, ClientContext &context, bool replace_binder) : binder(binder), context(context) { InitializeStackCheck(); diff --git a/src/planner/expression_binder/table_function_binder.cpp b/src/planner/expression_binder/table_function_binder.cpp index 98a73ed364a2..7f20e9c36d15 100644 --- a/src/planner/expression_binder/table_function_binder.cpp +++ b/src/planner/expression_binder/table_function_binder.cpp @@ -5,7 +5,8 @@ namespace duckdb { -TableFunctionBinder::TableFunctionBinder(Binder &binder, ClientContext &context) : ExpressionBinder(binder, context) { +TableFunctionBinder::TableFunctionBinder(Binder &binder, ClientContext &context, string table_function_name_p) + : ExpressionBinder(binder, context), table_function_name(std::move(table_function_name_p)) { } BindResult TableFunctionBinder::BindLambdaReference(LambdaRefExpression &expr, idx_t depth) { @@ -14,23 +15,34 @@ BindResult TableFunctionBinder::BindLambdaReference(LambdaRefExpression &expr, i return 
(*lambda_bindings)[expr.lambda_idx].Bind(lambda_ref, depth); } -BindResult TableFunctionBinder::BindColumnReference(ColumnRefExpression &expr, idx_t depth, bool root_expression) { - +BindResult TableFunctionBinder::BindColumnReference(unique_ptr &expr_ptr, idx_t depth, + bool root_expression) { // try binding as a lambda parameter - auto &col_ref = expr.Cast(); + auto &col_ref = expr_ptr->Cast(); if (!col_ref.IsQualified()) { auto lambda_ref = LambdaRefExpression::FindMatchingBinding(lambda_bindings, col_ref.GetName()); if (lambda_ref) { return BindLambdaReference(lambda_ref->Cast(), depth); } } + auto column_names = col_ref.column_names; + auto result_name = StringUtil::Join(column_names, "."); + if (!table_function_name.empty()) { + // check if this is a lateral join column/parameter + auto result = BindCorrelatedColumns(expr_ptr, ErrorData("error")); + if (!result.HasError()) { + // it is a lateral join parameter - this is not supported in this type of table function + throw BinderException("Table function \"%s\" does not support lateral join column parameters - cannot use " + "column \"%s\" in this context", + table_function_name, result_name); + } + } - auto value_function = ExpressionBinder::GetSQLValueFunction(expr.GetColumnName()); + auto value_function = ExpressionBinder::GetSQLValueFunction(column_names.back()); if (value_function) { return BindExpression(value_function, depth, root_expression); } - auto result_name = StringUtil::Join(expr.column_names, "."); return BindResult(make_uniq(Value(result_name))); } @@ -41,7 +53,7 @@ BindResult TableFunctionBinder::BindExpression(unique_ptr &exp case ExpressionClass::LAMBDA_REF: return BindLambdaReference(expr.Cast(), depth); case ExpressionClass::COLUMN_REF: - return BindColumnReference(expr.Cast(), depth, root_expression); + return BindColumnReference(expr_ptr, depth, root_expression); case ExpressionClass::SUBQUERY: throw BinderException("Table function cannot contain subqueries"); case ExpressionClass::DEFAULT: diff --git a/src/storage/buffer/buffer_pool_reservation.cpp b/src/storage/buffer/buffer_pool_reservation.cpp index c602af1dfc55..f22a96ffa58f 100644 --- a/src/storage/buffer/buffer_pool_reservation.cpp +++ b/src/storage/buffer/buffer_pool_reservation.cpp @@ -28,7 +28,7 @@ void BufferPoolReservation::Resize(idx_t new_size) { size = new_size; } -void BufferPoolReservation::Merge(BufferPoolReservation &&src) { +void BufferPoolReservation::Merge(BufferPoolReservation src) { size += src.size; src.size = 0; } diff --git a/src/storage/buffer_manager.cpp b/src/storage/buffer_manager.cpp index d4497f627ec5..b54032314b53 100644 --- a/src/storage/buffer_manager.cpp +++ b/src/storage/buffer_manager.cpp @@ -56,7 +56,7 @@ idx_t BufferManager::GetQueryMaxMemory() const { return GetBufferPool().GetQueryMaxMemory(); } -unique_ptr BufferManager::ConstructManagedBuffer(idx_t size, unique_ptr &&source, +unique_ptr BufferManager::ConstructManagedBuffer(idx_t size, unique_ptr &&, FileBufferType type) { throw NotImplementedException("This type of BufferManager can not construct managed buffers"); } diff --git a/src/storage/checkpoint/table_data_writer.cpp b/src/storage/checkpoint/table_data_writer.cpp index 5382da0bf2ad..b90cae8901cf 100644 --- a/src/storage/checkpoint/table_data_writer.cpp +++ b/src/storage/checkpoint/table_data_writer.cpp @@ -25,9 +25,8 @@ CompressionType TableDataWriter::GetColumnCompressionType(idx_t i) { return table.GetColumn(LogicalIndex(i)).CompressionType(); } -void TableDataWriter::AddRowGroup(RowGroupPointer 
&&row_group_pointer, unique_ptr &&writer) { +void TableDataWriter::AddRowGroup(RowGroupPointer &&row_group_pointer, unique_ptr writer) { row_group_pointers.push_back(std::move(row_group_pointer)); - writer.reset(); } TaskScheduler &TableDataWriter::GetScheduler() { @@ -43,7 +42,7 @@ unique_ptr SingleFileTableDataWriter::GetRowGroupWriter(RowGroup return make_uniq(table, checkpoint_manager.partial_block_manager, table_data_writer); } -void SingleFileTableDataWriter::FinalizeTable(TableStatistics &&global_stats, DataTableInfo *info, +void SingleFileTableDataWriter::FinalizeTable(const TableStatistics &global_stats, DataTableInfo *info, Serializer &serializer) { // store the current position in the metadata writer // this is where the row groups for this table start diff --git a/src/storage/checkpoint_manager.cpp b/src/storage/checkpoint_manager.cpp index 574fc0660d1b..e6cd6511ce20 100644 --- a/src/storage/checkpoint_manager.cpp +++ b/src/storage/checkpoint_manager.cpp @@ -28,13 +28,13 @@ #include "duckdb/storage/metadata/metadata_reader.hpp" #include "duckdb/storage/table/column_checkpoint_state.hpp" #include "duckdb/transaction/transaction_manager.hpp" +#include "duckdb/catalog/dependency_manager.hpp" +#include "duckdb/transaction/duck_transaction_manager.hpp" #include "duckdb/execution/index/art/art.hpp" #include "duckdb/execution/index/unknown_index.hpp" namespace duckdb { -void ReorderTableEntries(catalog_entry_vector_t &tables); - SingleFileCheckpointWriter::SingleFileCheckpointWriter(AttachedDatabase &db, BlockManager &block_manager) : CheckpointWriter(db), partial_block_manager(block_manager, CheckpointType::FULL_CHECKPOINT) { } @@ -56,74 +56,6 @@ unique_ptr SingleFileCheckpointWriter::GetTableDataWriter(Table return make_uniq(*this, table, *table_metadata_writer); } -static catalog_entry_vector_t GetCatalogEntries(vector> &schemas) { - catalog_entry_vector_t entries; - for (auto &schema_p : schemas) { - auto &schema = schema_p.get(); - entries.push_back(schema); - schema.Scan(CatalogType::TYPE_ENTRY, [&](CatalogEntry &entry) { - if (entry.internal) { - return; - } - entries.push_back(entry); - }); - - schema.Scan(CatalogType::SEQUENCE_ENTRY, [&](CatalogEntry &entry) { - if (entry.internal) { - return; - } - entries.push_back(entry); - }); - - catalog_entry_vector_t tables; - vector> views; - schema.Scan(CatalogType::TABLE_ENTRY, [&](CatalogEntry &entry) { - if (entry.internal) { - return; - } - if (entry.type == CatalogType::TABLE_ENTRY) { - tables.push_back(entry.Cast()); - } else if (entry.type == CatalogType::VIEW_ENTRY) { - views.push_back(entry.Cast()); - } else { - throw NotImplementedException("Catalog type for entries"); - } - }); - // Reorder tables because of foreign key constraint - ReorderTableEntries(tables); - for (auto &table : tables) { - entries.push_back(table.get()); - } - for (auto &view : views) { - entries.push_back(view.get()); - } - - schema.Scan(CatalogType::SCALAR_FUNCTION_ENTRY, [&](CatalogEntry &entry) { - if (entry.internal) { - return; - } - if (entry.type == CatalogType::MACRO_ENTRY) { - entries.push_back(entry); - } - }); - - schema.Scan(CatalogType::TABLE_FUNCTION_ENTRY, [&](CatalogEntry &entry) { - if (entry.internal) { - return; - } - if (entry.type == CatalogType::TABLE_MACRO_ENTRY) { - entries.push_back(entry); - } - }); - - schema.Scan(CatalogType::INDEX_ENTRY, [&](CatalogEntry &entry) { - D_ASSERT(!entry.internal); - entries.push_back(entry); - }); - } - return entries; -} - void SingleFileCheckpointWriter::CreateCheckpoint() { auto &config = 
void SingleFileCheckpointWriter::CreateCheckpoint() { auto &config = DBConfig::Get(db); auto &storage_manager = db.GetStorageManager().Cast<SingleFileStorageManager>(); @@ -143,10 +75,18 @@ void SingleFileCheckpointWriter::CreateCheckpoint() { // get the id of the first meta block auto meta_block = metadata_writer->GetMetaBlockPointer(); - vector<reference_wrapper<SchemaCatalogEntry>> schemas; - // we scan the set of committed schemas auto &catalog = Catalog::GetCatalog(db).Cast<DuckCatalog>(); - catalog.ScanSchemas([&](SchemaCatalogEntry &entry) { schemas.push_back(entry); }); + + catalog_entry_vector_t catalog_entries; + D_ASSERT(catalog.IsDuckCatalog()); + + auto &duck_catalog = catalog.Cast<DuckCatalog>(); + auto &dependency_manager = duck_catalog.GetDependencyManager(); + auto &transaction_manager = DuckTransactionManager::Get(db); + auto dummy_transaction = CatalogTransaction(db.GetDatabase(), transaction_manager.LowestActiveId(), + transaction_manager.LowestActiveStart()); + catalog_entries = dependency_manager.GetExportOrder(dummy_transaction); + // write the actual data into the database // Create a serializer to write the checkpoint data @@ -167,7 +107,6 @@ void SingleFileCheckpointWriter::CreateCheckpoint() { ] } */ - auto catalog_entries = GetCatalogEntries(schemas); BinarySerializer serializer(*metadata_writer); serializer.Begin(); serializer.WriteList(100, "catalog_entries", catalog_entries.size(), [&](Serializer::List &list, idx_t i) { @@ -353,7 +292,7 @@ void CheckpointReader::ReadEntry(ClientContext &context, Deserializer &deseriali } void CheckpointReader::ReadSchema(ClientContext &context, Deserializer &deserializer) { - // Read the schema and create it in the catalog + // read the schema and create it in the catalog auto info = deserializer.ReadProperty<unique_ptr<CreateInfo>>(100, "schema"); auto &schema_info = info->Cast<CreateSchemaInfo>(); diff --git a/src/storage/data_pointer.cpp b/src/storage/data_pointer.cpp index 29718e72bbad..92c57245af0c 100644 --- a/src/storage/data_pointer.cpp +++ b/src/storage/data_pointer.cpp @@ -9,7 +9,7 @@ namespace duckdb { unique_ptr<ColumnSegmentState> ColumnSegmentState::Deserialize(Deserializer &deserializer) { auto compression_type = deserializer.Get<CompressionType>(); auto &db = deserializer.Get<DatabaseInstance &>(); - auto &type = deserializer.Get<LogicalType &>(); + auto &type = deserializer.Get<const LogicalType &>(); auto compression_function = DBConfig::GetConfig(db).GetCompressionFunction(compression_type, type.InternalType()); if (!compression_function || !compression_function->deserialize_state) { throw SerializationException("Deserializing a ColumnSegmentState but could not find deserialize method"); } diff --git a/src/storage/data_table.cpp b/src/storage/data_table.cpp index 6541bf193434..ac55613c9295 100644 --- a/src/storage/data_table.cpp +++ b/src/storage/data_table.cpp @@ -1244,7 +1244,7 @@ void DataTable::Checkpoint(TableDataWriter &writer, Serializer &serializer) { // row-group pointers // table pointer // index data - writer.FinalizeTable(std::move(global_stats), info.get(), serializer); + writer.FinalizeTable(global_stats, info.get(), serializer); } void DataTable::CommitDropColumn(idx_t index) { diff --git a/src/storage/partial_block_manager.cpp b/src/storage/partial_block_manager.cpp index 26366706581f..7fd73d6ff0b5 100644 --- a/src/storage/partial_block_manager.cpp +++ b/src/storage/partial_block_manager.cpp @@ -95,7 +95,7 @@ bool PartialBlockManager::GetPartialBlock(idx_t segment_size, unique_ptr<PartialBlock> &partial_block) { auto &state = partial_block->state; D_ASSERT(checkpoint_type != CheckpointType::FULL_CHECKPOINT || state.block_id >= 0); if (state.block_use_count < max_use_count) {
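The checkpointer no longer walks each schema and hand-orders its entries; it asks the dependency manager for an export order computed from the recorded dependencies. Conceptually that is a topological sort: every entry is written after everything it depends on. A hypothetical sketch (the names GetExportOrder, Entry, and deps here are illustrative, not the actual DependencyManager API):

```cpp
#include <cstddef>
#include <vector>

struct Entry {
	std::vector<std::size_t> deps; // indices of entries this entry depends on
};

static void Visit(std::size_t idx, const std::vector<Entry> &entries, std::vector<bool> &visited,
                  std::vector<std::size_t> &order) {
	if (visited[idx]) {
		return;
	}
	visited[idx] = true;
	for (auto dep : entries[idx].deps) {
		Visit(dep, entries, visited, order); // emit dependencies first
	}
	order.push_back(idx);
}

// returns indices such that a table precedes the views/indexes defined on it;
// assumes the graph is acyclic, which the catalog enforces (see the
// "CREATE OR REPLACE is not allowed to depend on itself" tests below)
std::vector<std::size_t> GetExportOrder(const std::vector<Entry> &entries) {
	std::vector<bool> visited(entries.size(), false);
	std::vector<std::size_t> order;
	for (std::size_t i = 0; i < entries.size(); i++) {
		Visit(i, entries, visited, order);
	}
	return order;
}
```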
diff --git a/src/storage/serialization/serialize_create_info.cpp b/src/storage/serialization/serialize_create_info.cpp index 3196a1d107d0..a379f1cba7ff 100644 --- a/src/storage/serialization/serialize_create_info.cpp +++ b/src/storage/serialization/serialize_create_info.cpp @@ -25,6 +25,7 @@ void CreateInfo::Serialize(Serializer &serializer) const { serializer.WriteProperty<OnCreateConflict>(105, "on_conflict", on_conflict); serializer.WritePropertyWithDefault<string>(106, "sql", sql); serializer.WritePropertyWithDefault<Value>(107, "comment", comment, Value()); + serializer.WritePropertyWithDefault<LogicalDependencyList>(108, "dependencies", dependencies, LogicalDependencyList()); } unique_ptr<CreateInfo> CreateInfo::Deserialize(Deserializer &deserializer) { @@ -36,6 +37,7 @@ unique_ptr<CreateInfo> CreateInfo::Deserialize(Deserializer &deserializer) { auto on_conflict = deserializer.ReadProperty<OnCreateConflict>(105, "on_conflict"); auto sql = deserializer.ReadPropertyWithDefault<string>(106, "sql"); auto comment = deserializer.ReadPropertyWithDefault<Value>(107, "comment", Value()); + auto dependencies = deserializer.ReadPropertyWithDefault<LogicalDependencyList>(108, "dependencies", LogicalDependencyList()); deserializer.Set<CatalogType>(type); unique_ptr<CreateInfo> result; switch (type) { @@ -74,6 +76,7 @@ unique_ptr<CreateInfo> CreateInfo::Deserialize(Deserializer &deserializer) { result->on_conflict = on_conflict; result->sql = std::move(sql); result->comment = comment; + result->dependencies = dependencies; return result; } diff --git a/src/storage/statistics/array_stats.cpp b/src/storage/statistics/array_stats.cpp index d01b573bacb3..ecb399d21215 100644 --- a/src/storage/statistics/array_stats.cpp +++ b/src/storage/statistics/array_stats.cpp @@ -78,7 +78,7 @@ void ArrayStats::Deserialize(Deserializer &deserializer, BaseStatistics &base) { auto &child_type = ArrayType::GetChildType(type); // Push the logical type of the child type to the deserialization context - deserializer.Set<LogicalType &>(const_cast<LogicalType &>(child_type)); + deserializer.Set<const LogicalType &>(child_type); base.child_stats[0].Copy(deserializer.ReadProperty<BaseStatistics>(200, "child_stats")); deserializer.Unset<LogicalType>(); } diff --git a/src/storage/statistics/base_statistics.cpp b/src/storage/statistics/base_statistics.cpp index 53bd862f0d95..106599002cd3 100644 --- a/src/storage/statistics/base_statistics.cpp +++ b/src/storage/statistics/base_statistics.cpp @@ -341,11 +341,10 @@ BaseStatistics BaseStatistics::Deserialize(Deserializer &deserializer) { auto distinct_count = deserializer.ReadProperty<idx_t>(102, "distinct_count"); // Get the logical type from the deserializer context.
- auto type = deserializer.Get<LogicalType>(); - + auto &type = deserializer.Get<const LogicalType &>(); auto stats_type = GetStatsType(type); - BaseStatistics stats(std::move(type)); + BaseStatistics stats(type); stats.has_null = has_null; stats.has_no_null = has_no_null; diff --git a/src/storage/statistics/list_stats.cpp b/src/storage/statistics/list_stats.cpp index 0d4a00113b29..d6b658af365e 100644 --- a/src/storage/statistics/list_stats.cpp +++ b/src/storage/statistics/list_stats.cpp @@ -79,7 +79,7 @@ void ListStats::Deserialize(Deserializer &deserializer, BaseStatistics &base) { auto &child_type = ListType::GetChildType(type); // Push the logical type of the child type to the deserialization context - deserializer.Set<LogicalType &>(const_cast<LogicalType &>(child_type)); + deserializer.Set<const LogicalType &>(child_type); base.child_stats[0].Copy(deserializer.ReadProperty<BaseStatistics>(200, "child_stats")); deserializer.Unset<LogicalType>(); } diff --git a/src/storage/statistics/struct_stats.cpp b/src/storage/statistics/struct_stats.cpp index 380963841df1..74c0921f894b 100644 --- a/src/storage/statistics/struct_stats.cpp +++ b/src/storage/statistics/struct_stats.cpp @@ -107,7 +107,7 @@ void StructStats::Deserialize(Deserializer &deserializer, BaseStatistics &base) auto &child_types = StructType::GetChildTypes(type); deserializer.ReadList(200, "child_stats", [&](Deserializer::List &list, idx_t i) { - deserializer.Set<LogicalType &>(const_cast<LogicalType &>(child_types[i].second)); + deserializer.Set<const LogicalType &>(child_types[i].second); auto stat = list.ReadElement<BaseStatistics>(); base.child_stats[i].Copy(stat); deserializer.Unset<LogicalType>(); diff --git a/src/storage/storage_info.cpp b/src/storage/storage_info.cpp index ab8822425258..a8fd657bce6d 100644 --- a/src/storage/storage_info.cpp +++ b/src/storage/storage_info.cpp @@ -9,25 +9,25 @@ struct StorageVersionInfo { idx_t storage_version; }; -static StorageVersionInfo storage_version_info[] = {{"v0.9.0, v0.9.1, v0.9.2 or v0.10.0", 64}, - {"v0.8.0 or v0.8.1", 51}, - {"v0.7.0 or v0.7.1", 43}, - {"v0.6.0 or v0.6.1", 39}, - {"v0.5.0 or v0.5.1", 38}, - {"v0.3.3, v0.3.4 or v0.4.0", 33}, - {"v0.3.2", 31}, - {"v0.3.1", 27}, - {"v0.3.0", 25}, - {"v0.2.9", 21}, - {"v0.2.8", 18}, - {"v0.2.7", 17}, - {"v0.2.6", 15}, - {"v0.2.5", 13}, - {"v0.2.4", 11}, - {"v0.2.3", 6}, - {"v0.2.2", 4}, - {"v0.2.1 and prior", 1}, - {nullptr, 0}}; +static const StorageVersionInfo storage_version_info[] = {{"v0.9.0, v0.9.1, v0.9.2 or v0.10.0", 64}, + {"v0.8.0 or v0.8.1", 51}, + {"v0.7.0 or v0.7.1", 43}, + {"v0.6.0 or v0.6.1", 39}, + {"v0.5.0 or v0.5.1", 38}, + {"v0.3.3, v0.3.4 or v0.4.0", 33}, + {"v0.3.2", 31}, + {"v0.3.1", 27}, + {"v0.3.0", 25}, + {"v0.2.9", 21}, + {"v0.2.8", 18}, + {"v0.2.7", 17}, + {"v0.2.6", 15}, + {"v0.2.5", 13}, + {"v0.2.4", 11}, + {"v0.2.3", 6}, + {"v0.2.2", 4}, + {"v0.2.1 and prior", 1}, + {nullptr, 0}}; const char *GetDuckDBVersion(idx_t version_number) { for (idx_t i = 0; storage_version_info[i].version_name; i++) { diff --git a/src/storage/table/chunk_info.cpp b/src/storage/table/chunk_info.cpp index d220e0809022..86236aaaa725 100644 --- a/src/storage/table/chunk_info.cpp +++ b/src/storage/table/chunk_info.cpp @@ -4,6 +4,7 @@ #include "duckdb/common/serializer/serializer.hpp" #include "duckdb/common/serializer/deserializer.hpp" #include "duckdb/common/serializer/memory_stream.hpp" +#include "duckdb/transaction/delete_info.hpp" namespace duckdb { @@ -198,9 +199,16 @@ idx_t ChunkVectorInfo::Delete(transaction_t transaction_id, row_t rows[], idx_t return deleted_tuples; }
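The new field id 108 is written with WritePropertyWithDefault and read with ReadPropertyWithDefault, the usual trick for evolving a serialized format without breaking old files: a value equal to its default is skipped on write, and a missing field deserializes to the default. A simplified sketch of the idea (a toy record, not DuckDB's serializer):

```cpp
#include <cstdint>
#include <map>
#include <string>

using Record = std::map<uint32_t, std::string>; // toy "file": field id -> value

void WriteWithDefault(Record &rec, uint32_t field_id, const std::string &value, const std::string &def) {
	if (value == def) {
		return; // defaults are omitted, so old readers never see the new field
	}
	rec[field_id] = value;
}

std::string ReadWithDefault(const Record &rec, uint32_t field_id, const std::string &def) {
	auto entry = rec.find(field_id);
	return entry == rec.end() ? def : entry->second; // missing field -> default
}

int main() {
	Record rec;
	WriteWithDefault(rec, 108, "", "");        // empty dependency list: nothing written
	auto deps = ReadWithDefault(rec, 108, ""); // old files read back the default
	return static_cast<int>(deps.size());
}
```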
-void ChunkVectorInfo::CommitDelete(transaction_t commit_id, row_t rows[], idx_t count) { - for (idx_t i = 0; i < count; i++) { - deleted[rows[i]] = commit_id; +void ChunkVectorInfo::CommitDelete(transaction_t commit_id, const DeleteInfo &info) { + if (info.is_consecutive) { + for (idx_t i = 0; i < info.count; i++) { + deleted[i] = commit_id; + } + } else { + auto rows = info.GetRows(); + for (idx_t i = 0; i < info.count; i++) { + deleted[rows[i]] = commit_id; + } } } diff --git a/src/storage/table/row_group.cpp b/src/storage/table/row_group.cpp index 67e17737211a..824827d76a97 100644 --- a/src/storage/table/row_group.cpp +++ b/src/storage/table/row_group.cpp @@ -25,15 +25,15 @@ namespace duckdb { -RowGroup::RowGroup(RowGroupCollection &collection, idx_t start, idx_t count) - : SegmentBase<RowGroup>(start, count), collection(collection), allocation_size(0) { +RowGroup::RowGroup(RowGroupCollection &collection_p, idx_t start, idx_t count) + : SegmentBase<RowGroup>(start, count), collection(collection_p), allocation_size(0) { Verify(); } -RowGroup::RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer) - : SegmentBase<RowGroup>(pointer.row_start, pointer.tuple_count), collection(collection), allocation_size(0) { +RowGroup::RowGroup(RowGroupCollection &collection_p, RowGroupPointer pointer) + : SegmentBase<RowGroup>(pointer.row_start, pointer.tuple_count), collection(collection_p), allocation_size(0) { // deserialize the columns - if (pointer.data_pointers.size() != collection.GetTypes().size()) { + if (pointer.data_pointers.size() != collection_p.GetTypes().size()) { throw IOException("Row group column count is unaligned with table column count. Corrupt file?"); } this->column_pointers = std::move(pointer.data_pointers); @@ -48,8 +48,8 @@ RowGroup::RowGroup(RowGroupCollection &collection, RowGroupPointer &&pointer) Verify(); } -void RowGroup::MoveToCollection(RowGroupCollection &collection, idx_t new_start) { - this->collection = collection; +void RowGroup::MoveToCollection(RowGroupCollection &collection_p, idx_t new_start) { + this->collection = collection_p; this->start = new_start; for (auto &column : GetColumns()) { column->SetStart(new_start); diff --git a/src/storage/table/row_version_manager.cpp b/src/storage/table/row_version_manager.cpp index e1ffecb4b8ff..ead21f89234f 100644 --- a/src/storage/table/row_version_manager.cpp +++ b/src/storage/table/row_version_manager.cpp @@ -159,10 +159,10 @@ idx_t RowVersionManager::DeleteRows(idx_t vector_idx, transaction_t transaction_ return GetVectorInfo(vector_idx).Delete(transaction_id, rows, count); } -void RowVersionManager::CommitDelete(idx_t vector_idx, transaction_t commit_id, row_t rows[], idx_t count) { +void RowVersionManager::CommitDelete(idx_t vector_idx, transaction_t commit_id, const DeleteInfo &info) { lock_guard<mutex> lock(version_lock); has_changes = true; - GetVectorInfo(vector_idx).CommitDelete(commit_id, rows, count); + GetVectorInfo(vector_idx).CommitDelete(commit_id, info); } vector<MetaBlockPointer> RowVersionManager::Checkpoint(MetadataManager &manager) { diff --git a/src/transaction/cleanup_state.cpp b/src/transaction/cleanup_state.cpp index e089bbb0e6e9..0ec438c20339 100644 --- a/src/transaction/cleanup_state.cpp +++ b/src/transaction/cleanup_state.cpp @@ -70,8 +70,15 @@ void CleanupState::CleanupDelete(DeleteInfo &info) { indexed_tables[current_table->info->table] = current_table; count = 0; - for (idx_t i = 0; i < info.count; i++) { - row_numbers[count++] = info.base_row + info.rows[i]; + if (info.is_consecutive) { + for (idx_t i = 0; i < info.count; i++) { + row_numbers[count++] = info.base_row + i; + } + } else { + auto rows = info.GetRows(); + for (idx_t i = 0; i < info.count; i++) { + row_numbers[count++] = info.base_row + rows[i]; + } } Flush(); } diff --git a/src/transaction/commit_state.cpp b/src/transaction/commit_state.cpp index 46cd9e49ac5a..986aac3b17c1 100644 --- a/src/transaction/commit_state.cpp +++ b/src/transaction/commit_state.cpp @@ -204,8 +204,15 @@ void CommitState::WriteDelete(DeleteInfo &info) { delete_chunk->Initialize(Allocator::DefaultAllocator(), delete_types); } auto rows = FlatVector::GetData<row_t>(delete_chunk->data[0]); - for (idx_t i = 0; i < info.count; i++) { - rows[i] = info.base_row + info.rows[i]; + if (info.is_consecutive) { + for (idx_t i = 0; i < info.count; i++) { + rows[i] = info.base_row + i; + } + } else { + auto delete_rows = info.GetRows(); + for (idx_t i = 0; i < info.count; i++) { + rows[i] = info.base_row + delete_rows[i]; + } } delete_chunk->SetCardinality(info.count); log->WriteDelete(*delete_chunk); @@ -310,7 +317,7 @@ void CommitState::CommitEntry(UndoFlags type, data_ptr_t data) { WriteDelete(*info); } // mark the tuples as committed - info->version_info->CommitDelete(info->vector_idx, commit_id, info->rows, info->count); + info->version_info->CommitDelete(info->vector_idx, commit_id, *info); break; } case UndoFlags::UPDATE_TUPLE: { @@ -351,7 +358,7 @@ void CommitState::RevertCommit(UndoFlags type, data_ptr_t data) { auto info = reinterpret_cast<DeleteInfo *>(data); info->table->info->cardinality += info->count; // revert the commit by writing the (uncommitted) transaction_id back into the version info - info->version_info->CommitDelete(info->vector_idx, transaction_id, info->rows, info->count); + info->version_info->CommitDelete(info->vector_idx, transaction_id, *info); break; } case UndoFlags::UPDATE_TUPLE: { diff --git a/src/transaction/duck_transaction.cpp b/src/transaction/duck_transaction.cpp index 528d718b78ab..d2ce36578b26 100644 --- a/src/transaction/duck_transaction.cpp +++ b/src/transaction/duck_transaction.cpp @@ -73,14 +73,34 @@ void DuckTransaction::PushCatalogEntry(CatalogEntry &entry, data_ptr_t extra_dat void DuckTransaction::PushDelete(DataTable &table, RowVersionManager &info, idx_t vector_idx, row_t rows[], idx_t count, idx_t base_row) { - auto delete_info = reinterpret_cast<DeleteInfo *>( - undo_buffer.CreateEntry(UndoFlags::DELETE_TUPLE, sizeof(DeleteInfo) + sizeof(row_t) * count)); + bool is_consecutive = true; + // check if the rows are consecutive + for (idx_t i = 0; i < count; i++) { + if (rows[i] != row_t(i)) { + is_consecutive = false; + break; + } + } + idx_t alloc_size = sizeof(DeleteInfo); + if (!is_consecutive) { + // if rows are not consecutive we need to allocate row identifiers + alloc_size += sizeof(uint16_t) * count; + } + + auto delete_info = reinterpret_cast<DeleteInfo *>(undo_buffer.CreateEntry(UndoFlags::DELETE_TUPLE, alloc_size)); delete_info->version_info = &info; delete_info->vector_idx = vector_idx; delete_info->table = &table; delete_info->count = count; delete_info->base_row = base_row; - memcpy(delete_info->rows, rows, sizeof(row_t) * count); + delete_info->is_consecutive = is_consecutive; + if (!is_consecutive) { + // if rows are not consecutive + auto delete_rows = delete_info->GetRows(); + for (idx_t i = 0; i < count; i++) { + delete_rows[i] = NumericCast<uint16_t>(rows[i]); + } + } } void DuckTransaction::PushAppend(DataTable &table, idx_t start_row, idx_t row_count) {
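The rewritten DeleteInfo shrinks undo-buffer entries in two ways: row offsets within a vector fit in uint16_t (a vector holds at most STANDARD_VECTOR_SIZE rows), and a delete of rows 0..count-1 is flagged as consecutive and stores no offsets at all. Rough arithmetic, with illustrative constants:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
	const std::uint64_t count = 2048;                       // e.g. STANDARD_VECTOR_SIZE
	std::uint64_t old_bytes = sizeof(std::int64_t) * count; // one row_t per row: 16384 bytes
	std::uint64_t sparse = sizeof(std::uint16_t) * count;   // one uint16_t per row: 4096 bytes
	std::uint64_t consecutive = 0;                          // just the is_consecutive flag
	std::printf("per-vector payload: %llu -> %llu (sparse) or %llu (consecutive)\n",
	            static_cast<unsigned long long>(old_bytes), static_cast<unsigned long long>(sparse),
	            static_cast<unsigned long long>(consecutive));
	return 0;
}
```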
diff --git a/src/transaction/duck_transaction_manager.cpp b/src/transaction/duck_transaction_manager.cpp index 008604c3502c..a63b886d584e 100644 --- a/src/transaction/duck_transaction_manager.cpp +++ b/src/transaction/duck_transaction_manager.cpp @@ -212,6 +212,7 @@ ErrorData DuckTransactionManager::CommitTransaction(ClientContext &context, Tran client_locks.clear(); } + D_ASSERT(lock); // commit successful: remove the transaction id from the list of active transactions // potentially resulting in garbage collection RemoveTransaction(transaction); diff --git a/src/transaction/rollback_state.cpp b/src/transaction/rollback_state.cpp index b30124c113e3..3210cffb4654 100644 --- a/src/transaction/rollback_state.cpp +++ b/src/transaction/rollback_state.cpp @@ -31,7 +31,7 @@ void RollbackState::RollbackEntry(UndoFlags type, data_ptr_t data) { case UndoFlags::DELETE_TUPLE: { auto info = reinterpret_cast<DeleteInfo *>(data); // reset the deleted flag on rollback - info->version_info->CommitDelete(info->vector_idx, NOT_DELETED_ID, info->rows, info->count); + info->version_info->CommitDelete(info->vector_idx, NOT_DELETED_ID, *info); break; } case UndoFlags::UPDATE_TUPLE: { diff --git a/test/api/test_reset.cpp b/test/api/test_reset.cpp index fe6724816fe3..999530a30ae0 100644 --- a/test/api/test_reset.cpp +++ b/test/api/test_reset.cpp @@ -141,12 +141,12 @@ bool ValueEqual(const Value &left, const Value &right) { return Value::NotDistinctFrom(left, right); } -void RequireValueEqual(ConfigurationOption *op, const Value &left, const Value &right, int line) { +void RequireValueEqual(const ConfigurationOption &op, const Value &left, const Value &right, int line) { if (ValueEqual(left, right)) { return; } auto error = StringUtil::Format("\nLINE[%d] (Option:%s) | Expected left:'%s' and right:'%s' to be equal", line, - op->name, left.ToString(), right.ToString()); + op.name, left.ToString(), right.ToString()); cerr << error << endl; REQUIRE(false); } @@ -208,7 +208,7 @@ TEST_CASE("Test RESET statement for ClientConfig options", "[api]") { } // Get the value of the option again auto changed_value = op->get_setting(*con.context); - REQUIRE_VALUE_EQUAL(op, changed_value, value_pair.output); + REQUIRE_VALUE_EQUAL(*op, changed_value, value_pair.output); if (op->reset_local) { op->reset_local(*con.context); @@ -218,7 +218,7 @@ TEST_CASE("Test RESET statement for ClientConfig options", "[api]") { // Get the reset value of the option auto reset_value = op->get_setting(*con.context); - REQUIRE_VALUE_EQUAL(op, reset_value, original_value); + REQUIRE_VALUE_EQUAL(*op, reset_value, original_value); } } }
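The test_reset.cpp hunk above swaps a pointer parameter for a const reference: the helper never accepts null and never reassigns, so the reference states that contract in the signature and drops the dereferences. A minimal illustration (not the actual test harness):

```cpp
#include <iostream>
#include <string>

struct Option {
	std::string name;
};

// before: void Check(const Option *op) with an implicit "op != nullptr"
// precondition; after: the type system enforces it at the call site
void Check(const Option &op) {
	std::cout << op.name << '\n';
}

int main() {
	Option op{"memory_limit"};
	Check(op); // callers holding a pointer write Check(*ptr), as the test now does
	return 0;
}
```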
diff --git a/test/fuzzer/pedro/alter_dependency_conflict.test b/test/fuzzer/pedro/alter_dependency_conflict.test index 0373e36b35c6..882411014608 100644 --- a/test/fuzzer/pedro/alter_dependency_conflict.test +++ b/test/fuzzer/pedro/alter_dependency_conflict.test @@ -10,10 +10,10 @@ CREATE TABLE t4 (c0 DATE, c3 VARCHAR(10)); statement ok CREATE INDEX i2 ON t4 (c3); -# Catalog Error: Cannot alter entry "t4" because there are entries that depend on it. statement error ALTER TABLE t4 ADD c1 BLOB; ---- +Cannot alter entry "t4" because there are entries that depend on it. # the table should still be in a usable state after the alter statement ok diff --git a/test/fuzzer/pedro/view_not_rebound_error.test b/test/fuzzer/pedro/view_not_rebound_error.test index 5906b17648e8..a45aff4520ed 100644 --- a/test/fuzzer/pedro/view_not_rebound_error.test +++ b/test/fuzzer/pedro/view_not_rebound_error.test @@ -7,8 +7,14 @@ CREATE TABLE t1 (c1 INT); statement ok CREATE VIEW t0 AS SELECT 1 FROM t1 GROUP BY c1; -statement ok +statement error DROP TABLE t1; +---- +Dependency Error: Cannot drop entry "t1" because there are entries that depend on it + +# We need CASCADE to drop t1 because t0 depends on it +statement ok +DROP TABLE t1 CASCADE; statement ok CREATE TABLE t2 (c1 INT); @@ -16,7 +22,8 @@ CREATE TABLE t2 (c1 INT); statement ok CREATE TABLE t1 (c2 INT); +# t0 has been deleted, so this query fails statement error SELECT 1 FROM t2 JOIN t1 ON (SELECT TRUE FROM t0); ---- -Contents of view were altered +Catalog Error: Table with name t0 does not exist! diff --git a/test/issues/general/test_11391.test b/test/issues/general/test_11391.test new file mode 100644 index 000000000000..637c901a6dee --- /dev/null +++ b/test/issues/general/test_11391.test @@ -0,0 +1,8 @@ +# name: test/issues/general/test_11391.test +# description: Issue 11391: Catalog Error with nested CTEs +# group: [general] + +query I +with foo as (with foo as (select 1) select * from foo) select * from foo; +---- +1 diff --git a/test/sql/aggregate/aggregates/test_quantile_cont.test b/test/sql/aggregate/aggregates/test_quantile_cont.test index c7f3b0137cf0..d05df593ca1c 100644 --- a/test/sql/aggregate/aggregates/test_quantile_cont.test +++ b/test/sql/aggregate/aggregates/test_quantile_cont.test @@ -131,6 +131,16 @@ SELECT quantile_cont(r, -0.1) FROM quantile ---- 899910.0 +# ORDER BY ... DESC +query II +SELECT + percentile_cont(0.8) WITHIN GROUP (ORDER BY x DESC), + quantile_cont(x, 0.8 ORDER BY x DESC), +FROM + (VALUES (2), (1)) _(x); +---- +1.2 1.2 + # empty input query R SELECT quantile_cont(r, 0.1) FROM quantile WHERE 1=0 diff --git a/test/sql/aggregate/aggregates/test_quantile_disc.test b/test/sql/aggregate/aggregates/test_quantile_disc.test index 766a89b9a31f..53a5c4acac52 100644 --- a/test/sql/aggregate/aggregates/test_quantile_disc.test +++ b/test/sql/aggregate/aggregates/test_quantile_disc.test @@ -83,6 +83,16 @@ FROM VALUES (11000), (3100), (2900), (2800), (2600), (2500) AS tab(col); ---- 2900 +# ORDER BY ... DESC +query II +SELECT + percentile_disc(0.8) WITHIN GROUP (ORDER BY x DESC), + quantile_disc(x, 0.8 ORDER BY x DESC), +FROM + (VALUES (2), (1)) _(x); +---- +1 1 + # # VARCHAR. Remember, this is dictionary ordering, not numeric ordering!
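Why the two new quantile tests above expect different answers for the same input: over the explicit ordering [2, 1] (ORDER BY x DESC), the continuous quantile interpolates between neighbours while the discrete quantile must return an element of the input. A plain restatement of the arithmetic under the usual definitions (not DuckDB's implementation):

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
	std::vector<double> ordered = {2.0, 1.0}; // ORDER BY x DESC
	double q = 0.8;
	double pos = q * (ordered.size() - 1);    // 0.8, i.e. between index 0 and 1
	std::size_t lo = static_cast<std::size_t>(pos);
	double frac = pos - static_cast<double>(lo);
	// continuous: linear interpolation -> 2 + 0.8 * (1 - 2) = 1.2
	double cont = ordered[lo] + frac * (ordered[lo + 1] - ordered[lo]);
	// discrete: first element whose cumulative share reaches q -> 1
	double disc = ordered[frac > 0 ? lo + 1 : lo];
	std::printf("cont=%.1f disc=%.0f\n", cont, disc);
	return 0;
}
```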
# diff --git a/test/sql/aggregate/qualify/test_qualify_view.test b/test/sql/aggregate/qualify/test_qualify_view.test index e054aa9c3047..ae34d206f978 100644 --- a/test/sql/aggregate/qualify/test_qualify_view.test +++ b/test/sql/aggregate/qualify/test_qualify_view.test @@ -37,21 +37,29 @@ ORDER BY ALL 22 11 # drop the table the view is based on -statement ok +statement error DROP TABLE test.t +---- +Dependency Error: Cannot drop entry "t" because there are entries that depend on it + +statement ok +DROP TABLE test.t CASCADE; loop i 0 2 # restart the system restart -# after recreating the table, we can query the view again +# we first need to recreate both the table and the view before we can query it again statement ok CREATE TABLE test.t (a INTEGER, b INTEGER); statement ok INSERT INTO test.t VALUES (11, 22), (13, 22), (12, 21) +statement ok +CREATE VIEW test.v AS SELECT * FROM test.t QUALIFY row_number() OVER (PARTITION BY b) = 1; + query II SELECT b, SUM(a) FROM test.v @@ -64,10 +72,12 @@ ORDER BY ALL # drop the table again statement ok -DROP TABLE test.t +DROP TABLE test.t CASCADE; +# dropping the table also caused the view to be dropped statement error SELECT * FROM test.v ---- +Catalog Error: Table with name v does not exist! endloop diff --git a/test/sql/alter/default/test_set_default.test b/test/sql/alter/default/test_set_default.test index 9af72559f541..af15678ba45c 100644 --- a/test/sql/alter/default/test_set_default.test +++ b/test/sql/alter/default/test_set_default.test @@ -3,10 +3,15 @@ # group: [default] statement ok -CREATE TABLE test(i INTEGER, j INTEGER) +CREATE TABLE test( + i INTEGER, + j INTEGER +) statement ok -INSERT INTO test VALUES (1, 1), (2, 2) +INSERT INTO test VALUES + (1, 1), + (2, 2) statement ok ALTER TABLE test ALTER j SET DEFAULT 3 diff --git a/test/sql/alter/rename_table/test_rename_table_with_dependency_check.test b/test/sql/alter/rename_table/test_rename_table_with_dependency_check.test index 8f505ab28dc9..6dce4f3b7c91 100644 --- a/test/sql/alter/rename_table/test_rename_table_with_dependency_check.test +++ b/test/sql/alter/rename_table/test_rename_table_with_dependency_check.test @@ -8,24 +8,24 @@ CREATE TABLE t0 (c0 INT); statement ok CREATE UNIQUE INDEX i1 ON t0 (c0); -# Cannot alter entry "t0" because there are entries that depend on it statement error ALTER TABLE t0 RENAME TO t3; ---- +Cannot alter entry "t0" because there are entries that depend on it # t3 is not exist statement ok CREATE TABLE t3 (c0 INT); -# Cannot alter entry "t0" because there are entries that depend on it statement error ALTER TABLE t0 RENAME TO t4; ---- +Cannot alter entry "t0" because there are entries that depend on it statement ok DROP TABLE t0; -# t4 is not exist statement error ANALYZE t4; ---- +Table with name t4 does not exist! 
diff --git a/test/sql/attach/attach_export_import.test b/test/sql/attach/attach_export_import.test index b139699d15c9..3a7a53987411 100644 --- a/test/sql/attach/attach_export_import.test +++ b/test/sql/attach/attach_export_import.test @@ -7,16 +7,42 @@ require skip_reload statement ok ATTACH ':memory:' AS db1 +statement ok +ATTACH ':memory:' as other + +statement ok +USE db1; + statement ok CREATE TABLE db1.integers(i INTEGER); statement ok INSERT INTO db1.integers VALUES (1), (2), (3), (NULL); +# FIXME: when we don't use 'USE' then we have to refer to 'integers' as 'db1.integers' +# this breaks when re-imported, because the table will be created as just 'integers', not 'db1.integers' + +# Create a view that references the integers table +statement ok +CREATE VIEW db1.integers_view AS SELECT * FROM integers; + +statement ok +BEGIN TRANSACTION; + +# Create a table that should not be exported +statement ok +CREATE TABLE other.dont_export_me (i integer); + # now export the db statement ok EXPORT DATABASE db1 TO '__TEST_DIR__/export_test' (FORMAT CSV) +statement ok +rollback; + +statement ok +drop table db1.integers CASCADE; + statement error SELECT * FROM integers ---- @@ -25,10 +51,16 @@ does not exist statement ok IMPORT DATABASE '__TEST_DIR__/export_test' -query I +query I nosort q1 SELECT * FROM integers ORDER BY i NULLS LAST ---- -1 -2 -3 -NULL + +query I nosort q1 +SELECT * FROM integers_view order by i NULLS LAST; +---- + +statement error +SELECT * FROM other.dont_export_me; +---- +Catalog Error: Table with name dont_export_me does not exist! + diff --git a/test/sql/attach/attach_index.test b/test/sql/attach/attach_index.test index eebca517eb40..3cb1112b2978 100644 --- a/test/sql/attach/attach_index.test +++ b/test/sql/attach/attach_index.test @@ -14,7 +14,10 @@ statement ok USE index_db statement ok -CREATE TABLE tbl_a (a_id INTEGER PRIMARY KEY, value VARCHAR NOT NULL) +CREATE TABLE tbl_a ( + a_id INTEGER PRIMARY KEY, + value VARCHAR NOT NULL +) statement ok CREATE INDEX idx_tbl_a ON tbl_a (value) diff --git a/test/sql/catalog/dependencies/test_alter_dependency_ownership.test b/test/sql/catalog/dependencies/test_alter_dependency_ownership.test index 857e2fbadfd3..96710e92a5e2 100644 --- a/test/sql/catalog/dependencies/test_alter_dependency_ownership.test +++ b/test/sql/catalog/dependencies/test_alter_dependency_ownership.test @@ -23,6 +23,7 @@ DROP TABLE tablename; statement error SELECT nextval('sequence1'); ---- +Catalog Error: Sequence with name sequence1 does not exist! ##TEST: If the table is dropped, then the sequence is also droppped, using schemas statement ok @@ -42,6 +43,7 @@ DROP TABLE main.tablename; statement error SELECT nextval('main.sequence1'); ---- +Catalog Error: Sequence with name sequence1 does not exist! ##TEST: If the owned sequence is dropped with CASCADE, then the table is also dropped statement ok @@ -61,6 +63,7 @@ DROP SEQUENCE sequence1 CASCADE; statement error SELECT * FROM tablename; ---- +Catalog Error: Table with name tablename does not exist! ##TEST: The owned sequence cannot be dropped without CASCADE statement ok @@ -77,6 +80,7 @@ ALTER SEQUENCE sequence1 OWNED BY tablename; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok DROP TABLE tablename; @@ -163,14 +167,17 @@ DROP TABLE tablename; statement error SELECT nextval('sequence1'); ---- +Catalog Error: Sequence with name sequence1 does not exist! 
statement error SELECT nextval('sequence2'); ---- +Catalog Error: Sequence with name sequence2 does not exist! statement error SELECT nextval('sequence3'); ---- +Catalog Error: Sequence with name sequence3 does not exist! ##TEST: When owning a sequence, insertions work normally statement ok @@ -234,10 +241,12 @@ CREATE TABLE tablename ( statement error ALTER SEQUENCE sequence1 OWNED BY tablename; ---- +sequence1 is already owned by new_tablename statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement error CREATE OR REPLACE SEQUENCE sequence1; @@ -275,6 +284,7 @@ ALTER SEQUENCE sequence1 OWNED BY tablename; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok DROP TABLE tablename; @@ -298,6 +308,7 @@ ALTER TABLE tablename DROP colname2; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok DROP TABLE tablename; @@ -320,6 +331,7 @@ ALTER TABLE tablename ALTER colname TYPE float; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok DROP TABLE tablename; @@ -345,6 +357,7 @@ ALTER TABLE tablename DROP colname4; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok ALTER TABLE tablename DROP colname3; @@ -352,6 +365,7 @@ ALTER TABLE tablename DROP colname3; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok ALTER TABLE tablename DROP colname2; @@ -359,6 +373,7 @@ ALTER TABLE tablename DROP colname2; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok DROP TABLE tablename; @@ -384,6 +399,7 @@ DROP TABLE tablename; statement error select nextval('sequence1'); ---- +Catalog Error: Sequence with name sequence1 does not exist! statement ok ROLLBACK; @@ -396,6 +412,7 @@ select nextval('sequence1'); statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok DROP TABLE tablename; @@ -413,6 +430,7 @@ ALTER SEQUENCE sequence1 OWNED BY v1_sequence1; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. statement ok DROP VIEW v1_sequence1; @@ -420,6 +438,7 @@ DROP VIEW v1_sequence1; statement error SELECT nextval('sequence1'); ---- +Catalog Error: Sequence with name sequence1 does not exist! ##TEST: Sequence can own a sequence statement ok @@ -434,6 +453,7 @@ ALTER SEQUENCE sequence1 OWNED BY sequence2; statement error DROP SEQUENCE sequence1; ---- +Dependency Error: Cannot drop entry "sequence1" because there are entries that depend on it. 
Use DROP...CASCADE to drop all dependents. statement ok DROP SEQUENCE sequence2; @@ -441,6 +461,7 @@ DROP SEQUENCE sequence2; statement error SELECT nextval('sequence1'); ---- +Catalog Error: Sequence with name sequence1 does not exist! ##TEST: Sequence cant own its owner statement ok @@ -481,6 +502,8 @@ ALTER SEQUENCE sequence3 OWNED BY sequence2; ---- Dependency Error: sequence2 can not become the owner, it is already owned by sequence1 +# FIXME: this error makes no sense, if there is no circular dependency +# this should be allowed statement error ALTER SEQUENCE sequence1 OWNED BY sequence3; ---- @@ -498,15 +521,19 @@ DROP SEQUENCE sequence4; statement error SELECT nextval('sequence1'); ---- +Catalog Error: Sequence with name sequence1 does not exist! statement error SELECT nextval('sequence2'); ---- +Catalog Error: Sequence with name sequence2 does not exist! statement error SELECT nextval('sequence3'); ---- +Catalog Error: Sequence with name sequence3 does not exist! statement error SELECT nextval('sequence4'); ---- +Catalog Error: Sequence with name sequence4 does not exist! diff --git a/test/sql/catalog/function/test_macro_default_arg.test b/test/sql/catalog/function/test_macro_default_arg.test index 471f2f9ab2ce..16f51543fabd 100644 --- a/test/sql/catalog/function/test_macro_default_arg.test +++ b/test/sql/catalog/function/test_macro_default_arg.test @@ -61,14 +61,19 @@ select my_macro2(can_not_be_empty) ---- 5 -statement ok +statement error drop table integers; +---- +Dependency Error: Cannot drop entry "integers" because there are entries that depend on it. Use DROP...CASCADE to drop all dependents. + +statement ok +drop table integers cascade; # The macro still exists, but points to a non existant table statement error select my_macro2(5); ---- -Catalog Error: Table with name integers does not exist! +Catalog Error: Scalar Function with name my_macro2 does not exist! 
statement ok Create table t1 (a int, b int); diff --git a/test/sql/catalog/function/test_recursive_macro.test b/test/sql/catalog/function/test_recursive_macro.test index be9739561679..ff5cf51743bd 100644 --- a/test/sql/catalog/function/test_recursive_macro.test +++ b/test/sql/catalog/function/test_recursive_macro.test @@ -39,13 +39,10 @@ create macro m1(a) as a+1; statement ok create macro m2(a) as m1(a)+1; -statement ok -create or replace macro m1(a) as m2(a)+1; - statement error -select m2(42); +create or replace macro m1(a) as m2(a)+1; ---- -Binder Error: Maximum recursion depth exceeded +Catalog Error: CREATE OR REPLACE is not allowed to depend on itself # also table macros statement ok @@ -54,10 +51,7 @@ create macro m3(a) as a+1; statement ok create macro m4(a) as table select m3(a); -statement ok -create or replace macro m3(a) as (from m4(42)); - statement error -select m3(42); +create or replace macro m3(a) as (from m4(42)); ---- -Binder Error: Maximum recursion depth exceeded +Catalog Error: CREATE OR REPLACE is not allowed to depend on itself diff --git a/test/sql/catalog/view/recursive_view.test b/test/sql/catalog/view/recursive_view.test index ce9290ac9ead..fc3106a4afec 100644 --- a/test/sql/catalog/view/recursive_view.test +++ b/test/sql/catalog/view/recursive_view.test @@ -15,23 +15,32 @@ INSERT INTO test(val) VALUES (1), (2), (3); statement ok CREATE OR REPLACE VIEW foo AS (SELECT * FROM test); -statement ok +statement error CREATE OR REPLACE VIEW foo AS (SELECT * FROM foo); +---- +Catalog Error: CREATE OR REPLACE is not allowed to depend on itself -statement error +# foo was not deleted +query I SELECT * FROM foo; ---- +1 +2 +3 # more complex recursive view definition -statement ok -CREATE OR REPLACE VIEW foo AS (SELECT * FROM test); - statement ok CREATE OR REPLACE VIEW foo2 AS (SELECT * FROM foo); -statement ok +statement error CREATE OR REPLACE VIEW foo AS (SELECT (SELECT * FROM foo2)); +---- +Catalog Error: CREATE OR REPLACE is not allowed to depend on itself -statement error +# foo was not deleted +query I SELECT * FROM foo; ---- +1 +2 +3 diff --git a/test/sql/catalog/view/test_view_schema_change.test b/test/sql/catalog/view/test_view_schema_change.test index f669b36ef7f5..c3afd49cc33b 100644 --- a/test/sql/catalog/view/test_view_schema_change.test +++ b/test/sql/catalog/view/test_view_schema_change.test @@ -24,8 +24,13 @@ SELECT * FROM v1 43 # now drop the table and create a table that has a different schema -statement ok +statement error DROP TABLE t1 +---- +Dependency Error: Cannot drop entry "t1" because there are entries that depend on it. + +statement ok +DROP TABLE t1 CASCADE; statement ok CREATE TABLE t1(i DATE) @@ -54,11 +59,11 @@ DROP TABLE t1 statement ok CREATE TABLE t1(k INTEGER) -# names returned by the view have changed +# v1 was deleted earlier as part of CASCADE statement error -SELECT * FROM v1 +SELECT i FROM v1 ---- -Binder Error: Contents of view were altered: names don't match! +Catalog Error: Table with name v1 does not exist! statement ok DROP TABLE t1 @@ -66,6 +71,9 @@ DROP TABLE t1 statement ok CREATE TABLE t1(i INTEGER) +statement ok +CREATE VIEW v1 AS SELECT * FROM t1 + # now we can query again! 
query I SELECT * FROM v1 diff --git a/test/sql/constraints/foreignkey/test_foreignkey.test b/test/sql/constraints/foreignkey/test_foreignkey.test index 9c4a2ccce7ce..700c18922d07 100644 --- a/test/sql/constraints/foreignkey/test_foreignkey.test +++ b/test/sql/constraints/foreignkey/test_foreignkey.test @@ -182,7 +182,11 @@ statement ok INSERT INTO pkt VALUES (1, 11), (2, 12), (3, 13) statement ok -CREATE TABLE fkt(j INTEGER, l INTEGER UNIQUE, FOREIGN KEY (j) REFERENCES pkt(i)) +CREATE TABLE fkt( + j INTEGER, + l INTEGER UNIQUE, + FOREIGN KEY (j) REFERENCES pkt(i) +) statement ok CREATE INDEX k_index ON pkt(k) diff --git a/test/sql/copy/csv/batched_write/batch_csv_mixed_batches.test_slow b/test/sql/copy/csv/batched_write/batch_csv_mixed_batches.test_slow index cc9799af8b04..6d997e2af9ea 100644 --- a/test/sql/copy/csv/batched_write/batch_csv_mixed_batches.test_slow +++ b/test/sql/copy/csv/batched_write/batch_csv_mixed_batches.test_slow @@ -154,6 +154,9 @@ DROP TABLE mixed_batches_v3 statement ok DROP TABLE mixed_batches_v4 +statement ok +DROP VIEW v1 CASCADE; + # create views that read the batches using unions statement ok CREATE OR REPLACE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet' diff --git a/test/sql/copy/csv/test_11403.test b/test/sql/copy/csv/test_11403.test new file mode 100644 index 000000000000..3a49d55342f0 --- /dev/null +++ b/test/sql/copy/csv/test_11403.test @@ -0,0 +1,19 @@ +# name: test/sql/copy/csv/test_11403.test +# description: Test CSV Sniffing detects correct quote for issue 11403 +# group: [csv] + +statement ok +PRAGMA enable_verification + +# requires notwindows for windows-style path backslash reasons +require notwindows + +query I +SELECT COUNT(*) FROM 'data/csv/quoted_newline.csv' +---- +1 + +query I +SELECT quote FROM sniff_csv('data/csv/quoted_newline.csv') +---- +" \ No newline at end of file diff --git a/test/sql/copy/parquet/batched_write/parquet_write_mixed_batches.test_slow b/test/sql/copy/parquet/batched_write/parquet_write_mixed_batches.test_slow index cb0466166bf5..d2bc62b131ae 100644 --- a/test/sql/copy/parquet/batched_write/parquet_write_mixed_batches.test_slow +++ b/test/sql/copy/parquet/batched_write/parquet_write_mixed_batches.test_slow @@ -154,9 +154,18 @@ DROP TABLE mixed_batches_v3 statement ok DROP TABLE mixed_batches_v4 +statement error +CREATE OR REPLACE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet' +---- +Dependency Error: Cannot drop entry "v1" because there are entries that depend on it + +# Because v2 depends on v1 we have to drop with CASCADE +statement ok +DROP VIEW v1 CASCADE; + # create views that read the batches using unions statement ok -CREATE OR REPLACE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet' +CREATE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet' statement ok CREATE OR REPLACE VIEW v2 AS FROM v1 WHERE (i//10000)%2=0; 
diff --git a/test/sql/copy_database/copy_database_fk.test b/test/sql/copy_database/copy_database_fk.test index a5e22bfea8a4..c76a970bda43 100644 --- a/test/sql/copy_database/copy_database_fk.test +++ b/test/sql/copy_database/copy_database_fk.test @@ -4,9 +4,6 @@ require noforcestorage -# FIXME - this is not working right now - we need to correctly take dependencies into account for COPY FROM DATABASE -mode skip - statement ok CREATE TABLE pk_integers(i INTEGER PRIMARY KEY) diff --git a/test/sql/export/export_fk_table.test b/test/sql/export/export_fk_table.test new file mode 100644 index 000000000000..00b547b561a8 --- /dev/null +++ b/test/sql/export/export_fk_table.test @@ -0,0 +1,27 @@ +# name: test/sql/export/export_fk_table.test +# description: Test export database +# group: [export] + +statement ok +BEGIN TRANSACTION + +statement ok +create table tbl (a integer primary key, b integer); + +statement ok +create sequence seq; + +statement ok +alter sequence seq owned by tbl; + +statement ok +create table tbl2 (c integer references tbl(a)); + +statement ok +EXPORT DATABASE '__TEST_DIR__/export_generated_columns' (FORMAT CSV); + +statement ok +ROLLBACK + +statement ok +IMPORT DATABASE '__TEST_DIR__/export_generated_columns' diff --git a/test/sql/export/export_generated_columns.test b/test/sql/export/export_generated_columns.test index 5509d49eb915..edcfde310129 100644 --- a/test/sql/export/export_generated_columns.test +++ b/test/sql/export/export_generated_columns.test @@ -5,10 +5,14 @@ statement ok BEGIN TRANSACTION +# Create a macro that the generated column uses +statement ok +CREATE MACRO my_macro(b) AS b + 5; + statement ok CREATE TABLE tbl ( x INTEGER, - gen_x AS (x + 5) + gen_x AS (my_macro(x)) ); statement ok @@ -44,6 +48,11 @@ statement error INSERT INTO tbl VALUES(2,3) ---- +statement error +drop macro my_macro; +---- +Dependency Error: Cannot drop entry "my_macro" because there are entries that depend on it + # 'x' can not be removed, as 'gen_x' depends on it statement error ALTER TABLE tbl DROP COLUMN x; diff --git a/test/sql/export/export_indexes.test b/test/sql/export/export_indexes.test new file mode 100644 index 000000000000..4552455fbe50 --- /dev/null +++ b/test/sql/export/export_indexes.test @@ -0,0 +1,49 @@ +# name: test/sql/export/export_indexes.test +# description: Test export of indexes +# group: [export] + +statement ok +BEGIN TRANSACTION + +# scalar macro +statement ok +CREATE MACRO elaborate_macro(x, y := 7) AS x + y; + +statement ok +CREATE TABLE tbl (x integer, y varchar); + +# Index that depends on the 'elaborate_macro' macro function +statement ok +CREATE UNIQUE INDEX my_index on tbl (elaborate_macro(tbl.x)); + +query I +select index_name from duckdb_indexes(); +---- +my_index + +statement ok +EXPORT DATABASE '__TEST_DIR__/export_macros' (FORMAT CSV); + +statement ok +ROLLBACK + +statement ok +IMPORT DATABASE '__TEST_DIR__/export_macros' + +query I +select index_name from duckdb_indexes(); +---- +my_index + +query T +SELECT elaborate_macro(28, y := 5) +---- +33 + +statement ok
insert into tbl VALUES (10, 'hello'); + +statement error +insert into tbl VALUES (10, 'world'); +---- +Constraint Error: Duplicate key "(x + 7): 17" violates unique constraint.
diff --git a/test/sql/export/export_types.test b/test/sql/export/export_types.test index 1913be340995..8c7a1e7d08e2 100644 --- a/test/sql/export/export_types.test +++ b/test/sql/export/export_types.test @@ -2,9 +2,6 @@ # description: Test export of macro's # group: [export] -# Because of ordering issues in EXPORT DATABASE we can't IMPORT this database -mode skip - statement ok BEGIN TRANSACTION diff --git a/test/sql/json/issues/read_json_memory_usage.test b/test/sql/json/issues/read_json_memory_usage.test new file mode 100644 index 000000000000..e33c485c9519 --- /dev/null +++ b/test/sql/json/issues/read_json_memory_usage.test @@ -0,0 +1,28 @@ +# name: test/sql/json/issues/read_json_memory_usage.test +# description: Test JSON memory usage (internal issue #1683) +# group: [issues] + +require json + +statement ok +SET threads=8; + +statement ok +SET memory_limit='100MiB'; + +query I +SELECT * FROM read_json_objects('data/json/example_rn.ndjson', format='nd'); +---- +{"id":1,"name":"O Brother, Where Art Thou?"} +{"id":2,"name":"Home for the Holidays"} +{"id":3,"name":"The Firm"} +{"id":4,"name":"Broadcast News"} +{"id":5,"name":"Raising Arizona"} + +statement ok +SET memory_limit='50MiB'; + +statement error +SELECT * FROM read_json_objects('data/json/example_rn.ndjson', format='nd'); +---- +Out of Memory Error diff --git a/test/sql/storage/catalog/test_view_cross_catalog.test b/test/sql/storage/catalog/test_view_cross_catalog.test new file mode 100644 index 000000000000..188b3b793beb --- /dev/null +++ b/test/sql/storage/catalog/test_view_cross_catalog.test @@ -0,0 +1,63 @@ +# name: test/sql/storage/catalog/test_view_cross_catalog.test +# group: [catalog] + +statement ok +attach ':memory:' as db2 + +statement ok +create table db2.tbl (a varchar); + +statement ok +insert into db2.tbl VALUES ('1'), ('42'); + +query I +select * from db2.tbl +---- +1 +42 + +statement ok +create view tbl_view as select * from db2.tbl + +query I +select * from tbl_view +---- +1 +42 + +statement ok +drop table db2.tbl + +statement error +select * from tbl_view +---- +Table with name tbl does not exist! + +query I +select "type" from pragma_table_info('tbl_view') +---- +VARCHAR + +statement ok +create table db2.tbl (a INTEGER); + +query I +select "type" from pragma_table_info('tbl_view') +---- +VARCHAR + +statement error +select * from tbl_view +---- +Contents of view were altered: types don't match! + +statement ok +drop table db2.tbl; + +statement ok +create table db2.tbl (b VARCHAR); + +statement error +select * from tbl_view +---- +Contents of view were altered: names don't match! 
diff --git a/test/sql/storage/catalog/test_view_explicit_aliases.test b/test/sql/storage/catalog/test_view_explicit_aliases.test index f6ee0db8899a..5c8862db9d00 100644 --- a/test/sql/storage/catalog/test_view_explicit_aliases.test +++ b/test/sql/storage/catalog/test_view_explicit_aliases.test @@ -17,18 +17,19 @@ statement ok CREATE VIEW test.v (b,c) AS SELECT * FROM test.t; # check the view info -query II nosort view_info +query IIIIII PRAGMA table_info('test.v') ---- +0 b INTEGER false NULL false +1 c INTEGER false NULL false statement ok SELECT * FROM test.v statement ok -DROP TABLE test.t +DROP TABLE test.t CASCADE; -# we can still query this after the table is gone -query II nosort view_info +statement error PRAGMA table_info('test.v') ---- @@ -37,8 +38,7 @@ loop i 0 2 # now reload restart -# can check info, but not query the view -query II nosort view_info +statement error PRAGMA table_info('test.v') ---- @@ -53,10 +53,14 @@ CREATE TABLE test.t (a INTEGER, b INTEGER); statement ok SELECT * FROM test.t -statement ok +statement error SELECT b,c FROM test.v +---- + +statement ok +CREATE VIEW test.v (b,c) AS SELECT * FROM test.t; statement ok -DROP TABLE test.t +DROP TABLE test.t CASCADE; endloop diff --git a/test/sql/storage/catalog/test_view_storage.test b/test/sql/storage/catalog/test_view_storage.test index d69e519193e9..e8645f8a9801 100644 --- a/test/sql/storage/catalog/test_view_storage.test +++ b/test/sql/storage/catalog/test_view_storage.test @@ -24,14 +24,11 @@ PRAGMA table_info('test.v') # drop the table the view is based on statement ok -DROP TABLE test.t +DROP TABLE test.t CASCADE; -# we can still query the types and column names -query IIIIII +statement error PRAGMA table_info('test.v') ---- -0 a INTEGER 0 NULL 0 -1 b INTEGER 0 NULL 0 # but querying the view fails statement error @@ -44,13 +41,9 @@ loop i 0 2 # restart the system restart -# the view still exists, but the table does not -# we can check the types, but not query it -query IIIIII +statement error PRAGMA table_info('test.v') ---- -0 a INTEGER 0 NULL 0 -1 b INTEGER 0 NULL 0 statement error SELECT * FROM test.v @@ -63,6 +56,13 @@ CREATE TABLE test.t (a INTEGER, b INTEGER); statement ok SELECT * FROM test.t +statement error +SELECT * FROM test.v +---- + +statement ok +CREATE VIEW test.v AS SELECT * FROM test.t; + statement ok SELECT * FROM test.v @@ -74,6 +74,6 @@ PRAGMA table_info('test.v') # drop the table again statement ok -DROP TABLE test.t +DROP TABLE test.t CASCADE; endloop diff --git a/test/sql/storage/parallel/batch_insert_mix_batches.test_slow b/test/sql/storage/parallel/batch_insert_mix_batches.test_slow index 2808151c95c2..8b44524396bb 100644 --- a/test/sql/storage/parallel/batch_insert_mix_batches.test_slow +++ b/test/sql/storage/parallel/batch_insert_mix_batches.test_slow @@ -162,6 +162,9 @@ DROP TABLE integers2 statement ok DROP TABLE integers3 +statement ok +DROP VIEW v1 CASCADE; + # create views that read the batches using unions statement ok CREATE OR REPLACE VIEW v1 AS FROM '__TEST_DIR__/mix_batches_small.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_large.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd.parquet' UNION ALL FROM '__TEST_DIR__/mix_batches_odd_again.parquet' diff --git a/test/sql/storage/parallel/batch_insert_small_batches.test_slow b/test/sql/storage/parallel/batch_insert_small_batches.test_slow index 54a1842c1d02..851a99c18731 100644 --- a/test/sql/storage/parallel/batch_insert_small_batches.test_slow +++ b/test/sql/storage/parallel/batch_insert_small_batches.test_slow @@ 
-159,13 +159,7 @@ SELECT * FROM integers3 LIMIT 5 OFFSET 9999 20003 statement ok -DROP VIEW v1 - -statement ok -DROP VIEW v2 - -statement ok -DROP VIEW v3 +DROP VIEW v1 CASCADE; statement ok DROP TABLE integers diff --git a/test/sql/storage/store_group_order_all.test b/test/sql/storage/store_group_order_all.test index 61cb846d77ff..de753c18efd4 100644 --- a/test/sql/storage/store_group_order_all.test +++ b/test/sql/storage/store_group_order_all.test @@ -12,20 +12,39 @@ statement ok PRAGMA wal_autocheckpoint='1TB'; statement ok -CREATE TABLE integers(g integer, i integer); +CREATE TABLE integers( + g integer, + i integer +); statement ok -INSERT INTO integers values (0, 1), (0, 2), (1, 3), (1, NULL); +INSERT INTO integers values + (0, 1), + (0, 2), + (1, 3), + (1, NULL); statement ok -CREATE VIEW v1 AS SELECT g, i, g%2, SUM(i), SUM(g) FROM integers GROUP BY ALL ORDER BY ALL +CREATE VIEW v1 AS SELECT + g, + i, + g%2, + SUM(i), + SUM(g) +FROM integers GROUP BY ALL ORDER BY ALL; query IIIII nosort v1 SELECT * FROM v1 ---- statement ok -CREATE VIEW v2 AS SELECT g, i, g%2, SUM(i), SUM(g) FROM integers GROUP BY ALL ORDER BY ALL DESC NULLS LAST +CREATE VIEW v2 AS SELECT + g, + i, + g%2, + SUM(i), + SUM(g) +FROM integers GROUP BY ALL ORDER BY ALL DESC NULLS LAST; query IIIII nosort v2 SELECT * FROM v2 diff --git a/test/sql/storage/wal/wal_view_explicit_aliases.test b/test/sql/storage/wal/wal_view_explicit_aliases.test index 8056aa378a94..fbfe63f59614 100644 --- a/test/sql/storage/wal/wal_view_explicit_aliases.test +++ b/test/sql/storage/wal/wal_view_explicit_aliases.test @@ -34,12 +34,7 @@ statement ok SELECT * FROM test.v statement ok -DROP TABLE test.t - -# we can still query this after the table is gone -query II nosort view_info -PRAGMA table_info('test.v') ----- +DROP TABLE test.t CASCADE; loop i 0 2 @@ -50,25 +45,31 @@ statement ok PRAGMA disable_checkpoint_on_shutdown # can check info, but not query the view -query II nosort view_info +statement error PRAGMA table_info('test.v') ---- +Catalog Error: Table with name v does not exist! 
statement error SELECT * FROM test.v ---- -# we can query again after recreating the table statement ok CREATE TABLE test.t (a INTEGER, b INTEGER); statement ok SELECT * FROM test.t -statement ok +# we need to recreate the view to query 'v' after recreating the table + +statement error SELECT b,c FROM test.v +---- + +statement ok +CREATE VIEW test.v (b,c) AS SELECT * FROM test.t; statement ok -DROP TABLE test.t +DROP TABLE test.t CASCADE; endloop diff --git a/test/sql/storage/wal/wal_view_storage.test b/test/sql/storage/wal/wal_view_storage.test index 953332f6e4bb..b419ec80dbb5 100644 --- a/test/sql/storage/wal/wal_view_storage.test +++ b/test/sql/storage/wal/wal_view_storage.test @@ -11,17 +11,14 @@ load __TEST_DIR__/view_storage.db statement ok PRAGMA disable_checkpoint_on_shutdown +# Make sure the WAL doesn't get flushed by a checkpoint statement ok PRAGMA wal_autocheckpoint='1TB'; -# create a schema and view +# Create a schema containing a table and a view statement ok CREATE SCHEMA test; - -statement ok CREATE TABLE test.t (a INTEGER, b INTEGER); - -statement ok CREATE VIEW test.v AS SELECT * FROM test.t; # read the info from the view @@ -31,21 +28,20 @@ PRAGMA table_info('test.v') 0 a INTEGER 0 NULL 0 1 b INTEGER 0 NULL 0 -# drop the table the view is based on -statement ok -DROP TABLE test.t - -# we can still query the types and column names -query IIIIII -PRAGMA table_info('test.v') +# Try to drop the table +statement error +drop table test.t; ---- -0 a INTEGER 0 NULL 0 -1 b INTEGER 0 NULL 0 +Cannot drop entry "t" because there are entries that depend on it + +# Now with CASCADE +statement ok +drop table test.t cascade; -# but querying the view fails statement error -SELECT * FROM test.v +PRAGMA table_info('test.v') ---- +Catalog Error: Table with name v does not exist! statement ok CREATE VIEW test.v2 AS SELECT 42 @@ -55,16 +51,14 @@ DROP VIEW test.v2 loop i 0 2 -# restart the system +# restart the system, causing the database to restore from the WAL restart -# the view still exists, but the table does not -# we can check the types, but not query it -query IIIIII +# the view no longer exists +statement error PRAGMA table_info('test.v') ---- -0 a INTEGER 0 NULL 0 -1 b INTEGER 0 NULL 0 +Catalog Error: Table with name v does not exist! statement error SELECT * FROM test.v @@ -77,8 +71,14 @@ CREATE TABLE test.t (a INTEGER, b INTEGER); statement ok SELECT * FROM test.t -statement ok +# We created the table, but the view still doesn't exist +statement error SELECT * FROM test.v +---- +Catalog Error: Table with name v does not exist! 
+ +statement ok +CREATE VIEW test.v AS SELECT * FROM test.t; query IIIIII PRAGMA table_info('test.v') @@ -86,9 +86,15 @@ PRAGMA table_info('test.v') 0 a INTEGER 0 NULL 0 1 b INTEGER 0 NULL 0 -# drop the table again +# Try to drop the table +statement error +drop table test.t; +---- +Cannot drop entry "t" because there are entries that depend on it + +# Now with CASCADE statement ok -DROP TABLE test.t +drop table test.t cascade; statement error SELECT * FROM test.v2 diff --git a/test/sql/storage_version/storage_version.db b/test/sql/storage_version/storage_version.db index c816c3c219ae..56a4fa79eb9e 100644 Binary files a/test/sql/storage_version/storage_version.db and b/test/sql/storage_version/storage_version.db differ diff --git a/test/sql/table_function/duckdb_databases.test b/test/sql/table_function/duckdb_databases.test index beec34101673..9484983d2d78 100644 --- a/test/sql/table_function/duckdb_databases.test +++ b/test/sql/table_function/duckdb_databases.test @@ -11,12 +11,12 @@ PRAGMA enable_verification statement ok SELECT * FROM duckdb_databases(); -query IIII -SELECT database_name, internal, path, type FROM duckdb_databases() ORDER BY database_name; +query IIIII +SELECT database_name, internal, readonly, path, type FROM duckdb_databases() ORDER BY database_name; ---- -memory false NULL duckdb -system true NULL duckdb -temp true NULL duckdb +memory false false NULL duckdb +system true false NULL duckdb +temp true false NULL duckdb statement ok ATTACH '__TEST_DIR__/duckdb_databases.db' AS new_duckdb_database @@ -26,6 +26,11 @@ SELECT database_name, internal, split(replace(path, '\', '/'), '/')[-1], type FR ---- new_duckdb_database false duckdb_databases.db duckdb +query I +SELECT readonly FROM duckdb_databases WHERE database_name='new_duckdb_database'; +---- +false + query II nosort show_db SELECT database_name FROM duckdb_databases ORDER BY database_name ---- @@ -41,3 +46,14 @@ memory new_duckdb_database system temp + +statement ok +DETACH new_duckdb_database; + +statement ok +ATTACH '__TEST_DIR__/duckdb_databases.db' AS readonly_duckdb_database (READONLY 1); + +query I +SELECT readonly FROM duckdb_databases WHERE database_name='readonly_duckdb_database'; +---- +true \ No newline at end of file diff --git a/test/sql/table_function/lateral_table_function.test b/test/sql/table_function/lateral_table_function.test new file mode 100644 index 000000000000..18a429222110 --- /dev/null +++ b/test/sql/table_function/lateral_table_function.test @@ -0,0 +1,17 @@ +# name: test/sql/table_function/lateral_table_function.test +# description: Test lateral join table function parameters for functions that do not support it +# group: [table_function] + +statement ok +pragma enable_verification + +statement error +SELECT * FROM read_csv(thisishopefullyanonexistentfile) +---- +No files found that match the pattern + +# lateral join parameter +statement error +SELECT * FROM (SELECT 'myfile.csv' AS thisishopefullyanonexistentfile), read_csv(thisishopefullyanonexistentfile) +---- +does not support lateral join column parameters diff --git a/third_party/fsst/fsst.h b/third_party/fsst/fsst.h index 6970dedc053f..ea80be6c2444 100644 --- a/third_party/fsst/fsst.h +++ b/third_party/fsst/fsst.h @@ -48,7 +48,7 @@ #ifndef FSST_INCLUDED_H #define FSST_INCLUDED_H -#ifdef _MSC_VER +#if defined(_MSC_VER) && !defined(__clang__) #define __restrict__ #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__ #define __ORDER_LITTLE_ENDIAN__ 2 diff --git a/third_party/re2/util/logging.h b/third_party/re2/util/logging.h index 
diff --git a/test/sql/table_function/lateral_table_function.test b/test/sql/table_function/lateral_table_function.test
new file mode 100644
index 000000000000..18a429222110
--- /dev/null
+++ b/test/sql/table_function/lateral_table_function.test
@@ -0,0 +1,17 @@
+# name: test/sql/table_function/lateral_table_function.test
+# description: Test lateral join table function parameters for functions that do not support it
+# group: [table_function]
+
+statement ok
+pragma enable_verification
+
+statement error
+SELECT * FROM read_csv(thisishopefullyanonexistentfile)
+----
+No files found that match the pattern
+
+# lateral join parameter
+statement error
+SELECT * FROM (SELECT 'myfile.csv' AS thisishopefullyanonexistentfile), read_csv(thisishopefullyanonexistentfile)
+----
+does not support lateral join column parameters
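The new test distinguishes two failures: an unresolved identifier used as a table function argument, and a lateral column reference fed to a table function that cannot handle one. A sketch reproducing both errors from Python (the column name is the test's deliberately nonexistent one):

```python
import duckdb

con = duckdb.connect()
queries = [
    # plain unresolved identifier as the read_csv argument
    "SELECT * FROM read_csv(thisishopefullyanonexistentfile)",
    # lateral column reference coming from the preceding subquery
    "SELECT * FROM (SELECT 'myfile.csv' AS thisishopefullyanonexistentfile), "
    "read_csv(thisishopefullyanonexistentfile)",
]
for q in queries:
    try:
        con.sql(q)
    except duckdb.Error as e:
        print(e)  # 'No files found...' resp. '...does not support lateral join column parameters'
```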
diff --git a/third_party/fsst/fsst.h b/third_party/fsst/fsst.h
index 6970dedc053f..ea80be6c2444 100644
--- a/third_party/fsst/fsst.h
+++ b/third_party/fsst/fsst.h
@@ -48,7 +48,7 @@
 #ifndef FSST_INCLUDED_H
 #define FSST_INCLUDED_H
 
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && !defined(__clang__)
 #define __restrict__
 #define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
 #define __ORDER_LITTLE_ENDIAN__ 2
diff --git a/third_party/re2/util/logging.h b/third_party/re2/util/logging.h
index b65845f602da..4fec23e840b1 100644
--- a/third_party/re2/util/logging.h
+++ b/third_party/re2/util/logging.h
@@ -92,10 +92,11 @@ class LogMessage {
 class LogMessageFatal : public LogMessage {
  public:
   LogMessageFatal(const char* file, int line)
-      : LogMessage(file, line) {}
-  ATTRIBUTE_NORETURN ~LogMessageFatal() {
+      : LogMessage(file, line) {
+    throw std::runtime_error("RE2 Fatal Error");
+  }
+  ~LogMessageFatal() {
     Flush();
-    abort();
   }
  private:
   LogMessageFatal(const LogMessageFatal&) = delete;
diff --git a/third_party/re2/util/mutex.h b/third_party/re2/util/mutex.h
index 45e590ca416d..4392fd408533 100644
--- a/third_party/re2/util/mutex.h
+++ b/third_party/re2/util/mutex.h
@@ -93,8 +93,8 @@ void Mutex::ReaderUnlock() { ReleaseSRWLockShared(&mutex_); }
 
 #define SAFE_PTHREAD(fncall)    \
   do {                          \
-    if ((fncall) != 0) abort(); \
+    if ((fncall) != 0) throw std::runtime_error("RE2 pthread failure"); \
   } while (0)
 
 Mutex::Mutex()             { SAFE_PTHREAD(pthread_rwlock_init(&mutex_, NULL)); }
 Mutex::~Mutex()            { SAFE_PTHREAD(pthread_rwlock_destroy(&mutex_)); }
diff --git a/tools/jdbc/src/main/java/org/duckdb/DuckDBResultSet.java b/tools/jdbc/src/main/java/org/duckdb/DuckDBResultSet.java
index ebd29bb55cfb..aac82d6073e1 100644
--- a/tools/jdbc/src/main/java/org/duckdb/DuckDBResultSet.java
+++ b/tools/jdbc/src/main/java/org/duckdb/DuckDBResultSet.java
@@ -440,9 +440,13 @@ public InputStream getBinaryStream(long pos, long length) {
         return new ByteBufferBackedInputStream(buffer);
     }
 
-    public byte[] getBytes(long pos, int length) {
+    @Override
+    public byte[] getBytes(long pos, int length) throws SQLException {
+        if (pos < 1 || length < 0) {
+            throw new SQLException("Invalid position or length");
+        }
         byte[] bytes = new byte[length];
-        buffer.position((int) pos);
+        buffer.position((int) pos - 1);
         buffer.get(bytes, 0, length);
         return bytes;
     }
diff --git a/tools/jdbc/src/test/java/org/duckdb/TestDuckDBJDBC.java b/tools/jdbc/src/test/java/org/duckdb/TestDuckDBJDBC.java
index adf7ab5ba0eb..8307825bfad2 100644
--- a/tools/jdbc/src/test/java/org/duckdb/TestDuckDBJDBC.java
+++ b/tools/jdbc/src/test/java/org/duckdb/TestDuckDBJDBC.java
@@ -2764,7 +2764,7 @@ public static void test_statement_creation_bug1268() throws Exception {
     }
 
     private static String blob_to_string(Blob b) throws SQLException {
-        return new String(b.getBytes(0, (int) b.length()), StandardCharsets.US_ASCII);
+        return new String(b.getBytes(1, (int) b.length()), StandardCharsets.US_ASCII);
     }
 
     public static void test_blob_bug1090() throws Exception {
diff --git a/tools/odbc/src/common/odbc_utils.cpp b/tools/odbc/src/common/odbc_utils.cpp
index e8415a95b3fd..4f418b1a840a 100644
--- a/tools/odbc/src/common/odbc_utils.cpp
+++ b/tools/odbc/src/common/odbc_utils.cpp
@@ -69,6 +69,8 @@ string OdbcUtils::ParseStringFilter(const string &filter_name, const string &fil
 	} else {
 		filter = filter_name + " LIKE '" + filter_value + "'";
 	}
+	// Handle escape character passed by Power Query SDK
+	filter += " ESCAPE '\\'";
 	return filter;
 }
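The ODBC change appends an ESCAPE clause because the Power Query SDK backslash-escapes LIKE wildcards in catalog filters. The underlying SQL semantics, checked from Python: without an ESCAPE clause the backslash is an ordinary character and `_` stays a single-character wildcard:

```python
import duckdb

con = duckdb.connect()
# the pattern contains a literal backslash, so nothing matches
print(con.sql(r"SELECT 'my_table' LIKE 'my\_table'").fetchone())             # (False,)
# with ESCAPE '\', the underscore is matched literally
print(con.sql(r"SELECT 'my_table' LIKE 'my\_table' ESCAPE '\'").fetchone())  # (True,)
print(con.sql(r"SELECT 'myXtable' LIKE 'my\_table' ESCAPE '\'").fetchone())  # (False,)
```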
diff --git a/tools/pythonpkg/src/include/duckdb_python/pyfilesystem.hpp b/tools/pythonpkg/src/include/duckdb_python/pyfilesystem.hpp
index 9fa45ae2f1f5..09d00fbe8383 100644
--- a/tools/pythonpkg/src/include/duckdb_python/pyfilesystem.hpp
+++ b/tools/pythonpkg/src/include/duckdb_python/pyfilesystem.hpp
@@ -81,6 +81,11 @@ class PythonFilesystem : public FileSystem {
 	bool CanSeek() override {
 		return true;
 	}
+
+	bool IsManuallySet() override {
+		return true;
+	}
+
 	bool OnDiskFile(FileHandle &handle) override {
 		return false;
 	}
diff --git a/tools/pythonpkg/src/include/duckdb_python/pyrelation.hpp b/tools/pythonpkg/src/include/duckdb_python/pyrelation.hpp
index 178fec217f87..e6cd967558b4 100644
--- a/tools/pythonpkg/src/include/duckdb_python/pyrelation.hpp
+++ b/tools/pythonpkg/src/include/duckdb_python/pyrelation.hpp
@@ -281,7 +281,7 @@ struct DuckDBPyRelation {
 	                  const string &groups = "", const string &function_parameter = "", bool ignore_nulls = false,
 	                  const string &projected_columns = "", const string &window_spec = "");
 
-	string GenerateExpressionList(const string &function_name, vector<string> &&aggregated_columns,
+	string GenerateExpressionList(const string &function_name, vector<string> aggregated_columns,
 	                              const string &groups = "", const string &function_parameter = "", bool ignore_nulls = false,
 	                              const string &projected_columns = "", const string &window_spec = "");
diff --git a/tools/pythonpkg/src/pyconnection.cpp b/tools/pythonpkg/src/pyconnection.cpp
index d619be312214..dd366334e71e 100644
--- a/tools/pythonpkg/src/pyconnection.cpp
+++ b/tools/pythonpkg/src/pyconnection.cpp
@@ -57,10 +57,10 @@
 namespace duckdb {
 
-shared_ptr<DuckDBPyConnection> DuckDBPyConnection::default_connection = nullptr;
-DBInstanceCache instance_cache;
-shared_ptr<PythonImportCache> DuckDBPyConnection::import_cache = nullptr;
-PythonEnvironmentType DuckDBPyConnection::environment = PythonEnvironmentType::NORMAL;
+shared_ptr<DuckDBPyConnection> DuckDBPyConnection::default_connection = nullptr;       // NOLINT: allow global
+DBInstanceCache instance_cache;                                                        // NOLINT: allow global
+shared_ptr<PythonImportCache> DuckDBPyConnection::import_cache = nullptr;              // NOLINT: allow global
+PythonEnvironmentType DuckDBPyConnection::environment = PythonEnvironmentType::NORMAL; // NOLINT: allow global
 
 DuckDBPyConnection::~DuckDBPyConnection() {
 	try {
diff --git a/tools/pythonpkg/src/pyrelation.cpp b/tools/pythonpkg/src/pyrelation.cpp
index cf68f35079fd..457e0f72e617 100644
--- a/tools/pythonpkg/src/pyrelation.cpp
+++ b/tools/pythonpkg/src/pyrelation.cpp
@@ -335,10 +335,9 @@ string DuckDBPyRelation::GenerateExpressionList(const string &function_name, con
 	                                  projected_columns, window_spec);
 }
 
-string DuckDBPyRelation::GenerateExpressionList(const string &function_name, vector<string> &&input,
-                                                const string &groups, const string &function_parameter,
-                                                bool ignore_nulls, const string &projected_columns,
-                                                const string &window_spec) {
+string DuckDBPyRelation::GenerateExpressionList(const string &function_name, vector<string> input, const string &groups,
+                                                const string &function_parameter, bool ignore_nulls,
+                                                const string &projected_columns, const string &window_spec) {
 	string expr;
 	if (StringUtil::CIEquals("count", function_name) && input.empty()) {
diff --git a/tools/pythonpkg/src/python_udf.cpp b/tools/pythonpkg/src/python_udf.cpp
index ef5be16ebb48..8850e8e0218d 100644
--- a/tools/pythonpkg/src/python_udf.cpp
+++ b/tools/pythonpkg/src/python_udf.cpp
@@ -162,7 +162,7 @@ static scalar_function_t CreateNativeFunction(PyObject *function, PythonExceptio
                                               const ClientProperties &client_properties) {
 	// Through the capture of the lambda, we have access to the function pointer
 	// We just need to make sure that it doesn't get garbage collected
-	scalar_function_t func = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void {
+	scalar_function_t func = [=](DataChunk &input, ExpressionState &state, Vector &result) -> void { // NOLINT
 		py::gil_scoped_acquire gil;
 
 		// owning references
diff --git a/tools/pythonpkg/tests/extensions/test_httpfs.py b/tools/pythonpkg/tests/extensions/test_httpfs.py
index 4dcd4aa11939..25c1a99e388a 100644
--- a/tools/pythonpkg/tests/extensions/test_httpfs.py
+++ b/tools/pythonpkg/tests/extensions/test_httpfs.py
@@ -70,3 +70,14 @@ def test_http_exception(self, require):
         assert value.status_code != 200
         assert value.body == ''
         assert 'Content-Length' in value.headers
+
+    def test_fsspec_priority(self, require):
+        pytest.importorskip("fsspec")
+        pytest.importorskip("gcsfs")
+        import fsspec
+
+        connection = require('httpfs')
+
+        gcs = fsspec.filesystem("gcs")
+        connection.register_filesystem(gcs)
+        assert connection.sql("select count(*) from 'gcs://ibis-examples/data/band_members.csv.gz'").fetchone() == (3,)
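register_filesystem hands any fsspec-compatible filesystem to DuckDB, which is what the gcsfs-based priority test above relies on. An offline sketch using fsspec's built-in in-memory filesystem instead of a live GCS bucket; the 'memory://' path handling follows fsspec's protocol conventions and is an assumption here:

```python
import duckdb
import fsspec  # assumes fsspec is installed

con = duckdb.connect()
mem = fsspec.filesystem("memory")
with mem.open("data.csv", "w") as f:
    f.write("a,b\n1,2\n")

con.register_filesystem(mem)
print(con.sql("SELECT * FROM 'memory://data.csv'").fetchall())  # [(1, 2)]
```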
diff --git a/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py b/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py
index 379f517167ae..24f10ea87b96 100644
--- a/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py
+++ b/tools/pythonpkg/tests/fast/arrow/test_arrow_types.py
@@ -1,23 +1,16 @@
 import duckdb
+import pytest
 
-try:
-    import pyarrow as pa
-    import pyarrow.dataset as ds
-
-    can_run = True
-except:
-    can_run = False
+pa = pytest.importorskip("pyarrow")
+ds = pytest.importorskip("pyarrow.dataset")
 
 
 class TestArrowTypes(object):
     def test_null_type(self, duckdb_cursor):
-        if not can_run:
-            return
         schema = pa.schema([("data", pa.null())])
         inputs = [pa.array([None, None, None], type=pa.null())]
         arrow_table = pa.Table.from_arrays(inputs, schema=schema)
-        duckdb_conn = duckdb.connect()
-        duckdb_conn.register("testarrow", arrow_table)
+        duckdb_cursor.register("testarrow", arrow_table)
         rel = duckdb.from_arrow(arrow_table).arrow()
         # We turn it to an array of int32 nulls
         schema = pa.schema([("data", pa.int32())])
@@ -25,3 +18,27 @@ class TestArrowTypes(object):
         arrow_table = pa.Table.from_arrays(inputs, schema=schema)
 
         assert rel['data'] == arrow_table['data']
+
+    def test_invalid_struct(self, duckdb_cursor):
+        empty_struct_type = pa.struct([])
+
+        # Create an empty array with the defined struct type
+        empty_array = pa.array([], type=empty_struct_type)
+        arrow_table = pa.Table.from_arrays([empty_array], schema=pa.schema([("data", empty_struct_type)]))
+        with pytest.raises(
+            duckdb.InvalidInputException,
+            match='Attempted to convert a STRUCT with no fields to DuckDB which is not supported',
+        ):
+            duckdb_cursor.register('invalid_struct', arrow_table)
+
+    def test_invalid_union(self, duckdb_cursor):
+        # Create a sparse union array from dense arrays
+        types = pa.array([0, 1, 1], type=pa.int8())
+        sparse_union_array = pa.UnionArray.from_sparse(types, [], type_codes=[])
+
+        arrow_table = pa.Table.from_arrays([sparse_union_array], schema=pa.schema([("data", sparse_union_array.type)]))
+        with pytest.raises(
+            duckdb.InvalidInputException,
+            match='Attempted to convert a UNION with no fields to DuckDB which is not supported',
+        ):
+            duckdb_cursor.register('invalid_union', arrow_table)
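Field-less Arrow STRUCT and UNION types have no DuckDB equivalent, so registration now fails with InvalidInputException instead of producing an unusable table. A minimal standalone reproduction of the STRUCT case:

```python
import duckdb
import pyarrow as pa  # assumes pyarrow is installed

con = duckdb.connect()
empty_struct_table = pa.table({"data": pa.array([], type=pa.struct([]))})
try:
    con.register("empty_struct", empty_struct_table)
except duckdb.InvalidInputException as e:
    print(e)  # Attempted to convert a STRUCT with no fields to DuckDB which is not supported
```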
diff --git a/tools/pythonpkg/tests/fast/relational_api/test_rapi_query.py b/tools/pythonpkg/tests/fast/relational_api/test_rapi_query.py
index 5fdbae2f9f64..8d42f06146bb 100644
--- a/tools/pythonpkg/tests/fast/relational_api/test_rapi_query.py
+++ b/tools/pythonpkg/tests/fast/relational_api/test_rapi_query.py
@@ -5,10 +5,10 @@
 @pytest.fixture()
 def tbl_table():
     con = duckdb.default_connection
-    con.execute("drop table if exists tbl")
+    con.execute("drop table if exists tbl CASCADE")
     con.execute("create table tbl (i integer)")
     yield
-    con.execute('drop table tbl')
+    con.execute('drop table tbl CASCADE')
 
 
 class TestRAPIQuery(object):
diff --git a/tools/shell/linenoise/include/linenoise.hpp b/tools/shell/linenoise/include/linenoise.hpp
index bfa6d81f69bc..7393734649d3 100644
--- a/tools/shell/linenoise/include/linenoise.hpp
+++ b/tools/shell/linenoise/include/linenoise.hpp
@@ -14,6 +14,7 @@
 #include "linenoise.h"
 
 #define LINENOISE_MAX_LINE 204800
+#define LINENOISE_EDITOR
 
 namespace duckdb {
 struct highlightToken;
@@ -98,6 +99,11 @@ class Linenoise {
 	void SearchPrev();
 	void SearchNext();
 
+#ifdef LINENOISE_EDITOR
+	bool EditBufferWithEditor(const char *editor);
+	bool EditFileWithEditor(const string &file_name, const char *editor);
+#endif
+
 	char Search(char c);
 	void RefreshMultiLine();
diff --git a/tools/shell/linenoise/linenoise.cpp b/tools/shell/linenoise/linenoise.cpp
index 9571d3dcdacc..090965c0ca3a 100644
--- a/tools/shell/linenoise/linenoise.cpp
+++ b/tools/shell/linenoise/linenoise.cpp
@@ -20,6 +20,14 @@
 #include
 #endif
 
+#ifdef LINENOISE_EDITOR
+#if defined(WIN32) || defined(__CYGWIN__)
+#define DEFAULT_EDITOR "notepad.exe"
+#else
+#define DEFAULT_EDITOR "vi"
+#endif
+#endif
+
 namespace duckdb {
 
 static linenoiseCompletionCallback *completionCallback = NULL;
@@ -1137,6 +1145,35 @@ int Linenoise::Edit() {
 		switch (c) {
 		case CTRL_J:
 		case ENTER: { /* enter */
+#ifdef LINENOISE_EDITOR
+			if (len > 0) {
+				// check if this contains ".edit"
+
+				// scroll back to last newline
+				idx_t begin_pos;
+				for (begin_pos = len; begin_pos > 0 && buf[begin_pos - 1] != '\n'; begin_pos--) {
+				}
+				// check if line is ".edit"
+				bool open_editor = false;
+				if (begin_pos + 5 == len && memcmp(buf + begin_pos, ".edit", 5) == 0) {
+					open_editor = true;
+				}
+				// check if line is "\\e"
+				if (begin_pos + 2 == len && memcmp(buf + begin_pos, "\\e", 2) == 0) {
+					open_editor = true;
+				}
+				if (open_editor) {
+					// .edit
+					// clear the buffer and open the editor
+					pos = len = begin_pos;
+					if (!EditBufferWithEditor(nullptr)) {
+						// failed to edit - refresh the removal of ".edit" / "\e"
+						RefreshLine();
+						break;
+					}
+				}
+			}
+#endif
 			if (Terminal::IsMultiline() && len > 0) {
 				// check if this forms a complete SQL statement or not
 				buf[len] = '\0';
@@ -1424,4 +1461,156 @@ void Linenoise::LogTokens(const vector<highlightToken> &tokens) {
 #endif
 }
 
+#ifdef LINENOISE_EDITOR
+// .edit functionality - code adapted from psql
+
+bool Linenoise::EditFileWithEditor(const string &file_name, const char *editor) {
+	/* Find an editor to use */
+	if (!editor) {
+		editor = getenv("DUCKDB_EDITOR");
+	}
+	if (!editor) {
+		editor = getenv("EDITOR");
+	}
+	if (!editor) {
+		editor = getenv("VISUAL");
+	}
+	if (!editor) {
+		editor = DEFAULT_EDITOR;
+	}
+
+	/*
+	 * On Unix the EDITOR value should *not* be quoted, since it might include
+	 * switches, eg, EDITOR="pico -t"; it's up to the user to put quotes in it
+	 * if necessary. But this policy is not very workable on Windows, due to
+	 * severe brain damage in their command shell plus the fact that standard
+	 * program paths include spaces.
+	 */
+	string command;
+#ifndef WIN32
+	command = "exec " + string(editor) + " '" + file_name + "'";
+#else
+	command = "\"" + string(editor) + "\" \"" + file_name + "\"";
+#endif
+	int result = system(command.c_str());
+	if (result == -1) {
+		Log("could not start editor \"%s\"\n", editor);
+	} else if (result == 127) {
+		Log("could not start /bin/sh\n");
+	}
+	return result == 0;
+}
+
+bool Linenoise::EditBufferWithEditor(const char *editor) {
+	/* make a temp file to edit */
+#ifndef WIN32
+	const char *tmpdir = getenv("TMPDIR");
+	if (!tmpdir) {
+		tmpdir = "/tmp";
+	}
+#else
+	char tmpdir[MAX_PATH_LENGTH];
+	int ret;
+
+	ret = GetTempPath(MAX_PATH_LENGTH, tmpdir);
+	if (ret == 0 || ret > MAX_PATH_LENGTH) {
+		Log("cannot locate temporary directory: %s", !ret ? strerror(errno) : "");
+		return false;
+	}
+#endif
+	string temporary_file_name;
+#ifndef WIN32
+	temporary_file_name = string(tmpdir) + "/duckdb.edit." + std::to_string(getpid());
+#else
+	temporary_file_name = string(tmpdir) + "duckdb.edit." + std::to_string(getpid());
+#endif
+
+	FILE *f = fopen(temporary_file_name.c_str(), "w+");
+	if (!f) {
+		Log("could not open temporary file \"%s\": %s\n", temporary_file_name, strerror(errno));
+		Terminal::Beep();
+		return false;
+	}
+
+	// edit the current buffer by default
+	const char *write_buffer = buf;
+	idx_t write_len = len;
+	if (write_len == 0) {
+		// if the current buffer is empty we are typing ".edit" as the first command
+		// edit the previous history entry
+		auto edit_index = History::GetLength();
+		if (edit_index >= 2) {
+			auto history_entry = History::GetEntry(edit_index - 2);
+			if (history_entry) {
+				write_buffer = history_entry;
+				write_len = strlen(history_entry);
+			}
+		}
+	}
+
+	// write existing buffer to file
+	if (fwrite(write_buffer, 1, write_len, f) != write_len) {
+		Log("Failed to write data %s: %s\n", temporary_file_name, strerror(errno));
+		fclose(f);
+		remove(temporary_file_name.c_str());
+		Terminal::Beep();
+		return false;
+	}
+	fclose(f);
+
+	/* call editor */
+	if (!EditFileWithEditor(temporary_file_name, editor)) {
+		Terminal::Beep();
+		return false;
+	}
+
+	// read the file contents again
+	f = fopen(temporary_file_name.c_str(), "rb");
+	if (!f) {
+		Log("Failed to open file %s: %s\n", temporary_file_name, strerror(errno));
+		remove(temporary_file_name.c_str());
+		Terminal::Beep();
+		return false;
+	}
+
+	/* read file back into buffer */
+	string new_buffer;
+	char line[1024];
+	while (fgets(line, sizeof(line), f)) {
+		// strip the existing newline from the line obtained from fgets
+		// the reason for that is that we need the line endings to be "\r\n" for rendering purposes
+		idx_t line_len = strlen(line);
+		idx_t orig_len = line_len;
+		while (line_len > 0 && (line[line_len - 1] == '\r' || line[line_len - 1] == '\n')) {
+			line_len--;
+		}
+		new_buffer.append(line, line_len);
+		if (orig_len != line_len) {
+			// we stripped a newline - add a new newline (but this time always \r\n)
+			new_buffer += "\r\n";
+		}
+	}
+	if (ferror(f)) {
+		Log("Failed while reading back buffer %s: %s\n", temporary_file_name, strerror(errno));
+		Terminal::Beep();
+	}
+	fclose(f);
+
+	/* remove temp file */
+	if (remove(temporary_file_name.c_str()) == -1) {
+		Log("Failed to remove file \"%s\": %s\n", temporary_file_name, strerror(errno));
+		Terminal::Beep();
+		return false;
+	}
+
+	// copy back into buffer
+	memcpy(buf, new_buffer.c_str(), new_buffer.size());
+	len = new_buffer.size();
+	pos = len;
+	RefreshLine();
+
+	return true;
+}
+#endif
+
 } // namespace duckdb
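EditBufferWithEditor performs a psql-style round trip: write the buffer (or, when it is empty, the previous history entry) to a temp file, run the editor resolved from DUCKDB_EDITOR, EDITOR, or VISUAL, then read the result back with \r\n line endings for rendering. The same round trip sketched in Python for reference; the helper name and the .sql suffix are illustrative only:

```python
import os
import subprocess
import tempfile

def edit_buffer(buffer: str) -> str:
    # same environment-variable fallback order as EditFileWithEditor above
    editor = (os.environ.get("DUCKDB_EDITOR") or os.environ.get("EDITOR")
              or os.environ.get("VISUAL") or "vi")
    fd, path = tempfile.mkstemp(suffix=".sql")
    try:
        with os.fdopen(fd, "w") as f:
            f.write(buffer)
        # the C++ code goes through the shell so that EDITOR may carry
        # switches (e.g. EDITOR="pico -t"); this sketch does not
        subprocess.run([editor, path], check=True)
        with open(path) as f:
            return f.read()
    finally:
        os.remove(path)
```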
diff --git a/tools/shell/shell.c b/tools/shell/shell.c
index b6abdc384c46..d228bee5ff98 100644
--- a/tools/shell/shell.c
+++ b/tools/shell/shell.c
@@ -685,6 +685,15 @@ static char *local_getline(char *zLine, FILE *in){
   int nLine = zLine==0 ? 0 : 100;
   int n = 0;
 
+#if defined(_WIN32) || defined(WIN32)
+  int is_stdin = stdin_is_interactive && in==stdin;
+  int is_utf8 = 0;
+  if (is_stdin) {
+    if (SetConsoleCP(CP_UTF8)) {
+      is_utf8 = 1;
+    }
+  }
+#endif
   while( 1 ){
     if( n+100>nLine ){
       nLine = nLine*2 + 100;
@@ -710,7 +719,7 @@ static char *local_getline(char *zLine, FILE *in){
 #if defined(_WIN32) || defined(WIN32)
     /* For interactive input on Windows systems, translate the
     ** multi-byte characterset characters into UTF-8. */
-    if( stdin_is_interactive && in==stdin ){
+    if( is_stdin && !is_utf8 ){
       char *zTrans = sqlite3_win32_mbcs_to_utf8_v2(zLine, 0);
       if( zTrans ){
         int nTrans = strlen30(zTrans)+1;
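The first hunk asks the Windows console for UTF-8 input up front; only when SetConsoleCP(CP_UTF8) fails does the shell fall back to the old MBCS-to-UTF-8 translation in the second hunk. The same Win32 call, driven via ctypes purely for illustration:

```python
import ctypes
import sys

if sys.platform == "win32":
    CP_UTF8 = 65001
    # SetConsoleCP returns nonzero on success, mirroring the is_utf8 flag
    if ctypes.windll.kernel32.SetConsoleCP(CP_UTF8):
        print("console input code page is now UTF-8")
    else:
        print("falling back to MBCS translation")
```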
@@ -13594,11 +13603,13 @@ static const char *(azHelp[]) = {
   ".changes on|off          Show number of rows changed by SQL",
   ".check GLOB              Fail if output since .testcase does not match",
   ".columns                 Column-wise rendering of query results",
+#ifdef HAVE_LINENOISE
   ".constant ?COLOR?        Sets the syntax highlighting color used for constant values",
   "   COLOR is one of:",
   "   red|green|yellow|blue|magenta|cyan|white|brightblack|brightred|brightgreen",
   "   brightyellow|brightblue|brightmagenta|brightcyan|brightwhite",
   ".constantcode ?CODE?     Sets the syntax highlighting terminal code used for constant values",
+#endif
   ".databases               List names and files of attached databases",
   ".dump ?TABLE?            Render database content as SQL",
   "   Options:",
@@ -13615,12 +13626,22 @@
   "      trigger               Like \"full\" but also show trigger bytecode",
   ".excel                   Display the output of next command in spreadsheet",
   "   --bom                 Put a UTF8 byte-order mark on intermediate file",
+#ifdef HAVE_LINENOISE
+  ".edit                    Opens an external text editor to edit a query.",
+  "   Notes:",
+  "   * The editor is read from the environment variables",
+  "     DUCKDB_EDITOR, EDITOR, VISUAL in order",
+  "   * If none of these are set, the default editor is vi",
+  "   * \\e can be used as an alias for .edit",
+#endif
   ".exit ?CODE?             Exit this program with return-code CODE",
   ".explain ?on|off|auto?   Change the EXPLAIN formatting mode. Default: auto",
   ".fullschema ?--indent?   Show schema and the content of sqlite_stat tables",
   ".headers on|off          Turn display of headers on or off",
   ".help ?-all? ?PATTERN?   Show help text for PATTERN",
+#ifdef HAVE_LINENOISE
   ".highlight [on|off]      Toggle syntax highlighting in the shell on/off",
+#endif
   ".import FILE TABLE       Import data from FILE into TABLE",
   "   Options:",
   "     --ascii               Use \\037 and \\036 as column and row separators",
@@ -13640,11 +13661,13 @@
 #ifdef SQLITE_ENABLE_IOTRACE
   ".iotrace FILE            Enable I/O diagnostic logging to FILE",
 #endif
+#ifdef HAVE_LINENOISE
   ".keyword ?COLOR?         Sets the syntax highlighting color used for keywords",
   "   COLOR is one of:",
   "   red|green|yellow|blue|magenta|cyan|white|brightblack|brightred|brightgreen",
   "   brightyellow|brightblue|brightmagenta|brightcyan|brightwhite",
   ".keywordcode ?CODE?      Sets the syntax highlighting terminal code used for keywords",
+#endif
   ".lint OPTIONS            Report potential schema issues.",
   "   Options:",
   "      fkey-indexes          Find missing foreign key indexes",
diff --git a/tools/shell/tests/test_read_from_stdin.py b/tools/shell/tests/test_read_from_stdin.py
index c75c3daa755d..a37d749c5c3e 100644
--- a/tools/shell/tests/test_read_from_stdin.py
+++ b/tools/shell/tests/test_read_from_stdin.py
@@ -141,6 +141,27 @@ def test_read_stdin_json_auto(self, shell, json_extension):
             '5|Raising Arizona'
         ])
 
+    def test_read_stdin_json_array(self, shell, json_extension):
+        test = (
+            ShellTest(shell)
+            .input_file('data/json/11407.json')
+            .statement("""
+                create table mytable as select * from
+                read_json_auto('/dev/stdin')
+            """)
+            .statement("select * from mytable;")
+            .add_argument(
+                '-list',
+                ':memory:'
+            )
+        )
+        result = test.run()
+        result.check_stdout([
+            'k',
+            'v',
+            'v2'
+        ])
+
     def test_read_stdin_json_auto_recursive_cte(self, shell, json_extension):
         test = (
             ShellTest(shell)
diff --git a/tools/sqlite3_api_wrapper/sqlite3/stripped_sqlite_int.h b/tools/sqlite3_api_wrapper/sqlite3/stripped_sqlite_int.h
index 2836d059a9f8..3e19d80e7976 100644
--- a/tools/sqlite3_api_wrapper/sqlite3/stripped_sqlite_int.h
+++ b/tools/sqlite3_api_wrapper/sqlite3/stripped_sqlite_int.h
@@ -20,6 +20,7 @@ typedef uint64_t sqlite3_uint64;
 #ifdef USE_DUCKDB_SHELL_WRAPPER
 #include "duckdb_shell_wrapper.h"
 void *sqlite3_realloc64(void *ptr, sqlite3_uint64 n);
+void sqlite3_free(void *ptr);
 #else
 #define sqlite3_realloc64 realloc
 #define sqlite3_free free
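The new shell test pipes a file whose top level is a JSON array through /dev/stdin into read_json_auto. The same shape handled without a shell pipeline, as a sketch; the file content is invented to match the test's expected k/v/v2 header output, and the json extension is assumed to autoload:

```python
import duckdb
import tempfile

with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    f.write('[{"k": 1, "v": "a", "v2": true}, {"k": 2, "v": "b", "v2": false}]')
    path = f.name

print(duckdb.sql(f"SELECT * FROM read_json_auto('{path}')").fetchall())
# [(1, 'a', True), (2, 'b', False)]
```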