diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index ffa33f1259d99..e2c348586d797 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -19,10 +19,11 @@ By default, **CHOOSE MASTER ONLY** so your changes will be applied to the next T For details, see [tips for choosing the affected versions](https://github.com/pingcap/docs/blob/master/CONTRIBUTING.md#guideline-for-choosing-the-affected-versions). - [ ] master (the latest development version) +- [ ] v7.4 (TiDB 7.4 versions) +- [ ] v7.3 (TiDB 7.3 versions) - [ ] v7.2 (TiDB 7.2 versions) - [ ] v7.1 (TiDB 7.1 versions) - [ ] v7.0 (TiDB 7.0 versions) -- [ ] v6.6 (TiDB 6.6 versions) - [ ] v6.5 (TiDB 6.5 versions) - [ ] v6.1 (TiDB 6.1 versions) - [ ] v5.4 (TiDB 5.4 versions) diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index a0d274e496359..bbdb5a05a804a 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -2,7 +2,7 @@ name: cron on: schedule: - - cron: "30 15 * * *" + - cron: "30 15 */3 * *" workflow_dispatch: jobs: @@ -18,12 +18,12 @@ jobs: - uses: actions/checkout@v3 name: Download docs repo and specified branch with: - ref: "i18n-ja-release-5.4" + ref: "i18n-ja-release-7.1" path: "docs" - uses: actions/setup-node@v3 - name: Setup node 16 + name: Setup node 18 with: - node-version: 16 + node-version: 18 - run: | sudo apt install tree -y @@ -63,59 +63,73 @@ jobs: git commit -m "update translated files" git push - ja-6-1: - runs-on: ubuntu-latest +# When ja-kernal version is different with cloud, open the comment and run the github action! 
+# ja-cloud: +# runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - name: Download translator repo - with: - repository: "shczhen/markdown-translator" - path: "markdown-translator" - - uses: actions/checkout@v3 - name: Download docs repo and specified branch - with: - ref: "i18n-ja-release-6.1" - path: "docs" - - uses: actions/setup-node@v3 - name: Setup node 16 - with: - node-version: 16 +# steps: +# - uses: actions/checkout@v3 +# name: Download translator repo +# with: +# repository: "shczhen/markdown-translator" +# path: "markdown-translator" +# - uses: actions/checkout@v3 +# name: Download docs repo and specified branch +# with: +# ref: "i18n-ja-release-7.1" +# path: "docs" +# - uses: actions/setup-node@v3 +# name: Setup node 18 +# with: +# node-version: 18 - - run: | - sudo apt install tree -y +# - run: | +# sudo apt install tree -y - - name: Download files by comparing commits - run: | - export GH_TOKEN=${{github.token}} - cd docs - npm i - node scripts/filterUpdateFiles.js - tree tmp - cd .. - - name: Copy new files to translator folder - run: | - cp -r docs/tmp markdown-translator/markdowns - - name: Config and translate - run: | - cd markdown-translator - echo ${{secrets.GCP_KEY}} | base64 --decode >> key.json - export GOOGLE_APPLICATION_CREDENTIALS=key.json - export PROJECT_ID=${{ secrets.GCP_PROJECT_ID }} - export GLOSSARY_ID=${{ secrets.GCP_GLOSSARY_ID }} - yarn - node src/index.js - cd .. - - name: Copy translated files to docs repo - run: | - cp -r markdown-translator/output/markdowns/* docs/ +# - name: Download files by comparing commits +# run: | +# export GH_TOKEN=${{github.token}} +# cd docs +# npm i +# node scripts/filterUpdateFiles.js +# tree tmp +# cd .. 
+# - name: Copy new files to translator folder +# run: | +# cp -r docs/tmp markdown-translator/markdowns +# - name: Config and translate +# run: | +# cd markdown-translator +# echo ${{secrets.GCP_KEY}} | base64 --decode >> key.json +# export GOOGLE_APPLICATION_CREDENTIALS=key.json +# export PROJECT_ID=${{ secrets.GCP_PROJECT_ID }} +# export GLOSSARY_ID=${{ secrets.GCP_GLOSSARY_ID }} +# yarn +# node src/index.js +# cd .. +# - name: Copy translated files to docs repo +# run: | +# cp -r markdown-translator/output/markdowns/* docs/ - - name: Git commit and push +# - name: Git commit and push +# run: | +# cd docs +# git status +# git config user.name github-actions +# git config user.email github-actions@github.com +# git add . +# git commit -m "update translated files" +# git push + dispatch: + runs-on: ubuntu-latest + needs: [ja] + + steps: + - name: trigger docs-staging workflow run: | - cd docs - git status - git config user.name github-actions - git config user.email github-actions@github.com - git add . 
- git commit -m "update translated files" - git push + curl \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: token ${{ secrets.DOCS_STAGING }}" \ + https://api.github.com/repos/pingcap/docs-staging/actions/workflows/update.yml/dispatches \ + -d '{"ref":"main","inputs":{"full": "false", "repo":"${{ github.repository }}","branch":"i18n-ja-release-7.1"}}' diff --git a/.github/workflows/dispatch.yml b/.github/workflows/dispatch.yml index fbe9a446d8172..35c5683680f50 100644 --- a/.github/workflows/dispatch.yml +++ b/.github/workflows/dispatch.yml @@ -6,8 +6,9 @@ on: - ".github/**" branches: - master + - release-7.2 + - release-7.1 - release-7.0 - - release-6.6 - release-6.5 - release-6.1 - release-5.4 diff --git a/.github/workflows/prevent-deletion.yaml b/.github/workflows/prevent-deletion.yaml index 76786ec54f6c4..62b6c32c9a506 100644 --- a/.github/workflows/prevent-deletion.yaml +++ b/.github/workflows/prevent-deletion.yaml @@ -23,7 +23,7 @@ jobs: - name: Find changes run: | git rev-parse '${{ github.event.pull_request.head.sha }}' - if git diff --name-only --diff-filter 'D' HEAD '${{ github.event.pull_request.head.sha }}' | grep -E '^media/.*\.(jpg|png|jpeg|gif)$' >/tmp/changed_files; then + if git diff --merge-base --name-only --diff-filter 'D' HEAD '${{ github.event.pull_request.head.sha }}' | grep -E '^media/.*\.(jpg|png|jpeg|gif)$' >/tmp/changed_files; then cat /tmp/changed_files echo '{"name":"Image Deletion Check","head_sha":"${{ github.event.pull_request.head.sha }}","status":"completed","conclusion":"failure"}' > /tmp/body.json jq \ diff --git a/OWNERS b/OWNERS index 64d7b71f3aecd..ec95e7e30cb68 100644 --- a/OWNERS +++ b/OWNERS @@ -1,12 +1,11 @@ # See the OWNERS docs at https://go.k8s.io/owners approvers: - # - docs-maintainers - - lilin90 - - qiancai - # - docs-committers - breezewish + - CaitinChen - CharLotteiu + - cofyc - csuzhangxc + - DanielZhangQD - dcalvin - dragonly - en-jin19 @@ -15,24 +14,41 @@ approvers: - kissmydb - 
lance6716 - lichunzhu + - lilin90 - Liuxiaozhen12 + - morgo - Oreoxmt - overvenus - - QueenyJin + - qiancai + - queenypingcap - ran-huang + - shichun-0415 + - SunRunAway - tangenta + - TomShawn + - toutdesuite + - WangXiangUSTC + - yikeke + - YiniXu9506 reviewers: - # - docs-reviewers - 3pointer - amyangfei - anotherrachel + - aylei - crazycs520 - dveeden + - ericsyh - glkappe - GMHDBJD - Icemap - Joyinqin + - junlan-zhang - KanShiori - lucklove + - lysu + - ngaut + - superlzs0476 - tiancaiamao + - weekface + - Yisaer - zimulala diff --git a/README.md b/README.md index 9ed42cc993809..19e03952685b6 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,9 @@ Currently, we maintain the following versions of TiDB documentation in different | Branch name | TiDB docs version | | :---------|:----------| | [`master`](https://github.com/pingcap/docs/tree/master) | The latest development version | +| [`release-7.3`](https://github.com/pingcap/docs/tree/release-7.3) | 7.3 Development Milestone Release | +| [`release-7.2`](https://github.com/pingcap/docs/tree/release-7.2) | 7.2 Development Milestone Release | +| [`release-7.1`](https://github.com/pingcap/docs/tree/release-7.1) | 7.1 LTS (Long-Term Support) version | | [`release-7.0`](https://github.com/pingcap/docs/tree/release-7.0) | 7.0 Development Milestone Release | | [`release-6.6`](https://github.com/pingcap/docs/tree/release-6.6) | 6.6 Development Milestone Release | | [`release-6.5`](https://github.com/pingcap/docs/tree/release-6.5) | 6.5 LTS (Long-Term Support) version | diff --git a/TOC-tidb-cloud.md b/TOC-tidb-cloud.md index 1802e1d183456..80443485852d0 100644 --- a/TOC-tidb-cloud.md +++ b/TOC-tidb-cloud.md @@ -181,6 +181,7 @@ - [Wrong Index Solution](/wrong-index-solution.md) - [Distinct Optimization](/agg-distinct-optimization.md) - [Cost Model](/cost-model.md) + - [Runtime Filter](/runtime-filter.md) - [Prepared Execution Plan Cache](/sql-prepared-plan-cache.md) - [Non-Prepared Execution Plan 
Cache](/sql-non-prepared-plan-cache.md) - Control Execution Plans @@ -188,6 +189,7 @@ - [Optimizer Hints](/optimizer-hints.md) - [SQL Plan Management](/sql-plan-management.md) - [The Blocklist of Optimization Rules and Expression Pushdown](/blocklist-control-plan.md) + - [Optimizer Fix Controls](/optimizer-fix-controls.md) - [TiKV Follower Read](/follower-read.md) - [Coprocessor Cache](/coprocessor-cache.md) - Garbage Collection (GC) @@ -263,6 +265,10 @@ - [`ADMIN CANCEL DDL`](/sql-statements/sql-statement-admin-cancel-ddl.md) - [`ADMIN CHECKSUM TABLE`](/sql-statements/sql-statement-admin-checksum-table.md) - [`ADMIN CHECK [TABLE|INDEX]`](/sql-statements/sql-statement-admin-check-table-index.md) + - [`ADMIN CLEANUP INDEX`](/sql-statements/sql-statement-admin-cleanup.md) + - [`ADMIN PAUSE DDL`](/sql-statements/sql-statement-admin-pause-ddl.md) + - [`ADMIN RECOVER INDEX`](/sql-statements/sql-statement-admin-recover.md) + - [`ADMIN RESUME DDL`](/sql-statements/sql-statement-admin-resume-ddl.md) - [`ADMIN SHOW DDL [JOBS|JOB QUERIES]`](/sql-statements/sql-statement-admin-show-ddl.md) - [`ALTER DATABASE`](/sql-statements/sql-statement-alter-database.md) - [`ALTER INDEX`](/sql-statements/sql-statement-alter-index.md) @@ -273,7 +279,6 @@ - [`ANALYZE TABLE`](/sql-statements/sql-statement-analyze-table.md) - [`BATCH`](/sql-statements/sql-statement-batch.md) - [`BEGIN`](/sql-statements/sql-statement-begin.md) - - [`CALIBRATE RESOURCE`](/sql-statements/sql-statement-calibrate-resource.md) - [`CHANGE COLUMN`](/sql-statements/sql-statement-change-column.md) - [`COMMIT`](/sql-statements/sql-statement-commit.md) - [`CHANGE DRAINER`](/sql-statements/sql-statement-change-drainer.md) @@ -319,8 +324,10 @@ - [`KILL [TIDB]`](/sql-statements/sql-statement-kill.md) - [`LOAD DATA`](/sql-statements/sql-statement-load-data.md) - [`LOCK STATS`](/sql-statements/sql-statement-lock-stats.md) + - [`LOCK TABLES` and `UNLOCK TABLES`](/sql-statements/sql-statement-lock-tables-and-unlock-tables.md) - 
[`MODIFY COLUMN`](/sql-statements/sql-statement-modify-column.md) - [`PREPARE`](/sql-statements/sql-statement-prepare.md) + - [`QUERY WATCH`](/sql-statements/sql-statement-query-watch.md) - [`RECOVER TABLE`](/sql-statements/sql-statement-recover-table.md) - [`RENAME INDEX`](/sql-statements/sql-statement-rename-index.md) - [`RENAME TABLE`](/sql-statements/sql-statement-rename-table.md) @@ -464,6 +471,7 @@ - [`PROCESSLIST`](/information-schema/information-schema-processlist.md) - [`REFERENTIAL_CONSTRAINTS`](/information-schema/information-schema-referential-constraints.md) - [`RESOURCE_GROUPS`](/information-schema/information-schema-resource-groups.md) + - [`RUNAWAY_WATCHES`](/information-schema/information-schema-runaway-watches.md) - [`SCHEMATA`](/information-schema/information-schema-schemata.md) - [`SEQUENCES`](/information-schema/information-schema-sequences.md) - [`SESSION_VARIABLES`](/information-schema/information-schema-session-variables.md) @@ -477,6 +485,8 @@ - [`TIDB_SERVERS_INFO`](/information-schema/information-schema-tidb-servers-info.md) - [`TIDB_TRX`](/information-schema/information-schema-tidb-trx.md) - [`TIFLASH_REPLICA`](/information-schema/information-schema-tiflash-replica.md) + - [`TIFLASH_SEGMENTS`](/information-schema/information-schema-tiflash-segments.md) + - [`TIFLASH_TABLES`](/information-schema/information-schema-tiflash-tables.md) - [`TIKV_REGION_PEERS`](/information-schema/information-schema-tikv-region-peers.md) - [`TIKV_REGION_STATUS`](/information-schema/information-schema-tikv-region-status.md) - [`TIKV_STORE_STATUS`](/information-schema/information-schema-tikv-store-status.md) @@ -496,10 +506,13 @@ - [Spill to Disk](/tiflash/tiflash-spill-disk.md) - [Dumpling](/dumpling-overview.md) - [Table Filter](/table-filter.md) - - [Troubleshoot Inconsistency Between Data and Indexes](/troubleshoot-data-inconsistency-errors.md) - [Serverless Tier Limitations](/tidb-cloud/serverless-tier-limitations.md) - [Resource 
Control](/tidb-resource-control.md) - [TiDB Backend Task Distributed Execution Framework](/tidb-distributed-execution-framework.md) + - [DDL Execution Principles and Best Practices](/ddl-introduction.md) + - [Troubleshoot Inconsistency Between Data and Indexes](/troubleshoot-data-inconsistency-errors.md) + - [Support](/tidb-cloud/tidb-cloud-support.md) + - [Glossary](/tidb-cloud/tidb-cloud-glossary.md) - FAQs - [TiDB Cloud FAQs](/tidb-cloud/tidb-cloud-faq.md) - [Serverless Tier FAQs](/tidb-cloud/serverless-tier-faqs.md) @@ -508,5 +521,3 @@ - [2022](/tidb-cloud/release-notes-2022.md) - [2021](/tidb-cloud/release-notes-2021.md) - [2020](/tidb-cloud/release-notes-2020.md) -- [Support](/tidb-cloud/tidb-cloud-support.md) -- [Glossary](/tidb-cloud/tidb-cloud-glossary.md) diff --git a/TOC.md b/TOC.md index c040634eff642..1c8f2b8483e6a 100644 --- a/TOC.md +++ b/TOC.md @@ -4,7 +4,7 @@ - [Docs Home](https://docs.pingcap.com/) - About TiDB - [TiDB Introduction](/overview.md) - - [TiDB 7.1 (upcoming) Release Notes](/releases/release-7.1.0.md) + - [TiDB 7.3 Release Notes](/releases/release-7.3.0.md) - [Features](/basic-features.md) - [MySQL Compatibility](/mysql-compatibility.md) - [TiDB Limitations](/tidb-limitations.md) @@ -19,7 +19,7 @@ - Develop - [Overview](/develop/dev-guide-overview.md) - Quick Start - - [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md) + - [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md) - [CRUD SQL in TiDB](/develop/dev-guide-tidb-crud-sql.md) - Example Applications - Java @@ -116,6 +116,7 @@ - Migrate - [Overview](/migration-overview.md) - [Migration Tools](/migration-tools.md) + - [Import Best Practices](/tidb-lightning/data-import-best-practices.md) - Migration Scenarios - [Migrate from Aurora](/migrate-aurora-to-tidb.md) - [Migrate MySQL of Small Datasets](/migrate-small-mysql-to-tidb.md) @@ -142,7 +143,7 @@ - [Use TiUP](/upgrade-tidb-using-tiup.md) - [Use TiDB 
Operator](https://docs.pingcap.com/tidb-in-kubernetes/stable/upgrade-a-tidb-cluster) - [TiDB Smooth Upgrade](/smooth-upgrade-tidb.md) - - [TiFlash v6.2.0 Upgrade Guide](/tiflash-620-upgrade-guide.md) + - [TiFlash Upgrade Guide](/tiflash-upgrade-guide.md) - Scale - [Use TiUP (Recommended)](/scale-tidb-using-tiup.md) - [Use TiDB Operator](https://docs.pingcap.com/tidb-in-kubernetes/stable/scale-a-tidb-cluster) @@ -222,6 +223,7 @@ - [Performance Analysis and Tuning](/performance-tuning-methods.md) - [Performance Tuning Practices for OLTP Scenarios](/performance-tuning-practices.md) - [Latency Breakdown](/latency-breakdown.md) + - [TiDB Best Practices on Public Cloud](/best-practices-on-public-cloud.md) - Configuration Tuning - [Tune Operating System Performance](/tune-operating-system.md) - [Tune TiDB Memory](/configure-memory-usage.md) @@ -268,6 +270,7 @@ - [Wrong Index Solution](/wrong-index-solution.md) - [Distinct Optimization](/agg-distinct-optimization.md) - [Cost Model](/cost-model.md) + - [Runtime Filter](/runtime-filter.md) - [Prepared Execution Plan Cache](/sql-prepared-plan-cache.md) - [Non-Prepared Execution Plan Cache](/sql-non-prepared-plan-cache.md) - Control Execution Plans @@ -275,6 +278,7 @@ - [Optimizer Hints](/optimizer-hints.md) - [SQL Plan Management](/sql-plan-management.md) - [The Blocklist of Optimization Rules and Expression Pushdown](/blocklist-control-plan.md) + - [Optimizer Fix Controls](/optimizer-fix-controls.md) - Tutorials - [Multiple Availability Zones in One Region Deployment](/multi-data-centers-in-one-city-deployment.md) - [Three Availability Zones in Two Regions Deployment](/three-data-centers-in-two-cities-deployment.md) @@ -550,7 +554,9 @@ - [TiCDC CSV Protocol](/ticdc/ticdc-csv.md) - [TiCDC Open API v2](/ticdc/ticdc-open-api-v2.md) - [TiCDC Open API v1](/ticdc/ticdc-open-api.md) - - [Guide for Developing a Storage Sink Consumer](/ticdc/ticdc-storage-consumer-dev-guide.md) + - TiCDC Data Consumption + - [TiCDC Row Data Checksum 
Verification Based on Avro](/ticdc/ticdc-avro-checksum-verification.md) + - [Guide for Developing a Storage Sink Consumer](/ticdc/ticdc-storage-consumer-dev-guide.md) - [Compatibility](/ticdc/ticdc-compatibility.md) - [Troubleshoot](/ticdc/troubleshoot-ticdc.md) - [FAQs](/ticdc/ticdc-faq.md) @@ -614,6 +620,7 @@ - [Spill to Disk](/tiflash/tiflash-spill-disk.md) - [Data Validation](/tiflash/tiflash-data-validation.md) - [Compatibility](/tiflash/tiflash-compatibility.md) + - [Pipeline Execution Model](/tiflash/tiflash-pipeline-model.md) - [System Variables](/system-variables.md) - Configuration File Parameters - [tidb-server](/tidb-configuration-file.md) @@ -673,7 +680,9 @@ - [`ADMIN CHECKSUM TABLE`](/sql-statements/sql-statement-admin-checksum-table.md) - [`ADMIN CHECK [TABLE|INDEX]`](/sql-statements/sql-statement-admin-check-table-index.md) - [`ADMIN CLEANUP`](/sql-statements/sql-statement-admin-cleanup.md) + - [`ADMIN PAUSE DDL`](/sql-statements/sql-statement-admin-pause-ddl.md) - [`ADMIN RECOVER INDEX`](/sql-statements/sql-statement-admin-recover.md) + - [`ADMIN RESUME DDL`](/sql-statements/sql-statement-admin-resume-ddl.md) - [`ADMIN SHOW DDL [JOBS|JOB QUERIES]`](/sql-statements/sql-statement-admin-show-ddl.md) - [`ADMIN SHOW TELEMETRY`](/sql-statements/sql-statement-admin-show-telemetry.md) - [`ALTER DATABASE`](/sql-statements/sql-statement-alter-database.md) @@ -689,6 +698,7 @@ - [`BATCH`](/sql-statements/sql-statement-batch.md) - [`BEGIN`](/sql-statements/sql-statement-begin.md) - [`CALIBRATE RESOURCE`](/sql-statements/sql-statement-calibrate-resource.md) + - [`CANCEL IMPORT JOB`](/sql-statements/sql-statement-cancel-import-job.md) - [`CHANGE COLUMN`](/sql-statements/sql-statement-change-column.md) - [`COMMIT`](/sql-statements/sql-statement-commit.md) - [`CHANGE DRAINER`](/sql-statements/sql-statement-change-drainer.md) @@ -732,6 +742,7 @@ - [`FLUSH TABLES`](/sql-statements/sql-statement-flush-tables.md) - [`GRANT 
`](/sql-statements/sql-statement-grant-privileges.md) - [`GRANT `](/sql-statements/sql-statement-grant-role.md) + - [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md) - [`INSERT`](/sql-statements/sql-statement-insert.md) - [`KILL [TIDB]`](/sql-statements/sql-statement-kill.md) - [`LOAD DATA`](/sql-statements/sql-statement-load-data.md) @@ -740,6 +751,7 @@ - [`LOCK TABLES` and `UNLOCK TABLES`](/sql-statements/sql-statement-lock-tables-and-unlock-tables.md) - [`MODIFY COLUMN`](/sql-statements/sql-statement-modify-column.md) - [`PREPARE`](/sql-statements/sql-statement-prepare.md) + - [`QUERY WATCH`](/sql-statements/sql-statement-query-watch.md) - [`RECOVER TABLE`](/sql-statements/sql-statement-recover-table.md) - [`RENAME USER`](/sql-statements/sql-statement-rename-user.md) - [`RENAME INDEX`](/sql-statements/sql-statement-rename-index.md) @@ -778,6 +790,7 @@ - [`SHOW ERRORS`](/sql-statements/sql-statement-show-errors.md) - [`SHOW [FULL] FIELDS FROM`](/sql-statements/sql-statement-show-fields-from.md) - [`SHOW GRANTS`](/sql-statements/sql-statement-show-grants.md) + - [`SHOW IMPORT JOB`](/sql-statements/sql-statement-show-import-job.md) - [`SHOW INDEX [FROM|IN]`](/sql-statements/sql-statement-show-index.md) - [`SHOW INDEXES [FROM|IN]`](/sql-statements/sql-statement-show-indexes.md) - [`SHOW KEYS [FROM|IN]`](/sql-statements/sql-statement-show-keys.md) @@ -896,6 +909,7 @@ - [`PROCESSLIST`](/information-schema/information-schema-processlist.md) - [`REFERENTIAL_CONSTRAINTS`](/information-schema/information-schema-referential-constraints.md) - [`RESOURCE_GROUPS`](/information-schema/information-schema-resource-groups.md) + - [`RUNAWAY_WATCHES`](/information-schema/information-schema-runaway-watches.md) - [`SCHEMATA`](/information-schema/information-schema-schemata.md) - [`SEQUENCES`](/information-schema/information-schema-sequences.md) - [`SESSION_VARIABLES`](/information-schema/information-schema-session-variables.md) @@ -976,13 +990,19 @@ - [Release 
Timeline](/releases/release-timeline.md) - [TiDB Versioning](/releases/versioning.md) - [TiDB Installation Packages](/binary-package.md) - - v7.1 (upcoming) + - v7.3 + - [7.3.0-DMR](/releases/release-7.3.0.md) + - v7.2 + - [7.2.0-DMR](/releases/release-7.2.0.md) + - v7.1 + - [7.1.1](/releases/release-7.1.1.md) - [7.1.0](/releases/release-7.1.0.md) - v7.0 - [7.0.0-DMR](/releases/release-7.0.0.md) - v6.6 - [6.6.0-DMR](/releases/release-6.6.0.md) - v6.5 + - [6.5.3](/releases/release-6.5.3.md) - [6.5.2](/releases/release-6.5.2.md) - [6.5.1](/releases/release-6.5.1.md) - [6.5.0](/releases/release-6.5.0.md) @@ -993,6 +1013,7 @@ - v6.2 - [6.2.0-DMR](/releases/release-6.2.0.md) - v6.1 + - [6.1.7](/releases/release-6.1.7.md) - [6.1.6](/releases/release-6.1.6.md) - [6.1.5](/releases/release-6.1.5.md) - [6.1.4](/releases/release-6.1.4.md) diff --git a/_docHome.md b/_docHome.md index 3d9a36079c623..0e5d5e46b8f74 100644 --- a/_docHome.md +++ b/_docHome.md @@ -9,25 +9,25 @@ hide_leftNav: true -TiDB Cloud is a fully-managed Database-as-a-Service (DBaaS) that brings everything great about TiDB to your cloud, and lets you focus on your applications, not the complexities of your database. +TiDB Cloud is a fully-managed Database-as-a-Service (DBaaS) that brings everything great about TiDB to your cloud, letting you focus on your applications instead of the complexities of your database. -See the documentation of TiDB Cloud +View the documentation for TiDB Cloud. -Guides you through an easy way to get started with TiDB Cloud +Guide for an easy way to get started with TiDB Cloud. -Helps you quickly complete a Proof of Concept (PoC) of TiDB Cloud +Helps you quickly complete a Proof of Concept (PoC) with TiDB Cloud. @@ -49,31 +49,31 @@ Get the power of a cloud-native, distributed SQL database built for real-time an --> -TiDB is an open-source distributed SQL database that supports Hybrid Transactional and Analytical Processing (HTAP) workloads. 
It is MySQL compatible and features horizontal scalability, strong consistency, and high availability. You can deploy TiDB on premises or in the cloud. +TiDB is an open-source distributed SQL database that supports Hybrid Transactional and Analytical Processing (HTAP) workloads. It is MySQL compatible and features horizontal scalability, strong consistency, and high availability. You can deploy TiDB in a self-hosted environment or in the cloud. -See the documentation of TiDB +View the documentation for TiDB. -Walks you through the quickest way to get started with TiDB +Walks you through the quickest way to get started with TiDB. -Learn how to deploy TiDB locally in production +Learn how to deploy TiDB locally in a production environment. -The open-source TiDB platform is released under the Apache 2.0 license, and supported by the community. +The open-source TiDB platform is released under the Apache 2.0 license and is supported by the community. Download @@ -85,13 +85,13 @@ The open-source TiDB platform is released under the Apache 2.0 license, and supp -Documentation for TiDB application developers +Documentation for TiDB application developers. -Documentation for TiDB Cloud application developers +Documentation for TiDB Cloud application developers. @@ -103,51 +103,57 @@ Documentation for TiDB Cloud application developers + + +Experience the capabilities of TiDB WITHOUT registration. + + + -Learn TiDB and TiDB Cloud through well-designed online courses and instructor-led training +Learn TiDB and TiDB Cloud through well-designed online courses and instructor-led training. -Join us on Slack or become a contributor +Join us on Slack or become a contributor. -Learn great articles about TiDB and TiDB Cloud +Read great articles about TiDB and TiDB Cloud. -See a compilation of short videos describing TiDB and a variety of use cases +Watch a compilation of short videos describing TiDB and various use cases. 
-Learn events about PingCAP and the community +Learn about events hosted by PingCAP and the community. -Download eBooks and papers +Download eBooks and papers. -A powerful insight tool that analyzes in depth any GitHub repository, powered by TiDB Cloud +A powerful insight tool that analyzes any GitHub repository in depth, powered by TiDB Cloud. -Let’s work together to make the documentation better! +Let's work together to improve the documentation! diff --git a/_index.md b/_index.md index 030acea5c89ff..0d249ee544799 100644 --- a/_index.md +++ b/_index.md @@ -41,7 +41,7 @@ hide_commit: true [Quick Start](https://docs.pingcap.com/tidb/dev/dev-guide-build-cluster-in-cloud) -[Example Application](https://docs.pingcap.com/tidb/dev/dev-guide-sample-application-spring-boot) +[Example Application](https://docs.pingcap.com/tidb/dev/dev-guide-sample-application-java-spring-boot) diff --git a/auto-increment.md b/auto-increment.md index 70ca2e18fb33f..48671a9edef15 100644 --- a/auto-increment.md +++ b/auto-increment.md @@ -24,6 +24,8 @@ This document introduces the `AUTO_INCREMENT` column attribute, including its co +You can also use the `AUTO_INCREMENT` parameter in the [`CREATE TABLE`](/sql-statements/sql-statement-create-table.md) statement to specify the initial value of the increment field. + ## Concept `AUTO_INCREMENT` is a column attribute that is used to automatically fill in default column values. When the `INSERT` statement does not specify values for the `AUTO_INCREMENT` column, the system automatically assigns values to this column. diff --git a/auto-random.md b/auto-random.md index 2353e2796aa38..5a3e2e57065e8 100644 --- a/auto-random.md +++ b/auto-random.md @@ -16,6 +16,8 @@ For more information about how to handle highly concurrent write-heavy workloads +The `AUTO_RANDOM_BASE` parameter in the [CREATE TABLE](/sql-statements/sql-statement-create-table.md) statement is used to set the initial incremental part value of `auto_random`. 
This option can be considered as a part of the internal interface. You can ignore this parameter. + ## Basic concepts `AUTO_RANDOM` is a column attribute that is used to automatically assign values to a `BIGINT` column. Values assigned automatically are **random** and **unique**. diff --git a/basic-features.md b/basic-features.md index f0cc758eeb42d..1cb1249025d69 100644 --- a/basic-features.md +++ b/basic-features.md @@ -8,6 +8,8 @@ aliases: ['/docs/dev/basic-features/','/tidb/dev/experimental-features-4.0/'] This document lists the features supported in different TiDB versions, including [Long-Term Support (LTS)](/releases/versioning.md#long-term-support-releases) versions and [Development Milestone Release (DMR)](/releases/versioning.md#development-milestone-releases) versions after the latest LTS version. +You can try out TiDB features on [TiDB Playground](https://play.tidbcloud.com/?utm_source=docs&utm_medium=tidb_features). + > **Note:** > > PingCAP does not provide patch releases for DMR versions. Any bugs will be fixed in future releases. For general purposes, it is recommended to use the [latest LTS version](https://docs.pingcap.com/tidb/stable). 
@@ -20,220 +22,231 @@ This document lists the features supported in different TiDB versions, including ## Data types, functions, and operators -| Data types, functions, and operators | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [Numeric types](/data-type-numeric.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Date and time types](/data-type-date-and-time.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [String types](/data-type-string.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [JSON type](/data-type-json.md) | Y | Y | E | E | E | E | E | E | E | -| [Control flow functions](/functions-and-operators/control-flow-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [String functions](/functions-and-operators/string-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Numeric functions and operators](/functions-and-operators/numeric-functions-and-operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Date and time functions](/functions-and-operators/date-and-time-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Bit functions and operators](/functions-and-operators/bit-functions-and-operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Cast functions and operators](/functions-and-operators/cast-functions-and-operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Encryption and compression functions](/functions-and-operators/encryption-and-compression-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Information functions](/functions-and-operators/information-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [JSON functions](/functions-and-operators/json-functions.md) | Y | Y | E | E | E | E | E | E | E | -| [Aggregation functions](/functions-and-operators/aggregate-group-by-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Window functions](/functions-and-operators/window-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Miscellaneous 
functions](/functions-and-operators/miscellaneous-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Operators](/functions-and-operators/operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Character sets and collations](/character-set-and-collation.md) [^1] | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [User-level lock](/functions-and-operators/locking-functions.md) | Y | Y | Y | N | N | N | N | N | N | +| Data types, functions, and operators | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [Numeric types](/data-type-numeric.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Date and time types](/data-type-date-and-time.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [String types](/data-type-string.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [JSON type](/data-type-json.md) | Y | Y | Y | Y | E | E | E | E | E | E | E | +| [Control flow functions](/functions-and-operators/control-flow-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [String functions](/functions-and-operators/string-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Numeric functions and operators](/functions-and-operators/numeric-functions-and-operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Date and time functions](/functions-and-operators/date-and-time-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Bit functions and operators](/functions-and-operators/bit-functions-and-operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Cast functions and operators](/functions-and-operators/cast-functions-and-operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Encryption and compression functions](/functions-and-operators/encryption-and-compression-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Information functions](/functions-and-operators/information-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | 
Y | Y | Y | +| [JSON functions](/functions-and-operators/json-functions.md) | Y | Y | Y | Y | E | E | E | E | E | E | E | +| [Aggregation functions](/functions-and-operators/aggregate-group-by-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Window functions](/functions-and-operators/window-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Miscellaneous functions](/functions-and-operators/miscellaneous-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Operators](/functions-and-operators/operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Character sets and collations](/character-set-and-collation.md) [^1] | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [User-level lock](/functions-and-operators/locking-functions.md) | Y | Y | Y | Y | Y | N | N | N | N | N | N | ## Indexing and constraints -| Indexing and constraints | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [Expression indexes](/sql-statements/sql-statement-create-index.md#expression-index) [^2] | Y | Y | E | E | E | E | E | E | E | -| [Columnar storage (TiFlash)](/tiflash/tiflash-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Use FastScan to accelerate queries in OLAP scenarios](/tiflash/use-fastscan.md) | Y | E | N | N | N | N | N | N | N | -| [RocksDB engine](/storage-engine/rocksdb-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Titan plugin](/storage-engine/titan-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Titan Level Merge](/storage-engine/titan-configuration.md#level-merge-experimental) | E | E | E | E | E | E | E | E | E | -| [Use buckets to improve scan concurrency](/tune-region-performance.md#use-bucket-to-increase-concurrency) | E | E | E | N | N | N | N | N | N | -| [Invisible indexes](/sql-statements/sql-statement-add-index.md) | Y | Y | Y | Y | Y | Y | Y | Y | N | -| [Composite `PRIMARY KEY`](/constraints.md) | Y | Y | Y | Y | Y | Y 
| Y | Y | Y | -| [Unique indexes](/constraints.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Clustered index on integer `PRIMARY KEY`](/constraints.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Clustered index on composite or non-integer key](/constraints.md) | Y | Y | Y | Y | Y | Y | Y | Y | N | -| [Multi-valued index](/sql-statements/sql-statement-create-index.md#multi-valued-index) | Y | N | N | N | N | N | N | N | N | -| [Foreign key](/constraints.md#foreign-key) | Y | N | N | N | N | N | N | N | N | -| [TiFlash late materialization](/tiflash/tiflash-late-materialization.md) | Y | N | N | N | N | N | N | N | N | +| Indexing and constraints | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [Expression indexes](/sql-statements/sql-statement-create-index.md#expression-index) [^2] | Y | Y | Y | Y | E | E | E | E | E | E | E | +| [Columnar storage (TiFlash)](/tiflash/tiflash-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Use FastScan to accelerate queries in OLAP scenarios](/tiflash/use-fastscan.md) | Y | Y | Y | E | N | N | N | N | N | N | N | +| [RocksDB engine](/storage-engine/rocksdb-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Titan plugin](/storage-engine/titan-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Titan Level Merge](/storage-engine/titan-configuration.md#level-merge-experimental) | E | E | E | E | E | E | E | E | E | E | E | +| [Use buckets to improve scan concurrency](/tune-region-performance.md#use-bucket-to-increase-concurrency) | E | E | E | E | E | N | N | N | N | N | N | +| [Invisible indexes](/sql-statements/sql-statement-add-index.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | +| [Composite `PRIMARY KEY`](/constraints.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [`CHECK` constraints](/constraints.md#check) | Y | Y | N | N | N | N | N | N | N | N | N | +| [Unique indexes](/constraints.md) | 
Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Clustered index on integer `PRIMARY KEY`](/constraints.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Clustered index on composite or non-integer key](/constraints.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | +| [Multi-valued indexes](/sql-statements/sql-statement-create-index.md#multi-valued-indexes) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [Foreign key](/constraints.md#foreign-key) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [TiFlash late materialization](/tiflash/tiflash-late-materialization.md) | Y | Y | Y | N | N | N | N | N | N | N | N | ## SQL statements -| SQL statements [^3] | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| Basic `SELECT`, `INSERT`, `UPDATE`, `DELETE`, `REPLACE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| `INSERT ON DUPLICATE KEY UPDATE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| `LOAD DATA INFILE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| `SELECT INTO OUTFILE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| `INNER JOIN`, LEFT\|RIGHT [OUTER] JOIN | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| `UNION`, `UNION ALL` | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [`EXCEPT` and `INTERSECT` operators](/functions-and-operators/set-operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | N | -| `GROUP BY`, `ORDER BY` | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Window Functions](/functions-and-operators/window-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Common Table Expressions (CTE)](/sql-statements/sql-statement-with.md) | Y | Y | Y | Y | Y | Y | Y | N | N | -| `START TRANSACTION`, `COMMIT`, `ROLLBACK` | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [`EXPLAIN`](/sql-statements/sql-statement-explain.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [`EXPLAIN ANALYZE`](/sql-statements/sql-statement-explain-analyze.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [User-defined variables](/user-defined-variables.md) | E | E | E | E | E | 
E | E | E | E | -| [`BATCH [ON COLUMN] LIMIT INTEGER DELETE`](/sql-statements/sql-statement-batch.md) | Y | Y | Y | N | N | N | N | N | N | -| [`BATCH [ON COLUMN] LIMIT INTEGER INSERT/UPDATE/REPLACE`](/sql-statements/sql-statement-batch.md) | Y | Y | N | N | N | N | N | N | N | -| [`ALTER TABLE ... COMPACT`](/sql-statements/sql-statement-alter-table-compact.md) | Y | Y | E | N | N | N | N | N | N | -| [Table Lock](/sql-statements/sql-statement-lock-tables-and-unlock-tables.md) | E | E | E | E | E | E | E | E | E | -| [TiFlash Query Result Materialization](/tiflash/tiflash-results-materialization.md) | Y | E | N | N | N | N | N | N | N | +| SQL statements [^3] | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| Basic `SELECT`, `INSERT`, `UPDATE`, `DELETE`, `REPLACE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| `INSERT ON DUPLICATE KEY UPDATE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| `LOAD DATA INFILE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| `SELECT INTO OUTFILE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| `INNER JOIN`, LEFT\|RIGHT [OUTER] JOIN | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| `UNION`, `UNION ALL` | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [`EXCEPT` and `INTERSECT` operators](/functions-and-operators/set-operators.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | +| `GROUP BY`, `ORDER BY` | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Window Functions](/functions-and-operators/window-functions.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Common Table Expressions (CTE)](/sql-statements/sql-statement-with.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | N | +| `START TRANSACTION`, `COMMIT`, `ROLLBACK` | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [`EXPLAIN`](/sql-statements/sql-statement-explain.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [`EXPLAIN ANALYZE`](/sql-statements/sql-statement-explain-analyze.md) | Y 
| Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [User-defined variables](/user-defined-variables.md) | E | E | E | E | E | E | E | E | E | E | E | +| [`BATCH [ON COLUMN] LIMIT INTEGER DELETE`](/sql-statements/sql-statement-batch.md) | Y | Y | Y | Y | Y | N | N | N | N | N | N | +| [`BATCH [ON COLUMN] LIMIT INTEGER INSERT/UPDATE/REPLACE`](/sql-statements/sql-statement-batch.md) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [`ALTER TABLE ... COMPACT`](/sql-statements/sql-statement-alter-table-compact.md) | Y | Y | Y | Y | E | N | N | N | N | N | N | +| [Table Lock](/sql-statements/sql-statement-lock-tables-and-unlock-tables.md) | E | E | E | E | E | E | E | E | E | E | E | +| [TiFlash Query Result Materialization](/tiflash/tiflash-results-materialization.md) | Y | Y | Y | E | N | N | N | N | N | N | N | ## Advanced SQL features -| Advanced SQL features | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [Prepared statement cache](/sql-prepared-plan-cache.md) | Y | Y | Y | Y | Y | E | E | E | E | -| [Non-prepared statement cache](/sql-non-prepared-plan-cache.md) | Y | N | N | N | N | N | N | N | N | -| [SQL plan management (SPM)](/sql-plan-management.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Create bindings according to historical execution plans](/sql-plan-management.md#create-a-binding-according-to-a-historical-execution-plan) | Y | E | N | N | N | N | N | N | N | -| [Coprocessor cache](/coprocessor-cache.md) | Y | Y | Y | Y | Y | Y | Y | Y | E | -| [Stale Read](/stale-read.md) | Y | Y | Y | Y | Y | Y | Y | N | N | -| [Follower reads](/follower-read.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Read historical data (tidb_snapshot)](/read-historical-data.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Optimizer hints](/optimizer-hints.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [MPP execution engine](/explain-mpp.md) | Y | Y | Y | Y | Y | Y | Y | Y | N | -| [MPP execution engine - 
compression exchange](/explain-mpp.md#mpp-version-and-exchange-data-compression) | Y | N | N | N | N | N | N | N | N | -| [Index Merge](/explain-index-merge.md) | Y | Y | Y | Y | E | E | E | E | E | -| [Placement Rules in SQL](/placement-rules-in-sql.md) | Y | Y | Y | E | E | N | N | N | N | -| [Cascades Planner](/system-variables.md#tidb_enable_cascades_planner) | E | E | E | E | E | E | E | E | E | +| Advanced SQL features | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [Prepared statement cache](/sql-prepared-plan-cache.md) | Y | Y | Y | Y | Y | Y | Y | E | E | E | E | +| [Non-prepared statement cache](/sql-non-prepared-plan-cache.md) | E | E | E | N | N | N | N | N | N | N | N | +| [SQL plan management (SPM)](/sql-plan-management.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Create bindings according to historical execution plans](/sql-plan-management.md#create-a-binding-according-to-a-historical-execution-plan) | Y | Y | Y | E | N | N | N | N | N | N | N | +| [Coprocessor cache](/coprocessor-cache.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | E | +| [Stale Read](/stale-read.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | N | +| [Follower reads](/follower-read.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Read historical data (tidb_snapshot)](/read-historical-data.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Optimizer hints](/optimizer-hints.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [MPP execution engine](/explain-mpp.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | +| [MPP execution engine - compression exchange](/explain-mpp.md#mpp-version-and-exchange-data-compression) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [TiFlash Pipeline Model](/tiflash/tiflash-pipeline-model.md) | E | E | N | N | N | N | N | N | N | N | N | +| [TiFlash replica selection strategy](/system-variables.md#tiflash_replica_read-new-in-v730) | Y | N | N | 
N | N | N | N | N | N | N | N | +| [Index Merge](/explain-index-merge.md) | Y | Y | Y | Y | Y | Y | E | E | E | E | E | +| [Placement Rules in SQL](/placement-rules-in-sql.md) | Y | Y | Y | Y | Y | E | E | N | N | N | N | +| [Cascades Planner](/system-variables.md#tidb_enable_cascades_planner) | E | E | E | E | E | E | E | E | E | E | E | +| [Runtime Filter](/runtime-filter.md) | Y | N | N | N | N | N | N | N | N | N | N | ## Data definition language (DDL) -| Data definition language (DDL) | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| Basic `CREATE`, `DROP`, `ALTER`, `RENAME`, `TRUNCATE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Generated columns](/generated-columns.md) | Y | E | E | E | E | E | E | E | E | -| [Views](/views.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Sequences](/sql-statements/sql-statement-create-sequence.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Auto increment](/auto-increment.md) | Y | Y[^4] | Y | Y | Y | Y | Y | Y | Y | -| [Auto random](/auto-random.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [TTL (Time to Live)](/time-to-live.md) | Y | E | N | N | N | N | N | N | N | -| [DDL algorithm assertions](/sql-statements/sql-statement-alter-table.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| Multi-schema change: add columns | Y | Y | E | E | E | E | E | E | E | -| [Change column type](/sql-statements/sql-statement-modify-column.md) | Y | Y | Y | Y | Y | Y | Y | N | N | -| [Temporary tables](/temporary-tables.md) | Y | Y | Y | Y | Y | N | N | N | N | -| Concurrent DDL statements | Y | Y | N | N | N | N | N | N | N | -| [Acceleration of `ADD INDEX` and `CREATE INDEX`](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630) | Y | Y | N | N | N | N | N | N | N | -| [Metadata lock](/metadata-lock.md) | Y | Y | N | N | N | N | N | N | N | -| [`FLASHBACK CLUSTER TO TIMESTAMP`](/sql-statements/sql-statement-flashback-to-timestamp.md) | Y | Y | N | N | N | N | N | N 
| N | -| [Pause/Resume DDL](/ddl-introduction.md#ddl-related-commands) | E | N | N | N | N | N | N | N | N | +| Data definition language (DDL) | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| Basic `CREATE`, `DROP`, `ALTER`, `RENAME`, `TRUNCATE` | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Generated columns](/generated-columns.md) | Y | Y | Y | E | E | E | E | E | E | E | E | +| [Views](/views.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Sequences](/sql-statements/sql-statement-create-sequence.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Auto increment](/auto-increment.md) | Y | Y | Y | Y[^4] | Y | Y | Y | Y | Y | Y | Y | +| [Auto random](/auto-random.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [TTL (Time to Live)](/time-to-live.md) | Y | Y | Y | E | N | N | N | N | N | N | N | +| [DDL algorithm assertions](/sql-statements/sql-statement-alter-table.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| Multi-schema change: add columns | Y | Y | Y | Y | E | E | E | E | E | E | E | +| [Change column type](/sql-statements/sql-statement-modify-column.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | N | +| [Temporary tables](/temporary-tables.md) | Y | Y | Y | Y | Y | Y | Y | N | N | N | N | +| Concurrent DDL statements | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [Acceleration of `ADD INDEX` and `CREATE INDEX`](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [Metadata lock](/metadata-lock.md) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [`FLASHBACK CLUSTER TO TIMESTAMP`](/sql-statements/sql-statement-flashback-to-timestamp.md) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [Pause](/sql-statements/sql-statement-admin-pause-ddl.md)/[Resume](/sql-statements/sql-statement-admin-resume-ddl.md) DDL | E | E | N | N | N | N | N | N | N | N | N | ## Transactions -| Transactions | 7.1 
(upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [Async commit](/system-variables.md#tidb_enable_async_commit-new-in-v50) | Y | Y | Y | Y | Y | Y | Y | Y | N | -| [1PC](/system-variables.md#tidb_enable_1pc-new-in-v50) | Y | Y | Y | Y | Y | Y | Y | Y | N | -| [Large transactions (10GB)](/transaction-overview.md#transaction-size-limit) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Pessimistic transactions](/pessimistic-transaction.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Optimistic transactions](/optimistic-transaction.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Repeatable-read isolation (snapshot isolation)](/transaction-isolation-levels.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Read-committed isolation](/transaction-isolation-levels.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| Transactions | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [Async commit](/system-variables.md#tidb_enable_async_commit-new-in-v50) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | +| [1PC](/system-variables.md#tidb_enable_1pc-new-in-v50) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | +| [Large transactions (10GB)](/transaction-overview.md#transaction-size-limit) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Pessimistic transactions](/pessimistic-transaction.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Optimistic transactions](/optimistic-transaction.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Repeatable-read isolation (snapshot isolation)](/transaction-isolation-levels.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Read-committed isolation](/transaction-isolation-levels.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | ## Partitioning -| Partitioning | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [Range 
partitioning](/partitioned-table.md#range-partitioning) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Hash partitioning](/partitioned-table.md#hash-partitioning) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Key partitioning](/partitioned-table.md#key-partitioning) | Y | N | N | N | N | N | N | N | N | -| [List partitioning](/partitioned-table.md#list-partitioning) | Y | Y | Y | E | E | E | E | E | N | -| [List COLUMNS partitioning](/partitioned-table.md) | Y | Y | Y | E | E | E | E | E | N | -| [`EXCHANGE PARTITION`](/partitioned-table.md) | Y | Y | E | E | E | E | E | E | N | -| [`REORGANIZE PARTITION`](/partitioned-table.md#reorganize-partitions) | Y | N | N | N | N | N | N | N | N | -| [`COALESCE PARTITION`](/partitioned-table.md#decrease-the-number-of-partitions) | Y | N | N | N | N | N | N | N | N | -| [Dynamic pruning](/partitioned-table.md#dynamic-pruning-mode) | Y | Y | Y | E | E | E | E | N | N | -| [Range COLUMNS partitioning](/partitioned-table.md#range-columns-partitioning) | Y | Y | N | N | N | N | N | N | N | -| [Range INTERVAL partitioning](/partitioned-table.md#range-interval-partitioning) | Y | E | N | N | N | N | N | N | N | +| Partitioning | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [Range partitioning](/partitioned-table.md#range-partitioning) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Hash partitioning](/partitioned-table.md#hash-partitioning) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Key partitioning](/partitioned-table.md#key-partitioning) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [List partitioning](/partitioned-table.md#list-partitioning) | Y | Y | Y | Y | Y | E | E | E | E | E | N | +| [List COLUMNS partitioning](/partitioned-table.md) | Y | Y | Y | Y | Y | E | E | E | E | E | N | +| [Default partition for List and List COLUMNS partitioned tables](/partitioned-table.md#default-list-partition) | Y | N | N | N | N | N | N | N | 
N | N | N | +| [`EXCHANGE PARTITION`](/partitioned-table.md) | Y | Y | Y | Y | E | E | E | E | E | E | N | +| [`REORGANIZE PARTITION`](/partitioned-table.md#reorganize-partitions) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [`COALESCE PARTITION`](/partitioned-table.md#decrease-the-number-of-partitions) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [Dynamic pruning](/partitioned-table.md#dynamic-pruning-mode) | Y | Y | Y | Y | Y | E | E | E | E | N | N | +| [Range COLUMNS partitioning](/partitioned-table.md#range-columns-partitioning) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [Range INTERVAL partitioning](/partitioned-table.md#range-interval-partitioning) | Y | Y | Y | E | N | N | N | N | N | N | N | ## Statistics -| Statistics | 7.1 (upcoming) | 6.5 | 6.1 | 6.0 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [CMSketch](/statistics.md) | Disabled by default | Disabled by default | Disabled by default | Disabled by default | Disabled by default | Disabled by default | Y | Y | Y | -| [Histograms](/statistics.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Extended statistics](/extended-statistics.md) | E | E | E | E | E | E | E | E | E | -| Statistics feedback | N | N | Deprecated | Deprecated | Deprecated | E | E | E | E | -| [Automatically update statistics](/statistics.md#automatic-update) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Fast Analyze](/system-variables.md#tidb_enable_fast_analyze) | E | E | E | E | E | E | E | E | E | -| [Dynamic pruning](/partitioned-table.md#dynamic-pruning-mode) | Y | Y | Y | E | E | E | E | E | N | -| [Collect statistics for `PREDICATE COLUMNS`](/statistics.md#collect-statistics-on-some-columns) | E | E | E | E | E | N | N | N | N | -| [Control the memory quota for collecting statistics](/statistics.md#the-memory-quota-for-collecting-statistics) | E | E | E | N | N | N | N | N | N | -| [Randomly sample about 10000 rows of data to quickly build 
statistics](/system-variables.md#tidb_enable_fast_analyze) | E | E | E | E | E | E | E | E | E | -| [Lock statistics](/statistics.md#lock-statistics) | E | E | N | N | N | N | N | N | N | +| Statistics | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 6.0 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [CMSketch](/statistics.md) | Disabled by default | Disabled by default | Disabled by default | Disabled by default | Disabled by default | Disabled by default | Disabled by default | Disabled by default | Y | Y | Y | +| [Histograms](/statistics.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Extended statistics](/extended-statistics.md) | E | E | E | E | E | E | E | E | E | E | E | +| Statistics feedback | N | N | N | N | Deprecated | Deprecated | Deprecated | E | E | E | E | +| [Automatically update statistics](/statistics.md#automatic-update) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Fast Analyze](/system-variables.md#tidb_enable_fast_analyze) | E | E | E | E | E | E | E | E | E | E | E | +| [Dynamic pruning](/partitioned-table.md#dynamic-pruning-mode) | Y | Y | Y | Y | Y | E | E | E | E | E | N | +| [Collect statistics for `PREDICATE COLUMNS`](/statistics.md#collect-statistics-on-some-columns) | E | E | E | E | E | E | E | N | N | N | N | +| [Control the memory quota for collecting statistics](/statistics.md#the-memory-quota-for-collecting-statistics) | E | E | E | E | E | N | N | N | N | N | N | +| [Randomly sample about 10000 rows of data to quickly build statistics](/system-variables.md#tidb_enable_fast_analyze) | E | E | E | E | E | E | E | E | E | E | E | +| [Lock statistics](/statistics.md#lock-statistics) | E | E | E | E | N | N | N | N | N | N | N | +| [Lightweight statistics initialization](/statistics.md#load-statistics) | Y | Y | E | N | N | N | N | N | N | N | N | +| [Show the progress of collecting statistics](/sql-statements/sql-statement-show-analyze-status.md) | Y | N | N | N | N | N | N | N | 
N | N | N | ## Security -| Security | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [Transparent layer security (TLS)](/enable-tls-between-clients-and-servers.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Encryption at rest (TDE)](/encryption-at-rest.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Role-based authentication (RBAC)](/role-based-access-control.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Certificate-based authentication](/certificate-authentication.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [`caching_sha2_password` authentication](/system-variables.md#default_authentication_plugin) | Y | Y | Y | Y | Y | Y | N | N | N | -| [`tidb_sm3_password` authentication](/system-variables.md#default_authentication_plugin) | Y | Y | N | N | N | N | N | N | N | -| [`tidb_auth_token` authentication](/system-variables.md#default_authentication_plugin) | Y | Y | N | N | N | N | N | N | N | -| [Password management](/password-management.md) | Y | Y | N | N | N | N | N | N | N | -| [MySQL compatible `GRANT` system](/privilege-management.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Dynamic Privileges](/privilege-management.md#dynamic-privileges) | Y | Y | Y | Y | Y | Y | Y | N | N | -| [Security Enhanced Mode](/system-variables.md#tidb_enable_enhanced_security) | Y | Y | Y | Y | Y | Y | Y | N | N | -| [Redacted Log Files](/log-redaction.md) | Y | Y | Y | Y | Y | Y | Y | Y | N | +| Security | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [Transparent layer security (TLS)](/enable-tls-between-clients-and-servers.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Encryption at rest (TDE)](/encryption-at-rest.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Role-based authentication (RBAC)](/role-based-access-control.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Certificate-based 
authentication](/certificate-authentication.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [`caching_sha2_password` authentication](/system-variables.md#default_authentication_plugin) | Y | Y | Y | Y | Y | Y | Y | Y | N | N | N | +| [`tidb_sm3_password` authentication](/system-variables.md#default_authentication_plugin) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [`tidb_auth_token` authentication](/system-variables.md#default_authentication_plugin) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [`authentication_ldap_sasl` authentication](/system-variables.md#default_authentication_plugin) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [`authentication_ldap_simple` authentication](/system-variables.md#default_authentication_plugin) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [Password management](/password-management.md) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [MySQL compatible `GRANT` system](/privilege-management.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Dynamic Privileges](/privilege-management.md#dynamic-privileges) | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | N | +| [Security Enhanced Mode](/system-variables.md#tidb_enable_enhanced_security) | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | N | +| [Redacted Log Files](/log-redaction.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | N | ## Data import and export -| Data import and export | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [Fast Importer (TiDB Lightning)](/tidb-lightning/tidb-lightning-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| mydumper logical dumper | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | -| [Dumpling logical dumper](/dumpling-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Transactional `LOAD DATA`](/sql-statements/sql-statement-load-data.md) | Y [^5] | Y | Y | Y | Y | Y | Y | Y | N [^6] | -| 
[Database migration toolkit (DM)](/migration-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [TiDB Binlog](/tidb-binlog/tidb-binlog-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Change data capture (CDC)](/ticdc/ticdc-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Stream data to Amazon S3, GCS, Azure Blob Storage, and NFS through TiCDC](/ticdc/ticdc-sink-to-cloud-storage.md) | Y | E | N | N | N | N | N | N | N | -| [TiCDC supports bidirectional replication between two TiDB clusters](/ticdc/ticdc-bidirectional-replication.md) | Y | Y | N | N | N | N | N | N | N | -| [TiCDC OpenAPI v2](/ticdc/ticdc-open-api-v2.md) | Y | N | N | N | N | N | N | N | N | +| Data import and export | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [Fast import using TiDB Lightning](/tidb-lightning/tidb-lightning-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Fast import using the `IMPORT INTO` statement](/sql-statements/sql-statement-import-into.md) | E | E | N | N | N | N | N | N | N | N | N | +| mydumper logical dumper | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | Deprecated | +| [Dumpling logical dumper](/dumpling-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Transactional `LOAD DATA`](/sql-statements/sql-statement-load-data.md) [^5] | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | N [^6] | +| [Database migration toolkit (DM)](/migration-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [TiDB Binlog](/tidb-binlog/tidb-binlog-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Change data capture (CDC)](/ticdc/ticdc-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Stream data to Amazon S3, GCS, Azure Blob Storage, and NFS through TiCDC](/ticdc/ticdc-sink-to-cloud-storage.md) | Y | Y | Y | E | N | N | N | N | N | N | N | +| 
[TiCDC supports bidirectional replication between two TiDB clusters](/ticdc/ticdc-bidirectional-replication.md) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [TiCDC OpenAPI v2](/ticdc/ticdc-open-api-v2.md) | Y | Y | Y | N | N | N | N | N | N | N | N | ## Management, observability, and tools -| Management, observability, and tools | 7.1 (upcoming) | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | -|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| -| [TiDB Dashboard UI](/dashboard/dashboard-intro.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [TiDB Dashboard Continuous Profiling](/dashboard/continuous-profiling.md) | Y | Y | Y | E | E | N | N | N | N | -| [TiDB Dashboard Top SQL](/dashboard/top-sql.md) | Y | Y | Y | E | N | N | N | N | N | -| [TiDB Dashboard SQL Diagnostics](/information-schema/information-schema-sql-diagnostics.md) | Y | Y | E | E | E | E | E | E | E | -| [TiDB Dashboard Cluster Diagnostics](/dashboard/dashboard-diagnostics-access.md) | Y | Y | E | E | E | E | E | E | E | -| [TiKV-FastTune dashboard](/grafana-tikv-dashboard.md#tikv-fasttune-dashboard) | E | E | E | E | E | E | E | E | E | -| [Information schema](/information-schema/information-schema.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Metrics schema](/metrics-schema.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Statements summary tables](/statement-summary-tables.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Statements summary tables - summary persistence](/statement-summary-tables.md#persist-statements-summary) | E | N | N | N | N | N | N | N | N | -| [Slow query log](/identify-slow-queries.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [TiUP deployment](/tiup/tiup-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Kubernetes operator](https://docs.pingcap.com/tidb-in-kubernetes/) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Built-in physical backup](/br/backup-and-restore-use-cases.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [Global 
Kill](/sql-statements/sql-statement-kill.md) | Y | Y | Y | E | E | E | E | E | E | -| [Lock View](/information-schema/information-schema-data-lock-waits.md) | Y | Y | Y | Y | Y | Y | E | E | E | -| [`SHOW CONFIG`](/sql-statements/sql-statement-show-config.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | -| [`SET CONFIG`](/dynamic-config.md) | Y | Y | Y | E | E | E | E | E | E | -| [DM WebUI](/dm/dm-webui-guide.md) | E | E | E | N | N | N | N | N | N | -| [Foreground Quota Limiter](/tikv-configuration-file.md#foreground-quota-limiter) | Y | Y | E | N | N | N | N | N | N | -| [Background Quota Limiter](/tikv-configuration-file.md#background-quota-limiter) | E | E | N | N | N | N | N | N | N | -| [EBS volume snapshot backup and restore](https://docs.pingcap.com/tidb-in-kubernetes/v1.4/backup-to-aws-s3-by-snapshot) | Y | Y | N | N | N | N | N | N | N | -| [PITR](/br/backup-and-restore-overview.md) | Y | Y | N | N | N | N | N | N | N | -| [Global memory control](/configure-memory-usage.md#configure-the-memory-usage-threshold-of-a-tidb-server-instance) | Y | Y | N | N | N | N | N | N | N | -| [Cross-cluster RawKV replication](/tikv-configuration-file.md#api-version-new-in-v610) | E | E | N | N | N | N | N | N | N | -| [Green GC](/system-variables.md#tidb_gc_scan_lock_mode-new-in-v50) | E | E | E | E | E | E | E | E | N | -| [Resource control](/tidb-resource-control.md) | Y | N | N | N | N | N | N | N | N | -| [TiFlash Disaggregated Storage and Compute Architecture and S3 Support](/tiflash/tiflash-disaggregated-and-s3.md) | E | N | N | N | N | N | N | N | N | +| Management, observability, and tools | 7.3 | 7.2 | 7.1 | 6.5 | 6.1 | 5.4 | 5.3 | 5.2 | 5.1 | 5.0 | 4.0 | +|---|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:| +| [TiDB Dashboard UI](/dashboard/dashboard-intro.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [TiDB Dashboard Continuous Profiling](/dashboard/continuous-profiling.md) | Y | Y | Y | Y | Y | E | E | N | N | N | N | +| [TiDB Dashboard Top 
SQL](/dashboard/top-sql.md) | Y | Y | Y | Y | Y | E | N | N | N | N | N | +| [TiDB Dashboard SQL Diagnostics](/information-schema/information-schema-sql-diagnostics.md) | Y | Y | Y | Y | E | E | E | E | E | E | E | +| [TiDB Dashboard Cluster Diagnostics](/dashboard/dashboard-diagnostics-access.md) | Y | Y | Y | Y | E | E | E | E | E | E | E | +| [TiKV-FastTune dashboard](/grafana-tikv-dashboard.md#tikv-fasttune-dashboard) | E | E | E | E | E | E | E | E | E | E | E | +| [Information schema](/information-schema/information-schema.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Metrics schema](/metrics-schema.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Statements summary tables](/statement-summary-tables.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Statements summary tables - summary persistence](/statement-summary-tables.md#persist-statements-summary) | E | E | E | N | N | N | N | N | N | N | N | +| [Slow query log](/identify-slow-queries.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [TiUP deployment](/tiup/tiup-overview.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Kubernetes operator](https://docs.pingcap.com/tidb-in-kubernetes/) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Built-in physical backup](/br/backup-and-restore-use-cases.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [Global Kill](/sql-statements/sql-statement-kill.md) | Y | Y | Y | Y | Y | E | E | E | E | E | E | +| [Lock View](/information-schema/information-schema-data-lock-waits.md) | Y | Y | Y | Y | Y | Y | Y | Y | E | E | E | +| [`SHOW CONFIG`](/sql-statements/sql-statement-show-config.md) | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | Y | +| [`SET CONFIG`](/dynamic-config.md) | Y | Y | Y | Y | Y | E | E | E | E | E | E | +| [DM WebUI](/dm/dm-webui-guide.md) | E | E | E | E | E | N | N | N | N | N | N | +| [Foreground Quota Limiter](/tikv-configuration-file.md#foreground-quota-limiter) | Y | Y | Y | Y | E | N | N | N | N | N | N | +| [Background Quota 
Limiter](/tikv-configuration-file.md#background-quota-limiter) | E | E | E | E | N | N | N | N | N | N | N | +| [EBS volume snapshot backup and restore](https://docs.pingcap.com/tidb-in-kubernetes/v1.4/backup-to-aws-s3-by-snapshot) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [PITR](/br/backup-and-restore-overview.md) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [Global memory control](/configure-memory-usage.md#configure-the-memory-usage-threshold-of-a-tidb-server-instance) | Y | Y | Y | Y | N | N | N | N | N | N | N | +| [Cross-cluster RawKV replication](/tikv-configuration-file.md#api-version-new-in-v610) | E | E | E | E | N | N | N | N | N | N | N | +| [Green GC](/system-variables.md#tidb_gc_scan_lock_mode-new-in-v50) | E | E | E | E | E | E | E | E | E | E | N | +| [Resource control](/tidb-resource-control.md) | Y | Y | Y | N | N | N | N | N | N | N | N | +| [Runaway Queries Management](/tidb-resource-control.md#manage-queries-that-consume-more-resources-than-expected-runaway-queries) | E | E | N | N | N | N | N | N | N | N | N | +| [TiFlash Disaggregated Storage and Compute Architecture and S3 Support](/tiflash/tiflash-disaggregated-and-s3.md) | E | E | E | N | N | N | N | N | N | N | N | [^1]: TiDB incorrectly treats latin1 as a subset of utf8. See [TiDB #18955](https://github.com/pingcap/tidb/issues/18955) for more details. @@ -243,6 +256,6 @@ This document lists the features supported in different TiDB versions, including [^4]: Starting from [v6.4.0](/releases/release-6.4.0.md), TiDB supports [high-performance and globally monotonic `AUTO_INCREMENT` columns](/auto-increment.md#mysql-compatibility-mode) -[^5]: For [TiDB v7.0.0](/releases/release-7.0.0.md), the new parameter `FIELDS DEFINED NULL BY` and support for importing data from S3 and GCS are experimental features. +[^5]: Starting from [TiDB v7.0.0](/releases/release-7.0.0.md), the new parameter `FIELDS DEFINED NULL BY` and support for importing data from S3 and GCS are experimental features. 
[^6]: For TiDB v4.0, the `LOAD DATA` transaction does not guarantee atomicity.
diff --git a/basic-sql-operations.md b/basic-sql-operations.md
index ff9c60cd3575b..6bd04f4324776 100644
--- a/basic-sql-operations.md
+++ b/basic-sql-operations.md
@@ -10,7 +10,7 @@ TiDB is compatible with MySQL, you can use MySQL statements directly in most of

-To experiment with SQL and test out TiDB compatibility with MySQL queries, you can [run TiDB directly in your web browser without installing it](https://tour.tidb.io/). You can also first deploy a TiDB cluster and then run SQL statements in it.
+To experiment with SQL and test out TiDB compatibility with MySQL queries, you can try [TiDB Playground](https://play.tidbcloud.com/?utm_source=docs&utm_medium=basic-sql-operations). You can also first deploy a TiDB cluster and then run SQL statements in it.
diff --git a/benchmark/benchmark-tidb-using-sysbench.md b/benchmark/benchmark-tidb-using-sysbench.md
index 3595ddb1fe1ea..35b8ddd0758a7 100644
--- a/benchmark/benchmark-tidb-using-sysbench.md
+++ b/benchmark/benchmark-tidb-using-sysbench.md
@@ -19,7 +19,11 @@ server_configs:
     log.level: "error"
 ```

-It is also recommended to make sure [`tidb_enable_prepared_plan_cache`](/system-variables.md#tidb_enable_prepared_plan_cache-new-in-v610) is enabled and that you allow sysbench to use prepared statements by _not_ using `--db-ps-mode=disabled`. See the [SQL Prepared Execution Plan Cache](/sql-prepared-plan-cache.md) for documetnation about what the SQL plan cache does and how to monitor it.
+It is also recommended to make sure [`tidb_enable_prepared_plan_cache`](/system-variables.md#tidb_enable_prepared_plan_cache-new-in-v610) is enabled and that you allow sysbench to use prepared statements by using `--db-ps-mode=auto`. See the [SQL Prepared Execution Plan Cache](/sql-prepared-plan-cache.md) for documentation about what the SQL plan cache does and how to monitor it.
+ +> **Note:** +> +> In different versions of Sysbench, the default value of `db-ps-mode` might be different. It is recommended to explicitly specify it in the command. ### TiKV configuration @@ -141,7 +145,7 @@ sysbench --config-file=config oltp_point_select --tables=32 --table-size=1000000 {{< copyable "shell-regular" >}} ```bash -sysbench --config-file=config oltp_point_select --tables=32 --table-size=10000000 run +sysbench --config-file=config oltp_point_select --tables=32 --table-size=10000000 --db-ps-mode=auto --rand-type=uniform run ``` ### Update index test command @@ -149,7 +153,7 @@ sysbench --config-file=config oltp_point_select --tables=32 --table-size=1000000 {{< copyable "shell-regular" >}} ```bash -sysbench --config-file=config oltp_update_index --tables=32 --table-size=10000000 run +sysbench --config-file=config oltp_update_index --tables=32 --table-size=10000000 --db-ps-mode=auto --rand-type=uniform run ``` ### Read-only test command @@ -157,7 +161,7 @@ sysbench --config-file=config oltp_update_index --tables=32 --table-size=1000000 {{< copyable "shell-regular" >}} ```bash -sysbench --config-file=config oltp_read_only --tables=32 --table-size=10000000 run +sysbench --config-file=config oltp_read_only --tables=32 --table-size=10000000 --db-ps-mode=auto --rand-type=uniform run ``` ## Common issues diff --git a/best-practices-on-public-cloud.md b/best-practices-on-public-cloud.md new file mode 100644 index 0000000000000..d4677de975273 --- /dev/null +++ b/best-practices-on-public-cloud.md @@ -0,0 +1,192 @@ +--- +title: TiDB Best Practices on Public Cloud +summary: Learn about the best practices for deploying TiDB on public cloud. +--- + +# TiDB Best Practices on Public Cloud + +Public cloud infrastructure has become an increasingly popular choice for deploying and managing TiDB. 
However, deploying TiDB on public cloud requires careful consideration of several critical factors, including performance tuning, cost optimization, reliability, and scalability.
+
+This document covers various essential best practices for deploying TiDB on public cloud, such as using a dedicated disk for Raft Engine, reducing compaction I/O flow in KV RocksDB, optimizing costs for cross-AZ traffic, mitigating Google Cloud live migration events, and fine-tuning the PD server in large clusters. By following these best practices, you can maximize the performance, cost efficiency, reliability, and scalability of your TiDB deployment on public cloud.
+
+## Use a dedicated disk for Raft Engine
+
+The [Raft Engine](/glossary.md#raft-engine) in TiKV plays a critical role similar to that of a write-ahead log (WAL) in traditional databases. To achieve optimal performance and stability, it is crucial to allocate a dedicated disk for the Raft Engine when you deploy TiDB on public cloud. The following `iostat` shows the I/O characteristics on a TiKV node with a write-heavy workload.
+
+```
+Device r/s rkB/s w/s wkB/s f/s aqu-sz %util
+sdb 1649.00 209030.67 1293.33 304644.00 13.33 5.09 48.37
+sdd 1033.00 4132.00 1141.33 31685.33 571.00 0.94 100.00
+```
+
+The device `sdb` is used for KV RocksDB, while `sdd` is used to store Raft Engine logs. Note that `sdd` has a significantly higher `f/s` value, which represents the number of flush requests completed per second for the device. In Raft Engine, when a write in a batch is marked synchronous, the batch leader will call `fdatasync()` after writing, guaranteeing that buffered data is flushed to the storage. By using a dedicated disk for Raft Engine, TiKV reduces the average queue length of requests, thereby ensuring optimal and stable write latency.
+
+Different cloud providers offer various disk types with different performance characteristics, such as IOPS and MBPS.
Therefore, it is important to choose an appropriate cloud provider, disk type, and disk size based on your workload. + +### Choose appropriate disks for Raft Engine on public clouds + +This section outlines best practices for choosing appropriate disks for Raft Engine on different public clouds. Depending on performance requirements, two types of recommended disks are available. + +#### Middle-range disk + +The following are recommended middle-range disks for different public clouds: + +- On AWS, [gp3](https://aws.amazon.com/ebs/general-purpose/) is recommended. The gp3 volume offers a free allocation of 3000 IOPS and 125 MB/s throughput, regardless of the volume size, which is usually sufficient for the Raft Engine. + +- On Google Cloud, [pd-ssd](https://cloud.google.com/compute/docs/disks#disk-types/) is recommended. The IOPS and MBPS vary depending on the allocated disk size. To meet performance requirements, it is recommended to allocate 200 GB for Raft Engine. Although Raft Engine does not require such a large space, it ensures optimal performance. + +- On Azure, [Premium SSD v2](https://learn.microsoft.com/en-us/azure/virtual-machines/disks-types#premium-ssd-v2) is recommended. Similar to AWS gp3, Premium SSD v2 provides a free allocation of 3000 IOPS and 125 MB/s throughput, regardless of the volume size, which is usually sufficient for Raft Engine. + +#### High-end disk + +If you expect an even lower latency for Raft Engine, consider using high-end disks. The following are recommended high-end disks for different public clouds: + +- On AWS, [io2](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ebs-volume-types.html) is recommended. Disk size and IOPS can be provisioned according to your specific requirements. + +- On Google Cloud, [pd-extreme](https://cloud.google.com/compute/docs/disks#disk-types/) is recommended. Disk size, IOPS, and MBPS can be provisioned, but it is only available on instances with more than 64 CPU cores. 
+
+- On Azure, [ultra disk](https://learn.microsoft.com/en-us/azure/virtual-machines/disks-types#ultra-disks) is recommended. Disk size, IOPS, and MBPS can be provisioned according to your specific requirements.
+
+### Example 1: Run a social network workload on AWS
+
+AWS offers 3000 IOPS and 125 MB/s for a 20 GB [gp3](https://aws.amazon.com/ebs/general-purpose/) volume.
+
+By using a dedicated 20 GB [gp3](https://aws.amazon.com/ebs/general-purpose/) Raft Engine disk on AWS for a write-intensive social network application workload, the following improvements are observed but the estimated cost only increases by 0.4%:
+
+- a 17.5% increase in QPS (queries per second)
+- an 18.7% decrease in average latency for insert statements
+- a 45.6% decrease in p99 latency for insert statements.
+
+| Metric | Shared Raft Engine disk | Dedicated Raft Engine disk | Difference (%) |
+| ------------- | ------------- |------------- |------------- |
+| QPS (K/s)| 8.0 | 9.4 | 17.5|
+| AVG Insert Latency (ms)| 11.3 | 9.2 | -18.7 |
+| P99 Insert Latency (ms)| 29.4 | 16.0 | -45.6|
+
+### Example 2: Run TPC-C/Sysbench workload on Azure
+
+By using a dedicated 32 GB [ultra disk](https://learn.microsoft.com/en-us/azure/virtual-machines/disks-types#ultra-disks) for Raft Engine on Azure, the following improvements are observed:
+
+- Sysbench `oltp_read_write` workload: a 17.8% increase in QPS and a 15.6% decrease in average latency.
+- TPC-C workload: a 27.6% increase in QPS and a 23.1% decrease in average latency.
+ +| Metric | Workload | Shared Raft Engine disk | Dedicated Raft Engine disk | Difference (%) | +| ------------- | ------------- | ------------- |------------- |------------- | +| QPS (K/s) | Sysbench `oltp_read_write` | 60.7 | 71.5 | 17.8| +| QPS (K/s) | TPC-C | 23.9 | 30.5 | 27.6| +| AVG Latency (ms)| Sysbench `oltp_read_write` | 4.5 | 3.8 | -15.6 | +| AVG Latency (ms)| TPC-C | 3.9 | 3.0 | -23.1 | + +### Example 3: Attach a dedicated pd-ssd disk on Google Cloud for Raft Engine on TiKV manifest + +The following TiKV configuration example shows how to attach an additional 512 GB [pd-ssd](https://cloud.google.com/compute/docs/disks#disk-types/) disk to a cluster on Google Cloud deployed by [TiDB Operator](https://docs.pingcap.com/tidb-in-kubernetes/stable), with `raft-engine.dir` configured to store Raft Engine logs to this specific disk. + +``` +tikv: + config: | + [raft-engine] + dir = "/var/lib/raft-pv-ssd/raft-engine" + enable = true + enable-log-recycle = true + requests: + storage: 4Ti + storageClassName: pd-ssd + storageVolumes: + - mountPath: /var/lib/raft-pv-ssd + name: raft-pv-ssd + storageSize: 512Gi +``` + +## Reduce compaction I/O flow in KV RocksDB + +As the storage engine of TiKV, [RocksDB](https://rocksdb.org/) is used to store user data. Because the provisioned IO throughput on cloud EBS is usually limited due to cost considerations, RocksDB might exhibit high write amplification, and the disk throughput might become the bottleneck for the workload. As a result, the total number of pending compaction bytes grows over time and triggers flow control, which indicates that TiKV lacks sufficient disk bandwidth to keep up with the foreground write flow. + +To alleviate the bottleneck caused by limited disk throughput, you can improve performance by increasing the compression level for RocksDB and reducing the disk throughput. For example, you can refer to the following example to increase all the compression levels of the default column family to `zstd`. 
+ +``` +[rocksdb.defaultcf] +compression-per-level = ["zstd", "zstd", "zstd", "zstd", "zstd", "zstd", "zstd"] +``` + +## Optimize cost for cross-AZ network traffic + +Deploying TiDB across multiple availability zones (AZs) can lead to increased costs due to cross-AZ data transfer fees. To optimize costs, it is important to reduce cross-AZ network traffic. + +To reduce cross-AZ read traffic, you can enable the [Follower Read feature](/follower-read.md), which allows TiDB to prioritize selecting replicas in the same availability zone. To enable this feature, set the [`tidb_replica_read`](/system-variables.md#tidb_replica_read-new-in-v40) variable to `closest-replicas` or `closest-adaptive`. + +To reduce cross-AZ write traffic in TiKV instances, you can enable the gRPC compression feature, which compresses data before transmitting it over the network. The following configuration example shows how to enable gzip gRPC compression for TiKV. + +``` +server_configs: + tikv: + server.grpc-compression-type: gzip +``` + +To reduce network traffic caused by the data shuffle of TiFlash MPP tasks, it is recommended to deploy multiple TiFlash instances in the same availability zones (AZs). Starting from v6.6.0, [compression exchange](/explain-mpp.md#mpp-version-and-exchange-data-compression) is enabled by default, which reduces the network traffic caused by MPP data shuffle. + +## Mitigate live migration maintenance events on Google Cloud + +The [Live Migration feature](https://cloud.google.com/compute/docs/instances/live-migration-process) of Google Cloud enables VMs to be seamlessly migrated between hosts without causing downtime. However, these migration events, although infrequent, can significantly impact the performance of VMs, including those running in a TiDB cluster. During such events, affected VMs might experience reduced performance, leading to longer query processing times in the TiDB cluster. 
+ +To detect live migration events initiated by Google Cloud and mitigate the performance impact of these events, TiDB provides a [watching script](https://github.com/PingCAP-QE/tidb-google-maintenance) based on Google's metadata [example](https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/compute/metadata/main.py). You can deploy this script on TiDB, TiKV, and PD nodes to detect maintenance events. When a maintenance event is detected, appropriate actions can be taken automatically as follows to minimize disruption and optimize the cluster behavior: + +- TiDB: Takes the TiDB node offline by cordoning it and deleting the TiDB pod. This assumes that the node pool of the TiDB instance is set to auto-scale and dedicated to TiDB. Other pods running on the node might experience interruptions, and the cordoned node is expected to be reclaimed by the auto-scaler. +- TiKV: Evicts leaders on the affected TiKV store during maintenance. +- PD: Resigns a leader if the current PD instance is the PD leader. + +It is important to note that this watching script is specifically designed for TiDB clusters deployed using [TiDB Operator](https://docs.pingcap.com/tidb-in-kubernetes/dev/tidb-operator-overview), which offers enhanced management functionalities for TiDB in Kubernetes environments. + +By utilizing the watching script and taking necessary actions during maintenance events, TiDB clusters can better handle live migration events on Google Cloud and ensure smoother operations with minimal impact on query processing and response times. + +## Tune PD for a large-scale TiDB cluster with high QPS + +In a TiDB cluster, a single active Placement Driver (PD) server is used to handle crucial tasks such as serving the TSO (Timestamp Oracle) and processing requests. However, relying on a single active PD server can limit the scalability of TiDB clusters. 
+ +### Symptoms of PD limitation + +The following diagrams show the symptoms of a large-scale TiDB cluster consisting of three PD servers, each equipped with 56 CPUs. From these diagrams, it is observed that when the query per second (QPS) exceeds 1 million and the TSO (Timestamp Oracle) requests per second exceed 162,000, the CPU utilization reaches approximately 4,600%. This high CPU utilization indicates that the PD leader is experiencing a significant load and is running out of available CPU resources. + +![pd-server-cpu](/media/performance/public-cloud-best-practice/baseline_cpu.png) +![pd-server-metrics](/media/performance/public-cloud-best-practice/baseline_metrics.png) + +### Tune PD performance + +To address the high CPU utilization issue in the PD server, you can make the following tuning adjustments: + +#### Adjust PD configuration + +[`tso-update-physical-interval`](/pd-configuration-file.md#tso-update-physical-interval): This parameter controls the interval at which the PD server updates the physical TSO batch. By reducing the interval, the PD server can allocate TSO batches more frequently, thereby reducing the waiting time for the next allocation. + +``` +tso-update-physical-interval = "10ms" # default: 50ms +``` + +#### Adjust a TiDB global variable + +In addition to the PD configuration, enabling the TSO client batch wait feature can further optimize the TSO client's behavior. To enable this feature, you can set the global variable [`tidb_tso_client_batch_max_wait_time`](/system-variables.md#tidb_tso_client_batch_max_wait_time-new-in-v530) to a non-zero value. + +``` +set global tidb_tso_client_batch_max_wait_time = 2; # default: 0 +``` + +#### Adjust TiKV configuration + +To reduce the number of Regions and alleviate the heartbeat overhead on the system, it is recommended to increase the Region size in the TiKV configuration from `96MB` to `256MB`. 
+
+```
+[coprocessor]
+  region-split-size = "256MB"
+```
+
+## After tuning
+
+After the tuning, the following effects can be observed:
+
+- The TSO requests per second are decreased to 64,800.
+- The CPU utilization is significantly reduced from approximately 4,600% to 1,400%.
+- The P999 value of `PD server TSO handle time` is decreased from 2ms to 0.5ms.
+
+These improvements indicate that the tuning adjustments have successfully reduced the CPU utilization of the PD server while maintaining stable TSO handling performance.
+
+![pd-server-cpu](/media/performance/public-cloud-best-practice/after_tuning_cpu.png)
+![pd-server-metrics](/media/performance/public-cloud-best-practice/after_tuning_metrics.png)
diff --git a/best-practices/java-app-best-practices.md b/best-practices/java-app-best-practices.md
index a766b08133f8f..a6ac004313579 100644
--- a/best-practices/java-app-best-practices.md
+++ b/best-practices/java-app-best-practices.md
@@ -12,7 +12,7 @@ This document introduces the best practice for developing Java applications to b

 Common components that interact with the TiDB database in Java applications include:

-- Network protocol: A client interacts with a TiDB server via the standard [MySQL protocol](https://dev.mysql.com/doc/internals/en/client-server-protocol.html).
+- Network protocol: A client interacts with a TiDB server via the standard [MySQL protocol](https://dev.mysql.com/doc/dev/mysql-server/latest/PAGE_PROTOCOL.html).
 - JDBC API and JDBC drivers: Java applications usually use the standard [JDBC (Java Database Connectivity)](https://docs.oracle.com/javase/8/docs/technotes/guides/jdbc/) API to access a database. To connect to TiDB, you can use a JDBC driver that implements the MySQL protocol via the JDBC API. Such common JDBC drivers for MySQL include [MySQL Connector/J](https://github.com/mysql/mysql-connector-j) and [MariaDB Connector/J](https://mariadb.com/kb/en/library/about-mariadb-connector-j/#about-mariadb-connectorj).
- Database connection pool: To reduce the overhead of creating a connection each time it is requested, applications usually use a connection pool to cache and reuse connections. JDBC [DataSource](https://docs.oracle.com/javase/8/docs/api/javax/sql/DataSource.html) defines a connection pool API. You can choose from different open-source connection pool implementations as needed. - Data access framework: Applications usually use a data access framework such as [MyBatis](https://mybatis.org/mybatis-3/index.html) and [Hibernate](https://hibernate.org/) to further simplify and manage the database access operations. diff --git a/best-practices/pd-scheduling-best-practices.md b/best-practices/pd-scheduling-best-practices.md index c7130e9d3cc87..8f9253c8e1543 100644 --- a/best-practices/pd-scheduling-best-practices.md +++ b/best-practices/pd-scheduling-best-practices.md @@ -297,4 +297,4 @@ If a TiKV node fails, PD defaults to setting the corresponding node to the **dow Practically, if a node failure is considered unrecoverable, you can immediately take it offline. This makes PD replenish replicas soon in another node and reduces the risk of data loss. In contrast, if a node is considered recoverable, but the recovery cannot be done in 30 minutes, you can temporarily adjust `max-store-down-time` to a larger value to avoid unnecessary replenishment of the replicas and resources waste after the timeout. -In TiDB v5.2.0, TiKV introduces the mechanism of slow TiKV node detection. By sampling the requests in TiKV, this mechanism works out a score ranging from 1 to 100. A TiKV node with a score higher than or equal to 80 is marked as slow. You can add [`evict-slow-store-scheduler`](/pd-control.md#scheduler-show--add--remove--pause--resume--config--describe) to detect and schedule slow nodes. If only one TiKV is detected as slow, and the slow score reaches the upper limit (100 by default), the leader in this node will be evicted (similar to the effect of `evict-leader-scheduler`). 
+In TiDB v5.2.0, TiKV introduces the mechanism of slow TiKV node detection. By sampling the requests in TiKV, this mechanism works out a score ranging from 1 to 100. A TiKV node with a score higher than or equal to 80 is marked as slow. You can add [`evict-slow-store-scheduler`](/pd-control.md#scheduler-show--add--remove--pause--resume--config--describe) to detect and schedule slow nodes. If only one TiKV is detected as slow, and the slow score reaches the limit (80 by default), the leader in this node will be evicted (similar to the effect of `evict-leader-scheduler`). diff --git a/br/backup-and-restore-storages.md b/br/backup-and-restore-storages.md index 15fb125af75b1..0b392b3f952ef 100644 --- a/br/backup-and-restore-storages.md +++ b/br/backup-and-restore-storages.md @@ -55,6 +55,8 @@ This section describes the URI format of the storage services: - `sse`: Specifies the server-side encryption algorithm used to encrypt the uploaded objects (value options: ``, `AES256`, or `aws:kms`). - `sse-kms-key-id`: Specifies the KMS ID if `sse` is set to `aws:kms`. - `acl`: Specifies the canned ACL of the uploaded objects (for example, `private` or `authenticated-read`). + - `role-arn`: When you need to access Amazon S3 data from a third party using a specified [IAM role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html), you can specify the corresponding [Amazon Resource Name (ARN)](https://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html) of the IAM role with the `role-arn` URL query parameter, such as `arn:aws:iam::888888888888:role/my-role`. For more information about using an IAM role to access Amazon S3 data from a third party, see [AWS documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_common-scenarios_third-party.html). 
+ - `external-id`: When you access Amazon S3 data from a third party, you might need to specify a correct [external ID](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create_for-user_externalid.html) to assume [the IAM role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles.html). In this case, you can use this `external-id` URL query parameter to specify the external ID and make sure that you can assume the IAM role. An external ID is an arbitrary string provided by the third party together with the IAM role ARN to access the Amazon S3 data. Providing an external ID is optional when assuming an IAM role, which means if the third party does not require an external ID for the IAM role, you can assume the IAM role and access the corresponding Amazon S3 data without providing this parameter.
@@ -76,7 +78,10 @@ This section describes the URI format of the storage services: - `account-name`: Specifies the account name of the storage. - `account-key`: Specifies the access key. - - `access-tier`: Specifies the access tier of the uploaded objects, for example, `Hot`, `Cool`, or `Archive`. The value is `Hot` by default. + - `sas-token`: Specifies the shared access signature (SAS) token. + - `access-tier`: Specifies the access tier of the uploaded objects, for example, `Hot`, `Cool`, or `Archive`. The default value is the default access tier of the storage account. + - `encryption-scope`: Specifies the [encryption scope](https://learn.microsoft.com/en-us/azure/storage/blobs/encryption-scope-manage?tabs=powershell#upload-a-blob-with-an-encryption-scope) for server-side encryption. + - `encryption-key`: Specifies the [encryption key](https://learn.microsoft.com/en-us/azure/storage/blobs/encryption-customer-provided-keys) for server-side encryption, which uses the AES256 encryption algorithm.
@@ -91,7 +96,7 @@ This section provides some URI examples by using `external` as the `host` parame **Back up snapshot data to Amazon S3** ```shell -./br restore full -u "${PD_IP}:2379" \ +./br backup full -u "${PD_IP}:2379" \ --storage "s3://external/backup-20220915?access-key=${access-key}&secret-access-key=${secret-access-key}" ``` @@ -185,11 +190,15 @@ You can configure the account used to access GCS by specifying the access key. I
-- Method 1: Specify the access key +- Method 1: Specify the shared access signature + + If you specify `account-name` and `sas-token` in the URI, the authentication is performed using the specified account name and shared access signature (SAS) token. Note that the SAS token contains the `&` character. You need to encode it as `%26` before appending it to the URI. You can also directly encode the entire `sas-token` using percent-encoding. - If you specify `account-name` and `account-key` in the URI, the authentication is performed using the specified access key and secret access key. Besides the method of specifying the key in the URI, BR can also read the key from the environment variable `$AZURE_STORAGE_KEY`. +- Method 2: Specify the access key -- Method 2: Use Azure AD for backup and restore + If you specify `account-name` and `account-key` in the URI, the authentication is performed using the specified account name and account key. Besides the method of specifying the key in the URI, BR can also read the key from the environment variable `$AZURE_STORAGE_KEY`. + +- Method 3: Use Azure AD for backup and restore Configure the environment variables `$AZURE_CLIENT_ID`, `$AZURE_TENANT_ID`, and `$AZURE_CLIENT_SECRET` on the node where BR is running. @@ -245,6 +254,10 @@ You can configure the account used to access GCS by specifying the access key. I BR supports server-side encryption when backing up data to Amazon S3. You can also use an AWS KMS key you create for S3 server-side encryption using BR. For details, see [BR S3 server-side encryption](/encryption-at-rest.md#br-s3-server-side-encryption). +### Azure Blob Storage server-side encryption + +BR supports specifying the Azure server-side encryption scope or providing the encryption key when backing up data to Azure Blob Storage. This feature lets you establish a security boundary for different backup data of the same storage account. 
For details, see [BR Azure Blob Storage server-side encryption](/encryption-at-rest.md#br-azure-blob-storage-server-side-encryption). + ## Other features supported by the storage service BR v6.3.0 supports AWS [S3 Object Lock](https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-lock.html). You can enable this feature to prevent backup data from being tampered with or deleted. diff --git a/br/backup-and-restore-use-cases.md b/br/backup-and-restore-use-cases.md index 3402adecc019f..ca4ce21e28910 100644 --- a/br/backup-and-restore-use-cases.md +++ b/br/backup-and-restore-use-cases.md @@ -17,7 +17,7 @@ With PITR, you can satisfy the preceding requirements. ## Deploy the TiDB cluster and BR -To use PITR, you need to deploy a TiDB cluster >= v6.2.0 and update BR to the same version as the TiDB cluster. This document uses v7.0.0 as an example. +To use PITR, you need to deploy a TiDB cluster >= v6.2.0 and update BR to the same version as the TiDB cluster. This document uses v7.3.0 as an example. The following table shows the recommended hardware resources for using PITR in a TiDB cluster. 
@@ -44,13 +44,13 @@ Install or upgrade BR using TiUP: - Install: ```shell - tiup install br:v7.0.0 + tiup install br:v7.3.0 ``` - Upgrade: ```shell - tiup update br:v7.0.0 + tiup update br:v7.3.0 ``` ## Configure backup storage (Amazon S3) diff --git a/br/br-pitr-guide.md b/br/br-pitr-guide.md index 6432674879844..00b0b5d8b23f9 100644 --- a/br/br-pitr-guide.md +++ b/br/br-pitr-guide.md @@ -130,7 +130,7 @@ Testing scenario 1 (on [TiDB Cloud](https://tidbcloud.com)): - New log data created in the cluster: 10 GB/h - Write (INSERT/UPDATE/DELETE) QPS: 10,000 -Testing scenario 2 (on-premises): +Testing scenario 2 (on TiDB Self-Hosted): - The number of TiKV nodes (8 core, 64 GB memory): 6 - TiKV configuration item `import.num-threads`: 8 diff --git a/character-set-and-collation.md b/character-set-and-collation.md index 1b28a44e10870..ee8f3502535a4 100644 --- a/character-set-and-collation.md +++ b/character-set-and-collation.md @@ -147,8 +147,8 @@ The following demonstrates the default behavior when inserting a 4-byte emoji ch ```sql CREATE TABLE utf8_test ( - -> c char(1) NOT NULL - -> ) CHARACTER SET utf8; + c char(1) NOT NULL + ) CHARACTER SET utf8; ``` ```sql @@ -157,8 +157,8 @@ Query OK, 0 rows affected (0.09 sec) ```sql CREATE TABLE utf8m4_test ( - -> c char(1) NOT NULL - -> ) CHARACTER SET utf8mb4; + c char(1) NOT NULL + ) CHARACTER SET utf8mb4; ``` ```sql diff --git a/check-before-deployment.md b/check-before-deployment.md index 26b1d0e432cfc..38a5e3b52b935 100644 --- a/check-before-deployment.md +++ b/check-before-deployment.md @@ -128,6 +128,36 @@ sysctl -p > > - `sysctl -p` is to make the configuration effective without restarting the system. +## Set temporary spaces for TiDB instances (Recommended) + +Some operations in TiDB require writing temporary files to the server, so it is necessary to ensure that the operating system user that runs TiDB has sufficient permissions to read and write to the target directory. 
If you do not start the TiDB instance with the `root` privilege, you need to check the directory permissions and set them correctly. + +- TiDB work area + + Operations that consume a significant amount of memory, such as hash table construction and sorting, might write temporary data to disk to reduce memory consumption and improve stability. The disk location for writing is defined by the configuration item [`tmp-storage-path`](/tidb-configuration-file.md#tmp-storage-path). With the default configuration, make sure that the user that runs TiDB has read and write permissions to the temporary folder (usually `/tmp`) of the operating system. + +- `Fast Online DDL` work area + + When the variable [`tidb_ddl_enable_fast_reorg`](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630) is set to `ON` (the default value in v6.5.0 and later versions), `Fast Online DDL` is enabled, and some DDL operations need to read and write temporary files in filesystems. The location is defined by the configuration item [`temp-dir`](/tidb-configuration-file.md#temp-dir-new-in-v630). You need to ensure that the user that runs TiDB has read and write permissions for that directory of the operating system. Taking the default directory `/tmp/tidb` as an example: + + > **Note:** + > + > If DDL operations on large objects exist in your application, it is highly recommended to configure an independent large file system for [`temp-dir`](/tidb-configuration-file.md#temp-dir-new-in-v630). + + ```shell + sudo mkdir /tmp/tidb + ``` + + If the `/tmp/tidb` directory already exists, make sure the write permission is granted. + + ```shell + sudo chmod -R 777 /tmp/tidb + ``` + + > **Note:** + > + > If the directory does not exist, TiDB will automatically create it upon startup. 
If the directory creation fails or TiDB does not have the read and write permissions for that directory, [`Fast Online DDL`](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630) might experience unpredictable issues during runtime. + ## Check and stop the firewall service of target machines In TiDB clusters, the access ports between nodes must be open to ensure the transmission of information such as read and write requests and data heartbeats. In common online scenarios, the data interaction between the database and the application service and between the database nodes are all made within a secure network. Therefore, if there are no special security requirements, it is recommended to stop the firewall of the target machine. Otherwise, refer to [the port usage](/hardware-and-software-requirements.md#network-requirements) and add the needed port information to the allowlist of the firewall service. diff --git a/choose-index.md b/choose-index.md index ac1e51530e498..df3512fd4ffb0 100644 --- a/choose-index.md +++ b/choose-index.md @@ -141,9 +141,11 @@ The index selection can be controlled by a single query through [Optimizer Hints - `READ_FROM_STORAGE` can force the optimizer to choose the TiKV / TiFlash storage engine for certain tables to execute queries. -## Use a multi-valued index +## Use multi-valued indexes -[Multi-value indexes](/sql-statements/sql-statement-create-index.md#multi-valued-index) are different from normal indexes. TiDB currently only uses [IndexMerge](/explain-index-merge.md) to access multi-valued indexes. Therefore, to use multi-valued indexes for data access, make sure that the value of the sytem variable `tidb_enable_index_merge` is set to `ON`. +[Multi-valued indexes](/sql-statements/sql-statement-create-index.md#multi-valued-indexes) are different from normal indexes. TiDB currently only uses [IndexMerge](/explain-index-merge.md) to access multi-valued indexes. 
Therefore, to use multi-valued indexes for data access, make sure that the value of the system variable `tidb_enable_index_merge` is set to `ON`. + +For the limitations of multi-valued indexes, refer to [`CREATE INDEX`](/sql-statements/sql-statement-create-index.md#limitations). Currently, TiDB supports accessing multi-valued indexes using IndexMerge that is automatically converted from `json_member_of`, `json_contains`, and `json_overlaps` conditions. You can either rely on the optimizer to automatically select IndexMerge based on cost, or specify the selection of multi-valued indexes through the optimizer hint [`use_index_merge`](/optimizer-hints.md#use_index_merget1_name-idx1_name--idx2_name-) or [`use_index`](/optimizer-hints.md#use_indext1_name-idx1_name--idx2_name-). See the following examples: @@ -296,7 +298,7 @@ mysql> EXPLAIN SELECT /*+ use_index_merge(t3, idx) */ * FROM t3 WHERE ((1 member 3 rows in set, 2 warnings (0.00 sec) ``` -Limited by the current implementation of multi-valued index, using [`use_index`](/optimizer-hints.md#use_indext1_name-idx1_name--idx2_name-) might return the `Can't find a proper physical plan for this query` error while using [`use_index_merge`](/optimizer-hints.md#use_index_merget1_name-idx1_name--idx2_name-) will not return such an error. Therefore, it is recommended to use `use_index_merge` if you want to use the multi-valued index. +Limited by the current implementation of multi-valued indexes, using [`use_index`](/optimizer-hints.md#use_indext1_name-idx1_name--idx2_name-) might return the `Can't find a proper physical plan for this query` error while using [`use_index_merge`](/optimizer-hints.md#use_index_merget1_name-idx1_name--idx2_name-) will not return such an error. Therefore, it is recommended to use `use_index_merge` if you want to use multi-valued indexes. 
```sql mysql> EXPLAIN SELECT /*+ use_index(t3, idx) */ * FROM t3 WHERE ((1 member of (j)) AND (2 member of (j))) OR ((3 member of (j)) AND (4 member of (j))); diff --git a/clinic/clinic-introduction.md b/clinic/clinic-introduction.md index d355a71dde8eb..44cc8db40e988 100644 --- a/clinic/clinic-introduction.md +++ b/clinic/clinic-introduction.md @@ -61,7 +61,7 @@ First, Diag gets cluster topology information from the deployment tool TiUP (tiu > **Note:** > -> - Clinic Server is free from July 15, 2022 to July 14, 2023. You will be notified through email before July 14, 2023 if the service starts charging fee afterwards. +> - Clinic Server is free from July 15, 2022 to July 14, 2024. You will be notified through email before July 14, 2024 if the service starts charging fee afterwards. > - If you want to adjust the usage limitations, [get support](/support.md) from PingCAP. | Service Type| Limitation | diff --git a/clinic/clinic-user-guide-for-tiup.md b/clinic/clinic-user-guide-for-tiup.md index 7652a5ffffea5..7ac8b6f56205e 100644 --- a/clinic/clinic-user-guide-for-tiup.md +++ b/clinic/clinic-user-guide-for-tiup.md @@ -9,7 +9,7 @@ For TiDB clusters and DM clusters deployed using TiUP, you can use PingCAP Clini > **Note:** > -> - This document **only** applies to clusters deployed using TiUP in an on-premises environment. For clusters deployed using TiDB Operator on Kubernetes, see [PingCAP Clinic for TiDB Operator environments](https://docs.pingcap.com/tidb-in-kubernetes/stable/clinic-user-guide). +> - This document **only** applies to clusters deployed using TiUP in a self-hosted environment. For clusters deployed using TiDB Operator on Kubernetes, see [PingCAP Clinic for TiDB Operator environments](https://docs.pingcap.com/tidb-in-kubernetes/stable/clinic-user-guide). > > - PingCAP Clinic **does not support** collecting data from clusters deployed using TiDB Ansible. 
diff --git a/command-line-flags-for-tidb-configuration.md b/command-line-flags-for-tidb-configuration.md index 947cb58eefa4a..e4b5b936f2e7e 100644 --- a/command-line-flags-for-tidb-configuration.md +++ b/command-line-flags-for-tidb-configuration.md @@ -114,7 +114,7 @@ When you start the TiDB cluster, you can use command-line options or environment ## `--proxy-protocol-fallbackable` -- Controls whether to enable PROXY protocol fallback mode. When this parameter is set to `true`, TiDB accepts PROXY client connections and client connections without any PROXY protocol header. By default, TiDB only accepts client connections with a PROXY protocol header. +- Controls whether to enable PROXY protocol fallback mode. When this parameter is set to `true`, TiDB accepts client connections that belong to `--proxy-protocol-networks` without using the PROXY protocol specification or without sending a PROXY protocol header. By default, TiDB only accepts client connections that belong to `--proxy-protocol-networks` and send a PROXY protocol header. - Default value: `false` ## `--proxy-protocol-networks` diff --git a/constraints.md b/constraints.md index 1922b1eda76f6..dbfda1043c83d 100644 --- a/constraints.md +++ b/constraints.md @@ -52,22 +52,92 @@ Query OK, 1 row affected (0.03 sec) ## CHECK -TiDB parses but ignores `CHECK` constraints. This is MySQL 5.7 compatible behavior. +> **Note:** +> +> The `CHECK` constraint feature is disabled by default. To enable it, you need to set the [`tidb_enable_check_constraint`](/system-variables.md#tidb_enable_check_constraint-new-in-v720) variable to `ON`. -For example: +A `CHECK` constraint restricts the values of a column in a table to meet your specified conditions. When the `CHECK` constraint is added to a table, TiDB checks whether the constraint is satisfied during the insertion or updates of data into the table. If the constraint is not met, an error is returned. 
+ +The syntax for the `CHECK` constraint in TiDB is the same as that in MySQL: ```sql -DROP TABLE IF EXISTS users; -CREATE TABLE users ( - id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, - username VARCHAR(60) NOT NULL, - UNIQUE KEY (username), - CONSTRAINT min_username_length CHECK (CHARACTER_LENGTH(username) >=4) -); -INSERT INTO users (username) VALUES ('a'); -SELECT * FROM users; +[CONSTRAINT [symbol]] CHECK (expr) [[NOT] ENFORCED] ``` +Syntax explanation: + +- `[]`: the content within `[]` is optional. +- `CONSTRAINT [symbol]`: specifies the name of the `CHECK` constraint. +- `CHECK (expr)`: specifies the constraint condition, where `expr` needs to be a boolean expression. For each row in the table, the calculation result of this expression must be one of `TRUE`, `FALSE`, or `UNKNOWN` (for `NULL` values). If the calculation result is `FALSE` for a row, it indicates that the constraint is violated. +- `[NOT] ENFORCED`: specifies whether to implement the constraint check. You can use it to enable or disable a `CHECK` constraint. + +### Add `CHECK` constraints + +In TiDB, you can add a `CHECK` constraint to a table using either the [`CREATE TABLE`](/sql-statements/sql-statement-create-table.md) or the [`ALTER TABLE`](/sql-statements/sql-statement-modify-column.md) statement. + +- Example of adding a `CHECK` constraint using the `CREATE TABLE` statement: + + ```sql + CREATE TABLE t(a INT CHECK(a > 10) NOT ENFORCED, b INT, c INT, CONSTRAINT c1 CHECK (b > c)); + ``` + +- Example of adding a `CHECK` constraint using the `ALTER TABLE` statement: + + ```sql + ALTER TABLE t ADD CONSTRAINT CHECK (1 < c); + ``` + +When adding or enabling a `CHECK` constraint, TiDB checks the existing data in the table. If any data violates the constraint, the operation of adding the `CHECK` constraint will fail and return an error. + +When adding a `CHECK` constraint, you can either specify a constraint name or leave the name unspecified. 
If no constraint name is specified, TiDB automatically generates a constraint name in the `_chk_<1, 2, 3...>` format. + +### View `CHECK` constraints + +You can view the constraint information in a table using the [`SHOW CREATE TABLE`](/sql-statements/sql-statement-show-create-table.md) statement. For example: + +```sql +SHOW CREATE TABLE t; ++-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| t | CREATE TABLE `t` ( + `a` int(11) DEFAULT NULL, + `b` int(11) DEFAULT NULL, + `c` int(11) DEFAULT NULL, +CONSTRAINT `c1` CHECK ((`b` > `c`)), +CONSTRAINT `t_chk_1` CHECK ((`a` > 10)) /*!80016 NOT ENFORCED */, +CONSTRAINT `t_chk_2` CHECK ((1 < `c`)) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin | ++-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +### Delete `CHECK` constraints + +When deleting a `CHECK` constraint, you need to specify the name of the constraint to be deleted. 
For example: + +```sql +ALTER TABLE t DROP CONSTRAINT t_chk_1; +``` + +### Enable or disable `CHECK` constraints + +When [adding a `CHECK` constraint](#add-check-constraints) to a table, you can specify whether TiDB needs to implement the constraint check during data insertion or updates. + +- If `NOT ENFORCED` is specified, TiDB does not check the constraint conditions during data insertion or updates. +- If `NOT ENFORCED` is not specified or `ENFORCED` is specified, TiDB checks the constraint conditions during data insertion or updates. + +In addition to specifying `[NOT] ENFORCED` when adding the constraint, you can also enable or disable a `CHECK` constraint using the `ALTER TABLE` statement. For example: + +```sql +ALTER TABLE t ALTER CONSTRAINT c1 NOT ENFORCED; +``` + +### MySQL compatibility + +- It is not supported to add a `CHECK` constraint while adding a column (for example, `ALTER TABLE t ADD COLUMN a CHECK(a > 0)`). In this case, only the column is added successfully, and TiDB ignores the `CHECK` constraint without reporting any error. +- It is not supported to use `ALTER TABLE t CHANGE a b int CHECK(b > 0)` to add a `CHECK` constraint. When this statement is executed, TiDB reports an error. + ## UNIQUE KEY Unique constraints mean that all non-null values in a unique index and a primary key column are unique. diff --git a/control-execution-plan.md b/control-execution-plan.md index 71787a569db5f..cd7a71ddece0e 100644 --- a/control-execution-plan.md +++ b/control-execution-plan.md @@ -10,4 +10,14 @@ The first two chapters of SQL Tuning introduce how to understand TiDB's executio - But hints change the SQL statement intrusively. In some scenarios, hints cannot be simply inserted. In [SQL Plan Management](/sql-plan-management.md), you will know how TiDB uses another syntax to non-intrusively control the generation of execution plans, and the methods of automatic execution plan evolution in the background. 
This method helps address issues such as execution plan instability caused by version upgrades and cluster performance degradation. - Finally, you will learn how to use the blocklist in [Blocklist of Optimization Rules and Expression Pushdown](/blocklist-control-plan.md). -Besides the preceding methods, the execution plan is also affected by some system variables. By modifying these variables at the system level or session level, you can control the generation of the execution plan. Starting from v7.1.0, TiDB introduces a relatively special variable [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710). This variable can accept multiple control items to control the behavior of the optimizer in a more fine-grained way, to prevent performance regression caused by behavior changes in the optimizer after cluster upgrade. + + +Besides the preceding methods, the execution plan is also affected by some system variables. By modifying these variables at the system level or session level, you can control the generation of the execution plan. Starting from v7.1.0, TiDB introduces a relatively special variable [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710). This variable can accept multiple control items to control the behavior of the optimizer in a more fine-grained way, to prevent performance regression caused by behavior changes in the optimizer after cluster upgrade. Refer to [Optimizer Fix Controls](/optimizer-fix-controls.md) for a more detailed introduction. + + + + + +Besides the preceding methods, the execution plan is also affected by some system variables. By modifying these variables at the system level or session level, you can control the generation of the execution plan. Starting from v7.1.0, TiDB introduces a relatively special variable [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710). 
This variable can accept multiple control items to control the behavior of the optimizer in a more fine-grained way, to prevent performance regression caused by behavior changes in the optimizer after cluster upgrade. Refer to [Optimizer Fix Controls](https://docs.pingcap.com/tidb/v7.2/optimizer-fix-controls) for a more detailed introduction. + + diff --git a/dashboard/dashboard-ops-reverse-proxy.md b/dashboard/dashboard-ops-reverse-proxy.md index 1be6962b53630..fb417e2e4d363 100644 --- a/dashboard/dashboard-ops-reverse-proxy.md +++ b/dashboard/dashboard-ops-reverse-proxy.md @@ -195,9 +195,9 @@ For a deployed cluster: {{< copyable "shell-regular" >}} - ```shell - tiup cluster reload CLUSTER_NAME -R pd - ``` + ```shell + tiup cluster reload CLUSTER_NAME -R pd + ``` See [Common TiUP Operations - Modify the configuration](/maintain-tidb-using-tiup.md#modify-the-configuration) for details. diff --git a/dashboard/dashboard-overview.md b/dashboard/dashboard-overview.md index 6ec04083a53a1..075c98fdf99a4 100644 --- a/dashboard/dashboard-overview.md +++ b/dashboard/dashboard-overview.md @@ -59,7 +59,7 @@ By default, this area shows the latest 10 slow queries in the entire cluster ove ![Recent slow queries](/media/dashboard/dashboard-overview-slow-query.png) -By default, the SQL query that is executed longer than 300 milliseconds is counted as a slow query and displayed on the table. You can change this threshold by modifying the [tidb_slow_log_threshold](/system-variables.md#tidb_slow_log_threshold) variable or the [slow-threshold](/tidb-configuration-file.md#slow-threshold) TiDB parameter. +By default, the SQL query that is executed longer than 300 milliseconds is counted as a slow query and displayed on the table. You can change this threshold by modifying the [tidb_slow_log_threshold](/system-variables.md#tidb_slow_log_threshold) variable or the [instance.tidb_slow_log_threshold](/tidb-configuration-file.md#tidb_slow_log_threshold) TiDB parameter. 
The content displayed in this area is consistent with the more detailed [Slow Queries Page](/dashboard/dashboard-slow-query.md). You can click the **Recent Slow Queries** title to view the complete list. For details of the columns in this table, see this [Slow Queries Page](/dashboard/dashboard-slow-query.md). diff --git a/dashboard/dashboard-resource-manager.md b/dashboard/dashboard-resource-manager.md index a1a7621602d05..d2abddcf101c3 100644 --- a/dashboard/dashboard-resource-manager.md +++ b/dashboard/dashboard-resource-manager.md @@ -57,7 +57,9 @@ Before resource planning, you need to know the overall capacity of the cluster. - When the time window range does not fall between 10 minutes and 24 hours, the following error is displayed `ERROR 1105 (HY000): the duration of calibration is too short, which could lead to inaccurate output. Please make the duration between 10m0s and 24h0m0s`. - - When the workload within the time window is too low, the following error is displayed `ERROR 1105 (HY000): The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead` is displayed. + - The monitoring metrics for the [capacity estimation based on the actual workload](/sql-statements/sql-statement-calibrate-resource.md#estimate-capacity-based-on-actual-workload) feature include `tikv_cpu_quota`, `tidb_server_maxprocs`, `resource_manager_resource_unit`, and `process_cpu_usage`. If the CPU quota monitoring data is empty, there will be an error with the corresponding monitoring metric name, for example, `Error 1105 (HY000): There is no CPU quota metrics, metrics 'tikv_cpu_quota' is empty`. 
+ + - If the workload in the time window is too low, or the `resource_manager_resource_unit` and `process_cpu_usage` monitoring data is missing, an error will be reported `Error 1105 (HY000): The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead`. In addition, because TiKV does not monitor CPU utilization on macOS, it does not support capacity estimation based on the actual workload, and will also report this error. You can select an appropriate time range using **CPU Usage** in the [Metrics](#metrics) section. diff --git a/dashboard/dashboard-session-sso.md b/dashboard/dashboard-session-sso.md index f5c1a0148d1af..2a46b56538374 100644 --- a/dashboard/dashboard-session-sso.md +++ b/dashboard/dashboard-session-sso.md @@ -19,28 +19,28 @@ TiDB Dashboard supports [OIDC](https://openid.net/connect/)-based Single Sign-On 4. Fill the **OIDC Client ID** and the **OIDC Discovery URL** fields in the form. - Generally, you can obtain the two fields from the SSO service provider: + Generally, you can obtain the two fields from the SSO service provider: - - OIDC Client ID is also called OIDC Token Issuer. - - OIDC Discovery URL is also called OIDC Token Audience. + - OIDC Client ID is also called OIDC Token Issuer. + - OIDC Discovery URL is also called OIDC Token Audience. 5. Click **Authorize Impersonation** and input the SQL password. - TiDB Dashboard will store this SQL password and use it to impersonate a normal SQL sign-in after an SSO sign-in is finished. + TiDB Dashboard will store this SQL password and use it to impersonate a normal SQL sign-in after an SSO sign-in is finished. - ![Sample Step](/media/dashboard/dashboard-session-sso-enable-1.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-enable-1.png) - > **Note:** - > - > The password you have entered will be encrypted and stored. 
The SSO sign-in will fail after the password of the SQL user is changed. In this case, you can re-enter the password to bring SSO back. + > **Note:** + > + > The password you have entered will be encrypted and stored. The SSO sign-in will fail after the password of the SQL user is changed. In this case, you can re-enter the password to bring SSO back. 6. Click **Authorize and Save**. - ![Sample Step](/media/dashboard/dashboard-session-sso-enable-2.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-enable-2.png) 7. Click **Update** (Update) to save the configuration. - ![Sample Step](/media/dashboard/dashboard-session-sso-enable-3.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-enable-3.png) Now SSO sign-in has been enabled for TiDB Dashboard. @@ -60,7 +60,7 @@ You can disable the SSO, which will completely erase the stored SQL password: 4. Click **Update** (Update) to save the configuration. - ![Sample Step](/media/dashboard/dashboard-session-sso-disable.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-disable.png) ### Re-enter the password after a password change @@ -72,7 +72,7 @@ The SSO sign-in will fail once the password of the SQL user is changed. In this 3. In the **Single Sign-On** section, Click **Authorize Impersonation** and input the updated SQL password. - ![Sample Step](/media/dashboard/dashboard-session-sso-reauthorize.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-reauthorize.png) 4. Click **Authorize and Save**. @@ -82,7 +82,7 @@ Once SSO is configured for TiDB Dashboard, you can sign in via SSO by taking fol 1. In the sign-in page of TiDB Dashboard, click **Sign in via Company Account**. - ![Sample Step](/media/dashboard/dashboard-session-sso-signin.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-signin.png) 2. Sign into the system with SSO service configured. @@ -102,7 +102,7 @@ First, create an Okta Application Integration to integrate SSO. 3. Click **Create App Integration**. 
- ![Sample Step](/media/dashboard/dashboard-session-sso-okta-1.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-okta-1.png) 4. In the poped up dialog, choose **OIDC - OpenID Connect** in **Sign-in method**. @@ -110,43 +110,43 @@ First, create an Okta Application Integration to integrate SSO. 6. Click the **Next** button. - ![Sample Step](/media/dashboard/dashboard-session-sso-okta-2.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-okta-2.png) 7. Fill **Sign-in redirect URIs** as follows: - ``` - http://DASHBOARD_IP:PORT/dashboard/?sso_callback=1 - ``` + ``` + http://DASHBOARD_IP:PORT/dashboard/?sso_callback=1 + ``` - Substitute `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port that you use to access the TiDB Dashboard in the browser. + Substitute `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port that you use to access the TiDB Dashboard in the browser. 8. Fill **Sign-out redirect URIs** as follows: - ``` - http://DASHBOARD_IP:PORT/dashboard/ - ``` + ``` + http://DASHBOARD_IP:PORT/dashboard/ + ``` - Similarly, substitute `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port. + Similarly, substitute `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port. - ![Sample Step](/media/dashboard/dashboard-session-sso-okta-3.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-okta-3.png) 9. Configure what type of users in your organization is allowed for SSO sign-in in the **Assignments** field, and then click **Save** to save the configuration. - ![Sample Step](/media/dashboard/dashboard-session-sso-okta-4.png) + ![Sample Step](/media/dashboard/dashboard-session-sso-okta-4.png) ### Step 2: Obtain OIDC information and fill in TiDB Dashboard 1. In the Application Integration just created in Okta, click **Sign On**. - ![Sample Step 1](/media/dashboard/dashboard-session-sso-okta-info-1.png) + ![Sample Step 1](/media/dashboard/dashboard-session-sso-okta-info-1.png) 2. 
Copy values of the **Issuer** and **Audience** fields from the **OpenID Connect ID Token** section. - ![Sample Step 2](/media/dashboard/dashboard-session-sso-okta-info-2.png) + ![Sample Step 2](/media/dashboard/dashboard-session-sso-okta-info-2.png) 3. Open the TiDB Dashboard configuration page, fill **OIDC Client ID** with **Issuer** obtained from the last step and fill **OIDC Discovery URL** with **Audience**. Then finish the authorization and save the configuration. For example: - ![Sample Step 3](/media/dashboard/dashboard-session-sso-okta-info-3.png) + ![Sample Step 3](/media/dashboard/dashboard-session-sso-okta-info-3.png) Now TiDB Dashboard has been configured to use Okta SSO for sign-in. @@ -160,33 +160,33 @@ Similar to Okta, [Auth0](https://auth0.com/) also provides OIDC SSO identity ser 2. Navigate on the left sidebar **Applications** > **Applications**. -3. Click **Create App Integration**. +3. Click **Create App Integration**. - ![Create Application](/media/dashboard/dashboard-session-sso-auth0-create-app.png) + ![Create Application](/media/dashboard/dashboard-session-sso-auth0-create-app.png) In the popped-up dialog, fill **Name**, for example, "TiDB Dashboard". Choose **Single Page Web Applications** in **Choose an application type**. Click **Create**. 4. Click **Settings**. - ![Settings](/media/dashboard/dashboard-session-sso-auth0-settings-1.png) + ![Settings](/media/dashboard/dashboard-session-sso-auth0-settings-1.png) 5. Fill **Allowed Callback URLs** as follows: - ``` - http://DASHBOARD_IP:PORT/dashboard/?sso_callback=1 - ``` + ``` + http://DASHBOARD_IP:PORT/dashboard/?sso_callback=1 + ``` - Replace `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port that you use to access the TiDB Dashboard in your browser. + Replace `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port that you use to access the TiDB Dashboard in your browser. 6. 
Fill **Allowed Logout URLs** as follows: - ``` - http://DASHBOARD_IP:PORT/dashboard/ + ``` + http://DASHBOARD_IP:PORT/dashboard/ ``` - Similarly, replace `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port. + Similarly, replace `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port. - ![Settings](/media/dashboard/dashboard-session-sso-auth0-settings-2.png) + ![Settings](/media/dashboard/dashboard-session-sso-auth0-settings-2.png) 7. Keep the default values for other settings and click **Save Changes**. @@ -196,7 +196,7 @@ Similar to Okta, [Auth0](https://auth0.com/) also provides OIDC SSO identity ser 2. Fill **OIDC Discovery URL** with the **Domain** field value prefixed with `https://` and suffixed with `/`, for example, `https://example.us.auth0.com/`. Complete authorization and save the configuration. - ![Settings](/media/dashboard/dashboard-session-sso-auth0-settings-3.png) + ![Settings](/media/dashboard/dashboard-session-sso-auth0-settings-3.png) Now TiDB Dashboard has been configured to use Auth0 SSO for sign-in. @@ -211,19 +211,19 @@ Now TiDB Dashboard has been configured to use Auth0 SSO for sign-in. 2. Navigate from the top sidebar **Applications**. 3. Click **Applications - Add**. - ![Settings](/media/dashboard/dashboard-session-sso-casdoor-settings-1.png) + ![Settings](/media/dashboard/dashboard-session-sso-casdoor-settings-1.png) 4. Fill **Name** and **Display name**, for example, **TiDB Dashboard**. 5. Add **Redirect URLs** as follows: - ``` - http://DASHBOARD_IP:PORT/dashboard/?sso_callback=1 - ``` + ``` + http://DASHBOARD_IP:PORT/dashboard/?sso_callback=1 + ``` + + Replace `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port that you use to access the TiDB Dashboard in your browser. - Replace `DASHBOARD_IP:PORT` with the actual domain (or IP address) and port that you use to access the TiDB Dashboard in your browser. 
- - ![Settings](/media/dashboard/dashboard-session-sso-casdoor-settings-2.png) + ![Settings](/media/dashboard/dashboard-session-sso-casdoor-settings-2.png) 6. Keep the default values for other settings and click **Save & Exit**. @@ -235,6 +235,6 @@ Now TiDB Dashboard has been configured to use Auth0 SSO for sign-in. 2. Fill **OIDC Discovery URL** with the **Domain** field value prefixed with `https://` and suffixed with `/`, for example, `https://casdoor.example.com/`. Complete authorization and save the configuration. - ![Settings](/media/dashboard/dashboard-session-sso-casdoor-settings-3.png) + ![Settings](/media/dashboard/dashboard-session-sso-casdoor-settings-3.png) Now TiDB Dashboard has been configured to use Casdoor SSO for sign-in. \ No newline at end of file diff --git a/dashboard/dashboard-slow-query.md b/dashboard/dashboard-slow-query.md index 97682335e810c..25e72f0386e53 100644 --- a/dashboard/dashboard-slow-query.md +++ b/dashboard/dashboard-slow-query.md @@ -8,7 +8,7 @@ aliases: ['/docs/dev/dashboard/dashboard-slow-query/'] On the Slow Queries page of TiDB Dashboard, you can search and view all slow queries in the cluster. -By default, SQL queries with an execution time of more than 300 milliseconds are considered as slow queries. These queries are recorded in the [slow query logs](/identify-slow-queries.md) and can be searched via TiDB Dashboard. You can adjust the threshold of slow queries through the [`tidb_slow_log_threshold`](/system-variables.md#tidb_slow_log_threshold) session variable or the [`slow-threshold`](/tidb-configuration-file.md#slow-threshold) TiDB parameter. +By default, SQL queries with an execution time of more than 300 milliseconds are considered as slow queries. These queries are recorded in the [slow query logs](/identify-slow-queries.md) and can be searched via TiDB Dashboard. 
You can adjust the threshold of slow queries through the [`tidb_slow_log_threshold`](/system-variables.md#tidb_slow_log_threshold) session variable or the [`instance.tidb_slow_log_threshold`](/tidb-configuration-file.md#tidb_slow_log_threshold) TiDB parameter. > **Note:** > @@ -36,6 +36,12 @@ Click **Columns** on the page and you can choose to see more columns. You can mo ![Show more columns](/media/dashboard/dashboard-slow-queries-list2-v620.png) +### Export slow queries locally + +Click ☰ (**More**) in the upper-right corner of the page to display the **Export** option. After you click **Export**, TiDB Dashboard exports slow queries in the current list as a CSV file. + +![Export slow queries locally](/media/dashboard/dashboard-slow-queries-export-v651.png) + ### Sort by column By default, the list is sorted by **Finish Time** in the descending order. Click column headings to sort by the column or switch the sorting order: diff --git a/data-type-json.md b/data-type-json.md index 15a8d2fbdf1d4..f602582a98ba8 100644 --- a/data-type-json.md +++ b/data-type-json.md @@ -28,7 +28,7 @@ For more information, see [JSON Functions](/functions-and-operators/json-functio ## Restrictions -- Currently, TiDB does not support pushing down `JSON` functions to TiFlash. +- Currently, TiDB only supports pushing down limited `JSON` functions to TiFlash. For more information, see [Push-down expressions](/tiflash/tiflash-supported-pushdown-calculations.md#push-down-expressions). - TiDB Backup & Restore (BR) versions earlier than v6.3.0 do not support recovering data containing JSON columns. No version of BR supports recovering data containing JSON columns to TiDB clusters earlier than v6.3.0. - Do not use any replication tool to replicate data containing non-standard `JSON` data types, such as `DATE`, `DATETIME`, and `TIME`. 
diff --git a/data-type-string.md b/data-type-string.md index 2b4fc5289aecb..30479af25cc2b 100644 --- a/data-type-string.md +++ b/data-type-string.md @@ -54,7 +54,16 @@ TINYTEXT [CHARACTER SET charset_name] [COLLATE collation_name] ### `MEDIUMTEXT` type -The `MEDIUMTEXT` type is similar to the [`TEXT` type](#text-type). The difference is that the maximum column length of `MEDIUMTEXT` is 16,777,215. But due to the [Limitation on a single column in TiDB](/tidb-limitations.md#limitation-on-a-single-column), the maximum storage size of a single column in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + +The `MEDIUMTEXT` type is similar to the [`TEXT` type](#text-type). The difference is that the maximum column length of `MEDIUMTEXT` is 16,777,215. But due to the limitation of [`txn-entry-size-limit`](/tidb-configuration-file.md#txn-entry-size-limit-new-in-v50), the maximum storage size of a single row in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + + + +The `MEDIUMTEXT` type is similar to the [`TEXT` type](#text-type). The difference is that the maximum column length of `MEDIUMTEXT` is 16,777,215. But due to the limitation of [`txn-entry-size-limit`](https://docs.pingcap.com/tidb/stable/tidb-configuration-file#txn-entry-size-limit-new-in-v50), the maximum storage size of a single row in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + ```sql MEDIUMTEXT [CHARACTER SET charset_name] [COLLATE collation_name] @@ -62,7 +71,16 @@ MEDIUMTEXT [CHARACTER SET charset_name] [COLLATE collation_name] ### `LONGTEXT` type -The `LONGTEXT` type is similar to the [`TEXT` type](#text-type). The difference is that the maximum column length of `LONGTEXT` is 4,294,967,295. 
But due to the [Limitation on a single column in TiDB](/tidb-limitations.md#limitation-on-a-single-column), the maximum storage size of a single column in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + +The `LONGTEXT` type is similar to the [`TEXT` type](#text-type). The difference is that the maximum column length of `LONGTEXT` is 4,294,967,295. But due to the limitation of [`txn-entry-size-limit`](/tidb-configuration-file.md#txn-entry-size-limit-new-in-v50), the maximum storage size of a single row in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + + + +The `LONGTEXT` type is similar to the [`TEXT` type](#text-type). The difference is that the maximum column length of `LONGTEXT` is 4,294,967,295. But due to the limitation of [`txn-entry-size-limit`](https://docs.pingcap.com/tidb/stable/tidb-configuration-file#txn-entry-size-limit-new-in-v50), the maximum storage size of a single row in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + ```sql LONGTEXT [CHARACTER SET charset_name] [COLLATE collation_name] @@ -102,7 +120,16 @@ TINYBLOB ### `MEDIUMBLOB` type -The `MEDIUMBLOB` type is similar to the [`BLOB` type](#blob-type). The difference is that the maximum column length of `MEDIUMBLOB` is 16,777,215. But due to the [Limitation on a single column in TiDB](/tidb-limitations.md#limitation-on-a-single-column), the maximum storage size of a single column in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + +The `MEDIUMBLOB` type is similar to the [`BLOB` type](#blob-type). The difference is that the maximum column length of `MEDIUMBLOB` is 16,777,215. But due to the limitation of [`txn-entry-size-limit`](/tidb-configuration-file.md#txn-entry-size-limit-new-in-v50), the maximum storage size of a single row in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. 
+ + + + +The `MEDIUMBLOB` type is similar to the [`BLOB` type](#blob-type). The difference is that the maximum column length of `MEDIUMBLOB` is 16,777,215. But due to the limitation of [`txn-entry-size-limit`](https://docs.pingcap.com/tidb/stable/tidb-configuration-file#txn-entry-size-limit-new-in-v50), the maximum storage size of a single row in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + ```sql MEDIUMBLOB @@ -110,7 +137,16 @@ MEDIUMBLOB ### `LONGBLOB` type -The `LONGBLOB` type is similar to the [`BLOB` type](#blob-type). The difference is that the maximum column length of `LONGBLOB` is 4,294,967,295. But due to the [Limitation on a single column in TiDB](/tidb-limitations.md#limitation-on-a-single-column), the maximum storage size of a single column in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + +The `LONGBLOB` type is similar to the [`BLOB` type](#blob-type). The difference is that the maximum column length of `LONGBLOB` is 4,294,967,295. But due to the limitation of [`txn-entry-size-limit`](/tidb-configuration-file.md#txn-entry-size-limit-new-in-v50), the maximum storage size of a single row in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. + + + + +The `LONGBLOB` type is similar to the [`BLOB` type](#blob-type). The difference is that the maximum column length of `LONGBLOB` is 4,294,967,295. But due to the limitation of [`txn-entry-size-limit`](https://docs.pingcap.com/tidb/stable/tidb-configuration-file#txn-entry-size-limit-new-in-v50), the maximum storage size of a single row in TiDB is 6 MiB by default and can be increased to 120 MiB by changing the configuration. 
+ + ```sql LONGBLOB diff --git a/ddl-introduction.md b/ddl-introduction.md index 096b266cda0c2..ec994786815b5 100644 --- a/ddl-introduction.md +++ b/ddl-introduction.md @@ -101,7 +101,7 @@ Before TiDB v6.2.0, the DDL execution framework had the following limitations: - The DDL Owner always processes DDL jobs in a first-in-first-out way. - The DDL Owner can only execute one DDL task of the same type (either logical or physical) at a time, which is relatively strict, and affects the user experience. -These limitations might lead to some "unintended" DDL blocking behavior. For more details, see [SQL FAQ - DDL Execution](/faq/sql-faq.md#ddl-execution). +These limitations might lead to some "unintended" DDL blocking behavior. For more details, see [SQL FAQ - DDL Execution](https://docs.pingcap.com/tidb/stable/sql-faq#ddl-execution).
@@ -178,14 +178,14 @@ When TiDB is adding an index, the phase of backfilling data will cause read and If a completed DDL task is canceled, you can see the `DDL Job:90 not found` error in the `RESULT` column, which means that the task has been removed from the DDL waiting queue. -- `ADMIN PAUSE DDL JOBS job_id [, job_id]`: Used to pause the DDL tasks that are being executed. After the command is executed, the SQL statement that executes the DDL task is displayed as being executed, and the background task is paused. (Experimental feature) +- `ADMIN PAUSE DDL JOBS job_id [, job_id]`: Used to pause the DDL jobs that are being executed. After the command is executed, the SQL statement that executes the DDL job is displayed as being executed, while the background job has been paused. For details, refer to [`ADMIN PAUSE DDL JOBS`](/sql-statements/sql-statement-admin-pause-ddl.md). (Experimental feature) - You can pause only DDL tasks that are in progress or still in the queue. Otherwise, the `Job 3 can't be paused now` error is shown in the `RESULT` column. + You can only pause DDL tasks that are in progress or still in the queue. Otherwise, the `Job 3 can't be paused now` error is shown in the `RESULT` column. -- `ADMIN RESUME DDL JOBS job_id [, job_id]`: Used to resume the DDL tasks that have been paused. After the command is executed, the SQL statement that executes the DDL task is displayed as being executed, and the background task is resumed. (Experimental feature) +- `ADMIN RESUME DDL JOBS job_id [, job_id]`: Used to resume the DDL tasks that have been paused. After the command is executed, the SQL statement that executes the DDL task is displayed as being executed, and the background task is resumed. For details, refer to [`ADMIN RESUME DDL JOBS`](/sql-statements/sql-statement-admin-resume-ddl.md). (Experimental feature) You can only resume a paused DDL task. Otherwise, the `Job 3 can't be resumed` error is shown in the `RESULT` column. 
## Common questions -For common questions about DDL execution, see [SQL FAQ - DDL execution](/faq/sql-faq.md#ddl-execution). +For common questions about DDL execution, see [SQL FAQ - DDL execution](https://docs.pingcap.com/tidb/stable/sql-faq). diff --git a/develop/dev-guide-aws-appflow-integration.md b/develop/dev-guide-aws-appflow-integration.md index 608a3c30781ba..1305234a01a60 100644 --- a/develop/dev-guide-aws-appflow-integration.md +++ b/develop/dev-guide-aws-appflow-integration.md @@ -7,9 +7,9 @@ summary: Introduce how to integrate TiDB with Amazon AppFlow step by step. [Amazon AppFlow](https://aws.amazon.com/appflow/) is a fully managed API integration service that you use to connect your software as a service (SaaS) applications to AWS services, and securely transfer data. With Amazon AppFlow, you can import and export data from and to TiDB into many types of data providers, such as Salesforce, Amazon S3, LinkedIn, and GitHub. For more information, see [Supported source and destination applications](https://docs.aws.amazon.com/appflow/latest/userguide/app-specific.html) in AWS documentation. -This document describes how to integrate TiDB with Amazon AppFlow and takes integrating a TiDB Cloud Serverless Tier cluster as an example. +This document describes how to integrate TiDB with Amazon AppFlow and takes integrating a TiDB Serverless cluster as an example. -If you do not have a TiDB cluster, you can create a [Serverless Tier](https://tidbcloud.com/console/clusters) cluster, which is free and can be created in approximately 30 seconds. +If you do not have a TiDB cluster, you can create a [TiDB Serverless](https://tidbcloud.com/console/clusters) cluster, which is free and can be created in approximately 30 seconds. ## Prerequisites @@ -66,7 +66,7 @@ git clone https://github.com/pingcap-inc/tidb-appflow-integration > > - The `--guided` option uses prompts to guide you through the deployment. 
Your input will be stored in a configuration file, which is `samconfig.toml` by default. > - `stack_name` specifies the name of AWS Lambda that you are deploying. - > - This prompted guide uses AWS as the cloud provider of TiDB Cloud Serverless Tier. To use Amazon S3 as the source or destination, you need to set the `region` of AWS Lambda as the same as that of Amazon S3. + > - This prompted guide uses AWS as the cloud provider of TiDB Serverless. To use Amazon S3 as the source or destination, you need to set the `region` of AWS Lambda as the same as that of Amazon S3. > - If you have already run `sam deploy --guided` before, you can just run `sam deploy` instead, and SAM CLI will use the configuration file `samconfig.toml` to simplify the interaction. If you see a similar output as follows, this Lambda is successfully deployed. @@ -148,7 +148,7 @@ Choose the **Source details** and **Destination details**. TiDB connector can be ``` 5. After the `sf_account` table is created, click **Connect**. A connection dialog is displayed. -6. In the **Connect to TiDB-Connector** dialog, enter the connection properties of the TiDB cluster. If you use a TiDB Cloud Serverless Tier cluster, you need to set the **TLS** option to `Yes`, which lets the TiDB connector use the TLS connection. Then, click **Connect**. +6. In the **Connect to TiDB-Connector** dialog, enter the connection properties of the TiDB cluster. If you use a TiDB Serverless cluster, you need to set the **TLS** option to `Yes`, which lets the TiDB connector use the TLS connection. Then, click **Connect**. ![tidb connection message](/media/develop/aws-appflow-step-tidb-connection-message.png) @@ -244,5 +244,5 @@ test> SELECT * FROM sf_account; - If anything goes wrong, you can navigate to the [CloudWatch](https://console.aws.amazon.com/cloudwatch/home) page on the AWS Management Console to get logs. 
- The steps in this document are based on [Building custom connectors using the Amazon AppFlow Custom Connector SDK](https://aws.amazon.com/blogs/compute/building-custom-connectors-using-the-amazon-appflow-custom-connector-sdk/). -- [TiDB Cloud Serverless Tier](https://docs.pingcap.com/tidbcloud/select-cluster-tier#serverless-tier-beta) is **NOT** a production environment. +- [TiDB Serverless](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless) is **NOT** a production environment. - To prevent excessive length, the examples in this document only show the `Insert` strategy, but `Update` and `Upsert` strategies are also tested and can be used. \ No newline at end of file diff --git a/develop/dev-guide-build-cluster-in-cloud.md b/develop/dev-guide-build-cluster-in-cloud.md index e71393be66a30..ec13e6d894b0e 100644 --- a/develop/dev-guide-build-cluster-in-cloud.md +++ b/develop/dev-guide-build-cluster-in-cloud.md @@ -1,15 +1,15 @@ --- -title: Build a TiDB Cluster in TiDB Cloud (Serverless Tier) -summary: Learn how to build a TiDB cluster in TiDB Cloud (Serverless Tier) and connect to a TiDB Cloud cluster. +title: Build a TiDB Serverless Cluster +summary: Learn how to build a TiDB Serverless cluster in TiDB Cloud and connect to it. --- -# Build a TiDB Cluster in TiDB Cloud (Serverless Tier) +# Build a TiDB Serverless Cluster -This document walks you through the quickest way to get started with TiDB. You will use [TiDB Cloud](https://en.pingcap.com/tidb-cloud) to create a Serverless Tier cluster, connect to it, and run a sample application on it. +This document walks you through the quickest way to get started with TiDB. You will use [TiDB Cloud](https://en.pingcap.com/tidb-cloud) to create a TiDB Serverless cluster, connect to it, and run a sample application on it. If you need to run TiDB on your local machine, see [Starting TiDB Locally](/quick-start-with-tidb.md). 
@@ -21,7 +21,7 @@ This document walks you through the quickest way to get started with TiDB Cloud. -## Step 1. Create a Serverless Tier cluster +## Step 1. Create a TiDB Serverless cluster 1. If you do not have a TiDB Cloud account, click [here](https://tidbcloud.com/free-trial) to sign up for an account. @@ -29,9 +29,9 @@ This document walks you through the quickest way to get started with TiDB Cloud. 3. On the [**Clusters**](https://tidbcloud.com/console/clusters) page, click **Create Cluster**. -4. On the **Create Cluster** page, **Serverless Tier** is selected by default. Update the default cluster name if necessary, and then select the region where you want to create your cluster. +4. On the **Create Cluster** page, **Serverless** is selected by default. Update the default cluster name if necessary, and then select the region where you want to create your cluster. -5. Click **Create** to create a Serverless Tier cluster. +5. Click **Create** to create a TiDB Serverless cluster. Your TiDB Cloud cluster will be created in approximately 30 seconds. @@ -45,7 +45,7 @@ This document walks you through the quickest way to get started with TiDB Cloud. > **Note:** > -> For [Serverless Tier clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#serverless-tier), when you connect to your cluster, you must include the prefix for your cluster in the user name and wrap the name with quotation marks. For more information, see [User name prefix](https://docs.pingcap.com/tidbcloud/select-cluster-tier#user-name-prefix). +> For [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless), when you connect to your cluster, you must include the prefix for your cluster in the user name and wrap the name with quotation marks. For more information, see [User name prefix](https://docs.pingcap.com/tidbcloud/select-cluster-tier#user-name-prefix). 
@@ -53,7 +53,7 @@ This document walks you through the quickest way to get started with TiDB Cloud. > **Note:** > -> For [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta), when you connect to your cluster, you must include the prefix for your cluster in the user name and wrap the name with quotation marks. For more information, see [User name prefix](/tidb-cloud/select-cluster-tier.md#user-name-prefix). +> For [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless), when you connect to your cluster, you must include the prefix for your cluster in the user name and wrap the name with quotation marks. For more information, see [User name prefix](/tidb-cloud/select-cluster-tier.md#user-name-prefix). @@ -130,7 +130,7 @@ mysql Ver 15.1 Distrib 5.5.68-MariaDB, for Linux (x86_64) using readline 5.1 -2. Run the connection string obtained in [Step 1](#step-1-create-a-serverless-tier-cluster). +2. Run the connection string obtained in [Step 1](#step-1-create-a-tidb-serverless-cluster). {{< copyable "shell-regular" >}} @@ -142,8 +142,8 @@ mysql Ver 15.1 Distrib 5.5.68-MariaDB, for Linux (x86_64) using readline 5.1 > **Note:** > -> - When you connect to a Serverless Tier cluster, you must [use the TLS connection](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters). -> - If you encounter problems when connecting to a Serverless Tier cluster, you can read [Secure Connections to Serverless Tier Clusters](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters) for more information. +> - When you connect to a TiDB Serverless cluster, you must [use the TLS connection](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters). 
+> - If you encounter problems when connecting to a TiDB Serverless cluster, you can read [Secure Connections to TiDB Serverless Clusters](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters) for more information. @@ -151,8 +151,8 @@ mysql Ver 15.1 Distrib 5.5.68-MariaDB, for Linux (x86_64) using readline 5.1 > **Note:** > -> - When you connect to a Serverless Tier cluster, you must [use the TLS connection](/tidb-cloud/secure-connections-to-serverless-tier-clusters.md). -> - If you encounter problems when connecting to a Serverless Tier cluster, you can read [Secure Connections to Serverless Tier Clusters](/tidb-cloud/secure-connections-to-serverless-tier-clusters.md) for more information. +> - When you connect to a TiDB Serverless cluster, you must [use the TLS connection](/tidb-cloud/secure-connections-to-serverless-tier-clusters.md). +> - If you encounter problems when connecting to a TiDB Serverless cluster, you can read [Secure Connections to TiDB Serverless Clusters](/tidb-cloud/secure-connections-to-serverless-tier-clusters.md) for more information. diff --git a/develop/dev-guide-choose-driver-or-orm.md b/develop/dev-guide-choose-driver-or-orm.md index dd4244128ccef..55e8e49a6234d 100644 --- a/develop/dev-guide-choose-driver-or-orm.md +++ b/develop/dev-guide-choose-driver-or-orm.md @@ -27,7 +27,7 @@ This section describes how to use drivers and ORM frameworks in Java. Support level: **Full** -You can follow the [MySQL documentation](https://dev.mysql.com/doc/connector-j/8.0/en/) to download and configure a Java JDBC driver. It is recommended to use MySQL Connector/J 8.0.29 or later with TiDB v6.3.0 and newer. +You can follow the [MySQL documentation](https://dev.mysql.com/doc/connector-j/8.0/en/) to download and configure a Java JDBC driver. It is recommended to use MySQL Connector/J 8.0.33 or later with TiDB v6.3.0 and newer. 
> **Tip:** > @@ -163,7 +163,7 @@ implementation 'org.mybatis:mybatis:3.5.9' implementation 'mysql:mysql-connector-java:5.1.49' ``` -For an example of using MyBatis to build a TiDB application, see [Build a simple CRUD app with TiDB and Mybatis](/develop/dev-guide-sample-application-java-mybatis.md). +For an example of using MyBatis to build a TiDB application, see [Build a simple CRUD app with TiDB and MyBatis](/develop/dev-guide-sample-application-java-mybatis.md).
@@ -275,7 +275,7 @@ For an example of using MySQL Connector/Python to build a TiDB application, see
-Support level: **Compatible** +Support level: **Full** [SQLAlchemy](https://www.sqlalchemy.org/) is a popular ORM framework for Python. To get all dependencies in your application, you can use the `pip install SQLAlchemy==1.4.44` command. It is recommended to use SQLAlchemy 1.4.44 or later versions. diff --git a/develop/dev-guide-connect-to-tidb.md b/develop/dev-guide-connect-to-tidb.md index e36804c067079..9ebe316236bbb 100644 --- a/develop/dev-guide-connect-to-tidb.md +++ b/develop/dev-guide-connect-to-tidb.md @@ -7,7 +7,7 @@ summary: Learn how to connect to TiDB. TiDB is highly compatible with the MySQL protocol. For a full list of client link parameters, see [MySQL Client Options](https://dev.mysql.com/doc/refman/5.7/en/mysql-command-options.html). -TiDB supports the [MySQL Client/Server Protocol](https://dev.mysql.com/doc/internals/en/client-server-protocol.html), which allows most client drivers and ORM frameworks to connect to TiDB just as they connect to MySQL. +TiDB supports the [MySQL Client/Server Protocol](https://dev.mysql.com/doc/dev/mysql-server/latest/PAGE_PROTOCOL.html), which allows most client drivers and ORM frameworks to connect to TiDB just as they connect to MySQL. ## MySQL @@ -132,4 +132,4 @@ For more information about TiDB SQL users, see [TiDB User Account Management](/u For more information about TiDB SQL users, see [TiDB User Account Management](https://docs.pingcap.com/tidb/stable/user-account-management). - \ No newline at end of file + diff --git a/develop/dev-guide-create-database.md b/develop/dev-guide-create-database.md index d79fbbb44462f..c9ef93d081af3 100644 --- a/develop/dev-guide-create-database.md +++ b/develop/dev-guide-create-database.md @@ -11,7 +11,7 @@ This document describes how to create a database using SQL and various programmi Before creating a database, do the following: -- [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md). 
+- [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md). - Read [Schema Design Overview](/develop/dev-guide-schema-design-overview.md). ## What is database diff --git a/develop/dev-guide-create-secondary-indexes.md b/develop/dev-guide-create-secondary-indexes.md index 486cc168af422..0e57fd3c6f4a9 100644 --- a/develop/dev-guide-create-secondary-indexes.md +++ b/develop/dev-guide-create-secondary-indexes.md @@ -11,7 +11,7 @@ This document describes how to create a secondary index using SQL and various pr Before creating a secondary index, do the following: -- [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md). +- [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md). - Read [Schema Design Overview](/develop/dev-guide-schema-design-overview.md). - [Create a Database](/develop/dev-guide-create-database.md). - [Create a Table](/develop/dev-guide-create-table.md). diff --git a/develop/dev-guide-create-table.md b/develop/dev-guide-create-table.md index 1aa5d54f64dbe..058828e5a82f5 100644 --- a/develop/dev-guide-create-table.md +++ b/develop/dev-guide-create-table.md @@ -11,7 +11,7 @@ This document introduces how to create tables using the SQL statement and the re Before reading this document, make sure that the following tasks are completed: -- [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md). +- [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md). - Read [Schema Design Overview](/develop/dev-guide-schema-design-overview.md). - [Create a Database](/develop/dev-guide-create-database.md). @@ -290,7 +290,7 @@ ALTER TABLE `bookshop`.`ratings` SET TIFLASH REPLICA 1; > **Note:** > -> If your cluster does not contain **TiFlash** nodes, this SQL statement will report an error: `1105 - the tiflash replica count: 1 should be less than the total tiflash server count: 0`. 
You can use [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster) to create a Serverless Tier cluster that includes **TiFlash**. +> If your cluster does not contain **TiFlash** nodes, this SQL statement will report an error: `1105 - the tiflash replica count: 1 should be less than the total tiflash server count: 0`. You can use [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster) to create a TiDB Serverless cluster that includes **TiFlash**. Then you can go on to perform the following query: diff --git a/develop/dev-guide-delete-data.md b/develop/dev-guide-delete-data.md index 5dfb874bc7566..96278a73313a5 100644 --- a/develop/dev-guide-delete-data.md +++ b/develop/dev-guide-delete-data.md @@ -11,7 +11,7 @@ This document describes how to use the [DELETE](/sql-statements/sql-statement-de Before reading this document, you need to prepare the following: -- [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md) +- [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md) - Read [Schema Design Overview](/develop/dev-guide-schema-design-overview.md), [Create a Database](/develop/dev-guide-create-database.md), [Create a Table](/develop/dev-guide-create-table.md), and [Create Secondary Indexes](/develop/dev-guide-create-secondary-indexes.md) - [Insert Data](/develop/dev-guide-insert-data.md) diff --git a/develop/dev-guide-insert-data.md b/develop/dev-guide-insert-data.md index c75bdc44ce133..cab09a938df78 100644 --- a/develop/dev-guide-insert-data.md +++ b/develop/dev-guide-insert-data.md @@ -13,7 +13,7 @@ This document describes how to insert data into TiDB by using the SQL language w Before reading this document, you need to prepare the following: -- [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md). 
+- [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md). - Read [Schema Design Overview](/develop/dev-guide-schema-design-overview.md), [Create a Database](/develop/dev-guide-create-database.md), [Create a Table](/develop/dev-guide-create-table.md), and [Create Secondary Indexes](/develop/dev-guide-create-secondary-indexes.md) ## Insert rows diff --git a/develop/dev-guide-outdated-for-django.md b/develop/dev-guide-outdated-for-django.md index 7a0f8240cb80e..632acb1ce1759 100644 --- a/develop/dev-guide-outdated-for-django.md +++ b/develop/dev-guide-outdated-for-django.md @@ -25,7 +25,7 @@ The above command starts a temporary and single-node cluster with mock TiKV. The > > To deploy a "real" TiDB cluster for production, see the following guides: > -> + [Deploy TiDB using TiUP for On-Premises](https://docs.pingcap.com/tidb/v5.1/production-deployment-using-tiup) +> + [Deploy TiDB using TiUP for Self-Hosted Environment](https://docs.pingcap.com/tidb/v5.1/production-deployment-using-tiup) > + [Deploy TiDB on Kubernetes](https://docs.pingcap.com/tidb-in-kubernetes/stable) > > You can also [use TiDB Cloud](https://pingcap.com/products/tidbcloud/), a fully-managed Database-as-a-Service (DBaaS) of TiDB. diff --git a/develop/dev-guide-proxysql-integration.md b/develop/dev-guide-proxysql-integration.md index b89fd24ce7d84..e0465d329a0b4 100644 --- a/develop/dev-guide-proxysql-integration.md +++ b/develop/dev-guide-proxysql-integration.md @@ -119,11 +119,11 @@ systemctl start docker ### Option 1: Integrate TiDB Cloud with ProxySQL -For this integration, you will be using the [ProxySQL Docker image](https://hub.docker.com/r/proxysql/proxysql) along with a TiDB Serverless Tier cluster. The following steps will set up ProxySQL on port `16033`, so make sure this port is available. +For this integration, you will be using the [ProxySQL Docker image](https://hub.docker.com/r/proxysql/proxysql) along with a TiDB Serverless cluster. 
The following steps will set up ProxySQL on port `16033`, so make sure this port is available. -#### Step 1. Create a TiDB Cloud Serverless Tier cluster +#### Step 1. Create a TiDB Serverless cluster -1. [Create a free TiDB Serverless Tier cluster](https://docs.pingcap.com/tidbcloud/tidb-cloud-quickstart#step-1-create-a-tidb-cluster). Remember the root password that you set for your cluster. +1. [Create a free TiDB Serverless cluster](https://docs.pingcap.com/tidbcloud/tidb-cloud-quickstart#step-1-create-a-tidb-cluster). Remember the root password that you set for your cluster. 2. Get your cluster hostname, port, and username for later use. 1. On the [Clusters](https://tidbcloud.com/console/clusters) page, click your cluster name to go to the cluster overview page. @@ -327,12 +327,12 @@ For this integration, you will be using the [ProxySQL Docker image](https://hub. > > 1. Adds a user using the username and password of your cluster. > 2. Assigns the user to the monitoring account. - > 3. Adds your TiDB Serverless Tier cluster to the list of hosts. - > 4. Enables a secure connection between ProxySQL and the TiDB Serverless Tier cluster. + > 3. Adds your TiDB Serverless cluster to the list of hosts. + > 4. Enables a secure connection between ProxySQL and the TiDB Serverless cluster. > > To have a better understanding, it is strongly recommended that you check the `proxysql-prepare.sql` file. To learn more about ProxySQL configuration, see [ProxySQL documentation](https://proxysql.com/documentation/proxysql-configuration/). - The following is an example output. You will see that the hostname of your cluster is shown in the output, which means that the connectivity between ProxySQL and the TiDB Serverless Tier cluster is established. + The following is an example output. You will see that the hostname of your cluster is shown in the output, which means that the connectivity between ProxySQL and the TiDB Serverless cluster is established. 
``` *************************** 1. row *************************** @@ -388,7 +388,7 @@ For this integration, you will be using the [ProxySQL Docker image](https://hub. SELECT VERSION(); ``` - If the TiDB version is displayed, you are successfully connected to your TiDB Serverless Tier cluster through ProxySQL. To exit from the MySQL client anytime, enter `quit` and press enter. + If the TiDB version is displayed, you are successfully connected to your TiDB Serverless cluster through ProxySQL. To exit from the MySQL client anytime, enter `quit` and press enter. > **Note:** > @@ -636,7 +636,7 @@ ProxySQL can be installed on many different platforms. The following takes CentO For a full list of supported platforms and the corresponding version requirements, see [ProxySQL documentation](https://proxysql.com/documentation/installing-proxysql/). -#### Step 1. Create a TiDB Cloud Dedicated Tier cluster +#### Step 1. Create a TiDB Dedicated cluster For detailed steps, see [Create a TiDB Cluster](https://docs.pingcap.com/tidbcloud/create-tidb-cluster). @@ -687,7 +687,7 @@ To use ProxySQL as a proxy for TiDB, you need to configure ProxySQL. To do so, y The above step will take you to the ProxySQL admin prompt. -2. Configure the TiDB clusters to be used, where you can add one or multiple TiDB clusters to ProxySQL. The following statement will add one TiDB Cloud Dedicated Tier cluster for example. You need to replace `` and `` with your TiDB Cloud endpoint and port (the default port is `4000`). +2. Configure the TiDB clusters to be used, where you can add one or multiple TiDB clusters to ProxySQL. The following statement will add one TiDB Dedicated cluster for example. You need to replace `` and `` with your TiDB Cloud endpoint and port (the default port is `4000`). 
```sql INSERT INTO mysql_servers(hostgroup_id, hostname, port) diff --git a/develop/dev-guide-sample-application-golang-gorm.md b/develop/dev-guide-sample-application-golang-gorm.md index ae8157657e73e..fb49a0e222ad9 100644 --- a/develop/dev-guide-sample-application-golang-gorm.md +++ b/develop/dev-guide-sample-application-golang-gorm.md @@ -14,7 +14,7 @@ This document describes how to use TiDB and GORM to build a simple CRUD applicat > **Note:** > -> It is recommended to use Golang 1.16 or a later version. +> It is recommended to use Golang 1.20 or a later version. ## Step 1. Launch your TiDB cluster @@ -22,9 +22,9 @@ This document describes how to use TiDB and GORM to build a simple CRUD applicat The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -34,7 +34,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -44,8 +44,6 @@ See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud git clone https://github.com/pingcap-inc/tidb-example-golang.git ``` -Compared with GORM, the go-sql-driver/mysql implementation might be not a best practice, because you need to write error handling logic, close `*sql.Rows` manually and cannot reuse code easily, which makes your code slightly redundant. - The following instructions take `v1.23.5` as an example. 
To adapt TiDB transactions, write a toolkit [util](https://github.com/pingcap-inc/tidb-example-golang/tree/main/util) according to the following code: @@ -54,49 +52,26 @@ To adapt TiDB transactions, write a toolkit [util](https://github.com/pingcap-in package util import ( - "context" - "database/sql" + "gorm.io/gorm" ) -type TiDBSqlTx struct { - *sql.Tx - conn *sql.Conn - pessimistic bool -} - -func TiDBSqlBegin(db *sql.DB, pessimistic bool) (*TiDBSqlTx, error) { - ctx := context.Background() - conn, err := db.Conn(ctx) - if err != nil { - return nil, err +// TiDBGormBegin start a TiDB and Gorm transaction as a block. If no error is returned, the transaction will be committed. Otherwise, the transaction will be rolled back. +func TiDBGormBegin(db *gorm.DB, pessimistic bool, fc func(tx *gorm.DB) error) (err error) { + session := db.Session(&gorm.Session{}) + if session.Error != nil { + return session.Error } + if pessimistic { - _, err = conn.ExecContext(ctx, "set @@tidb_txn_mode=?", "pessimistic") + session = session.Exec("set @@tidb_txn_mode=pessimistic") } else { - _, err = conn.ExecContext(ctx, "set @@tidb_txn_mode=?", "optimistic") - } - if err != nil { - return nil, err + session = session.Exec("set @@tidb_txn_mode=optimistic") } - tx, err := conn.BeginTx(ctx, nil) - if err != nil { - return nil, err - } - return &TiDBSqlTx{ - conn: conn, - Tx: tx, - pessimistic: pessimistic, - }, nil -} -func (tx *TiDBSqlTx) Commit() error { - defer tx.conn.Close() - return tx.Tx.Commit() -} - -func (tx *TiDBSqlTx) Rollback() error { - defer tx.conn.Close() - return tx.Tx.Rollback() + if session.Error != nil { + return session.Error + } + return session.Transaction(fc) } ``` @@ -280,7 +255,7 @@ The following content introduces how to run the code step by step. 
### Step 3.1 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, modify the value of the `dsn` in `gorm.go`: +If you are using a TiDB Serverless cluster, modify the value of the `dsn` in `gorm.go`: ```go dsn := "root:@tcp(127.0.0.1:4000)/test?charset=utf8mb4" @@ -292,15 +267,10 @@ Suppose that the password you set is `123456`, and the connection parameters you - Port: `4000` - User: `2aEp24QWEDLqRFs.root` -In this case, you can modify the `mysql.RegisterTLSConfig` and `dsn` as follows: +In this case, you can modify the `dsn` as follows: ```go -mysql.RegisterTLSConfig("register-tidb-tls", &tls.Config { - MinVersion: tls.VersionTLS12, - ServerName: "xxx.tidbcloud.com", -}) - -dsn := "2aEp24QWEDLqRFs.root:123456@tcp(xxx.tidbcloud.com:4000)/test?charset=utf8mb4&tls=register-tidb-tls" +dsn := "2aEp24QWEDLqRFs.root:123456@tcp(xxx.tidbcloud.com:4000)/test?charset=utf8mb4&tls=true" ``` ### Step 3.2 Run the code diff --git a/develop/dev-guide-sample-application-golang-sql-driver.md b/develop/dev-guide-sample-application-golang-sql-driver.md index 4afa45855b71f..705164f482db8 100644 --- a/develop/dev-guide-sample-application-golang-sql-driver.md +++ b/develop/dev-guide-sample-application-golang-sql-driver.md @@ -13,7 +13,7 @@ This document describes how to use TiDB and [Go-MySQL-Driver](https://github.com > **Note:** > -> It is recommended to use Golang 1.16 or a later version. +> It is recommended to use Golang 1.20 or a later version. ## Step 1. Launch your TiDB cluster @@ -21,9 +21,9 @@ This document describes how to use TiDB and [Go-MySQL-Driver](https://github.com The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). 
+For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -33,7 +33,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -77,7 +77,7 @@ CREATE TABLE player ( ); ``` -`sqldriver.go` is the main body of the `sqldriver`. TiDB is highly compatible with the MySQL protocol, so you need to initialize a MySQL source instance `db, err := sql.Open("mysql", dsn)` to connect to TiDB. Then, you can use `dao.go` to read, edit, add, and delete data. +`sqldriver.go` is the main body of the `sqldriver`. Compared with GORM, the go-sql-driver/mysql implementation might be not a best practice, because you need to write error handling logic, close `*sql.Rows` manually and cannot reuse code easily, which makes your code slightly redundant. TiDB is highly compatible with the MySQL protocol, so you need to initialize a MySQL source instance `db, err := sql.Open("mysql", dsn)` to connect to TiDB. Then, you can use `dao.go` to read, edit, add, and delete data. 
```go package main @@ -509,7 +509,7 @@ When using go-sql-driver/mysql, you need to connect to your cluster and run the ### Step 3.2 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, modify the value of the `dsn` in `sqldriver.go`: +If you are using a TiDB Serverless cluster, modify the value of the `dsn` in `sqldriver.go`: ```go dsn := "root:@tcp(127.0.0.1:4000)/test?charset=utf8mb4" diff --git a/develop/dev-guide-sample-application-java-hibernate.md b/develop/dev-guide-sample-application-java-hibernate.md index 1f0432264d850..b5557d4f91a1f 100644 --- a/develop/dev-guide-sample-application-java-hibernate.md +++ b/develop/dev-guide-sample-application-java-hibernate.md @@ -8,6 +8,8 @@ summary: Learn how to build a simple CRUD application with TiDB and Hibernate. # Build a Simple CRUD App with TiDB and Hibernate +[Hibernate](https://hibernate.org/) is a popular open-source Java ORM, and it supports TiDB dialect starting from `v6.0.0.Beta2`, which fits TiDB features well. + This document describes how to use TiDB and Hibernate to build a simple CRUD application. > **Note:** @@ -20,9 +22,9 @@ This document describes how to use TiDB and Hibernate to build a simple CRUD app The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -32,7 +34,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). 
+See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -44,7 +46,7 @@ git clone https://github.com/pingcap-inc/tidb-example-java.git Compared with Hibernate, the JDBC implementation might be not a best practice, because you need to write error handling logic manually and cannot reuse code easily, which makes your code slightly redundant. -Hibernate is a popular open-source Java ORM, and it supports TiDB dialect starting from `v6.0.0.Beta2`, which fits TiDB features well. The following instructions take `v6.0.0.Beta2` as an example. +The following instructions take `v6.0.0.Beta2` as an example. Change to the `plain-java-hibernate` directory: @@ -335,13 +337,9 @@ public class HibernateExample The following content introduces how to run the code step by step. -### Step 3.1 Table initialization - -No need to initialize tables manually. - -### Step 3.2 Modify parameters for TiDB Cloud +### Step 3.1 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, modify the `hibernate.connection.url`, `hibernate.connection.username`, `hibernate.connection.password` in `hibernate.cfg.xml`. +If you are using a TiDB Serverless cluster, modify the `hibernate.connection.url`, `hibernate.connection.username`, `hibernate.connection.password` in `hibernate.cfg.xml`. 
```xml @@ -403,20 +401,20 @@ In this case, you can modify the parameters as follows: ``` -### Step 3.3 Run +### Step 3.2 Run To run the code, you can run `make build` and `make run` respectively: ```shell make build # this command executes `mvn clean package` -make run # this command executes `java -jar target/plain-java-jdbc-0.0.1-jar-with-dependencies.jar` +make run # this command executes `java -jar target/plain-java-hibernate-0.0.1-jar-with-dependencies.jar` ``` Or you can use the native commands: ```shell mvn clean package -java -jar target/plain-java-jdbc-0.0.1-jar-with-dependencies.jar +java -jar target/plain-java-hibernate-0.0.1-jar-with-dependencies.jar ``` Or run the `make` command directly, which is a combination of `make build` and `make run`. diff --git a/develop/dev-guide-sample-application-java-jdbc.md b/develop/dev-guide-sample-application-java-jdbc.md index 34d75cb95925a..02ba214c71e0c 100644 --- a/develop/dev-guide-sample-application-java-jdbc.md +++ b/develop/dev-guide-sample-application-java-jdbc.md @@ -20,9 +20,9 @@ This document describes how to use TiDB and JDBC to build a simple CRUD applicat The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -32,7 +32,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). 
@@ -540,7 +540,7 @@ When using JDBC, you need to connect to your cluster and run the statement in th ### Step 3.2 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, modify the parameters of the host, port, user, and password in `JDBCExample.java`: +If you are using a TiDB Serverless cluster, modify the parameters of the host, port, user, and password in `JDBCExample.java`: ```java mysqlDataSource.setServerName("localhost"); diff --git a/develop/dev-guide-sample-application-java-mybatis.md b/develop/dev-guide-sample-application-java-mybatis.md index ac746e4495c52..625cb2de4cde7 100644 --- a/develop/dev-guide-sample-application-java-mybatis.md +++ b/develop/dev-guide-sample-application-java-mybatis.md @@ -1,14 +1,16 @@ --- -title: Build a Simple CRUD App with TiDB and Mybatis -summary: Learn how to build a simple CRUD application with TiDB and Mybatis. +title: Build a Simple CRUD App with TiDB and MyBatis +summary: Learn how to build a simple CRUD application with TiDB and MyBatis. --- -# Build a Simple CRUD App with TiDB and Mybatis +# Build a Simple CRUD App with TiDB and MyBatis -This document describes how to use TiDB and Mybatis to build a simple CRUD application. +[MyBatis](https://mybatis.org/mybatis-3/index.html) is a popular open-source Java class persistence framework. + +This document describes how to use TiDB and MyBatis to build a simple CRUD application. > **Note:** > @@ -20,9 +22,9 @@ This document describes how to use TiDB and Mybatis to build a simple CRUD appli The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). 
**Use a local cluster** @@ -32,7 +34,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -42,9 +44,9 @@ See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud git clone https://github.com/pingcap-inc/tidb-example-java.git ``` -Compared with [Mybatis](https://mybatis.org/mybatis-3/index.html), the JDBC implementation might be not a best practice, because you need to write error handling logic manually and cannot reuse code easily, which makes your code slightly redundant. +Compared with [MyBatis](https://mybatis.org/mybatis-3/index.html), the JDBC implementation might be not a best practice, because you need to write error handling logic manually and cannot reuse code easily, which makes your code slightly redundant. -Mybatis is a popular open-source Java class persistence framework. The following uses [MyBatis Generator](https://mybatis.org/generator/quickstart.html) as a Maven plugin to generate the persistence layer code. +The following uses [MyBatis Generator](https://mybatis.org/generator/quickstart.html) as a Maven plugin to generate the persistence layer code. Change to the `plain-java-mybatis` directory: @@ -84,9 +86,9 @@ The automatically generated files are: - `src/main/java/com/pingcap/model/Player.java`: The `Player` entity class. - `src/main/java/com/pingcap/model/PlayerMapper.java`: The interface of `PlayerMapper`. -- `src/main/resources/mapper/PlayerMapper.xml`: The XML mapping of `Player`. Mybatis uses this configuration to automatically generate the implementation class of the `PlayerMapper` interface. +- `src/main/resources/mapper/PlayerMapper.xml`: The XML mapping of `Player`. 
MyBatis uses this configuration to automatically generate the implementation class of the `PlayerMapper` interface. -The strategy for generating these files is written in `mybatis-generator.xml`, which is the configuration file for [Mybatis Generator](https://mybatis.org/generator/quickstart.html). There are comments in the following configuration file to describe how to use it. +The strategy for generating these files is written in `mybatis-generator.xml`, which is the configuration file for [MyBatis Generator](https://mybatis.org/generator/quickstart.html). There are comments in the following configuration file to describe how to use it. ```xml @@ -200,7 +202,7 @@ Once included in the Maven plugin, you can delete the old generated files and ma > > The property `configuration.overwrite` in `mybatis-generator.xml` only ensures that the generated Java code files are overwritten. But the XML mapping files are still written as appended. Therefore, it is recommended to delete the old file before Mybaits Generator generating a new one. -`Player.java` is a data entity class file generated using Mybatis Generator, which is a mapping of database tables in the application. Each property of the `Player` class corresponds to a field in the `player` table. +`Player.java` is a data entity class file generated using MyBatis Generator, which is a mapping of database tables in the application. Each property of the `Player` class corresponds to a field in the `player` table. ```java package com.pingcap.model; @@ -248,7 +250,7 @@ public class Player { } ``` -`PlayerMapper.java` is a mapping interface file generated using Mybatis Generator. This file only defines the interface, and the implementation classes of interface are automatically generated using XML or annotations. +`PlayerMapper.java` is a mapping interface file generated using MyBatis Generator. 
This file only defines the interface, and the implementation classes of interface are automatically generated using XML or annotations. ```java package com.pingcap.model; @@ -270,7 +272,7 @@ public interface PlayerMapper { } ``` -`PlayerMapper.xml` is a mapping XML file generated using Mybatis Generator. Mybatis uses this to automatically generate the implementation class of the `PlayerMapper` interface. +`PlayerMapper.xml` is a mapping XML file generated using MyBatis Generator. MyBatis uses this to automatically generate the implementation class of the `PlayerMapper` interface. ```xml @@ -348,7 +350,7 @@ public interface PlayerMapper { ``` -Since Mybatis Generator needs to generate the source code from the table definition, the table needs to be created first. To create the table, you can use `dbinit.sql`. +Since MyBatis Generator needs to generate the source code from the table definition, the table needs to be created first. To create the table, you can use `dbinit.sql`. ```sql USE test; @@ -362,7 +364,7 @@ CREATE TABLE player ( ); ``` -Split the interface `PlayerMapperEx` additionally to extend from `PlayerMapper` and write a matching `PlayerMapperEx.xml` file. Avoid changing `PlayerMapper.java` and `PlayerMapper.xml` directly. This is to avoid overwrite by Mybatis Generator. +Split the interface `PlayerMapperEx` additionally to extend from `PlayerMapper` and write a matching `PlayerMapperEx.xml` file. Avoid changing `PlayerMapper.java` and `PlayerMapper.xml` directly. This is to avoid overwrite by MyBatis Generator. Define the added interface in `PlayerMapperEx.java`: @@ -419,7 +421,7 @@ Define the mapping rules in `PlayerMapperEx.xml`: ``` -`PlayerDAO.java` is a class used to manage data, in which `DAO` means [Data Access Object](https://en.wikipedia.org/wiki/Data_access_object). The class defines a set of data manipulation methods for writing data. 
In it, Mybatis encapsulates a large number of operations such as object mapping and CRUD of basic objects, which greatly simplifies the code. +`PlayerDAO.java` is a class used to manage data, in which `DAO` means [Data Access Object](https://en.wikipedia.org/wiki/Data_access_object). The class defines a set of data manipulation methods for writing data. In it, MyBatis encapsulates a large number of operations such as object mapping and CRUD of basic objects, which greatly simplifies the code. ```java package com.pingcap.dao; @@ -613,7 +615,7 @@ The following content introduces how to run the code step by step. ### Step 3.1 Table initialization -When using Mybatis, you need to initialize the database tables manually. If you are using a local cluster, and MySQL client has been installed locally, you can run it directly in the `plain-java-mybatis` directory: +When using MyBatis, you need to initialize the database tables manually. If you are using a local cluster, and MySQL client has been installed locally, you can run it directly in the `plain-java-mybatis` directory: ```shell make prepare @@ -629,7 +631,7 @@ If you are using a non-local cluster or MySQL client has not been installed, con ### Step 3.2 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, modify the `dataSource.url`, `dataSource.username`, `dataSource.password` in `mybatis-config.xml`. +If you are using a TiDB Serverless cluster, modify the `dataSource.url`, `dataSource.username`, `dataSource.password` in `mybatis-config.xml`. ```xml @@ -738,4 +740,4 @@ Or run the `make` command directly, which is a combination of `make prepare`, `m ## Step 4. 
Expected output -[Mybatis Expected Output](https://github.com/pingcap-inc/tidb-example-java/blob/main/Expected-Output.md#plain-java-mybatis) \ No newline at end of file +[MyBatis Expected Output](https://github.com/pingcap-inc/tidb-example-java/blob/main/Expected-Output.md#plain-java-mybatis) \ No newline at end of file diff --git a/develop/dev-guide-sample-application-java-spring-boot.md b/develop/dev-guide-sample-application-java-spring-boot.md index d4f0e4543e68f..7ef85cad5e28f 100644 --- a/develop/dev-guide-sample-application-java-spring-boot.md +++ b/develop/dev-guide-sample-application-java-spring-boot.md @@ -1,7 +1,7 @@ --- title: Build a TiDB App Using Spring Boot summary: Learn an example of how to build a TiDB application using Spring Boot. -aliases: ['/tidbcloud/dev-guide-sample-application-spring-boot'] +aliases: ['/tidbcloud/dev-guide-sample-application-spring-boot','/tidb/dev/dev-guide-sample-application-spring-boot'] --- @@ -22,9 +22,9 @@ You can build your own application based on this example. The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -34,7 +34,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). 
@@ -98,7 +98,7 @@ If you want to learn more about the code of this application, refer to [Implemen ### Step 5.1 Change parameters -If you are using a TiDB Cloud Serverless Tier cluster, change the `spring.datasource.url`, `spring.datasource.username`, `spring.datasource.password` parameters in the `application.yml` (located in `src/main/resources`). +If you are using a TiDB Serverless cluster, change the `spring.datasource.url`, `spring.datasource.username`, `spring.datasource.password` parameters in the `application.yml` (located in `src/main/resources`). ```yaml spring: diff --git a/develop/dev-guide-sample-application-python-mysql-connector.md b/develop/dev-guide-sample-application-python-mysql-connector.md index 619148dd3a099..c643b738ba614 100644 --- a/develop/dev-guide-sample-application-python-mysql-connector.md +++ b/develop/dev-guide-sample-application-python-mysql-connector.md @@ -23,9 +23,9 @@ This document describes how to use TiDB and MySQL Connector/Python to build a si The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -35,7 +35,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -151,9 +151,11 @@ def simple_example() -> None: # create players with bulk inserts. # insert 1919 players totally, with 114 players per batch. 
- # each player has a random UUID + # all players have random uuid + print(f'start to insert one by one, it will take a long time') player_list = random_player(1919) for idx in range(0, len(player_list), 114): + print(f'inserted {idx} players') bulk_create_player(cur, player_list[idx:idx + 114]) # print the number of players @@ -233,9 +235,9 @@ If you are not using a local cluster, or have not installed a MySQL client, conn ### Step 3.2 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system).> +If you are using a TiDB Serverless cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). -If you are using a TiDB Cloud Serverless Tier cluster, change the `get_connection` function in `mysql_connector_python_example.py`: +If you are using a TiDB Serverless cluster, change the `get_connection` function in `mysql_connector_python_example.py`: ```python def get_connection(autocommit: bool = True) -> MySQLConnection: diff --git a/develop/dev-guide-sample-application-python-mysqlclient.md b/develop/dev-guide-sample-application-python-mysqlclient.md index 2feaf5b4142f7..2e993a66b891e 100644 --- a/develop/dev-guide-sample-application-python-mysqlclient.md +++ b/develop/dev-guide-sample-application-python-mysqlclient.md @@ -22,9 +22,9 @@ This document describes how to use TiDB and mysqlclient to build a simple CRUD a The following introduces how to start a TiDB cluster. 
-**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -34,7 +34,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -233,9 +233,9 @@ If you are not using a local cluster, or have not installed a MySQL client, conn ### Step 3.2 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). +If you are using a TiDB Serverless cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). 
-If you are using a TiDB Cloud Serverless Tier cluster, change the `get_connection` function in `mysqlclient_example.py`: +If you are using a TiDB Serverless cluster, change the `get_connection` function in `mysqlclient_example.py`: ```python def get_connection(autocommit: bool = True) -> MySQLdb.Connection: diff --git a/develop/dev-guide-sample-application-python-peewee.md b/develop/dev-guide-sample-application-python-peewee.md index 9c2d78d117f78..da13997e5dc88 100644 --- a/develop/dev-guide-sample-application-python-peewee.md +++ b/develop/dev-guide-sample-application-python-peewee.md @@ -22,9 +22,9 @@ This document describes how to use TiDB and peewee to build a simple CRUD applic The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -34,7 +34,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -168,7 +168,7 @@ trade_example() Compared with using drivers directly, peewee provides an abstraction for the specific details of different databases when you create a database connection. In addition, peewee encapsulates some operations such as session management and CRUD of basic objects, which greatly simplifies the code. -The `Player` class is a mapping of a table to attributes in the application. Each attribute of `Player` corresponds to a field in the `player` table. 
To provide SQLAlchemy with more information, the attribute is defined as `id = Column(String(36), primary_key=True)` to indicate the field type and its additional attributes. For example, `id = Column(String(36), primary_key=True)` indicates that the `id` attribute is `String` type, the corresponding field in database is `VARCHAR` type, the length is `36`, and it is a primary key. +The `Player` class is a mapping of a table to attributes in the application. Each attribute of `Player` corresponds to a field in the `player` table. To provide peewee with more information, the attribute is defined as `id = CharField(max_length=36, primary_key=True)` to indicate the field type and its additional attributes. For example, `id = CharField(max_length=36, primary_key=True)` indicates that the `id` attribute is `String` type, the corresponding field in database is `VARCHAR` type, the length is `36`, and it is a primary key. For more information about how to use peewee, refer to [peewee documentation](http://docs.peewee-orm.com/en/latest/). @@ -204,9 +204,9 @@ If you are not using a local cluster, or have not installed a MySQL client, conn ### Step 3.2 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). +If you are using a TiDB Serverless cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). 
-If you are using a TiDB Cloud Serverless Tier cluster, modify the parameters of the `connect` function in `peewee_example.py`: +If you are using a TiDB Serverless cluster, modify the parameters of the `connect` function in `peewee_example.py`: ```python db = connect('mysql://root:@127.0.0.1:4000/test') diff --git a/develop/dev-guide-sample-application-python-pymysql.md b/develop/dev-guide-sample-application-python-pymysql.md index 1bc9e39c3b81c..bac670caf7fe4 100644 --- a/develop/dev-guide-sample-application-python-pymysql.md +++ b/develop/dev-guide-sample-application-python-pymysql.md @@ -22,9 +22,9 @@ This document describes how to use TiDB and PyMySQL to build a simple CRUD appli The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -34,7 +34,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -231,9 +231,9 @@ If you are not using a local cluster, or have not installed a MySQL client, conn ### Step 3.2 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. 
To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). +If you are using a TiDB Serverless cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). -If you are using a TiDB Cloud Serverless Tier cluster, change the `get_connection` function in `pymysql_example.py`: +If you are using a TiDB Serverless cluster, change the `get_connection` function in `pymysql_example.py`: ```python def get_connection(autocommit: bool = False) -> Connection: diff --git a/develop/dev-guide-sample-application-python-sqlalchemy.md b/develop/dev-guide-sample-application-python-sqlalchemy.md index 135b882625874..7e003b803f6fc 100644 --- a/develop/dev-guide-sample-application-python-sqlalchemy.md +++ b/develop/dev-guide-sample-application-python-sqlalchemy.md @@ -23,9 +23,9 @@ This document describes how to use TiDB and SQLAlchemy to build a simple CRUD ap The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). **Use a local cluster** @@ -35,7 +35,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). 
+See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -205,9 +205,9 @@ If you are not using a local cluster, or have not installed a MySQL client, conn ### Step 3.2 Modify parameters for TiDB Cloud -If you are using a TiDB Cloud Serverless Tier cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). +If you are using a TiDB Serverless cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). -If you are using a TiDB Cloud Serverless Tier cluster, modify the parameters of the `create_engine` function in `sqlalchemy_example.py`: +If you are using a TiDB Serverless cluster, modify the parameters of the `create_engine` function in `sqlalchemy_example.py`: ```python engine = create_engine('mysql://root:@127.0.0.1:4000/test') diff --git a/develop/dev-guide-sample-application-python.md b/develop/dev-guide-sample-application-python.md deleted file mode 100644 index 326735ae3f560..0000000000000 --- a/develop/dev-guide-sample-application-python.md +++ /dev/null @@ -1,1103 +0,0 @@ ---- -title: Build a Simple CRUD App with TiDB and Golang -summary: Learn how to build a simple CRUD application with TiDB and Golang. -aliases: ['/tidb/dev/dev-guide-outdated-for-python-mysql-connector','/tidb/dev/dev-guide-outdated-for-sqlalchemy'] ---- - - - - -# Build a Simple CRUD App with TiDB and Python - -This document describes how to use TiDB and Python to build a simple CRUD application. 
- -> **Note:** -> -> It is recommended to use Python 3.10 or a later Python version. - -## Step 1. Launch your TiDB cluster - - - -The following introduces how to start a TiDB cluster. - -**Use a TiDB Cloud Serverless Tier cluster** - -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). - -**Use a local cluster** - -For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md#deploy-a-local-test-cluster) or [Deploy a TiDB cluster using TiUP](/production-deployment-using-tiup.md). - - - - - -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). - - - -## Step 2. Get the code - -```shell -git clone https://github.com/pingcap-inc/tidb-example-python.git -``` - - - -
- -[SQLAlchemy](https://www.sqlalchemy.org/) is a popular open-source ORM library for Python. The following uses SQLAlchemy 1.44 as an example. - -```python -import uuid -from typing import List - -from sqlalchemy import create_engine, String, Column, Integer, select, func -from sqlalchemy.orm import declarative_base, sessionmaker - -engine = create_engine('mysql://root:@127.0.0.1:4000/test') -Base = declarative_base() -Base.metadata.create_all(engine) -Session = sessionmaker(bind=engine) - - -class Player(Base): - __tablename__ = "player" - - id = Column(String(36), primary_key=True) - coins = Column(Integer) - goods = Column(Integer) - - def __repr__(self): - return f'Player(id={self.id!r}, coins={self.coins!r}, goods={self.goods!r})' - - -def random_player(amount: int) -> List[Player]: - players = [] - for _ in range(amount): - players.append(Player(id=uuid.uuid4(), coins=10000, goods=10000)) - - return players - - -def simple_example() -> None: - with Session() as session: - # create a player, who has a coin and a goods. - session.add(Player(id="test", coins=1, goods=1)) - - # get this player, and print it. - get_test_stmt = select(Player).where(Player.id == "test") - for player in session.scalars(get_test_stmt): - print(player) - - # create players with bulk inserts. - # insert 1919 players totally, with 114 players per batch. - # each player has a random UUID - player_list = random_player(1919) - for idx in range(0, len(player_list), 114): - session.bulk_save_objects(player_list[idx:idx + 114]) - - # print the number of players - count = session.query(func.count(Player.id)).scalar() - print(f'number of players: {count}') - - # print 3 players. 
- three_players = session.query(Player).limit(3).all() - for player in three_players: - print(player) - - session.commit() - - -def trade_check(session: Session, sell_id: str, buy_id: str, amount: int, price: int) -> bool: - # sell player goods check - sell_player = session.query(Player.goods).filter(Player.id == sell_id).with_for_update().one() - if sell_player.goods < amount: - print(f'sell player {sell_id} goods not enough') - return False - - # buy player coins check - buy_player = session.query(Player.coins).filter(Player.id == buy_id).with_for_update().one() - if buy_player.coins < price: - print(f'buy player {buy_id} coins not enough') - return False - - -def trade(sell_id: str, buy_id: str, amount: int, price: int) -> None: - with Session() as session: - if trade_check(session, sell_id, buy_id, amount, price) is False: - return - - # deduct the goods of seller, and raise his/her the coins - session.query(Player).filter(Player.id == sell_id). \ - update({'goods': Player.goods - amount, 'coins': Player.coins + price}) - # deduct the coins of buyer, and raise his/her the goods - session.query(Player).filter(Player.id == buy_id). \ - update({'goods': Player.goods + amount, 'coins': Player.coins - price}) - - session.commit() - print("trade success") - - -def trade_example() -> None: - with Session() as session: - # create two players - # player 1: id is "1", has only 100 coins. - # player 2: id is "2", has 114514 coins, and 20 goods. - session.add(Player(id="1", coins=100, goods=0)) - session.add(Player(id="2", coins=114514, goods=20)) - session.commit() - - # player 1 wants to buy 10 goods from player 2. - # it will cost 500 coins, but player 1 cannot afford it. - # so this trade will fail, and nobody will lose their coins or goods - trade(sell_id="2", buy_id="1", amount=10, price=500) - - # then player 1 has to reduce the incoming quantity to 2. 
- # this trade will be successful - trade(sell_id="2", buy_id="1", amount=2, price=100) - - with Session() as session: - traders = session.query(Player).filter(Player.id.in_(("1", "2"))).all() - for player in traders: - print(player) - session.commit() - - -simple_example() -trade_example() -``` - -Compared with using drivers directly, SQLAlchemy provides an abstraction for the specific details of different databases when you create a database connection. In addition, SQLAlchemy encapsulates some operations such as session management and CRUD of basic objects, which greatly simplifies the code. - -The `Player` class is a mapping of a table to attributes in the application. Each attribute of `Player` corresponds to a field in the `player` table. To provide SQLAlchemy with more information, the attribute is defined as `id = Column(String(36), primary_key=True)` to indicate the field type and its additional attributes. For example, `id = Column(String(36), primary_key=True)` indicates that the `id` attribute is `String` type, the corresponding field in database is `VARCHAR` type, the length is `36`, and it is a primary key. - -For more information about how to use SQLAlchemy, refer to [SQLAlchemy documentation](https://www.sqlalchemy.org/). - -
- -
- -[peewee](http://docs.peewee-orm.com/en/latest/) is a popular open-source ORM library for Python. The following uses peewee 3.15.4 as an example. - -```python -import os -import uuid -from typing import List - -from peewee import * - -from playhouse.db_url import connect - -db = connect('mysql://root:@127.0.0.1:4000/test') - - -class Player(Model): - id = CharField(max_length=36, primary_key=True) - coins = IntegerField() - goods = IntegerField() - - class Meta: - database = db - table_name = "player" - - -def random_player(amount: int) -> List[Player]: - players = [] - for _ in range(amount): - players.append(Player(id=uuid.uuid4(), coins=10000, goods=10000)) - - return players - - -def simple_example() -> None: - # create a player, who has a coin and a goods. - Player.create(id="test", coins=1, goods=1) - - # get this player, and print it. - test_player = Player.select().where(Player.id == "test").get() - print(f'id:{test_player.id}, coins:{test_player.coins}, goods:{test_player.goods}') - - # create players with bulk inserts. - # insert 1919 players totally, with 114 players per batch. - # each player has a random UUID - player_list = random_player(1919) - Player.bulk_create(player_list, 114) - - # print the number of players - count = Player.select().count() - print(f'number of players: {count}') - - # print 3 players. 
- three_players = Player.select().limit(3) - for player in three_players: - print(f'id:{player.id}, coins:{player.coins}, goods:{player.goods}') - - -def trade_check(sell_id: str, buy_id: str, amount: int, price: int) -> bool: - sell_goods = Player.select(Player.goods).where(Player.id == sell_id).get().goods - if sell_goods < amount: - print(f'sell player {sell_id} goods not enough') - return False - - buy_coins = Player.select(Player.coins).where(Player.id == buy_id).get().coins - if buy_coins < price: - print(f'buy player {buy_id} coins not enough') - return False - - return True - - -def trade(sell_id: str, buy_id: str, amount: int, price: int) -> None: - with db.atomic() as txn: - try: - if trade_check(sell_id, buy_id, amount, price) is False: - txn.rollback() - return - - # deduct the goods of seller, and raise his/her the coins - Player.update(goods=Player.goods - amount, coins=Player.coins + price).where(Player.id == sell_id).execute() - # deduct the coins of buyer, and raise his/her the goods - Player.update(goods=Player.goods + amount, coins=Player.coins - price).where(Player.id == buy_id).execute() - - except Exception as err: - txn.rollback() - print(f'something went wrong: {err}') - else: - txn.commit() - print("trade success") - - -def trade_example() -> None: - # create two players - # player 1: id is "1", has only 100 coins. - # player 2: id is "2", has 114514 coins, and 20 goods. - Player.create(id="1", coins=100, goods=0) - Player.create(id="2", coins=114514, goods=20) - - # player 1 wants to buy 10 goods from player 2. - # it will cost 500 coins, but player 1 cannot afford it. - # so this trade will fail, and nobody will lose their coins or goods - trade(sell_id="2", buy_id="1", amount=10, price=500) - - # then player 1 has to reduce the incoming quantity to 2. 
- # this trade will be successful - trade(sell_id="2", buy_id="1", amount=2, price=100) - - # let's take a look for player 1 and player 2 currently - after_trade_players = Player.select().where(Player.id.in_(["1", "2"])) - for player in after_trade_players: - print(f'id:{player.id}, coins:{player.coins}, goods:{player.goods}') - - -db.connect() - -# recreate the player table -db.drop_tables([Player]) -db.create_tables([Player]) - -simple_example() -trade_example() -``` - -Compared with using drivers directly, peewee provides an abstraction for the specific details of different databases when you create a database connection. In addition, peewee encapsulates some operations such as session management and CRUD of basic objects, which greatly simplifies the code. - -The `Player` class is a mapping of a table to attributes in the application. Each attribute of `Player` corresponds to a field in the `player` table. To provide SQLAlchemy with more information, the attribute is defined as `id = Column(String(36), primary_key=True)` to indicate the field type and its additional attributes. For example, `id = Column(String(36), primary_key=True)` indicates that the `id` attribute is `String` type, the corresponding field in database is `VARCHAR` type, the length is `36`, and it is a primary key. - -For more information about how to use peewee, refer to [peewee documentation](http://docs.peewee-orm.com/en/latest/). - -
- -
- -[mysqlclient](https://pypi.org/project/mysqlclient/) is a popular open-source driver for Python. The following uses mysqlclient 2.1.1 as an example. Drivers for Python are more convenient to use than other languages, but they do not shield the underlying implementation and require manual management of transactions. If there are not a lot of scenarios where SQL is required, it is recommended to use ORM, which can help reduce the coupling of your program. - -```python -import uuid -from typing import List - -import MySQLdb -from MySQLdb import Connection -from MySQLdb.cursors import Cursor - -def get_connection(autocommit: bool = True) -> MySQLdb.Connection: - return MySQLdb.connect( - host="127.0.0.1", - port=4000, - user="root", - password="", - database="test", - autocommit=autocommit - ) - - -def create_player(cursor: Cursor, player: tuple) -> None: - cursor.execute("INSERT INTO player (id, coins, goods) VALUES (%s, %s, %s)", player) - - -def get_player(cursor: Cursor, player_id: str) -> tuple: - cursor.execute("SELECT id, coins, goods FROM player WHERE id = %s", (player_id,)) - return cursor.fetchone() - - -def get_players_with_limit(cursor: Cursor, limit: int) -> List[tuple]: - cursor.execute("SELECT id, coins, goods FROM player LIMIT %s", (limit,)) - return cursor.fetchall() - - -def random_player(amount: int) -> List[tuple]: - players = [] - for _ in range(amount): - players.append((uuid.uuid4(), 10000, 10000)) - - return players - - -def bulk_create_player(cursor: Cursor, players: List[tuple]) -> None: - cursor.executemany("INSERT INTO player (id, coins, goods) VALUES (%s, %s, %s)", players) - - -def get_count(cursor: Cursor) -> None: - cursor.execute("SELECT count(*) FROM player") - return cursor.fetchone()[0] - - -def trade_check(cursor: Cursor, sell_id: str, buy_id: str, amount: int, price: int) -> bool: - get_player_with_lock_sql = "SELECT coins, goods FROM player WHERE id = %s FOR UPDATE" - - # sell player goods check - 
cursor.execute(get_player_with_lock_sql, (sell_id,)) - _, sell_goods = cursor.fetchone() - if sell_goods < amount: - print(f'sell player {sell_id} goods not enough') - return False - - # buy player coins check - cursor.execute(get_player_with_lock_sql, (buy_id,)) - buy_coins, _ = cursor.fetchone() - if buy_coins < price: - print(f'buy player {buy_id} coins not enough') - return False - - -def trade_update(cursor: Cursor, sell_id: str, buy_id: str, amount: int, price: int) -> None: - update_player_sql = "UPDATE player set goods = goods + %s, coins = coins + %s WHERE id = %s" - - # deduct the goods of seller, and raise his/her the coins - cursor.execute(update_player_sql, (-amount, price, sell_id)) - # deduct the coins of buyer, and raise his/her the goods - cursor.execute(update_player_sql, (amount, -price, buy_id)) - - -def trade(connection: Connection, sell_id: str, buy_id: str, amount: int, price: int) -> None: - with connection.cursor() as cursor: - if trade_check(cursor, sell_id, buy_id, amount, price) is False: - connection.rollback() - return - - try: - trade_update(cursor, sell_id, buy_id, amount, price) - except Exception as err: - connection.rollback() - print(f'something went wrong: {err}') - else: - connection.commit() - print("trade success") - - -def simple_example() -> None: - with get_connection(autocommit=True) as conn: - with conn.cursor() as cur: - # create a player, who has a coin and a goods. - create_player(cur, ("test", 1, 1)) - - # get this player, and print it. - test_player = get_player(cur, "test") - print(f'id:{test_player[0]}, coins:{test_player[1]}, goods:{test_player[2]}') - - # create players with bulk inserts. - # insert 1919 players totally, with 114 players per batch. 
- # each player has a random UUID - player_list = random_player(1919) - for idx in range(0, len(player_list), 114): - bulk_create_player(cur, player_list[idx:idx + 114]) - - # print the number of players - count = get_count(cur) - print(f'number of players: {count}') - - # print 3 players. - three_players = get_players_with_limit(cur, 3) - for player in three_players: - print(f'id:{player[0]}, coins:{player[1]}, goods:{player[2]}') - - -def trade_example() -> None: - with get_connection(autocommit=False) as conn: - with conn.cursor() as cur: - # create two players - # player 1: id is "1", has only 100 coins. - # player 2: id is "2", has 114514 coins, and 20 goods. - create_player(cur, ("1", 100, 0)) - create_player(cur, ("2", 114514, 20)) - conn.commit() - - # player 1 wants to buy 10 goods from player 2. - # it will cost 500 coins, but player 1 cannot afford it. - # so this trade will fail, and nobody will lose their coins or goods - trade(conn, sell_id="2", buy_id="1", amount=10, price=500) - - # then player 1 has to reduce the incoming quantity to 2. - # this trade will be successful - trade(conn, sell_id="2", buy_id="1", amount=2, price=100) - - # let's take a look for player 1 and player 2 currently - with conn.cursor() as cur: - _, player1_coin, player1_goods = get_player(cur, "1") - print(f'id:1, coins:{player1_coin}, goods:{player1_goods}') - _, player2_coin, player2_goods = get_player(cur, "2") - print(f'id:2, coins:{player2_coin}, goods:{player2_goods}') - - -simple_example() -trade_example() -``` - -The driver has a lower level of encapsulation than ORM, so there are a lot of SQL statements in the program. Unlike ORM, there is no data object in drivers, so the `Player` queried by the driver is represented as a tuple. - -For more information about how to use mysqlclient, refer to [mysqlclient documentation](https://mysqlclient.readthedocs.io/). - -
- -
- -[PyMySQL](https://pypi.org/project/PyMySQL/) is a popular open-source driver for Python. The following uses PyMySQL 1.0.2 as an example. Drivers for Python are more convenient to use than other languages, but they do not shield the underlying implementation and require manual management of transactions. If there are not a lot of scenarios where SQL is required, it is recommended to use ORM, which can help reduce the coupling of your program. - -```python -import uuid -from typing import List - -import pymysql.cursors -from pymysql import Connection -from pymysql.cursors import DictCursor - - -def get_connection(autocommit: bool = False) -> Connection: - return pymysql.connect(host='127.0.0.1', - port=4000, - user='root', - password='', - database='test', - cursorclass=DictCursor, - autocommit=autocommit) - - -def create_player(cursor: DictCursor, player: tuple) -> None: - cursor.execute("INSERT INTO player (id, coins, goods) VALUES (%s, %s, %s)", player) - - -def get_player(cursor: DictCursor, player_id: str) -> dict: - cursor.execute("SELECT id, coins, goods FROM player WHERE id = %s", (player_id,)) - return cursor.fetchone() - - -def get_players_with_limit(cursor: DictCursor, limit: int) -> tuple: - cursor.execute("SELECT id, coins, goods FROM player LIMIT %s", (limit,)) - return cursor.fetchall() - - -def random_player(amount: int) -> List[tuple]: - players = [] - for _ in range(amount): - players.append((uuid.uuid4(), 10000, 10000)) - - return players - - -def bulk_create_player(cursor: DictCursor, players: List[tuple]) -> None: - cursor.executemany("INSERT INTO player (id, coins, goods) VALUES (%s, %s, %s)", players) - - -def get_count(cursor: DictCursor) -> int: - cursor.execute("SELECT count(*) as count FROM player") - return cursor.fetchone()['count'] - - -def trade_check(cursor: DictCursor, sell_id: str, buy_id: str, amount: int, price: int) -> bool: - get_player_with_lock_sql = "SELECT coins, goods FROM player WHERE id = %s FOR UPDATE" - - # sell 
player goods check - cursor.execute(get_player_with_lock_sql, (sell_id,)) - seller = cursor.fetchone() - if seller['goods'] < amount: - print(f'sell player {sell_id} goods not enough') - return False - - # buy player coins check - cursor.execute(get_player_with_lock_sql, (buy_id,)) - buyer = cursor.fetchone() - if buyer['coins'] < price: - print(f'buy player {buy_id} coins not enough') - return False - - -def trade_update(cursor: DictCursor, sell_id: str, buy_id: str, amount: int, price: int) -> None: - update_player_sql = "UPDATE player set goods = goods + %s, coins = coins + %s WHERE id = %s" - - # deduct the goods of seller, and raise his/her the coins - cursor.execute(update_player_sql, (-amount, price, sell_id)) - # deduct the coins of buyer, and raise his/her the goods - cursor.execute(update_player_sql, (amount, -price, buy_id)) - - -def trade(connection: Connection, sell_id: str, buy_id: str, amount: int, price: int) -> None: - with connection.cursor() as cursor: - if trade_check(cursor, sell_id, buy_id, amount, price) is False: - connection.rollback() - return - - try: - trade_update(cursor, sell_id, buy_id, amount, price) - except Exception as err: - connection.rollback() - print(f'something went wrong: {err}') - else: - connection.commit() - print("trade success") - - -def simple_example() -> None: - with get_connection(autocommit=True) as connection: - with connection.cursor() as cur: - # create a player, who has a coin and a goods. - create_player(cur, ("test", 1, 1)) - - # get this player, and print it. - test_player = get_player(cur, "test") - print(test_player) - - # create players with bulk inserts. - # insert 1919 players totally, with 114 players per batch. - # each player has a random UUID - player_list = random_player(1919) - for idx in range(0, len(player_list), 114): - bulk_create_player(cur, player_list[idx:idx + 114]) - - # print the number of players - count = get_count(cur) - print(f'number of players: {count}') - - # print 3 players. 
- three_players = get_players_with_limit(cur, 3) - for player in three_players: - print(player) - - -def trade_example() -> None: - with get_connection(autocommit=False) as connection: - with connection.cursor() as cur: - # create two players - # player 1: id is "1", has only 100 coins. - # player 2: id is "2", has 114514 coins, and 20 goods. - create_player(cur, ("1", 100, 0)) - create_player(cur, ("2", 114514, 20)) - connection.commit() - - # player 1 wants to buy 10 goods from player 2. - # it will cost 500 coins, but player 1 cannot afford it. - # so this trade will fail, and nobody will lose their coins or goods - trade(connection, sell_id="2", buy_id="1", amount=10, price=500) - - # then player 1 has to reduce the incoming quantity to 2. - # this trade will be successful - trade(connection, sell_id="2", buy_id="1", amount=2, price=100) - - # let's take a look for player 1 and player 2 currently - with connection.cursor() as cur: - print(get_player(cur, "1")) - print(get_player(cur, "2")) - - -simple_example() -trade_example() -``` - -The driver has a lower level of encapsulation than ORM, so there are a lot of SQL statements in the program. Unlike ORM, there is no data object in drivers, so the `Player` queried by the driver is represented as a dictionary. - -For more information about how to use PyMySQL, refer to [PyMySQL documentation](https://pymysql.readthedocs.io/en/latest/). - -
- -
- -[mysql-connector-python](https://dev.mysql.com/doc/connector-python/en/) is a popular open-source driver for Python. The following uses mysql-connector-python 8.0.31 as an example. Drivers for Python are more convenient to use than other languages, but they do not shield the underlying implementation and require manual management of transactions. If there are not a lot of scenarios where SQL is required, it is recommended to use ORM, which can help reduce the coupling of your program. - -```python -import uuid -from typing import List - -from mysql.connector import connect, MySQLConnection -from mysql.connector.cursor import MySQLCursor - - -def get_connection(autocommit: bool = True) -> MySQLConnection: - connection = connect(host='127.0.0.1', - port=4000, - user='root', - password='', - database='test') - connection.autocommit = autocommit - return connection - - -def create_player(cursor: MySQLCursor, player: tuple) -> None: - cursor.execute("INSERT INTO player (id, coins, goods) VALUES (%s, %s, %s)", player) - - -def get_player(cursor: MySQLCursor, player_id: str) -> tuple: - cursor.execute("SELECT id, coins, goods FROM player WHERE id = %s", (player_id,)) - return cursor.fetchone() - - -def get_players_with_limit(cursor: MySQLCursor, limit: int) -> List[tuple]: - cursor.execute("SELECT id, coins, goods FROM player LIMIT %s", (limit,)) - return cursor.fetchall() - - -def random_player(amount: int) -> List[tuple]: - players = [] - for _ in range(amount): - players.append((str(uuid.uuid4()), 10000, 10000)) - - return players - - -def bulk_create_player(cursor: MySQLCursor, players: List[tuple]) -> None: - cursor.executemany("INSERT INTO player (id, coins, goods) VALUES (%s, %s, %s)", players) - - -def get_count(cursor: MySQLCursor) -> int: - cursor.execute("SELECT count(*) FROM player") - return cursor.fetchone()[0] - - -def trade_check(cursor: MySQLCursor, sell_id: str, buy_id: str, amount: int, price: int) -> bool: - get_player_with_lock_sql = "SELECT coins, 
goods FROM player WHERE id = %s FOR UPDATE" - - # sell player goods check - cursor.execute(get_player_with_lock_sql, (sell_id,)) - _, sell_goods = cursor.fetchone() - if sell_goods < amount: - print(f'sell player {sell_id} goods not enough') - return False - - # buy player coins check - cursor.execute(get_player_with_lock_sql, (buy_id,)) - buy_coins, _ = cursor.fetchone() - if buy_coins < price: - print(f'buy player {buy_id} coins not enough') - return False - - -def trade_update(cursor: MySQLCursor, sell_id: str, buy_id: str, amount: int, price: int) -> None: - update_player_sql = "UPDATE player set goods = goods + %s, coins = coins + %s WHERE id = %s" - - # deduct the goods of seller, and raise his/her the coins - cursor.execute(update_player_sql, (-amount, price, sell_id)) - # deduct the coins of buyer, and raise his/her the goods - cursor.execute(update_player_sql, (amount, -price, buy_id)) - - -def trade(connection: MySQLConnection, sell_id: str, buy_id: str, amount: int, price: int) -> None: - with connection.cursor() as cursor: - if trade_check(cursor, sell_id, buy_id, amount, price) is False: - connection.rollback() - return - - try: - trade_update(cursor, sell_id, buy_id, amount, price) - except Exception as err: - connection.rollback() - print(f'something went wrong: {err}') - else: - connection.commit() - print("trade success") - - -def simple_example() -> None: - with get_connection(autocommit=True) as connection: - with connection.cursor() as cur: - # create a player, who has a coin and a goods. - create_player(cur, ("test", 1, 1)) - - # get this player, and print it. - test_player = get_player(cur, "test") - print(f'id:{test_player[0]}, coins:{test_player[1]}, goods:{test_player[2]}') - - # create players with bulk inserts. - # insert 1919 players totally, with 114 players per batch. 
- # each player has a random UUID - player_list = random_player(1919) - for idx in range(0, len(player_list), 114): - bulk_create_player(cur, player_list[idx:idx + 114]) - - # print the number of players - count = get_count(cur) - print(f'number of players: {count}') - - # print 3 players. - three_players = get_players_with_limit(cur, 3) - for player in three_players: - print(f'id:{player[0]}, coins:{player[1]}, goods:{player[2]}') - - -def trade_example() -> None: - with get_connection(autocommit=False) as conn: - with conn.cursor() as cur: - # create two players - # player 1: id is "1", has only 100 coins. - # player 2: id is "2", has 114514 coins, and 20 goods. - create_player(cur, ("1", 100, 0)) - create_player(cur, ("2", 114514, 20)) - conn.commit() - - # player 1 wants to buy 10 goods from player 2. - # it will cost 500 coins, but player 1 cannot afford it. - # so this trade will fail, and nobody will lose their coins or goods - trade(conn, sell_id="2", buy_id="1", amount=10, price=500) - - # then player 1 has to reduce the incoming quantity to 2. - # this trade will be successful - trade(conn, sell_id="2", buy_id="1", amount=2, price=100) - - # let's take a look for player 1 and player 2 currently - with conn.cursor() as cur: - _, player1_coin, player1_goods = get_player(cur, "1") - print(f'id:1, coins:{player1_coin}, goods:{player1_goods}') - _, player2_coin, player2_goods = get_player(cur, "2") - print(f'id:2, coins:{player2_coin}, goods:{player2_goods}') - - -simple_example() -trade_example() -``` - -The driver has a lower level of encapsulation than ORM, so there are a lot of SQL statements in the program. Unlike ORM, there is no data object in drivers, so the `Player` queried by the driver is represented as a tuple. - -For more information about how to use mysql-connector-python, refer to [mysql-connector-python documentation](https://dev.mysql.com/doc/connector-python/en/). - -
- -
- -## Step 3. Run the code - -The following content introduces how to run the code step by step. - -### Step 3.1 Initialize table - -Before running the code, you need to initialize the table manually. If you are using a local TiDB cluster, you can run the following command: - - - -
- -```shell -mysql --host 127.0.0.1 --port 4000 -u root < player_init.sql -``` - -
- -
- -```shell -mycli --host 127.0.0.1 --port 4000 -u root --no-warn < player_init.sql -``` - -
- -
- -If you are not using a local cluster, or have not installed a MySQL client, connect to your cluster using your preferred method (such as Navicat, DBeaver, or other GUI tools) and run the SQL statements in the `player_init.sql` file. - -### Step 3.2 Modify parameters for TiDB Cloud - -If you are using a TiDB Cloud Serverless Tier cluster, you need to provide your CA root path and replace `` in the following examples with your CA path. To get the CA root path on your system, refer to [Where is the CA root path on my system?](https://docs.pingcap.com/tidbcloud/secure-connections-to-serverless-tier-clusters#where-is-the-ca-root-path-on-my-system). - - - -
- -If you are using a TiDB Cloud Serverless Tier cluster, modify the parameters of the `create_engine` function in `sqlalchemy_example.py`: - -```python -engine = create_engine('mysql://root:@127.0.0.1:4000/test') -``` - -Suppose that the password you set is `123456`, and the connection parameters you get from the cluster details page are the following: - -- Endpoint: `xxx.tidbcloud.com` -- Port: `4000` -- User: `2aEp24QWEDLqRFs.root` - -In this case, you can modify the `create_engine` as follows: - -```python -engine = create_engine('mysql://2aEp24QWEDLqRFs.root:123456@xxx.tidbcloud.com:4000/test', connect_args={ - "ssl_mode": "VERIFY_IDENTITY", - "ssl": { - "ca": "" - } -}) -``` - -
- -
- -If you are using a TiDB Cloud Serverless Tier cluster, modify the parameters of the `create_engine` function in `sqlalchemy_example.py`: - -```python -db = connect('mysql://root:@127.0.0.1:4000/test') -``` - -Suppose that the password you set is `123456`, and the connection parameters you get from the cluster details page are the following: - -- Endpoint: `xxx.tidbcloud.com` -- Port: `4000` -- User: `2aEp24QWEDLqRFs.root` - -In this case, you can modify the `connect` as follows: - -- When peewee uses PyMySQL as the driver: - - ```python - db = connect('mysql://2aEp24QWEDLqRFs.root:123456@xxx.tidbcloud.com:4000/test', - ssl_verify_cert=True, ssl_ca="") - ``` - -- When peewee uses mysqlclient as the driver: - - ```python - db = connect('mysql://2aEp24QWEDLqRFs.root:123456@xxx.tidbcloud.com:4000/test', - ssl_mode="VERIFY_IDENTITY", ssl={"ca": ""}) - ``` - -Because peewee will pass parameters to the driver, you need to pay attention to the usage type of the driver when using peewee. - -
- -
- -If you are using a TiDB Cloud Serverless Tier cluster, change the `get_connection` function in `mysqlclient_example.py`: - -```python -def get_connection(autocommit: bool = True) -> MySQLdb.Connection: - return MySQLdb.connect( - host="127.0.0.1", - port=4000, - user="root", - password="", - database="test", - autocommit=autocommit - ) -``` - -Suppose that the password you set is `123456`, and the connection parameters you get from the cluster details page are the following: - -- Endpoint: `xxx.tidbcloud.com` -- Port: `4000` -- User: `2aEp24QWEDLqRFs.root` - -In this case, you can modify the `get_connection` as follows: - -```python -def get_connection(autocommit: bool = True) -> MySQLdb.Connection: - return MySQLdb.connect( - host="xxx.tidbcloud.com", - port=4000, - user="2aEp24QWEDLqRFs.root", - password="123456", - database="test", - autocommit=autocommit, - ssl_mode="VERIFY_IDENTITY", - ssl={ - "ca": "" - } - ) -``` - -
- -
- -If you are using a TiDB Cloud Serverless Tier cluster, change the `get_connection` function in `pymysql_example.py`: - -```python -def get_connection(autocommit: bool = False) -> Connection: - return pymysql.connect(host='127.0.0.1', - port=4000, - user='root', - password='', - database='test', - cursorclass=DictCursor, - autocommit=autocommit) -``` - -Suppose that the password you set is `123456`, and the connection parameters you get from the cluster details page are the following: - -- Endpoint: `xxx.tidbcloud.com` -- Port: `4000` -- User: `2aEp24QWEDLqRFs.root` - -In this case, you can modify the `get_connection` as follows: - -```python -def get_connection(autocommit: bool = False) -> Connection: - return pymysql.connect(host='xxx.tidbcloud.com', - port=4000, - user='2aEp24QWEDLqRFs.root', - password='123456', - database='test', - cursorclass=DictCursor, - autocommit=autocommit, - ssl_ca='', - ssl_verify_cert=True, - ssl_verify_identity=True) -``` - -
- -
- -If you are using a TiDB Cloud Serverless Tier cluster, change the `get_connection` function in `mysql_connector_python_example.py`: - -```python -def get_connection(autocommit: bool = True) -> MySQLConnection: - connection = connect(host='127.0.0.1', - port=4000, - user='root', - password='', - database='test') - connection.autocommit = autocommit - return connection -``` - -Suppose that the password you set is `123456`, and the connection parameters you get from the cluster details page are the following: - -- Endpoint: `xxx.tidbcloud.com` -- Port: `4000` -- User: `2aEp24QWEDLqRFs.root` - -In this case, you can modify the `get_connection` as follows: - -```python -def get_connection(autocommit: bool = True) -> MySQLConnection: - connection = connect( - host="xxx.tidbcloud.com", - port=4000, - user="2aEp24QWEDLqRFs.root", - password="123456", - database="test", - autocommit=autocommit, - ssl_ca='', - ssl_verify_identity=True - ) - connection.autocommit = autocommit - return connection -``` - -
- -
- -### Step 3.3 Run the code - -Before running the code, use the following command to install dependencies: - -```bash -pip3 install -r requirement.txt -``` - -If you need to run the script multiple times, follow the [Table initialization](#step-31-initialize-table) section to initialize the table again before each run. - - - -
- -```bash -python3 sqlalchemy_example.py -``` - -
- -
- -```bash -python3 peewee_example.py -``` - -
- -
- -```bash -python3 mysqlclient_example.py -``` - -
- -
- -```bash -python3 pymysql_example.py -``` - -
- -
- -```bash -python3 mysql_connector_python_example.py -``` - -
- -
- -## Step 4. Expected output - - - -
- -[SQLAlchemy Expected Output](https://github.com/pingcap-inc/tidb-example-python/blob/main/Expected-Output.md#SQLAlchemy) - -
- -
- -[peewee Expected Output](https://github.com/pingcap-inc/tidb-example-python/blob/main/Expected-Output.md#peewee) - -
- -
- -[mysqlclient Expected Output](https://github.com/pingcap-inc/tidb-example-python/blob/main/Expected-Output.md#mysqlclient) - -
- -
- -[PyMySQL Expected Output](https://github.com/pingcap-inc/tidb-example-python/blob/main/Expected-Output.md#PyMySQL) - -
- -
- -[mysql-connector-python Expected Output](https://github.com/pingcap-inc/tidb-example-python/blob/main/Expected-Output.md#mysql-connector-python) - -
- -
diff --git a/develop/dev-guide-third-party-support.md b/develop/dev-guide-third-party-support.md index 34fe3121ef155..b6f07edf42e59 100644 --- a/develop/dev-guide-third-party-support.md +++ b/develop/dev-guide-third-party-support.md @@ -112,7 +112,7 @@ If you encounter problems when connecting to TiDB using the tools listed in this 2.1.1 Compatible N/A - Build a Simple CRUD App with TiDB and mysqlclient + Build a Simple CRUD App with TiDB and mysqlclient PyMySQL @@ -187,7 +187,7 @@ If you encounter problems when connecting to TiDB using the tools listed in this v3.5.10 Full N/A - Build a Simple CRUD App with TiDB and Mybatis + Build a Simple CRUD App with TiDB and MyBatis Spring Data JPA @@ -215,7 +215,7 @@ If you encounter problems when connecting to TiDB using the tools listed in this JavaScript / TypeScript sequelize v6.20.1 - Compatible + Full N/A N/A @@ -228,8 +228,8 @@ If you encounter problems when connecting to TiDB using the tools listed in this Prisma Client - 3.15.1 - Compatible + 4.16.2 + Full N/A N/A @@ -251,24 +251,24 @@ If you encounter problems when connecting to TiDB using the tools listed in this Python Django - v4.0.5 - Compatible + v4.1 + Full django-tidb N/A - peewee - v3.14.10 - Compatible + SQLAlchemy + v1.4.37 + Full N/A - Build a Simple CRUD App with TiDB and peewee + Build a Simple CRUD App with TiDB and SQLAlchemy - SQLAlchemy - v1.4.37 + peewee + v3.14.10 Compatible N/A - Build a Simple CRUD App with TiDB and SQLAlchemy + Build a Simple CRUD App with TiDB and peewee diff --git a/develop/dev-guide-third-party-tools-compatibility.md b/develop/dev-guide-third-party-tools-compatibility.md index 71399035cb3ab..8db35487191aa 100644 --- a/develop/dev-guide-third-party-tools-compatibility.md +++ b/develop/dev-guide-third-party-tools-compatibility.md @@ -180,7 +180,7 @@ TiDB fixes it in the following ways: ## Compatibility with Sequelize -The compatibility information described in this section is based on [Sequelize 
v6.21.4](https://www.npmjs.com/package/sequelize/v/6.21.4). +The compatibility information described in this section is based on [Sequelize v6.32.1](https://www.npmjs.com/package/sequelize/v/6.32.1). According to the test results, TiDB supports most of the Sequelize features ([using `MySQL` as the dialect](https://sequelize.org/docs/v6/other-topics/dialect-specific-things/#mysql)). @@ -192,6 +192,9 @@ Unsupported features are: - The `READ-UNCOMMITTED` and `SERIALIZABLE` [isolation levels](/system-variables.md#transaction_isolation) are not supported. - Modification of a column's `AUTO_INCREMENT` attribute is not allowed by default. - `FULLTEXT`, `HASH`, and `SPATIAL` indexes are not supported. +- `sequelize.queryInterface.showIndex(Model.tableName);` is not supported. +- `sequelize.options.databaseVersion` is not supported. +- Adding a foreign key reference using [`queryInterface.addColumn`](https://sequelize.org/api/v6/class/src/dialects/abstract/query-interface.js~queryinterface#instance-method-addColumn) is not supported. ### Modification of integer primary key is not supported diff --git a/develop/dev-guide-tidb-crud-sql.md b/develop/dev-guide-tidb-crud-sql.md index cdf1495f10d06..9fa4493e245f6 100644 --- a/develop/dev-guide-tidb-crud-sql.md +++ b/develop/dev-guide-tidb-crud-sql.md @@ -9,7 +9,7 @@ This document briefly introduces how to use TiDB's CURD SQL. ## Before you start -Please make sure you are connected to a TiDB cluster. If not, refer to [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster) to create a Serverless Tier cluster. +Please make sure you are connected to a TiDB cluster. If not, refer to [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster) to create a TiDB Serverless cluster. ## Explore SQL with TiDB @@ -19,7 +19,7 @@ Please make sure you are connected to a TiDB cluster. 
If not, refer to [Build a TiDB is compatible with MySQL, you can use MySQL statements directly in most cases. For unsupported features, see [Compatibility with MySQL](/mysql-compatibility.md#unsupported-features). -To experiment with SQL and test out TiDB compatibility with MySQL queries, you can [run TiDB directly in your web browser without installing it](https://tour.tidb.io/). You can also first deploy a TiDB cluster and then run SQL statements in it. +To experiment with SQL and test out TiDB compatibility with MySQL queries, you can try [TiDB Playground](https://play.tidbcloud.com/?utm_source=docs&utm_medium=basic-sql-operations). You can also first deploy a TiDB cluster and then run SQL statements in it. This page walks you through the basic TiDB SQL statements such as DDL, DML, and CRUD operations. For a complete list of TiDB statements, see [TiDB SQL Syntax Diagram](https://pingcap.github.io/sqlgram/). diff --git a/develop/dev-guide-timeouts-in-tidb.md b/develop/dev-guide-timeouts-in-tidb.md index aad13f226bf20..4b326cb766857 100644 --- a/develop/dev-guide-timeouts-in-tidb.md +++ b/develop/dev-guide-timeouts-in-tidb.md @@ -25,7 +25,7 @@ SQL statements such as `INSERT INTO t10 SELECT * FROM t1` are not affected by GC ## SQL execution timeout -TiDB also provides a system variable (`max_execution_time`, `0` by default, indicating no limit) to limit the execution time of a single SQL statement. `max_execution_time` currently takes effect for all types of statements, not just the `SELECT` statements. The unit is `ms`, but the actual precision is at the `100ms` level instead of the millisecond level. +TiDB also provides a system variable (`max_execution_time`, `0` by default, indicating no limit) to limit the execution time of a single SQL statement. Currently, the system variable only takes effect for read-only SQL statements. The unit of `max_execution_time` is `ms`, but the actual precision is at the `100ms` level instead of the millisecond level. 
## JDBC query timeout @@ -35,7 +35,7 @@ TiDB provides the following MySQL-compatible timeout control parameters. - **wait_timeout**, controls the non-interactive idle timeout for the connection to Java applications. Since TiDB v5.4, the default value of `wait_timeout` is `28800` seconds, which is 8 hours. For TiDB versions earlier than v5.4, the default value is `0`, which means the timeout is unlimited. - **interactive_timeout**, controls the interactive idle timeout for the connection to Java applications. The value is `8 hours` by default. -- **max_execution_time**, controls the timeout for SQL execution in the connection. The value is `0` by default, which allows the connection to be infinitely busy, that is, an SQL statement is executed for an infinitely long time. +- **max_execution_time**, controls the timeout for SQL execution in the connection, only effective for read-only SQL statements. The value is `0` by default, which allows the connection to be infinitely busy, that is, an SQL statement is executed for an infinitely long time. However, in a real production environment, idle connections and indefinitely executing SQL statements have a negative effect on both the database and the application. You can avoid idle connections and indefinitely executing SQL statements by configuring these two session-level variables in your application's connection string. For example, set the following: diff --git a/develop/dev-guide-unstable-result-set.md b/develop/dev-guide-unstable-result-set.md index 2ed4d18afae7d..dacc60b3d332c 100644 --- a/develop/dev-guide-unstable-result-set.md +++ b/develop/dev-guide-unstable-result-set.md @@ -70,25 +70,25 @@ Then two values that match this SQL are returned. 
The first returned value: - ```sql - +------------+--------------+------------------------+ - | class | stuname | max( `b`.`courscore` ) | - +------------+--------------+------------------------+ - | 2018_CS_01 | MonkeyDLuffy | 95.5 | - | 2018_CS_03 | PatrickStar | 99.0 | - +------------+--------------+------------------------+ - ``` +```sql ++------------+--------------+------------------------+ +| class | stuname | max( `b`.`courscore` ) | ++------------+--------------+------------------------+ +| 2018_CS_01 | MonkeyDLuffy | 95.5 | +| 2018_CS_03 | PatrickStar | 99.0 | ++------------+--------------+------------------------+ +``` The second returned value: - ```sql - +------------+--------------+------------------+ - | class | stuname | max(b.courscore) | - +------------+--------------+------------------+ - | 2018_CS_01 | MonkeyDLuffy | 95.5 | - | 2018_CS_03 | SpongeBob | 99.0 | - +------------+--------------+------------------+ - ``` +```sql ++------------+--------------+------------------+ +| class | stuname | max(b.courscore) | ++------------+--------------+------------------+ +| 2018_CS_01 | MonkeyDLuffy | 95.5 | +| 2018_CS_03 | SpongeBob | 99.0 | ++------------+--------------+------------------+ +``` There are two results because you did **_NOT_** specify how to get the value of the `a`.`stuname` field in SQL, and two results are both satisfied by SQL semantics. It results in an unstable result set. Therefore, if you want to guarantee the stability of the result set of the `GROUP BY` statement, use the `FULL GROUP BY` syntax. @@ -177,59 +177,59 @@ To let `GROUP_CONCAT()` get the result set output in order, you need to add the 1. 
Excluded `ORDER BY` - First query: + First query: - {{< copyable "sql" >}} + {{< copyable "sql" >}} - ```sql - mysql> select GROUP_CONCAT( customer_id SEPARATOR ',' ) FROM customer where customer_id like '200002%'; - +-------------------------------------------------------------------------+ - | GROUP_CONCAT(customer_id SEPARATOR ',') | - +-------------------------------------------------------------------------+ - | 20000200992,20000200993,20000200994,20000200995,20000200996,20000200... | - +-------------------------------------------------------------------------+ - ``` + ```sql + mysql> select GROUP_CONCAT( customer_id SEPARATOR ',' ) FROM customer where customer_id like '200002%'; + +-------------------------------------------------------------------------+ + | GROUP_CONCAT(customer_id SEPARATOR ',') | + +-------------------------------------------------------------------------+ + | 20000200992,20000200993,20000200994,20000200995,20000200996,20000200... | + +-------------------------------------------------------------------------+ + ``` - Second query: + Second query: - {{< copyable "sql" >}} + {{< copyable "sql" >}} - ```sql - mysql> select GROUP_CONCAT( customer_id SEPARATOR ',' ) FROM customer where customer_id like '200002%'; - +-------------------------------------------------------------------------+ - | GROUP_CONCAT(customer_id SEPARATOR ',') | - +-------------------------------------------------------------------------+ - | 20000203040,20000203041,20000203042,20000203043,20000203044,20000203... 
| - +-------------------------------------------------------------------------+ - ``` + ```sql + mysql> select GROUP_CONCAT( customer_id SEPARATOR ',' ) FROM customer where customer_id like '200002%'; + +-------------------------------------------------------------------------+ + | GROUP_CONCAT(customer_id SEPARATOR ',') | + +-------------------------------------------------------------------------+ + | 20000203040,20000203041,20000203042,20000203043,20000203044,20000203... | + +-------------------------------------------------------------------------+ + ``` 2. Include `ORDER BY` - First query: + First query: - {{< copyable "sql" >}} + {{< copyable "sql" >}} - ```sql - mysql> select GROUP_CONCAT( customer_id order by customer_id SEPARATOR ',' ) FROM customer where customer_id like '200002%'; - +-------------------------------------------------------------------------+ - | GROUP_CONCAT(customer_id SEPARATOR ',') | - +-------------------------------------------------------------------------+ - | 20000200000,20000200001,20000200002,20000200003,20000200004,20000200... | - +-------------------------------------------------------------------------+ - ``` + ```sql + mysql> select GROUP_CONCAT( customer_id order by customer_id SEPARATOR ',' ) FROM customer where customer_id like '200002%'; + +-------------------------------------------------------------------------+ + | GROUP_CONCAT(customer_id SEPARATOR ',') | + +-------------------------------------------------------------------------+ + | 20000200000,20000200001,20000200002,20000200003,20000200004,20000200... 
| + +-------------------------------------------------------------------------+ + ``` - Second query: + Second query: - {{< copyable "sql" >}} + {{< copyable "sql" >}} - ```sql - mysql> select GROUP_CONCAT( customer_id order by customer_id SEPARATOR ',' ) FROM customer where customer_id like '200002%'; - +-------------------------------------------------------------------------+ - | GROUP_CONCAT(customer_id SEPARATOR ',') | - +-------------------------------------------------------------------------+ - | 20000200000,20000200001,20000200002,20000200003,20000200004,20000200... | - +-------------------------------------------------------------------------+ - ``` + ```sql + mysql> select GROUP_CONCAT( customer_id order by customer_id SEPARATOR ',' ) FROM customer where customer_id like '200002%'; + +-------------------------------------------------------------------------+ + | GROUP_CONCAT(customer_id SEPARATOR ',') | + +-------------------------------------------------------------------------+ + | 20000200000,20000200001,20000200002,20000200003,20000200004,20000200... | + +-------------------------------------------------------------------------+ + ``` ## Unstable results in `SELECT * FROM T LIMIT N` diff --git a/develop/dev-guide-update-data.md b/develop/dev-guide-update-data.md index b0a96d7f84c5d..2dcb69a80ccbb 100644 --- a/develop/dev-guide-update-data.md +++ b/develop/dev-guide-update-data.md @@ -14,7 +14,7 @@ This document describes how to use the following SQL statements to update the da Before reading this document, you need to prepare the following: -- [Build a TiDB Cluster in TiDB Cloud (Serverless Tier)](/develop/dev-guide-build-cluster-in-cloud.md). +- [Build a TiDB Serverless Cluster](/develop/dev-guide-build-cluster-in-cloud.md). 
- Read [Schema Design Overview](/develop/dev-guide-schema-design-overview.md), [Create a Database](/develop/dev-guide-create-database.md), [Create a Table](/develop/dev-guide-create-table.md), and [Create Secondary Indexes](/develop/dev-guide-create-secondary-indexes.md). - If you want to `UPDATE` data, you need to [insert data](/develop/dev-guide-insert-data.md) first. diff --git a/dm/dm-faq.md b/dm/dm-faq.md index 88f98e545c6da..29fc3f7e6309f 100644 --- a/dm/dm-faq.md +++ b/dm/dm-faq.md @@ -33,7 +33,7 @@ When you encounter a DDL statement unsupported by TiDB, you need to manually han > **Note:** > -> Currently, TiDB is not compatible with all the DDL statements that MySQL supports. See [MySQL Compatibility](/mysql-compatibility.md#ddl). +> Currently, TiDB is not compatible with all the DDL statements that MySQL supports. See [MySQL Compatibility](/mysql-compatibility.md#ddl-operations). ## Does DM replicate view-related DDL statements and DML statements to TiDB? diff --git a/dm/dm-glossary.md b/dm/dm-glossary.md index 708c547e808ba..6158f813f1372 100644 --- a/dm/dm-glossary.md +++ b/dm/dm-glossary.md @@ -12,11 +12,11 @@ This document lists the terms used in the logs, monitoring, configurations, and ### Binlog -In TiDB DM, binlogs refer to the binary log files generated in the TiDB database. It has the same indications as that in MySQL or MariaDB. Refer to [MySQL Binary Log](https://dev.mysql.com/doc/internals/en/binary-log.html) and [MariaDB Binary Log](https://mariadb.com/kb/en/library/binary-log/) for details. +In TiDB DM, binlogs refer to the binary log files generated in the TiDB database. It has the same indications as that in MySQL or MariaDB. Refer to [MySQL Binary Log](https://dev.mysql.com/doc/dev/mysql-server/latest/page_protocol_replication.html) and [MariaDB Binary Log](https://mariadb.com/kb/en/library/binary-log/) for details. ### Binlog event -Binlog events are information about data modification made to a MySQL or MariaDB server instance. 
These binlog events are stored in the binlog files. Refer to [MySQL Binlog Event](https://dev.mysql.com/doc/internals/en/binlog-event.html) and [MariaDB Binlog Event](https://mariadb.com/kb/en/library/1-binlog-events/) for details. +Binlog events are information about data modification made to a MySQL or MariaDB server instance. These binlog events are stored in the binlog files. Refer to [MySQL Binlog Event](https://dev.mysql.com/doc/dev/mysql-server/latest/page_protocol_replication_binlog_event.html) and [MariaDB Binlog Event](https://mariadb.com/kb/en/library/1-binlog-events/) for details. ### Binlog event filter diff --git a/dm/dm-open-api.md b/dm/dm-open-api.md index 07863fa670ed4..fc507018dee64 100644 --- a/dm/dm-open-api.md +++ b/dm/dm-open-api.md @@ -28,6 +28,8 @@ To enable OpenAPI, perform one of the following operations: > - DM provides the [specification document](https://github.com/pingcap/tiflow/blob/master/dm/openapi/spec/dm.yaml) that meets the OpenAPI 3.0.0 standard. This document contains all the request parameters and returned values. You can copy the document yaml and preview it in [Swagger Editor](https://editor.swagger.io/). > > - After you deploy the DM-master nodes, you can access `http://{master-addr}/api/v1/docs` to preview the documentation online. +> +> - Some features supported in the configuration file are not supported in OpenAPI. Their capabilities are not fully aligned. In a production environment, it is recommended to use the [configuration file](/dm/dm-config-overview.md). You can use the APIs to perform the following maintenance operations on the DM cluster: diff --git a/dm/dm-overview.md b/dm/dm-overview.md index 351bab2ca7211..62726e1d3f1e4 100644 --- a/dm/dm-overview.md +++ b/dm/dm-overview.md @@ -54,7 +54,7 @@ Before using the DM tool, note the following restrictions: + DDL syntax compatibility - - Currently, TiDB is not compatible with all the DDL statements that MySQL supports. 
Because DM uses the TiDB parser to process DDL statements, it only supports the DDL syntax supported by the TiDB parser. For details, see [MySQL Compatibility](/mysql-compatibility.md#ddl). + - Currently, TiDB is not compatible with all the DDL statements that MySQL supports. Because DM uses the TiDB parser to process DDL statements, it only supports the DDL syntax supported by the TiDB parser. For details, see [MySQL Compatibility](/mysql-compatibility.md#ddl-operations). - DM reports an error when it encounters an incompatible DDL statement. To solve this error, you need to manually handle it using dmctl, either skipping this DDL statement or replacing it with specified DDL statements. For details, see [Skip or replace abnormal SQL statements](/dm/dm-faq.md#how-to-handle-incompatible-ddl-statements). diff --git a/dm/feature-online-ddl.md b/dm/feature-online-ddl.md index d4e52b7f4aed2..c9e18cc73d920 100644 --- a/dm/feature-online-ddl.md +++ b/dm/feature-online-ddl.md @@ -182,9 +182,9 @@ The SQL statements mostly used by pt-osc and the corresponding operation of DM a * DM splits the above `rename` operation into two SQL statements: ```sql - rename test.test4 to test._test4_old; - rename test._test4_new to test.test4; - ``` + rename test.test4 to test._test4_old; + rename test._test4_new to test.test4; + ``` * DM does not execute `rename to _test4_old`. When executing `rename ghost_table to origin table`, DM takes the following steps: diff --git a/dm/feature-shard-merge-optimistic.md b/dm/feature-shard-merge-optimistic.md index c4691ac1c8c39..57ebadcb41981 100644 --- a/dm/feature-shard-merge-optimistic.md +++ b/dm/feature-shard-merge-optimistic.md @@ -21,7 +21,7 @@ Therefore, an "optimistic mode" is needed. In this mode, a DDL statement execute ## Configuration of the optimistic mode -To use the optimistic mode, specify the `shard-mode` item in the task configuration file as `optimistic`. 
For the detailed sample configuration file, see [DM Advanced Task Configuration File](/dm/task-configuration-file-full.md). +To use the optimistic mode, specify the `shard-mode` item in the task configuration file as `optimistic`. You can restrict the behavior of the optimistic mode by enabling the `strict-optimistic-shard-mode` configuration. For the detailed sample configuration file, see [DM Advanced Task Configuration File](/dm/task-configuration-file-full.md). ## Restrictions @@ -46,7 +46,7 @@ Some examples of Type 2 DDL statements are as follows: - Add a `NOT NULL` column without a default value: `ALTER TABLE table_name ADD COLUMN column_1 NOT NULL;`. - Rename an index: `ALTER TABLE table_name RENAME INDEX index_1 TO index_2;`. -When the sharded tables execute the DDL statements above, if the execution order is different, the migration is interrupted. For example: +When the sharded tables execute the DDL statements above, if `strict-optimistic-shard-mode: true` is set, the task is directly interrupted and an error is reported. If `strict-optimistic-shard-mode: false` is set or not specified, different execution order of the DDL statements in sharded tables will cause migration interruption. For example: - Shard 1 renames a column and then alters the column type: 1. Rename a column: `ALTER TABLE table_name RENAME COLUMN column_1 TO column_2;`. diff --git a/dm/handle-failed-ddl-statements.md b/dm/handle-failed-ddl-statements.md index 8278ba4ff2021..985c1ff8d1e46 100644 --- a/dm/handle-failed-ddl-statements.md +++ b/dm/handle-failed-ddl-statements.md @@ -8,7 +8,7 @@ aliases: ['/docs/tidb-data-migration/dev/skip-or-replace-abnormal-sql-statements This document introduces how to handle failed DDL statements when you're using the TiDB Data Migration (DM) tool to migrate data. -Currently, TiDB is not completely compatible with all MySQL syntax (see [the DDL statements supported by TiDB](/mysql-compatibility.md#ddl)). 
Therefore, when DM is migrating data from MySQL to TiDB and TiDB does not support the corresponding DDL statement, an error might occur and break the migration process. In this case, you can use the `binlog` command of DM to resume the migration. +Currently, TiDB is not completely compatible with all MySQL syntax (see [the DDL statements supported by TiDB](/mysql-compatibility.md#ddl-operations)). Therefore, when DM is migrating data from MySQL to TiDB and TiDB does not support the corresponding DDL statement, an error might occur and break the migration process. In this case, you can use the `binlog` command of DM to resume the migration. ## Restrictions diff --git a/dm/maintain-dm-using-tiup.md b/dm/maintain-dm-using-tiup.md index 4c19f6aa259f1..e817efaceec8c 100644 --- a/dm/maintain-dm-using-tiup.md +++ b/dm/maintain-dm-using-tiup.md @@ -390,7 +390,7 @@ All operations above performed on the cluster machine use the SSH client embedde Then you can use the `--native-ssh` command-line flag to enable the system-native command-line tool: -- Deploy a cluster: `tiup dm deploy --native-ssh`. Fill in the name of your cluster for ``, the DM version to be deployed (such as `v6.5.0`) for `` , and the topology file name for ``. +- Deploy a cluster: `tiup dm deploy --native-ssh`. Fill in the name of your cluster for ``, the DM version to be deployed (such as `v7.3.0`) for `` , and the topology file name for ``. - Start a cluster: `tiup dm start --native-ssh`. - Upgrade a cluster: `tiup dm upgrade ... 
--native-ssh` diff --git a/dm/quick-start-create-task.md b/dm/quick-start-create-task.md index 8bfd9718e7400..45d7e10b69e2a 100644 --- a/dm/quick-start-create-task.md +++ b/dm/quick-start-create-task.md @@ -74,7 +74,7 @@ To run a TiDB server, use the following command: {{< copyable "shell-regular" >}} ```bash -wget https://download.pingcap.org/tidb-community-server-v7.0.0-linux-amd64.tar.gz +wget https://download.pingcap.org/tidb-community-server-v7.3.0-linux-amd64.tar.gz tar -xzvf tidb-latest-linux-amd64.tar.gz mv tidb-latest-linux-amd64/bin/tidb-server ./ ./tidb-server diff --git a/dm/task-configuration-file-full.md b/dm/task-configuration-file-full.md index 1214a72613ed9..c1d81955a3467 100644 --- a/dm/task-configuration-file-full.md +++ b/dm/task-configuration-file-full.md @@ -18,12 +18,13 @@ The following is the task configuration file template which allows you to perfor ```yaml --- -# ----------- Global setting ----------- +# ----------- Global configuration ----------- ## ********* Basic configuration ********* name: test # The name of the task. Should be globally unique. task-mode: all # The task mode. Can be set to `full`(only migrates full data)/`incremental`(replicates binlogs synchronously)/`all` (replicates both full data and incremental binlogs). shard-mode: "pessimistic" # The shard merge mode. Optional modes are ""/"pessimistic"/"optimistic". The "" mode is used by default which means sharding DDL merge is disabled. If the task is a shard merge task, set it to the "pessimistic" mode. # After understanding the principles and restrictions of the "optimistic" mode, you can set it to the "optimistic" mode. +strict-optimistic-shard-mode: false # Only takes effect in the optimistic mode. This configuration restricts the behavior of the optimistic mode. The default value is false. Introduced in v7.2.0. 
For details, see https://docs.pingcap.com/tidb/v7.2/feature-shard-merge-optimistic meta-schema: "dm_meta" # The downstream database that stores the `meta` information. timezone: "Asia/Shanghai" # The timezone used in SQL Session. By default, DM uses the global timezone setting in the target cluster, which ensures the correctness automatically. A customized timezone does not affect data migration but is unnecessary. case-sensitive: false # Determines whether the schema/table is case-sensitive. @@ -239,6 +240,8 @@ mysql-instances: ## Configuration order +From the sample configuration file, you can see that the configuration file contains two parts: `Global configuration` and `Instance configuration`, where the `Global configuration` contains `Basic configuration` and `Feature configuration set`. The configuration order is as follows: + 1. Edit the [global configuration](#global-configuration). 2. Edit the [instance configuration](#instance-configuration) based on the global configuration. diff --git a/dumpling-overview.md b/dumpling-overview.md index 2de0c27480cc0..3c10bccc2e87e 100644 --- a/dumpling-overview.md +++ b/dumpling-overview.md @@ -88,15 +88,7 @@ Dumpling exports data to SQL files by default. You can also export data to SQL f {{< copyable "shell-regular" >}} ```shell -dumpling \ - -u root \ - -P 4000 \ - -h 127.0.0.1 \ - --filetype sql \ - -t 8 \ - -o /tmp/test \ - -r 200000 \ - -F 256MiB +dumpling -u root -P 4000 -h 127.0.0.1 --filetype sql -t 8 -o /tmp/test -r 200000 -F 256MiB ``` In the command above: @@ -105,18 +97,18 @@ In the command above: -+ The `-o` (or `--output`) option specifies the export directory of the storage, which supports a local file path or an [external storage URI](/br/backup-and-restore-storages.md#uri-format). ++ The `-o` (or `--output`) option specifies the export directory of the storage, which supports an absolute local file path or an [external storage URI](/br/backup-and-restore-storages.md#uri-format). 
-+ The `-o` (or `--output`) option specifies the export directory of the storage, which supports a local file path or an [external storage URI](https://docs.pingcap.com/tidb/stable/backup-and-restore-storages#uri-format). ++ The `-o` (or `--output`) option specifies the export directory of the storage, which supports an absolute local file path or an [external storage URI](https://docs.pingcap.com/tidb/stable/backup-and-restore-storages#uri-format). + The `-t` option specifies the number of threads for the export. Increasing the number of threads improves the concurrency of Dumpling and the export speed, and also increases the database's memory consumption. Therefore, it is not recommended to set the number too large. Usually, it's less than 64. -+ The `-r` option specifies the maximum number of rows in a single file. With this option specified, Dumpling enables the in-table concurrency to speed up the export and reduce the memory usage. When the upstream database is TiDB v3.0 or later versions, a `-r` value greater than 0 indicates that the TiDB region information is used for splitting and the specific `-r` value does not affect the split algorithm. When the upstream database is MySQL and the primary key is of the `int` type, specifying `-r` can also enable the in-table concurrency. ++ The `-r` option enables the in-table concurrency to speed up the export. The default value is `0`, which means disabled. A value greater than 0 means it is enabled, and the value is of `INT` type. When the source database is TiDB, a `-r` value greater than 0 indicates that the TiDB region information is used for splitting, and reduces the memory usage. The specific `-r` value does not affect the split algorithm. When the source database is MySQL and the primary key is of the `INT` type, specifying `-r` can also enable the in-table concurrency. 
+ The `-F` option is used to specify the maximum size of a single file (the unit here is `MiB`; inputs like `5GiB` or `8KB` are also acceptable). It is recommended to keep its value to 256 MiB or less if you plan to use TiDB Lightning to load this file into a TiDB instance. > **Note:** @@ -132,15 +124,7 @@ When you export data to CSV files, you can use `--sql ` to filter the recor {{< copyable "shell-regular" >}} ```shell -./dumpling \ - -u root \ - -P 4000 \ - -h 127.0.0.1 \ - -o /tmp/test \ - --filetype csv \ - --sql 'select * from `test`.`sbtest1` where id < 100' \ - -F 100MiB \ - --output-filename-template 'test.sbtest1.{{.Index}}' +./dumpling -u root -P 4000 -h 127.0.0.1 -o /tmp/test --filetype csv --sql 'select * from `test`.`sbtest1` where id < 100' -F 100MiB --output-filename-template 'test.sbtest1.{{.Index}}' ``` In the command above: @@ -263,12 +247,7 @@ Dumpling also supports reading credential files from `~/.aws/credentials`. Param {{< copyable "shell-regular" >}} ```shell -./dumpling \ - -u root \ - -P 4000 \ - -h 127.0.0.1 \ - -r 200000 \ - -o "s3://${Bucket}/${Folder}" +./dumpling -u root -P 4000 -h 127.0.0.1 -r 200000 -o "s3://${Bucket}/${Folder}" ``` ### Filter the exported data @@ -280,12 +259,7 @@ By default, Dumpling exports all databases except system databases (including `m {{< copyable "shell-regular" >}} ```shell -./dumpling \ - -u root \ - -P 4000 \ - -h 127.0.0.1 \ - -o /tmp/test \ - --where "id < 100" +./dumpling -u root -P 4000 -h 127.0.0.1 -o /tmp/test --where "id < 100" ``` The above command exports the data that matches `id < 100` from each table. Note that you cannot use the `--where` parameter together with `--sql`. 
@@ -297,14 +271,7 @@ Dumpling can filter specific databases or tables by specifying the table filter {{< copyable "shell-regular" >}} ```shell -./dumpling \ - -u root \ - -P 4000 \ - -h 127.0.0.1 \ - -o /tmp/test \ - -r 200000 \ - --filter "employees.*" \ - --filter "*.WorkOrder" +./dumpling -u root -P 4000 -h 127.0.0.1 -o /tmp/test -r 200000 --filter "employees.*" --filter "*.WorkOrder" ``` The above command exports all the tables in the `employees` database and the `WorkOrder` tables in all databases. @@ -328,7 +295,7 @@ Examples: The exported file is stored in the `./export-` directory by default. Commonly used options are as follows: - The `-t` option specifies the number of threads for the export. Increasing the number of threads improves the concurrency of Dumpling and the export speed, and also increases the database's memory consumption. Therefore, it is not recommended to set the number too large. -- The `-r` option specifies the maximum number of records (or the number of rows in the database) for a single file. When it is enabled, Dumpling enables concurrency in the table to improve the speed of exporting large tables. When the upstream database is TiDB v3.0 or later versions, a `-r` value greater than 0 indicates that the TiDB region information is used for splitting and the specific `-r` value does not affect the split algorithm. When the upstream database is MySQL and the primary key is of the `int` type, specifying `-r` can also enable the in-table concurrency. +- The `-r` option enables the in-table concurrency to speed up the export. The default value is `0`, which means disabled. A value greater than 0 means it is enabled, and the value is of `INT` type. When the source database is TiDB, a `-r` value greater than 0 indicates that the TiDB region information is used for splitting, and reduces the memory usage. The specific `-r` value does not affect the split algorithm. 
When the source database is MySQL and the primary key is of the `INT` type, specifying `-r` can also enable the in-table concurrency. - The `--compress ` option specifies the compression format of the dump. It supports the following compression algorithms: `gzip`, `snappy`, and `zstd`. This option can speed up dumping of data if storage is the bottleneck or if storage capacity is a concern. The drawback is an increase in CPU usage. Each file is compressed individually. With the above options specified, Dumpling can have a quicker speed of data export. @@ -423,7 +390,7 @@ SET GLOBAL tidb_gc_life_time = '10m'; | `--case-sensitive` | whether table-filter is case-sensitive | false (case-insensitive) | | `-h` or `--host` | The IP address of the connected database host | "127.0.0.1" | | `-t` or `--threads` | The number of concurrent backup threads | 4 | -| `-r` or `--rows` | Split the table into rows with a specified number of rows (generally applicable for concurrent operations of splitting a large table into multiple files. When the upstream database is TiDB v3.0 or later versions, a `-r` value greater than 0 indicates that the TiDB region information is used for splitting and the specific `-r` value does not affect the split algorithm. | +| `-r` or `--rows` | Enable the in-table concurrency to speed up the export. The default value is `0`, which means disabled. A value greater than 0 means it is enabled, and the value is of `INT` type. When the source database is TiDB, a `-r` value greater than 0 indicates that the TiDB region information is used for splitting, and reduces the memory usage. The specific `-r` value does not affect the split algorithm. When the source database is MySQL and the primary key is of the `INT` type, specifying `-r` can also enable the in-table concurrency. | | `-L` or `--logfile` | Log output address. 
If it is empty, the log will be output to the console | "" | | `--loglevel` | Log level {debug,info,warn,error,dpanic,panic,fatal} | "info" | | `--logfmt` | Log output format {text,json} | "text" | @@ -434,7 +401,7 @@ SET GLOBAL tidb_gc_life_time = '10m'; | `-s` or `--statement-size` | Control the size of the `INSERT` statements; the unit is bytes | | `-F` or `--filesize` | The file size of the divided tables. The unit must be specified such as `128B`, `64KiB`, `32MiB`, and `1.5GiB`. | | `--filetype` | Exported file type (csv/sql) | "sql" | -| `-o` or `--output` | The path of exported local files or [external storage URI](https://docs.pingcap.com/tidb/stable/backup-and-restore-storages#uri-format) | "./export-${time}" | +| `-o` or `--output` | Specify the absolute local file path or [external storage URI](https://docs.pingcap.com/tidb/stable/backup-and-restore-storages#uri-format) for exporting the data. | "./export-${time}" | | `-S` or `--sql` | Export data according to the specified SQL statement. This command does not support concurrent export. | | `--consistency` | flush: use FTWRL before the dump
snapshot: dump the TiDB data of a specific snapshot of a TSO
lock: execute `lock tables read` on all tables to be dumped
none: dump without adding locks, which cannot guarantee consistency
auto: use --consistency flush for MySQL; use --consistency snapshot for TiDB | "auto" | | `--snapshot` | Snapshot TSO; valid only when `consistency=snapshot` | diff --git a/dynamic-config.md b/dynamic-config.md index 2279535e5d0ec..1c49cf808088d 100644 --- a/dynamic-config.md +++ b/dynamic-config.md @@ -335,9 +335,9 @@ The following TiDB configuration items can be modified dynamically: | Configuration item | SQL variable | Description | | :--- | :--- | -| `log.enable-slow-log` | `tidb_enable_slow_log` | Whether to enable slow log | -| `log.slow-threshold` | `tidb_slow_log_threshold` | The threshold of slow log | -| `log.expensive-threshold` | `tidb_expensive_query_time_threshold` | The threshold of a expensive query | +| `instance.tidb_enable_slow_log` | `tidb_enable_slow_log` | Whether to enable slow log | +| `instance.tidb_slow_log_threshold` | `tidb_slow_log_threshold` | The threshold of slow log | +| `instance.tidb_expensive_query_time_threshold` | `tidb_expensive_query_time_threshold` | The threshold of a expensive query | ### Modify TiFlash configuration dynamically diff --git a/enable-tls-between-components.md b/enable-tls-between-components.md index 06acae406a49e..af73a7887ddfc 100644 --- a/enable-tls-between-components.md +++ b/enable-tls-between-components.md @@ -86,7 +86,7 @@ Currently, it is not supported to only enable encrypted transmission of some spe Configure in the `tiflash.toml` file, and change the `http_port` item to `https_port`: - ```toml + ```toml [security] ## The path for certificates. An empty string means that secure connections are disabled. # Path of the file that contains a list of trusted SSL CAs. If it is set, the following settings `cert_path` and `key_path` are also needed. 
diff --git a/encryption-at-rest.md b/encryption-at-rest.md index 4727835d84480..c025f9eec2ba9 100644 --- a/encryption-at-rest.md +++ b/encryption-at-rest.md @@ -22,7 +22,7 @@ When a TiDB cluster is deployed, the majority of user data is stored on TiKV and TiKV supports encryption at rest. This feature allows TiKV to transparently encrypt data files using [AES](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) or [SM4](https://en.wikipedia.org/wiki/SM4_(cipher)) in [CTR](https://en.wikipedia.org/wiki/Block_cipher_mode_of_operation) mode. To enable encryption at rest, an encryption key must be provided by the user and this key is called master key. TiKV automatically rotates data keys that it used to encrypt actual data files. Manually rotating the master key can be done occasionally. Note that encryption at rest only encrypts data at rest (namely, on disk) and not while data is transferred over network. It is advised to use TLS together with encryption at rest. -Optionally, you can use AWS KMS for both cloud and on-premises deployments. You can also supply the plaintext master key in a file. +Optionally, you can use AWS KMS for both cloud and self-hosted deployments. You can also supply the plaintext master key in a file. TiKV currently does not exclude encryption keys and user data from core dumps. It is advised to disable core dumps for the TiKV process when using encryption at rest. This is not currently handled by TiKV itself. @@ -310,3 +310,77 @@ When restoring the backup, both `--s3.sse` and `--s3.sse-kms-key-id` should NOT ``` ./br restore full --pd --storage "s3:///" ``` + +## BR Azure Blob Storage server-side encryption + +When backing up data to Azure Blob Storage using BR, you can specify either an encryption scope or an encryption key for server-side encryption. 
+ +### Method 1: use an encryption scope + +To specify an encryption scope for the backup data, you can use one of the following two ways: + +- Include the `--azblob.encryption-scope` option in the `backup` command and set it to the scope name: + + ```shell + ./br backup full --pd --storage "azure:///" --azblob.encryption-scope scope1 + ``` + +- Include `encryption-scope` in the URI and set it to the scope name: + + ```shell + ./br backup full --pd --storage "azure:///?encryption-scope=scope1" + ``` + +For more information, see the Azure documentation: [Upload a blob with an encryption scope](https://learn.microsoft.com/en-us/azure/storage/blobs/encryption-scope-manage?tabs=powershell#upload-a-blob-with-an-encryption-scope). + +When restoring the backup, you do not need to specify the encryption scope. Azure Blob Storage automatically decrypts the data. For example: + +```shell +./br restore full --pd --storage "azure:///" +``` + +### Method 2: use an encryption key + +To specify an encryption key for the backup data, you can use one of the following three ways: + +- Include the `--azblob.encryption-key` option in the `backup` command and set it to an AES256 encryption key: + + ```shell + ./br backup full --pd --storage "azure:///" --azblob.encryption-key + ``` + +- Include `encryption-key` in the URI and set it to an AES256 encryption key. If the key contains URI reserved characters such as `&` and `%`, you need to percent-encode it first: + + ```shell + ./br backup full --pd --storage "azure:///?encryption-key=" + ``` + +- Set the `AZURE_ENCRYPTION_KEY` environment variable to an AES256 encryption key. Before running, make sure that you remember the encryption key in the environment variable to avoid forgetting it. 
+ + ```shell + export AZURE_ENCRYPTION_KEY= + ./br backup full --pd --storage "azure:///" + ``` + +For more information, see the Azure documentation: [Provide an encryption key on a request to Blob storage](https://learn.microsoft.com/en-us/azure/storage/blobs/encryption-customer-provided-keys). + +When restoring the backup, you need to specify the encryption key. For example: + +- Include the `--azblob.encryption-key` option in the `restore` command: + + ```shell + ./br restore full --pd --storage "azure:///" --azblob.encryption-key + ``` + +- Include `encryption-key` in the URI: + + ```shell + ./br restore full --pd --storage "azure:///?encryption-key=" + ``` + +- Set the `AZURE_ENCRYPTION_KEY` environment variable: + + ```shell + export AZURE_ENCRYPTION_KEY= + ./br restore full --pd --storage "azure:///" + ``` diff --git a/error-codes.md b/error-codes.md index 1e3a17aac1b72..1b08664077024 100644 --- a/error-codes.md +++ b/error-codes.md @@ -364,29 +364,61 @@ TiDB is compatible with the error codes in MySQL, and in most cases returns the * Error Number: 8156 - The file path of the `LOAD DATA` statement cannot be empty. You need to set the correct path before importing. See [`LOAD DATA`](/sql-statements/sql-statement-load-data.md). + The provided path cannot be empty. You need to set a correct path before the import. + +* Error Number: 8157 + + The provided file format is unsupported. For the supported formats, see [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md#format). * Error Number: 8158 - The S3 or GCS path is invalid. See [external storage](/br/backup-and-restore-storages.md) to set a valid path. + The provided path is invalid. Refer to the specific error message for actions. For Amazon S3 or GCS path settings, see [External storage](/br/backup-and-restore-storages.md#uri-format). * Error Number: 8159 - TiDB cannot access the S3 or GCS path provided in the `LOAD DATA` statement. 
Make sure that the S3 or GCS bucket exists, and that you have used the correct access key and secret access key to let TiDB access the bucket. + TiDB cannot access the provided Amazon S3 or GCS path. Make sure that the specified S3 or GCS bucket exists and that you have provided the correct Access Key and Secret Access Key for TiDB to access the corresponding bucket. * Error Number: 8160 - `LOAD DATA` fails to read the data file. Refer to the specific error message for action. + Failed to read the data files. Refer to the specific error message for actions. * Error Number: 8162 - There is an error in the `LOAD DATA` statement. See [`LOAD DATA`](/sql-statements/sql-statement-load-data.md) for supported features. + There is an error in the statement. Refer to the specific error message for actions. + +* Error Number: 8163 + + The provided option is unknown. For supported options, see [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md#parameter-description). + +* Error Number: 8164 + + The provided option value is invalid. For valid values, see [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md#parameter-description). + +* Error Number: 8165 + + Duplicate options are specified. Each option can only be specified once. + +* Error Number: 8166 + + Certain options can only be used in specific conditions. Refer to the specific error message for actions. For supported options, see [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md#parameter-description). + +* Error Number: 8170 + + The specified job does not exist. + +* Error Number: 8171 + + The current operation cannot be performed for the current job status. Refer to the specific error message for actions. + +* Error Number: 8173 + + When executing `IMPORT INTO`, TiDB checks the current environment, such as checking if the downstream table is empty. Refer to the specific error message for actions. * Error Number: 8200 The DDL syntax is not yet supported. 
- See [compatibility of MySQL DDL](/mysql-compatibility.md#ddl) for reference. + See [compatibility of MySQL DDL](/mysql-compatibility.md#ddl-operations) for reference. * Error Number: 8214 @@ -468,12 +500,32 @@ TiDB is compatible with the error codes in MySQL, and in most cases returns the * Error Number: 8252 - The complete error message is as follows: - + The complete error message is as follows: + `ERROR 8252 (HY000) : Exceeded resource group quota limitation` This error is returned when the attempted consumption exceeds the resource group limit. This error is usually caused by a single transaction that is too large or too many concurrent transactions. You need to adjust the transaction size or reduce the number of concurrent clients. +* Error Number: 8253 + + The query stops because it meets the condition of a runaway query. See [Runaway Queries](/tidb-resource-control.md#manage-queries-that-consume-more-resources-than-expected-runaway-queries). + +* Error Number: 8254 + + The query stops because it meets the quarantined watch condition of a runaway query. See [Runaway Queries](/tidb-resource-control.md#manage-queries-that-consume-more-resources-than-expected-runaway-queries). + +* Error Number: 8260 + + DDL operations cannot be paused by `ADMIN PAUSE`. + +* Error Number: 8261 + + DDL operations cannot be resumed by `ADMIN RESUME`. + +* Error Number: 8262 + + DDL is paused by `ADMIN PAUSE` and cannot be paused again. 
+ * Error Number: 9001 The complete error message: `ERROR 9001 (HY000): PD Server Timeout` diff --git a/explain-joins.md b/explain-joins.md index fa1ef33cae6aa..bfbda91e01aad 100644 --- a/explain-joins.md +++ b/explain-joins.md @@ -179,8 +179,8 @@ An index join operation using the hint [`INL_JOIN`](/optimizer-hints.md#inl_join Index join performance is influenced by the following system variables: -* [`tidb_index_join_batch_size`](/system-variables.md#tidb_index_join_batch_size) (default value: `25000`) - the batch size of `index lookup join` operations. -* [`tidb_index_lookup_join_concurrency`](/system-variables.md#tidb_index_lookup_join_concurrency) (default value: `4`) - the number of concurrent index lookup tasks. +- [`tidb_index_join_batch_size`](/system-variables.md#tidb_index_join_batch_size) (default value: `25000`) - the batch size of `index lookup join` operations. +- [`tidb_index_lookup_join_concurrency`](/system-variables.md#tidb_index_lookup_join_concurrency) (default value: `4`) - the number of concurrent index lookup tasks. ## Hash Join @@ -257,8 +257,12 @@ Query OK, 0 rows affected (0.00 sec) Hash join performance is influenced by the following system variables: -* [`tidb_mem_quota_query`](/system-variables.md#tidb_mem_quota_query) (default value: 1GB) - if the memory quota for a query is exceeded, TiDB will attempt to spill the `Build` operator of a hash join to disk to save memory. -* [`tidb_hash_join_concurrency`](/system-variables.md#tidb_hash_join_concurrency) (default value: `5`) - the number of concurrent hash join tasks. +- [`tidb_mem_quota_query`](/system-variables.md#tidb_mem_quota_query) (default value: 1GB) - if the memory quota for a query is exceeded, TiDB will attempt to spill the `Build` operator of a hash join to disk to save memory. +- [`tidb_hash_join_concurrency`](/system-variables.md#tidb_hash_join_concurrency) (default value: `5`) - the number of concurrent hash join tasks. 
+ +### Related optimizations + +TiDB provides the Runtime Filter feature, which optimizes the performance of hash join and greatly improves its execution speed. For specific optimization usage, see [Runtime Filter](/runtime-filter.md). ## Merge Join diff --git a/explain-walkthrough.md b/explain-walkthrough.md index 643fc01bd3ba7..8ff69b18bbea7 100644 --- a/explain-walkthrough.md +++ b/explain-walkthrough.md @@ -214,3 +214,55 @@ From the result above, the query time has reduced from 1.03 seconds to 0.0 secon > **Note:** > > Another optimization that applies here is the coprocessor cache. If you are unable to add indexes, consider enabling the [coprocessor cache](/coprocessor-cache.md). When it is enabled, as long as the Region has not been modified since the operator is last executed, TiKV will return the value from the cache. This will also help reduce much of the cost of the expensive `TableFullScan` and `Selection` operators. + +## Disable the early execution of subqueries + +During query optimization, TiDB pre-executes subqueries that can be directly calculated. For example: + +```sql +CREATE TABLE t1(a int); +INSERT INTO t1 VALUES(1); +CREATE TABLE t2(a int); +EXPLAIN SELECT * FROM t2 WHERE a = (SELECT a FROM t1); +``` + +```sql ++--------------------------+----------+-----------+---------------+--------------------------------+ +| id | estRows | task | access object | operator info | ++--------------------------+----------+-----------+---------------+--------------------------------+ +| TableReader_14 | 10.00 | root | | data:Selection_13 | +| └─Selection_13 | 10.00 | cop[tikv] | | eq(test.t2.a, 1) | +| └─TableFullScan_12 | 10000.00 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | ++--------------------------+----------+-----------+---------------+--------------------------------+ +3 rows in set (0.00 sec) +``` + +In the preceding example, the `a = (SELECT a FROM t1)` subquery is calculated during optimization and rewritten as `t2.a=1`. 
This allows more optimizations such as constant propagation and folding during optimization. However, it affects the execution time of the `EXPLAIN` statement. When the subquery itself takes a long time to execute, the `EXPLAIN` statement might not be completed, which could affect online troubleshooting. + +Starting from v7.3.0, TiDB introduces the [`tidb_opt_enable_non_eval_scalar_subquery`](/system-variables.md#tidb_opt_enable_non_eval_scalar_subquery-new-in-v730) system variable, which controls whether to disable the pre-execution of such subqueries in `EXPLAIN`. The default value of this variable is `OFF`, which means that the subquery is pre-calculated. You can set this variable to `ON` to disable the pre-execution of subqueries: + +```sql +SET @@tidb_opt_enable_non_eval_scalar_subquery = ON; +EXPLAIN SELECT * FROM t2 WHERE a = (SELECT a FROM t1); +``` + +```sql ++---------------------------+----------+-----------+---------------+---------------------------------+ +| id | estRows | task | access object | operator info | ++---------------------------+----------+-----------+---------------+---------------------------------+ +| Selection_13 | 8000.00 | root | | eq(test.t2.a, ScalarQueryCol#5) | +| └─TableReader_15 | 10000.00 | root | | data:TableFullScan_14 | +| └─TableFullScan_14 | 10000.00 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | +| ScalarSubQuery_10 | N/A | root | | Output: ScalarQueryCol#5 | +| └─MaxOneRow_6 | 1.00 | root | | | +| └─TableReader_9 | 1.00 | root | | data:TableFullScan_8 | +| └─TableFullScan_8 | 1.00 | cop[tikv] | table:t1 | keep order:false, stats:pseudo | ++---------------------------+----------+-----------+---------------+---------------------------------+ +7 rows in set (0.00 sec) +``` + +As you can see, the scalar subquery is not expanded during the execution, which makes it easier to understand the specific execution process of such SQL. 
+ +> **Note:** +> +> [`tidb_opt_enable_non_eval_scalar_subquery`](/system-variables.md#tidb_opt_enable_non_eval_scalar_subquery-new-in-v730) only affects the behavior of the `EXPLAIN` statement, and the `EXPLAIN ANALYZE` statement still pre-executes the subquery. diff --git a/explore-htap.md index 5d32d5cb3d9a2..ec62e5e4be6f4 100644 --- a/explore-htap.md +++ b/explore-htap.md @@ -13,7 +13,7 @@ This guide describes how to explore and use the features of TiDB Hybrid Transact ## Use cases -TiDB HTAP can handle the massive data that increases rapidly, reduce the cost of DevOps, and be deployed in either on-premises or cloud environments easily, which brings the value of data assets in real time. +TiDB HTAP can handle the massive data that increases rapidly, reduce the cost of DevOps, and be deployed in either self-hosted or cloud environments easily, which brings the value of data assets in real time. The following are the typical use cases of HTAP: diff --git a/exporting-grafana-snapshots.md index 48a0dc68e435d..ce2c8511098d4 100644 --- a/exporting-grafana-snapshots.md +++ b/exporting-grafana-snapshots.md @@ -10,6 +10,10 @@ summary: Learn how to export snapshots of Grafana Dashboard, and how to visualiz # Export Grafana Snapshots +> **Note:** +> +> Currently, MetricsTool can only be used with Grafana v6.x.x. + Metrics data is important in troubleshooting. When you request remote assistance, sometimes the support staff need to view the Grafana dashboards to diagnose problems. [MetricsTool](https://metricstool.pingcap.net/) can help export snapshots of Grafana dashboards as local files and visualize these snapshots. You can share these snapshots with outsiders and allow them to accurately read out the graphs, without giving out access to other sensitive information on the Grafana server.
## Usage @@ -40,10 +44,6 @@ The snapshot file contains the values of all graphs and panels in the selected t No, the Visualizer parses the snapshot files entirely inside your browser. Nothing will be sent to PingCAP. You are free to view snapshot files received from sensitive sources, and no need to worry about these leaking to third parties through the Visualizer. -### Can it export metrics besides Grafana? - -No, we only support Grafana v6.x.x at the moment. - ### Will there be problems to execute the script before all metrics are loaded? No, the script UI will notify you to wait for all metrics to be loaded. However, you can manually skip waiting and export the snapshot in case some metrics take too long to load. diff --git a/faq/deploy-and-maintain-faq.md index 7c791b42a9520..cf9577c84ec28 100644 --- a/faq/deploy-and-maintain-faq.md +++ b/faq/deploy-and-maintain-faq.md @@ -77,7 +77,7 @@ Check the time difference between the machine time of the monitor and the time w ### How to separately record the slow query log in TiDB? How to locate the slow query SQL statement? -1. The slow query definition for TiDB is in the TiDB configuration file. The `slow-threshold: 300` parameter is used to configure the threshold value of the slow query (unit: millisecond). +1. The slow query definition for TiDB is controlled by the `tidb_slow_log_threshold` system variable, which is used to configure the threshold value of the slow query (unit: millisecond, default: `300`). 2. If a slow query occurs, you can locate the `tidb-server` instance where the slow query is and the slow query time point using Grafana and find the SQL statement information recorded in the log on the corresponding node.
diff --git a/functions-and-operators/information-functions.md b/functions-and-operators/information-functions.md index 29e7e1286aa77..5fbbe56c57e80 100644 --- a/functions-and-operators/information-functions.md +++ b/functions-and-operators/information-functions.md @@ -8,13 +8,12 @@ aliases: ['/docs/dev/functions-and-operators/information-functions/','/docs/dev/ TiDB supports most of the [information functions](https://dev.mysql.com/doc/refman/5.7/en/information-functions.html) available in MySQL 5.7. -## Supported functions +## TiDB supported MySQL functions | Name | Description | |:-----|:------------| | [`BENCHMARK()`](https://dev.mysql.com/doc/refman/5.7/en/information-functions.html#function_benchmark) | Execute an expression in a loop | | [`CONNECTION_ID()`](https://dev.mysql.com/doc/refman/5.7/en/information-functions.html#function_connection-id) | Return the connection ID (thread ID) for the connection | -| `CURRENT_RESOURCE_GROUP()` | Return the name of the resource group that the current session is bound to | | [`CURRENT_USER()`, `CURRENT_USER`](https://dev.mysql.com/doc/refman/5.7/en/information-functions.html#function_current-user) | Return the authenticated user name and host name | | [`DATABASE()`](https://dev.mysql.com/doc/refman/5.7/en/information-functions.html#function_database) | Return the default (current) database name | | [`FOUND_ROWS()`](https://dev.mysql.com/doc/refman/5.7/en/information-functions.html#function_found-rows) | For a `SELECT` with a `LIMIT` clause, the number of the rows that are returned if there is no `LIMIT` clause | @@ -26,6 +25,14 @@ TiDB supports most of the [information functions](https://dev.mysql.com/doc/refm | [`USER()`](https://dev.mysql.com/doc/refman/5.7/en/information-functions.html#function_user) | Return the user name and host name provided by the client | | [`VERSION()`](https://dev.mysql.com/doc/refman/5.7/en/information-functions.html#function_version) | Return a string that indicates the MySQL server version 
| +## TiDB specific functions + +The following function is only supported by TiDB, and there is no equivalent function in MySQL. + +| Name | Description | +|:-----|:------------| +| [`CURRENT_RESOURCE_GROUP()`](/functions-and-operators/tidb-functions.md#current_resource_group) | Return the name of the resource group that the current session is bound to | + ## Unsupported functions * `CHARSET()` diff --git a/functions-and-operators/locking-functions.md b/functions-and-operators/locking-functions.md index 549e14bf8765d..1567b9fce7953 100644 --- a/functions-and-operators/locking-functions.md +++ b/functions-and-operators/locking-functions.md @@ -12,16 +12,14 @@ TiDB supports most of the user-level [locking functions](https://dev.mysql.com/d | Name | Description | |:---------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------| | [`GET_LOCK(lockName, timeout)`](https://dev.mysql.com/doc/refman/5.7/en/locking-functions.html#function_get-lock) | Acquires an advisory lock. The `lockName` parameter must be NO longer than 64 characters. Waits maximum `timeout` seconds before timing out and returns a failure. | -| [`RELEASE_LOCK(lockName)`](https://dev.mysql.com/doc/refman/5.7/en/locking-functions.html#function_release-lock) | Releases a previously acquired lock. The `lockName` parameter must be NO longer than 64 characters. | +| [`IS_FREE_LOCK(lockName)`](https://dev.mysql.com/doc/refman/5.7/en/locking-functions.html#function_is-free-lock) | Checks if a lock is free. | +| [`IS_USED_LOCK(lockName)`](https://dev.mysql.com/doc/refman/5.7/en/locking-functions.html#function_is-used-lock) | Checks if a lock is in use. If true, it returns the corresponding connection ID. | | [`RELEASE_ALL_LOCKS()`](https://dev.mysql.com/doc/refman/5.7/en/locking-functions.html#function_release-all-locks) | Releases all locks held by the current session. 
| +| [`RELEASE_LOCK(lockName)`](https://dev.mysql.com/doc/refman/5.7/en/locking-functions.html#function_release-lock) | Releases a previously acquired lock. The `lockName` parameter must be NO longer than 64 characters. | ## MySQL compatibility * The minimum timeout permitted by TiDB is 1 second, and the maximum timeout is 1 hour (3600 seconds). This differs from MySQL, where both 0 second and unlimited timeouts (`timeout=-1`) are permitted. TiDB will automatically convert out-of-range values to the nearest permitted value and convert `timeout=-1` to 3600 seconds. * TiDB does not automatically detect deadlocks caused by user-level locks. Deadlocked sessions will timeout after a maximum of 1 hour, but can also be manually resolved by using `KILL` on one of the affected sessions. You can also prevent deadlocks by always acquiring user-level locks in the same order. * Locks take effect on all TiDB servers in the cluster. This differs from MySQL Cluster and Group Replication where locks are local to a single server. - -## Unsupported functions - -* `IS_FREE_LOCK()` -* `IS_USED_LOCK()` +* When `IS_USED_LOCK()` is called from a session other than the one holding the lock, it returns `1` because it is unable to return the ID of the process that is holding the lock. diff --git a/functions-and-operators/tidb-functions.md index e35c4a6115287..f498e5ce24913 100644 --- a/functions-and-operators/tidb-functions.md +++ b/functions-and-operators/tidb-functions.md @@ -21,6 +21,7 @@ The following functions are TiDB extensions, and are not present in MySQL: | `VITESS_HASH(str)` | The `VITESS_HASH` function returns the hash of a string that is compatible with Vitess' `HASH` function. This is intended to help the data migration from Vitess. | | `TIDB_SHARD()` | The `TIDB_SHARD` function can be used to create a shard index to scatter the index hotspot.
A shard index is an expression index with a `TIDB_SHARD` function as the prefix.| | `TIDB_ROW_CHECKSUM()` | The `TIDB_ROW_CHECKSUM` function is used to query the checksum value of a row. This function can only be used in `SELECT` statements within the FastPlan process. That is, you can query through statements like `SELECT TIDB_ROW_CHECKSUM() FROM t WHERE id = ?` or `SELECT TIDB_ROW_CHECKSUM() FROM t WHERE id IN (?, ?, ...)`. See also: [Data integrity validation for single-row data](/ticdc/ticdc-integrity-check.md). | +| `CURRENT_RESOURCE_GROUP()` | The `CURRENT_RESOURCE_GROUP` function is used to return the resource group name that the current session is bound to. See also: [Use Resource Control to Achieve Resource Isolation](/tidb-resource-control.md). | @@ -38,6 +39,7 @@ The following functions are TiDB extensions, and are not present in MySQL: | `VITESS_HASH(str)` | The `VITESS_HASH` function returns the hash of a string that is compatible with Vitess' `HASH` function. This is intended to help the data migration from Vitess. | | `TIDB_SHARD()` | The `TIDB_SHARD` function can be used to create a shard index to scatter the index hotspot. A shard index is an expression index with a `TIDB_SHARD` function as the prefix.| | `TIDB_ROW_CHECKSUM()` | The `TIDB_ROW_CHECKSUM` function is used to query the checksum value of a row. This function can only be used in `SELECT` statements within the FastPlan process. That is, you can query through statements like `SELECT TIDB_ROW_CHECKSUM() FROM t WHERE id = ?` or `SELECT TIDB_ROW_CHECKSUM() FROM t WHERE id IN (?, ?, ...)`. See also: [Data integrity validation for single-row data](https://docs.pingcap.com/tidb/stable/ticdc-integrity-check). | +| `CURRENT_RESOURCE_GROUP()` | The `CURRENT_RESOURCE_GROUP` function is used to return the resource group name that the current session is bound to. See also: [Use Resource Control to Achieve Resource Isolation](/tidb-resource-control.md). 
| @@ -131,8 +133,6 @@ You can find TiDB execution plans in encoded form in the slow query log. The `TI This function is useful because a plan is captured at the time the statement is executed. Re-executing the statement in `EXPLAIN` might produce different results as data distribution and statistics evolves over time. -{{< copyable "sql" >}} - ```sql SELECT tidb_decode_plan('8QIYMAkzMV83CQEH8E85LjA0CWRhdGE6U2VsZWN0aW9uXzYJOTYwCXRpbWU6NzEzLjHCtXMsIGxvb3BzOjIsIGNvcF90YXNrOiB7bnVtOiAxLCBtYXg6IDU2OC41wgErRHByb2Nfa2V5czogMCwgcnBjXxEpAQwFWBAgNTQ5LglZyGNvcHJfY2FjaGVfaGl0X3JhdGlvOiAwLjAwfQkzLjk5IEtCCU4vQQoxCTFfNgkxXzAJMwm2SGx0KHRlc3QudC5hLCAxMDAwMCkNuQRrdgmiAHsFbBQzMTMuOMIBmQnEDDk2MH0BUgEEGAoyCTQzXzUFVwX1oGFibGU6dCwga2VlcCBvcmRlcjpmYWxzZSwgc3RhdHM6cHNldWRvCTk2ISE2aAAIMTUzXmYA')\G ``` @@ -154,8 +154,6 @@ A TSO is a number that consists of two parts: - A physical timestamp - A logical counter -{{< copyable "sql" >}} - ```sql BEGIN; SELECT TIDB_PARSE_TSO(@@tidb_current_ts); @@ -177,8 +175,6 @@ Here `TIDB_PARSE_TSO` is used to extract the physical timestamp from the timesta The `TIDB_VERSION` function can be used to get the version and build details of the TiDB server that you are connected to. You can use this function when reporting issues on GitHub. -{{< copyable "sql" >}} - ```sql SELECT TIDB_VERSION()\G ``` @@ -232,8 +228,6 @@ select tidb_decode_sql_digests(@digests); In the above example, the parameter is a JSON array containing 3 SQL digests, and the corresponding SQL statements are the three items in the query results. But the SQL statement corresponding to the second SQL digest cannot be found from the cluster, so the second item in the result is `null`. -{{< copyable "sql" >}} - ```sql select tidb_decode_sql_digests(@digests, 10); ``` @@ -258,8 +252,6 @@ See also: The `TIDB_SHARD` function can be used to create a shard index to scatter the index hotspot. A shard index is an expression index prefixed with a `TIDB_SHARD` function. 
-#### Shard index - - Creation: To create a shard index for the index field `a`, you can use `uk((tidb_shard(a)), a)`. When there is a hotspot caused by monotonically increasing or decreasing data on the index field `a` in the unique secondary index `uk((tidb_shard(a)), a)`, the index's prefix `tidb_shard(a)` can scatter the hotspot to improve the scalability of the cluster. @@ -282,14 +274,7 @@ The `TIDB_SHARD` function can be used to create a shard index to scatter the ind - Cannot go through FastPlan process, which affects optimizer performance. - Cannot be used to prepare the execution plan cache. -#### Synopsis - -```ebnf+diagram -TIDBShardExpr ::= - "TIDB_SHARD" "(" expr ")" -``` - -#### Example +The following example shows how to use the `TIDB_SHARD` function. - Use the `TIDB_SHARD` function to calculate the SHARD value. @@ -324,15 +309,6 @@ TIDBShardExpr ::= The `TIDB_ROW_CHECKSUM` function is used to query the checksum value of a row. This function can only be used in `SELECT` statements within the FastPlan process. That is, you can query through statements like `SELECT TIDB_ROW_CHECKSUM() FROM t WHERE id = ?` or `SELECT TIDB_ROW_CHECKSUM() FROM t WHERE id IN (?, ?, ...)`. -The synopsis is as follows: - -```ebnf+diagram -TableStmt ::= - "TIDB_ROW_CHECKSUM()" -``` - -The following example shows how to use the `TIDB_ROW_CHECKSUM` function to query the checksum value of the row data: - To enable the checksum feature of single-row data in TiDB (controlled by the system variable [`tidb_enable_row_level_checksum`](/system-variables.md#tidb_enable_row_level_checksum-new-in-v710)), run the following statement: ```sql @@ -363,3 +339,51 @@ The output is as follows: +----+------+------+---------------------+ 1 row in set (0.000 sec) ``` + +### CURRENT_RESOURCE_GROUP + +The `CURRENT_RESOURCE_GROUP` function is used to show the resource group name that the current session is bound to.
When the [Resource control](/tidb-resource-control.md) feature is enabled, the available resources that can be used by SQL statements are restricted by the resource quota of the bound resource group. + +When a session is established, TiDB binds the session to the resource group that the login user is bound to by default. If the user is not bound to any resource groups, the session is bound to the `default` resource group. Once the session is established, the bound resource group will not change by default, even if the bound resource group of the user is changed via [modifying the resource group bound to the user](/sql-statements/sql-statement-alter-user.md#modify-basic-user-information). To change the bound resource group of the current session, you can use [`SET RESOURCE GROUP`](/sql-statements/sql-statement-set-resource-group.md). + +#### Example + +Create a user `user1`, create two resource groups `rg1` and `rg2`, and bind the user `user1` to the resource group `rg1`: + +```sql +CREATE USER 'user1'; +CREATE RESOURCE GROUP 'rg1' RU_PER_SEC = 1000; +CREATE RESOURCE GROUP 'rg2' RU_PER_SEC = 2000; +ALTER USER 'user1' RESOURCE GROUP `rg1`; +``` + +Use `user1` to log in and view the resource group bound to the current user: + +```sql +SELECT CURRENT_RESOURCE_GROUP(); +``` + +``` ++--------------------------+ +| CURRENT_RESOURCE_GROUP() | ++--------------------------+ +| rg1 | ++--------------------------+ +1 row in set (0.00 sec) +``` + +Execute `SET RESOURCE GROUP` to set the resource group for the current session to `rg2`, and then view the resource group bound to the current user: + +```sql +SET RESOURCE GROUP `rg2`; +SELECT CURRENT_RESOURCE_GROUP(); +``` + +``` ++--------------------------+ +| CURRENT_RESOURCE_GROUP() | ++--------------------------+ +| rg2 | ++--------------------------+ +1 row in set (0.00 sec) +``` diff --git a/garbage-collection-configuration.md b/garbage-collection-configuration.md index 9fe88ee91d5d9..19479c058ce75 100644 --- 
a/garbage-collection-configuration.md +++ b/garbage-collection-configuration.md @@ -6,14 +6,16 @@ aliases: ['/docs/dev/garbage-collection-configuration/','/docs/dev/reference/gar # Garbage Collection Configuration -Garbage collection is configured via the following system variables: +You can configure garbage collection (GC) using the following system variables: -* [`tidb_gc_enable`](/system-variables.md#tidb_gc_enable-new-in-v50) -* [`tidb_gc_run_interval`](/system-variables.md#tidb_gc_run_interval-new-in-v50) -* [`tidb_gc_life_time`](/system-variables.md#tidb_gc_life_time-new-in-v50) -* [`tidb_gc_concurrency`](/system-variables.md#tidb_gc_concurrency-new-in-v50) -* [`tidb_gc_scan_lock_mode`](/system-variables.md#tidb_gc_scan_lock_mode-new-in-v50) -* [`tidb_gc_max_wait_time`](/system-variables.md#tidb_gc_max_wait_time-new-in-v610) +* [`tidb_gc_enable`](/system-variables.md#tidb_gc_enable-new-in-v50): controls whether to enable garbage collection for TiKV. +* [`tidb_gc_run_interval`](/system-variables.md#tidb_gc_run_interval-new-in-v50): specifies the GC interval. +* [`tidb_gc_life_time`](/system-variables.md#tidb_gc_life_time-new-in-v50): specifies the time limit during which data is retained for each GC. +* [`tidb_gc_concurrency`](/system-variables.md#tidb_gc_concurrency-new-in-v50): specifies the number of threads in the [Resolve Locks](/garbage-collection-overview.md#resolve-locks) step of GC. +* [`tidb_gc_scan_lock_mode`](/system-variables.md#tidb_gc_scan_lock_mode-new-in-v50): specifies the way of scanning locks in the Resolve Locks step of GC. +* [`tidb_gc_max_wait_time`](/system-variables.md#tidb_gc_max_wait_time-new-in-v610): specifies the maximum time that active transactions block the GC safe point. + +For more information about how to modify the value of a system variable, see [System variables](/system-variables.md). 
## GC I/O limit @@ -21,7 +23,7 @@ Garbage collection is configured via the following system variables: > **Note:** > -> This section is only applicable to on-premises TiDB. TiDB Cloud does not have a GC I/O limit by default. +> This section is only applicable to TiDB Self-Hosted. TiDB Cloud does not have a GC I/O limit by default. @@ -47,7 +49,7 @@ For information on changes in previous releases, refer to earlier versions of th ## Changes in TiDB 6.1.0 -Before TiDB v6.1.0, the transaction in TiDB does not affect the GC safe point. Since v6.1.0, TiDB considers the startTS of the transaction when calculating the GC safe point, to resolve the problem that the data to be accessed has been cleared. If the transaction is too long, the safe point will be blocked for a long time, which affects the application performance. +Before TiDB v6.1.0, the transaction in TiDB does not affect the GC safe point. Starting from v6.1.0, TiDB considers the startTS of the transaction when calculating the GC safe point, to resolve the problem that the data to be accessed has been cleared. If the transaction is too long, the safe point will be blocked for a long time, which affects the application performance. In TiDB v6.1.0, the system variable [`tidb_gc_max_wait_time`](/system-variables.md#tidb_gc_max_wait_time-new-in-v610) is introduced to control the maximum time that active transactions block the GC safe point. After the value is exceeded, the GC safe point is forwarded forcefully. @@ -59,7 +61,7 @@ Based on the `DISTRIBUTED` GC mode, the mechanism of GC in Compaction Filter use > **Note:** > -> The following examples of modifying TiKV configurations are only applicable to on-premises TiDB. For TiDB Cloud, the mechanism of GC in Compaction Filter is enabled by default. +> The following examples of modifying TiKV configurations are only applicable to TiDB Self-Hosted. For TiDB Cloud, the mechanism of GC in Compaction Filter is enabled by default. 
diff --git a/grafana-pd-dashboard.md b/grafana-pd-dashboard.md index 8034d0628bfa5..13790d5aa6f41 100644 --- a/grafana-pd-dashboard.md +++ b/grafana-pd-dashboard.md @@ -35,6 +35,7 @@ The following is the description of PD Dashboard metrics items: - Current ID allocation: The maximum allocatable ID for new store/peer - Region label isolation level: The number of Regions in different label levels - Label distribution: The distribution status of the labels in the cluster +- Store Limit: The flow control limitation of scheduling on the Store ![PD Dashboard - Cluster metrics](/media/pd-dashboard-cluster-v4.png) @@ -107,7 +108,6 @@ The following is the description of PD Dashboard metrics items: - Filter target: The number of attempts that the store is selected as the scheduling target but failed to pass the filter - Filter source: The number of attempts that the store is selected as the scheduling source but failed to pass the filter - Balance Direction: The number of times that the Store is selected as the target or source of scheduling -- Store Limit: The flow control limitation of scheduling on the Store ![PD Dashboard - Scheduler metrics](/media/pd-dashboard-scheduler-v4.png) diff --git a/grafana-resource-control-dashboard.md b/grafana-resource-control-dashboard.md index f24eee73e2c12..3cca223262ba3 100644 --- a/grafana-resource-control-dashboard.md +++ b/grafana-resource-control-dashboard.md @@ -23,6 +23,8 @@ This document describes some key monitoring metrics displayed on the Resource Co - RRU Per Query: the average number of Read Request Units consumed by each SQL statement per second. It is obtained by dividing the above RRU metric by the number of SQL statements executed per second. - WRU: the Write Request Unit consumption information of each resource group, calculated in real time. `total` is the sum of the Write Request Units consumed by all Resource Groups. - WRU Per Query: the average number of Write Request Units consumed by each SQL statement per second. 
It is obtained by dividing the above WRU metric by the number of SQL statements executed per second. +- Available RU: the available tokens in the RU token bucket of each resource group. When it is `0`, this resource group consumes tokens at the rate of `RU_PER_SEC` and can be considered to be in a rate-limited state. +- Query Max Duration: the maximum Query Duration in terms of resource groups. ## Metrics about resources diff --git a/grafana-tidb-dashboard.md b/grafana-tidb-dashboard.md index 1e490fbb988c8..38b021545969c 100644 --- a/grafana-tidb-dashboard.md +++ b/grafana-tidb-dashboard.md @@ -114,6 +114,8 @@ To understand the key metrics displayed on the TiDB dashboard, check the followi - KV Request OPS: the execution times of a KV request, displayed according to TiKV - KV Request Duration 99 by store: the execution time of a KV request, displayed according to TiKV - KV Request Duration 99 by type: the execution time of a KV request, displayed according to the request type +- Stale Read OPS: the number of Stale Read requests executed per second, which is divided into `hit` and `miss` categories +- Stale Read Traffic: the traffic produced by Stale Read, which is divided into `hit` and `miss` categories ### PD Client diff --git a/grafana-tikv-dashboard.md b/grafana-tikv-dashboard.md index 20eb34b4f4e21..7a846355fcdea 100644 --- a/grafana-tikv-dashboard.md +++ b/grafana-tikv-dashboard.md @@ -77,14 +77,21 @@ This section provides a detailed description of these key metrics on the **TiKV- - Raft store CPU: The CPU utilization of the `raftstore` thread. The CPU utilization should be less than 80% * `raftstore.store-pool-size` in normal case. - Async apply CPU: The CPU utilization of the `async apply` thread. The CPU utilization should be less than 90% * `raftstore.apply-pool-size` in normal cases. -- Scheduler worker CPU: The CPU utilization of the `scheduler worker` thread. 
The CPU utilization should be less than 90% * `storage.scheduler-worker-pool-size` in normal cases. +- Store writer CPU: The CPU utilization of the async IO thread. The CPU utilization should be less than 90% * `raftstore.store-io-pool-size` in normal cases. - gRPC poll CPU: The CPU utilization of the `gRPC` thread. The CPU utilization should be less than 80% * `server.grpc-concurrency` in normal cases. -- Unified read pool CPU: The CPU utilization of the `unified read pool` thread +- Scheduler worker CPU: The CPU utilization of the `scheduler worker` thread. The CPU utilization should be less than 90% * `storage.scheduler-worker-pool-size` in normal cases. - Storage ReadPool CPU: The CPU utilization of the `storage read pool` thread -- Coprocessor CPU: The CPU utilization of the `coprocessor` thread +- Unified read pool CPU: The CPU utilization of the `unified read pool` thread - RocksDB CPU: The CPU utilization of the RocksDB thread +- Coprocessor CPU: The CPU utilization of the `coprocessor` thread - GC worker CPU: The CPU utilization of the `GC worker` thread - BackGround worker CPU: The CPU utilization of the `background worker` thread +- Import CPU: The CPU utilization of the `import` thread +- Backup Worker CPU: The CPU utilization of the `backup` thread +- CDC Worker CPU: The CPU utilization of the `CDC worker` thread +- CDC endpoint CPU: The CPU utilization of the `CDC endpoint` thread +- Raftlog fetch worker CPU: The CPU utilization of the async raft log fetcher worker +- TSO Worker CPU: The CPU utilization of the `TSO worker` thread ### PD @@ -117,6 +124,9 @@ This section provides a detailed description of these key metrics on the **TiKV- - 0.99 Duration of Raft store events: The time consumed by Raftstore events (P99) - Process ready duration: The time consumed for processes to be ready in Raft - Process ready duration per server: The time consumed for peer processes to be ready in Raft per TiKV instance. It should be less than 2 seconds (P99.99). 
+- Max Duration of Raft store events: The time consumed by the slowest Raftstore event. +- Replica read lock checking duration: The time consumed for checking locks when processing Replica Read. +- Peer msg length distribution: The number of messages processed by each Region in each TiKV instance at a time. The more messages, the busier the peer is. ![TiKV Dashboard - Raft process metrics](/media/tikv-dashboard-raft-process.png) @@ -331,6 +341,30 @@ This section provides a detailed description of these key metrics on the **TiKV- - Ingest SST duration seconds: The time consumed to ingest SST files - Stall conditions changed of each CF: Stall conditions changed of each column family +### Raft Engine + +- Operations + - write: the number of write operations by Raft Engine per second + - read_entry: the number of raft log read operations by Raft Engine per second + - read_message: the number of raft metadata read operations by Raft Engine per second +- Write duration: the duration of write operations by Raft Engine. This duration is close to the sum of the latency of disk IOs involved in writing these data. 
+- Flow + - write: the write traffic of Raft Engine + - rewrite append: the traffic of rewriting append logs + - rewrite rewrite: the traffic of rewriting rewrite logs +- Write Duration Breakdown (99%) + - wal: the latency of writing Raft Engine WAL + - wait: the waiting time before writing + - apply: the time consumed for applying data to memory +- Bytes/Written: the bytes written by Raft Engine every time +- WAL Duration Breakdown (P99%): the time consumed for each stage of writing Raft Engine WAL +- File Count + - append: the number of files used for appending data by Raft Engine + - rewrite: the number of files used for rewriting data by Raft Engine (rewrite is similar to RocksDB compaction) +- Entry Count + - rewrite: the number of entries rewritten by Raft Engine + - append: the number of entries appended by Raft Engine + ### Titan - All - Blob file count: The number of Titan blob files diff --git a/hardware-and-software-requirements.md b/hardware-and-software-requirements.md index 9f2abbbbf5dee..15d824567af30 100644 --- a/hardware-and-software-requirements.md +++ b/hardware-and-software-requirements.md @@ -24,15 +24,16 @@ As an open-source distributed SQL database with high performance, TiDB can be de |
  • Red Hat Enterprise Linux 7.3 or a later 7.x version
  • CentOS 7.3 or a later 7.x version
|
  • x86_64
  • ARM 64
| | Amazon Linux 2 |
  • x86_64
  • ARM 64
| | Kylin Euler V10 SP1/SP2 |
  • x86_64
  • ARM 64
| -| UOS V20 |
  • x86_64
  • ARM 64
| -| macOS Catalina or later |
  • x86_64
  • ARM 64
| -| Oracle Enterprise Linux 7.3 or a later 7.x version | x86_64 | -| Ubuntu LTS 18.04 or later | x86_64 | +| UOS V20 |
  • x86_64
  • ARM 64
| +| openEuler 22.03 LTS SP1 | x86_64 | +| macOS 12 (Monterey) or later |
  • x86_64
  • ARM 64
| +| Oracle Enterprise Linux 7.3 or a later 7.x version | x86_64 | +| Ubuntu LTS 18.04 or later | x86_64 | | CentOS 8 Stream |
  • x86_64
  • ARM 64
| -| Debian 9 (Stretch) or later | x86_64 | -| Fedora 35 or later | x86_64 | -| openSUSE Leap later than v15.3 (not including Tumbleweed) | x86_64 | -| SUSE Linux Enterprise Server 15 | x86_64 | +| Debian 9 (Stretch) or later | x86_64 | +| Fedora 35 or later | x86_64 | +| openSUSE Leap later than v15.3 (not including Tumbleweed) | x86_64 | +| SUSE Linux Enterprise Server 15 | x86_64 | > **Note:** > @@ -46,12 +47,12 @@ As an open-source distributed SQL database with high performance, TiDB can be de | Libraries required for compiling and running TiDB | Version | | :--- | :--- | -| Golang | 1.20 or later | +| Golang | 1.20 or later | | Rust | nightly-2022-07-31 or later | | GCC | 7.x | | LLVM | 13.0 or later | -Library for running TiDB:glibc(2.28-151.el8 version) +Library required for running TiDB: glibc (2.28-151.el8 version) ### Docker image dependencies @@ -170,7 +171,7 @@ As an open-source distributed SQL database, TiDB requires the following network | Component | Disk space requirement | Healthy disk usage | | :-- | :-- | :-- | -| TiDB | At least 30 GB for the log disk | Lower than 90% | +| TiDB |
  • At least 30 GB for the log disk
  • Starting from v6.5.0, `Fast Online DDL` (controlled by the [`tidb_ddl_enable_fast_reorg`](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630) variable) is enabled by default to accelerate DDL operations, such as adding indexes. If DDL operations involving large objects exist in your application, it is highly recommended to prepare additional SSD disk space for TiDB (100 GB or more). For detailed configuration instructions, see [Set a temporary space for a TiDB instance](/check-before-deployment.md#set-temporary-spaces-for-tidb-instances-recommended)
| Lower than 90% | | PD | At least 20 GB for the data disk and for the log disk, respectively | Lower than 90% | | TiKV | At least 100 GB for the data disk and for the log disk, respectively | Lower than 80% | | TiFlash | At least 100 GB for the data disk and at least 30 GB for the log disk, respectively | Lower than 80% | diff --git a/import-example-data.md b/import-example-data.md index 2ecadba9e2816..8556ccda969fd 100644 --- a/import-example-data.md +++ b/import-example-data.md @@ -44,7 +44,6 @@ CREATE TABLE trips ( You can import files individually using the example `LOAD DATA` command here, or import all files using the bash loop below: ```sql -SET tidb_dml_batch_size = 20000; LOAD DATA LOCAL INFILE '2017Q1-capitalbikeshare-tripdata.csv' INTO TABLE trips FIELDS TERMINATED BY ',' ENCLOSED BY '"' LINES TERMINATED BY '\r\n' @@ -64,6 +63,6 @@ To import all `*.csv` files into TiDB in a bash loop: ```bash for FILE in *.csv; do echo "== $FILE ==" - mysql bikeshare --local-infile=1 -e "SET tidb_dml_batch_size = 20000; LOAD DATA LOCAL INFILE '${FILE}' INTO TABLE trips FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\r\n' IGNORE 1 LINES (duration, start_date, end_date, start_station_number, start_station, end_station_number, end_station, bike_number, member_type);" + mysql bikeshare --local-infile=1 -e "LOAD DATA LOCAL INFILE '${FILE}' INTO TABLE trips FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\r\n' IGNORE 1 LINES (duration, start_date, end_date, start_station_number, start_station, end_station_number, end_station, bike_number, member_type);" done; ``` diff --git a/information-schema/information-schema-analyze-status.md b/information-schema/information-schema-analyze-status.md index 71ca9cb1f919d..b7c21f047f117 100644 --- a/information-schema/information-schema-analyze-status.md +++ b/information-schema/information-schema-analyze-status.md @@ -19,22 +19,25 @@ DESC analyze_status; ``` ```sql 
-+----------------+---------------------+------+------+---------+-------+ -| Field | Type | Null | Key | Default | Extra | -+----------------+---------------------+------+------+---------+-------+ -| TABLE_SCHEMA | varchar(64) | YES | | NULL | | -| TABLE_NAME | varchar(64) | YES | | NULL | | -| PARTITION_NAME | varchar(64) | YES | | NULL | | -| JOB_INFO | longtext | YES | | NULL | | -| PROCESSED_ROWS | bigint(64) unsigned | YES | | NULL | | -| START_TIME | datetime | YES | | NULL | | -| END_TIME | datetime | YES | | NULL | | -| STATE | varchar(64) | YES | | NULL | | -| FAIL_REASON | longtext | YES | | NULL | | -| INSTANCE | varchar(512) | YES | | NULL | | -| PROCESS_ID | bigint(64) unsigned | YES | | NULL | | -+----------------+---------------------+------+------+---------+-------+ -11 rows in set (0.00 sec) ++----------------------+---------------------+------+------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++----------------------+---------------------+------+------+---------+-------+ +| TABLE_SCHEMA | varchar(64) | YES | | NULL | | +| TABLE_NAME | varchar(64) | YES | | NULL | | +| PARTITION_NAME | varchar(64) | YES | | NULL | | +| JOB_INFO | longtext | YES | | NULL | | +| PROCESSED_ROWS | bigint(64) unsigned | YES | | NULL | | +| START_TIME | datetime | YES | | NULL | | +| END_TIME | datetime | YES | | NULL | | +| STATE | varchar(64) | YES | | NULL | | +| FAIL_REASON | longtext | YES | | NULL | | +| INSTANCE | varchar(512) | YES | | NULL | | +| PROCESS_ID | bigint(64) unsigned | YES | | NULL | | +| REMAINING_SECONDS | bigint(64) unsigned | YES | | NULL | | +| PROGRESS | varchar(20) | YES | | NULL | | +| ESTIMATED_TOTAL_ROWS | bigint(64) unsigned | YES | | NULL | | ++----------------------+---------------------+------+------+---------+-------+ +14 rows in set (0.00 sec) ``` {{< copyable "sql" >}} @@ -44,16 +47,17 @@ SELECT * FROM information_schema.analyze_status; ``` ```sql 
-+--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ -| TABLE_SCHEMA | TABLE_NAME | PARTITION_NAME | JOB_INFO | PROCESSED_ROWS | START_TIME | END_TIME | STATE | FAIL_REASON | INSTANCE | PROCESS_ID | -+--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ -| test | t | p1 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 0 | 2022-05-27 11:30:12 | 2022-05-27 11:30:12 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p0 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 0 | 2022-05-27 11:30:12 | 2022-05-27 11:30:12 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p1 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p0 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p1 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p0 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -+--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ 
++--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+----------------------+----------+-----------------------+ +| TABLE_SCHEMA | TABLE_NAME | PARTITION_NAME | JOB_INFO | PROCESSED_ROWS | START_TIME | END_TIME | STATE | FAIL_REASON | INSTANCE | PROCESS_ID | REMAINING_SECONDS | PROGRESS | ESTIMATED_TOTAL_ROWS | ++--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+----------------------+----------+-----------------------+ +| test | t | p1 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 0 | 2022-05-27 11:30:12 | 2022-05-27 11:30:12 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p0 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 0 | 2022-05-27 11:30:12 | 2022-05-27 11:30:12 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p1 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p0 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p1 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p0 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p1 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 1000000 | 2022-05-27 11:30:12 | 2022-05-27 11:40:12 | running | NULL | 127.0.0.1:4000 | 690208308 | 600s | 0.25 | 4000000 | 
++--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+----------------------+----------+-----------------------+ 6 rows in set (0.00 sec) ``` @@ -62,11 +66,14 @@ Fields in the `ANALYZE_STATUS` table are described as follows: * `TABLE_SCHEMA`: The name of the database to which the table belongs. * `TABLE_NAME`: The name of the table. * `PARTITION_NAME`: The name of the partitioned table. -* `JOB_INFO`: The information of the `ANALYZE` task. If an index is analyzed, this information will include the index name. When `tidb_analyze_version =2`, this information will include configuration items such as sample rate. +* `JOB_INFO`: The information of the `ANALYZE` task. If an index is analyzed, this information will include the index name. When `tidb_analyze_version = 2`, this information will include configuration items such as sample rate. * `PROCESSED_ROWS`: The number of rows that have been processed. * `START_TIME`: The start time of the `ANALYZE` task. * `END_TIME`: The end time of the `ANALYZE` task. * `STATE`: The execution status of the `ANALYZE` task. Its value can be `pending`, `running`,`finished` or `failed`. * `FAIL_REASON`: The reason why the task fails. If the execution is successful, the value is `NULL`. * `INSTANCE`: The TiDB instance that executes the task. -* `PROCESS_ID`: The process ID that executes the task. \ No newline at end of file +* `PROCESS_ID`: The process ID that executes the task. +* `REMAINING_SECONDS`: The estimated time (in seconds) remaining for the task to complete. +* `PROGRESS`: The progress of the task. +* `ESTIMATED_TOTAL_ROWS`: The total rows that need to be analyzed by the task. 
diff --git a/information-schema/information-schema-resource-groups.md b/information-schema/information-schema-resource-groups.md index 956852547dd07..5b6d025370f44 100644 --- a/information-schema/information-schema-resource-groups.md +++ b/information-schema/information-schema-resource-groups.md @@ -9,7 +9,7 @@ summary: Learn the `RESOURCE_GROUPS` information_schema table. > **Note:** > -> This feature is not available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). +> This feature is not available on [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). @@ -72,11 +72,11 @@ SELECT * FROM information_schema.resource_groups WHERE NAME = 'rg1'; -- View the ``` ```sql -+------+------------+----------+-----------+ -| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | -+------+------------+----------+-----------+ -| rg1 | 1000 | MEDIUM | NO | -+------+------------+----------+-----------+ ++------+------------+----------+-----------+-------------+ +| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | QUERY_LIMIT | ++------+------------+----------+-----------+-------------+ +| rg1 | 1000 | MEDIUM | NO | NULL | ++------+------------+----------+-----------+-------------+ 1 row in set (0.00 sec) ``` @@ -89,4 +89,4 @@ The descriptions of the columns in the `RESOURCE_GROUPS` table are as follows: > **Note:** > -> TiDB automatically creates a `default` resource group during cluster initialization. For this resource group, the default value of `RU_PER_SEC` is `UNLIMITED` (equivalent to the maximum value of the `INT` type, that is, `2147483647`) and it is in `BURSTABLE` mode. All requests that are not bound to any resource group are automatically bound to this `default` resource group. When you create a new configuration for another resource group, it is recommended to modify the `default` resource group configuration as needed. 
\ No newline at end of file +> TiDB automatically creates a `default` resource group during cluster initialization. For this resource group, the default value of `RU_PER_SEC` is `UNLIMITED` (equivalent to the maximum value of the `INT` type, that is, `2147483647`) and it is in `BURSTABLE` mode. All requests that are not bound to any resource group are automatically bound to this `default` resource group. When you create a new configuration for another resource group, it is recommended to modify the `default` resource group configuration as needed. diff --git a/information-schema/information-schema-runaway-watches.md b/information-schema/information-schema-runaway-watches.md new file mode 100644 index 0000000000000..dfebf661e9a49 --- /dev/null +++ b/information-schema/information-schema-runaway-watches.md @@ -0,0 +1,123 @@ +--- +title: RUNAWAY_WATCHES +summary: Learn the `RUNAWAY_WATCHES` INFORMATION_SCHEMA table. +--- + +# RUNAWAY_WATCHES + +The `RUNAWAY_WATCHES` table shows the watch list of runaway queries that consume more resources than expected. For more information, see [Runaway Queries](/tidb-resource-control.md#manage-queries-that-consume-more-resources-than-expected-runaway-queries). + +```sql +USE INFORMATION_SCHEMA; +DESC RUNAWAY_WATCHES; +``` + +```sql ++---------------------+--------------+------+------+---------+-------+ +| Field | Type | Null | Key | Default | Extra | ++---------------------+--------------+------+------+---------+-------+ +| ID | bigint(64) | NO | | NULL | | +| RESOURCE_GROUP_NAME | varchar(32) | NO | | NULL | | +| START_TIME | varchar(32) | NO | | NULL | | +| END_TIME | varchar(32) | YES | | NULL | | +| WATCH | varchar(12) | NO | | NULL | | +| WATCH_TEXT | text | NO | | NULL | | +| SOURCE | varchar(128) | NO | | NULL | | +| ACTION | varchar(12) | NO | | NULL | | ++---------------------+--------------+------+------+---------+-------+ +8 rows in set (0.00 sec) +``` + +> **Warning:** +> +> This feature is experimental. 
It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. + +## Examples + +Query the watch list of runaway queries: + +```sql +SELECT * FROM INFORMATION_SCHEMA.RUNAWAY_WATCHES\G; +``` + +The output is as follows: + +```sql +*************************** 1. row *************************** + ID: 20003 +RESOURCE_GROUP_NAME: rg2 + START_TIME: 2023-07-28 13:06:08 + END_TIME: UNLIMITED + WATCH: Similar + WATCH_TEXT: 5b7fd445c5756a16f910192ad449c02348656a5e9d2aa61615e6049afbc4a82e + SOURCE: 127.0.0.1:4000 + ACTION: Kill +*************************** 2. row *************************** + ID: 16004 +RESOURCE_GROUP_NAME: rg2 + START_TIME: 2023-07-28 01:45:30 + END_TIME: UNLIMITED + WATCH: Similar + WATCH_TEXT: 3d48fca401d8cbb31a9f29adc9c0f9d4be967ca80a34f59c15f73af94e000c84 + SOURCE: 127.0.0.1:4000 + ACTION: Kill +2 rows in set (0.00 sec) +``` + +Add a watch item to the watch list of the resource group `rg1`: + +```sql +QUERY WATCH ADD RESOURCE GROUP rg1 SQL TEXT EXACT TO 'select * from sbtest.sbtest1'; +``` + +Query the watch list of runaway queries again: + +```sql +SELECT * FROM INFORMATION_SCHEMA.RUNAWAY_WATCHES\G; +``` + +The output is as follows: + +```sql +*************************** 1. row *************************** + ID: 20003 +RESOURCE_GROUP_NAME: rg2 + START_TIME: 2023-07-28 13:06:08 + END_TIME: UNLIMITED + WATCH: Similar + WATCH_TEXT: 5b7fd445c5756a16f910192ad449c02348656a5e9d2aa61615e6049afbc4a82e + SOURCE: 127.0.0.1:4000 + ACTION: Kill +*************************** 2. row *************************** + ID: 16004 +RESOURCE_GROUP_NAME: rg2 + START_TIME: 2023-07-28 01:45:30 + END_TIME: UNLIMITED + WATCH: Similar + WATCH_TEXT: 3d48fca401d8cbb31a9f29adc9c0f9d4be967ca80a34f59c15f73af94e000c84 + SOURCE: 127.0.0.1:4000 + ACTION: Kill +*************************** 3.
row *************************** + ID: 20004 +RESOURCE_GROUP_NAME: rg1 + START_TIME: 2023-07-28 14:23:04 + END_TIME: UNLIMITED + WATCH: Exact + WATCH_TEXT: select * from sbtest.sbtest1 + SOURCE: manual + ACTION: NoneAction +3 rows in set (0.00 sec) +``` + +The meaning of each column field in the `RUNAWAY_WATCHES` table is as follows: + +- `ID`: the ID of the watch item. +- `RESOURCE_GROUP_NAME`: the name of the resource group. +- `START_TIME`: the start time. +- `END_TIME`: the end time. `UNLIMITED` means that the watch item has an unlimited validity period. +- `WATCH`: the match type of the quick identification. The values are as follows: + - `Plan` indicates that the Plan Digest is matched. In this case, the `WATCH_TEXT` column shows the Plan Digest. + - `Similar` indicates that the SQL Digest is matched. In this case, the `WATCH_TEXT` column shows the SQL Digest. + - `Exact` indicates that the SQL text is matched. In this case, the `WATCH_TEXT` column shows the SQL text. +- `SOURCE`: the source of the watch item. If it is identified by the `QUERY_LIMIT` rule, the identified TiDB IP address is displayed. If it is manually added, `manual` is displayed. +- `ACTION`: the corresponding operation after the identification. diff --git a/information-schema/information-schema-slow-query.md b/information-schema/information-schema-slow-query.md index 8b72f9716316f..67fb7d99e37e2 100644 --- a/information-schema/information-schema-slow-query.md +++ b/information-schema/information-schema-slow-query.md @@ -11,8 +11,8 @@ The `SLOW_QUERY` table provides the slow query information of the current node, > **Note:** > -> The `SLOW_QUERY` table is unavailable for [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). - +> The `SLOW_QUERY` table is unavailable for [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). 
+ diff --git a/information-schema/information-schema-tidb-servers-info.md b/information-schema/information-schema-tidb-servers-info.md index 510e346c886fa..d42ddfe1b43e8 100644 --- a/information-schema/information-schema-tidb-servers-info.md +++ b/information-schema/information-schema-tidb-servers-info.md @@ -46,7 +46,7 @@ The output is as follows: PORT: 4000 STATUS_PORT: 10080 LEASE: 45s - VERSION: 5.7.25-TiDB-v6.5.0 + VERSION: 5.7.25-TiDB-v7.3.0 GIT_HASH: 827d8ff2d22ac4c93ae1b841b79d468211e1d393 BINLOG_STATUS: Off LABELS: diff --git a/information-schema/information-schema-tiflash-segments.md b/information-schema/information-schema-tiflash-segments.md index 54ca2a18f82e9..062f2c4581c0c 100644 --- a/information-schema/information-schema-tiflash-segments.md +++ b/information-schema/information-schema-tiflash-segments.md @@ -9,6 +9,10 @@ summary: Learn the `TIFLASH_SEGMENTS` information_schema table. > > Do not use this table in production environments, as the fields of the table are unstable, and subject to change in new releases of TiDB, without prior notice. +> **Note:** +> +> This statement is not applicable to TiDB Serverless clusters. + The `TIFLASH_SEGMENTS` table provides statistical information about data tables in TiFlash. ```sql diff --git a/information-schema/information-schema-tiflash-tables.md b/information-schema/information-schema-tiflash-tables.md index 42992d07809c5..655f54818cdf7 100644 --- a/information-schema/information-schema-tiflash-tables.md +++ b/information-schema/information-schema-tiflash-tables.md @@ -9,6 +9,10 @@ summary: Learn the `TIFLASH_TABLES` information_schema table. > > Do not use this table in production environments, as the fields of the table are unstable, and subject to change in new releases of TiDB, without prior notice. +> **Note:** +> +> This statement is not applicable to TiDB Serverless clusters. + The `TIFLASH_TABLES` table provides statistical information about data tables in TiFlash. 
```sql diff --git a/information-schema/information-schema-user-privileges.md b/information-schema/information-schema-user-privileges.md index 6fcca468aedda..06c8e1507a9d6 100644 --- a/information-schema/information-schema-user-privileges.md +++ b/information-schema/information-schema-user-privileges.md @@ -79,7 +79,7 @@ The output is as follows: - + ```sql +------------+---------------+-------------------------+--------------+ diff --git a/latest_translation_commit.json b/latest_translation_commit.json new file mode 100644 index 0000000000000..e36cbe443cdeb --- /dev/null +++ b/latest_translation_commit.json @@ -0,0 +1 @@ +{"target":"master","sha":""} \ No newline at end of file diff --git a/media/dashboard/dashboard-slow-queries-export-v651.png b/media/dashboard/dashboard-slow-queries-export-v651.png new file mode 100644 index 0000000000000..b587f8ead4fbf Binary files /dev/null and b/media/dashboard/dashboard-slow-queries-export-v651.png differ diff --git a/media/performance/public-cloud-best-practice/after_tuning_cpu.png b/media/performance/public-cloud-best-practice/after_tuning_cpu.png new file mode 100644 index 0000000000000..f08e1376ce185 Binary files /dev/null and b/media/performance/public-cloud-best-practice/after_tuning_cpu.png differ diff --git a/media/performance/public-cloud-best-practice/after_tuning_metrics.png b/media/performance/public-cloud-best-practice/after_tuning_metrics.png new file mode 100644 index 0000000000000..78d5075064e51 Binary files /dev/null and b/media/performance/public-cloud-best-practice/after_tuning_metrics.png differ diff --git a/media/performance/public-cloud-best-practice/baseline_cpu.png b/media/performance/public-cloud-best-practice/baseline_cpu.png new file mode 100644 index 0000000000000..e8015f75f386b Binary files /dev/null and b/media/performance/public-cloud-best-practice/baseline_cpu.png differ diff --git a/media/performance/public-cloud-best-practice/baseline_metrics.png 
b/media/performance/public-cloud-best-practice/baseline_metrics.png new file mode 100644 index 0000000000000..16c41df4f7b23 Binary files /dev/null and b/media/performance/public-cloud-best-practice/baseline_metrics.png differ diff --git a/media/tiflash/tiflash-pipeline-model.png b/media/tiflash/tiflash-pipeline-model.png new file mode 100644 index 0000000000000..f97c57c90524d Binary files /dev/null and b/media/tiflash/tiflash-pipeline-model.png differ diff --git a/media/tikv-dashboard-raft-process.png b/media/tikv-dashboard-raft-process.png index 46ce0875c5075..17dd5ec700dc3 100644 Binary files a/media/tikv-dashboard-raft-process.png and b/media/tikv-dashboard-raft-process.png differ diff --git a/migrate-aurora-to-tidb.md b/migrate-aurora-to-tidb.md index af23be8460410..cb6de953e2d7b 100644 --- a/migrate-aurora-to-tidb.md +++ b/migrate-aurora-to-tidb.md @@ -15,102 +15,131 @@ The whole migration has two processes: ## Prerequisites -- [Install Dumpling and TiDB Lightning](/migration-tools.md) +- [Install Dumpling and TiDB Lightning](/migration-tools.md). If you want to create the corresponding tables manually on the target side, do not install Dumpling. +- [Get the upstream database privileges required by Dumpling](/dumpling-overview.md#required-privileges). - [Get the target database privileges required for TiDB Lightning](/tidb-lightning/tidb-lightning-faq.md#what-are-the-privilege-requirements-for-the-target-database). ## Import full data to TiDB -### Step 1. Export an Aurora snapshot to Amazon S3 +### Step 1. Export and import the schema file -1. In Aurora, query the current binlog position by running the following command: +This section describes how to export the schema file from Amazon Aurora and import it to TiDB. If you have manually created the table in the target database, you can skip this step. - ```sql - mysql> SHOW MASTER STATUS; - ``` +#### 1.1 Export the schema file from Amazon Aurora - The output is similar to the following. 
Record the binlog name and position for later use. +Because the snapshot file from Amazon Aurora does not contain the DDL statements, you need to export the schema using Dumpling and create the schema in the target database using TiDB Lightning. - ``` - +------------------+----------+--------------+------------------+-------------------+ - | File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set | - +------------------+----------+--------------+------------------+-------------------+ - | mysql-bin.000002 | 52806 | | | | - +------------------+----------+--------------+------------------+-------------------+ - 1 row in set (0.012 sec) - ``` +Export the schema using Dumpling by running the following command. The command includes the `--filter` parameter to only export the desired table schema. For more information about the parameters, see the [Option list of Dumpling](/dumpling-overview.md#option-list-of-dumpling). + +```shell +export AWS_ACCESS_KEY_ID=${access_key} +export AWS_SECRET_ACCESS_KEY=${secret_key} +tiup dumpling --host ${host} --port 3306 --user root --password ${password} --filter 'my_db1.table[12],mydb.*' --consistency none --no-data --output 's3://my-bucket/schema-backup' +``` + +Record the URI of the schema exported in the above command, such as 's3://my-bucket/schema-backup', which will be used when importing the schema file later. + +To get access to Amazon S3, you can pass the secret access key and access key of the account that has access to this Amazon S3 storage path into the Dumpling or TiDB Lightning node as environment variables. Dumpling and TiDB Lightning also support reading credential files from `~/.aws/credentials`. This method eliminates the need to provide the secret access key and access key again for all tasks on that Dumpling or TiDB Lightning node. + +#### 1.2 Create the TiDB Lightning configuration file for the schema file -2. Export the Aurora snapshot. 
For detailed steps, refer to [Exporting DB snapshot data to Amazon S3](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/USER_ExportSnapshot.html). +Create a new `tidb-lightning-schema.toml` file, copy the following content into the file, and replace the corresponding content. -After you obtain the binlog position, export the snapshot within 5 minutes. Otherwise, the recorded binlog position might be outdated and thus cause data conflict during the incremental replication. +```toml +[tidb] + +# The target TiDB cluster information. +host = ${host} +port = ${port} +user = "${user_name}" +password = "${password}" +status-port = ${status-port} # The TiDB status port. Usually the port is 10080. +pd-addr = "${ip}:${port}" # The cluster PD address. Usually the port is 2379. -After the two steps above, make sure you have the following information ready: +[tikv-importer] +# "local": Use the default Physical Import Mode (the "local" backend). +# During the import, the target TiDB cluster cannot provide any service. +# For more information about import modes, see https://docs.pingcap.com/tidb/stable/tidb-lightning-overview backend = "local" -- The Aurora binlog name and position at the time of the snapshot creation. -- The S3 path where the snapshot is stored, and the SecretKey and AccessKey with access to the S3 path. +# Set the temporary storage directory for the sorted Key-Value files. +# The directory must be empty, and the storage space must be greater than the size of the dataset to be imported. +# For better import performance, it is recommended to use a directory different from `data-source-dir` and use flash storage, +# which can use I/O exclusively. +sorted-kv-dir = "${path}" -### Step 2. 
Export schema +[mydumper] +# Set the directory of the schema file exported from Amazon Aurora +data-source-dir = "s3://my-bucket/schema-backup" +``` -Because the snapshot file from Aurora does not contain the DDL statements, you need to export the schema using Dumpling and create the schema in the target database using TiDB Lightning. If you want to manually create the schema, you can skip this step. +If you need to enable TLS in the TiDB cluster, refer to [TiDB Lightning Configuration](/tidb-lightning/tidb-lightning-configuration.md). -Export the schema using Dumpling by running the following command. The command includes the `--filter` parameter to only export the desired table schema: +#### 1.3 Import the schema file to TiDB -{{< copyable "shell-regular" >}} +Use TiDB Lightning to import the schema file to the downstream TiDB. ```shell -tiup dumpling --host ${host} --port 3306 --user root --password ${password} --filter 'my_db1.table[12]' --no-data --output 's3://my-bucket/schema-backup' --filter "mydb.*" +export AWS_ACCESS_KEY_ID=${access_key} +export AWS_SECRET_ACCESS_KEY=${secret_key} +nohup tiup tidb-lightning -config tidb-lightning-schema.toml > nohup.out 2>&1 & ``` -The parameters used in the command above are as follows. For more parameters, refer to [Dumpling overview](/dumpling-overview.md). +### Step 2. Export and import an Amazon Aurora snapshot to Amazon S3 -|Parameter |Description | -|- |- | -|`-u` or `--user` |Aurora MySQL user| -|`-p` or `--password` |MySQL user password| -|`-P` or `--port` |MySQL port| -|`-h` or `--host` |MySQL IP address| -|`-t` or `--thread` |The number of threads used for export| -|`-o` or `--output` |The directory that stores the exported file. Supports local path or [external storage URI](/br/backup-and-restore-storages.md)| -|`-r` or `--row` |The maximum number of rows in a single file| -|`-F` |The maximum size of a single file, in MiB. 
Recommended value: 256 MiB.| -|`-B` or `--database` |Specifies a database to be exported| -|`-T` or `--tables-list`|Exports the specified tables| -|`-d` or `--no-data` |Does not export data. Only exports schema.| -|`-f` or `--filter` |Exports tables that match the pattern. Do not use `-f` and `-T` at the same time. Refer to [table-filter](/table-filter.md) for the syntax.| +This section describes how to export an Amazon Aurora snapshot to Amazon S3 and import it into TiDB by TiDB Lightning. -### Step 3. Create the TiDB Lightning configuration file +#### 2.1 Export an Amazon Aurora snapshot to Amazon S3 -Create the `tidb-lightning.toml` configuration file as follows: +1. Get the name and location of the Amazon Aurora binlog for subsequent incremental migration. In Amazon Aurora, run the `SHOW MASTER STATUS` command and record the current binlog position: -{{< copyable "shell-regular" >}} + ```sql + SHOW MASTER STATUS; + ``` -```shell -vim tidb-lightning.toml -``` + The output is similar to the following. Record the binlog name and position for later use. + + ``` + +----------------------------+----------+--------------+------------------+-------------------+ + | File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set | + +----------------------------+----------+--------------+------------------+-------------------+ + | mysql-bin-changelog.018128 | 52806 | | | | + +----------------------------+----------+--------------+------------------+-------------------+ + 1 row in set (0.012 sec) + ``` + +2. Export the Amazon Aurora snapshot. For detailed steps, refer to [Exporting DB snapshot data to Amazon S3](https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/USER_ExportSnapshot.html). After you obtain the binlog position, export the snapshot within 5 minutes. Otherwise, the recorded binlog position might be outdated and thus cause data conflict during the incremental replication. 
-{{< copyable "" >}} +#### 2.2 Create the TiDB Lightning configuration file for the data file + +Create a new `tidb-lightning-data.toml` configuration file, copy the following content into the file, and replace the corresponding content. ```toml [tidb] # The target TiDB cluster information. -host = ${host} # e.g.: 172.16.32.1 -port = ${port} # e.g.: 4000 -user = "${user_name} # e.g.: "root" -password = "${password}" # e.g.: "rootroot" -status-port = ${status-port} # Obtains the table schema information from TiDB status port, e.g.: 10080 -pd-addr = "${ip}:${port}" # The cluster PD address, e.g.: 172.16.31.3:2379. TiDB Lightning obtains some information from PD. When backend = "local", you must specify status-port and pd-addr correctly. Otherwise, the import will be abnormal. +host = ${host} +port = ${port} +user = "${user_name}" +password = "${password}" +status-port = ${status-port} # The TiDB status port. Usually the port is 10080. +pd-addr = "${ip}:${port}" # The cluster PD address. Usually the port is 2379. [tikv-importer] -# "local": Default backend. The local backend is recommended to import large volumes of data (1 TiB or more). During the import, the target TiDB cluster cannot provide any service. -# "tidb": The "tidb" backend is recommended to import data less than 1 TiB. During the import, the target TiDB cluster can provide service normally. +# "local": Use the default Physical Import Mode (the "local" backend). +# During the import, the target TiDB cluster cannot provide any service. +# For more information about import modes, see https://docs.pingcap.com/tidb/stable/tidb-lightning-overview backend = "local" -# Set the temporary storage directory for the sorted Key-Value files. The directory must be empty, and the storage space must be greater than the size of the dataset to be imported. For better import performance, it is recommended to use a directory different from `data-source-dir` and use flash storage, which can use I/O exclusively. 
-sorted-kv-dir = "/mnt/ssd/sorted-kv-dir" +# Set the temporary storage directory for the sorted Key-Value files. +# The directory must be empty, and the storage space must be greater than the size of the dataset to be imported. +# For better import performance, it is recommended to use a directory different from `data-source-dir` and use flash storage, +# which can use I/O exclusively. +sorted-kv-dir = "${path}" [mydumper] -# The path that stores the snapshot file. -data-source-dir = "${s3_path}" # e.g.: s3://my-bucket/sql-backup +# Set the directory of the snapshot file exported from Amazon Aurora +data-source-dir = "${s3_path}" # e.g.: s3://my-bucket/sql-backup [[mydumper.files]] # The expression that parses the parquet file. @@ -122,35 +151,23 @@ type = '$3' If you need to enable TLS in the TiDB cluster, refer to [TiDB Lightning Configuration](/tidb-lightning/tidb-lightning-configuration.md). -### Step 4. Import full data to TiDB - -1. Create the tables in the target database using TiDB Lightning: +#### 2.3 Import full data to TiDB - {{< copyable "shell-regular" >}} - - ```shell - tiup tidb-lightning -config tidb-lightning.toml -d 's3://my-bucket/schema-backup' - ``` - -2. Start the import by running `tidb-lightning`. If you launch the program directly in the command line, the process might exit unexpectedly after receiving a SIGHUP signal. In this case, it is recommended to run the program using a `nohup` or `screen` tool. For example: - - Pass the SecretKey and AccessKey that have access to the S3 storage path as environment variables to the Dumpling node. You can also read the credentials from `~/.aws/credentials`. - - {{< copyable "shell-regular" >}} +1. Use TiDB Lightning to import data from an Amazon Aurora snapshot to TiDB. 
```shell export AWS_ACCESS_KEY_ID=${access_key} export AWS_SECRET_ACCESS_KEY=${secret_key} - nohup tiup tidb-lightning -config tidb-lightning.toml > nohup.out 2>&1 & + nohup tiup tidb-lightning -config tidb-lightning-data.toml > nohup.out 2>&1 & ``` -3. After the import starts, you can check the progress of the import by either of the following methods: +2. After the import starts, you can check the progress of the import by either of the following methods: - `grep` the keyword `progress` in the log. The progress is updated every 5 minutes by default. - Check progress in [the monitoring dashboard](/tidb-lightning/monitor-tidb-lightning.md). - Check progress in [the TiDB Lightning web interface](/tidb-lightning/tidb-lightning-web-interface.md). -4. After TiDB Lightning completes the import, it exits automatically. Check whether `tidb-lightning.log` contains `the whole procedure completed` in the last lines. If yes, the import is successful. If no, the import encounters an error. Address the error as instructed in the error message. +3. After TiDB Lightning completes the import, it exits automatically. Check whether `tidb-lightning.log` contains `the whole procedure completed` in the last lines. If yes, the import is successful. If no, the import encounters an error. Address the error as instructed in the error message. > **Note:** > @@ -169,8 +186,6 @@ If you encounter any problem during the import, refer to [TiDB Lightning FAQ](/t 1. Create the `source1.yaml` file as follows: - {{< copyable "" >}} - ```yaml # Must be unique. source-id: "mysql-01" @@ -186,8 +201,6 @@ If you encounter any problem during the import, refer to [TiDB Lightning FAQ](/t 2. 
Load the data source configuration to the DM cluster using `tiup dmctl` by running the following command: - {{< copyable "shell-regular" >}} - ```shell tiup dmctl --master-addr ${advertise-addr} operate-source create source1.yaml ``` @@ -203,8 +216,6 @@ If you encounter any problem during the import, refer to [TiDB Lightning FAQ](/t Create the `task1.yaml` file as follows: -{{< copyable "" >}} - ```yaml # Task name. Multiple tasks that are running at the same time must each have a unique name. name: "test" @@ -233,7 +244,7 @@ mysql-instances: block-allow-list: "listA" # References the block-allow-list configuration above. # syncer-config-name: "global" # Name of the syncer configuration. meta: # The position where the binlog replication starts when `task-mode` is `incremental` and the downstream database checkpoint does not exist. If the checkpoint exists, the checkpoint is used. If neither the `meta` configuration item nor the downstream database checkpoint exists, the migration starts from the latest binlog position of the upstream. - binlog-name: "mysql-bin.000004" # The binlog position recorded in "Step 1. Export an Aurora snapshot to Amazon S3". When the upstream database has source-replica switching, GTID mode is required. + binlog-name: "mysql-bin.000004" # The binlog position recorded in "Step 1. Export an Amazon Aurora snapshot to Amazon S3". When the upstream database has source-replica switching, GTID mode is required. 
binlog-pos: 109227 # binlog-gtid: "09bec856-ba95-11ea-850a-58f2b4af5188:1-9" @@ -250,16 +261,12 @@ The YAML file above is the minimum configuration required for the migration task Before you start the migration task, to reduce the probability of errors, it is recommended to confirm that the configuration meets the requirements of DM by running the `check-task` command: -{{< copyable "shell-regular" >}} - ```shell tiup dmctl --master-addr ${advertise-addr} check-task task.yaml ``` After that, start the migration task by running `tiup dmctl`: -{{< copyable "shell-regular" >}} - ```shell tiup dmctl --master-addr ${advertise-addr} start-task task.yaml ``` @@ -279,8 +286,6 @@ If you encounter any problem, refer to [DM error handling](/dm/dm-error-handling To learn whether the DM cluster has an ongoing migration task and the task status, run the `query-status` command using `tiup dmctl`: -{{< copyable "shell-regular" >}} - ```shell tiup dmctl --master-addr ${advertise-addr} query-status ${task-name} ``` diff --git a/migrate-from-tidb-to-mysql.md b/migrate-from-tidb-to-mysql.md index 60bd0018c5fdd..b676bbee47888 100644 --- a/migrate-from-tidb-to-mysql.md +++ b/migrate-from-tidb-to-mysql.md @@ -108,7 +108,9 @@ After setting up the environment, you can use [Dumpling](/dumpling-overview.md) 3. Restore data. - Use MyLoader (an open-source tool) to import data to the downstream MySQL instance. For details about how to install and use MyLoader, see [MyDumpler/MyLoader](https://github.com/mydumper/mydumper). Run the following command to import full data exported by Dumpling to MySQL: + Use MyLoader (an open-source tool) to import data to the downstream MySQL instance. For details about how to install and use MyLoader, see [MyDumper/MyLoader](https://github.com/mydumper/mydumper). Note that you need to use MyLoader v0.10 or earlier versions. Higher versions cannot process metadata files exported by Dumpling. 
+ + Run the following command to import full data exported by Dumpling to MySQL: ```shell myloader -h 127.0.0.1 -P 3306 -d ./dumpling_output/ diff --git a/migrate-large-mysql-to-tidb.md b/migrate-large-mysql-to-tidb.md index 972baabd29be2..cb21154d79649 100644 --- a/migrate-large-mysql-to-tidb.md +++ b/migrate-large-mysql-to-tidb.md @@ -7,10 +7,7 @@ summary: Learn how to migrate MySQL of large datasets to TiDB. When the data volume to be migrated is small, you can easily [use DM to migrate data](/migrate-small-mysql-to-tidb.md), both for full migration and incremental replication. However, because DM imports data at a slow speed (30~50 GiB/h), when the data volume is large, the migration might take a long time. "Large datasets" in this document usually mean data around one TiB or more. -This document describes how to migrate large datasets from MySQL to TiDB. The whole migration has two processes: - -1. *Full migration*. Use Dumpling and TiDB Lightning to perform the full migration. TiDB Lightning's **local backend** mode can import data at a speed of up to 500 GiB/h. -2. *Incremental replication*. After the full migration is completed, you can replicate the incremental data using DM. +This document describes how to perform the full migration using Dumpling and TiDB Lightning. TiDB Lightning [Physical Import Mode](/tidb-lightning/tidb-lightning-physical-import-mode.md) can import data at a speed of up to 500 GiB/h. Note that this speed is affected by various factors such as hardware configuration, table schema, and the number of indexes. After the full migration is completed, you can replicate the incremental data using DM. 
## Prerequisites diff --git a/mysql-compatibility.md b/mysql-compatibility.md index 4d288c0a69188..07349cb395917 100644 --- a/mysql-compatibility.md +++ b/mysql-compatibility.md @@ -6,15 +6,25 @@ aliases: ['/docs/dev/mysql-compatibility/','/docs/dev/reference/mysql-compatibil # MySQL Compatibility -TiDB is highly compatible with the MySQL 5.7 protocol and the common features and syntax of MySQL 5.7. The ecosystem tools for MySQL 5.7 (PHPMyAdmin, Navicat, MySQL Workbench, mysqldump, and Mydumper/myloader) and the MySQL client can be used for TiDB. + + +TiDB is highly compatible with the MySQL protocol and the common features and syntax of MySQL 5.7 and MySQL 8.0. The ecosystem tools for MySQL (PHPMyAdmin, Navicat, MySQL Workbench, DBeaver and [more](/develop/dev-guide-third-party-support.md#gui)) and the MySQL client can be used for TiDB. + + + + -However, some features of MySQL are not supported. This could be because there is now a better way to solve the problem (such as XML functions superseded by JSON), or a lack of current demand versus effort required (such as stored procedures and functions). Some features might also be difficult to implement as a distributed system. +TiDB is highly compatible with the MySQL protocol and the common features and syntax of MySQL 5.7 and MySQL 8.0. The ecosystem tools for MySQL (PHPMyAdmin, Navicat, MySQL Workbench, DBeaver and [more](https://docs.pingcap.com/tidb/v7.2/dev-guide-third-party-support#gui)) and the MySQL client can be used for TiDB. + + + +However, some features of MySQL are not supported in TiDB. This could be because there is now a better way to solve the problem (such as the use of JSON instead of XML functions) or a lack of current demand versus effort required (such as stored procedures and functions). Additionally, some features might be difficult to implement in a distributed system. 
-In addition, TiDB does not support the MySQL replication protocol, but provides specific tools to replicate data with MySQL: +It's important to note that TiDB does not support the MySQL replication protocol. Instead, specific tools are provided to replicate data with MySQL: -- Replicate data from MySQL: [TiDB Data Migration (DM)](/dm/dm-overview.md) is a tool that supports the full data migration and the incremental data replication from MySQL/MariaDB into TiDB. +- Replicate data from MySQL: [TiDB Data Migration (DM)](/dm/dm-overview.md) is a tool that supports full data migration and incremental data replication from MySQL or MariaDB into TiDB. - Replicate data to MySQL: [TiCDC](/ticdc/ticdc-overview.md) is a tool for replicating the incremental data of TiDB by pulling TiKV change logs. TiCDC uses the [MySQL sink](/ticdc/ticdc-overview.md#replication-consistency) to replicate the incremental data of TiDB to MySQL. @@ -23,7 +33,7 @@ In addition, TiDB does not support the MySQL replication protocol, but provides > **Note:** > -> This page describes general differences between MySQL and TiDB. See the dedicated pages for [Security](/security-compatibility-with-mysql.md) and [Pessimistic Transaction Mode](/pessimistic-transaction.md#difference-with-mysql-innodb) compatibility. +> This page describes general differences between MySQL and TiDB. For more information on compatibility with MySQL in the areas of security and pessimistic transaction mode, refer to the dedicated pages on [Security](/security-compatibility-with-mysql.md) and [Pessimistic Transaction Mode](/pessimistic-transaction.md#difference-with-mysql-innodb). @@ -35,6 +45,8 @@ In addition, TiDB does not support the MySQL replication protocol, but provides +You can try out TiDB features on [TiDB Playground](https://play.tidbcloud.com/?utm_source=docs&utm_medium=mysql_compatibility). 
+ ## Unsupported features + Stored procedures and functions @@ -58,24 +70,27 @@ In addition, TiDB does not support the MySQL replication protocol, but provides + `HANDLER` statement + `CREATE TABLESPACE` statement + "Session Tracker: Add GTIDs context to the OK packet" ++ Descending Index [#2519](https://github.com/pingcap/tidb/issues/2519) ++ `SKIP LOCKED` syntax [#18207](https://github.com/pingcap/tidb/issues/18207) ++ Lateral derived tables [#40328](https://github.com/pingcap/tidb/issues/40328) -## Features that are different from MySQL +## Differences from MySQL ### Auto-increment ID -+ In TiDB, the values (IDs) of an auto-incremental column are globally unique. They are incremental on a single TiDB server. If you want the IDs to be incremental among multiple TiDB servers, you can use the [`AUTO_INCREMENT` MySQL compatibility mode](/auto-increment.md#mysql-compatibility-mode). But the IDs are not necessarily allocated sequentially. It is recommended that you do not mix default values and custom values. Otherwise, you might encounter the `Duplicated Error` error message. ++ In TiDB, the auto-incremental column values (IDs) are globally unique and incremental within a single TiDB server. To make the IDs incremental among multiple TiDB servers, you can use the [`AUTO_INCREMENT` MySQL compatibility mode](/auto-increment.md#mysql-compatibility-mode). However, the IDs are not necessarily allocated sequentially, so it is recommended that you avoid mixing default and custom values to prevent encountering the `Duplicated Error` message. -+ You can use the `tidb_allow_remove_auto_inc` system variable to allow or forbid removing the `AUTO_INCREMENT` column attribute. The syntax of removing the column attribute is `ALTER TABLE MODIFY` or `ALTER TABLE CHANGE`. ++ You can use the `tidb_allow_remove_auto_inc` system variable to allow or forbid removing the `AUTO_INCREMENT` column attribute. 
To remove the column attribute, use the `ALTER TABLE MODIFY` or `ALTER TABLE CHANGE` syntax. -+ TiDB does not support adding the `AUTO_INCREMENT` column attribute, and this attribute cannot be recovered once it is removed. ++ TiDB does not support adding the `AUTO_INCREMENT` column attribute, and once removed, it cannot be recovered. -+ For TiDB v6.6.0 and earlier versions, TiDB behaves the same as MySQL InnoDB, which requires auto-increment columns to be primary keys or index prefixes. Starting from v7.0.0, TiDB removes the restriction that auto-increment columns must be indexes or index prefixes, which allows you to define table primary keys more flexibly. [#40580](https://github.com/pingcap/tidb/issues/40580) ++ For TiDB v6.6.0 and earlier versions, auto-increment columns in TiDB behave the same as in MySQL InnoDB, requiring them to be primary keys or index prefixes. Starting from v7.0.0, TiDB removes this restriction, allowing for more flexible table primary key definitions. [#40580](https://github.com/pingcap/tidb/issues/40580) For more details, see [`AUTO_INCREMENT`](/auto-increment.md). > **Note:** > -> + If you have not specified the primary key when creating a table, TiDB uses `_tidb_rowid` to identify the row. The allocation of this value shares an allocator with the auto-increment column (if such a column exists). If you specify an auto-increment column as the primary key, TiDB uses this column to identify the row. In this situation, the following situation might happen: +> + If you do not specify a primary key when creating a table, TiDB uses `_tidb_rowid` to identify the row. The allocation of this value shares an allocator with the auto-increment column (if such a column exists). If you specify an auto-increment column as the primary key, TiDB uses this column to identify the row. 
In this situation, the following might occur: ```sql mysql> CREATE TABLE t(id INT UNIQUE KEY AUTO_INCREMENT); @@ -101,7 +116,7 @@ mysql> SELECT _tidb_rowid, id FROM t; 3 rows in set (0.01 sec) ``` -As you can see, because of the shared allocator, the `id` increments by 2 each time. This behavior is changed in [MySQL compatibility mode](/auto-increment.md#mysql-compatibility-mode), where there is no shared allocator and therefore no skipping of numbers. +As shown, because of the shared allocator, the `id` increments by 2 each time. This behavior changes in [MySQL compatibility mode](/auto-increment.md#mysql-compatibility-mode), where there is no shared allocator and therefore no skipping of numbers. @@ -123,7 +138,7 @@ As you can see, because of the shared allocator, the `id` increments by 2 each t -TiDB uses a combination of [Prometheus and Grafana](/tidb-monitoring-api.md) to store and query the performance monitoring metrics. Performance schema tables return empty results in TiDB. +TiDB utilizes a combination of [Prometheus and Grafana](/tidb-monitoring-api.md) for storing and querying performance monitoring metrics. In TiDB, performance schema tables do not return any results. @@ -135,25 +150,25 @@ To check performance metrics in TiDB Cloud, you can either check the cluster ove ### Query Execution Plan -The output format, output content, and the privilege setting of Query Execution Plan (`EXPLAIN`/`EXPLAIN FOR`) in TiDB is greatly different from those in MySQL. +The output format, content, and privilege settings of Query Execution Plan (`EXPLAIN`/`EXPLAIN FOR`) in TiDB differ significantly from those in MySQL. -The MySQL system variable `optimizer_switch` is read-only in TiDB and has no effect on query plans. You can also use [optimizer hints](/optimizer-hints.md) in similar syntax to MySQL, but the available hints and implementation might differ. +In TiDB, the MySQL system variable `optimizer_switch` is read-only and has no effect on query plans. 
Although optimizer hints can be used in similar syntax to MySQL, the available hints and their implementation might differ. -See [Understand the Query Execution Plan](/explain-overview.md) for more details. +For more information, refer to [Understand the Query Execution Plan](/explain-overview.md). ### Built-in functions -TiDB supports most of the MySQL built-in functions, but not all. The statement `SHOW BUILTINS` provides a list of functions that are available. +TiDB supports most of the built-in functions in MySQL, but not all. You can use the statement `SHOW BUILTINS` to get a list of the available functions. -See also: [TiDB SQL Grammar](https://pingcap.github.io/sqlgram/#functioncallkeyword). +For more information, refer to the [TiDB SQL Grammar](https://pingcap.github.io/sqlgram/#functioncallkeyword). -### DDL +### DDL operations -In TiDB, all supported DDL changes are performed online. Compared with DDL operations in MySQL, the DDL operations in TiDB have the following major restrictions: +In TiDB, all supported DDL changes can be performed online. However, there are some major restrictions on DDL operations in TiDB compared to MySQL: -* When you use a single `ALTER TABLE` statement to alter multiple schema objects (such as columns or indexes) of a table, specifying the same object in multiple changes is not supported. For example, if you execute the `ALTER TABLE t1 MODIFY COLUMN c1 INT, DROP COLUMN c1` command, the `Unsupported operate same column/index` error is output. +* When using a single `ALTER TABLE` statement to alter multiple schema objects (such as columns or indexes) of a table, specifying the same object in multiple changes is not supported. For example, if you execute the `ALTER TABLE t1 MODIFY COLUMN c1 INT, DROP COLUMN c1` command, the `Unsupported operate same column/index` error is output. 
* It is not supported to modify multiple TiDB-specific schema objects using a single `ALTER TABLE` statement, such as `TIFLASH REPLICA`, `SHARD_ROW_ID_BITS`, and `AUTO_ID_CACHE`. -* `ALTER TABLE` in TiDB does not support the changes of some data types. For example, TiDB does not support the change from the `DECIMAL` type to the `DATE` type. If a data type change is unsupported, TiDB reports the `Unsupported modify column: type %d not match origin %d` error. Refer to [`ALTER TABLE`](/sql-statements/sql-statement-modify-column.md) for more details. +* TiDB does not support the changes of some data types using `ALTER TABLE`. For example, TiDB does not support the change from the `DECIMAL` type to the `DATE` type. If a data type change is unsupported, TiDB reports the `Unsupported modify column: type %d not match origin %d` error. Refer to [`ALTER TABLE`](/sql-statements/sql-statement-modify-column.md) for more details. * The `ALGORITHM={INSTANT,INPLACE,COPY}` syntax functions only as an assertion in TiDB, and does not modify the `ALTER` algorithm. See [`ALTER TABLE`](/sql-statements/sql-statement-alter-table.md) for further details. * Adding/Dropping the primary key of the `CLUSTERED` type is unsupported. For more details about the primary key of the `CLUSTERED` type, refer to [clustered index](/clustered-indexes.md). * Different types of indexes (`HASH|BTREE|RTREE|FULLTEXT`) are not supported, and will be parsed and ignored when specified. @@ -165,21 +180,23 @@ In TiDB, all supported DDL changes are performed online. Compared with DDL opera - `SUBPARTITION` - `{CHECK|OPTIMIZE|REPAIR|IMPORT|DISCARD|REBUILD} PARTITION` - For more details, see [Partitioning](/partitioned-table.md). + For more details on partitioning, see [Partitioning](/partitioned-table.md). 
-### Analyze table +### Analyzing tables -[Statistics Collection](/statistics.md#manual-collection) works differently in TiDB than in MySQL, in that it is a relatively lightweight and short-lived operation in MySQL/InnoDB, while in TiDB it completely rebuilds the statistics for a table and can take much longer to complete. +In TiDB, [Statistics Collection](/statistics.md#manual-collection) differs from MySQL in that it completely rebuilds the statistics for a table, making it a more resource-intensive operation that takes longer to complete. In contrast, MySQL/InnoDB performs a relatively lightweight and short-lived operation. -These differences are documented further in [`ANALYZE TABLE`](/sql-statements/sql-statement-analyze-table.md). +For more information, refer to [`ANALYZE TABLE`](/sql-statements/sql-statement-analyze-table.md). ### Limitations of `SELECT` syntax -- The syntax `SELECT ... INTO @variable` is not supported. -- The syntax `SELECT ... GROUP BY ... WITH ROLLUP` is not supported. -- The syntax `SELECT .. GROUP BY expr` does not imply `GROUP BY expr ORDER BY expr` as it does in MySQL 5.7. +TiDB does not support the following `SELECT` syntax: + +- `SELECT ... INTO @variable` +- `SELECT ... GROUP BY ... WITH ROLLUP` +- `SELECT .. GROUP BY expr` does not imply `GROUP BY expr ORDER BY expr` as it does in MySQL 5.7. -For details, see the [`SELECT`](/sql-statements/sql-statement-select.md) statement reference. +For more details, see the [`SELECT`](/sql-statements/sql-statement-select.md) statement reference. ### `UPDATE` statement @@ -187,27 +204,27 @@ See the [`UPDATE`](/sql-statements/sql-statement-update.md) statement reference. ### Views -Views in TiDB are not updatable. They do not support write operations such as `UPDATE`, `INSERT`, and `DELETE`. +Views in TiDB are not updatable and do not support write operations such as `UPDATE`, `INSERT`, and `DELETE`. 
### Temporary tables -For details, see [Compatibility between TiDB local temporary tables and MySQL temporary tables](/temporary-tables.md#compatibility-with-mysql-temporary-tables). +For more information, see [Compatibility between TiDB local temporary tables and MySQL temporary tables](/temporary-tables.md#compatibility-with-mysql-temporary-tables). ### Character sets and collations -* To learn the details of the character sets and collations supported by TiDB, see [Character Set and Collation Overview](/character-set-and-collation.md). +* To learn about the character sets and collations supported by TiDB, see [Character Set and Collation Overview](/character-set-and-collation.md). -* To learn the MySQL compatibility of the GBK character set, see [GBK compatibility](/character-set-gbk.md#mysql-compatibility) . +* For information on the MySQL compatibility of the GBK character set, refer to [GBK compatibility](/character-set-gbk.md#mysql-compatibility). * TiDB inherits the character set used in the table as the national character set. ### Storage engines -For compatibility reasons, TiDB supports the syntax to create tables with alternative storage engines. In implementation, TiDB describes the metadata as the InnoDB storage engine. +TiDB allows for tables to be created with alternative storage engines. Despite this, the metadata as described by TiDB is for the InnoDB storage engine as a way to ensure compatibility. -TiDB supports storage engine abstraction similar to MySQL, but you need to specify the storage engine using the [`--store`](/command-line-flags-for-tidb-configuration.md#--store) option when you start the TiDB server. +TiDB provides a storage engine abstraction similar to MySQL. To specify the storage engine, use the [`--store`](/command-line-flags-for-tidb-configuration.md#--store) option when you start the TiDB server. 
@@ -221,55 +238,57 @@ TiDB supports most [SQL modes](/sql-mode.md): ### Default differences +TiDB has the following default differences compared with MySQL 5.7 and MySQL 8.0: + - Default character set: - - The default value in TiDB is `utf8mb4`. - - The default value in MySQL 5.7 is `latin1`. - - The default value in MySQL 8.0 is `utf8mb4`. + - TiDB's default value is `utf8mb4`. + - MySQL 5.7's default value is `latin1`. + - MySQL 8.0's default value is `utf8mb4`. - Default collation: - - The default collation of `utf8mb4` in TiDB is `utf8mb4_bin`. - - The default collation of `utf8mb4` in MySQL 5.7 is `utf8mb4_general_ci`. - - The default collation of `utf8mb4` in MySQL 8.0 is `utf8mb4_0900_ai_ci`. + - TiDB's default collation of `utf8mb4` is `utf8mb4_bin`. + - MySQL 5.7's default collation of `utf8mb4` is `utf8mb4_general_ci`. + - MySQL 8.0's default collation of `utf8mb4` is `utf8mb4_0900_ai_ci`. - Default SQL mode: - - The default SQL mode in TiDB includes these modes: `ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION`. - - The default SQL mode in MySQL: + - TiDB's default SQL mode includes these modes: `ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION`. + - MySQL's default SQL mode: - The default SQL mode in MySQL 5.7 is the same as TiDB. - The default SQL mode in MySQL 8.0 includes these modes: `ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION`. - Default value of `lower_case_table_names`: - - The default value in TiDB is `2` and currently TiDB only supports `2`. - - The default value in MySQL: - - On Linux: `0`. It means that table and database names are stored on disk using the lettercase specified in the `CREATE TABLE` or `CREATE DATABASE` statement. Name comparisons are case-sensitive. - - On Windows: `1`. 
It means that table names are stored in lowercase on disk and name comparisons are not case-sensitive. MySQL converts all table names to lowercase on storage and lookup. This behavior also applies to database names and table aliases. - - On macOS: `2`. It means that table and database names are stored on disk using the lettercase specified in the `CREATE TABLE` or `CREATE DATABASE` statement, but MySQL converts them to lowercase on lookup. Name comparisons are not case-sensitive. + - The default value in TiDB is `2`, and only `2` is currently supported. + - MySQL defaults to the following values: + - On Linux: `0`. It means that table and database names are stored on disk according to the letter case specified in the `CREATE TABLE` or `CREATE DATABASE` statement. Name comparisons are case-sensitive. + - On Windows: `1`. It means table names are stored in lowercase on disk, and name comparisons are not case-sensitive. MySQL converts all table names to lowercase on storage and lookup. This behavior also applies to database names and table aliases. + - On macOS: `2`. It means table and database names are stored on disk according to the letter case specified in the `CREATE TABLE` or `CREATE DATABASE` statement, but MySQL converts them to lowercase on lookup. Name comparisons are not case-sensitive. - Default value of `explicit_defaults_for_timestamp`: - - The default value in TiDB is `ON` and currently TiDB only supports `ON`. - - The default value in MySQL: + - The default value in TiDB is `ON`, and only `ON` is currently supported. + - MySQL defaults to the following values: - For MySQL 5.7: `OFF`. - For MySQL 8.0: `ON`. ### Date and Time -#### Named timezone +TiDB supports named timezones with the following considerations: -+ TiDB uses all time zone rules currently installed in the system for calculation (usually the `tzdata` package). You can use all time zone names without importing the time zone table data. 
You cannot modify the calculation rules by importing the time zone table data. -+ MySQL uses the local time zone by default and relies on the current time zone rules built into the system (such as when to start daylight saving time) for calculation; and the time zone cannot be specified by the time zone name without [importing the time zone table data](https://dev.mysql.com/doc/refman/5.7/en/time-zone-support.html#time-zone-installation). ++ TiDB uses all the timezone rules presently installed in the system for calculation, typically the `tzdata` package. This makes it possible to use all timezone names without needing to import timezone table data. Importing timezone table data will not change the calculation rules. ++ Currently, MySQL uses the local timezone by default, then relies on the current timezone rules built into the system (for example, when daylight saving time begins) for calculation. Without [importing timezone table data](https://dev.mysql.com/doc/refman/5.7/en/time-zone-support.html#time-zone-installation), MySQL cannot specify the timezone by name. ### Type system differences -The following column types are supported by MySQL, but **NOT** by TiDB: +The following column types are supported by MySQL but **not** by TiDB: -+ FLOAT4/FLOAT8 -+ `SQL_TSI_*` (including SQL_TSI_MONTH, SQL_TSI_WEEK, SQL_TSI_DAY, SQL_TSI_HOUR, SQL_TSI_MINUTE and SQL_TSI_SECOND, excluding SQL_TSI_YEAR) +- FLOAT4/FLOAT8 +- `SQL_TSI_*` (includes SQL_TSI_MONTH, SQL_TSI_WEEK, SQL_TSI_DAY, SQL_TSI_HOUR, SQL_TSI_MINUTE, and SQL_TSI_SECOND, but excludes SQL_TSI_YEAR) -### Incompatibility caused by deprecated features +### Incompatibility due to deprecated features -TiDB does not implement certain features that have been marked as deprecated in MySQL, including: +TiDB does not implement specific features deprecated in MySQL, including: -* Specifying precision for floating point types. 
MySQL 8.0 [deprecates](https://dev.mysql.com/doc/refman/8.0/en/floating-point-types.html) this feature, and it is recommended to use the `DECIMAL` type instead. -* The `ZEROFILL` attribute. MySQL 8.0 [deprecates](https://dev.mysql.com/doc/refman/8.0/en/numeric-type-attributes.html) this feature, and it is recommended to instead pad numeric values in your application. +- Specifying precision for floating-point types. MySQL 8.0 [deprecates](https://dev.mysql.com/doc/refman/8.0/en/floating-point-types.html) this feature, and it is recommended to use the `DECIMAL` type instead. +- The `ZEROFILL` attribute. MySQL 8.0 [deprecates](https://dev.mysql.com/doc/refman/8.0/en/numeric-type-attributes.html) this feature, and it is recommended to pad numeric values in your application instead. ### `CREATE RESOURCE GROUP`, `DROP RESOURCE GROUP`, and `ALTER RESOURCE GROUP` statements -For the statements of creating, modifying, and dropping resource groups, the supported parameters are different from that of MySQL. See the following documents for details: +The following statements for creating, modifying, and dropping resource groups have different supported parameters than MySQL. For details, see the following documents: - [`CREATE RESOURCE GROUP`](/sql-statements/sql-statement-create-resource-group.md) - [`DROP RESOURCE GROUP`](/sql-statements/sql-statement-drop-resource-group.md) diff --git a/mysql-schema.md b/mysql-schema.md index 12255b071beb8..250959ff55dba 100644 --- a/mysql-schema.md +++ b/mysql-schema.md @@ -56,9 +56,15 @@ Currently, the `help_topic` is NULL. 
## TTL related system tables -* `mysql.tidb_ttl_table_status` the previously executed TTL job and ongoing TTL job for all TTL tables -* `mysql.tidb_ttl_task` the current ongoing TTL subtasks -* `mysql.tidb_ttl_job_history` the execution history of TTL tasks in the last 90 days +* `tidb_ttl_table_status`: the previously executed TTL job and ongoing TTL job for all TTL tables +* `tidb_ttl_task`: the current ongoing TTL subtasks +* `tidb_ttl_job_history`: the execution history of TTL tasks in the last 90 days + +## Runaway queries related system tables + +* `tidb_runaway_queries`: the history records of all identified runaway queries in the past 7 days +* `tidb_runaway_watch`: the watch list of runaway queries +* `tidb_runaway_watch_done`: a watch list of deleted or expired runaway queries ## Miscellaneous system tables @@ -70,5 +76,7 @@ Currently, the `help_topic` is NULL. - `expr_pushdown_blacklist`: the blocklist for expression pushdown - `opt_rule_blacklist`: the blocklist for logical optimization rules - `table_cache_meta`: the metadata of cached tables +- `tidb_import_jobs`: the job information of [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md) +- `tidb_timers`: the metadata of internal timers diff --git a/optimizer-fix-controls.md b/optimizer-fix-controls.md new file mode 100644 index 0000000000000..73af599f5777c --- /dev/null +++ b/optimizer-fix-controls.md @@ -0,0 +1,59 @@ +--- +title: Optimizer Fix Controls +summary: Learn about the Optimizer Fix Controls feature and how to use `tidb_opt_fix_control` to control the TiDB optimizer in a more fine-grained way. +--- + +# Optimizer Fix Controls + +As the product evolves iteratively, the behavior of the TiDB optimizer changes, which in turn generates more reasonable execution plans. However, in some particular scenarios, the new behavior might lead to unexpected results. For example: + +- The effect of some behaviors relies on a specific scenario. 
Changes that bring improvements to most scenarios might cause regressions to others. +- Sometimes, the relationship between changes in the behavior details and their consequences is very complicated. An improvement in a certain behavior might cause overall regression. + +Therefore, TiDB provides the Optimizer Fix Controls feature that allows you to make fine-grained control of TiDB optimizer behaviors by setting values for a group of fixes. This document describes the Optimizer Fix Controls feature and how to use it, and lists all the fixes that TiDB currently supports for Optimizer Fix Controls. + +## Introduction to `tidb_opt_fix_control` + +Starting from v7.1.0, TiDB provides the [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710) system variable to control the behavior of the optimizer in a more fine-grained way. + +Each fix is a control item used to adjust the behavior in the TiDB optimizer for one particular purpose. It is denoted by a number that corresponds to a GitHub Issue that contains the technical details of the behavior change. For example, for fix `44262`, you can review what it controls in [Issue 44262](https://github.com/pingcap/tidb/issues/44262). + +The [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710) system variable accepts multiple fixes as one value, separated by commas (`,`). The format is `"<#issue1>:<value1>,<#issue2>:<value2>,...,<#issueN>:<valueN>"`, where `<#issueN>` is the fix number and `<valueN>` is the value to set for the fix. 
For example: + +```sql +SET SESSION tidb_opt_fix_control = '44262:ON,44389:ON'; +``` + +## Optimizer Fix Controls reference + +### [`44262`](https://github.com/pingcap/tidb/issues/44262) New in v7.2.0 + +- Default value: `OFF` +- Possible values: `ON`, `OFF` +- This variable controls whether to allow the use of [Dynamic pruning mode](/partitioned-table.md#dynamic-pruning-mode) to access the partitioned table when the [GlobalStats](/statistics.md#collect-statistics-of-partitioned-tables-in-dynamic-pruning-mode) are missing. + +### [`44389`](https://github.com/pingcap/tidb/issues/44389) New in v7.2.0 + +- Default value: `OFF` +- Possible values: `ON`, `OFF` +- For filters such as `c = 10 and (a = 'xx' or (a = 'kk' and b = 1))`, this variable controls whether to try to build more comprehensive scan ranges for `IndexRangeScan`. + +### [`44823`](https://github.com/pingcap/tidb/issues/44823) New in v7.3.0 + +- Default value: `200` +- Possible values: `[0, 2147483647]` +- To save memory, Plan Cache does not cache queries with parameters exceeding the specified number of this variable. `0` means no limit. + +### [`44830`](https://github.com/pingcap/tidb/issues/44830) New in v7.3.0 + +- Default value: `OFF` +- Possible values: `ON`, `OFF` +- This variable controls whether Plan Cache is allowed to cache execution plans with the `PointGet` operator generated during physical optimization. + +### [`44855`](https://github.com/pingcap/tidb/issues/44855) New in v7.3.0 + +- Default value: `OFF` +- Possible values: `ON`, `OFF` +- In some scenarios, when the `Probe` side of an `IndexJoin` operator contains a `Selection` operator, TiDB severely overestimates the row count of `IndexScan`. This might cause suboptimal query plans to be selected instead of `IndexJoin`. +- To mitigate this issue, TiDB has introduced an improvement. However, due to potential query plan fallback risks, this improvement is disabled by default. 
+- This variable controls whether to enable the preceding improvement. diff --git a/optimizer-hints.md b/optimizer-hints.md index b4569d86fd953..30486b687c516 100644 --- a/optimizer-hints.md +++ b/optimizer-hints.md @@ -8,38 +8,10 @@ aliases: ['/docs/dev/optimizer-hints/','/docs/dev/reference/performance/optimize TiDB supports optimizer hints, which are based on the comment-like syntax introduced in MySQL 5.7. For example, one of the common syntaxes is `/*+ HINT_NAME([t1_name [, t2_name] ...]) */`. Use of optimizer hints is recommended in cases where the TiDB optimizer selects a less optimal query plan. -> **Note:** -> -> MySQL command-line clients earlier than 5.7.7 strip optimizer hints by default. If you want to use the `Hint` syntax in these earlier versions, add the `--comments` option when starting the client. For example: `mysql -h 127.0.0.1 -P 4000 -uroot --comments`. +If you encounter a situation where hints do not take effect, see [Troubleshoot common issues that hints do not take effect](#troubleshoot-common-issues-that-hints-do-not-take-effect). ## Syntax -> **Note:** -> -> If the table you want to hint is not in the database specified by `USE DATABASE`, you need to specify the database name explicitly. For example: -> -> ```sql -> tidb> SELECT /*+ HASH_JOIN(t2, t) */ * FROM t, test2.t2; -> Empty set, 1 warning (0.00 sec) -> -> tidb> SHOW WARNINGS; -> +---------+------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -> | Level | Code | Message | -> +---------+------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -> | Warning | 1815 | There are no matching table names for (t2) in optimizer hint /*+ HASH_JOIN(t2, t) */ or /*+ TIDB_HJ(t2, t) */. 
Maybe you can use the table alias name | -> +---------+------+-------------------------------------------------------------------------------------------------------------------------------------------------------+ -> 1 row in set (0.00 sec) -> -> tidb> SELECT /*+ HASH_JOIN(test2.t2, t) */ * FROM t, test2.t2; -> Empty set (0.00 sec) -> -> tidb> SELECT /*+ READ_FROM_STORAGE(TIFLASH[test1.t1,test2.t2]) */ t1.a FROM test1.t t1, test2.t t2 WHERE t1.a = t2.a; -> Empty set (0.00 sec) -> -> ``` -> -> The examples in this document are all tables in the same database. If the tables you use are not in the same database, refer to the instructions to explicitly specify the database name. - Optimizer hints are case insensitive and specified within `/*+ ... */` comments following the `SELECT`, `UPDATE` or `DELETE` keyword in a SQL statement. Optimizer hints are not currently supported for `INSERT` statements. Multiple hints can be specified by separating with commas. For example, the following query uses three different hints: @@ -125,6 +97,14 @@ select /*+ MERGE_JOIN(t1, t2) */ * from t1, t2 where t1.id = t2.id; > > `TIDB_SMJ` is the alias for `MERGE_JOIN` in TiDB 3.0.x and earlier versions. If you are using any of these versions, you must apply the `TIDB_SMJ(t1_name [, tl_name ...])` syntax for the hint. For the later versions of TiDB, `TIDB_SMJ` and `MERGE_JOIN` are both valid names for the hint, but `MERGE_JOIN` is recommended. +### NO_MERGE_JOIN(t1_name [, tl_name ...]) + +The `NO_MERGE_JOIN(t1_name [, tl_name ...])` hint tells the optimizer not to use the sort-merge join algorithm for the given table(s). For example: + +```sql +SELECT /*+ NO_MERGE_JOIN(t1, t2) */ * FROM t1, t2 WHERE t1.id = t2.id; +``` + ### INL_JOIN(t1_name [, tl_name ...]) The `INL_JOIN(t1_name [, tl_name ...])` hint tells the optimizer to use the index nested loop join algorithm for the given table(s). 
This algorithm might consume less system resources and take shorter processing time in some scenarios and might produce an opposite result in other scenarios. If the result set is less than 10,000 rows after the outer table is filtered by the `WHERE` condition, it is recommended to use this hint. For example: @@ -141,10 +121,30 @@ The parameter(s) given in `INL_JOIN()` is the candidate table for the inner tabl > > `TIDB_INLJ` is the alias for `INL_JOIN` in TiDB 3.0.x and earlier versions. If you are using any of these versions, you must apply the `TIDB_INLJ(t1_name [, tl_name ...])` syntax for the hint. For the later versions of TiDB, `TIDB_INLJ` and `INL_JOIN` are both valid names for the hint, but `INL_JOIN` is recommended. +### NO_INDEX_JOIN(t1_name [, tl_name ...]) + +The `NO_INDEX_JOIN(t1_name [, tl_name ...])` hint tells the optimizer not to use the index nested loop join algorithm for the given table(s). For example: + +```sql +SELECT /*+ NO_INDEX_JOIN(t1, t2) */ * FROM t1, t2 WHERE t1.id = t2.id; +``` + ### INL_HASH_JOIN The `INL_HASH_JOIN(t1_name [, tl_name])` hint tells the optimizer to use the index nested loop hash join algorithm. The conditions for using this algorithm are the same with the conditions for using the index nested loop join algorithm. The difference between the two algorithms is that `INL_JOIN` creates a hash table on the joined inner table, but `INL_HASH_JOIN` creates a hash table on the joined outer table. `INL_HASH_JOIN` has a fixed limit on memory usage, while the memory used by `INL_JOIN` depends on the number of rows matched in the inner table. +### NO_INDEX_HASH_JOIN(t1_name [, tl_name ...]) + +The `NO_INDEX_HASH_JOIN(t1_name [, tl_name ...])` hint tells the optimizer not to use the index nested loop hash join algorithm for the given table(s). + +### INL_MERGE_JOIN + +The `INL_MERGE_JOIN(t1_name [, tl_name])` hint tells the optimizer to use the index nested loop merge join algorithm. 
The conditions for using this algorithm are the same with the conditions for using the index nested loop join algorithm. + +### NO_INDEX_MERGE_JOIN(t1_name [, tl_name ...]) + +The `NO_INDEX_MERGE_JOIN(t1_name [, tl_name ...])` hint tells the optimizer not to use the index nested loop merge join algorithm for the given table(s). + ### HASH_JOIN(t1_name [, tl_name ...]) The `HASH_JOIN(t1_name [, tl_name ...])` hint tells the optimizer to use the hash join algorithm for the given table(s). This algorithm allows the query to be executed concurrently with multiple threads, which achieves a higher processing speed but consumes more memory. For example: @@ -159,6 +159,14 @@ select /*+ HASH_JOIN(t1, t2) */ * from t1, t2 where t1.id = t2.id; > > `TIDB_HJ` is the alias for `HASH_JOIN` in TiDB 3.0.x and earlier versions. If you are using any of these versions, you must apply the `TIDB_HJ(t1_name [, tl_name ...])` syntax for the hint. For the later versions of TiDB, `TIDB_HJ` and `HASH_JOIN` are both valid names for the hint, but `HASH_JOIN` is recommended. +### NO_HASH_JOIN(t1_name [, tl_name ...]) + +The `NO_HASH_JOIN(t1_name [, tl_name ...])` hint tells the optimizer not to use the hash join algorithm for the given table(s). For example: + +```sql +SELECT /*+ NO_HASH_JOIN(t1, t2) */ * FROM t1, t2 WHERE t1.id = t2.id; +``` + ### HASH_JOIN_BUILD(t1_name [, tl_name ...]) The `HASH_JOIN_BUILD(t1_name [, tl_name ...])` hint tells the optimizer to use the hash join algorithm on specified tables with these tables working as the build side. In this way, you can build hash tables using specific tables. For example: @@ -811,4 +819,173 @@ Example: ```sql SELECT /*+ RESOURCE_GROUP(rg1) */ * FROM t limit 10; -``` \ No newline at end of file +``` + +## Troubleshoot common issues that hints do not take effect + +### Hints do not take effect because your MySQL command-line client strips hints + +MySQL command-line clients earlier than 5.7.7 strip optimizer hints by default. 
If you want to use the Hint syntax in these earlier versions, add the `--comments` option when starting the client. For example: `mysql -h 127.0.0.1 -P 4000 -uroot --comments`. + +### Hints do not take effect because the database name is not specified + +If you do not specify the database name when creating a connection, hints might not take effect. For example: + +When connecting to TiDB, you use the `mysql -h127.0.0.1 -P4000 -uroot` command without the `-D` option, and then execute the following SQL statements: + +```sql +SELECT /*+ use_index(t, a) */ a FROM test.t; +SHOW WARNINGS; +``` + +Because TiDB cannot identify the database for table `t`, the `use_index(t, a)` hint does not take effect. + +```sql ++---------+------+----------------------------------------------------------------------+ +| Level | Code | Message | ++---------+------+----------------------------------------------------------------------+ +| Warning | 1815 | use_index(.t, a) is inapplicable, check whether the table(.t) exists | ++---------+------+----------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +### Hints do not take effect because the database name is not explicitly specified in cross-table queries + +When executing cross-table queries, you need to explicitly specify database names. Otherwise, hints might not take effect. For example: + +```sql +USE test1; +CREATE TABLE t1(a INT, KEY(a)); +USE test2; +CREATE TABLE t2(a INT, KEY(a)); +SELECT /*+ use_index(t1, a) */ * FROM test1.t1, t2; +SHOW WARNINGS; +``` + +In the preceding statements, because table `t1` is not in the current `test2` database, the `use_index(t1, a)` hint does not take effect. 
+ +```sql ++---------+------+----------------------------------------------------------------------------------+ +| Level | Code | Message | ++---------+------+----------------------------------------------------------------------------------+ +| Warning | 1815 | use_index(test2.t1, a) is inapplicable, check whether the table(test2.t1) exists | ++---------+------+----------------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +In this case, you need to specify the database name explicitly by using `use_index(test1.t1, a)` instead of `use_index(t1, a)`. + +### Hints do not take effect because they are placed in wrong locations + +Hints cannot take effect if they are not placed directly after the specific keywords. For example: + +```sql +SELECT * /*+ use_index(t, a) */ FROM t; +SHOW WARNINGS; +``` + +The warning is as follows: + +```sql ++---------+------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Level | Code | Message | ++---------+------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Warning | 1064 | You have an error in your SQL syntax; check the manual that corresponds to your TiDB version for the right syntax to use [parser:8066]Optimizer hint can only be followed by certain keywords like SELECT, INSERT, etc. | ++---------+------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.01 sec) +``` + +In this case, you need to place the hint directly after the `SELECT` keyword. 
For more details, see the [Syntax](#syntax) section. + +### INL_JOIN hint does not take effect due to collation incompatibility + +When the collation of the join key is incompatible between two tables, the `IndexJoin` operator cannot be utilized to execute the query. In this case, the [`INL_JOIN` hint](#inl_joint1_name--tl_name-) does not take effect. For example: + +```sql +CREATE TABLE t1 (k varchar(8), key(k)) COLLATE=utf8mb4_general_ci; +CREATE TABLE t2 (k varchar(8), key(k)) COLLATE=utf8mb4_bin; +EXPLAIN SELECT /*+ tidb_inlj(t1) */ * FROM t1, t2 WHERE t1.k=t2.k; +``` + +The execution plan is as follows: + +```sql ++-----------------------------+----------+-----------+----------------------+----------------------------------------------+ +| id | estRows | task | access object | operator info | ++-----------------------------+----------+-----------+----------------------+----------------------------------------------+ +| HashJoin_19 | 12487.50 | root | | inner join, equal:[eq(test.t1.k, test.t2.k)] | +| ├─IndexReader_24(Build) | 9990.00 | root | | index:IndexFullScan_23 | +| │ └─IndexFullScan_23 | 9990.00 | cop[tikv] | table:t2, index:k(k) | keep order:false, stats:pseudo | +| └─IndexReader_22(Probe) | 9990.00 | root | | index:IndexFullScan_21 | +| └─IndexFullScan_21 | 9990.00 | cop[tikv] | table:t1, index:k(k) | keep order:false, stats:pseudo | ++-----------------------------+----------+-----------+----------------------+----------------------------------------------+ +5 rows in set, 1 warning (0.00 sec) +``` + +In the preceding statements, the collations of `t1.k` and `t2.k` are incompatible (`utf8mb4_general_ci` and `utf8mb4_bin` respectively), which prevents the `INL_JOIN` or `TIDB_INLJ` hint from taking effect. 
+ +```sql +SHOW WARNINGS; ++---------+------+----------------------------------------------------------------------------+ +| Level | Code | Message | ++---------+------+----------------------------------------------------------------------------+ +| Warning | 1815 | Optimizer Hint /*+ INL_JOIN(t1) */ or /*+ TIDB_INLJ(t1) */ is inapplicable | ++---------+------+----------------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + +### `INL_JOIN` hint does not take effect because of join order + +The [`INL_JOIN(t1, t2)`](#inl_joint1_name--tl_name-) or `TIDB_INLJ(t1, t2)` hint semantically instructs `t1` and `t2` to act as inner tables in an `IndexJoin` operator to join with other tables, rather than directly joining them using an `IndexJoin`operator. For example: + +```sql +EXPLAIN SELECT /*+ inl_join(t1, t3) */ * FROM t1, t2, t3 WHERE t1.id = t2.id AND t2.id = t3.id AND t1.id = t3.id; ++---------------------------------+----------+-----------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| id | estRows | task | access object | operator info | ++---------------------------------+----------+-----------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| IndexJoin_16 | 15625.00 | root | | inner join, inner:TableReader_13, outer key:test.t2.id, test.t1.id, inner key:test.t3.id, test.t3.id, equal cond:eq(test.t1.id, test.t3.id), eq(test.t2.id, test.t3.id) | +| ├─IndexJoin_34(Build) | 12500.00 | root | | inner join, inner:TableReader_31, outer key:test.t2.id, inner key:test.t1.id, equal cond:eq(test.t2.id, test.t1.id) | +| │ ├─TableReader_40(Build) | 10000.00 | root | | data:TableFullScan_39 | +| │ │ └─TableFullScan_39 | 10000.00 | cop[tikv] | table:t2 | 
keep order:false, stats:pseudo | +| │ └─TableReader_31(Probe) | 10000.00 | root | | data:TableRangeScan_30 | +| │ └─TableRangeScan_30 | 10000.00 | cop[tikv] | table:t1 | range: decided by [test.t2.id], keep order:false, stats:pseudo | +| └─TableReader_13(Probe) | 12500.00 | root | | data:TableRangeScan_12 | +| └─TableRangeScan_12 | 12500.00 | cop[tikv] | table:t3 | range: decided by [test.t2.id test.t1.id], keep order:false, stats:pseudo | ++---------------------------------+----------+-----------+---------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +``` + +In the preceding example, `t1` and `t3` are not directly joined together by an `IndexJoin`. + +To perform a direct `IndexJoin` between `t1` and `t3`, you can first use [`LEADING(t1, t3)` hint](#leadingt1_name--tl_name-) to specify the join order of `t1` and `t3`, and then use the `INL_JOIN` hint to specify the join algorithm. 
For example: + +```sql +EXPLAIN SELECT /*+ leading(t1, t3), inl_join(t3) */ * FROM t1, t2, t3 WHERE t1.id = t2.id AND t2.id = t3.id AND t1.id = t3.id; ++---------------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------+ +| id | estRows | task | access object | operator info | ++---------------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------+ +| Projection_12 | 15625.00 | root | | test.t1.id, test.t1.name, test.t2.id, test.t2.name, test.t3.id, test.t3.name | +| └─HashJoin_21 | 15625.00 | root | | inner join, equal:[eq(test.t1.id, test.t2.id) eq(test.t3.id, test.t2.id)] | +| ├─TableReader_36(Build) | 10000.00 | root | | data:TableFullScan_35 | +| │ └─TableFullScan_35 | 10000.00 | cop[tikv] | table:t2 | keep order:false, stats:pseudo | +| └─IndexJoin_28(Probe) | 12500.00 | root | | inner join, inner:TableReader_25, outer key:test.t1.id, inner key:test.t3.id, equal cond:eq(test.t1.id, test.t3.id) | +| ├─TableReader_34(Build) | 10000.00 | root | | data:TableFullScan_33 | +| │ └─TableFullScan_33 | 10000.00 | cop[tikv] | table:t1 | keep order:false, stats:pseudo | +| └─TableReader_25(Probe) | 10000.00 | root | | data:TableRangeScan_24 | +| └─TableRangeScan_24 | 10000.00 | cop[tikv] | table:t3 | range: decided by [test.t1.id], keep order:false, stats:pseudo | ++---------------------------------+----------+-----------+---------------+---------------------------------------------------------------------------------------------------------------------+ +9 rows in set (0.01 sec) +``` + +### Using hints causes the `Can't find a proper physical plan for this query` error + +The `Can't find a proper physical plan for this query` error might occur in the following scenarios: + +- A query itself does not require reading indexes in 
order. That is, for this query, the optimizer does not generate a plan to read indexes in order in any case without using hints. In this case, if the `ORDER_INDEX` hint is specified, this error occurs. To resolve this issue, remove the corresponding `ORDER_INDEX` hint. +- A query excludes all possible join methods by using the `NO_JOIN` related hints. + +```sql +CREATE TABLE t1 (a INT); +CREATE TABLE t2 (a INT); +EXPLAIN SELECT /*+ NO_HASH_JOIN(t1), NO_MERGE_JOIN(t1) */ * FROM t1, t2 WHERE t1.a=t2.a; +ERROR 1815 (HY000): Internal : Can't find a proper physical plan for this query +``` diff --git a/overview.md b/overview.md index e1c6a094655af..65defc64836e4 100644 --- a/overview.md +++ b/overview.md @@ -22,43 +22,43 @@ The following video introduces key features of TiDB. ## Key features -- **Horizontally scaling out or scaling in easily** +- **Easy horizontal scaling** - The TiDB architecture design of separating computing from storage enables you to separately scale out or scale in the computing or storage capacity online as needed. The scaling process is transparent to application operations and maintenance staff. + The TiDB architecture design separates computing from storage, letting you scale out or scale in the computing or storage capacity online as needed. The scaling process is transparent to application operations and maintenance staff. - **Financial-grade high availability** - The data is stored in multiple replicas. Data replicas obtain the transaction log using the Multi-Raft protocol. A transaction can be committed only when data has been successfully written into the majority of replicas. This can guarantee strong consistency, and availability when a minority of replicas go down. To meet the requirements of different disaster tolerance levels, you can configure the geographic location and number of replicas as needed. + Data is stored in multiple replicas, and the Multi-Raft protocol is used to obtain the transaction log. 
A transaction can only be committed when data has been successfully written into the majority of replicas. This guarantees strong consistency and availability when a minority of replicas go down. You can configure the geographic location and number of replicas as needed to meet different disaster tolerance levels. - **Real-time HTAP** - TiDB provides two storage engines: [TiKV](/tikv-overview.md), a row-based storage engine, and [TiFlash](/tiflash/tiflash-overview.md), a columnar storage engine. TiFlash uses the Multi-Raft Learner protocol to replicate data from TiKV in real time, ensuring that the data between the TiKV row-based storage engine and the TiFlash columnar storage engine are consistent. TiKV and TiFlash can be deployed on different machines as needed to solve the problem of HTAP resource isolation. + TiDB provides two storage engines: [TiKV](/tikv-overview.md), a row-based storage engine, and [TiFlash](/tiflash/tiflash-overview.md), a columnar storage engine. TiFlash uses the Multi-Raft Learner protocol to replicate data from TiKV in real time, ensuring consistent data between the TiKV row-based storage engine and the TiFlash columnar storage engine. TiKV and TiFlash can be deployed on different machines as needed to solve the problem of HTAP resource isolation. - **Cloud-native distributed database** - TiDB is a distributed database designed for the cloud, providing flexible scalability, reliability and security on the cloud platform. Users can elastically scale TiDB to meet the requirements of their changing workloads. In TiDB, each piece of data has 3 replicas at least, which can be scheduled in different cloud availability zones to tolerate the outage of a whole data center. [TiDB Operator](https://docs.pingcap.com/tidb-in-kubernetes/stable/tidb-operator-overview) helps manage TiDB on Kubernetes and automates tasks related to operating the TiDB cluster, which makes TiDB easier to deploy on any cloud that provides managed Kubernetes. 
[TiDB Cloud](https://pingcap.com/tidb-cloud/), the fully-managed TiDB service, is the easiest, most economical, and most resilient way to unlock the full power of [TiDB in the cloud](https://docs.pingcap.com/tidbcloud/), allowing you to deploy and run TiDB clusters with just a few clicks. + TiDB is a distributed database designed for the cloud, providing flexible scalability, reliability, and security on the cloud platform. Users can elastically scale TiDB to meet the requirements of their changing workloads. In TiDB, each piece of data has at least 3 replicas, which can be scheduled in different cloud availability zones to tolerate the outage of a whole data center. [TiDB Operator](https://docs.pingcap.com/tidb-in-kubernetes/stable/tidb-operator-overview) helps manage TiDB on Kubernetes and automates tasks related to operating the TiDB cluster, making TiDB easier to deploy on any cloud that provides managed Kubernetes. [TiDB Cloud](https://pingcap.com/tidb-cloud/), the fully-managed TiDB service, is the easiest, most economical, and most resilient way to unlock the full power of [TiDB in the cloud](https://docs.pingcap.com/tidbcloud/), allowing you to deploy and run TiDB clusters with just a few clicks. - **Compatible with the MySQL 5.7 protocol and MySQL ecosystem** - TiDB is compatible with the MySQL 5.7 protocol, common features of MySQL, and the MySQL ecosystem. To migrate your applications to TiDB, you do not need to change a single line of code in many cases or only need to modify a small amount of code. In addition, TiDB provides a series of [data migration tools](/ecosystem-tool-user-guide.md) to help easily migrate application data into TiDB. + TiDB is compatible with the MySQL 5.7 protocol, common features of MySQL, and the MySQL ecosystem. To migrate applications to TiDB, you do not need to change a single line of code in many cases, or only need to modify a small amount of code. 
In addition, TiDB provides a series of [data migration tools](/ecosystem-tool-user-guide.md) to help easily migrate application data into TiDB. ## Use cases -- **Financial industry scenarios with high requirements for data consistency, reliability, availability, scalability, and disaster tolerance** +- **Financial industry scenarios** - As we all know, the financial industry has high requirements for data consistency, reliability, availability, scalability, and disaster tolerance. The traditional solution is to provide services in two data centers in the same city, and provide data disaster recovery but no services in a third data center located in another city. This solution has the disadvantages of low resource utilization, high maintenance cost, and the fact that RTO (Recovery Time Objective) and RPO (Recovery Point Objective) cannot meet expectations. TiDB uses multiple replicas and the Multi-Raft protocol to schedule data to different data centers, racks, and machines. When some machines fail, the system can automatically switch to ensure that the system RTO ≦ 30 seconds and RPO = 0. + TiDB is ideal for financial industry scenarios with high requirements for data consistency, reliability, availability, scalability, and disaster tolerance. Traditional solutions are costly and inefficient, with low resource utilization and high maintenance costs. TiDB uses multiple replicas and the Multi-Raft protocol to schedule data to different data centers, racks, and machines, ensuring system RTO ≦ 30 seconds and RPO = 0. -- **Massive data and high concurrency scenarios with high requirements for storage capacity, scalability, and concurrency** +- **Massive data and high concurrency scenarios** - As applications grow rapidly, the data surges. Traditional standalone databases cannot meet the data capacity requirements. The solution is to use sharding middleware or a distributed SQL database (like TiDB), and the latter is more cost-effective. 
TiDB adopts a separate computing and storage architecture, which enables you to scale out or scale in the computing or storage capacity separately. The computing layer supports a maximum of 512 nodes, each node supports a maximum of 1,000 concurrencies, and the maximum cluster capacity is at the PB (petabytes) level. + Traditional standalone databases cannot meet the data capacity requirements of rapidly growing applications. TiDB is a cost-effective solution that adopts a separate computing and storage architecture, enabling easy scaling of computing or storage capacity separately. The computing layer supports a maximum of 512 nodes, each node supports a maximum of 1,000 concurrencies, and the maximum cluster capacity is at the PB (petabytes) level. - **Real-time HTAP scenarios** - With the fast growth of 5G, Internet of Things, and artificial intelligence, the data generated by a company keeps increasing tremendously, reaching a scale of hundreds of TB (terabytes) or even the PB level. The traditional solution is to process online transactional applications using an OLTP database and use an ETL (Extract, Transform, Load) tool to replicate the data into an OLAP database for data analysis. This solution has multiple disadvantages such as high storage costs and poor real-time performance. TiDB introduces the TiFlash columnar storage engine in v4.0, which combines with the TiKV row-based storage engine to build TiDB as a true HTAP database. With a small amount of extra storage cost, you can handle both online transactional processing and real-time data analysis in the same system, which greatly saves the cost. + TiDB is ideal for scenarios with massive data and high concurrency that require real-time processing. TiDB introduces the TiFlash columnar storage engine in v4.0, which combines with the TiKV row-based storage engine to build TiDB as a true HTAP database. 
With a small amount of extra storage cost, you can handle both online transactional processing and real-time data analysis in the same system, which greatly saves cost. - **Data aggregation and secondary processing scenarios** - The application data of most companies are scattered in different systems. As the application grows, the decision-making leaders need to understand the business status of the entire company to make decisions in time. In this case, the company needs to aggregate the scattered data into the same system and execute secondary processing to generate a T+0 or T+1 report. The traditional solution is to use ETL and Hadoop, but the Hadoop system is complicated, with high operations and maintenance cost and storage cost. Compared with Hadoop, TiDB is much simpler. You can replicate data into TiDB using ETL tools or data migration tools provided by TiDB. Reports can be directly generated using SQL statements. + TiDB is suitable for companies that need to aggregate scattered data into the same system and execute secondary processing to generate a T+0 or T+1 report. Compared with Hadoop, TiDB is much simpler. You can replicate data into TiDB using ETL (Extract, Transform, Load) tools or data migration tools provided by TiDB. Reports can be directly generated using SQL statements. 
## See also diff --git a/package-lock.json b/package-lock.json index fb880a06c5601..fd224f072eb07 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,6 +9,7 @@ "version": "1.0.0", "license": "MIT", "dependencies": { + "axios": "^1.4.0", "glob": "^8.0.3", "mdast-util-from-markdown": "^1.2.0", "mdast-util-frontmatter": "^1.0.0", @@ -18,9 +19,335 @@ "micromark-extension-frontmatter": "^1.0.0", "micromark-extension-gfm": "^2.0.1", "micromark-extension-mdxjs": "^1.0.0", + "octokit": "^3.1.0", "unist-util-visit": "^4.1.0" } }, + "node_modules/@octokit/app": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/@octokit/app/-/app-14.0.0.tgz", + "integrity": "sha512-g/zDXttroZ9Se08shK0d0d/j0cgSA+h4WV7qGUevNEM0piNBkIlfb4Fm6bSwCNAZhNf72mBgERmYOoxicPkqdw==", + "dependencies": { + "@octokit/auth-app": "^6.0.0", + "@octokit/auth-unauthenticated": "^5.0.0", + "@octokit/core": "^5.0.0", + "@octokit/oauth-app": "^6.0.0", + "@octokit/plugin-paginate-rest": "^8.0.0", + "@octokit/types": "^11.1.0", + "@octokit/webhooks": "^12.0.1" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/auth-app": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-app/-/auth-app-6.0.0.tgz", + "integrity": "sha512-OKct7Rukf3g9DjpzcpdacQsdmd6oPrJ7fZND22JkjzhDvfhttUOnmh+qPS4kHhaNNyTxqSThnfrUWvkqNLd1nw==", + "dependencies": { + "@octokit/auth-oauth-app": "^7.0.0", + "@octokit/auth-oauth-user": "^4.0.0", + "@octokit/request": "^8.0.2", + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0", + "deprecation": "^2.3.1", + "lru-cache": "^10.0.0", + "universal-github-app-jwt": "^1.1.1", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/auth-oauth-app": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-oauth-app/-/auth-oauth-app-7.0.0.tgz", + "integrity": 
"sha512-8JvJEXGoEqrbzLwt3SwIUvkDd+1wrM8up0KawvDIElB8rbxPbvWppGO0SLKAWSJ0q8ILcVq+mWck6pDcZ3a9KA==", + "dependencies": { + "@octokit/auth-oauth-device": "^6.0.0", + "@octokit/auth-oauth-user": "^4.0.0", + "@octokit/request": "^8.0.2", + "@octokit/types": "^11.0.0", + "@types/btoa-lite": "^1.0.0", + "btoa-lite": "^1.0.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/auth-oauth-device": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-oauth-device/-/auth-oauth-device-6.0.0.tgz", + "integrity": "sha512-Zgf/LKhwWk54rJaTGYVYtbKgUty+ouil6VQeRd+pCw7Gd0ECoSWaZuHK6uDGC/HtnWHjpSWFhzxPauDoHcNRtg==", + "dependencies": { + "@octokit/oauth-methods": "^4.0.0", + "@octokit/request": "^8.0.0", + "@octokit/types": "^11.0.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/auth-oauth-user": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-oauth-user/-/auth-oauth-user-4.0.0.tgz", + "integrity": "sha512-VOm5aIkVGHaOhIvsF/4YmSjoYDzzrKbbYkdSEO0KqHK7I8SlO3ZndSikQ1fBlNPUEH0ve2BOTxLrVvI1qBf9/Q==", + "dependencies": { + "@octokit/auth-oauth-device": "^6.0.0", + "@octokit/oauth-methods": "^4.0.0", + "@octokit/request": "^8.0.2", + "@octokit/types": "^11.0.0", + "btoa-lite": "^1.0.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/auth-token": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-4.0.0.tgz", + "integrity": "sha512-tY/msAuJo6ARbK6SPIxZrPBms3xPbfwBrulZe0Wtr/DIY9lje2HeV1uoebShn6mx7SjCHif6EjMvoREj+gZ+SA==", + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/auth-unauthenticated": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-unauthenticated/-/auth-unauthenticated-5.0.0.tgz", + "integrity": 
"sha512-AjOI6FNB2dweJ85p6rf7D4EhE4y6VBcwYfX/7KJkR5Q9fD9ET6NABAjajUTSNFfCxmNIaQgISggZ3pkgwtTqsA==", + "dependencies": { + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/core": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@octokit/core/-/core-5.0.0.tgz", + "integrity": "sha512-YbAtMWIrbZ9FCXbLwT9wWB8TyLjq9mxpKdgB3dUNxQcIVTf9hJ70gRPwAcqGZdY6WdJPZ0I7jLaaNDCiloGN2A==", + "dependencies": { + "@octokit/auth-token": "^4.0.0", + "@octokit/graphql": "^7.0.0", + "@octokit/request": "^8.0.2", + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0", + "before-after-hook": "^2.2.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/endpoint": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-9.0.0.tgz", + "integrity": "sha512-szrQhiqJ88gghWY2Htt8MqUDO6++E/EIXqJ2ZEp5ma3uGS46o7LZAzSLt49myB7rT+Hfw5Y6gO3LmOxGzHijAQ==", + "dependencies": { + "@octokit/types": "^11.0.0", + "is-plain-object": "^5.0.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/graphql": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-7.0.1.tgz", + "integrity": "sha512-T5S3oZ1JOE58gom6MIcrgwZXzTaxRnxBso58xhozxHpOqSTgDS6YNeEUvZ/kRvXgPrRz/KHnZhtb7jUMRi9E6w==", + "dependencies": { + "@octokit/request": "^8.0.1", + "@octokit/types": "^11.0.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/oauth-app": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@octokit/oauth-app/-/oauth-app-6.0.0.tgz", + "integrity": "sha512-bNMkS+vJ6oz2hCyraT9ZfTpAQ8dZNqJJQVNaKjPLx4ue5RZiFdU1YWXguOPR8AaSHS+lKe+lR3abn2siGd+zow==", + "dependencies": { + "@octokit/auth-oauth-app": "^7.0.0", + "@octokit/auth-oauth-user": "^4.0.0", + 
"@octokit/auth-unauthenticated": "^5.0.0", + "@octokit/core": "^5.0.0", + "@octokit/oauth-authorization-url": "^6.0.2", + "@octokit/oauth-methods": "^4.0.0", + "@types/aws-lambda": "^8.10.83", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/oauth-authorization-url": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@octokit/oauth-authorization-url/-/oauth-authorization-url-6.0.2.tgz", + "integrity": "sha512-CdoJukjXXxqLNK4y/VOiVzQVjibqoj/xHgInekviUJV73y/BSIcwvJ/4aNHPBPKcPWFnd4/lO9uqRV65jXhcLA==", + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/oauth-methods": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/oauth-methods/-/oauth-methods-4.0.0.tgz", + "integrity": "sha512-dqy7BZLfLbi3/8X8xPKUKZclMEK9vN3fK5WF3ortRvtplQTszFvdAGbTo71gGLO+4ZxspNiLjnqdd64Chklf7w==", + "dependencies": { + "@octokit/oauth-authorization-url": "^6.0.2", + "@octokit/request": "^8.0.2", + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0", + "btoa-lite": "^1.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/openapi-types": { + "version": "18.0.0", + "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-18.0.0.tgz", + "integrity": "sha512-V8GImKs3TeQRxRtXFpG2wl19V7444NIOTDF24AWuIbmNaNYOQMWRbjcGDXV5B+0n887fgDcuMNOmlul+k+oJtw==" + }, + "node_modules/@octokit/plugin-paginate-graphql": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-graphql/-/plugin-paginate-graphql-4.0.0.tgz", + "integrity": "sha512-7HcYW5tP7/Z6AETAPU14gp5H5KmCPT3hmJrS/5tO7HIgbwenYmgw4OY9Ma54FDySuxMwD+wsJlxtuGWwuZuItA==", + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "@octokit/core": ">=5" + } + }, + "node_modules/@octokit/plugin-paginate-rest": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-8.0.0.tgz", + 
"integrity": "sha512-2xZ+baZWUg+qudVXnnvXz7qfrTmDeYPCzangBVq/1gXxii/OiS//4shJp9dnCCvj1x+JAm9ji1Egwm1BA47lPQ==", + "dependencies": { + "@octokit/types": "^11.0.0" + }, + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "@octokit/core": ">=5" + } + }, + "node_modules/@octokit/plugin-rest-endpoint-methods": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-9.0.0.tgz", + "integrity": "sha512-KquMF/VB1IkKNiVnzJKspY5mFgGyLd7HzdJfVEGTJFzqu9BRFNWt+nwTCMuUiWc72gLQhRWYubTwOkQj+w/1PA==", + "dependencies": { + "@octokit/types": "^11.0.0" + }, + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "@octokit/core": ">=5" + } + }, + "node_modules/@octokit/plugin-retry": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-retry/-/plugin-retry-6.0.0.tgz", + "integrity": "sha512-a1/A4A+PB1QoAHQfLJxGHhLfSAT03bR1jJz3GgQJZvty2ozawFWs93MiBQXO7SL2YbO7CIq0Goj4qLOBj8JeMQ==", + "dependencies": { + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0", + "bottleneck": "^2.15.3" + }, + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "@octokit/core": ">=5" + } + }, + "node_modules/@octokit/plugin-throttling": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-throttling/-/plugin-throttling-7.0.0.tgz", + "integrity": "sha512-KL2k/d0uANc8XqP5S64YcNFCudR3F5AaKO39XWdUtlJIjT9Ni79ekWJ6Kj5xvAw87udkOMEPcVf9xEge2+ahew==", + "dependencies": { + "@octokit/types": "^11.0.0", + "bottleneck": "^2.15.3" + }, + "engines": { + "node": ">= 18" + }, + "peerDependencies": { + "@octokit/core": "^5.0.0" + } + }, + "node_modules/@octokit/request": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/@octokit/request/-/request-8.1.1.tgz", + "integrity": "sha512-8N+tdUz4aCqQmXl8FpHYfKG9GelDFd7XGVzyN8rc6WxVlYcfpHECnuRkgquzz+WzvHTK62co5di8gSXnzASZPQ==", + "dependencies": { + "@octokit/endpoint": "^9.0.0", + 
"@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.1.0", + "is-plain-object": "^5.0.0", + "universal-user-agent": "^6.0.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/request-error": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-5.0.0.tgz", + "integrity": "sha512-1ue0DH0Lif5iEqT52+Rf/hf0RmGO9NWFjrzmrkArpG9trFfDM/efx00BJHdLGuro4BR/gECxCU2Twf5OKrRFsQ==", + "dependencies": { + "@octokit/types": "^11.0.0", + "deprecation": "^2.0.0", + "once": "^1.4.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/types": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/@octokit/types/-/types-11.1.0.tgz", + "integrity": "sha512-Fz0+7GyLm/bHt8fwEqgvRBWwIV1S6wRRyq+V6exRKLVWaKGsuy6H9QFYeBVDV7rK6fO3XwHgQOPxv+cLj2zpXQ==", + "dependencies": { + "@octokit/openapi-types": "^18.0.0" + } + }, + "node_modules/@octokit/webhooks": { + "version": "12.0.3", + "resolved": "https://registry.npmjs.org/@octokit/webhooks/-/webhooks-12.0.3.tgz", + "integrity": "sha512-8iG+/yza7hwz1RrQ7i7uGpK2/tuItZxZq1aTmeg2TNp2xTUB8F8lZF/FcZvyyAxT8tpDMF74TjFGCDACkf1kAQ==", + "dependencies": { + "@octokit/request-error": "^5.0.0", + "@octokit/webhooks-methods": "^4.0.0", + "@octokit/webhooks-types": "7.1.0", + "aggregate-error": "^3.1.0" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/webhooks-methods": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/webhooks-methods/-/webhooks-methods-4.0.0.tgz", + "integrity": "sha512-M8mwmTXp+VeolOS/kfRvsDdW+IO0qJ8kYodM/sAysk093q6ApgmBXwK1ZlUvAwXVrp/YVHp6aArj4auAxUAOFw==", + "engines": { + "node": ">= 18" + } + }, + "node_modules/@octokit/webhooks-types": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@octokit/webhooks-types/-/webhooks-types-7.1.0.tgz", + "integrity": "sha512-y92CpG4kFFtBBjni8LHoV12IegJ+KFxLgKRengrVjKmGE5XMeCuGvlfRe75lTRrgXaG6XIWJlFpIDTlkoJsU8w==" + }, 
"node_modules/@types/acorn": { "version": "4.0.6", "resolved": "https://registry.npmjs.org/@types/acorn/-/acorn-4.0.6.tgz", @@ -29,6 +356,16 @@ "@types/estree": "*" } }, + "node_modules/@types/aws-lambda": { + "version": "8.10.119", + "resolved": "https://registry.npmjs.org/@types/aws-lambda/-/aws-lambda-8.10.119.tgz", + "integrity": "sha512-Vqm22aZrCvCd6I5g1SvpW151jfqwTzEZ7XJ3yZ6xaZG31nUEOEyzzVImjRcsN8Wi/QyPxId/x8GTtgIbsy8kEw==" + }, + "node_modules/@types/btoa-lite": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@types/btoa-lite/-/btoa-lite-1.0.0.tgz", + "integrity": "sha512-wJsiX1tosQ+J5+bY5LrSahHxr2wT+uME5UDwdN1kg4frt40euqA+wzECkmq4t5QbveHiJepfdThgQrPw6KiSlg==" + }, "node_modules/@types/debug": { "version": "4.1.7", "resolved": "https://registry.npmmirror.com/@types/debug/-/debug-4.1.7.tgz", @@ -58,6 +395,14 @@ "@types/unist": "*" } }, + "node_modules/@types/jsonwebtoken": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz", + "integrity": "sha512-drE6uz7QBKq1fYqqoFKTDRdFCPHd5TCub75BM+D+cMx7NU9hUz7SESLfC2fSCXVFMO5Yj8sOWHuGqPgjc+fz0Q==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/mdast": { "version": "3.0.10", "resolved": "https://registry.npmmirror.com/@types/mdast/-/mdast-3.0.10.tgz", @@ -71,6 +416,11 @@ "resolved": "https://registry.npmmirror.com/@types/ms/-/ms-0.7.31.tgz", "integrity": "sha512-iiUgKzV9AuaEkZqkOLDIvlQiL6ltuZd9tGcW3gwpnX8JbuiuhFlEGmmFXEXkN50Cvq7Os88IY2v0dkDqXYWVgA==" }, + "node_modules/@types/node": { + "version": "20.4.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.4.5.tgz", + "integrity": "sha512-rt40Nk13II9JwQBdeYqmbn2Q6IVTA5uPhvSO+JVqdXw/6/4glI6oR9ezty/A9Hg5u7JH4OmYmuQ+XvjKm0Datg==" + }, "node_modules/@types/unist": { "version": "2.0.6", "resolved": "https://registry.npmmirror.com/@types/unist/-/unist-2.0.6.tgz", @@ -95,11 +445,48 @@ "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, + "node_modules/aggregate-error": { + 
"version": "3.1.0", + "resolved": "https://registry.npmjs.org/aggregate-error/-/aggregate-error-3.1.0.tgz", + "integrity": "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==", + "dependencies": { + "clean-stack": "^2.0.0", + "indent-string": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "node_modules/axios": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz", + "integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==", + "dependencies": { + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, "node_modules/balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, + "node_modules/before-after-hook": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-2.2.3.tgz", + "integrity": "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ==" + }, + "node_modules/bottleneck": { + "version": "2.19.5", + "resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz", + "integrity": "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw==" + }, "node_modules/brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", @@ -108,6 +495,16 @@ "balanced-match": "^1.0.0" } }, + "node_modules/btoa-lite": { + "version": "1.0.0", + "resolved": 
"https://registry.npmjs.org/btoa-lite/-/btoa-lite-1.0.0.tgz", + "integrity": "sha512-gvW7InbIyF8AicrqWoptdW08pUxuhq8BEgowNajy9RhiE86fmGAGl+bLKo6oB8QP0CkqHLowfN0oJdKC/J6LbA==" + }, + "node_modules/buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==" + }, "node_modules/ccount": { "version": "2.0.1", "resolved": "https://registry.npmmirror.com/ccount/-/ccount-2.0.1.tgz", @@ -133,6 +530,25 @@ "resolved": "https://registry.npmmirror.com/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz", "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==" }, + "node_modules/clean-stack": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/clean-stack/-/clean-stack-2.2.0.tgz", + "integrity": "sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==", + "engines": { + "node": ">=6" + } + }, + "node_modules/combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "dependencies": { + "delayed-stream": "~1.0.0" + }, + "engines": { + "node": ">= 0.8" + } + }, "node_modules/debug": { "version": "4.3.4", "resolved": "https://registry.npmmirror.com/debug/-/debug-4.3.4.tgz", @@ -157,6 +573,19 @@ "character-entities": "^2.0.0" } }, + "node_modules/delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/deprecation": { + 
"version": "2.3.1", + "resolved": "https://registry.npmjs.org/deprecation/-/deprecation-2.3.1.tgz", + "integrity": "sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==" + }, "node_modules/dequal": { "version": "2.0.2", "resolved": "https://registry.npmmirror.com/dequal/-/dequal-2.0.2.tgz", @@ -173,6 +602,14 @@ "node": ">=0.3.1" } }, + "node_modules/ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "dependencies": { + "safe-buffer": "^5.0.1" + } + }, "node_modules/escape-string-regexp": { "version": "5.0.0", "resolved": "https://registry.npmmirror.com/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz", @@ -211,6 +648,38 @@ "format": "^0.2.0" } }, + "node_modules/follow-redirects": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", + "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==", + "funding": [ + { + "type": "individual", + "url": "https://github.com/sponsors/RubenVerborgh" + } + ], + "engines": { + "node": ">=4.0" + }, + "peerDependenciesMeta": { + "debug": { + "optional": true + } + } + }, + "node_modules/form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "dependencies": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/format": { "version": "0.2.2", "resolved": "https://registry.npmmirror.com/format/-/format-0.2.2.tgz", @@ -242,6 +711,14 @@ "url": "https://github.com/sponsors/isaacs" } }, + 
"node_modules/indent-string": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", + "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==", + "engines": { + "node": ">=8" + } + }, "node_modules/inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -302,6 +779,48 @@ "resolved": "https://registry.npmmirror.com/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz", "integrity": "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==" }, + "node_modules/is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/jsonwebtoken": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.1.tgz", + "integrity": "sha512-K8wx7eJ5TPvEjuiVSkv167EVboBDv9PZdDoF7BgeQnBLVvZWW9clr2PsQHVJDTKaEIH5JBIwHujGcHp7GgI2eg==", + "dependencies": { + "jws": "^3.2.2", + "lodash": "^4.17.21", + "ms": "^2.1.1", + "semver": "^7.3.8" + }, + "engines": { + "node": ">=12", + "npm": ">=6" + } + }, + "node_modules/jwa": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-1.4.1.tgz", + "integrity": "sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==", + "dependencies": { + "buffer-equal-constant-time": "1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "node_modules/jws": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/jws/-/jws-3.2.2.tgz", + "integrity": "sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==", + "dependencies": { + "jwa": "^1.4.1", + "safe-buffer": "^5.0.1" + 
} + }, "node_modules/kleur": { "version": "4.1.4", "resolved": "https://registry.npmmirror.com/kleur/-/kleur-4.1.4.tgz", @@ -310,11 +829,24 @@ "node": ">=6" } }, + "node_modules/lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + }, "node_modules/longest-streak": { "version": "3.0.1", "resolved": "https://registry.npmmirror.com/longest-streak/-/longest-streak-3.0.1.tgz", "integrity": "sha512-cHlYSUpL2s7Fb3394mYxwTYj8niTaNHUCLr0qdiCXQfSjfuA7CKofpX2uSwEfFDQ0EB7JcnMnm+GjbqqoinYYg==" }, + "node_modules/lru-cache": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.0.0.tgz", + "integrity": "sha512-svTf/fzsKHffP42sujkO/Rjs37BCIsQVRCeNYIm9WN8rgT7ffoUnRtZCqU+6BqcSBdv8gwJeTz8knJpgACeQMw==", + "engines": { + "node": "14 || >=16.14" + } + }, "node_modules/markdown-table": { "version": "3.0.2", "resolved": "https://registry.npmmirror.com/markdown-table/-/markdown-table-3.0.2.tgz", @@ -950,6 +1482,25 @@ "resolved": "https://registry.npmmirror.com/micromark-util-types/-/micromark-util-types-1.0.2.tgz", "integrity": "sha512-DCfg/T8fcrhrRKTPjRrw/5LLvdGV7BHySf/1LOZx7TzWZdYRjogNtyNq885z3nNallwr3QUKARjqvHqX1/7t+w==" }, + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "dependencies": { + "mime-db": "1.52.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/minimatch": { "version": "5.1.0", 
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.0.tgz", @@ -974,6 +1525,26 @@ "resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "node_modules/octokit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/octokit/-/octokit-3.1.0.tgz", + "integrity": "sha512-dmIH5D+edpb4/ASd6ZGo6BiRR1g4ytu8lG4f+6XN/2AW+CSuTsT0nj1d6rv/HKgoflMQ1+rb3KlVWcvrmgQZhw==", + "dependencies": { + "@octokit/app": "^14.0.0", + "@octokit/core": "^5.0.0", + "@octokit/oauth-app": "^6.0.0", + "@octokit/plugin-paginate-graphql": "^4.0.0", + "@octokit/plugin-paginate-rest": "^8.0.0", + "@octokit/plugin-rest-endpoint-methods": "^9.0.0", + "@octokit/plugin-retry": "^6.0.0", + "@octokit/plugin-throttling": "^7.0.0", + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.1.0" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -997,6 +1568,11 @@ "is-hexadecimal": "^2.0.0" } }, + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "node_modules/sade": { "version": "1.8.1", "resolved": "https://registry.npmmirror.com/sade/-/sade-1.8.1.tgz", @@ -1008,6 +1584,50 @@ "node": ">=6" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": 
"https://feross.org/support" + } + ] + }, + "node_modules/semver": { + "version": "7.5.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", + "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==", + "dependencies": { + "lru-cache": "^6.0.0" + }, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/semver/node_modules/lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "dependencies": { + "yallist": "^4.0.0" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/stringify-entities": { "version": "4.0.2", "resolved": "https://registry.npmmirror.com/stringify-entities/-/stringify-entities-4.0.2.tgz", @@ -1079,6 +1699,20 @@ "unist-util-is": "^5.0.0" } }, + "node_modules/universal-github-app-jwt": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/universal-github-app-jwt/-/universal-github-app-jwt-1.1.1.tgz", + "integrity": "sha512-G33RTLrIBMFmlDV4u4CBF7dh71eWwykck4XgaxaIVeZKOYZRAAxvcGMRFTUclVY6xoUPQvO4Ne5wKGxYm/Yy9w==", + "dependencies": { + "@types/jsonwebtoken": "^9.0.0", + "jsonwebtoken": "^9.0.0" + } + }, + "node_modules/universal-user-agent": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", + "integrity": "sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==" + }, "node_modules/uvu": { "version": "0.5.3", "resolved": "https://registry.npmmirror.com/uvu/-/uvu-0.5.3.tgz", @@ -1138,6 +1772,11 @@ "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" }, + "node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + 
"integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" + }, "node_modules/zwitch": { "version": "2.0.2", "resolved": "https://registry.npmmirror.com/zwitch/-/zwitch-2.0.2.tgz", @@ -1145,6 +1784,251 @@ } }, "dependencies": { + "@octokit/app": { + "version": "14.0.0", + "resolved": "https://registry.npmjs.org/@octokit/app/-/app-14.0.0.tgz", + "integrity": "sha512-g/zDXttroZ9Se08shK0d0d/j0cgSA+h4WV7qGUevNEM0piNBkIlfb4Fm6bSwCNAZhNf72mBgERmYOoxicPkqdw==", + "requires": { + "@octokit/auth-app": "^6.0.0", + "@octokit/auth-unauthenticated": "^5.0.0", + "@octokit/core": "^5.0.0", + "@octokit/oauth-app": "^6.0.0", + "@octokit/plugin-paginate-rest": "^8.0.0", + "@octokit/types": "^11.1.0", + "@octokit/webhooks": "^12.0.1" + } + }, + "@octokit/auth-app": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-app/-/auth-app-6.0.0.tgz", + "integrity": "sha512-OKct7Rukf3g9DjpzcpdacQsdmd6oPrJ7fZND22JkjzhDvfhttUOnmh+qPS4kHhaNNyTxqSThnfrUWvkqNLd1nw==", + "requires": { + "@octokit/auth-oauth-app": "^7.0.0", + "@octokit/auth-oauth-user": "^4.0.0", + "@octokit/request": "^8.0.2", + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0", + "deprecation": "^2.3.1", + "lru-cache": "^10.0.0", + "universal-github-app-jwt": "^1.1.1", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/auth-oauth-app": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-oauth-app/-/auth-oauth-app-7.0.0.tgz", + "integrity": "sha512-8JvJEXGoEqrbzLwt3SwIUvkDd+1wrM8up0KawvDIElB8rbxPbvWppGO0SLKAWSJ0q8ILcVq+mWck6pDcZ3a9KA==", + "requires": { + "@octokit/auth-oauth-device": "^6.0.0", + "@octokit/auth-oauth-user": "^4.0.0", + "@octokit/request": "^8.0.2", + "@octokit/types": "^11.0.0", + "@types/btoa-lite": "^1.0.0", + "btoa-lite": "^1.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/auth-oauth-device": { + "version": "6.0.0", + "resolved": 
"https://registry.npmjs.org/@octokit/auth-oauth-device/-/auth-oauth-device-6.0.0.tgz", + "integrity": "sha512-Zgf/LKhwWk54rJaTGYVYtbKgUty+ouil6VQeRd+pCw7Gd0ECoSWaZuHK6uDGC/HtnWHjpSWFhzxPauDoHcNRtg==", + "requires": { + "@octokit/oauth-methods": "^4.0.0", + "@octokit/request": "^8.0.0", + "@octokit/types": "^11.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/auth-oauth-user": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-oauth-user/-/auth-oauth-user-4.0.0.tgz", + "integrity": "sha512-VOm5aIkVGHaOhIvsF/4YmSjoYDzzrKbbYkdSEO0KqHK7I8SlO3ZndSikQ1fBlNPUEH0ve2BOTxLrVvI1qBf9/Q==", + "requires": { + "@octokit/auth-oauth-device": "^6.0.0", + "@octokit/oauth-methods": "^4.0.0", + "@octokit/request": "^8.0.2", + "@octokit/types": "^11.0.0", + "btoa-lite": "^1.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/auth-token": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-token/-/auth-token-4.0.0.tgz", + "integrity": "sha512-tY/msAuJo6ARbK6SPIxZrPBms3xPbfwBrulZe0Wtr/DIY9lje2HeV1uoebShn6mx7SjCHif6EjMvoREj+gZ+SA==" + }, + "@octokit/auth-unauthenticated": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@octokit/auth-unauthenticated/-/auth-unauthenticated-5.0.0.tgz", + "integrity": "sha512-AjOI6FNB2dweJ85p6rf7D4EhE4y6VBcwYfX/7KJkR5Q9fD9ET6NABAjajUTSNFfCxmNIaQgISggZ3pkgwtTqsA==", + "requires": { + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0" + } + }, + "@octokit/core": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@octokit/core/-/core-5.0.0.tgz", + "integrity": "sha512-YbAtMWIrbZ9FCXbLwT9wWB8TyLjq9mxpKdgB3dUNxQcIVTf9hJ70gRPwAcqGZdY6WdJPZ0I7jLaaNDCiloGN2A==", + "requires": { + "@octokit/auth-token": "^4.0.0", + "@octokit/graphql": "^7.0.0", + "@octokit/request": "^8.0.2", + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0", + "before-after-hook": "^2.2.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/endpoint": { 
+ "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@octokit/endpoint/-/endpoint-9.0.0.tgz", + "integrity": "sha512-szrQhiqJ88gghWY2Htt8MqUDO6++E/EIXqJ2ZEp5ma3uGS46o7LZAzSLt49myB7rT+Hfw5Y6gO3LmOxGzHijAQ==", + "requires": { + "@octokit/types": "^11.0.0", + "is-plain-object": "^5.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/graphql": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/@octokit/graphql/-/graphql-7.0.1.tgz", + "integrity": "sha512-T5S3oZ1JOE58gom6MIcrgwZXzTaxRnxBso58xhozxHpOqSTgDS6YNeEUvZ/kRvXgPrRz/KHnZhtb7jUMRi9E6w==", + "requires": { + "@octokit/request": "^8.0.1", + "@octokit/types": "^11.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/oauth-app": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@octokit/oauth-app/-/oauth-app-6.0.0.tgz", + "integrity": "sha512-bNMkS+vJ6oz2hCyraT9ZfTpAQ8dZNqJJQVNaKjPLx4ue5RZiFdU1YWXguOPR8AaSHS+lKe+lR3abn2siGd+zow==", + "requires": { + "@octokit/auth-oauth-app": "^7.0.0", + "@octokit/auth-oauth-user": "^4.0.0", + "@octokit/auth-unauthenticated": "^5.0.0", + "@octokit/core": "^5.0.0", + "@octokit/oauth-authorization-url": "^6.0.2", + "@octokit/oauth-methods": "^4.0.0", + "@types/aws-lambda": "^8.10.83", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/oauth-authorization-url": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/@octokit/oauth-authorization-url/-/oauth-authorization-url-6.0.2.tgz", + "integrity": "sha512-CdoJukjXXxqLNK4y/VOiVzQVjibqoj/xHgInekviUJV73y/BSIcwvJ/4aNHPBPKcPWFnd4/lO9uqRV65jXhcLA==" + }, + "@octokit/oauth-methods": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/oauth-methods/-/oauth-methods-4.0.0.tgz", + "integrity": "sha512-dqy7BZLfLbi3/8X8xPKUKZclMEK9vN3fK5WF3ortRvtplQTszFvdAGbTo71gGLO+4ZxspNiLjnqdd64Chklf7w==", + "requires": { + "@octokit/oauth-authorization-url": "^6.0.2", + "@octokit/request": "^8.0.2", + "@octokit/request-error": "^5.0.0", + "@octokit/types": 
"^11.0.0", + "btoa-lite": "^1.0.0" + } + }, + "@octokit/openapi-types": { + "version": "18.0.0", + "resolved": "https://registry.npmjs.org/@octokit/openapi-types/-/openapi-types-18.0.0.tgz", + "integrity": "sha512-V8GImKs3TeQRxRtXFpG2wl19V7444NIOTDF24AWuIbmNaNYOQMWRbjcGDXV5B+0n887fgDcuMNOmlul+k+oJtw==" + }, + "@octokit/plugin-paginate-graphql": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-graphql/-/plugin-paginate-graphql-4.0.0.tgz", + "integrity": "sha512-7HcYW5tP7/Z6AETAPU14gp5H5KmCPT3hmJrS/5tO7HIgbwenYmgw4OY9Ma54FDySuxMwD+wsJlxtuGWwuZuItA==", + "requires": {} + }, + "@octokit/plugin-paginate-rest": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-paginate-rest/-/plugin-paginate-rest-8.0.0.tgz", + "integrity": "sha512-2xZ+baZWUg+qudVXnnvXz7qfrTmDeYPCzangBVq/1gXxii/OiS//4shJp9dnCCvj1x+JAm9ji1Egwm1BA47lPQ==", + "requires": { + "@octokit/types": "^11.0.0" + } + }, + "@octokit/plugin-rest-endpoint-methods": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-rest-endpoint-methods/-/plugin-rest-endpoint-methods-9.0.0.tgz", + "integrity": "sha512-KquMF/VB1IkKNiVnzJKspY5mFgGyLd7HzdJfVEGTJFzqu9BRFNWt+nwTCMuUiWc72gLQhRWYubTwOkQj+w/1PA==", + "requires": { + "@octokit/types": "^11.0.0" + } + }, + "@octokit/plugin-retry": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-retry/-/plugin-retry-6.0.0.tgz", + "integrity": "sha512-a1/A4A+PB1QoAHQfLJxGHhLfSAT03bR1jJz3GgQJZvty2ozawFWs93MiBQXO7SL2YbO7CIq0Goj4qLOBj8JeMQ==", + "requires": { + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.0.0", + "bottleneck": "^2.15.3" + } + }, + "@octokit/plugin-throttling": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/@octokit/plugin-throttling/-/plugin-throttling-7.0.0.tgz", + "integrity": "sha512-KL2k/d0uANc8XqP5S64YcNFCudR3F5AaKO39XWdUtlJIjT9Ni79ekWJ6Kj5xvAw87udkOMEPcVf9xEge2+ahew==", + "requires": { + "@octokit/types": 
"^11.0.0", + "bottleneck": "^2.15.3" + } + }, + "@octokit/request": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/@octokit/request/-/request-8.1.1.tgz", + "integrity": "sha512-8N+tdUz4aCqQmXl8FpHYfKG9GelDFd7XGVzyN8rc6WxVlYcfpHECnuRkgquzz+WzvHTK62co5di8gSXnzASZPQ==", + "requires": { + "@octokit/endpoint": "^9.0.0", + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.1.0", + "is-plain-object": "^5.0.0", + "universal-user-agent": "^6.0.0" + } + }, + "@octokit/request-error": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/@octokit/request-error/-/request-error-5.0.0.tgz", + "integrity": "sha512-1ue0DH0Lif5iEqT52+Rf/hf0RmGO9NWFjrzmrkArpG9trFfDM/efx00BJHdLGuro4BR/gECxCU2Twf5OKrRFsQ==", + "requires": { + "@octokit/types": "^11.0.0", + "deprecation": "^2.0.0", + "once": "^1.4.0" + } + }, + "@octokit/types": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/@octokit/types/-/types-11.1.0.tgz", + "integrity": "sha512-Fz0+7GyLm/bHt8fwEqgvRBWwIV1S6wRRyq+V6exRKLVWaKGsuy6H9QFYeBVDV7rK6fO3XwHgQOPxv+cLj2zpXQ==", + "requires": { + "@octokit/openapi-types": "^18.0.0" + } + }, + "@octokit/webhooks": { + "version": "12.0.3", + "resolved": "https://registry.npmjs.org/@octokit/webhooks/-/webhooks-12.0.3.tgz", + "integrity": "sha512-8iG+/yza7hwz1RrQ7i7uGpK2/tuItZxZq1aTmeg2TNp2xTUB8F8lZF/FcZvyyAxT8tpDMF74TjFGCDACkf1kAQ==", + "requires": { + "@octokit/request-error": "^5.0.0", + "@octokit/webhooks-methods": "^4.0.0", + "@octokit/webhooks-types": "7.1.0", + "aggregate-error": "^3.1.0" + } + }, + "@octokit/webhooks-methods": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@octokit/webhooks-methods/-/webhooks-methods-4.0.0.tgz", + "integrity": "sha512-M8mwmTXp+VeolOS/kfRvsDdW+IO0qJ8kYodM/sAysk093q6ApgmBXwK1ZlUvAwXVrp/YVHp6aArj4auAxUAOFw==" + }, + "@octokit/webhooks-types": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/@octokit/webhooks-types/-/webhooks-types-7.1.0.tgz", + "integrity": 
"sha512-y92CpG4kFFtBBjni8LHoV12IegJ+KFxLgKRengrVjKmGE5XMeCuGvlfRe75lTRrgXaG6XIWJlFpIDTlkoJsU8w==" + }, "@types/acorn": { "version": "4.0.6", "resolved": "https://registry.npmjs.org/@types/acorn/-/acorn-4.0.6.tgz", @@ -1153,6 +2037,16 @@ "@types/estree": "*" } }, + "@types/aws-lambda": { + "version": "8.10.119", + "resolved": "https://registry.npmjs.org/@types/aws-lambda/-/aws-lambda-8.10.119.tgz", + "integrity": "sha512-Vqm22aZrCvCd6I5g1SvpW151jfqwTzEZ7XJ3yZ6xaZG31nUEOEyzzVImjRcsN8Wi/QyPxId/x8GTtgIbsy8kEw==" + }, + "@types/btoa-lite": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@types/btoa-lite/-/btoa-lite-1.0.0.tgz", + "integrity": "sha512-wJsiX1tosQ+J5+bY5LrSahHxr2wT+uME5UDwdN1kg4frt40euqA+wzECkmq4t5QbveHiJepfdThgQrPw6KiSlg==" + }, "@types/debug": { "version": "4.1.7", "resolved": "https://registry.npmmirror.com/@types/debug/-/debug-4.1.7.tgz", @@ -1182,6 +2076,14 @@ "@types/unist": "*" } }, + "@types/jsonwebtoken": { + "version": "9.0.2", + "resolved": "https://registry.npmjs.org/@types/jsonwebtoken/-/jsonwebtoken-9.0.2.tgz", + "integrity": "sha512-drE6uz7QBKq1fYqqoFKTDRdFCPHd5TCub75BM+D+cMx7NU9hUz7SESLfC2fSCXVFMO5Yj8sOWHuGqPgjc+fz0Q==", + "requires": { + "@types/node": "*" + } + }, "@types/mdast": { "version": "3.0.10", "resolved": "https://registry.npmmirror.com/@types/mdast/-/mdast-3.0.10.tgz", @@ -1195,6 +2097,11 @@ "resolved": "https://registry.npmmirror.com/@types/ms/-/ms-0.7.31.tgz", "integrity": "sha512-iiUgKzV9AuaEkZqkOLDIvlQiL6ltuZd9tGcW3gwpnX8JbuiuhFlEGmmFXEXkN50Cvq7Os88IY2v0dkDqXYWVgA==" }, + "@types/node": { + "version": "20.4.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.4.5.tgz", + "integrity": "sha512-rt40Nk13II9JwQBdeYqmbn2Q6IVTA5uPhvSO+JVqdXw/6/4glI6oR9ezty/A9Hg5u7JH4OmYmuQ+XvjKm0Datg==" + }, "@types/unist": { "version": "2.0.6", "resolved": "https://registry.npmmirror.com/@types/unist/-/unist-2.0.6.tgz", @@ -1211,11 +2118,45 @@ "integrity": 
"sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", "requires": {} }, + "aggregate-error": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/aggregate-error/-/aggregate-error-3.1.0.tgz", + "integrity": "sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==", + "requires": { + "clean-stack": "^2.0.0", + "indent-string": "^4.0.0" + } + }, + "asynckit": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", + "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" + }, + "axios": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/axios/-/axios-1.4.0.tgz", + "integrity": "sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==", + "requires": { + "follow-redirects": "^1.15.0", + "form-data": "^4.0.0", + "proxy-from-env": "^1.1.0" + } + }, "balanced-match": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, + "before-after-hook": { + "version": "2.2.3", + "resolved": "https://registry.npmjs.org/before-after-hook/-/before-after-hook-2.2.3.tgz", + "integrity": "sha512-NzUnlZexiaH/46WDhANlyR2bXRopNg4F/zuSA3OpZnllCUgRaOF2znDioDWrmbNVsuZk6l9pMquQB38cfBZwkQ==" + }, + "bottleneck": { + "version": "2.19.5", + "resolved": "https://registry.npmjs.org/bottleneck/-/bottleneck-2.19.5.tgz", + "integrity": "sha512-VHiNCbI1lKdl44tGrhNfU3lup0Tj/ZBMJB5/2ZbNXRCPuRCO7ed2mgcK4r17y+KB2EfuYuRaVlwNbAeaWGSpbw==" + }, "brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", @@ -1224,6 +2165,16 @@ "balanced-match": "^1.0.0" } }, + "btoa-lite": { + "version": "1.0.0", + "resolved": 
"https://registry.npmjs.org/btoa-lite/-/btoa-lite-1.0.0.tgz", + "integrity": "sha512-gvW7InbIyF8AicrqWoptdW08pUxuhq8BEgowNajy9RhiE86fmGAGl+bLKo6oB8QP0CkqHLowfN0oJdKC/J6LbA==" + }, + "buffer-equal-constant-time": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/buffer-equal-constant-time/-/buffer-equal-constant-time-1.0.1.tgz", + "integrity": "sha512-zRpUiDwd/xk6ADqPMATG8vc9VPrkck7T07OIx0gnjmJAnHnTVXNQG3vfvWNuiZIkwu9KrKdA1iJKfsfTVxE6NA==" + }, "ccount": { "version": "2.0.1", "resolved": "https://registry.npmmirror.com/ccount/-/ccount-2.0.1.tgz", @@ -1249,6 +2200,19 @@ "resolved": "https://registry.npmmirror.com/character-reference-invalid/-/character-reference-invalid-2.0.1.tgz", "integrity": "sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==" }, + "clean-stack": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/clean-stack/-/clean-stack-2.2.0.tgz", + "integrity": "sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==" + }, + "combined-stream": { + "version": "1.0.8", + "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "requires": { + "delayed-stream": "~1.0.0" + } + }, "debug": { "version": "4.3.4", "resolved": "https://registry.npmmirror.com/debug/-/debug-4.3.4.tgz", @@ -1265,6 +2229,16 @@ "character-entities": "^2.0.0" } }, + "delayed-stream": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", + "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==" + }, + "deprecation": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/deprecation/-/deprecation-2.3.1.tgz", + "integrity": 
"sha512-xmHIy4F3scKVwMsQ4WnVaS8bHOx0DmVwRywosKhaILI0ywMDWPtBSku2HNxRvF7jtwDRsoEwYQSfbxj8b7RlJQ==" + }, "dequal": { "version": "2.0.2", "resolved": "https://registry.npmmirror.com/dequal/-/dequal-2.0.2.tgz", @@ -1275,6 +2249,14 @@ "resolved": "https://registry.npmmirror.com/diff/-/diff-5.0.0.tgz", "integrity": "sha512-/VTCrvm5Z0JGty/BWHljh+BAiw3IK+2j87NGMu8Nwc/f48WoDAC395uomO9ZD117ZOBaHmkX1oyLvkVM/aIT3w==" }, + "ecdsa-sig-formatter": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", + "integrity": "sha512-nagl3RYrbNv6kQkeJIpt6NJZy8twLB/2vtz6yN9Z4vRKHN4/QZJIEbqohALSgwKdnksuY3k5Addp5lg8sVoVcQ==", + "requires": { + "safe-buffer": "^5.0.1" + } + }, "escape-string-regexp": { "version": "5.0.0", "resolved": "https://registry.npmmirror.com/escape-string-regexp/-/escape-string-regexp-5.0.0.tgz", @@ -1302,6 +2284,21 @@ "format": "^0.2.0" } }, + "follow-redirects": { + "version": "1.15.2", + "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", + "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==" + }, + "form-data": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", + "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.8", + "mime-types": "^2.1.12" + } + }, "format": { "version": "0.2.2", "resolved": "https://registry.npmmirror.com/format/-/format-0.2.2.tgz", @@ -1324,6 +2321,11 @@ "once": "^1.3.0" } }, + "indent-string": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/indent-string/-/indent-string-4.0.0.tgz", + "integrity": "sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==" + }, "inflight": { "version": "1.0.6", "resolved": 
"https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -1367,16 +2369,61 @@ "resolved": "https://registry.npmmirror.com/is-hexadecimal/-/is-hexadecimal-2.0.1.tgz", "integrity": "sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==" }, + "is-plain-object": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==" + }, + "jsonwebtoken": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/jsonwebtoken/-/jsonwebtoken-9.0.1.tgz", + "integrity": "sha512-K8wx7eJ5TPvEjuiVSkv167EVboBDv9PZdDoF7BgeQnBLVvZWW9clr2PsQHVJDTKaEIH5JBIwHujGcHp7GgI2eg==", + "requires": { + "jws": "^3.2.2", + "lodash": "^4.17.21", + "ms": "^2.1.1", + "semver": "^7.3.8" + } + }, + "jwa": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/jwa/-/jwa-1.4.1.tgz", + "integrity": "sha512-qiLX/xhEEFKUAJ6FiBMbes3w9ATzyk5W7Hvzpa/SLYdxNtng+gcurvrI7TbACjIXlsJyr05/S1oUhZrc63evQA==", + "requires": { + "buffer-equal-constant-time": "1.0.1", + "ecdsa-sig-formatter": "1.0.11", + "safe-buffer": "^5.0.1" + } + }, + "jws": { + "version": "3.2.2", + "resolved": "https://registry.npmjs.org/jws/-/jws-3.2.2.tgz", + "integrity": "sha512-YHlZCB6lMTllWDtSPHz/ZXTsi8S00usEV6v1tjq8tOUZzw7DpSDWVXjXDre6ed1w/pd495ODpHZYSdkRTsa0HA==", + "requires": { + "jwa": "^1.4.1", + "safe-buffer": "^5.0.1" + } + }, "kleur": { "version": "4.1.4", "resolved": "https://registry.npmmirror.com/kleur/-/kleur-4.1.4.tgz", "integrity": "sha512-8QADVssbrFjivHWQU7KkMgptGTl6WAcSdlbBPY4uNF+mWr6DGcKrvY2w4FQJoXch7+fKMjj0dRrL75vk3k23OA==" }, + "lodash": { + "version": "4.17.21", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + }, "longest-streak": { "version": "3.0.1", "resolved": 
"https://registry.npmmirror.com/longest-streak/-/longest-streak-3.0.1.tgz", "integrity": "sha512-cHlYSUpL2s7Fb3394mYxwTYj8niTaNHUCLr0qdiCXQfSjfuA7CKofpX2uSwEfFDQ0EB7JcnMnm+GjbqqoinYYg==" }, + "lru-cache": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.0.0.tgz", + "integrity": "sha512-svTf/fzsKHffP42sujkO/Rjs37BCIsQVRCeNYIm9WN8rgT7ffoUnRtZCqU+6BqcSBdv8gwJeTz8knJpgACeQMw==" + }, "markdown-table": { "version": "3.0.2", "resolved": "https://registry.npmmirror.com/markdown-table/-/markdown-table-3.0.2.tgz", @@ -1958,6 +3005,19 @@ "resolved": "https://registry.npmmirror.com/micromark-util-types/-/micromark-util-types-1.0.2.tgz", "integrity": "sha512-DCfg/T8fcrhrRKTPjRrw/5LLvdGV7BHySf/1LOZx7TzWZdYRjogNtyNq885z3nNallwr3QUKARjqvHqX1/7t+w==" }, + "mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==" + }, + "mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "requires": { + "mime-db": "1.52.0" + } + }, "minimatch": { "version": "5.1.0", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.1.0.tgz", @@ -1976,6 +3036,23 @@ "resolved": "https://registry.npmmirror.com/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "octokit": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/octokit/-/octokit-3.1.0.tgz", + "integrity": "sha512-dmIH5D+edpb4/ASd6ZGo6BiRR1g4ytu8lG4f+6XN/2AW+CSuTsT0nj1d6rv/HKgoflMQ1+rb3KlVWcvrmgQZhw==", + "requires": { + "@octokit/app": "^14.0.0", + "@octokit/core": "^5.0.0", + "@octokit/oauth-app": "^6.0.0", + "@octokit/plugin-paginate-graphql": "^4.0.0", + 
"@octokit/plugin-paginate-rest": "^8.0.0", + "@octokit/plugin-rest-endpoint-methods": "^9.0.0", + "@octokit/plugin-retry": "^6.0.0", + "@octokit/plugin-throttling": "^7.0.0", + "@octokit/request-error": "^5.0.0", + "@octokit/types": "^11.1.0" + } + }, "once": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", @@ -1999,6 +3076,11 @@ "is-hexadecimal": "^2.0.0" } }, + "proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" + }, "sade": { "version": "1.8.1", "resolved": "https://registry.npmmirror.com/sade/-/sade-1.8.1.tgz", @@ -2007,6 +3089,29 @@ "mri": "^1.1.0" } }, + "safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==" + }, + "semver": { + "version": "7.5.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.5.4.tgz", + "integrity": "sha512-1bCSESV6Pv+i21Hvpxp3Dx+pSD8lIPt8uVjRrxAUt/nbswYc+tK6Y2btiULjd4+fnq15PX+nqQDC7Oft7WkwcA==", + "requires": { + "lru-cache": "^6.0.0" + }, + "dependencies": { + "lru-cache": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-6.0.0.tgz", + "integrity": "sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==", + "requires": { + "yallist": "^4.0.0" + } + } + } + }, "stringify-entities": { "version": "4.0.2", "resolved": "https://registry.npmmirror.com/stringify-entities/-/stringify-entities-4.0.2.tgz", @@ -2076,6 +3181,20 @@ "unist-util-is": "^5.0.0" } }, + "universal-github-app-jwt": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/universal-github-app-jwt/-/universal-github-app-jwt-1.1.1.tgz", + "integrity": 
"sha512-G33RTLrIBMFmlDV4u4CBF7dh71eWwykck4XgaxaIVeZKOYZRAAxvcGMRFTUclVY6xoUPQvO4Ne5wKGxYm/Yy9w==", + "requires": { + "@types/jsonwebtoken": "^9.0.0", + "jsonwebtoken": "^9.0.0" + } + }, + "universal-user-agent": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/universal-user-agent/-/universal-user-agent-6.0.0.tgz", + "integrity": "sha512-isyNax3wXoKaulPDZWHQqbmIx1k2tb9fb3GGDBRxCscfYV2Ch7WxPArBsFEG8s/safwXTT7H4QGhaIkTp9447w==" + }, "uvu": { "version": "0.5.3", "resolved": "https://registry.npmmirror.com/uvu/-/uvu-0.5.3.tgz", @@ -2121,6 +3240,11 @@ "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", "integrity": "sha1-tSQ9jz7BqjXxNkYFvA0QNuMKtp8=" }, + "yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" + }, "zwitch": { "version": "2.0.2", "resolved": "https://registry.npmmirror.com/zwitch/-/zwitch-2.0.2.tgz", diff --git a/package.json b/package.json index 5bcea2c612fe7..7367d12460c5a 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "license": "MIT", "type": "module", "dependencies": { + "axios": "^1.4.0", "glob": "^8.0.3", "mdast-util-from-markdown": "^1.2.0", "mdast-util-frontmatter": "^1.0.0", @@ -14,6 +15,7 @@ "micromark-extension-frontmatter": "^1.0.0", "micromark-extension-gfm": "^2.0.1", "micromark-extension-mdxjs": "^1.0.0", + "octokit": "^3.1.0", "unist-util-visit": "^4.1.0" } } diff --git a/partitioned-raft-kv.md b/partitioned-raft-kv.md index 6782268d9dbae..ce9d6883a417d 100644 --- a/partitioned-raft-kv.md +++ b/partitioned-raft-kv.md @@ -35,7 +35,13 @@ To enable Partitioned Raft KV, set the configuration item [`storage.engine`](/ti Partitioned Raft KV has the following restrictions: -* It does not support data import, replication, and backup tools, such as TiDB Lightning, TiCDC, BR, and Dumping. 
-* It does not support the tikv-ctl command-line tool. -* It cannot be used together with TiFlash. -* You can only enable Partitioned Raft KV when creating a cluster and cannot change the type of engine after the cluster is created. +* It does not support EBS volume snapshot backup yet. +* It does not support online unsafe recovery or Titan yet. +* It does not support the following subcommands of the tikv-ctl command-line tool: + * `unsafe-recover` + * `raw-scan` + * `remove-fail-stores` + * `recreate-region` + * `reset-to-version` +* It is not compatible with TiFlash yet. +* You cannot enable or disable this feature after the cluster is initialized. diff --git a/partitioned-table.md b/partitioned-table.md index 8a5029630e063..b283a56870972 100644 --- a/partitioned-table.md +++ b/partitioned-table.md @@ -177,22 +177,16 @@ CREATE TABLE t ( name varchar(255) CHARACTER SET ascii, notes text ) -PARTITION BY RANGE COLUMNS(name,valid_until) +PARTITION BY RANGE COLUMNS(name, valid_until) (PARTITION `p2022-g` VALUES LESS THAN ('G','2023-01-01 00:00:00'), PARTITION `p2023-g` VALUES LESS THAN ('G','2024-01-01 00:00:00'), - PARTITION `p2024-g` VALUES LESS THAN ('G','2025-01-01 00:00:00'), PARTITION `p2022-m` VALUES LESS THAN ('M','2023-01-01 00:00:00'), PARTITION `p2023-m` VALUES LESS THAN ('M','2024-01-01 00:00:00'), - PARTITION `p2024-m` VALUES LESS THAN ('M','2025-01-01 00:00:00'), PARTITION `p2022-s` VALUES LESS THAN ('S','2023-01-01 00:00:00'), - PARTITION `p2023-s` VALUES LESS THAN ('S','2024-01-01 00:00:00'), - PARTITION `p2024-s` VALUES LESS THAN ('S','2025-01-01 00:00:00'), - PARTITION `p2022-` VALUES LESS THAN (0x7f,'2023-01-01 00:00:00'), - PARTITION `p2023-` VALUES LESS THAN (0x7f,'2024-01-01 00:00:00'), - PARTITION `p2024-` VALUES LESS THAN (0x7f,'2025-01-01 00:00:00')) + PARTITION `p2023-s` VALUES LESS THAN ('S','2024-01-01 00:00:00')) ``` -It will partition the data by year and by name in the ranges ['', 'G'), ['G', 'M'), ['M', 'S') and ['S',). 
It allows you to easily drop invalid data while still benefit from partition pruning on both `name` and `valid_until` columns. In this example, `[,)` indicates a left-closed, right-open range. For example, ['G', 'M') indicates a range containing `G` and from `G` to `M`, but excluding `M`. +The preceding SQL statement will partition the data by year and by name in the ranges `[ ('', ''), ('G', '2023-01-01 00:00:00') )`, `[ ('G', '2023-01-01 00:00:00'), ('G', '2024-01-01 00:00:00') )`, `[ ('G', '2024-01-01 00:00:00'), ('M', '2023-01-01 00:00:00') )`, `[ ('M', '2023-01-01 00:00:00'), ('M', '2024-01-01 00:00:00') )`, `[ ('M', '2024-01-01 00:00:00'), ('S', '2023-01-01 00:00:00') )`, and `[ ('S', '2023-01-01 00:00:00'), ('S', '2024-01-01 00:00:00') )`. It allows you to easily drop invalid data while still benefit from partition pruning on both `name` and `valid_until` columns. In this example, `[,)` indicates a left-closed, right-open range. For example, `[ ('G', '2023-01-01 00:00:00'), ('G', '2024-01-01 00:00:00') )` indicates a range of data whose name is `'G'`, the year contains `2023-01-01 00:00:00` and is greater than `2023-01-01 00:00:00` but less than `2024-01-01 00:00:00`. It does not include `(G, 2024-01-01 00:00:00)`. ### Range INTERVAL partitioning @@ -305,15 +299,10 @@ ALTER TABLE table_name LAST PARTITION LESS THAN () ### List partitioning -Before creating a List partitioned table, you need to set the value of the session variable `tidb_enable_list_partition` to `ON`. +Before creating a List partitioned table, make sure the following system variables are set to their default values of `ON`: -{{< copyable "sql" >}} - -```sql -set @@session.tidb_enable_list_partition = ON -``` - -Also, make sure that `tidb_enable_table_partition` is set to `ON`, which is the default setting. 
+- [`tidb_enable_list_partition`](/system-variables.md#tidb_enable_list_partition-new-in-v50) +- [`tidb_enable_table_partition`](/system-variables.md#tidb_enable_table_partition) List partitioning is similar to Range partitioning. Unlike Range partitioning, in List partitioning, the partitioning expression values for all rows in each partition are in a given value set. This value set defined for each partition can have any number of values but cannot have duplicate values. You can use the `PARTITION ... VALUES IN (...)` clause to define a value set. @@ -362,24 +351,82 @@ After creating the partitions as above, you can easily add or delete records rel You can also execute `ALTER TABLE employees DROP PARTITION pEast` to delete all related rows, but this statement also deletes the `pEast` partition from the table definition. In this situation, you must execute the `ALTER TABLE ... ADD PARTITION` statement to recover the original partitioning scheme of the table. -Unlike Range partitioning, List partitioning does not have a similar `MAXVALUE` partition to store all values that do not belong to other partitions. Instead, all expected values of the partition expression must be included in the `PARTITION ... VALUES IN (...)` clause. If the value to be inserted in an `INSERT` statement does not match the column value set of any partition, the statement fails to execute and an error is reported. See the following example: +#### Default List partition + +Starting from v7.3.0, you can add a default partition to a List or List COLUMNS partitioned table. The default partition acts as a fallback partition, where rows that do not match value sets of any partitions can be placed. + +> **Note:** +> +> This feature is a TiDB extension to MySQL syntax. For a List or List COLUMNS partitioned table with a default partition, the data in the table cannot be directly replicated to MySQL. 
+ +Take the following List partitioned table as an example: ```sql -test> CREATE TABLE t ( - -> a INT, - -> b INT - -> ) - -> PARTITION BY LIST (a) ( - -> PARTITION p0 VALUES IN (1, 2, 3), - -> PARTITION p1 VALUES IN (4, 5, 6) - -> ); +CREATE TABLE t ( + a INT, + b INT +) +PARTITION BY LIST (a) ( + PARTITION p0 VALUES IN (1, 2, 3), + PARTITION p1 VALUES IN (4, 5, 6) +); Query OK, 0 rows affected (0.11 sec) +``` + +You can add a default list partition named `pDef` to the table as follows: -test> INSERT INTO t VALUES (7, 7); +```sql +ALTER TABLE t ADD PARTITION (PARTITION pDef DEFAULT); +``` + +or + +```sql +ALTER TABLE t ADD PARTITION (PARTITION pDef VALUES IN (DEFAULT)); +``` + +In this way, newly inserted values that do not match value sets of any partitions can automatically go into the default partition. + +```sql +INSERT INTO t VALUES (7, 7); +Query OK, 1 row affected (0.01 sec) +``` + +You can also add a default partition when creating a List or List COLUMNS partitioned table. For example: + +```sql +CREATE TABLE employees ( + id INT NOT NULL, + hired DATE NOT NULL DEFAULT '1970-01-01', + store_id INT +) +PARTITION BY LIST (store_id) ( + PARTITION pNorth VALUES IN (1, 2, 3, 4, 5), + PARTITION pEast VALUES IN (6, 7, 8, 9, 10), + PARTITION pWest VALUES IN (11, 12, 13, 14, 15), + PARTITION pCentral VALUES IN (16, 17, 18, 19, 20), + PARTITION pDefault DEFAULT +); +``` + +For a List or List COLUMNS partitioned table without a default partition, the values to be inserted using an `INSERT` statement must match value sets defined in the `PARTITION ... VALUES IN (...)` clauses of the table. 
If the values to be inserted do not match value sets of any partitions, the statement will fail and an error is returned, as shown in the following example: + +```sql +CREATE TABLE t ( + a INT, + b INT +) +PARTITION BY LIST (a) ( + PARTITION p0 VALUES IN (1, 2, 3), + PARTITION p1 VALUES IN (4, 5, 6) +); +Query OK, 0 rows affected (0.11 sec) + +INSERT INTO t VALUES (7, 7); ERROR 1525 (HY000): Table has no partition for value 7 ``` -To ignore the error type above, you can use the `IGNORE` keyword. After using this keyword, if a row contains values that do not match the column value set of any partition, this row will not be inserted. Instead, any row with matched values is inserted, and no error is reported: +To ignore the preceding error, you can add the `IGNORE` keyword to the `INSERT` statement. After this keyword is added, the `INSERT` statement will only insert rows that match the partition value sets and will not insert unmatched rows, without returning an error: ```sql test> TRUNCATE t; @@ -811,8 +858,8 @@ For Key partitioning, the way of handling `NULL` value is consistent with that o For `RANGE`, `RANGE COLUMNS`, `LIST`, and `LIST COLUMNS` partitioned tables, you can manage the partitions as follows: -- Add partitions using the `ALTER TABLE ADD PARTITION ()` statement. -- Drop partitions using the `ALTER TABLE
DROP PARTITION ` statement. +- Add partitions using the `ALTER TABLE
ADD PARTITION ()` statement. +- Drop partitions using the `ALTER TABLE
DROP PARTITION ` statement. - Remove all data from specified partitions using the `ALTER TABLE
TRUNCATE PARTITION ` statement. The logic of `TRUNCATE PARTITION` is similar to [`TRUNCATE TABLE`](/sql-statements/sql-statement-truncate.md) but it is for partitions. - Merge, split, or make other changes to the partitions using the `ALTER TABLE
REORGANIZE PARTITION INTO ()` statement. @@ -1597,7 +1644,7 @@ Currently, TiDB supports Range partitioning, Range COLUMNS partitioning, List pa Currently, TiDB does not support using an empty partition column list for Key partitioning. -With regard to partition management, any operation that requires moving data in the bottom implementation is not supported currently, including but not limited to: adjust the number of partitions in a Hash partitioned table, modify the Range of a Range partitioned table, merge partitions and exchange partitions. +With regard to partition management, any operation that requires moving data in the bottom implementation is not supported currently, including but not limited to: adjust the number of partitions in a Hash partitioned table, modify the Range of a Range partitioned table, and merge partitions. For the unsupported partitioning types, when you create a table in TiDB, the partitioning information is ignored and the table is created in the regular form with a warning reported. 
@@ -1774,10 +1821,10 @@ In `static` mode, TiDB accesses each partition separately using multiple operato ```sql mysql> create table t1(id int, age int, key(id)) partition by range(id) ( - -> partition p0 values less than (100), - -> partition p1 values less than (200), - -> partition p2 values less than (300), - -> partition p3 values less than (400)); + partition p0 values less than (100), + partition p1 values less than (200), + partition p2 values less than (300), + partition p3 values less than (400)); Query OK, 0 rows affected (0.01 sec) mysql> explain select * from t1 where id < 150; @@ -1827,10 +1874,10 @@ From the above query results, you can see that the `Union` operator in the execu ```sql mysql> create table t1 (id int, age int, key(id)) partition by range(id) - -> (partition p0 values less than (100), - -> partition p1 values less than (200), - -> partition p2 values less than (300), - -> partition p3 values less than (400)); + (partition p0 values less than (100), + partition p1 values less than (200), + partition p2 values less than (300), + partition p3 values less than (400)); Query OK, 0 rows affected (0,08 sec) mysql> create table t2 (id int, code int); @@ -1926,12 +1973,14 @@ Currently, neither `static` nor `dynamic` pruning mode supports prepared stateme 2. 
Generate the statements for updating the statistics of all partitioned tables: - {{< copyable "sql" >}} - ```sql - select distinct concat('ANALYZE TABLE ',TABLE_SCHEMA,'.',TABLE_NAME,' ALL COLUMNS;') - from information_schema.PARTITIONS - where TABLE_SCHEMA not in ('INFORMATION_SCHEMA','mysql','sys','PERFORMANCE_SCHEMA','METRICS_SCHEMA'); + SELECT DISTINCT CONCAT('ANALYZE TABLE ',TABLE_SCHEMA,'.',TABLE_NAME,' ALL COLUMNS;') + FROM information_schema.PARTITIONS + WHERE TIDB_PARTITION_ID IS NOT NULL + AND TABLE_SCHEMA NOT IN ('INFORMATION_SCHEMA','mysql','sys','PERFORMANCE_SCHEMA','METRICS_SCHEMA'); + ``` + + ``` +----------------------------------------------------------------------+ | concat('ANALYZE TABLE ',TABLE_SCHEMA,'.',TABLE_NAME,' ALL COLUMNS;') | +----------------------------------------------------------------------+ @@ -1944,12 +1993,11 @@ Currently, neither `static` nor `dynamic` pruning mode supports prepared stateme 3. Export the batch update statements to a file: - {{< copyable "sql" >}} - - ```sql - mysql --host xxxx --port xxxx -u root -p -e "select distinct concat('ANALYZE TABLE ',TABLE_SCHEMA,'.',TABLE_NAME,' ALL COLUMNS;') \ - from information_schema.PARTITIONS \ - where TABLE_SCHEMA not in ('INFORMATION_SCHEMA','mysql','sys','PERFORMANCE_SCHEMA','METRICS_SCHEMA');" | tee gatherGlobalStats.sql + ```shell + mysql --host xxxx --port xxxx -u root -p -e "SELECT DISTINCT CONCAT('ANALYZE TABLE ',TABLE_SCHEMA,'.',TABLE_NAME,' ALL COLUMNS;') \ + FROM information_schema.PARTITIONS \ + WHERE TIDB_PARTITION_ID IS NOT NULL \ + AND TABLE_SCHEMA NOT IN ('INFORMATION_SCHEMA','mysql','sys','PERFORMANCE_SCHEMA','METRICS_SCHEMA');" | tee gatherGlobalStats.sql ``` 4. 
Execute a batch update: diff --git a/pd-configuration-file.md b/pd-configuration-file.md index 4d5beae52e140..5fec37f838d67 100644 --- a/pd-configuration-file.md +++ b/pd-configuration-file.md @@ -165,6 +165,17 @@ Configuration items related to pd-server > > If you have upgraded your cluster from a TiDB 4.0 version to the current version, the behavior of `flow-round-by-digit` after the upgrading and the behavior of `trace-region-flow` before the upgrading are consistent by default. This means that if the value of `trace-region-flow` is false before the upgrading, the value of `flow-round-by-digit` after the upgrading is 127; if the value of `trace-region-flow` is `true` before the upgrading, the value of `flow-round-by-digit` after the upgrading is `3`. +### `min-resolved-ts-persistence-interval` New in v6.0.0 + ++ Determines the interval at which the minimum resolved timestamp is persistent to the PD. If this value is set to `0`, it means that the persistence is disabled. ++ Default value: Before v6.3.0, the default value is `"0s"`. Starting from v6.3.0, the default value is `"1s"`, which is the smallest positive value. ++ Minimum value: `0` ++ Unit: second + +> **Note:** +> +> For clusters upgraded from v6.0.0~v6.2.0, the default value of `min-resolved-ts-persistence-interval` does not change after the upgrade, which means that it will remain `"0s"`. To enable this feature, you need to manually change the value of this configuration item. + ## security Configuration items related to security @@ -301,7 +312,7 @@ Configuration items related to scheduling ### `enable-diagnostic` New in v6.3.0 + Controls whether to enable the diagnostic feature. When it is enabled, PD records the state during scheduling to help diagnose. If enabled, it might slightly affect the scheduling speed and consume more memory when there are many stores. -+ Default value: true ++ Default value: Starting from v7.1.0, the default value is changed from `false` to `true`. 
If your cluster is upgraded from a version earlier than v7.1.0 to v7.1.0 or later, the default value does not change. ### `hot-region-schedule-limit` diff --git a/pd-control.md b/pd-control.md index 728ffcc6a8e0d..20e391c4b9852 100644 --- a/pd-control.md +++ b/pd-control.md @@ -24,12 +24,12 @@ To obtain `pd-ctl` of the latest version, download the TiDB server installation | Installation package | OS | Architecture | SHA256 checksum | | :------------------------------------------------------------------------ | :------- | :---- | :--------------------------------------------------------------- | -| `https://download.pingcap.org/tidb-community-server-{version}-linux-amd64.tar.gz` (pd-ctl) | Linux | amd64 | `https://download.pingcap.org/tidb-community-server-{version}-linux-amd64.sha256` | -| `https://download.pingcap.org/tidb-community-server-{version}-linux-arm64.tar.gz` (pd-ctl) | Linux | arm64 | `https://download.pingcap.org/tidb-community-server-{version}-linux-arm64.sha256` | +| `https://download.pingcap.org/tidb-community-server-{version}-linux-amd64.tar.gz` (pd-ctl) | Linux | amd64 | `https://download.pingcap.org/tidb-community-server-{version}-linux-amd64.tar.gz.sha256` | +| `https://download.pingcap.org/tidb-community-server-{version}-linux-arm64.tar.gz` (pd-ctl) | Linux | arm64 | `https://download.pingcap.org/tidb-community-server-{version}-linux-arm64.tar.gz.sha256` | > **Note:** > -> `{version}` in the link indicates the version number of TiDB. For example, the download link for `v7.0.0` in the `amd64` architecture is `https://download.pingcap.org/tidb-community-server-v7.0.0-linux-amd64.tar.gz`. +> `{version}` in the link indicates the version number of TiDB. For example, the download link for `v7.3.0` in the `amd64` architecture is `https://download.pingcap.org/tidb-community-server-v7.3.0-linux-amd64.tar.gz`. ### Compile from source code @@ -334,7 +334,7 @@ Usage: - `store-limit-mode` is used to control the mode of limiting the store speed. 
The optional modes are `auto` and `manual`. In `auto` mode, the stores are automatically balanced according to the load (deprecated). - `store-limit-version` controls the version of the store limit formula. In v1 mode, you can manually modify the `store limit` to limit the scheduling speed of a single TiKV. The v2 mode is an experimental feature. In v2 mode, you do not need to manually set the `store limit` value, as PD dynamically adjusts it based on the capability of TiKV snapshots. For more details, refer to [Principles of store limit v2](/configure-store-limit.md#principles-of-store-limit-v2). - + ```bash config set store-limit-version v2 // using store limit v2 ``` diff --git a/post-installation-check.md b/post-installation-check.md index b97f3b92fadfa..ae0079adfb5b3 100644 --- a/post-installation-check.md +++ b/post-installation-check.md @@ -63,7 +63,7 @@ The following information indicates successful login: ```sql Welcome to the MySQL monitor. Commands end with ; or \g. Your MySQL connection id is 3 -Server version: 5.7.25-TiDB-v5.0.0 TiDB Server (Apache License 2.0) Community Edition, MySQL 5.7 compatible +Server version: 5.7.25-TiDB-v7.3.0 TiDB Server (Apache License 2.0) Community Edition, MySQL 5.7 compatible Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. Oracle is a registered trademark of Oracle Corporation and/or its affiliates. Other names may be trademarks of their respective diff --git a/privilege-management.md b/privilege-management.md index 3e7394838021f..2cc48fe20902d 100644 --- a/privilege-management.md +++ b/privilege-management.md @@ -271,6 +271,10 @@ mysql> SELECT * FROM INFORMATION_SCHEMA.USER_PRIVILEGES WHERE grantee = "'root'@ Requires the `SUPER` or `BACKUP_ADMIN` privilege. +### CANCEL IMPORT JOB + +Requires the `SUPER` privilege to cancel jobs created by other users. Otherwise, only jobs created by the current user can be canceled. + ### CREATE DATABASE Requires the `CREATE` privilege for the database. 
@@ -305,6 +309,10 @@ Requires the `INDEX` privilege for the table. Requires the `DROP` privilege for the table. +### IMPORT INTO + +Requires the `SELECT`, `UPDATE`, `INSERT`, `DELETE`, and `ALTER` privileges for the target table. To import files stored locally in TiDB, the `FILE` privilege is also required. + ### LOAD DATA Requires the `INSERT` privilege for the table. When you use `REPLACE INTO`, the `DELETE` privilege is also required. @@ -329,7 +337,9 @@ Requires the `INSERT` and `SELECT` privileges for the table. `SHOW GRANTS` requires the `SELECT` privilege to the `mysql` database. If the target user is current user, `SHOW GRANTS` does not require any privilege. -`SHOW PROCESSLIST` requires `SUPER` to show connections belonging to other users. +`SHOW PROCESSLIST` requires the `SUPER` privilege to show connections belonging to other users. + +`SHOW IMPORT JOB` requires the `SUPER` privilege to show connections belonging to other users. Otherwise, it only shows jobs created by the current user. ### CREATE ROLE/USER diff --git a/production-deployment-using-tiup.md b/production-deployment-using-tiup.md index 6299b4dd65540..fe0f788580a1a 100644 --- a/production-deployment-using-tiup.md +++ b/production-deployment-using-tiup.md @@ -139,12 +139,12 @@ Method 2: Manually pack an offline component package using `tiup mirror clone`. If you want to adjust an existing offline mirror (such as adding a new version of a component), take the following steps: - 1. When pulling an offline mirror, you can get an incomplete offline mirror by specifying specific information via parameters, such as the component and version information. For example, you can pull an offline mirror that includes only the offline mirror of TiUP v1.11.3 and TiUP Cluster v1.11.3 by running the following command: + 1. When pulling an offline mirror, you can get an incomplete offline mirror by specifying specific information via parameters, such as the component and version information. 
For example, you can pull an offline mirror that includes only the offline mirror of TiUP v1.12.3 and TiUP Cluster v1.12.3 by running the following command: {{< copyable "shell-regular" >}} ```bash - tiup mirror clone tiup-custom-mirror-v1.11.3 --tiup v1.11.3 --cluster v1.11.3 + tiup mirror clone tiup-custom-mirror-v1.12.3 --tiup v1.12.3 --cluster v1.12.3 ``` If you only need the components for a particular platform, you can specify them using the `--os` or `--arch` parameters. @@ -176,10 +176,10 @@ Method 2: Manually pack an offline component package using `tiup mirror clone`. {{< copyable "shell-regular" >}} ```bash - tiup mirror merge tiup-custom-mirror-v1.11.3 + tiup mirror merge tiup-custom-mirror-v1.12.3 ``` - 5. When the above steps are completed, check the result by running the `tiup list` command. In this document's example, the outputs of both `tiup list tiup` and `tiup list cluster` show that the corresponding components of `v1.11.3` are available. + 5. When the above steps are completed, check the result by running the `tiup list` command. In this document's example, the outputs of both `tiup list tiup` and `tiup list cluster` show that the corresponding components of `v1.12.3` are available. #### Deploy the offline TiUP component @@ -334,13 +334,13 @@ Before you run the `deploy` command, use the `check` and `check --apply` command {{< copyable "shell-regular" >}} ```shell - tiup cluster deploy tidb-test v7.0.0 ./topology.yaml --user root [-p] [-i /home/root/.ssh/gcp_rsa] + tiup cluster deploy tidb-test v7.3.0 ./topology.yaml --user root [-p] [-i /home/root/.ssh/gcp_rsa] ``` In the `tiup cluster deploy` command above: - `tidb-test` is the name of the TiDB cluster to be deployed. -- `v7.0.0` is the version of the TiDB cluster to be deployed. You can see the latest supported versions by running `tiup list tidb`. +- `v7.3.0` is the version of the TiDB cluster to be deployed. You can see the latest supported versions by running `tiup list tidb`. 
- `topology.yaml` is the initialization configuration file. - `--user root` indicates logging into the target machine as the `root` user to complete the cluster deployment. The `root` user is expected to have `ssh` and `sudo` privileges to the target machine. Alternatively, you can use other users with `ssh` and `sudo` privileges to complete the deployment. - `[-i]` and `[-p]` are optional. If you have configured login to the target machine without password, these parameters are not required. If not, choose one of the two parameters. `[-i]` is the private key of the root user (or other users specified by `--user`) that has access to the target machine. `[-p]` is used to input the user password interactively. diff --git a/quick-start-with-htap.md b/quick-start-with-htap.md index 6c5b0e6c16f92..abd0f6f2ea961 100644 --- a/quick-start-with-htap.md +++ b/quick-start-with-htap.md @@ -97,7 +97,7 @@ In the following steps, you can create a [TPC-H](http://www.tpc.org/tpch/) datas | test.lineitem | 6491711 | 849.07 MiB| 99.06 MiB | 948.13 MiB| +---------------+----------------+-----------+------------+-----------+ 8 rows in set (0.06 sec) - ``` + ``` This is a database of a commercial ordering system. In which, the `test.nation` table indicates the information about countries, the `test.region` table indicates the information about regions, the `test.part` table indicates the information about parts, the `test.supplier` table indicates the information about suppliers, the `test.partsupp` table indicates the information about parts of suppliers, the `test.customer` table indicates the information about customers, the `test.customer` table indicates the information about orders, and the `test.lineitem` table indicates the information about online items. @@ -139,7 +139,7 @@ This is a shipping priority query, which provides the priority and potential rev ### Step 4. 
Replicate the test data to the columnar storage engine -After TiFlash is deployed, TiKV does not replicate data to TiFlash immediately. You need to execute the following DDL statements in a MySQL client of TiDB to specify which tables need to be replicated. After that, TiDB will create the specified replicas in TiFlash accordingly. +After TiFlash is deployed, TiKV does not replicate data to TiFlash immediately. You need to execute the following DDL statements in a MySQL client of TiDB to specify which tables need to be replicated. After that, TiDB will create the specified replicas in TiFlash accordingly. {{< copyable "sql" >}} diff --git a/quick-start-with-tidb.md b/quick-start-with-tidb.md index d6f0545f7e693..655af53dd48db 100644 --- a/quick-start-with-tidb.md +++ b/quick-start-with-tidb.md @@ -6,16 +6,18 @@ aliases: ['/docs/dev/quick-start-with-tidb/','/docs/dev/test-deployment-using-do # Quick Start Guide for the TiDB Database Platform -This guide walks you through the quickest way to get started with TiDB. For non-production environments, you can deploy your TiDB database by either of the following methods: +This guide provides the quickest way to get started with TiDB. For non-production environments, you can deploy your TiDB database using either of the following methods: - [Deploy a local test cluster](#deploy-a-local-test-cluster) (for macOS and Linux) - [Simulate production deployment on a single machine](#simulate-production-deployment-on-a-single-machine) (for Linux only) +In addition, you can try out TiDB features on [TiDB Playground](https://play.tidbcloud.com/?utm_source=docs&utm_medium=tidb_quick_start). + > **Note:** > > The deployment method provided in this guide is **ONLY FOR** quick start, **NOT FOR** production. > -> - To deploy an on-premises production cluster, see [production installation guide](/production-deployment-using-tiup.md). 
+> - To deploy a self-hosted production cluster, see the [production installation guide](/production-deployment-using-tiup.md). > - To deploy TiDB on Kubernetes, see [Get Started with TiDB on Kubernetes](https://docs.pingcap.com/tidb-in-kubernetes/stable/get-started). > - To manage TiDB in the cloud, see [TiDB Cloud Quick Start](https://docs.pingcap.com/tidbcloud/tidb-cloud-quickstart). @@ -26,7 +28,7 @@ This guide walks you through the quickest way to get started with TiDB. For non-
-As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB instances, 3 TiKV instances, 3 PD instances, and optional TiFlash instances. With TiUP Playground, you can quickly build the test cluster by taking the following steps: +As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB instances, 3 TiKV instances, 3 PD instances, and optional TiFlash instances. With TiUP Playground, you can quickly build the test cluster by following these steps: 1. Download and install TiUP: @@ -36,7 +38,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh ``` - If the following message is displayed, you have installed TiUP successfully: + If the following message is displayed, you have successfully installed TiUP: ```log Successfully set mirror to https://tiup-mirrors.pingcap.com @@ -66,7 +68,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in 3. 
Start the cluster in the current session: - - If you want to start a TiDB cluster of the latest version with 1 TiDB instance, 1 TiKV instance, 1 PD instance, and 1 TiFlash instance, run the following command: + - To start a TiDB cluster of the latest version with 1 TiDB instance, 1 TiKV instance, 1 PD instance, and 1 TiFlash instance, run the following command: {{< copyable "shell-regular" >}} @@ -74,15 +76,15 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in tiup playground ``` - - If you want to specify the TiDB version and the number of the instances of each component, run a command like this: + - To specify the TiDB version and the number of instances of each component, run a command like this: {{< copyable "shell-regular" >}} ```shell - tiup playground v7.0.0 --db 2 --pd 3 --kv 3 + tiup playground v7.3.0 --db 2 --pd 3 --kv 3 ``` - The command downloads a version cluster to the local machine and starts it, such as v7.0.0. To view the latest version, run `tiup list tidb`. + The command downloads a version cluster to the local machine and starts it, such as v7.3.0. To view the latest version, run `tiup list tidb`. This command returns the access methods of the cluster: @@ -100,7 +102,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in > > + Since v5.2.0, TiDB supports running `tiup playground` on the machine that uses the Apple M1 chip. > + For the playground operated in this way, after the test deployment is finished, TiUP will clean up the original cluster data. You will get a new cluster after re-running the command. - > + If you want the data to be persisted on storage, run `tiup --tag playground ...`. For details, refer to [TiUP Reference Guide](/tiup/tiup-reference.md#-t---tag). + > + If you want the data to be persisted on storage, run `tiup --tag playground ...`. For details, refer to the [TiUP Reference](/tiup/tiup-reference.md#-t---tag) guide. 4. 
Start a new session to access TiDB: @@ -112,7 +114,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in tiup client ``` - + You can also use the MySQL client to connect to TiDB. + + Alternatively, you can use the MySQL client to connect to TiDB. {{< copyable "shell-regular" >}} @@ -122,7 +124,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in 5. Access the Prometheus dashboard of TiDB at . -6. Access the [TiDB Dashboard](/dashboard/dashboard-intro.md) at . The default username is `root`, with an empty password. +6. Access the [TiDB Dashboard](/dashboard/dashboard-intro.md) at . The default username is `root`, and the password is empty. 7. Access the Grafana dashboard of TiDB through . Both the default username and password are `admin`. @@ -147,7 +149,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in
-As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB instances, 3 TiKV instances, 3 PD instances, and optional TiFlash instances. With TiUP Playground, you can quickly build the test cluster by taking the following steps: +As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB instances, 3 TiKV instances, 3 PD instances, and optional TiFlash instances. With TiUP Playground, you can quickly build the test cluster by following these steps: 1. Download and install TiUP: @@ -157,7 +159,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in curl --proto '=https' --tlsv1.2 -sSf https://tiup-mirrors.pingcap.com/install.sh | sh ``` - If the following message is displayed, you have installed TiUP successfully: + If the following message is displayed, you have successfully installed TiUP: ```log Successfully set mirror to https://tiup-mirrors.pingcap.com @@ -187,7 +189,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in 3. 
Start the cluster in the current session: - - If you want to start a TiDB cluster of the latest version with 1 TiDB instance, 1 TiKV instance, 1 PD instance, and 1 TiFlash instance, run the following command: + - To start a TiDB cluster of the latest version with 1 TiDB instance, 1 TiKV instance, 1 PD instance, and 1 TiFlash instance, run the following command: {{< copyable "shell-regular" >}} @@ -195,15 +197,15 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in tiup playground ``` - - If you want to specify the TiDB version and the number of the instances of each component, run a command like this: + - To specify the TiDB version and the number of instances of each component, run a command like this: {{< copyable "shell-regular" >}} ```shell - tiup playground v7.0.0 --db 2 --pd 3 --kv 3 + tiup playground v7.3.0 --db 2 --pd 3 --kv 3 ``` - The command downloads a version cluster to the local machine and starts it, such as v7.0.0. To view the latest version, run `tiup list tidb`. + The command downloads a version cluster to the local machine and starts it, such as v7.3.0. To view the latest version, run `tiup list tidb`. This command returns the access methods of the cluster: @@ -219,7 +221,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in > **Note:** > > For the playground operated in this way, after the test deployment is finished, TiUP will clean up the original cluster data. You will get a new cluster after re-running the command. - > If you want the data to be persisted on storage, run `tiup --tag playground ...`. For details, refer to [TiUP Reference Guide](/tiup/tiup-reference.md#-t---tag). + > If you want the data to be persisted on storage, run `tiup --tag playground ...`. For details, refer to the [TiUP Reference](/tiup/tiup-reference.md#-t---tag) guide. 4. 
Start a new session to access TiDB: @@ -231,7 +233,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in tiup client ``` - + You can also use the MySQL client to connect to TiDB. + + Alternatively, you can use the MySQL client to connect to TiDB. {{< copyable "shell-regular" >}} @@ -241,7 +243,7 @@ As a distributed system, a basic TiDB test cluster usually consists of 2 TiDB in 5. Access the Prometheus dashboard of TiDB at . -6. Access the [TiDB Dashboard](/dashboard/dashboard-intro.md) at . The default username is `root`, with an empty password. +6. Access the [TiDB Dashboard](/dashboard/dashboard-intro.md) at . The default username is `root`, and the password is empty. 7. Access the Grafana dashboard of TiDB through . Both the default username and password are `admin`. @@ -274,16 +276,16 @@ This section describes how to deploy a TiDB cluster using a YAML file of the sma ### Prepare -Prepare a target machine that meets the following requirements: +Before deploying the TiDB cluster, ensure that the target machine meets the following requirements: -- CentOS 7.3 or a later version is installed -- The Linux OS has access to the Internet, which is required to download TiDB and related software installation packages +- CentOS 7.3 or a later version is installed. +- The Linux OS has access to the internet, which is required to download TiDB and related software installation packages. -The smallest TiDB cluster topology is as follows: +The smallest TiDB cluster topology consists of the following instances: > **Note:** > -> The IP address of the following instances only serves as an example IP. In your actual deployment, you need to replace the IP with your actual IP. +> The IP addresses of the instances are given as examples only. In your actual deployment, replace the IP addresses with your actual IP addresses. 
| Instance | Count | IP | Configuration | |:-- | :-- | :-- | :-- | @@ -293,14 +295,14 @@ The smallest TiDB cluster topology is as follows: | TiFlash | 1 | 10.0.1.1 | The default port
Global directory configuration | | Monitor | 1 | 10.0.1.1 | The default port
Global directory configuration | -Other requirements for the target machine: +Other requirements for the target machine include: -- The `root` user and its password is required +- The `root` user and its password are required - [Stop the firewall service of the target machine](/check-before-deployment.md#check-and-stop-the-firewall-service-of-target-machines), or open the port needed by the TiDB cluster nodes - Currently, the TiUP cluster supports deploying TiDB on the x86_64 (AMD64) and ARM architectures: - - It is recommended to use CentOS 7.3 or later versions on AMD64 - - It is recommended to use CentOS 7.6 1810 on ARM + - It is recommended to use CentOS 7.3 or later versions on AMD64. + - It is recommended to use CentOS 7.6 1810 on ARM. ### Deploy @@ -344,7 +346,7 @@ Other requirements for the target machine: tiup update --self && tiup update cluster ``` -5. Use the root user privilege to increase the connection limit of the `sshd` service. This is because TiUP needs to simulate deployment on multiple machines. +5. Increase the connection limit of the `sshd` service using the root user privilege. This is because TiUP needs to simulate deployment on multiple machines. 1. Modify `/etc/ssh/sshd_config`, and set `MaxSessions` to `20`. 2. Restart the `sshd` service: @@ -377,7 +379,7 @@ Other requirements for the target machine: server_configs: tidb: - log.slow-threshold: 300 + instance.tidb_slow_log_threshold: 300 tikv: readpool.storage.use-unified-pool: false readpool.coprocessor.use-unified-pool: true @@ -435,7 +437,7 @@ Other requirements for the target machine: ``` - ``: Set the cluster name - - ``: Set the TiDB cluster version, such as `v6.5.0`. You can see all the supported TiDB versions by running the `tiup list tidb` command + - ``: Set the TiDB cluster version, such as `v7.3.0`. You can see all the supported TiDB versions by running the `tiup list tidb` command - `-p`: Specify the password used to connect to the target machine. 
> **Note:** @@ -497,17 +499,17 @@ Other requirements for the target machine: ## What's next -- If you have just deployed a TiDB cluster for the local test environment: +If you have just deployed a TiDB cluster for the local test environment, here are the next steps: - - Learn [Basic SQL operations in TiDB](/basic-sql-operations.md) - - [Migrate data to TiDB](/migration-overview.md) +- Learn about basic SQL operations in TiDB by referring to [Basic SQL operations in TiDB](/basic-sql-operations.md). +- You can also migrate data to TiDB by referring to [Migrate data to TiDB](/migration-overview.md). -- If you are ready to deploy a TiDB cluster for the production environment: +If you are ready to deploy a TiDB cluster for the production environment, here are the next steps: - - [Deploy TiDB using TiUP](/production-deployment-using-tiup.md) - - [Deploy TiDB on Cloud using TiDB Operator](https://docs.pingcap.com/tidb-in-kubernetes/stable) +- [Deploy TiDB using TiUP](/production-deployment-using-tiup.md) +- Alternatively, you can deploy TiDB on Cloud using TiDB Operator by referring to the [TiDB on Kubernetes](https://docs.pingcap.com/tidb-in-kubernetes/stable) documentation. -- If you're looking for analytics solution with TiFlash: +If you are looking for an analytics solution with TiFlash, here are the next steps: - - [Use TiFlash](/tiflash/tiflash-overview.md#use-tiflash) - - [TiFlash Overview](/tiflash/tiflash-overview.md) +- [Use TiFlash](/tiflash/tiflash-overview.md#use-tiflash) +- [TiFlash Overview](/tiflash/tiflash-overview.md) diff --git a/read-historical-data.md b/read-historical-data.md index 8649b4699a88a..30fe523dacce1 100644 --- a/read-historical-data.md +++ b/read-historical-data.md @@ -173,6 +173,6 @@ SET GLOBAL tidb_gc_life_time="60m"; To restore data from an older version, you can use one of the following methods: -- For simple cases, use `SELECT` after setting the `tidb_snapshot` variable and copy-paste the output, or use `SELECT ... 
INTO LOCAL OUTFLE` and use `LOAD DATA` to import the data later on. +- For simple cases, use [`SELECT`](/sql-statements/sql-statement-select.md) after setting the `tidb_snapshot` variable and copy-paste the output, or use `SELECT ... INTO OUTFILE` and then use [`LOAD DATA`](/sql-statements/sql-statement-load-data.md) to import the data later on. - Use [Dumpling](/dumpling-overview.md#export-historical-data-snapshots-of-tidb) to export a historical snapshot. Dumpling performs well in exporting larger sets of data. \ No newline at end of file diff --git a/releases/release-5.1.0.md b/releases/release-5.1.0.md index ac386cac21f75..704cb272766f6 100644 --- a/releases/release-5.1.0.md +++ b/releases/release-5.1.0.md @@ -249,7 +249,7 @@ To learn more about the information and how to disable this behavior, refer to [ - Improve data importing speed. The optimization results show that the speed of importing TPC-C data is increased by 30%, and the speed of importing large tables (2TB+) with more indexes (5 indexes) is increased by more than 50%. 
[#753](https://github.com/pingcap/br/pull/753) - Add a pre-check on the data to be imported and also on the target cluster before importing, and report errors to reject the import process if it does not meet the import requirements [#999](https://github.com/pingcap/br/pull/999) - - Optimize the timing of checkpoint updates on the Local backend to improve performance of restarting from breakpoints[#1080](https://github.com/pingcap/br/pull/1080) + - Optimize the timing of checkpoint updates on the Local backend to improve performance of restarting from breakpoints [#1080](https://github.com/pingcap/br/pull/1080) ## Bug Fixes @@ -268,7 +268,7 @@ To learn more about the information and how to disable this behavior, refer to [ - Fix the wrong results of some string functions [#23759](https://github.com/pingcap/tidb/issues/23759) - Users now need both `INSERT` and `DELETE` privileges on a table to perform `REPLACE` operations [#23909](https://github.com/pingcap/tidb/issues/23909) - Users now need both `INSERT` and `DELETE` privileges on a table to perform `REPLACE` operations [#24070](https://github.com/pingcap/tidb/pull/24070) - - Fix the wrong `TableDual` plans caused by incorrectly comparing binaries and bytes[#23846](https://github.com/pingcap/tidb/issues/23846) + - Fix the wrong `TableDual` plans caused by incorrectly comparing binaries and bytes [#23846](https://github.com/pingcap/tidb/issues/23846) - Fix the panic issue caused by using the prefix index and index join in some cases [#24547](https://github.com/pingcap/tidb/issues/24547) [#24716](https://github.com/pingcap/tidb/issues/24716) [#24717](https://github.com/pingcap/tidb/issues/24717) - Fix the issue that the prepared plan cache of `point get` is incorrectly used by the `point get` statement in the transaction [#24741](https://github.com/pingcap/tidb/issues/24741) - Fix the issue of writing the wrong prefix index value when the collation is `ascii_bin` or `latin1_bin` 
[#24569](https://github.com/pingcap/tidb/issues/24569) diff --git a/releases/release-5.2.0.md b/releases/release-5.2.0.md index 91d1c90240dba..ed6f6e0ebe14f 100644 --- a/releases/release-5.2.0.md +++ b/releases/release-5.2.0.md @@ -20,7 +20,7 @@ In v5.2, the key new features and improvements are as follows: - Add the TiFlash I/O traffic limit feature to improve the stability of read and write for TiFlash - TiKV introduces a new flow control mechanism to replace the previous RocksDB write stall mechanism to improve the stability of TiKV flow control - Simplify the operation and maintenance of Data Migration (DM) to reduce the management cost. -- TiCDC supports HTTP protocol OpenAPI to manage TiCDC tasks. It provides a more user-friendly operation method for both Kubernetes and on-premises environments. (Experimental feature) +- TiCDC supports HTTP protocol OpenAPI to manage TiCDC tasks. It provides a more user-friendly operation method for both Kubernetes and self-hosted environments. (Experimental feature) ## Compatibility changes @@ -165,7 +165,7 @@ In v5.2, the key new features and improvements are as follows: ### TiDB data share subscription -TiCDC supports using the HTTP protocol (OpenAPI) to manage TiCDC tasks, which is a more user-friendly operation method for both Kubernetes and on-premises environments. (Experimental feature) +TiCDC supports using the HTTP protocol (OpenAPI) to manage TiCDC tasks, which is a more user-friendly operation method for both Kubernetes and self-hosted environments. 
(Experimental feature) [#2411](https://github.com/pingcap/tiflow/issues/2411) @@ -210,7 +210,7 @@ Support running the `tiup playground` command on Mac computers with Apple M1 chi - Support completing the garbage collection automatically for the bindings in the "deleted" status [#26206](https://github.com/pingcap/tidb/pull/26206) - Support showing whether a binding is used for query optimization in the result of `EXPLAIN VERBOSE` [#26930](https://github.com/pingcap/tidb/pull/26930) - Add a new status variation `last_plan_binding_update_time` to view the timestamp corresponding to the binding cache in the current TiDB instance [#26340](https://github.com/pingcap/tidb/pull/26340) - - Support reporting an error when starting binding evolution or running `admin evolve bindings` to ban the baseline evolution (currently disabled in the on-premises TiDB version because it is an experimental feature) affecting other features [#26333](https://github.com/pingcap/tidb/pull/26333) + - Support reporting an error when starting binding evolution or running `admin evolve bindings` to ban the baseline evolution (currently disabled in the TiDB Self-Hosted version because it is an experimental feature) affecting other features [#26333](https://github.com/pingcap/tidb/pull/26333) + PD diff --git a/releases/release-5.2.2.md b/releases/release-5.2.2.md index 47d5fe449246a..9e2e748b99d34 100644 --- a/releases/release-5.2.2.md +++ b/releases/release-5.2.2.md @@ -35,7 +35,7 @@ TiDB version: 5.2.2 + TiCDC - Reduce the default value of the Kafka sink configuration item `MaxMessageBytes` from 64 MB to 1 MB to fix the issue that large messages are rejected by the Kafka Broker [#3104](https://github.com/pingcap/tiflow/pull/3104) - - Reduce memory usage in the relpication pipeline [#2553](https://github.com/pingcap/tiflow/issues/2553)[#3037](https://github.com/pingcap/tiflow/pull/3037) [#2726](https://github.com/pingcap/tiflow/pull/2726) + - Reduce memory usage in the replication pipeline 
[#2553](https://github.com/pingcap/tiflow/issues/2553) [#3037](https://github.com/pingcap/tiflow/pull/3037) [#2726](https://github.com/pingcap/tiflow/pull/2726) - Optimize monitoring items and alert rules to improve observability of synchronous links, memory GC, and stock data scanning processes [#2735](https://github.com/pingcap/tiflow/pull/2735) [#1606](https://github.com/pingcap/tiflow/issues/1606) [#3000](https://github.com/pingcap/tiflow/pull/3000) [#2985](https://github.com/pingcap/tiflow/issues/2985) [#2156](https://github.com/pingcap/tiflow/issues/2156) - When the sync task status is normal, no more historical error messages are displayed to avoid misleading users [#2242](https://github.com/pingcap/tiflow/issues/2242) @@ -94,7 +94,7 @@ TiDB version: 5.2.2 - Fix the issue that the scatter range scheduler cannot schedule empty regions [#4118](https://github.com/tikv/pd/pull/4118) - Fix the issue that the key manager cost too much CPU [#4071](https://github.com/tikv/pd/issues/4071) - Fix the data race issue that might occur when setting configurations of hot region scheduler [#4159](https://github.com/tikv/pd/issues/4159) - - Fix slow leader election caused by stuck region syncer[#3936](https://github.com/tikv/pd/issues/3936) + - Fix slow leader election caused by stuck region syncer [#3936](https://github.com/tikv/pd/issues/3936) + TiFlash diff --git a/releases/release-5.3.0.md b/releases/release-5.3.0.md index 1027fa819499d..a7c026b88a0b1 100644 --- a/releases/release-5.3.0.md +++ b/releases/release-5.3.0.md @@ -309,7 +309,7 @@ Starting from TiCDC v5.3.0, the cyclic replication feature between TiDB clusters + TiCDC - Reduce the default value of the Kafka sink configuration item `MaxMessageBytes` from 64 MB to 1 MB to fix the issue that large messages are rejected by the Kafka Broker [#3104](https://github.com/pingcap/tiflow/pull/3104) - - Reduce memory usage in the replication pipeline 
[#2553](https://github.com/pingcap/tiflow/issues/2553)[#3037](https://github.com/pingcap/tiflow/pull/3037) [#2726](https://github.com/pingcap/tiflow/pull/2726) + - Reduce memory usage in the replication pipeline [#2553](https://github.com/pingcap/tiflow/issues/2553) [#3037](https://github.com/pingcap/tiflow/pull/3037) [#2726](https://github.com/pingcap/tiflow/pull/2726) - Optimize monitoring items and alert rules to improve observability of synchronous links, memory GC, and stock data scanning processes [#2735](https://github.com/pingcap/tiflow/pull/2735) [#1606](https://github.com/pingcap/tiflow/issues/1606) [#3000](https://github.com/pingcap/tiflow/pull/3000) [#2985](https://github.com/pingcap/tiflow/issues/2985) [#2156](https://github.com/pingcap/tiflow/issues/2156) - When the sync task status is normal, no more historical error messages are displayed to avoid misleading users [#2242](https://github.com/pingcap/tiflow/issues/2242) diff --git a/releases/release-6.0.0-dmr.md b/releases/release-6.0.0-dmr.md index 565324f36029e..9b8c8211fc528 100644 --- a/releases/release-6.0.0-dmr.md +++ b/releases/release-6.0.0-dmr.md @@ -41,7 +41,7 @@ Starting from TiDB v6.0.0, TiDB provides two types of releases: - Development Milestone Releases - Development Milestone Releases (DMR) are released approximately every two months. A DMR introduces new features and improvements, but does not accept patch releases. It is not recommended for on-premises users to use DMR in production environments. For example, v6.0.0-DMR is a DMR. + Development Milestone Releases (DMR) are released approximately every two months. A DMR introduces new features and improvements, but does not accept patch releases. It is not recommended for users to use DMR in production environments. For example, v6.0.0-DMR is a DMR. TiDB v6.0.0 is a DMR, and its version is 6.0.0-DMR. @@ -266,7 +266,7 @@ TiDB v6.0.0 is a DMR, and its version is 6.0.0-DMR. 
- An enterprise-level database management platform, TiDB Enterprise Manager - TiDB Enterprise Manager (TiEM) is an enterprise-level database management platform based on the TiDB database, which aims to help users manage TiDB clusters in on-premises or public cloud environments. + TiDB Enterprise Manager (TiEM) is an enterprise-level database management platform based on the TiDB database, which aims to help users manage TiDB clusters in self-hosted or public cloud environments. TiEM not only provides full lifecycle visual management for TiDB clusters, but also provides one-stop services: parameter management, version upgrades, cluster clone, active-standby cluster switching, data import and export, data replication, and data backup and restore services. TiEM can improve the efficiency of DevOps on TiDB and reduce the DevOps cost for enterprises. diff --git a/releases/release-6.1.0.md b/releases/release-6.1.0.md index d47eac2632c24..4c838c63bc3f5 100644 --- a/releases/release-6.1.0.md +++ b/releases/release-6.1.0.md @@ -231,7 +231,7 @@ In 6.1.0, the key new features or improvements are as follows: | [`require_secure_transport`](/system-variables.md#require_secure_transport-new-in-v610) | Newly added | This setting was previously a `tidb.toml` option (`security.require-secure-transport`), but changed to a system variable starting from TiDB v6.1.0. | | [`tidb_committer_concurrency`](/system-variables.md#tidb_committer_concurrency-new-in-v610) | Newly added | This setting was previously a `tidb.toml` option (`performance.committer-concurrency`), but changed to a system variable starting from TiDB v6.1.0. | | [`tidb_enable_auto_analyze`](/system-variables.md#tidb_enable_auto_analyze-new-in-v610) | Newly added | This setting was previously a `tidb.toml` option (`run-auto-analyze`), but changed to a system variable starting from TiDB v6.1.0. 
| -| [`tidb_enable_new_only_full_group_by_check`](/system-variables.md#tidb_enable_new_only_full_group_by_check-new-in-v610) | Newly added | This variable controls the behavior when TiDB performs the `ONLY_FULL_GOUP_BY` check. | +| [`tidb_enable_new_only_full_group_by_check`](/system-variables.md#tidb_enable_new_only_full_group_by_check-new-in-v610) | Newly added | This variable controls the behavior when TiDB performs the `ONLY_FULL_GROUP_BY` check. | | [`tidb_enable_outer_join_reorder`](/system-variables.md#tidb_enable_outer_join_reorder-new-in-v610) | Newly added | Since v6.1.0, the Join Reorder algorithm of TiDB supports Outer Join. This variable controls the support behavior, and the default value is `ON`. | | [`tidb_enable_prepared_plan_cache`](/system-variables.md#tidb_enable_prepared_plan_cache-new-in-v610) | Newly added | This setting was previously a `tidb.toml` option (`prepared-plan-cache.enabled`), but changed to a system variable starting from TiDB v6.1.0. | | [`tidb_gc_max_wait_time`](/system-variables.md#tidb_gc_max_wait_time-new-in-v610) | Newly added | This variable is used to set the maximum time of GC safe point blocked by uncommitted transactions. 
| diff --git a/releases/release-6.1.6.md b/releases/release-6.1.6.md index 97dc88ffd266e..c21c534b24fd7 100644 --- a/releases/release-6.1.6.md +++ b/releases/release-6.1.6.md @@ -44,10 +44,10 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.1/quick-start-with- - Fix the issue that Plan Cache might cache Shuffle operators and return incorrect results [#38335](https://github.com/pingcap/tidb/issues/38335) @[qw4990](https://github.com/qw4990) - Fix the issue that data race in time zone might cause data-index inconsistency [#40710](https://github.com/pingcap/tidb/issues/40710) @[wjhuang2016](https://github.com/wjhuang2016) - Fix the issue that goroutine leak might occur in `indexMerge` [#41545](https://github.com/pingcap/tidb/issues/41545) [#41605](https://github.com/pingcap/tidb/issues/41605) @[guo-shaoge](https://github.com/guo-shaoge) @[guo-shaoge](https://github.com/guo-shaoge) - - Fix the issue that, when using Cursor Fetch and running other statements among Execute, Fetch, and Close, the Fetch and Close commands might return incorrect results or cause TiDB to panic [#40094](https://github.com/pingcap/tidb/issues/40094) [@YangKeao](https://github.com/YangKeao) - - Fix the issue that when modifying the floating-point type using DDL to keep the length unchanged and reduce the decimal places, the old data still remains the same [#41281](https://github.com/pingcap/tidb/issues/41281) [@zimulala](https://github.com/zimulala) - - Fix the issue that joining the `information_schema.columns` table causes TiDB to panic [#32459](https://github.com/pingcap/tidb/issues/32459) [@tangenta](https://github.com/tangenta) - - Fix the issue that TiDB panic occurs due to inconsistent InfoSchema being obtained when generating the execution plan [#41622](https://github.com/pingcap/tidb/issues/41622) [@tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that, when using Cursor Fetch and running other statements among Execute, Fetch, and Close, the Fetch and Close 
commands might return incorrect results or cause TiDB to panic [#40094](https://github.com/pingcap/tidb/issues/40094) @[YangKeao](https://github.com/YangKeao) + - Fix the issue that when modifying the floating-point type using DDL to keep the length unchanged and reduce the decimal places, the old data still remains the same [#41281](https://github.com/pingcap/tidb/issues/41281) @[zimulala](https://github.com/zimulala) + - Fix the issue that joining the `information_schema.columns` table causes TiDB to panic [#32459](https://github.com/pingcap/tidb/issues/32459) @[tangenta](https://github.com/tangenta) + - Fix the issue that TiDB panic occurs due to inconsistent InfoSchema being obtained when generating the execution plan [#41622](https://github.com/pingcap/tidb/issues/41622) @[tiancaiamao](https://github.com/tiancaiamao) - Fix the issue that TiFlash reports an error for generated columns during execution [#40663](https://github.com/pingcap/tidb/issues/40663) @[guo-shaoge](https://github.com/guo-shaoge) - Fix the issue that TiDB might produce incorrect results when different partitioned tables appear in a single SQL statement [#42135](https://github.com/pingcap/tidb/issues/42135) @[mjonss](https://github.com/mjonss) - Fix the issue that Plan Cache might cache Shuffle operators and return incorrect results [#38335](https://github.com/pingcap/tidb/issues/38335) @[qw4990](https://github.com/qw4990) @[fzzf678](https://github.com/fzzf678) @@ -61,8 +61,8 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.1/quick-start-with- + TiKV - Fix an error that occurs when casting the `const Enum` type to other types [#14156](https://github.com/tikv/tikv/issues/14156) @[wshwsh12](https://github.com/wshwsh12) - - Fix the issue of CPU quota limitation [13084](https://github.com/tikv/tikv/issues/13084) @[BornChanger](https://github.com/BornChanger) - - Fix the issue of incorrect snapshot last index [12618](https://github.com/tikv/tikv/issues/12618) 
@[LintianShi](https://github.com/LintianShi) + - Fix the issue of CPU quota limitation [#13084](https://github.com/tikv/tikv/issues/13084) @[BornChanger](https://github.com/BornChanger) + - Fix the issue of incorrect snapshot last index [#12618](https://github.com/tikv/tikv/issues/12618) @[LintianShi](https://github.com/LintianShi) + PD @@ -82,7 +82,7 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.1/quick-start-with- + TiCDC - - Fix the issue that the disorder of `UPDATE` and `INSERT` statements during data replication might cause the `Duplicate entry` error [#8597](https://github.com/pingcap/tiflow/issues/8597) @[sdojjy](https://github.com/sojjy) + - Fix the issue that the disorder of `UPDATE` and `INSERT` statements during data replication might cause the `Duplicate entry` error [#8597](https://github.com/pingcap/tiflow/issues/8597) @[sdojjy](https://github.com/sdojjy) - Fix the abnormal exit issue of the TiCDC service caused by network isolation between PD and TiCDC [#8562](https://github.com/pingcap/tiflow/issues/8562) @[overvenus](https://github.com/overvenus) - Fix the data inconsistency that occurs when replicating data to a TiDB or MySQL sink and when `CHARACTER SET` is specified on the column that has the non-null unique index without a primary key [#8420](https://github.com/pingcap/tiflow/issues/8420) @[zhaoxinyu](https://github.com/zhaoxinyu) - Fix the issue that the memory usage of `db sorter` is not controlled by `cgroup memory limit` [#8588](https://github.com/pingcap/tiflow/issues/8588) @[amyangfei](https://github.com/amyangfei) diff --git a/releases/release-6.1.7.md b/releases/release-6.1.7.md new file mode 100644 index 0000000000000..728ea0b6dab7d --- /dev/null +++ b/releases/release-6.1.7.md @@ -0,0 +1,108 @@ +--- +title: TiDB 6.1.7 Release Notes +summary: Learn about the improvements and bug fixes in TiDB 6.1.7. 
+--- + +# TiDB 6.1.7 Release Notes + +Release date: July 12, 2023 + +TiDB version: 6.1.7 + +Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.1/quick-start-with-tidb) | [Production deployment](https://docs.pingcap.com/tidb/v6.1/production-deployment-using-tiup) | [Installation packages](https://www.pingcap.com/download/?version=v6.1.7#version-list) + +## Improvements + ++ TiDB + + - Use pessimistic transactions in internal transaction retry to avoid retry failure and reduce time consumption [#38136](https://github.com/pingcap/tidb/issues/38136) @[jackysp](https://github.com/jackysp) + ++ Tools + + + TiCDC + + - Support batch `UPDATE` DML statements to improve TiCDC replication performance [#8084](https://github.com/pingcap/tiflow/issues/8084) @[amyangfei](https://github.com/amyangfei) + + + TiDB Lightning + + - Verify checksum through SQL after the import to improve stability of verification [#41941](https://github.com/pingcap/tidb/issues/41941) @[GMHDBJD](https://github.com/GMHDBJD) + +## Bug fixes + ++ TiDB + + - Fix the panic issue caused by empty `processInfo` [#43829](https://github.com/pingcap/tidb/issues/43829) @[zimulala](https://github.com/zimulala) + - Fix the issue that `resolve lock` might hang when there is a sudden change in PD time [#44822](https://github.com/pingcap/tidb/issues/44822) @[zyguan](https://github.com/zyguan) + - Fix the issue that queries containing Common Table Expressions (CTEs) might cause insufficient disk space [#44477](https://github.com/pingcap/tidb/issues/44477) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that using CTEs and correlated subqueries simultaneously might result in incorrect query results or panic [#44649](https://github.com/pingcap/tidb/issues/44649) [#38170](https://github.com/pingcap/tidb/issues/38170) [#44774](https://github.com/pingcap/tidb/issues/44774) @[winoros](https://github.com/winoros) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that the query result of the 
`SELECT CAST(n AS CHAR)` statement is incorrect when `n` in the statement is a negative number [#44786](https://github.com/pingcap/tidb/issues/44786) @[xhebox](https://github.com/xhebox) + - Fix the query panic issue of TiDB in certain cases [#40857](https://github.com/pingcap/tidb/issues/40857) @[Dousir9](https://github.com/Dousir9) + - Fix the issue that SQL compile error logs are not redacted [#41831](https://github.com/pingcap/tidb/issues/41831) @[lance6716](https://github.com/lance6716) + - Fix the issue that the `SELECT` statement returns an error for a partitioned table if the table partition definition uses the `FLOOR()` function to round a partitioned column [#42323](https://github.com/pingcap/tidb/issues/42323) @[jiyfhust](https://github.com/jiyfhust) + - Fix the issue that querying partitioned tables might cause errors during Region split [#43144](https://github.com/pingcap/tidb/issues/43144) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue of unnecessary memory usage during reading statistical information [#42052](https://github.com/pingcap/tidb/issues/42052) @[xuyifangreeneyes](https://github.com/xuyifangreeneyes) + - Fix the issue of excessive memory usage after creating a large number of empty partitioned tables [#44308](https://github.com/pingcap/tidb/issues/44308) @[hawkingrei](https://github.com/hawkingrei) + - Fix the issue that queries might return incorrect results when `tidb_opt_agg_push_down` is enabled [#44795](https://github.com/pingcap/tidb/issues/44795) @[AilinKid](https://github.com/AilinKid) + - Fix the issue that the join result of common table expressions might be wrong [#38170](https://github.com/pingcap/tidb/issues/38170) @[wjhuang2016](https://github.com/wjhuang2016) + - Fix the issue that in some rare cases, residual pessimistic locks of pessimistic transactions might affect data correctness when GC resolves locks [#43243](https://github.com/pingcap/tidb/issues/43243) @[MyonKeminta](https://github.com/MyonKeminta) + 
- Fix the issue that after a new column is added in the cache table, the value is `NULL` instead of the default value of the column [#42928](https://github.com/pingcap/tidb/issues/42928) @[lqs](https://github.com/lqs) + - Fix the issue that TiDB returns an error when the corresponding rows in partitioned tables cannot be found in the probe phase of index join [#43686](https://github.com/pingcap/tidb/issues/43686) @[AilinKid](https://github.com/AilinKid) @[mjonss](https://github.com/mjonss) + - Fix the issue that dropping a database causes slow GC progress [#33069](https://github.com/pingcap/tidb/issues/33069) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that data and indexes are inconsistent when the `ON UPDATE` statement does not correctly update the primary key [#44565](https://github.com/pingcap/tidb/issues/44565) @[zyguan](https://github.com/zyguan) + - Fix the issue that TiCDC might lose some row changes during table renaming [#43338](https://github.com/pingcap/tidb/issues/43338) @[tangenta](https://github.com/tangenta) + - Fix the behavior issue of Placement Rules in partitioned tables, so that the Placement Rules in deleted partitions can be correctly set and recycled [#44116](https://github.com/pingcap/tidb/issues/44116) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that when `tidb_scatter_region` is enabled, Region does not automatically split after a partition is truncated [#43174](https://github.com/pingcap/tidb/issues/43174) [#43028](https://github.com/pingcap/tidb/issues/43028) + - Fix the issue of DDL retry caused by write conflict when executing `TRUNCATE TABLE` for partitioned tables with many partitions and TiFlash replicas [#42940](https://github.com/pingcap/tidb/issues/42940) @[mjonss](https://github.com/mjonss) + - Fix the issue of incorrect execution plans when pushing down window functions to TiFlash [#43922](https://github.com/pingcap/tidb/issues/43922) @[gengliqi](https://github.com/gengliqi) + - Fix the 
issue that incorrect results might be returned when using a common table expression (CTE) in statements with non-correlated subqueries [#44051](https://github.com/pingcap/tidb/issues/44051) @[winoros](https://github.com/winoros) + - Fix the issue that using `memTracker` with cursor fetch causes memory leaks [#44254](https://github.com/pingcap/tidb/issues/44254) @[YangKeao](https://github.com/YangKeao) + - Fix the issue that the data length in the `QUERY` column of the `INFORMATION_SCHEMA.DDL_JOBS` table might exceed the column definition [#42440](https://github.com/pingcap/tidb/issues/42440) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that the `min, max` query result is incorrect [#43805](https://github.com/pingcap/tidb/issues/43805) @[wshwsh12](https://github.com/wshwsh12) + - Fix the issue that TiDB reports syntax errors when analyzing tables [#43392](https://github.com/pingcap/tidb/issues/43392) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that the `SHOW PROCESSLIST` statement cannot display the TxnStart of the transaction of the statement with a long subquery time [#40851](https://github.com/pingcap/tidb/issues/40851) @[crazycs520](https://github.com/crazycs520) + - Fix the issue of missing table names in the `ADMIN SHOW DDL JOBS` result when a `DROP TABLE` operation is being executed [#42268](https://github.com/pingcap/tidb/issues/42268) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue of displaying the incorrect TiDB address in IPv6 environment [#43260](https://github.com/pingcap/tidb/issues/43260) @[nexustar](https://github.com/nexustar) + - Fix the issue that the SQL statement reports the `runtime error: index out of range` error when using the `AES_DECRYPT` expression [#43063](https://github.com/pingcap/tidb/issues/43063) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that there is no warning when using `SUBPARTITION` to create partitioned tables 
[#41198](https://github.com/pingcap/tidb/issues/41198) [#41200](https://github.com/pingcap/tidb/issues/41200) @[mjonss](https://github.com/mjonss) + - Fix the issue that the query with CTE causes TiDB to hang [#43749](https://github.com/pingcap/tidb/issues/43749) [#36896](https://github.com/pingcap/tidb/issues/36896) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that truncating a partition of a partitioned table might cause the Placement Rule of the partition to become invalid [#44031](https://github.com/pingcap/tidb/issues/44031) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that CTE results are incorrect when pushing down predicates [#43645](https://github.com/pingcap/tidb/issues/43645) @[winoros](https://github.com/winoros) + - Fix the issue that `auto-commit` change affects transaction commit behaviours [#36581](https://github.com/pingcap/tidb/issues/36581) @[cfzjywxk](https://github.com/cfzjywxk) + ++ TiKV + + - Fix the issue that TiDB Lightning might cause SST file leakage [#14745](https://github.com/tikv/tikv/issues/14745) @[YuJuncen](https://github.com/YuJuncen) + - Fix the issue that encryption key ID conflict might cause the deletion of the old keys [#14585](https://github.com/tikv/tikv/issues/14585) @[tabokie](https://github.com/tabokie) + - Fix the issue of file handle leakage in Continuous Profiling [#14224](https://github.com/tikv/tikv/issues/14224) @[tabokie](https://github.com/tabokie) + ++ PD + + - Fix the issue that gRPC returns errors with unexpected formats [#5161](https://github.com/tikv/pd/issues/5161) @[HuSharp](https://github.com/HuSharp) + ++ Tools + + + Backup & Restore (BR) + + - Fix the issue that `resolved lock timeout` is falsely reported in some cases [#43236](https://github.com/pingcap/tidb/issues/43236) @[YuJuncen](https://github.com/YuJuncen) + - Fix the issue of backup slowdown when a TiKV node crashes in a cluster [#42973](https://github.com/pingcap/tidb/issues/42973) 
@[YuJuncen](https://github.com/YuJuncen) + + + TiCDC + + - Fix the issue that TiCDC cannot create a changefeed with a downstream Kafka-on-Pulsar [#8892](https://github.com/pingcap/tiflow/issues/8892) @[hi-rustin](https://github.com/hi-rustin) + - Fix the issue that TiCDC cannot automatically recover when PD address or leader fails [#8812](https://github.com/pingcap/tiflow/issues/8812) [#8877](https://github.com/pingcap/tiflow/issues/8877) @[asddongmen](https://github.com/asddongmen) + - Fix the issue that when the downstream is Kafka, TiCDC queries the downstream metadata too frequently and causes excessive workload in the downstream [#8957](https://github.com/pingcap/tiflow/issues/8957) [#8959](https://github.com/pingcap/tiflow/issues/8959) @[hi-rustin](https://github.com/hi-rustin) + - Fix the issue that TiCDC gets stuck when PD fails such as network isolation or PD Owner node reboot [#8808](https://github.com/pingcap/tiflow/issues/8808) [#8812](https://github.com/pingcap/tiflow/issues/8812) [#8877](https://github.com/pingcap/tiflow/issues/8877) @[asddongmen](https://github.com/asddongmen) + + + TiDB Lightning + + - Fix the issue that in Logical Import Mode, deleting tables downstream during import might cause TiDB Lightning metadata not to be updated in time [#44614](https://github.com/pingcap/tidb/issues/44614) @[dsdashun](https://github.com/dsdashun) + - Fix the issue that disk quota might be inaccurate due to competing conditions [#44867](https://github.com/pingcap/tidb/issues/44867) @[D3Hunter](https://github.com/D3Hunter) + - Fix the issue of `write to tikv with no leader returned` when importing a large amount of data [#43055](https://github.com/pingcap/tidb/issues/43055) @[lance6716](https://github.com/lance6716) + - Fix a possible OOM problem when there is an unclosed delimiter in the data file [#40400](https://github.com/pingcap/tidb/issues/40400) @[buchuitoudegou](https://github.com/buchuitoudegou) + - Fix the issue that OOM might occur when importing 
a wide table [#43728](https://github.com/pingcap/tidb/issues/43728) @[D3Hunter](https://github.com/D3Hunter) + + + TiDB Binlog + + - Fix the issue that the etcd client does not automatically synchronize the latest node information during initialization [#1236](https://github.com/pingcap/tidb-binlog/issues/1236) @[lichunzhu](https://github.com/lichunzhu) + - Fix the panic issue of Drainer due to an old TiKV client version by upgrading the TiKV client [#1170](https://github.com/pingcap/tidb-binlog/issues/1170) @[lichunzhu](https://github.com/lichunzhu) + - Fix the issue that unfiltered failed DDL statements cause task errors [#1228](https://github.com/pingcap/tidb-binlog/issues/1228) @[lichunzhu](https://github.com/lichunzhu) diff --git a/releases/release-6.2.0.md b/releases/release-6.2.0.md index ddd210af075a4..06d58def94ec0 100644 --- a/releases/release-6.2.0.md +++ b/releases/release-6.2.0.md @@ -121,7 +121,7 @@ In v6.2.0-DMR, the key new features and improvements are as follows: In TiDB v6.2.0, data is stored in the new storage format by default. Note that if TiFlash is upgraded from earlier versions to v6.2.0, you cannot perform in-place downgrade on TiFlash, because earlier TiFlash versions cannot recognize the new storage format. - For more information about upgrading TiFlash, see [TiFlash v6.2.0 Upgrade Guide](/tiflash-620-upgrade-guide.md). + For more information about upgrading TiFlash, see [TiFlash Upgrade Guide](/tiflash-upgrade-guide.md). 
[User document](/tiflash/tiflash-configuration.md#configure-the-tiflashtoml-file) [#3594](https://github.com/pingcap/tiflash/issues/3594) @[JaySon-Huang](https://github.com/JaySon-Huang) @[lidezhu](https://github.com/lidezhu) @[jiaqizho](https://github.com/jiaqizho) @@ -283,7 +283,9 @@ In v6.2.0-DMR, the key new features and improvements are as follows: | TiKV | [log-backup.initial-scan-rate-limit](/tikv-configuration-file.md#initial-scan-rate-limit-new-in-v620) | Newly added | This configuration specifies the rate limit on throughput in an incremental data scan in log backup. | | TiKV | [log-backup.num-threads](/tikv-configuration-file.md#num-threads-new-in-v620) | Newly added | This configuration specifies the number of threads used in log backup. | | TiKV | [log-backup.temp-path](/tikv-configuration-file.md#temp-path-new-in-v620) | Newly added | This configuration specifies temporary path to which log files are written before being flushed to external storage. | -| TiKV | [rocksdb.defaultcf|writecf|lockcf.format-version](/tikv-configuration-file.md#format-version-new-in-v620) | Newly added | The format version of SST files. | +| TiKV | [rocksdb.defaultcf.format-version](/tikv-configuration-file.md#format-version-new-in-v620) | Newly added | The format version of SST files. | +| TiKV | [rocksdb.writecf.format-version](/tikv-configuration-file.md#format-version-new-in-v620) | Newly added | The format version of SST files. | +| TiKV | [rocksdb.lockcf.format-version](/tikv-configuration-file.md#format-version-new-in-v620) | Newly added | The format version of SST files. | | PD | replication-mode.dr-auto-sync.wait-async-timeout | Deleted | This configuration does not take effect and is deleted. | | PD | replication-mode.dr-auto-sync.wait-sync-timeout | Deleted | This configuration does not take effect and is deleted. 
| | TiFlash | [`storage.format_version`](/tiflash/tiflash-configuration.md#configure-the-tiflashtoml-file) | Modified | The default value of `format_version` changes to `4`, the default format for v6.2.0 and later versions, which reduces write amplification and background task resource consumption. | @@ -299,7 +301,7 @@ In v6.2.0-DMR, the key new features and improvements are as follows: ### Others -- TiFlash `format_version` cannot be downgraded from `4` to `3`. For details, see [TiFlash v6.2.0 Upgrade Guide](/tiflash-620-upgrade-guide.md). +- TiFlash `format_version` cannot be downgraded from `4` to `3`. For details, see [TiFlash Upgrade Guide](/tiflash-upgrade-guide.md). - In v6.2.0 and later versions, it is strongly recommended to keep the default value `false` of `dt_enable_logical_split` and not to change it to `true`. For details, see known issue [#5576](https://github.com/pingcap/tiflash/issues/5576). - If the backup cluster has a TiFlash replica, after you perform PITR, the restoration cluster does not contain the data in the TiFlash replica. To restore data from the TiFlash replica, you need to manually configure TiFlash replicas. Executing the `exchange partition` DDL statement might result in a failure of PITR. If the upstream database uses TiDB Lightning's physical import mode to import data, the data cannot be backed up in log backup. It is recommended to perform a full backup after the data import. For other compatibility issues of PITR, see [PITR limitations](/br/backup-and-restore-overview.md#before-you-use). - Since TiDB v6.2.0, you can restore table in `mysql` schema by specifying the parameter `--with-sys-table=true` when restoring data. 
diff --git a/releases/release-6.3.0.md b/releases/release-6.3.0.md index 9d042b9ebcb4f..d5e0a5f1b8e6c 100644 --- a/releases/release-6.3.0.md +++ b/releases/release-6.3.0.md @@ -8,6 +8,10 @@ Release date: September 30, 2022 TiDB version: 6.3.0-DMR +> **Note:** +> +> The TiDB 6.3.0-DMR documentation has been [archived](https://docs-archive.pingcap.com/tidb/v6.3/). PingCAP encourages you to use [the latest LTS version](https://docs.pingcap.com/tidb/stable) of the TiDB database. + Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.3/quick-start-with-tidb) | [Installation packages](https://www.pingcap.com/download/?version=v6.3.0#version-list) In v6.3.0-DMR, the key new features and improvements are as follows: @@ -397,7 +401,7 @@ Since v6.3.0, TiCDC no longer supports configuring Pulsar sink. [kop](https://gi - Fix the issue that might cause the information of the checkpoint being stale [#36423](https://github.com/pingcap/tidb/issues/36423) @[YuJuncen](https://github.com/YuJuncen) - Fix the issue that the regions are not balanced because the concurrency is set too large during the restoration [#37549](https://github.com/pingcap/tidb/issues/37549) @[3pointer](https://github.com/3pointer) - Fix the issue that might cause log backup checkpoint TS stuck when TiCDC exists in the cluster [#37822](https://github.com/pingcap/tidb/issues/37822) @[YuJuncen](https://github.com/YuJuncen) - - Fix the issue that might lead to backup and restoration failure if special characters exist in the authorization key of external storage [#37469](https://github.com/pingcap/tidb/issues/37469) [@MoCuishle28](https://github.com/MoCuishle28) + - Fix the issue that might lead to backup and restoration failure if special characters exist in the authorization key of external storage [#37469](https://github.com/pingcap/tidb/issues/37469) @[MoCuishle28](https://github.com/MoCuishle28) + TiCDC diff --git a/releases/release-6.4.0.md b/releases/release-6.4.0.md index 
888359c8d1d6a..98e1499960f1a 100644 --- a/releases/release-6.4.0.md +++ b/releases/release-6.4.0.md @@ -8,6 +8,10 @@ Release date: November 17, 2022 TiDB version: 6.4.0-DMR +> **Note:** +> +> The TiDB 6.4.0-DMR documentation has been [archived](https://docs-archive.pingcap.com/tidb/v6.4/). PingCAP encourages you to use [the latest LTS version](https://docs.pingcap.com/tidb/stable) of the TiDB database. + Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.4/quick-start-with-tidb) | [Installation packages](https://www.pingcap.com/download/?version=v6.4.0#version-list) In v6.4.0-DMR, the key new features and improvements are as follows: diff --git a/releases/release-6.5.0.md b/releases/release-6.5.0.md index 2e29b124709cf..f08b24fb32795 100644 --- a/releases/release-6.5.0.md +++ b/releases/release-6.5.0.md @@ -13,7 +13,14 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.5/quick-start-with- TiDB 6.5.0 is a Long-Term Support Release (LTS). -Compared with the previous LTS 6.1.0, 6.5.0 not only includes new features, improvements, and bug fixes released in [6.2.0-DMR](/releases/release-6.2.0.md), [6.3.0-DMR](/releases/release-6.3.0.md), [6.4.0-DMR](/releases/release-6.4.0.md), but also introduces the following key features and improvements: +Compared with TiDB [6.4.0-DMR](/releases/release-6.4.0.md), TiDB 6.5.0 introduces the following key features and improvements: + +> **Tip:** +> +> Compared with the previous LTS 6.1.0, TiDB 6.5.0 also includes new features, improvements, and bug fixes released in [6.2.0-DMR](/releases/release-6.2.0.md), [6.3.0-DMR](/releases/release-6.3.0.md), and [6.4.0-DMR](/releases/release-6.4.0.md). +> +> - To get a full list of changes between the 6.1.0 LTS and 6.5.0 LTS versions, in addition to this release notes, also see [6.2.0-DMR release notes](/releases/release-6.2.0.md), [6.3.0-DMR release notes](/releases/release-6.3.0.md), and [6.4.0-DMR release notes](/releases/release-6.4.0.md). 
+> - To have a quick comparison of key features between the 6.1.0 LTS and 6.5.0 LTS versions, you can check the `v6.1` and `v6.5` columns in [TiDB features](/basic-features.md). - The [index acceleration](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630) feature becomes generally available (GA), which improves the performance of adding indexes by about 10 times compared with v6.1.0. - The TiDB global memory control becomes GA, and you can control the memory consumption threshold via [`tidb_server_memory_limit`](/system-variables.md#tidb_server_memory_limit-new-in-v640). @@ -270,11 +277,11 @@ Compared with the previous LTS 6.1.0, 6.5.0 not only includes new features, impr To keep security of the database system, you need to set an expiration policy for the certificate used by the system. After the expiration period, the system needs a new certificate. TiCDC v6.5.0 supports online updates of TLS certificates. Without interrupting the replication tasks, TiCDC can automatically detect and update the certificate, without the need for manual intervention. -* TiCDC performance improves significantly [#7540](https://github.com/pingcap/tiflow/issues/7540) [#7478](https://github.com/pingcap/tiflow/issues/7478) [#7532](https://github.com/pingcap/tiflow/issues/7532) @[sdojjy](https://github.com/sdojjy) [@3AceShowHand](https://github.com/3AceShowHand) +* TiCDC performance improves significantly [#7540](https://github.com/pingcap/tiflow/issues/7540) [#7478](https://github.com/pingcap/tiflow/issues/7478) [#7532](https://github.com/pingcap/tiflow/issues/7532) @[sdojjy](https://github.com/sdojjy) @[3AceShowHand](https://github.com/3AceShowHand) - In a test scenario of the TiDB cluster, the performance of TiCDC has improved significantly. Specifically, in the scenario of replicating data to Kafka, the maximum row changes that a single TiCDC can process reaches 30K rows/s, and the replication latency is reduced to 10s. 
Even during TiKV and TiCDC rolling upgrade, the replication latency is less than 30s. + In a test scenario of the TiDB cluster, the performance of TiCDC has improved significantly. Specifically, in the scenario of [replicating data to Kafka](/replicate-data-to-kafka.md), the maximum row changes that a single TiCDC can process reaches 30K rows/s, and the replication latency is reduced to 10s. Even during TiKV and TiCDC rolling upgrade, the replication latency is less than 30s. - In a disaster recovery (DR) scenario, if TiCDC redo log and Syncpoint are enabled, the TiCDC throughput of [replicating data to Kafka](/replicate-data-to-kafka.md) can be improved from 4000 rows/s to 35000 rows/s, and the replication latency can be limited to 2s. + In a disaster recovery (DR) scenario, if TiCDC redo log and Syncpoint are enabled, the TiCDC throughput can be improved from 4000 rows/s to 35000 rows/s, and the replication latency can be limited to 2s. ### Backup and restore @@ -284,7 +291,7 @@ Compared with the previous LTS 6.1.0, 6.5.0 not only includes new features, impr Note that if you do not recover the system from a failure within one hour after BR exits, the snapshot data to be backed up might be recycled by the GC mechanism, causing the backup to fail. For more information, see [documentation](/br/br-checkpoint-backup.md#backup-retry-must-be-prior-to-gc). -* PITR performance improved remarkably [@joccau](https://github.com/joccau) +* PITR performance improved remarkably @[joccau](https://github.com/joccau) In the log restore stage, the restore speed of one TiKV can reach 9 MiB/s, which is 50% faster than before. The restore speed is scalable and the RTO in DR scenarios is reduced greatly. The RPO in DR scenarios can be as short as 5 minutes. In normal cluster operation and maintenance (OM), for example, a rolling upgrade is performed or only one TiKV is down, the RPO can be 5 minutes. 
@@ -385,8 +392,8 @@ Starting from v6.5.0, the `AMEND TRANSACTION` mechanism introduced in v4.0.7 is - Reduce waiting time on failure recovery by notifying TiKV to wake up Regions [#13648](https://github.com/tikv/tikv/issues/13648) @[LykxSassinator](https://github.com/LykxSassinator) - Reduce the requested size of memory usage by code optimization [#13827](https://github.com/tikv/tikv/issues/13827) @[BusyJay](https://github.com/BusyJay) - Introduce the Raft extension to improve code extensibility [#13827](https://github.com/tikv/tikv/issues/13827) @[BusyJay](https://github.com/BusyJay) - - Support using tikv-ctl to query which Regions are included in a certain key range [#13760](https://github.com/tikv/tikv/issues/13760) [@HuSharp](https://github.com/HuSharp) - - Improve read and write performance for rows that are not updated but locked continuously [#13694](https://github.com/tikv/tikv/issues/13694) [@sticnarf](https://github.com/sticnarf) + - Support using tikv-ctl to query which Regions are included in a certain key range [#13760](https://github.com/tikv/tikv/issues/13760) @[HuSharp](https://github.com/HuSharp) + - Improve read and write performance for rows that are not updated but locked continuously [#13694](https://github.com/tikv/tikv/issues/13694) @[sticnarf](https://github.com/sticnarf) + PD diff --git a/releases/release-6.5.1.md b/releases/release-6.5.1.md index 562cbe211e7cc..d2be099af32e5 100644 --- a/releases/release-6.5.1.md +++ b/releases/release-6.5.1.md @@ -104,7 +104,7 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.5/quick-start-with- - Fix the issue that IndexMerge plans might generate incorrect ranges on the SET type columns [#41273](https://github.com/pingcap/tidb/issues/41273) [#41293](https://github.com/pingcap/tidb/issues/41293) @[time-and-fate](https://github.com/time-and-fate) - Fix the issue that Plan Cache might cache FullScan plans when processing `int_col decimal` conditions 
[#40679](https://github.com/pingcap/tidb/issues/40679) [#41032](https://github.com/pingcap/tidb/issues/41032) @[qw4990](https://github.com/qw4990) - Fix the issue that Plan Cache might cache FullScan plans when processing `int_col in (decimal...)` conditions [#40224](https://github.com/pingcap/tidb/issues/40224) @[qw4990](https://github.com/qw4990) - - Fix the issue that the `ignore_plan_cache` hint might not work for `INSERT` statements [#40079](https://github.com/pingcap/tidb/issues/40079) [#39717](https://github.com/pingcap/tidb/issues/39717) @[qw4990](https://github.com/qw4990) + - Fix the issue that the `ignore_plan_cache` hint might not work for `INSERT` statements [#40079](https://github.com/pingcap/tidb/issues/40079) [#39717](https://github.com/pingcap/tidb/issues/39717) @[qw4990](https://github.com/qw4990) - Fix the issue that Auto Analyze might hinder TiDB from exiting [#40038](https://github.com/pingcap/tidb/issues/40038) @[xuyifangreeneyes](https://github.com/xuyifangreeneyes) - Fix the issue that incorrect access intervals might be constructed on Unsigned Primary Keys in partitioned tables [#40309](https://github.com/pingcap/tidb/issues/40309) @[winoros](https://github.com/winoros) - Fix the issue that Plan Cache might cache Shuffle operators and return incorrect results [#38335](https://github.com/pingcap/tidb/issues/38335) @[qw4990](https://github.com/qw4990) @@ -121,7 +121,7 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.5/quick-start-with- - Fix the issue that Resolved TS causes higher network traffic [#14092](https://github.com/tikv/tikv/issues/14092) @[overvenus](https://github.com/overvenus) - Fix the data inconsistency issue caused by network failure between TiDB and TiKV during the execution of a DML after a failed pessimistic DML [#14038](https://github.com/tikv/tikv/issues/14038) @[MyonKeminta](https://github.com/MyonKeminta) - Fix an error that occurs when casting the `const Enum` type to other types 
[#14156](https://github.com/tikv/tikv/issues/14156) @[wshwsh12](https://github.com/wshwsh12) - - Fix the issue that the paging in a cop task is inaccurate [#14254](https://github.com/tikv/tikv/issues/14254) @[you06](https://github.com/you06) + - Fix the issue that the paging in a cop task is inaccurate [#14254](https://github.com/tikv/tikv/issues/14254) @[you06](https://github.com/you06) - Fix the issue that the `scan_detail` field is inaccurate in `batch_cop` mode [#14109](https://github.com/tikv/tikv/issues/14109) @[you06](https://github.com/you06) - Fix a potential error in the Raft Engine that might cause TiKV to detect Raft data corruption and fail to restart [#14338](https://github.com/tikv/tikv/issues/14338) @[tonyxuqqi](https://github.com/tonyxuqqi) @@ -155,14 +155,14 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.5/quick-start-with- - Fix the issue that the frequency of `resolve lock` is too high when there is no PITR backup task in the TiDB cluster [#40759](https://github.com/pingcap/tidb/issues/40759) @[joccau](https://github.com/joccau) - Fix the issue that restoring data to a cluster on which the log backup is running causes the log backup file unable to be restored [#40797](https://github.com/pingcap/tidb/issues/40797) @[Leavrth](https://github.com/Leavrth) - Fix the panic issue that occurs when attempting to resume backup from a checkpoint after a full backup failure [#40704](https://github.com/pingcap/tidb/issues/40704) @[Leavrth](https://github.com/Leavrth) - - Fix the issue that PITR errors are overwritten [#40576](https://github.com/pingcap/tidb/issues/40576)@[Leavrth](https://github.com/Leavrth) + - Fix the issue that PITR errors are overwritten [#40576](https://github.com/pingcap/tidb/issues/40576) @[Leavrth](https://github.com/Leavrth) - Fix the issue that checkpoints do not advance in PITR backup tasks when the advance owner and gc owner are different [#41806](https://github.com/pingcap/tidb/issues/41806) 
@[joccau](https://github.com/joccau) + TiCDC - Fix the issue that changefeed might get stuck in special scenarios such as when scaling in or scaling out TiKV or TiCDC nodes [#8174](https://github.com/pingcap/tiflow/issues/8174) @[hicqu](https://github.com/hicqu) - Fix the issue that precheck is not performed on the storage path of redo log [#6335](https://github.com/pingcap/tiflow/issues/6335) @[CharlesCheung96](https://github.com/CharlesCheung96) - - Fix the issue of insufficient duration that redo log can tolerate for S3 storage failure [#8089](https://github.com/pingcap/tiflow/issues/8089) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the issue of insufficient duration that redo log can tolerate for S3 storage failure [#8089](https://github.com/pingcap/tiflow/issues/8089) @[CharlesCheung96](https://github.com/CharlesCheung96) - Fix the issue that `transaction_atomicity` and `protocol` cannot be updated via the configuration file [#7935](https://github.com/pingcap/tiflow/issues/7935) @[CharlesCheung96](https://github.com/CharlesCheung96) - Fix the issue that the checkpoint cannot advance when TiCDC replicates an excessively large number of tables [#8004](https://github.com/pingcap/tiflow/issues/8004) @[overvenus](https://github.com/overvenus) - Fix the issue that applying redo log might cause OOM when the replication lag is excessively high [#8085](https://github.com/pingcap/tiflow/issues/8085) @[CharlesCheung96](https://github.com/CharlesCheung96) diff --git a/releases/release-6.5.2.md b/releases/release-6.5.2.md index bfeaedae051fc..53a6eded2e74a 100644 --- a/releases/release-6.5.2.md +++ b/releases/release-6.5.2.md @@ -55,10 +55,10 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.5/quick-start-with- - Fix the issue of missing table names in the `ADMIN SHOW DDL JOBS` result when a `DROP TABLE` operation is being executed [#42268](https://github.com/pingcap/tidb/issues/42268) @[tiancaiamao](https://github.com/tiancaiamao) - Fix 
the issue that TiDB server cannot start due to an error in reading the cgroup information with the error message "can't read file memory.stat from cgroup v1: open /sys/memory.stat no such file or directory" [#42659](https://github.com/pingcap/tidb/issues/42659) @[hawkingrei](https://github.com/hawkingrei) - Fix frequent write conflicts in transactions when performing DDL data backfill [#24427](https://github.com/pingcap/tidb/issues/24427) @[mjonss](https://github.com/mjonss) - - Fix the issue that TiDB panic occurs due to inconsistent InfoSchema being obtained when generating the execution plan [#41622](https://github.com/pingcap/tidb/issues/41622) [@tiancaiamao](https://github.com/tiancaiamao) - - Fix the issue that when modifying the floating-point type using DDL to keep the length unchanged and reduce the decimal places, the old data still remains the same [#41281](https://github.com/pingcap/tidb/issues/41281) [@zimulala](https://github.com/zimulala) + - Fix the issue that TiDB panic occurs due to inconsistent InfoSchema being obtained when generating the execution plan [#41622](https://github.com/pingcap/tidb/issues/41622) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that when modifying the floating-point type using DDL to keep the length unchanged and reduce the decimal places, the old data still remains the same [#41281](https://github.com/pingcap/tidb/issues/41281) @[zimulala](https://github.com/zimulala) - Fix the issue that after executing `PointUpdate` within a transaction, TiDB returns incorrect results for the `SELECT` statement [#28011](https://github.com/pingcap/tidb/issues/28011) @[zyguan](https://github.com/zyguan) - - Fix the issue that, when using Cursor Fetch and running other statements among Execute, Fetch, and Close, the Fetch and Close commands might return incorrect results or cause TiDB to panic [#40094](https://github.com/pingcap/tidb/issues/40094) [@YangKeao](https://github.com/YangKeao) + - Fix the issue that, when 
using Cursor Fetch and running other statements among Execute, Fetch, and Close, the Fetch and Close commands might return incorrect results or cause TiDB to panic [#40094](https://github.com/pingcap/tidb/issues/40094) @[YangKeao](https://github.com/YangKeao) - Fix the issue that `INSERT IGNORE` and `REPLACE` statements do not lock keys that do not modify values [#42121](https://github.com/pingcap/tidb/issues/42121) @[zyguan](https://github.com/zyguan) - Fix the issue that TiFlash reports an error for generated columns during execution [#40663](https://github.com/pingcap/tidb/issues/40663) @[guo-shaoge](https://github.com/guo-shaoge) - Fix the issue that TiDB might produce incorrect results when different partitioned tables appear in a single SQL statement [#42135](https://github.com/pingcap/tidb/issues/42135) @[mjonss](https://github.com/mjonss) @@ -101,7 +101,7 @@ Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.5/quick-start-with- - Fix the issue that the memory usage of `db sorter` is not controlled by `cgroup memory limit` [#8588](https://github.com/pingcap/tiflow/issues/8588) @[amyangfei](https://github.com/amyangfei) - Fix the issue that data loss might occur in special cases during the apply of Redo log [#8591](https://github.com/pingcap/tiflow/issues/8591) @[CharlesCheung96](https://github.com/CharlesCheung96) - Fix the issue that the memory usage of `db sorter` is not controlled by `cgroup memory limit` [#8588](https://github.com/pingcap/tiflow/issues/8588) @[amyangfei](https://github.com/amyangfei) - - Fix the issue that the disorder of `UPDATE` and `INSERT` statements during data replication might cause the `Duplicate entry` error [#8597](https://github.com/pingcap/tiflow/issues/8597) @[sdojjy](https://github.com/sojjy) + - Fix the issue that the disorder of `UPDATE` and `INSERT` statements during data replication might cause the `Duplicate entry` error [#8597](https://github.com/pingcap/tiflow/issues/8597) 
@[sdojjy](https://github.com/sdojjy) - Fix the abnormal exit issue of the TiCDC service caused by network isolation between PD and TiCDC [#8562](https://github.com/pingcap/tiflow/issues/8562) @[overvenus](https://github.com/overvenus) - Fix the issue that graceful upgrade for TiCDC clusters fails on Kubernetes [#8484](https://github.com/pingcap/tiflow/issues/8484) @[overvenus](https://github.com/overvenus) - Fix the issue that the TiCDC server panics when all downstream Kafka servers are unavailable [#8523](https://github.com/pingcap/tiflow/issues/8523) @[3AceShowHand](https://github.com/3AceShowHand) diff --git a/releases/release-6.5.3.md b/releases/release-6.5.3.md new file mode 100644 index 0000000000000..385a70d6aff68 --- /dev/null +++ b/releases/release-6.5.3.md @@ -0,0 +1,132 @@ +--- +title: TiDB 6.5.3 Release Notes +summary: Learn about the compatibility changes, improvements, and bug fixes in TiDB 6.5.3. +--- + +# TiDB 6.5.3 Release Notes + +Release date: June 14, 2023 + +TiDB version: 6.5.3 + +Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.5/quick-start-with-tidb) | [Production deployment](https://docs.pingcap.com/tidb/v6.5/production-deployment-using-tiup) | [Installation packages](https://www.pingcap.com/download/?version=v6.5.3#version-list) + +## Improvements + ++ TiDB + + - Improve the performance of `TRUNCATE` on partitioned tables with Placement Rules [#43070](https://github.com/pingcap/tidb/issues/43070) @[Lloyd-Pottiger](https://github.com/Lloyd-Pottiger) + - Avoid invalid Stale Read retries after resolving locks [#43659](https://github.com/pingcap/tidb/issues/43659) @[you06](https://github.com/you06) + - Reduce latency by using leader read when Stale Read encounters the `DataIsNotReady` error [#765](https://github.com/tikv/client-go/pull/765) @[Tema](https://github.com/Tema) + - Add `Stale Read OPS` and `Stale Read MBps` metrics to track hit rate and traffic when using Stale Read 
[#43325](https://github.com/pingcap/tidb/issues/43325) @[you06](https://github.com/you06) + ++ TiKV + + - Reduce traffic by using gzip to compress `check_leader` requests [#14839](https://github.com/tikv/tikv/issues/14839) @[cfzjywxk](https://github.com/cfzjywxk) + ++ PD + + - Use a separate gRPC connection for PD leader election to prevent the impact of other requests [#6403](https://github.com/tikv/pd/issues/6403) @[rleungx](https://github.com/rleungx) + ++ Tools + + + TiCDC + + - Optimize the way TiCDC handles DDLs so that DDLs do not block the use of other unrelated DML Events, and reduce memory usage [#8106](https://github.com/pingcap/tiflow/issues/8106) @[asddongmen](https://github.com/asddongmen) + - Optimize the Decoder interface and add a new method `AddKeyValue` [#8861](https://github.com/pingcap/tiflow/issues/8861) @[3AceShowHand](https://github.com/3AceShowHand) + - Optimize the directory structure when DDL events occur in the scenario of replicating data to object storage [#8890](https://github.com/pingcap/tiflow/issues/8890) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Support replicating data to the Kafka-on-Pulsar downstream [#8892](https://github.com/pingcap/tiflow/issues/8892) @[hi-rustin](https://github.com/hi-rustin) + - Support using the OAuth protocol for validation when replicating data to Kafka [#8865](https://github.com/pingcap/tiflow/issues/8865) @[hi-rustin](https://github.com/hi-rustin) + - Optimize the way TiCDC handles the `UPDATE` statement during data replication using the Avro or CSV protocol, by splitting `UPDATE` into `DELETE` and `INSERT` statements, so that you can get the old value from the `DELETE` statement [#9086](https://github.com/pingcap/tiflow/issues/9086) @[3AceShowHand](https://github.com/3AceShowHand) + - Add a configuration item `insecure-skip-verify` to control whether to set the authentication algorithm in the scenario of enabling TLS [#8867](https://github.com/pingcap/tiflow/issues/8867) 
@[hi-rustin](https://github.com/hi-rustin) + - Optimize DDL replication operations to mitigate the impact of DDL operations on downstream latency [#8686](https://github.com/pingcap/tiflow/issues/8686) @[hi-rustin](https://github.com/hi-rustin) + - Optimize the method of setting GC TTL for the upstream when the TiCDC replication task fails [#8403](https://github.com/pingcap/tiflow/issues/8403) @[charleszheng44](https://github.com/charleszheng44) + + + TiDB Binlog + + - Optimize the method of retrieving table information to reduce the initialization time and memory usage of Drainer [#1137](https://github.com/pingcap/tidb-binlog/issues/1137) @[lichunzhu](https://github.com/lichunzhu) + +## Bug fixes + ++ TiDB + + - Fix the issue that the `min, max` query result is incorrect [#43805](https://github.com/pingcap/tidb/issues/43805) @[wshwsh12](https://github.com/wshwsh12) + - Fix the issue of incorrect execution plans when pushing down window functions to TiFlash [#43922](https://github.com/pingcap/tidb/issues/43922) @[gengliqi](https://github.com/gengliqi) + - Fix the issue that the query with CTE causes TiDB to hang [#43749](https://github.com/pingcap/tidb/issues/43749) [#36896](https://github.com/pingcap/tidb/issues/36896) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that the SQL statement reports the `runtime error: index out of range` error when using the `AES_DECRYPT` expression [#43063](https://github.com/pingcap/tidb/issues/43063) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that the `SHOW PROCESSLIST` statement cannot display the TxnStart of the transaction of the statement with a long subquery time [#40851](https://github.com/pingcap/tidb/issues/40851) @[crazycs520](https://github.com/crazycs520) + - Fix the issue that PD isolation might block the running DDL [#44014](https://github.com/pingcap/tidb/issues/44014) [#43755](https://github.com/pingcap/tidb/issues/43755) [#44267](https://github.com/pingcap/tidb/issues/44267) 
@[wjhuang2016](https://github.com/wjhuang2016) + - Fix the TiDB panic issue that occurs when querying union views and temporary tables with `UNION` [#42563](https://github.com/pingcap/tidb/issues/42563) @[lcwangchao](https://github.com/lcwangchao) + - Fix the behavior issue of Placement Rules in partitioned tables, so that the Placement Rules in deleted partitions can be correctly set and recycled [#44116](https://github.com/pingcap/tidb/issues/44116) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that truncating a partition of a partitioned table might cause the Placement Rule of the partition to become invalid [#44031](https://github.com/pingcap/tidb/issues/44031) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that TiCDC might lose some row changes during table renaming [#43338](https://github.com/pingcap/tidb/issues/43338) @[tangenta](https://github.com/tangenta) + - Fix the issue that the DDL job history is lost after importing a table using BR [#43725](https://github.com/pingcap/tidb/issues/43725) @[tangenta](https://github.com/tangenta) + - Fix the issue that `JSON_OBJECT` might report an error in some cases [#39806](https://github.com/pingcap/tidb/issues/39806) @[YangKeao](https://github.com/YangKeao) + - Fix the issue that the cluster cannot query some system views in IPv6 environment [#43286](https://github.com/pingcap/tidb/issues/43286) @[Defined2014](https://github.com/Defined2014) + - Fix the issue that when the PD member address changes, allocating ID for the `AUTO_INCREMENT` column will be blocked for a long time [#42643](https://github.com/pingcap/tidb/issues/42643) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that TiDB sends duplicate requests to PD during placement rules recycling, causing numerous `full config reset` entries in the PD log [#33069](https://github.com/pingcap/tidb/issues/33069) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that the `SHOW PRIVILEGES` statement 
returns an incomplete privilege list [#40591](https://github.com/pingcap/tidb/issues/40591) @[CbcWestwolf](https://github.com/CbcWestwolf) + - Fix the issue that `ADMIN SHOW DDL JOBS LIMIT` returns incorrect results [#42298](https://github.com/pingcap/tidb/issues/42298) @[CbcWestwolf](https://github.com/CbcWestwolf) + - Fix the issue that the `tidb_auth_token` user fails to be created when the password complexity check is enabled [#44098](https://github.com/pingcap/tidb/issues/44098) @[CbcWestwolf](https://github.com/CbcWestwolf) + - Fix the issue of not finding the partition during inner join in dynamic pruning mode [#43686](https://github.com/pingcap/tidb/issues/43686) @[mjonss](https://github.com/mjonss) + - Fix the issue that the `Data Truncated` warning occurs when executing `MODIFY COLUMN` on a partitioned table [#41118](https://github.com/pingcap/tidb/issues/41118) @[mjonss](https://github.com/mjonss) + - Fix the issue of displaying the incorrect TiDB address in IPv6 environment [#43260](https://github.com/pingcap/tidb/issues/43260) @[nexustar](https://github.com/nexustar) + - Fix the issue that CTE results are incorrect when pushing down predicates [#43645](https://github.com/pingcap/tidb/issues/43645) @[winoros](https://github.com/winoros) + - Fix the issue that incorrect results might be returned when using a common table expression (CTE) in statements with non-correlated subqueries [#44051](https://github.com/pingcap/tidb/issues/44051) @[winoros](https://github.com/winoros) + - Fix the issue that Join Reorder might cause incorrect outer join results [#44314](https://github.com/pingcap/tidb/issues/44314) @[AilinKid](https://github.com/AilinKid) + - Fix the issue that in some extreme cases, when the first statement of a pessimistic transaction is retried, resolving locks on this transaction might affect transaction correctness [#42937](https://github.com/pingcap/tidb/issues/42937) @[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that in 
some rare cases, residual pessimistic locks of pessimistic transactions might affect data correctness when GC resolves locks [#43243](https://github.com/pingcap/tidb/issues/43243) @[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that the scan detail information during the execution of `batch cop` might be inaccurate [#41582](https://github.com/pingcap/tidb/issues/41582) @[you06](https://github.com/you06) + - Fix the issue that TiDB cannot read data updates when Stale Read and `PREPARE` statements are used at the same time [#43044](https://github.com/pingcap/tidb/issues/43044) @[you06](https://github.com/you06) + - Fix the issue that an `assertion failed` error might be mistakenly reported when executing the `LOAD DATA` statement [#43849](https://github.com/pingcap/tidb/issues/43849) @[you06](https://github.com/you06) + - Fix the issue that the coprocessor cannot fall back to the leader when a `region data not ready` error occurs during the use of Stale Read [#43365](https://github.com/pingcap/tidb/issues/43365) @[you06](https://github.com/you06) + ++ TiKV + + - Fix the issue of file handle leakage in Continuous Profiling [#14224](https://github.com/tikv/tikv/issues/14224) @[tabokie](https://github.com/tabokie) + - Fix the issue that PD crash might cause PITR to fail to proceed [#14184](https://github.com/tikv/tikv/issues/14184) @[YuJuncen](https://github.com/YuJuncen) + - Fix the issue that encryption key ID conflict might cause the deletion of the old keys [#14585](https://github.com/tikv/tikv/issues/14585) @[tabokie](https://github.com/tabokie) + - Fix the issue that autocommit and point get replica read might break linearizability [#14715](https://github.com/tikv/tikv/issues/14715) @[cfzjywxk](https://github.com/cfzjywxk) + - Fix the performance degradation issue caused by accumulated lock records when a cluster is upgraded from a previous version to v6.5 or later versions [#14780](https://github.com/tikv/tikv/issues/14780) 
@[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that TiDB Lightning might cause SST file leakage [#14745](https://github.com/tikv/tikv/issues/14745) @[YuJuncen](https://github.com/YuJuncen) + - Fix the potential conflict between the encryption key and raft log file deletion that might cause TiKV to fail to start [#14761](https://github.com/tikv/tikv/issues/14761) @[Connor1996](https://github.com/Connor1996) + ++ TiFlash + + - Fix the performance degradation issue of the partition TableScan operator during Region transfer [#7519](https://github.com/pingcap/tiflash/issues/7519) @[Lloyd-Pottiger](https://github.com/Lloyd-Pottiger) + - Fix the issue that a TiFlash query might report an error if the `GENERATED` type field is present along with the `TIMESTAMP` or `TIME` type [#7468](https://github.com/pingcap/tiflash/issues/7468) @[Lloyd-Pottiger](https://github.com/Lloyd-Pottiger) + - Fix the issue that large update transactions might cause TiFlash to repeatedly report errors and restart [#7316](https://github.com/pingcap/tiflash/issues/7316) @[JaySon-Huang](https://github.com/JaySon-Huang) + - Fix the issue that the error "Truncate error cast decimal as decimal" occurs when reading data from TiFlash with the `INSERT SELECT` statement [#7348](https://github.com/pingcap/tiflash/issues/7348) @[windtalker](https://github.com/windtalker) + - Fix the issue that queries might consume more memory than needed when the data on the Join build side is very large and contains many small string type columns [#7416](https://github.com/pingcap/tiflash/issues/7416) @[yibin87](https://github.com/yibin87) + ++ Tools + + + Backup & Restore (BR) + + - Fix the issue that the error message "resolve lock timeout" of BR is misleading when a backup fails, which hides the actual error information [#43236](https://github.com/pingcap/tidb/issues/43236) @[YuJuncen](https://github.com/YuJuncen) + + + TiCDC + + - Fix an OOM issue that might occur when there are as many as 50,000 
tables [#7872](https://github.com/pingcap/tiflow/issues/7872) @[sdojjy](https://github.com/sdojjy) + - Fix the issue that TiCDC gets stuck when an OOM occurs in upstream TiDB [#8561](https://github.com/pingcap/tiflow/issues/8561) @[overvenus](https://github.com/overvenus) + - Fix the issue that TiCDC gets stuck when PD fails, such as during network isolation or a PD Owner node reboot [#8808](https://github.com/pingcap/tiflow/issues/8808) [#8812](https://github.com/pingcap/tiflow/issues/8812) [#8877](https://github.com/pingcap/tiflow/issues/8877) @[asddongmen](https://github.com/asddongmen) + - Fix the issue of TiCDC time zone setting [#8798](https://github.com/pingcap/tiflow/issues/8798) @[hi-rustin](https://github.com/hi-rustin) + - Fix the issue that checkpoint lag increases when one of the upstream TiKV nodes crashes [#8858](https://github.com/pingcap/tiflow/issues/8858) @[hicqu](https://github.com/hicqu) + - Fix the issue that when replicating data to downstream MySQL, a replication error occurs after the `FLASHBACK CLUSTER TO TIMESTAMP` statement is executed in the upstream TiDB [#8040](https://github.com/pingcap/tiflow/issues/8040) @[asddongmen](https://github.com/asddongmen) + - Fix the issue that when replicating data to object storage, the `EXCHANGE PARTITION` operation in the upstream cannot be properly replicated to the downstream [#8914](https://github.com/pingcap/tiflow/issues/8914) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the OOM issue caused by excessive memory usage of the sorter component in some special scenarios [#8974](https://github.com/pingcap/tiflow/issues/8974) @[hicqu](https://github.com/hicqu) + - Fix the issue that when the downstream is Kafka, TiCDC queries the downstream metadata too frequently and causes excessive workload in the downstream [#8957](https://github.com/pingcap/tiflow/issues/8957) [#8959](https://github.com/pingcap/tiflow/issues/8959) @[hi-rustin](https://github.com/hi-rustin) + - Fix the issue that when a 
replication error occurs due to an oversized Kafka message, the message body is recorded in the log [#9031](https://github.com/pingcap/tiflow/issues/9031) @[darraes](https://github.com/darraes) + - Fix the TiCDC node panic that occurs when the downstream Kafka sinks are rolling restarted [#9023](https://github.com/pingcap/tiflow/issues/9023) @[asddongmen](https://github.com/asddongmen) + - Fix the issue that when replicating data to storage services, the JSON file corresponding to downstream DDL statements does not record the default values of table fields [#9066](https://github.com/pingcap/tiflow/issues/9066) @[CharlesCheung96](https://github.com/CharlesCheung96) + + + TiDB Lightning + + - Fix the issue that OOM might occur when importing a wide table [#43728](https://github.com/pingcap/tidb/issues/43728) @[D3Hunter](https://github.com/D3Hunter) + - Fix the issue of `write to tikv with no leader returned` when importing a large amount of data [#43055](https://github.com/pingcap/tidb/issues/43055) @[lance6716](https://github.com/lance6716) + - Fix a possible OOM problem when there is an unclosed delimiter in the data file [#40400](https://github.com/pingcap/tidb/issues/40400) @[buchuitoudegou](https://github.com/buchuitoudegou) + - Add a retry mechanism when encountering an `unknown RPC` error during data import [#43291](https://github.com/pingcap/tidb/issues/43291) @[D3Hunter](https://github.com/D3Hunter) + + + TiDB Binlog + + - Fix the issue that TiDB Binlog reports an error when encountering a `CANCELED` DDL statement [#1228](https://github.com/pingcap/tidb-binlog/issues/1228) @[okJiang](https://github.com/okJiang) diff --git a/releases/release-6.6.0.md b/releases/release-6.6.0.md index cd778310caeff..34327e529665c 100644 --- a/releases/release-6.6.0.md +++ b/releases/release-6.6.0.md @@ -9,6 +9,10 @@ Release date: February 20, 2023 TiDB version: 6.6.0-[DMR](/releases/versioning.md#development-milestone-releases) +> **Note:** +> +> The TiDB 6.6.0-DMR 
documentation has been [archived](https://docs-archive.pingcap.com/tidb/v6.6/). PingCAP encourages you to use [the latest LTS version](https://docs.pingcap.com/tidb/stable) of the TiDB database. + Quick access: [Quick start](https://docs.pingcap.com/tidb/v6.6/quick-start-with-tidb) | [Installation package](https://www.pingcap.com/download/?version=v6.6.0#version-list) In v6.6.0-DMR, the key new features and improvements are as follows: @@ -50,7 +54,7 @@ In v6.6.0-DMR, the key new features and improvements are as follows:
- + @@ -172,19 +176,19 @@ In v6.6.0-DMR, the key new features and improvements are as follows: ### SQL -* Support MySQL-compatible foreign key constraints [#18209](https://github.com/pingcap/tidb/issues/18209) [@crazycs520](https://github.com/crazycs520) +* Support MySQL-compatible foreign key constraints [#18209](https://github.com/pingcap/tidb/issues/18209) @[crazycs520](https://github.com/crazycs520) TiDB v6.6.0 introduces the foreign key constraints feature, which is compatible with MySQL. This feature supports referencing within a table or between tables, constraints validation, and cascade operations. This feature helps to migrate applications to TiDB, maintain data consistency, improve data quality, and facilitate data modeling. For more information, see [documentation](/foreign-key.md). -* Support the MySQL-compatible multi-valued index (experimental) [#39592](https://github.com/pingcap/tidb/issues/39592) @[xiongjiwei](https://github.com/xiongjiwei) @[qw4990](https://github.com/qw4990) +* Support MySQL-compatible multi-valued indexes (experimental) [#39592](https://github.com/pingcap/tidb/issues/39592) @[xiongjiwei](https://github.com/xiongjiwei) @[qw4990](https://github.com/qw4990) - TiDB introduces the MySQL-compatible multi-valued index in v6.6.0. Filtering the values of an array in a JSON column is a common operation, but normal indexes cannot help speed up such an operation. Creating a multi-valued index on an array can greatly improve filtering performance. If an array in the JSON column has a multi-valued index, you can use the multi-value index to filter the retrieval conditions with `MEMBER OF()`, `JSON_CONTAINS()`, `JSON_OVERLAPS()` functions, thereby reducing much I/O consumption and improving operation speed. + TiDB introduces MySQL-compatible multi-valued indexes in v6.6.0. Filtering the values of an array in a JSON column is a common operation, but normal indexes cannot help speed up such an operation. 
Creating a multi-valued index on an array can greatly improve filtering performance. If an array in the JSON column has a multi-valued index, you can use the multi-valued index to filter the retrieval conditions with `MEMBER OF()`, `JSON_CONTAINS()`, `JSON_OVERLAPS()` functions, thereby reducing much I/O consumption and improving operation speed. Introducing multi-valued indexes further enhances TiDB's support for the JSON data type and also improves TiDB's compatibility with MySQL 8.0. - For more information, see [documentation](/sql-statements/sql-statement-create-index.md#multi-valued-index). + For more information, see [documentation](/sql-statements/sql-statement-create-index.md#multi-valued-indexes). ### DB operations @@ -337,9 +341,9 @@ In v6.6.0-DMR, the key new features and improvements are as follows: For more information, see the [SQL](#sql) section in this document and [documentation](/foreign-key.md). -* Support the MySQL-compatible multi-valued index (experimental) [#39592](https://github.com/pingcap/tidb/issues/39592) @[xiongjiwei](https://github.com/xiongjiwei) @[qw4990](https://github.com/qw4990) +* Support the MySQL-compatible multi-valued indexes (experimental) [#39592](https://github.com/pingcap/tidb/issues/39592) @[xiongjiwei](https://github.com/xiongjiwei) @[qw4990](https://github.com/qw4990) - For more information, see the [SQL](#sql) section in this document and [documentation](/sql-statements/sql-statement-create-index.md#multi-valued-index). + For more information, see the [SQL](#sql) section in this document and [documentation](/sql-statements/sql-statement-create-index.md#multi-valued-indexes). 
### System variables @@ -561,6 +565,7 @@ In v6.6.0-DMR, the key new features and improvements are as follows: - Fix the issue that querying TiFlash-related system tables might get stuck [#6745](https://github.com/pingcap/tiflash/pull/6745) @[lidezhu](https://github.com/lidezhu) - Fix the issue that semi-joins use excessive memory when calculating Cartesian products [#6730](https://github.com/pingcap/tiflash/issues/6730) @[gengliqi](https://github.com/gengliqi) - Fix the issue that the result of the division operation on the DECIMAL data type is not rounded [#6393](https://github.com/pingcap/tiflash/issues/6393) @[LittleFall](https://github.com/LittleFall) + - Fix the issue that `start_ts` cannot uniquely identify an MPP query in TiFlash queries, which might cause an MPP query to be incorrectly canceled [#43426](https://github.com/pingcap/tidb/issues/43426) @[hehechen](https://github.com/hehechen) + Tools @@ -580,7 +585,7 @@ In v6.6.0-DMR, the key new features and improvements are as follows: - Fix the issue that `transaction_atomicity` and `protocol` cannot be updated via the configuration file [#7935](https://github.com/pingcap/tiflow/issues/7935) @[CharlesCheung96](https://github.com/CharlesCheung96) - Fix the issue that precheck is not performed on the storage path of redo log [#6335](https://github.com/pingcap/tiflow/issues/6335) @[CharlesCheung96](https://github.com/CharlesCheung96) - - Fix the issue of insufficient duration that redo log can tolerate for S3 storage failure [#8089](https://github.com/pingcap/tiflow/issues/8089) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the issue of insufficient duration that redo log can tolerate for S3 storage failure [#8089](https://github.com/pingcap/tiflow/issues/8089) @[CharlesCheung96](https://github.com/CharlesCheung96) - Fix the issue that changefeed might get stuck in special scenarios such as when scaling in or scaling out TiKV or TiCDC nodes [#8174](https://github.com/pingcap/tiflow/issues/8174) 
@[hicqu](https://github.com/hicqu) - Fix the issue of too high traffic among TiKV nodes [#14092](https://github.com/tikv/tikv/issues/14092) @[overvenus](https://github.com/overvenus) - Fix the performance issues of TiCDC in terms of CPU usage, memory control, and throughput when the pull-based sink is enabled [#8142](https://github.com/pingcap/tiflow/issues/8142) [#8157](https://github.com/pingcap/tiflow/issues/8157) [#8001](https://github.com/pingcap/tiflow/issues/8001) [#5928](https://github.com/pingcap/tiflow/issues/5928) @[hicqu](https://github.com/hicqu) @[hi-rustin](https://github.com/hi-rustin) diff --git a/releases/release-7.0.0.md b/releases/release-7.0.0.md index c9e48788be394..37f5d91f399f6 100644 --- a/releases/release-7.0.0.md +++ b/releases/release-7.0.0.md @@ -248,7 +248,7 @@ In v7.0.0-DMR, the key new features and improvements are as follows: * [DBeaver](https://dbeaver.io/) v23.0.1 supports TiDB by default [#17396](https://github.com/dbeaver/dbeaver/issues/17396) @[Icemap](https://github.com/Icemap) - Provides an independent TiDB module, icon, and logo. - - The default configuration supports [TiDB Cloud Serverless Tier](https://docs.pingcap.com/tidbcloud/select-cluster-tier#serverless-tier-beta), making it easier to connect to Serverless Tier. + - The default configuration supports [TiDB Serverless](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless), making it easier to connect to TiDB Serverless. - Supports identifying TiDB versions to display or hide foreign key tabs. - Supports visualizing SQL execution plans in `EXPLAIN` results. - Supports highlighting TiDB keywords such as `PESSIMISTIC`, `OPTIMISTIC`, `AUTO_RANDOM`, `PLACEMENT`, `POLICY`, `REORGANIZE`, `EXCHANGE`, `CACHE`, `NONCLUSTERED`, and `CLUSTERED`. 
diff --git a/releases/release-7.1.0.md b/releases/release-7.1.0.md index 9a2b23d7ff4b3..bd9a1e60fc309 100644 --- a/releases/release-7.1.0.md +++ b/releases/release-7.1.0.md @@ -5,13 +5,15 @@ summary: Learn about the new features, compatibility changes, improvements, and # TiDB 7.1.0 Release Notes -TiDB version: 7.1.0 (upcoming) +Release date: May 31, 2023 -> **Note:** -> -> TiDB v7.1.0 is not yet available. This release note is a preview version to provide insights into upcoming features and is subject to change. The features outlined here are not guaranteed to be included in the final release. +TiDB version: 7.1.0 + +Quick access: [Quick start](https://docs.pingcap.com/tidb/v7.1/quick-start-with-tidb) | [Production deployment](https://docs.pingcap.com/tidb/v7.1/production-deployment-using-tiup) | [Installation packages](https://www.pingcap.com/download/?version=v7.1.0#version-list) -In v7.1.0, the key new features and improvements are as follows: +TiDB 7.1.0 is a Long-Term Support Release (LTS). + +Compared with the previous LTS 6.5.0, 7.1.0 not only includes new features, improvements, and bug fixes released in [6.6.0-DMR](/releases/release-6.6.0.md), [7.0.0-DMR](/releases/release-7.0.0.md), but also introduces the following key features and improvements:
Support MySQL-compatible foreign key constraints to maintain data consistency and improve data quality.
Multi-valued index (experimental)Multi-valued indexes (experimental) Introduce MySQL-compatible multi-valued indexes and enhance the JSON type to improve TiDB's compatibility with MySQL 8.0.
@@ -23,28 +25,58 @@ In v7.1.0, the key new features and improvements are as follows: - - - + + + + + + + - - + + - - - - + + + + + + + + + + + + - - + + - + + + + + + + + + + + + + +
Scalability and PerformanceSession-level plan cache for non-prepared statements (GA)Support automatically reusing plan cache at the session level to remove query planning time, reducing query time for repeat SQL patterns without manually setting prepare statements in advance.Scalability and PerformanceTiFlash supports the disaggregated storage and compute architecture and S3 shared storage (experimental, introduced in v7.0.0)TiFlash introduces a cloud-native architecture as an option: +
    +
  • Disaggregates TiFlash's compute and storage, which is a milestone for elastic HTAP resource utilization.
  • +
  • Introduces S3-based storage engine, which can provide shared storage at a lower cost.
  • +
+
TiKV supports batch aggregating data requests (introduced in v6.6.0) This enhancement significantly reduces total RPCs in TiKV batch-get operations. In situations where data is highly dispersed and the gRPC thread pool has insufficient resources, batching coprocessor requests can improve performance by more than 50%.
Load-based replica readIn a read hotspot scenario, TiDB can redirect read requests for a hotspot TiKV node to its replicas. This feature efficiently scatters read hotspots and optimizes the use of cluster resources. To control the threshold for triggering load-based replica read, you can adjust the system variable tidb_load_based_replica_read_threshold.Load-based replica readIn a read hotspot scenario, TiDB can redirect read requests for a hotspot TiKV node to its replicas. This feature efficiently scatters read hotspots and optimizes the use of cluster resources. To control the threshold for triggering load-based replica read, you can adjust the system variable tidb_load_based_replica_read_threshold.
Reliability and availabilityResource control by resource groups (GA)Support resource management based on resource groups, which maps database users to the corresponding resource groups and sets quotas for each resource group based on actual needs.
TiKV supports partitioned Raft KV storage engine (experimental)TiKV introduces a new generation of storage engine, the partitioned Raft KV. By allowing each data Region to have a dedicated RocksDB instance, it can expand the cluster's storage capacity from TB-level to PB-level and provide more stable write latency and stronger scalability.
Reliability and availabilityResource control by resource groups (GA)Support resource management based on resource groups, which allocates and isolates resources for different workloads in the same cluster. This feature significantly enhances the stability of multi-application clusters and lays the foundation for multi-tenancy. In v7.1.0, this feature introduces the ability to estimate system capacity based on actual workload or hardware deployment.
TiFlash supports spill to disk (introduced in v7.0.0)TiFlash supports intermediate result spill to disk to mitigate OOMs in data-intensive operations such as aggregations, sorts, and hash joins.
SQLMulti-valued index (GA)SQLMulti-valued indexes (GA) Support MySQL-compatible multi-valued indexes and enhance the JSON type to improve compatibility with MySQL 8.0. This feature improves the efficiency of membership checks on multi-valued columns.
Generated columns (GA)Row-level TTL (GA in v7.0.0)Support managing database size and improve performance by automatically expiring data of a certain age.
Generated columns (GA) Values in a generated column are calculated by a SQL expression in the column definition in real time. This feature pushes some application logic to the database level, thus improving query efficiency.
SecurityLDAP authenticationTiDB supports LDAP authentication, which is compatible with MySQL 8.0.
Audit log enhancement (Enterprise Edition only)TiDB Enterprise Edition enhances the database auditing feature. It significantly improves the system auditing capacity by providing more fine-grained event filtering controls, more user-friendly filter settings, a new file output format in JSON, and lifecycle management of audit logs.
@@ -52,9 +84,19 @@ In v7.1.0, the key new features and improvements are as follows: ### Performance +* Enhance the Partitioned Raft KV storage engine (experimental) [#11515](https://github.com/tikv/tikv/issues/11515) [#12842](https://github.com/tikv/tikv/issues/12842) @[busyjay](https://github.com/busyjay) @[tonyxuqqi](https://github.com/tonyxuqqi) @[tabokie](https://github.com/tabokie) @[bufferflies](https://github.com/bufferflies) @[5kbpers](https://github.com/5kbpers) @[SpadeA-Tang](https://github.com/SpadeA-Tang) @[nolouch](https://github.com/nolouch) + + TiDB v6.6.0 introduces the Partitioned Raft KV storage engine as an experimental feature, which uses multiple RocksDB instances to store TiKV Region data, and the data of each Region is independently stored in a separate RocksDB instance. The new storage engine can better control the number and level of files in the RocksDB instance, achieve physical isolation of data operations between Regions, and support stably managing more data. Compared with the original TiKV storage engine, using the Partitioned Raft KV storage engine can achieve about twice the write throughput and reduce the elastic scaling time by about 4/5 under the same hardware conditions and mixed read and write scenarios. + + In TiDB v7.1.0, the Partitioned Raft KV storage engine supports tools such as TiDB Lightning, BR, and TiCDC. + + Currently, this feature is experimental and not recommended for use in production environments. You can only use this engine in a newly created cluster and you cannot directly upgrade from the original TiKV storage engine. + + For more information, see [documentation](/partitioned-raft-kv.md). + * TiFlash supports late materialization (GA) [#5829](https://github.com/pingcap/tiflash/issues/5829) @[Lloyd-Pottiger](https://github.com/Lloyd-Pottiger) - In v7.0.0, late materialization was introduced in TiFlash as an experimental feature for optimizing query performance. 
This feature is disabled by default (the [`tidb_opt_enable_late_materialization`](/system-variables.md#tidb_opt_enable_late_materialization-new-in-v700) system variable defaults to `OFF`). When processing a `SELECT` statement with filter conditions (`WHERE` clause), TiFlash reads all the data from the columns required by the query, and then filters and aggregates the data based on the query conditions. When Late materialization is enabled, TiDB supports pushing down part of the filter conditions to the TableScan operator. That is, TiFlash first scans the column data related to the filter conditions that are pushed down to the TableScan operator, filters the rows that meet the condition, and then scans the other column data of these rows for further calculation, thereby reducing IO scans and computations of data processing. + In v7.0.0, late materialization was introduced in TiFlash as an experimental feature for optimizing query performance. This feature is disabled by default (the [`tidb_opt_enable_late_materialization`](/system-variables.md#tidb_opt_enable_late_materialization-new-in-v700) system variable defaults to `OFF`). When processing a `SELECT` statement with filter conditions (`WHERE` clause), TiFlash reads all the data from the columns required by the query, and then filters and aggregates the data based on the query conditions. When Late materialization is enabled, TiDB supports pushing down part of the filter conditions to the TableScan operator. That is, TiFlash first scans the column data related to the filter conditions that are pushed down to the TableScan operator, filters the rows that meet the condition, and then scans the other column data of these rows for further calculation, thereby reducing IO scans and computations of data processing. 
Starting from v7.1.0, the TiFlash late materialization feature is generally available and enabled by default (the [`tidb_opt_enable_late_materialization`](/system-variables.md#tidb_opt_enable_late_materialization-new-in-v700) system variable defaults to `ON`). The TiDB optimizer decides which filters to be pushed down to the TableScan operator based on the statistics and the filter conditions of the query. @@ -74,16 +116,30 @@ In v7.1.0, the key new features and improvements are as follows: For more information, see [documentation](/troubleshoot-hot-spot-issues.md#scatter-read-hotspots). -* Support caching execution plans for non-prepared statements (GA) [#36598](https://github.com/pingcap/tidb/issues/36598) @[qw4990](https://github.com/qw4990) +* Enhance the capability of caching execution plans for non-prepared statements (experimental) [#36598](https://github.com/pingcap/tidb/issues/36598) @[qw4990](https://github.com/qw4990) - TiDB v7.0.0 introduces non-prepared plan cache as an experimental feature to improve the load capacity of concurrent OLTP. In v7.1.0, this feature is generally available, enabled by default, and supports caching more SQL statements. + TiDB v7.0.0 introduces non-prepared plan cache as an experimental feature to improve the load capacity of concurrent OLTP. In v7.1.0, TiDB enhances this feature and supports caching more SQL statements. To improve memory utilization, TiDB v7.1.0 merges the cache pools of non-prepared and prepared plan caches. You can control the cache size using the system variable [`tidb_session_plan_cache_size`](/system-variables.md#tidb_session_plan_cache_size-new-in-v710). The [`tidb_prepared_plan_cache_size`](/system-variables.md#tidb_prepared_plan_cache_size-new-in-v610) and [`tidb_non_prepared_plan_cache_size`](/system-variables.md#tidb_non_prepared_plan_cache_size) system variables are deprecated. 
To maintain forward compatibility, when you upgrade from an earlier version to v7.1.0 or later versions, the cache size `tidb_session_plan_cache_size` remains the same value as `tidb_prepared_plan_cache_size`, and [`tidb_enable_non_prepared_plan_cache`](/system-variables.md#tidb_enable_non_prepared_plan_cache) remains the setting before the upgrade. After sufficient performance testing, you can enable non-prepared plan cache using `tidb_enable_non_prepared_plan_cache`. For a newly created cluster, non-prepared plan cache is enabled by default. + Non-prepared plan cache does not support DML statements by default. To remove this restriction, you can set the [`tidb_enable_non_prepared_plan_cache_for_dml`](/system-variables.md#tidb_enable_non_prepared_plan_cache_for_dml-new-in-v710) system variable to `ON`. + For more information, see [documentation](/sql-non-prepared-plan-cache.md). +* Support the DDL distributed parallel execution framework (experimental) [#41495](https://github.com/pingcap/tidb/issues/41495) @[benjamin2037](https://github.com/benjamin2037) + + Before TiDB v7.1.0, only one TiDB node can serve as the DDL owner and execute DDL tasks at the same time. Starting from TiDB v7.1.0, in the new distributed parallel execution framework, multiple TiDB nodes can execute the same DDL task in parallel, thus better utilizing the resources of the TiDB cluster and significantly improving the performance of DDL. In addition, you can linearly improve the performance of DDL by adding more TiDB nodes. Note that this feature is currently experimental and only supports `ADD INDEX` operations. + + To use the distributed framework, set the value of [`tidb_enable_dist_task`](/system-variables.md#tidb_enable_dist_task-new-in-v710) to `ON`: + + ```sql + SET GLOBAL tidb_enable_dist_task = ON; + ``` + + For more information, see [documentation](/tidb-distributed-execution-framework.md). 
+ ### Reliability * Resource Control becomes generally available (GA) [#38825](https://github.com/pingcap/tidb/issues/38825) @[nolouch](https://github.com/nolouch) @[BornChanger](https://github.com/BornChanger) @[glorv](https://github.com/glorv) @[tiancaiamao](https://github.com/tiancaiamao) @[Connor1996](https://github.com/Connor1996) @[JmPotato](https://github.com/JmPotato) @[hnes](https://github.com/hnes) @[CabinfeverB](https://github.com/CabinfeverB) @[HuSharp](https://github.com/HuSharp) @@ -94,6 +150,8 @@ In v7.1.0, the key new features and improvements are as follows: In TiDB v7.1.0, this feature introduces the ability to estimate system capacity based on actual workload or hardware deployment. The estimation ability provides you with a more accurate reference for capacity planning and assists you in better managing TiDB resource allocation to meet the stability needs of enterprise-level scenarios. + To improve user experience, TiDB Dashboard provides the [Resource Manager page](/dashboard/dashboard-resource-manager.md). You can view the resource group configuration on this page and estimate cluster capacity in a visual way to facilitate reasonable resource allocation. + For more information, see [documentation](/tidb-resource-control.md). * Support the checkpoint mechanism for Fast Online DDL to improve fault tolerance and automatic recovery capability [#42164](https://github.com/pingcap/tidb/issues/42164) @[tangenta](https://github.com/tangenta) @@ -112,10 +170,35 @@ In v7.1.0, the key new features and improvements are as follows: * Optimize the strategy of loading statistics [#42160](https://github.com/pingcap/tidb/issues/42160) @[xuyifangreeneyes](https://github.com/xuyifangreeneyes) - Enabling synchronous loading of statistics can significantly reduce the number of statistics that must be loaded during startup, thus improving the speed of loading statistics. 
This feature increases the stability of TiDB in complex runtime environments and reduces the impact of individual TiDB nodes restart on the overall service. + TiDB v7.1.0 introduces lightweight statistics initialization as an experimental feature. Lightweight statistics initialization can significantly reduce the number of statistics that must be loaded during startup, thus improving the speed of loading statistics. This feature increases the stability of TiDB in complex runtime environments and reduces the impact on the overall service when TiDB nodes restart. You can set the parameter [`lite-init-stats`](/tidb-configuration-file.md#lite-init-stats-new-in-v710) to `true` to enable this feature. + + During TiDB startup, SQL statements executed before the initial statistics are fully loaded might have suboptimal execution plans, thus causing performance issues. To avoid such issues, TiDB v7.1.0 introduces the configuration parameter [`force-init-stats`](/tidb-configuration-file.md#force-init-stats-new-in-v710). With this option, you can control whether TiDB provides services only after statistics initialization has been finished during startup. This parameter is disabled by default. For more information, see [documentation](/statistics.md#load-statistics). +* TiCDC supports the data integrity validation feature for single-row data [#8718](https://github.com/pingcap/tiflow/issues/8718) [#42747](https://github.com/pingcap/tidb/issues/42747) @[3AceShowHand](https://github.com/3AceShowHand) @[zyguan](https://github.com/zyguan) + + Starting from v7.1.0, TiCDC introduces the data integrity validation feature, which uses a checksum algorithm to validate the integrity of single-row data. This feature helps verify whether any error occurs in the process of writing data from TiDB, replicating it through TiCDC, and then writing it to a Kafka cluster. 
The data integrity validation feature only supports changefeeds that use Kafka as the downstream and currently supports the Avro protocol. + + For more information, see [documentation](/ticdc/ticdc-integrity-check.md). + +* TiCDC optimizes DDL replication operations [#8686](https://github.com/pingcap/tiflow/issues/8686) @[hi-rustin](https://github.com/hi-rustin) + + Before v7.1.0, when you perform a DDL operation that affects all rows on a large table (such as adding or deleting a column), the replication latency of TiCDC would significantly increase. Starting from v7.1.0, TiCDC optimizes this replication operation and mitigates the impact of DDL operations on downstream latency. + + For more information, see [documentation](/ticdc/ticdc-faq.md#does-ticdc-replicate-data-changes-caused-by-lossy-ddl-operations-to-the-downstream). + +* Improve the stability of TiDB Lightning when importing TiB-level data [#43510](https://github.com/pingcap/tidb/issues/43510) [#43657](https://github.com/pingcap/tidb/issues/43657) @[D3Hunter](https://github.com/D3Hunter) @[lance6716](https://github.com/lance6716) + + Starting from v7.1.0, TiDB Lightning has added four configuration items to improve stability when importing TiB-level data. + + - `tikv-importer.region-split-batch-size` controls the number of Regions when splitting Regions in a batch. The default value is `4096`. + - `tikv-importer.region-split-concurrency` controls the concurrency when splitting Regions. The default value is the number of CPU cores. + - `tikv-importer.region-check-backoff-limit` controls the number of retries to wait for the Region to come online after the split and scatter operations. The default value is `1800` and the maximum retry interval is two seconds. The number of retries is not increased if any Region becomes online between retries. + - `tikv-importer.pause-pd-scheduler-scope` controls the scope in which TiDB Lightning pauses PD scheduling. Value options are `"table"` and `"global"`. 
The default value is `"table"`. For TiDB versions earlier than v6.1.0, you can only configure the `"global"` option, which pauses global scheduling during data import. Starting from v6.1.0, the `"table"` option is supported, which means that scheduling is only paused for the Region that stores the target table data. It is recommended to set this configuration item to `"global"` in scenarios with large data volumes to improve stability. + + For more information, see [documentation](/tidb-lightning/tidb-lightning-configuration.md). + ### SQL * Support saving TiFlash query results using the `INSERT INTO SELECT` statement (GA) [#37515](https://github.com/pingcap/tidb/issues/37515) @[gengliqi](https://github.com/gengliqi) @@ -130,9 +213,9 @@ In v7.1.0, the key new features and improvements are as follows: Filtering the values of an array in a JSON column is a common operation, but normal indexes cannot help speed up such an operation. Creating a multi-valued index on an array can greatly improve filtering performance. If an array in the JSON column has a multi-valued index, you can use the multi-valued index to filter retrieval conditions in `MEMBER OF()`, `JSON_CONTAINS()`, and `JSON_OVERLAPS()` functions, thereby reducing I/O consumption and improving operation speed. - In v7.1.0, the multi-valued index feature becomes generally available (GA). It supports more complete data types and is compatible with TiDB tools. You can use multi-valued indexes to speed up the search operations on JSON arrays in production environments. + In v7.1.0, the multi-valued indexes feature becomes generally available (GA). It supports more complete data types and is compatible with TiDB tools. You can use multi-valued indexes to speed up the search operations on JSON arrays in production environments. - For more information, see [documentation](/sql-statements/sql-statement-create-index.md#multi-valued-index). 
+ For more information, see [documentation](/sql-statements/sql-statement-create-index.md#multi-valued-indexes). * Improve the partition management for Hash and Key partitioned tables [#42728](https://github.com/pingcap/tidb/issues/42728) @[mjonss](https://github.com/mjonss) @@ -148,7 +231,7 @@ In v7.1.0, the key new features and improvements are as follows: * Generated columns become generally available (GA) @[bb7133](https://github.com/bb7133) - Generated columns are a valuable feature for database. When creating a table, you can define that the value of a column is calculated based on the values of other columns in the table, rather than being explicitly inserted or updated by users. This generated column can be either a virtual column or a stored column. TiDB has supported MySQL-compatible generated columns since earlier versions, and this feature becomes GA in v7.1.0. + Generated columns are a valuable feature for a database. When creating a table, you can define that the value of a column is calculated based on the values of other columns in the table, rather than being explicitly inserted or updated by users. This generated column can be either a virtual column or a stored column. TiDB has supported MySQL-compatible generated columns since earlier versions, and this feature becomes GA in v7.1.0. Using generated columns can improve MySQL compatibility for TiDB, simplifying the process of migrating from MySQL. It also reduces data maintenance complexity and improves data consistency and query efficiency. @@ -156,18 +239,13 @@ In v7.1.0, the key new features and improvements are as follows: ### DB operations -* DDL tasks support pause and resume operations (experimental) [#18015](https://github.com/pingcap/tidb/issues/18015) @[godouxm](https://github.com/godouxm) - - Before TiDB v7.1.0, when a DDL task encounters a business peak period during execution, you can only manually cancel the DDL task to reduce its impact on the business. 
In v7.1.0, TiDB introduces pause and resume operations for DDL tasks. These operations let you pause DDL tasks during peak periods and resume them after the peak ends, thus avoiding any impact on your application workloads. +* Support smooth cluster upgrade without manually canceling DDL operations (experimental) [#39751](https://github.com/pingcap/tidb/issues/39751) @[zimulala](https://github.com/zimulala) - For example, you can pause and resume multiple DDL tasks using `ADMIN PAUSE DDL JOBS` or `ADMIN RESUME DDL JOBS`: + Before TiDB v7.1.0, to upgrade a cluster, you must manually cancel its running or queued DDL tasks before the upgrade and then add them back after the upgrade. - ```sql - ADMIN PAUSE DDL JOBS 1,2; - ADMIN RESUME DDL JOBS 1,2; - ``` + To provide a smoother upgrade experience, TiDB v7.1.0 supports automatically pausing and resuming DDL tasks. Starting from v7.1.0, you can upgrade your clusters without manually canceling DDL tasks in advance. TiDB will automatically pause any running or queued user DDL tasks before the upgrade and resume these tasks after the rolling upgrade, making it easier for you to upgrade your TiDB clusters. - For more information, see [documentation](/ddl-introduction.md#ddl-related-commands). + For more information, see [documentation](/smooth-upgrade-tidb.md). ### Observability @@ -187,6 +265,26 @@ In v7.1.0, the key new features and improvements are as follows: Starting from v7.1.0, when providing the query service of [`INFORMATION_SCHEMA.TIFLASH_TABLES`](/information-schema/information-schema-tiflash-tables.md) and [`INFORMATION_SCHEMA.TIFLASH_SEGMENTS`](/information-schema/information-schema-tiflash-segments.md) system tables for TiDB, TiFlash uses the gRPC port instead of the HTTP port, which avoids the security risks of the HTTP service. 
+* Support LDAP authentication [#43580](https://github.com/pingcap/tidb/issues/43580) @[YangKeao](https://github.com/YangKeao) + + Starting from v7.1.0, TiDB supports LDAP authentication and provides two authentication plugins: `authentication_ldap_sasl` and `authentication_ldap_simple`. + + For more information, see [documentation](/security-compatibility-with-mysql.md). + +* Enhance the database auditing feature (Enterprise Edition) + + In v7.1.0, TiDB Enterprise Edition enhances the database auditing feature, which significantly expands its capacity and improves the user experience to meet the needs of enterprises for database security compliance: + + - Introduce the concepts of "Filter" and "Rule" for more granular audit event definitions and more fine-grained audit settings. + - Support defining rules in JSON format, providing a more user-friendly configuration method. + - Add automatic log rotation and space management functions, and support configuring log rotation in two dimensions: retention time and log size. + - Support outputting audit logs in both TEXT and JSON formats, facilitating easier integration with third-party tools. + - Support audit log redaction. You can replace all literals to enhance security. + + Database auditing is an important feature in TiDB Enterprise Edition. This feature provides a powerful monitoring and auditing tool for enterprises to ensure data security and compliance. It can help enterprise managers in tracking the source and impact of database operations to prevent illegal data theft or tampering. Furthermore, database auditing can also help enterprises meet various regulatory and compliance requirements, ensuring legal and ethical compliance. This feature has important application value for enterprise information security. + + This feature is included in TiDB Enterprise Edition. To use this feature and its documentation, navigate to the [TiDB Enterprise](https://www.pingcap.com/tidb-enterprise) page. 
+ ## Compatibility changes > **Note:** @@ -199,20 +297,45 @@ In v7.1.0, the key new features and improvements are as follows: If you have upgraded TiFlash to v7.1.0, then during the TiDB upgrade to v7.1.0, TiDB cannot read the TiFlash system tables ([`INFORMATION_SCHEMA.TIFLASH_TABLES`](/information-schema/information-schema-tiflash-tables.md) and [`INFORMATION_SCHEMA.TIFLASH_SEGMENTS`](/information-schema/information-schema-tiflash-segments.md)). +* TiDB Lightning in TiDB versions from v6.2.0 to v7.0.0 decides whether to pause global scheduling based on the TiDB cluster version. When TiDB cluster version >= v6.1.0, scheduling is only paused for the Region that stores the target table data and is resumed after the target table import is complete. While for other versions, TiDB Lightning pauses global scheduling. Starting from TiDB v7.1.0, you can control whether to pause global scheduling by configuring [`pause-pd-scheduler-scope`](/tidb-lightning/tidb-lightning-configuration.md). By default, TiDB Lightning pauses scheduling for the Region that stores the target table data. If the target cluster version is earlier than v6.1.0, an error occurs. In this case, you can change the value of the parameter to `"global"` and try again. + +* When you use [`FLASHBACK CLUSTER TO TIMESTAMP`](/sql-statements/sql-statement-flashback-to-timestamp.md) in TiDB v7.1.0, some Regions might remain in the FLASHBACK process even after the completion of the FLASHBACK operation. It is recommended to avoid using this feature in v7.1.0. For more information, see issue [#44292](https://github.com/pingcap/tidb/issues/44292). If you have encountered this issue, you can use the [TiDB snapshot backup and restore](/br/br-snapshot-guide.md) feature to restore data. 
+ ### System variables -| Variable name | Change type | Description | +| Variable name | Change type | Description | |--------|------------------------------|------| | [`tidb_enable_tiflash_read_for_write_stmt`](/system-variables.md#tidb_enable_tiflash_read_for_write_stmt-new-in-v630) | Deprecated | Changes the default value from `OFF` to `ON`. When [`tidb_allow_mpp = ON`](/system-variables.md#tidb_allow_mpp-new-in-v50), the optimizer intelligently decides whether to push a query down to TiFlash based on the [SQL mode](/sql-mode.md) and the cost estimates of the TiFlash replica. | | [`tidb_non_prepared_plan_cache_size`](/system-variables.md#tidb_non_prepared_plan_cache_size) | Deprecated | Starting from v7.1.0, this system variable is deprecated. You can use [`tidb_session_plan_cache_size`](/system-variables.md#tidb_session_plan_cache_size-new-in-v710) to control the maximum number of plans that can be cached. | | [`tidb_prepared_plan_cache_size`](/system-variables.md#tidb_prepared_plan_cache_size-new-in-v610) | Deprecated | Starting from v7.1.0, this system variable is deprecated. You can use [`tidb_session_plan_cache_size`](/system-variables.md#tidb_session_plan_cache_size-new-in-v710) to control the maximum number of plans that can be cached. | | `tidb_ddl_distribute_reorg` | Deleted | This variable is renamed to [`tidb_enable_dist_task`](/system-variables.md#tidb_enable_dist_task-new-in-v710). | -| [`tidb_enable_non_prepared_plan_cache`](/system-variables.md#tidb_enable_non_prepared_plan_cache) | Modified | Changes the default value from `OFF` to `ON` after further tests, meaning that non-prepared plan cache is enabled by default. | +| [`default_authentication_plugin`](/system-variables.md#default_authentication_plugin) | Modified | Introduces two new value options: `authentication_ldap_sasl` and `authentication_ldap_simple`. 
| | [`tidb_load_based_replica_read_threshold`](/system-variables.md#tidb_load_based_replica_read_threshold-new-in-v700) | Modified | Takes effect starting from v7.1.0 and controls the threshold for triggering load-based replica read. Changes the default value from `"0s"` to `"1s"` after further tests. | | [`tidb_opt_enable_late_materialization`](/system-variables.md#tidb_opt_enable_late_materialization-new-in-v700) | Modified | Changes the default value from `OFF` to `ON`, meaning that the TiFlash late materialization feature is enabled by default. | -| [`tidb_enable_dist_task`](/system-variables.md#tidb_enable_dist_task-new-in-v710) | Newly added | Controls whether to enable the distributed execution framework. After enabling distributed execution, DDL, Import and other supported backend tasks will be jointly completed by multiple TiDB nodes in the cluster. This variable was renamed from `tidb_ddl_distribute_reorg`. | +| [`authentication_ldap_sasl_auth_method_name`](/system-variables.md#authentication_ldap_sasl_auth_method_name-new-in-v710) | Newly added | Specifies the authentication method name in LDAP SASL authentication. | +| [`authentication_ldap_sasl_bind_base_dn`](/system-variables.md#authentication_ldap_sasl_bind_base_dn-new-in-v710) | Newly added | Limits the search scope within the search tree in LDAP SASL authentication. If a user is created without `AS ...` clause, TiDB automatically searches the `dn` in LDAP server according to the user name. | +| [`authentication_ldap_sasl_bind_root_dn`](/system-variables.md#authentication_ldap_sasl_bind_root_dn-new-in-v710) | Newly added | Specifies the `dn` used to login to the LDAP server to search users in LDAP SASL authentication. | +| [`authentication_ldap_sasl_bind_root_pwd`](/system-variables.md#authentication_ldap_sasl_bind_root_pwd-new-in-v710) | Newly added | Specifies the password used to login to the LDAP server to search users in LDAP SASL authentication. 
| +| [`authentication_ldap_sasl_ca_path`](/system-variables.md#authentication_ldap_sasl_ca_path-new-in-v710) | Newly added | Specifies the absolute path of the certificate authority file for StartTLS connections in LDAP SASL authentication. | +| [`authentication_ldap_sasl_init_pool_size`](/system-variables.md#authentication_ldap_sasl_init_pool_size-new-in-v710) | Newly added | Specifies the initial connections in the connection pool to the LDAP server in LDAP SASL authentication. | +| [`authentication_ldap_sasl_max_pool_size`](/system-variables.md#authentication_ldap_sasl_max_pool_size-new-in-v710) | Newly added | Specifies the maximum connections in the connection pool to the LDAP server in LDAP SASL authentication. | +| [`authentication_ldap_sasl_server_host`](/system-variables.md#authentication_ldap_sasl_server_host-new-in-v710) | Newly added | Specifies the LDAP server host in LDAP SASL authentication. | +| [`authentication_ldap_sasl_server_port`](/system-variables.md#authentication_ldap_sasl_server_port-new-in-v710) | Newly added | Specifies the LDAP server TCP/IP port number in LDAP SASL authentication. | +| [`authentication_ldap_sasl_tls`](/system-variables.md#authentication_ldap_sasl_tls-new-in-v710) | Newly added | Specifies whether connections by the plugin to the LDAP server are protected with StartTLS in LDAP SASL authentication. | +| [`authentication_ldap_simple_auth_method_name`](/system-variables.md#authentication_ldap_simple_auth_method_name-new-in-v710) | Newly added | Specifies the authentication method name in LDAP simple authentication. It only supports `SIMPLE`. | +| [`authentication_ldap_simple_bind_base_dn`](/system-variables.md#authentication_ldap_simple_bind_base_dn-new-in-v710) | Newly added | Limits the search scope within the search tree in LDAP simple authentication. If a user is created without `AS ...` clause, TiDB will automatically search the `dn` in LDAP server according to the user name. 
| +| [`authentication_ldap_simple_bind_root_dn`](/system-variables.md#authentication_ldap_simple_bind_root_dn-new-in-v710) | Newly added | Specifies the `dn` used to login to the LDAP server to search users in LDAP simple authentication. | +| [`authentication_ldap_simple_bind_root_pwd`](/system-variables.md#authentication_ldap_simple_bind_root_pwd-new-in-v710) | Newly added | Specifies the password used to login to the LDAP server to search users in LDAP simple authentication. | +| [`authentication_ldap_simple_ca_path`](/system-variables.md#authentication_ldap_simple_ca_path-new-in-v710) | Newly added | Specifies the absolute path of the certificate authority file for StartTLS connections in LDAP simple authentication. | +| [`authentication_ldap_simple_init_pool_size`](/system-variables.md#authentication_ldap_simple_init_pool_size-new-in-v710) | Newly added | Specifies the initial connections in the connection pool to the LDAP server in LDAP simple authentication. | +| [`authentication_ldap_simple_max_pool_size`](/system-variables.md#authentication_ldap_simple_max_pool_size-new-in-v710) | Newly added | Specifies the maximum connections in the connection pool to the LDAP server in LDAP simple authentication. | +| [`authentication_ldap_simple_server_host`](/system-variables.md#authentication_ldap_simple_server_host-new-in-v710) | Newly added | Specifies the LDAP server host in LDAP simple authentication. | +| [`authentication_ldap_simple_server_port`](/system-variables.md#authentication_ldap_simple_server_port-new-in-v710) | Newly added | Specifies the LDAP server TCP/IP port number in LDAP simple authentication. | +| [`authentication_ldap_simple_tls`](/system-variables.md#authentication_ldap_simple_tls-new-in-v710) | Newly added | Specifies whether connections by the plugin to the LDAP server are protected with StartTLS in LDAP simple authentication. 
| +| [`tidb_enable_dist_task`](/system-variables.md#tidb_enable_dist_task-new-in-v710) | Newly added | Controls whether to enable the distributed execution framework. After enabling distributed execution, DDL, import, and other supported backend tasks will be jointly completed by multiple TiDB nodes in the cluster. This variable was renamed from `tidb_ddl_distribute_reorg`. | | [`tidb_enable_non_prepared_plan_cache_for_dml`](/system-variables.md#tidb_enable_non_prepared_plan_cache_for_dml-new-in-v710) | Newly added | Controls whether to enable the [Non-prepared plan cache](/sql-non-prepared-plan-cache.md) feature for DML statements. | -| [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710) | Newly added | This variable provides a more fine-grained control over the optimizer and helps to prevent performance regression after upgrading caused by behavior changes in the optimizer. | +| [`tidb_enable_row_level_checksum`](/system-variables.md#tidb_enable_row_level_checksum-new-in-v710) | Newly added | Controls whether to enable the TiCDC data integrity validation for single-row data feature.| +| [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710) | Newly added | This variable provides more fine-grained control over the optimizer and helps to prevent performance regression after upgrading caused by behavior changes in the optimizer. | | [`tidb_plan_cache_invalidation_on_fresh_stats`](/system-variables.md#tidb_plan_cache_invalidation_on_fresh_stats-new-in-v710) | Newly added | Controls whether to invalidate the plan cache automatically when statistics on related tables are updated. | | [`tidb_plan_cache_max_plan_size`](/system-variables.md#tidb_plan_cache_max_plan_size-new-in-v710) | Newly added | Controls the maximum size of a plan that can be cached in prepared or non-prepared plan cache. 
| | [`tidb_prefer_broadcast_join_by_exchange_data_size`](/system-variables.md#tidb_prefer_broadcast_join_by_exchange_data_size-new-in-v710) | Newly added | Controls whether to use the algorithm with the minimum overhead of network transmission. If this variable is enabled, TiDB estimates the size of the data to be exchanged in the network using `Broadcast Hash Join` and `Shuffled Hash Join` respectively, and then chooses the one with the smaller size. [`tidb_broadcast_join_threshold_count`](/system-variables.md#tidb_broadcast_join_threshold_count-new-in-v50) and [`tidb_broadcast_join_threshold_size`](/system-variables.md#tidb_broadcast_join_threshold_size-new-in-v50) will not take effect after this variable is enabled. | @@ -222,30 +345,202 @@ In v7.1.0, the key new features and improvements are as follows: | Configuration file | Configuration parameter | Change type | Description | | -------- | -------- | -------- | -------- | -| TiDB | [`force-init-stats`](/tidb-configuration-file.md#force-init-stats-new-in-v710) | Newly added | Controls whether to wait for statistics initialization to finish before providing services during TiDB startup. | -| TiDB | [`lite-init-stats`](/tidb-configuration-file.md#lite-init-stats-new-in-v710) | Newly added | Controls whether to use lightweight statistics initialization during TiDB startup. | -| TiDB | [`timeout`](/tidb-configuration-file.md#timeout-new-in-v710) | Newly added | Sets the timeout for log-writing operations in TiDB. In case of a disk failure that prevents logs from being written, this configuration item can trigger the TiDB process to panic instead of hang. The default value is `0`, which means no timeout is set. | -| TiKV | [`optimize-filters-for-memory`](/tikv-configuration-file.md#optimize-filters-for-memory-new-in-v710) | Newly added | Controls whether to generate Bloom/Ribbon filters that minimize memory internal fragmentation. 
| -| TiKV | [`ribbon-filter-above-level`](/tikv-configuration-file.md#ribbon-filter-above-level-new-in-v710) | Newly added | Controls whether to use Ribbon filters for levels greater than or equal to this value and use non-block-based bloom filters for levels less than this value. | +| TiDB | [`performance.force-init-stats`](/tidb-configuration-file.md#force-init-stats-new-in-v710) | Newly added | Controls whether to wait for statistics initialization to finish before providing services during TiDB startup. | +| TiDB | [`performance.lite-init-stats`](/tidb-configuration-file.md#lite-init-stats-new-in-v710) | Newly added | Controls whether to use lightweight statistics initialization during TiDB startup. | +| TiDB | [`log.timeout`](/tidb-configuration-file.md#timeout-new-in-v710) | Newly added | Sets the timeout for log-writing operations in TiDB. In case of a disk failure that prevents logs from being written, this configuration item can trigger the TiDB process to panic instead of hang. The default value is `0`, which means no timeout is set. | +| TiKV | [`region-compact-min-redundant-rows`](/tikv-configuration-file.md#region-compact-min-redundant-rows-new-in-v710) | Newly added | Sets the number of redundant MVCC rows required to trigger RocksDB compaction. The default value is `50000`. | +| TiKV | [`region-compact-redundant-rows-percent`](/tikv-configuration-file.md#region-compact-redundant-rows-percent-new-in-v710) | Newly added | Sets the percentage of redundant MVCC rows required to trigger RocksDB compaction. The default value is `20`. | | TiKV | [`split.byte-threshold`](/tikv-configuration-file.md#byte-threshold-new-in-v50) | Modified | Changes the default value from `30MiB` to `100MiB` when [`region-split-size`](/tikv-configuration-file.md#region-split-size) is greater than or equal to 4 GB. 
| | TiKV | [`split.qps-threshold`](/tikv-configuration-file.md#qps-threshold) | Modified | Changes the default value from `3000` to `7000` when [`region-split-size`](/tikv-configuration-file.md#region-split-size) is greater than or equal to 4 GB. | | TiKV | [`split.region-cpu-overload-threshold-ratio`](/tikv-configuration-file.md#region-cpu-overload-threshold-ratio-new-in-v620) | Modified | Changes the default value from `0.25` to `0.75` when [`region-split-size`](/tikv-configuration-file.md#region-split-size) is greater than or equal to 4 GB. | +| TiKV | [`region-compact-check-step`](/tikv-configuration-file.md#region-compact-check-step) | Modified | Changes the default value from `100` to `5` when Partitioned Raft KV is enabled (`storage.engine="partitioned-raft-kv"`). | | PD | [`store-limit-version`](/pd-configuration-file.md#store-limit-version-new-in-v710) | Newly added | Controls the mode of store limit. Value options are `"v1"` and `"v2"`. | | PD | [`schedule.enable-diagnostic`](/pd-configuration-file.md#enable-diagnostic-new-in-v630) | Modified | Changes the default value from `false` to `true`, meaning that the diagnostic feature of scheduler is enabled by default. | | TiFlash | `http_port` | Deleted | Deprecates the HTTP service port (default `8123`). | +| TiDB Lightning | [`tikv-importer.pause-pd-scheduler-scope`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | Controls the scope in which TiDB Lightning pauses PD scheduling. The default value is `"table"` and value options are `"global"` and `"table"`. | +| TiDB Lightning | [`tikv-importer.region-check-backoff-limit`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | Controls the number of retries to wait for the Region to come online after the split and scatter operations. The default value is `1800`. The maximum retry interval is two seconds. 
The number of retries is not increased if any Region becomes online between retries.| +| TiDB Lightning | [`tikv-importer.region-split-batch-size`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | Controls the number of Regions when splitting Regions in a batch. The default value is `4096`. | +| TiDB Lightning | [`tikv-importer.region-split-concurrency`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | Controls the concurrency when splitting Regions. The default value is the number of CPU cores. | +| TiCDC | [`insecure-skip-verify`](/ticdc/ticdc-sink-to-kafka.md) | Newly added | Controls whether the authentication algorithm is set when TLS is enabled in the scenario of replicating data to Kafka. | +| TiCDC | [`integrity.corruption-handle-level`](/ticdc/ticdc-changefeed-config.md#cli-and-configuration-parameters-of-ticdc-changefeeds) | Newly added | Specifies the log level of the Changefeed when the checksum validation for single-row data fails. The default value is `"warn"`. Value options are `"warn"` and `"error"`. | +| TiCDC | [`integrity.integrity-check-level`](/ticdc/ticdc-changefeed-config.md#cli-and-configuration-parameters-of-ticdc-changefeeds) | Newly added | Controls whether to enable the checksum validation for single-row data. The default value is `"none"`, which means to disable the feature. | +| TiCDC | [`sink.only-output-updated-columns`](/ticdc/ticdc-changefeed-config.md#cli-and-configuration-parameters-of-ticdc-changefeeds) | Newly added | Controls whether to only output the updated columns. The default value is `false`. | | TiCDC | [`sink.enable-partition-separator`](/ticdc/ticdc-changefeed-config.md#cli-and-configuration-parameters-of-ticdc-changefeeds) | Modified | Changes the default value from `false` to `true` after further tests, meaning that partitions in a table are stored in separate directories by default. 
It is recommended that you keep the value as `true` to avoid the potential issue of data loss during replication of partitioned tables to storage services. | ## Improvements ++ TiDB + + - Display the number of distinct values for the corresponding column in the Cardinality column of the `SHOW INDEX` result [#42227](https://github.com/pingcap/tidb/issues/42227) @[winoros](https://github.com/winoros) + - Use `SQL_NO_CACHE` to prevent TTL Scan queries from impacting the TiKV block cache [#43206](https://github.com/pingcap/tidb/issues/43206) @[lcwangchao](https://github.com/lcwangchao) + - Improve an error message related to `MAX_EXECUTION_TIME` to make it compatible with MySQL [#43031](https://github.com/pingcap/tidb/issues/43031) @[dveeden](https://github.com/dveeden) + - Support using the MergeSort operator on partitioned tables in IndexLookUp [#26166](https://github.com/pingcap/tidb/issues/26166) @[Defined2014](https://github.com/Defined2014) + - Enhance `caching_sha2_password` to make it compatible with MySQL [#43576](https://github.com/pingcap/tidb/issues/43576) @[asjdf](https://github.com/asjdf) + ++ TiKV + + - Reduce the impact of split operations on write QPS when using partitioned Raft KV [#14447](https://github.com/tikv/tikv/issues/14447) @[SpadeA-Tang](https://github.com/SpadeA-Tang) + - Optimize the space occupied by snapshots when using partitioned Raft KV [#14581](https://github.com/tikv/tikv/issues/14581) @[bufferflies](https://github.com/bufferflies) + - Provide more detailed time information for each stage of processing requests in TiKV [#12362](https://github.com/tikv/tikv/issues/12362) @[cfzjywxk](https://github.com/cfzjywxk) + - Use PD as metastore in log backup [#13867](https://github.com/tikv/tikv/issues/13867) @[YuJuncen](https://github.com/YuJuncen) + ++ PD + + - Add a controller that automatically adjusts the size of the store limit based on the execution details of the snapshot. To enable this controller, set `store-limit-version` to `v2`. 
Once enabled, you do not need to manually adjust the `store limit` configuration to control the speed of scaling in or scaling out [#6147](https://github.com/tikv/pd/issues/6147) @[bufferflies](https://github.com/bufferflies) + - Add historical load information to avoid frequent scheduling of Regions with unstable loads by the hotspot scheduler when the storage engine is raft-kv2 [#6297](https://github.com/tikv/pd/issues/6297) @[bufferflies](https://github.com/bufferflies) + - Add a leader health check mechanism. When the PD server where the etcd leader is located cannot be elected as the leader, PD actively switches the etcd leader to ensure that the PD leader is available [#6403](https://github.com/tikv/pd/issues/6403) @[nolouch](https://github.com/nolouch) + + TiFlash - - Improve TiFlash performance and stability in the disaggregated storage and compute architecture [#6882](https://github.com/pingcap/tiflash/issues/6882) @[JaySon-Huang](https://github.com/JaySon-Huang) @[breezewish](https://github.com/breezewish) @[JinheLin](https://github.com/JinheLin) + - Improve TiFlash performance and stability in the disaggregated storage and compute architecture [#6882](https://github.com/pingcap/tiflash/issues/6882) @[JaySon-Huang](https://github.com/JaySon-Huang) @[breezewish](https://github.com/breezewish) @[JinheLin](https://github.com/JinheLin) - Support optimizing query performance in Semi Join or Anti Semi Join by selecting the smaller table as the build side [#7280](https://github.com/pingcap/tiflash/issues/7280) @[yibin87](https://github.com/yibin87) + - Improve performance of data import from BR and TiDB Lightning to TiFlash with default configurations [#7272](https://github.com/pingcap/tiflash/issues/7272) @[breezewish](https://github.com/breezewish) + ++ Tools + + + Backup & Restore (BR) + + - Support modifying the TiKV configuration item `log-backup.max-flush-interval` during log backup [#14433](https://github.com/tikv/tikv/issues/14433) 
@[joccau](https://github.com/joccau) + + + TiCDC + + - Optimize the directory structure when DDL events occur in the scenario of replicating data to object storage [#8890](https://github.com/pingcap/tiflow/issues/8890) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Optimize the method of setting GC TTL for the upstream when the TiCDC replication task fails [#8403](https://github.com/pingcap/tiflow/issues/8403) @[charleszheng44](https://github.com/charleszheng44) + - Support replicating data to the Kafka-on-Pulsar downstream [#8892](https://github.com/pingcap/tiflow/issues/8892) @[hi-rustin](https://github.com/hi-rustin) + - Support using the open-protocol protocol to only replicate the changed columns after an update occurs when replicating data to Kafka [#8706](https://github.com/pingcap/tiflow/issues/8706) @[sdojjy](https://github.com/sdojjy) + - Optimize the error handling of TiCDC in the downstream failures or other scenarios [#8657](https://github.com/pingcap/tiflow/issues/8657) @[hicqu](https://github.com/hicqu) + - Add a configuration item `insecure-skip-verify` to control whether to set the authentication algorithm in the scenario of enabling TLS [#8867](https://github.com/pingcap/tiflow/issues/8867) @[hi-rustin](https://github.com/hi-rustin) + + + TiDB Lightning + + - Change the severity level of the precheck item related to uneven Region distribution from `Critical` to `Warn` to avoid blocking users from importing data [#42836](https://github.com/pingcap/tidb/issues/42836) @[okJiang](https://github.com/okJiang) + - Add a retry mechanism when encountering an `unknown RPC` error during data import [#43291](https://github.com/pingcap/tidb/issues/43291) @[D3Hunter](https://github.com/D3Hunter) + - Enhance the retry mechanism for Region jobs [#43682](https://github.com/pingcap/tidb/issues/43682) @[lance6716](https://github.com/lance6716) + +## Bug fixes + ++ TiDB + + - Fix the issue that there is no prompt about manually executing `ANALYZE TABLE` 
after reorganizing partitions [#42183](https://github.com/pingcap/tidb/issues/42183) @[CbcWestwolf](https://github.com/CbcWestwolf) + - Fix the issue of missing table names in the `ADMIN SHOW DDL JOBS` result when a `DROP TABLE` operation is being executed [#42268](https://github.com/pingcap/tidb/issues/42268) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that `Ignore Event Per Minute` and `Stats Cache LRU Cost` charts might not be displayed normally in the Grafana monitoring panel [#42562](https://github.com/pingcap/tidb/issues/42562) @[pingandb](https://github.com/pingandb) + - Fix the issue that the `ORDINAL_POSITION` column returns incorrect results when querying the `INFORMATION_SCHEMA.COLUMNS` table [#43379](https://github.com/pingcap/tidb/issues/43379) @[bb7133](https://github.com/bb7133) + - Fix the case sensitivity issue in some columns of the permission table [#41048](https://github.com/pingcap/tidb/issues/41048) @[bb7133](https://github.com/bb7133) + - Fix the issue that after a new column is added in the cache table, the value is `NULL` instead of the default value of the column [#42928](https://github.com/pingcap/tidb/issues/42928) @[lqs](https://github.com/lqs) + - Fix the issue that CTE results are incorrect when pushing down predicates [#43645](https://github.com/pingcap/tidb/issues/43645) @[winoros](https://github.com/winoros) + - Fix the issue of DDL retry caused by write conflict when executing `TRUNCATE TABLE` for partitioned tables with many partitions and TiFlash replicas [#42940](https://github.com/pingcap/tidb/issues/42940) @[mjonss](https://github.com/mjonss) + - Fix the issue that there is no warning when using `SUBPARTITION` in creating partitioned tables [#41198](https://github.com/pingcap/tidb/issues/41198) [#41200](https://github.com/pingcap/tidb/issues/41200) @[mjonss](https://github.com/mjonss) + - Fix the incompatibility issue with MySQL when dealing with value overflow issues in generated columns 
[#40066](https://github.com/pingcap/tidb/issues/40066) @[jiyfhust](https://github.com/jiyfhust) + - Fix the issue that `REORGANIZE PARTITION` cannot be concurrently executed with other DDL operations [#42442](https://github.com/pingcap/tidb/issues/42442) @[bb7133](https://github.com/bb7133) + - Fix the issue that canceling the partition reorganization task in DDL might cause subsequent DDL operations to fail [#42448](https://github.com/pingcap/tidb/issues/42448) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that assertions on delete operations are incorrect under certain conditions [#42426](https://github.com/pingcap/tidb/issues/42426) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that TiDB server cannot start due to an error in reading the cgroup information with the error message "can't read file memory.stat from cgroup v1: open /sys/memory.stat no such file or directory" [#42659](https://github.com/pingcap/tidb/issues/42659) @[hawkingrei](https://github.com/hawkingrei) + - Fix the `Duplicate Key` issue that occurs when updating the partition key of a row on a partitioned table with a global index [#42312](https://github.com/pingcap/tidb/issues/42312) @[L-maple](https://github.com/L-maple) + - Fix the issue that the `Scan Worker Time By Phase` chart in the TTL monitoring panel does not display data [#42515](https://github.com/pingcap/tidb/issues/42515) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that some queries on partitioned tables with a global index return incorrect results [#41991](https://github.com/pingcap/tidb/issues/41991) [#42065](https://github.com/pingcap/tidb/issues/42065) @[L-maple](https://github.com/L-maple) + - Fix the issue of displaying some error logs during the process of reorganizing a partitioned table [#42180](https://github.com/pingcap/tidb/issues/42180) @[mjonss](https://github.com/mjonss) + - Fix the issue that the data length in the `QUERY` column of the 
`INFORMATION_SCHEMA.DDL_JOBS` table might exceed the column definition [#42440](https://github.com/pingcap/tidb/issues/42440) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that the `INFORMATION_SCHEMA.CLUSTER_HARDWARE` table might display incorrect values in containers [#42851](https://github.com/pingcap/tidb/issues/42851) @[hawkingrei](https://github.com/hawkingrei) + - Fix the issue that an incorrect result is returned when you query a partitioned table using `ORDER BY` + `LIMIT` [#43158](https://github.com/pingcap/tidb/issues/43158) @[Defined2014](https://github.com/Defined2014) + - Fix the issue of multiple DDL tasks running simultaneously using the ingest method [#42903](https://github.com/pingcap/tidb/issues/42903) @[tangenta](https://github.com/tangenta) + - Fix the wrong value returned when querying a partitioned table using `Limit` [#24636](https://github.com/pingcap/tidb/issues/24636) + - Fix the issue of displaying the incorrect TiDB address in IPv6 environment [#43260](https://github.com/pingcap/tidb/issues/43260) @[nexustar](https://github.com/nexustar) + - Fix the issue of displaying incorrect values for system variables `tidb_enable_tiflash_read_for_write_stmt` and `tidb_enable_exchange_partition` [#43281](https://github.com/pingcap/tidb/issues/43281) @[gengliqi](https://github.com/gengliqi) + - Fix the issue that when `tidb_scatter_region` is enabled, Region does not automatically split after a partition is truncated [#43174](https://github.com/pingcap/tidb/issues/43174) [#43028](https://github.com/pingcap/tidb/issues/43028) @[jiyfhust](https://github.com/jiyfhust) + - Add checks on the tables with generated columns and report errors for unsupported DDL operations on these columns [#38988](https://github.com/pingcap/tidb/issues/38988) [#24321](https://github.com/pingcap/tidb/issues/24321) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that the error message is incorrect in certain type conversion errors 
[#41730](https://github.com/pingcap/tidb/issues/41730) @[hawkingrei](https://github.com/hawkingrei) + - Fix the issue that after a TiDB node is normally shut down, DDL tasks triggered on this node will be canceled [#43854](https://github.com/pingcap/tidb/issues/43854) @[zimulala](https://github.com/zimulala) + - Fix the issue that when the PD member address changes, allocating ID for the `AUTO_INCREMENT` column will be blocked for a long time [#42643](https://github.com/pingcap/tidb/issues/42643) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue of reporting the `GC lifetime is shorter than transaction duration` error during DDL execution [#40074](https://github.com/pingcap/tidb/issues/40074) @[tangenta](https://github.com/tangenta) + - Fix the issue that metadata locks unexpectedly block the DDL execution [#43755](https://github.com/pingcap/tidb/issues/43755) @[wjhuang2016](https://github.com/wjhuang2016) + - Fix the issue that the cluster cannot query some system views in IPv6 environment [#43286](https://github.com/pingcap/tidb/issues/43286) @[Defined2014](https://github.com/Defined2014) + - Fix the issue of not finding the partition during inner join in dynamic pruning mode [#43686](https://github.com/pingcap/tidb/issues/43686) @[mjonss](https://github.com/mjonss) + - Fix the issue that TiDB reports syntax errors when analyzing tables [#43392](https://github.com/pingcap/tidb/issues/43392) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that TiCDC might lose some row changes during table renaming [#43338](https://github.com/pingcap/tidb/issues/43338) @[tangenta](https://github.com/tangenta) + - Fix the issue that TiDB server crashes when the client uses cursor reads [#38116](https://github.com/pingcap/tidb/issues/38116) @[YangKeao](https://github.com/YangKeao) + - Fix the issue that `ADMIN SHOW DDL JOBS LIMIT` returns incorrect results [#42298](https://github.com/pingcap/tidb/issues/42298) 
@[CbcWestwolf](https://github.com/CbcWestwolf) + - Fix the TiDB panic issue that occurs when querying union views and temporary tables with `UNION` [#42563](https://github.com/pingcap/tidb/issues/42563) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that renaming tables does not take effect when committing multiple statements in a transaction [#39664](https://github.com/pingcap/tidb/issues/39664) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the incompatibility issue between the behavior of prepared plan cache and non-prepared plan cache during time conversion [#42439](https://github.com/pingcap/tidb/issues/42439) @[qw4990](https://github.com/qw4990) + - Fix the wrong results caused by plan cache for Decimal type [#43311](https://github.com/pingcap/tidb/issues/43311) @[qw4990](https://github.com/qw4990) + - Fix the TiDB panic issue in null-aware anti join (NAAJ) due to the wrong field type check [#42459](https://github.com/pingcap/tidb/issues/42459) @[AilinKid](https://github.com/AilinKid) + - Fix the issue that DML execution failures in pessimistic transactions at the RC isolation level might cause inconsistency between data and indexes [#43294](https://github.com/pingcap/tidb/issues/43294) @[ekexium](https://github.com/ekexium) + - Fix the issue that in some extreme cases, when the first statement of a pessimistic transaction is retried, resolving locks on this transaction might affect transaction correctness [#42937](https://github.com/pingcap/tidb/issues/42937) @[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that in some rare cases, residual pessimistic locks of pessimistic transactions might affect data correctness when GC resolves locks [#43243](https://github.com/pingcap/tidb/issues/43243) @[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that the `LOCK` to `PUT` optimization leads to duplicate data being returned in specific queries [#28011](https://github.com/pingcap/tidb/issues/28011) 
@[zyguan](https://github.com/zyguan) + - Fix the issue that when data is changed, the locking behavior of the unique index is not consistent with that when the data is unchanged [#36438](https://github.com/pingcap/tidb/issues/36438) @[zyguan](https://github.com/zyguan) + ++ TiKV + + - Fix the issue that when you enable `tidb_pessimistic_txn_fair_locking`, in some extreme cases, expired requests caused by failed RPC retries might affect data correctness during the resolve lock operation [#14551](https://github.com/tikv/tikv/issues/14551) @[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that when you enable `tidb_pessimistic_txn_fair_locking`, in some extreme cases, expired requests caused by failed RPC retries might cause transaction conflicts to be ignored, thus affecting transaction consistency [#14311](https://github.com/tikv/tikv/issues/14311) @[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that encryption key ID conflict might cause the deletion of the old keys [#14585](https://github.com/tikv/tikv/issues/14585) @[tabokie](https://github.com/tabokie) + - Fix the performance degradation issue caused by accumulated lock records when a cluster is upgraded from a previous version to v6.5 or later versions [#14780](https://github.com/tikv/tikv/issues/14780) @[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that the `raft entry is too large` error occurs during the PITR recovery process [#14313](https://github.com/tikv/tikv/issues/14313) @[YuJuncen](https://github.com/YuJuncen) + - Fix the issue that TiKV panics during the PITR recovery process due to `log_batch` exceeding 2 GB [#13848](https://github.com/tikv/tikv/issues/13848) @[YuJuncen](https://github.com/YuJuncen) + ++ PD + + - Fix the issue that the number of `low space store` in the PD monitoring panel is abnormal after TiKV panics [#6252](https://github.com/tikv/pd/issues/6252) @[HuSharp](https://github.com/HuSharp) + - Fix the issue that Region Health 
monitoring data is deleted after PD leader switch [#6366](https://github.com/tikv/pd/issues/6366) @[iosmanthus](https://github.com/iosmanthus) + - Fix the issue that the rule checker cannot repair unhealthy Regions with the `schedule=deny` label [#6426](https://github.com/tikv/pd/issues/6426) @[nolouch](https://github.com/nolouch) + - Fix the issue that some existing labels are lost after TiKV or TiFlash restarts [#6467](https://github.com/tikv/pd/issues/6467) @[JmPotato](https://github.com/JmPotato) + - Fix the issue that the replication status cannot be switched when there are learner nodes in the replication mode [#14704](https://github.com/tikv/tikv/issues/14704) @[nolouch](https://github.com/nolouch) + ++ TiFlash + + - Fix the issue that querying data in the `TIMESTAMP` or `TIME` type returns errors after enabling late materialization [#7455](https://github.com/pingcap/tiflash/issues/7455) @[Lloyd-Pottiger](https://github.com/Lloyd-Pottiger) + - Fix the issue that large update transactions might cause TiFlash to repeatedly report errors and restart [#7316](https://github.com/pingcap/tiflash/issues/7316) @[JaySon-Huang](https://github.com/JaySon-Huang) + ++ Tools + + + Backup & Restore (BR) + + - Fix the issue of backup slowdown when a TiKV node crashes in a cluster [#42973](https://github.com/pingcap/tidb/issues/42973) @[YuJuncen](https://github.com/YuJuncen) + - Fix the issue of inaccurate error messages caused by a backup failure in some cases [#43236](https://github.com/pingcap/tidb/issues/43236) @[YuJuncen](https://github.com/YuJuncen) + + + TiCDC + + - Fix the issue of TiCDC time zone setting [#8798](https://github.com/pingcap/tiflow/issues/8798) @[hi-rustin](https://github.com/hi-rustin) + - Fix the issue that TiCDC cannot automatically recover when PD address or leader fails [#8812](https://github.com/pingcap/tiflow/issues/8812) [#8877](https://github.com/pingcap/tiflow/issues/8877) @[asddongmen](https://github.com/asddongmen) + - Fix the issue that 
checkpoint lag increases when one of the upstream TiKV nodes crashes [#8858](https://github.com/pingcap/tiflow/issues/8858) @[hicqu](https://github.com/hicqu) + - Fix the issue that when replicating data to object storage, the `EXCHANGE PARTITION` operation in the upstream cannot be properly replicated to the downstream [#8914](https://github.com/pingcap/tiflow/issues/8914) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the OOM issue caused by excessive memory usage of the sorter component in some special scenarios [#8974](https://github.com/pingcap/tiflow/issues/8974) @[hicqu](https://github.com/hicqu) + - Fix the TiCDC node panic that occurs when the downstream Kafka sinks are rolling restarted [#9023](https://github.com/pingcap/tiflow/issues/9023) @[asddongmen](https://github.com/asddongmen) + + + TiDB Data Migration (DM) + + - Fix the issue that latin1 data might be corrupted during replication [#7028](https://github.com/pingcap/tiflow/issues/7028) @[lance6716](https://github.com/lance6716) + + + TiDB Dumpling + + - Fix the issue that the `UNSIGNED INTEGER` type primary key cannot be used for splitting chunks [#42620](https://github.com/pingcap/tidb/issues/42620) @[lichunzhu](https://github.com/lichunzhu) + - Fix the issue that TiDB Dumpling might panic when `--output-file-template` is incorrectly set [#42391](https://github.com/pingcap/tidb/issues/42391) @[lichunzhu](https://github.com/lichunzhu) + + + TiDB Binlog + + - Fix the issue that an error might occur when encountering a failed DDL statement [#1228](https://github.com/pingcap/tidb-binlog/issues/1228) @[okJiang](https://github.com/okJiang) + + + TiDB Lightning + + - Fix the performance degradation issue during data import [#42456](https://github.com/pingcap/tidb/issues/42456) @[lance6716](https://github.com/lance6716) + - Fix the issue of `write to tikv with no leader returned` when importing a large amount of data [#43055](https://github.com/pingcap/tidb/issues/43055) 
@[lance6716](https://github.com/lance6716) + - Fix the issue of excessive `keys within region is empty, skip doIngest` logs during data import [#43197](https://github.com/pingcap/tidb/issues/43197) @[D3Hunter](https://github.com/D3Hunter) + - Fix the issue that panic might occur during partial write [#43363](https://github.com/pingcap/tidb/issues/43363) @[lance6716](https://github.com/lance6716) + - Fix the issue that OOM might occur when importing a wide table [#43728](https://github.com/pingcap/tidb/issues/43728) @[D3Hunter](https://github.com/D3Hunter) + - Fix the issue of missing data in the TiDB Lightning Grafana dashboard [#43357](https://github.com/pingcap/tidb/issues/43357) @[lichunzhu](https://github.com/lichunzhu) + - Fix the import failure due to incorrect setting of `keyspace-name` [#43684](https://github.com/pingcap/tidb/issues/43684) @[zeminzhou](https://github.com/zeminzhou) + - Fix the issue that data import might be skipped during range partial write in some cases [#43768](https://github.com/pingcap/tidb/issues/43768) @[lance6716](https://github.com/lance6716) + +## Performance test + +To learn about the performance of TiDB v7.1.0, you can refer to the [TPC-C performance test report](https://docs.pingcap.com/tidbcloud/v7.1.0-performance-benchmarking-with-tpcc) and [Sysbench performance test report](https://docs.pingcap.com/tidbcloud/v7.1.0-performance-benchmarking-with-sysbench) of the TiDB Dedicated cluster. 
## Contributors We would like to thank the following contributors from the TiDB community: +- [blacktear23](https://github.com/blacktear23) - [ethercflow](https://github.com/ethercflow) - [hihihuhu](https://github.com/hihihuhu) - [jiyfhust](https://github.com/jiyfhust) @@ -253,4 +548,4 @@ We would like to thank the following contributors from the TiDB community: - [lqs](https://github.com/lqs) - [pingandb](https://github.com/pingandb) - [yorkhellen](https://github.com/yorkhellen) -- [yujiarista](https://github.com/yujiarista) +- [yujiarista](https://github.com/yujiarista) (First-time contributor) diff --git a/releases/release-7.1.1.md b/releases/release-7.1.1.md new file mode 100644 index 0000000000000..ad17bb1d0f5bc --- /dev/null +++ b/releases/release-7.1.1.md @@ -0,0 +1,152 @@ +--- +title: TiDB 7.1.1 Release Notes +summary: Learn about the compatibility changes, improvements, and bug fixes in TiDB 7.1.1. +--- + +# TiDB 7.1.1 Release Notes + +Release date: July 24, 2023 + +TiDB version: 7.1.1 + +Quick access: [Quick start](https://docs.pingcap.com/tidb/v7.1/quick-start-with-tidb) | [Production deployment](https://docs.pingcap.com/tidb/v7.1/production-deployment-using-tiup) | [Installation packages](https://www.pingcap.com/download/?version=v7.1.1#version-list) + +## Compatibility changes + +- TiDB introduces a new system variable `tidb_lock_unchanged_keys` to control whether to lock unchanged keys [#44714](https://github.com/pingcap/tidb/issues/44714) @[ekexium](https://github.com/ekexium) + +## Improvements + ++ TiDB + + - Plan Cache supports queries with more than 200 parameters [#44823](https://github.com/pingcap/tidb/issues/44823) @[qw4990](https://github.com/qw4990) + - Optimize the performance of reading the dumped chunks from disk [#45125](https://github.com/pingcap/tidb/issues/45125) @[YangKeao](https://github.com/YangKeao) + - Optimize the logic of constructing index scan range so that it supports converting complex conditions into index scan range 
[#41572](https://github.com/pingcap/tidb/issues/41572) [#44389](https://github.com/pingcap/tidb/issues/44389) @[xuyifangreeneyes](https://github.com/xuyifangreeneyes) + - When the retry leader of stale read encounters a lock, TiDB forcibly retries with the leader after resolving the lock, which avoids unnecessary overhead [#43659](https://github.com/pingcap/tidb/issues/43659) @[you06](https://github.com/you06) + ++ PD + + - PD blocks Swagger API by default when the Swagger server is disabled [#6786](https://github.com/tikv/pd/issues/6786) @[bufferflies](https://github.com/bufferflies) + ++ Tools + + + TiCDC + + - Optimize the encoding format of binary fields when TiCDC replicates data to object storage services [#9373](https://github.com/pingcap/tiflow/issues/9373) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Support the OAUTHBEARER authentication in the scenario of replication to Kafka [#8865](https://github.com/pingcap/tiflow/issues/8865) @[hi-rustin](https://github.com/hi-rustin) + + + TiDB Lightning + + - Improve the retry logic of TiDB Lightning for the PD `ClientTSOStreamClosed` error during the checksum phase [#45301](https://github.com/pingcap/tidb/issues/45301) @[lance6716](https://github.com/lance6716) + - Verify checksum through SQL after the import to improve stability of verification [#41941](https://github.com/pingcap/tidb/issues/41941) @[GMHDBJD](https://github.com/GMHDBJD) + + + Dumpling + + - Dumpling avoids executing table queries when the `--sql` parameter is used, thereby reducing the export overhead [#45239](https://github.com/pingcap/tidb/issues/45239) @[lance6716](https://github.com/lance6716) + + + TiDB Binlog + + - Optimize the method of retrieving table information to reduce the initialization time and memory usage of Drainer [#1137](https://github.com/pingcap/tidb-binlog/issues/1137) @[lichunzhu](https://github.com/lichunzhu) + +## Bug fixes + ++ TiDB + + - Fix the issue that the GC Resolve Locks step might miss some 
pessimistic locks [#45134](https://github.com/pingcap/tidb/issues/45134) @[MyonKeminta](https://github.com/MyonKeminta) + - Fix the issue that Stats Collector might cause deadlock when a new session is created [#44502](https://github.com/pingcap/tidb/issues/44502) @[xuyifangreeneyes](https://github.com/xuyifangreeneyes) + - Fix the potential memory leak issue in memory tracker [#44612](https://github.com/pingcap/tidb/issues/44612) @[wshwsh12](https://github.com/wshwsh12) + - Fix the issue that batch coprocessor retry might generate incorrect Region information that causes query failure [#44622](https://github.com/pingcap/tidb/issues/44622) @[windtalker](https://github.com/windtalker) + - Fix the potential data race issue in index scan [#45126](https://github.com/pingcap/tidb/issues/45126) @[wshwsh12](https://github.com/wshwsh12) + - Fix the issue that query results in MPP mode are incorrect when `tidb_enable_parallel_apply` is enabled [#45299](https://github.com/pingcap/tidb/issues/45299) @[windtalker](https://github.com/windtalker) + - Fix the hang-up issue that occurs when queries with `indexMerge` are killed [#45279](https://github.com/pingcap/tidb/issues/45279) @[xzhangxian1008](https://github.com/xzhangxian1008) + - Fix the issue that excessive memory consumption of SQL execution details in statistics causes TiDB OOM in extreme cases [#44047](https://github.com/pingcap/tidb/issues/44047) @[wshwsh12](https://github.com/wshwsh12) + - Fix the issue that the `FormatSQL()` method cannot properly truncate extremely long SQL statements in input [#44542](https://github.com/pingcap/tidb/issues/44542) @[hawkingrei](https://github.com/hawkingrei) + - Fix the issue that DDL operations get stuck during cluster upgrade, which causes upgrade failure [#44158](https://github.com/pingcap/tidb/issues/44158) @[zimulala](https://github.com/zimulala) + - Fix the issue that other TiDB nodes do not take over TTL tasks after failures in one TiDB node 
[#45022](https://github.com/pingcap/tidb/issues/45022) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue that when the MySQL Cursor Fetch protocol is used, the memory consumption of result sets might exceed the `tidb_mem_quota_query` limit and cause TiDB OOM. After the fix, TiDB will automatically write result sets to the disk to release memory [#43233](https://github.com/pingcap/tidb/issues/43233) @[YangKeao](https://github.com/YangKeao) + - Fix the issue that users can view information in the `INFORMATION_SCHEMA.TIFLASH_REPLICA` table even without permissions [#45320](https://github.com/pingcap/tidb/issues/45320) @[Lloyd-Pottiger](https://github.com/Lloyd-Pottiger) + - Fix the issue that `ROW_COUNT` returned by the `ADMIN SHOW DDL JOBS` statement is inaccurate [#44044](https://github.com/pingcap/tidb/issues/44044) @[tangenta](https://github.com/tangenta) + - Fix the issue that querying a Range COLUMNS partitioned table might get an error [#43459](https://github.com/pingcap/tidb/issues/43459) @[mjonss](https://github.com/mjonss) + - Fix the issue that resuming a paused DDL task fails [#44217](https://github.com/pingcap/tidb/issues/44217) @[dhysum](https://github.com/dhysum) + - Fix the issue that in-memory pessimistic locks cause `FLASHBACK` failures and data inconsistency [#44292](https://github.com/pingcap/tidb/issues/44292) @[JmPotato](https://github.com/JmPotato) + - Fix the issue that deleted tables can still be read from `INFORMATION_SCHEMA` [#43714](https://github.com/pingcap/tidb/issues/43714) @[tangenta](https://github.com/tangenta) + - Fix the issue that the cluster upgrade fails when there are paused DDL operations before the upgrade [#44225](https://github.com/pingcap/tidb/issues/44225) @[zimulala](https://github.com/zimulala) + - Fix the `duplicate entry` error that occurs when restoring a table with `AUTO_ID_CACHE=1` using BR [#44716](https://github.com/pingcap/tidb/issues/44716) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix 
the data index inconsistency issue triggered by multiple switches of DDL owner [#44619](https://github.com/pingcap/tidb/issues/44619) @[tangenta](https://github.com/tangenta) + - Fix the issue that canceling an `ADD INDEX` DDL task in the `none` status might cause memory leak because this task is not removed from the backend task queue [#44205](https://github.com/pingcap/tidb/issues/44205) @[tangenta](https://github.com/tangenta) + - Fix the issue that the proxy protocol reports the `Header read timeout` error when processing certain erroneous data [#43205](https://github.com/pingcap/tidb/issues/43205) @[blacktear23](https://github.com/blacktear23) + - Fix the issue that PD isolation might block the running DDL [#44267](https://github.com/pingcap/tidb/issues/44267) @[wjhuang2016](https://github.com/wjhuang2016) + - Fix the issue that the query result of the `SELECT CAST(n AS CHAR)` statement is incorrect when `n` in the statement is a negative number [#44786](https://github.com/pingcap/tidb/issues/44786) @[xhebox](https://github.com/xhebox) + - Fix the issue of excessive memory usage after creating a large number of empty partitioned tables [#44308](https://github.com/pingcap/tidb/issues/44308) @[hawkingrei](https://github.com/hawkingrei) + - Fix the issue that Join Reorder might cause incorrect outer join results [#44314](https://github.com/pingcap/tidb/issues/44314) @[AilinKid](https://github.com/AilinKid) + - Fix the issue that queries containing Common Table Expressions (CTEs) might cause insufficient disk space [#44477](https://github.com/pingcap/tidb/issues/44477) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that dropping a database causes slow GC progress [#33069](https://github.com/pingcap/tidb/issues/33069) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that adding an index fails in the ingest mode [#44137](https://github.com/pingcap/tidb/issues/44137) @[tangenta](https://github.com/tangenta) + - Fix the issue that the 
`SELECT` statement returns an error for a partitioned table if the table partition definition uses the `FLOOR()` function to round a partitioned column [#42323](https://github.com/pingcap/tidb/issues/42323) @[jiyfhust](https://github.com/jiyfhust) + - Fix the issue that follower read does not handle flashback errors before retrying, which causes query errors [#43673](https://github.com/pingcap/tidb/issues/43673) @[you06](https://github.com/you06) + - Fix the issue that using `memTracker` with cursor fetch causes memory leaks [#44254](https://github.com/pingcap/tidb/issues/44254) @[YangKeao](https://github.com/YangKeao) + - Fix the issue that the `SHOW PROCESSLIST` statement cannot display the TxnStart of the transaction of the statement with a long subquery time [#40851](https://github.com/pingcap/tidb/issues/40851) @[crazycs520](https://github.com/crazycs520) + - Fix the issue that the `LEADING` hint does not support query block aliases [#44645](https://github.com/pingcap/tidb/issues/44645) @[qw4990](https://github.com/qw4990) + - Fix the issue that `PREPARE stmt FROM "ANALYZE TABLE xxx"` might be killed by `tidb_mem_quota_query` [#44320](https://github.com/pingcap/tidb/issues/44320) @[chrysan](https://github.com/chrysan) + - Fix the panic issue caused by empty `processInfo` [#43829](https://github.com/pingcap/tidb/issues/43829) @[zimulala](https://github.com/zimulala) + - Fix the issue that data and indexes are inconsistent when the `ON UPDATE` statement does not correctly update the primary key [#44565](https://github.com/pingcap/tidb/issues/44565) @[zyguan](https://github.com/zyguan) + - Fix the issue that queries might return incorrect results when `tidb_opt_agg_push_down` is enabled [#44795](https://github.com/pingcap/tidb/issues/44795) @[AilinKid](https://github.com/AilinKid) + - Fix the issue that using CTEs and correlated subqueries simultaneously might result in incorrect query results or panic [#44649](https://github.com/pingcap/tidb/issues/44649) 
[#38170](https://github.com/pingcap/tidb/issues/38170) [#44774](https://github.com/pingcap/tidb/issues/44774) @[winoros](https://github.com/winoros) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that canceling a DDL task in the rollback state causes errors in related metadata [#44143](https://github.com/pingcap/tidb/issues/44143) @[wjhuang2016](https://github.com/wjhuang2016) + - Fix the issue that executing the `UPDATE` statement causes errors due to checking foreign key constraints [#44848](https://github.com/pingcap/tidb/issues/44848) @[crazycs520](https://github.com/crazycs520) + ++ PD + + - Fix the issue that Resource Manager repeatedly initializes the default resource group [#6787](https://github.com/tikv/pd/issues/6787) @[glorv](https://github.com/glorv) + - Fix the issue that in some cases, the `location-labels` set in the Placement Rules in SQL does not schedule as expected [#6662](https://github.com/tikv/pd/issues/6662) @[rleungx](https://github.com/rleungx) + - Fix the issue that redundant replicas cannot be automatically repaired in some corner cases [#6573](https://github.com/tikv/pd/issues/6573) @[nolouch](https://github.com/nolouch) + ++ TiFlash + + - Fix the issue that in the disaggregated storage and compute architecture mode, the TiFlash compute node fetches inaccurate CPU core information [#7436](https://github.com/pingcap/tiflash/issues/7436) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that TiFlash takes too long to restart after using Online Unsafe Recovery [#7671](https://github.com/pingcap/tiflash/issues/7671) @[hongyunyan](https://github.com/hongyunyan) + ++ Tools + + + Backup & Restore (BR) + + - Fix the issue that `checksum mismatch` is falsely reported in some cases [#44472](https://github.com/pingcap/tidb/issues/44472) @[Leavrth](https://github.com/Leavrth) + + + TiCDC + + - Fix the issue that a PD exception might cause the replication task to get stuck 
[#8808](https://github.com/pingcap/tiflow/issues/8808) [#9054](https://github.com/pingcap/tiflow/issues/9054) @[asddongmen](https://github.com/asddongmen) @[fubinzh](https://github.com/fubinzh) + - Fix the issue of excessive memory consumption when replicating to an object storage service [#8894](https://github.com/pingcap/tiflow/issues/8894) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the issue that the replication task might get stuck when the redo log is enabled and there is an exception downstream [#9172](https://github.com/pingcap/tiflow/issues/9172) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the issue that TiCDC keeps retrying when there is a downstream failure, which causes the retry time to be too long [#9272](https://github.com/pingcap/tiflow/issues/9272) @[asddongmen](https://github.com/asddongmen) + - Fix the issue of excessive downstream pressure caused by reading downstream metadata too frequently when replicating data to Kafka [#8959](https://github.com/pingcap/tiflow/issues/8959) @[hi-rustin](https://github.com/hi-rustin) + - Fix the issue that when the downstream is Kafka, TiCDC queries the downstream metadata too frequently and causes excessive workload in the downstream [#8957](https://github.com/pingcap/tiflow/issues/8957) [#8959](https://github.com/pingcap/tiflow/issues/8959) @[hi-rustin](https://github.com/hi-rustin) + - Fix the OOM issue caused by excessive memory usage of the sorter component in some special scenarios [#8974](https://github.com/pingcap/tiflow/issues/8974) @[hicqu](https://github.com/hicqu) + - Fix the issue that the `UPDATE` operation cannot output old values when the Avro or CSV protocol is used [#9086](https://github.com/pingcap/tiflow/issues/9086) @[3AceShowHand](https://github.com/3AceShowHand) + - Fix the issue that when replicating data to storage services, the JSON file corresponding to downstream DDL statements does not record the default values of table fields 
[#9066](https://github.com/pingcap/tiflow/issues/9066) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the issue of too many downstream logs caused by frequently setting the downstream bidirectional replication-related variables when replicating data to TiDB or MySQL [#9180](https://github.com/pingcap/tiflow/issues/9180) @[asddongmen](https://github.com/asddongmen) + - Fix the issue that when a replication error occurs due to an oversized Kafka message, the message body is recorded in the log [#9031](https://github.com/pingcap/tiflow/issues/9031) @[darraes](https://github.com/darraes) + - Fix the issue that TiCDC gets stuck when PD fails such as network isolation or PD Owner node reboot [#8808](https://github.com/pingcap/tiflow/issues/8808) [#8812](https://github.com/pingcap/tiflow/issues/8812) [#8877](https://github.com/pingcap/tiflow/issues/8877) @[asddongmen](https://github.com/asddongmen) + - Fix the issue that Avro protocol incorrectly identifies `Enum` type values [#9259](https://github.com/pingcap/tiflow/issues/9259) @[3AceShowHand](https://github.com/3AceShowHand) + + + TiDB Data Migration (DM) + + - Fix the issue that DM-master exits abnormally when a unique index contains empty columns in the table to be migrated [#9247](https://github.com/pingcap/tiflow/issues/9247) @[lance6716](https://github.com/lance6716) + + + TiDB Lightning + + - Fix the issue that the failed connection between TiDB Lightning and PD cannot be retried, improving the import success rate [#43400](https://github.com/pingcap/tidb/issues/43400) @[lichunzhu](https://github.com/lichunzhu) + - Fix the issue that TiDB Lightning does not correctly display the error message when writing data to TiKV returns an out of space error [#44733](https://github.com/pingcap/tidb/issues/44733) @[lance6716](https://github.com/lance6716) + - Fix the issue that the `Region is unavailable` error is reported during checksum operation [#45462](https://github.com/pingcap/tidb/issues/45462) 
@[D3Hunter](https://github.com/D3Hunter) + - Fix the TiDB Lightning panic issue when `experimental.allow-expression-index` is enabled and the default value is UUID [#44497](https://github.com/pingcap/tidb/issues/44497) @[lichunzhu](https://github.com/lichunzhu) + - Fix the issue that disk quota might be inaccurate due to race conditions [#44867](https://github.com/pingcap/tidb/issues/44867) @[D3Hunter](https://github.com/D3Hunter) + - Fix the issue that in Logical Import Mode, deleting tables downstream during import might cause TiDB Lightning metadata not to be updated in time [#44614](https://github.com/pingcap/tidb/issues/44614) @[dsdashun](https://github.com/dsdashun) + + + Dumpling + + - Fix the issue that Dumpling exits abnormally when the query result set of `--sql` is empty [#45200](https://github.com/pingcap/tidb/issues/45200) @[D3Hunter](https://github.com/D3Hunter) + + + TiDB Binlog + + - Fix the issue that TiDB cannot correctly query Binlog node status via `SHOW PUMP STATUS` or `SHOW DRAINER STATUS` after a complete change of the PD address [#42643](https://github.com/pingcap/tidb/issues/42643) @[lichunzhu](https://github.com/lichunzhu) + - Fix the issue that TiDB cannot write binlogs after a complete change of the PD address [#42643](https://github.com/pingcap/tidb/issues/42643) @[lance6716](https://github.com/lance6716) + - Fix the issue that the etcd client does not automatically synchronize the latest node information during initialization [#1236](https://github.com/pingcap/tidb-binlog/issues/1236) @[lichunzhu](https://github.com/lichunzhu) diff --git a/releases/release-7.2.0.md b/releases/release-7.2.0.md new file mode 100644 index 0000000000000..59b8600cc245a --- /dev/null +++ b/releases/release-7.2.0.md @@ -0,0 +1,328 @@ +--- +title: TiDB 7.2.0 Release Notes +summary: Learn about the new features, compatibility changes, improvements, and bug fixes in TiDB 7.2.0. 
+--- + +# TiDB 7.2.0 Release Notes + +Release date: June 29, 2023 + +TiDB version: 7.2.0 + +Quick access: [Quick start](https://docs.pingcap.com/tidb/v7.2/quick-start-with-tidb) | [Installation packages](https://www.pingcap.com/download/?version=v7.2.0#version-list) + +7.2.0 introduces the following key features and improvements: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CategoryFeatureDescription
Scalability and PerformanceResource groups support managing runaway queries (experimental)You can now manage query timeout with more granularity, allowing for different behaviors based on query classifications. Queries meeting your specified threshold can be deprioritized or terminated. +
TiFlash supports the pipeline execution model (experimental)TiFlash supports a pipeline execution model to optimize thread resource control.
SQLSupport a new SQL statement, IMPORT INTO, for data import (experimental)To simplify the deployment and maintenance of TiDB Lightning, TiDB introduces a new SQL statement IMPORT INTO, which integrates physical import mode of TiDB Lightning, including remote import from Amazon S3 or Google Cloud Storage (GCS) directly into TiDB.
DB Operations and ObservabilityDDL supports pause and resume operations (experimental)This new capability lets you temporarily suspend resource-intensive DDL operations, such as index creation, to conserve resources and minimize the impact on online traffic. You can seamlessly resume these operations when ready, without the need to cancel and restart. This feature enhances resource utilization, improves user experience, and streamlines schema changes.
+ +## Feature details + +### Performance + +* Support pushing down the following two [window functions](/tiflash/tiflash-supported-pushdown-calculations.md) to TiFlash [#7427](https://github.com/pingcap/tiflash/issues/7427) @[xzhangxian1008](https://github.com/xzhangxian1008) + + * `FIRST_VALUE` + * `LAST_VALUE` + +* TiFlash supports the pipeline execution model (experimental) [#6518](https://github.com/pingcap/tiflash/issues/6518) @[SeaRise](https://github.com/SeaRise) + + Prior to v7.2.0, each task in the TiFlash engine must individually request thread resources during execution. TiFlash controls the number of tasks to limit thread resource usage and prevent overuse, but this issue could not be completely eliminated. To address this problem, starting from v7.2.0, TiFlash introduces a pipeline execution model. This model centrally manages all thread resources and schedules task execution uniformly, maximizing the utilization of thread resources while avoiding resource overuse. To enable or disable the pipeline execution model, modify the [`tidb_enable_tiflash_pipeline_model`](/system-variables.md#tidb_enable_tiflash_pipeline_model-new-in-v720) system variable. + + For more information, see [documentation](/tiflash/tiflash-pipeline-model.md). + +* TiFlash reduces the latency of schema replication [#7630](https://github.com/pingcap/tiflash/issues/7630) @[hongyunyan](https://github.com/hongyunyan) + + When the schema of a table changes, TiFlash needs to replicate the latest schema from TiKV in a timely manner. Before v7.2.0, when TiFlash accesses table data and detects a table schema change within a database, TiFlash needs to replicate the schemas of all tables in this database again, including those tables without TiFlash replicas. As a result, in a database with a large number of tables, even if you only need to read data from a single table using TiFlash, you might experience significant latency to wait for TiFlash to complete the schema replication of all tables. 
+ + In v7.2.0, TiFlash optimizes the schema replication mechanism and supports only replicating schemas of tables with TiFlash replicas. When a schema change is detected for a table with TiFlash replicas, TiFlash only replicates the schema of that table, which reduces the latency of schema replication of TiFlash and minimizes the impact of DDL operations on TiFlash data replication. This optimization is automatically applied and does not require any manual configuration. + +* Improve the performance of statistics collection [#44725](https://github.com/pingcap/tidb/issues/44725) @[xuyifangreeneyes](https://github.com/xuyifangreeneyes) + + TiDB v7.2.0 optimizes the statistics collection strategy, skipping some of the duplicate information and information that is of little value to the optimizer. The overall speed of statistics collection has been improved by 30%. This improvement allows TiDB to update the statistics of the database in a more timely manner, making the generated execution plans more accurate, thus improving the overall database performance. + + By default, statistics collection skips the columns of the `JSON`, `BLOB`, `MEDIUMBLOB`, and `LONGBLOB` types. You can modify the default behavior by setting the [`tidb_analyze_skip_column_types`](/system-variables.md#tidb_analyze_skip_column_types-new-in-v720) system variable. TiDB supports skipping the `JSON`, `BLOB`, and `TEXT` types and their subtypes. + + For more information, see [documentation](/system-variables.md#tidb_analyze_skip_column_types-new-in-v720). + +* Improve the performance of checking data and index consistency [#43693](https://github.com/pingcap/tidb/issues/43693) @[wjhuang2016](https://github.com/wjhuang2016) + + The [`ADMIN CHECK [TABLE|INDEX]`](/sql-statements/sql-statement-admin-check-table-index.md) statement is used to check the consistency between data in a table and its corresponding indexes. 
In v7.2.0, TiDB optimizes the method for checking data consistency and improves the execution efficiency of [`ADMIN CHECK [TABLE|INDEX]`](/sql-statements/sql-statement-admin-check-table-index.md) greatly. In scenarios with large amounts of data, this optimization can provide a performance boost of hundreds of times. + + The optimization is enabled by default ([`tidb_enable_fast_table_check`](/system-variables.md#tidb_enable_fast_table_check-new-in-v720) is `ON` by default) to greatly reduce the time required for data consistency checks in large-scale tables and enhance operational efficiency. + + For more information, see [documentation](/system-variables.md#tidb_enable_fast_table_check-new-in-v720). + +### Reliability + +* Automatically manage queries that consume more resources than expected (experimental) [#43691](https://github.com/pingcap/tidb/issues/43691) @[Connor1996](https://github.com/Connor1996) @[CabinfeverB](https://github.com/CabinfeverB) @[glorv](https://github.com/glorv) @[HuSharp](https://github.com/HuSharp) @[nolouch](https://github.com/nolouch) + + The most common challenge to database stability is the degradation of overall database performance caused by abrupt SQL performance problems. There are many causes for SQL performance issues, such as new SQL statements that have not been fully tested, drastic changes in data volume, and abrupt changes in execution plans. These issues are difficult to completely avoid at the root. TiDB v7.2.0 provides the ability to manage queries that consume more resources than expected. This feature can quickly reduce the scope of impact when a performance issue occurs. + + To manage these queries, you can set the maximum execution time of queries for a resource group. When the execution time of a query exceeds this limit, the query is automatically deprioritized or cancelled. You can also set a period of time to immediately match identified queries by text or execution plan. 
This helps prevent high concurrency of the problematic queries during the identification phase that could consume more resources than expected. + + Automatic management of queries that consume more resources than expected provides you with an effective means to quickly respond to unexpected query performance problems. This feature can reduce the impact of the problem on overall database performance, thereby improving database stability. + + For more information, see [documentation](/tidb-resource-control.md#manage-queries-that-consume-more-resources-than-expected-runaway-queries). + +* Enhance the capability of creating a binding according to a historical execution plan [#39199](https://github.com/pingcap/tidb/issues/39199) @[qw4990](https://github.com/qw4990) + + TiDB v7.2.0 enhances the capability of [creating a binding according to a historical execution plan](/sql-plan-management.md#create-a-binding-according-to-a-historical-execution-plan). This feature improves the parsing and binding process for complex statements, making the bindings more stable, and supports the following new hints: + + - [`AGG_TO_COP()`](/optimizer-hints.md#agg_to_cop) + - [`LIMIT_TO_COP()`](/optimizer-hints.md#limit_to_cop) + - [`ORDER_INDEX`](/optimizer-hints.md#order_indext1_name-idx1_name--idx2_name-) + - [`NO_ORDER_INDEX()`](/optimizer-hints.md#no_order_indext1_name-idx1_name--idx2_name-) + + For more information, see [documentation](/sql-plan-management.md). + +* Introduce the Optimizer Fix Controls mechanism to provide fine-grained control over optimizer behaviors [#43169](https://github.com/pingcap/tidb/issues/43169) @[time-and-fate](https://github.com/time-and-fate) + + To generate more reasonable execution plans, the behavior of the TiDB optimizer evolves over product iterations. However, in some particular scenarios, the changes might lead to performance regression. 
TiDB v7.2.0 introduces Optimizer Fix Controls to let you control some of the fine-grained behaviors of the optimizer. This enables you to roll back or control some new changes. + + Each controllable behavior is described by a GitHub issue corresponding to the fix number. All controllable behaviors are listed in [Optimizer Fix Controls](/optimizer-fix-controls.md). You can set a target value for one or more behaviors by setting the [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710) system variable to achieve behavior control. + + The Optimizer Fix Controls mechanism helps you control the TiDB optimizer at a granular level. It provides a new means of fixing performance issues caused by the upgrade process and improves the stability of TiDB. + + For more information, see [documentation](/optimizer-fix-controls.md). + +* Lightweight statistics initialization becomes generally available (GA) [#42160](https://github.com/pingcap/tidb/issues/42160) @[xuyifangreeneyes](https://github.com/xuyifangreeneyes) + + Starting from v7.2.0, the lightweight statistics initialization feature becomes GA. Lightweight statistics initialization can significantly reduce the number of statistics that must be loaded during startup, thus improving the speed of loading statistics. This feature increases the stability of TiDB in complex runtime environments and reduces the impact on the overall service when TiDB nodes restart. + + For newly created clusters of v7.2.0 or later versions, TiDB loads lightweight statistics by default during TiDB startup and will wait for the loading to finish before providing services. For clusters upgraded from earlier versions, you can set the TiDB configuration items [`lite-init-stats`](/tidb-configuration-file.md#lite-init-stats-new-in-v710) and [`force-init-stats`](/tidb-configuration-file.md#force-init-stats-new-in-v710) to `true` to enable this feature. + + For more information, see [documentation](/statistics.md#load-statistics). 
+ +### SQL + +* Support the `CHECK` constraints [#41711](https://github.com/pingcap/tidb/issues/41711) @[fzzf678](https://github.com/fzzf678) + + Starting from v7.2.0, you can use `CHECK` constraints to restrict the values of one or more columns in a table to meet your specified conditions. When a `CHECK` constraint is added to a table, TiDB checks whether the constraint is satisfied before inserting or updating data in the table. Only the data that satisfies the constraint can be written. + + This feature is disabled by default. You can set the [`tidb_enable_check_constraint`](/system-variables.md#tidb_enable_check_constraint-new-in-v720) system variable to `ON` to enable it. + + For more information, see [documentation](/constraints.md#check). + +### DB operations + +* DDL jobs support pause and resume operations (experimental) [#18015](https://github.com/pingcap/tidb/issues/18015) @[godouxm](https://github.com/godouxm) + + Before TiDB v7.2.0, when a DDL job encounters a business peak during execution, you can only manually cancel the DDL job to reduce its impact on the business. In v7.2.0, TiDB introduces pause and resume operations for DDL jobs. These operations let you pause DDL jobs during a peak and resume them after the peak ends, thus avoiding impact on your application workloads. + + For example, you can pause and resume multiple DDL jobs using `ADMIN PAUSE DDL JOBS` or `ADMIN RESUME DDL JOBS`: + + ```sql + ADMIN PAUSE DDL JOBS 1,2; + ADMIN RESUME DDL JOBS 1,2; + ``` + + For more information, see [documentation](/ddl-introduction.md#ddl-related-commands). + +### Data migration + +* Introduce a new SQL statement `IMPORT INTO` to improve data import efficiency greatly (experimental) [#42930](https://github.com/pingcap/tidb/issues/42930) @[D3Hunter](https://github.com/D3Hunter) + + The `IMPORT INTO` statement integrates the [Physical Import Mode](/tidb-lightning/tidb-lightning-physical-import-mode.md) capability of TiDB Lightning. 
With this statement, you can quickly import data in formats such as CSV, SQL, and PARQUET into an empty table in TiDB. This import method eliminates the need for a separate deployment and management of TiDB Lightning, thereby reducing the complexity of data import and greatly improving import efficiency. + + For data files stored in Amazon S3 or GCS, when the [Backend task distributed execution framework](/tidb-distributed-execution-framework.md) is enabled, `IMPORT INTO` also supports splitting a data import job into multiple sub-jobs and scheduling them to multiple TiDB nodes for parallel import, which further enhances import performance. + + For more information, see [documentation](/sql-statements/sql-statement-import-into.md). + +* TiDB Lightning supports importing source files with the Latin-1 character set into TiDB [#44434](https://github.com/pingcap/tidb/issues/44434) @[lance6716](https://github.com/lance6716) + + With this feature, you can directly import source files with the Latin-1 character set into TiDB using TiDB Lightning. Before v7.2.0, importing such files requires your additional preprocessing or conversion. Starting from v7.2.0, you only need to specify `character-set = "latin1"` when configuring the TiDB Lightning import task. Then, TiDB Lightning automatically handles the character set conversion during the import process to ensure data integrity and accuracy. + + For more information, see [documentation](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task). + +## Compatibility changes + +> **Note:** +> +> This section provides compatibility changes you need to know when you upgrade from v7.1.0 to the current version (v7.2.0). If you are upgrading from v7.0.0 or earlier versions to the current version, you might also need to check the compatibility changes introduced in intermediate versions. 
+ +### System variables + +| Variable name | Change type | Description | +|--------|------------------------------|------| +| [`last_insert_id`](/system-variables.md#last_insert_id) | Modified | Changes the maximum value from `9223372036854775807` to `18446744073709551615` to be consistent with that of MySQL. | +| [`tidb_enable_non_prepared_plan_cache`](/system-variables.md#tidb_enable_non_prepared_plan_cache) | Modified | Changes the default value from `OFF` to `ON` after further tests, meaning that non-prepared execution plan cache is enabled. | +| [`tidb_remove_orderby_in_subquery`](/system-variables.md#tidb_remove_orderby_in_subquery-new-in-v610) | Modified | Changes the default value from `OFF` to `ON` after further tests, meaning that the optimizer removes the `ORDER BY` clause in a subquery. | +| [`tidb_analyze_skip_column_types`](/system-variables.md#tidb_analyze_skip_column_types-new-in-v720) | Newly added | Controls which types of columns are skipped for statistics collection when executing the `ANALYZE` command to collect statistics. The variable is only applicable for [`tidb_analyze_version = 2`](/system-variables.md#tidb_analyze_version-new-in-v510). When using the syntax of `ANALYZE TABLE t COLUMNS c1, ..., cn`, if the type of a specified column is included in `tidb_analyze_skip_column_types`, the statistics of this column will not be collected. | +| [`tidb_enable_check_constraint`](/system-variables.md#tidb_enable_check_constraint-new-in-v720) | Newly added | Controls whether to enable `CHECK` constraints. The default value is `OFF`, which means this feature is disabled. | +| [`tidb_enable_fast_table_check`](/system-variables.md#tidb_enable_fast_table_check-new-in-v720) | Newly added | Controls whether to use a checksum-based approach to quickly check the consistency of data and indexes in a table. The default value is `ON`, which means this feature is enabled. 
| +| [`tidb_enable_tiflash_pipeline_model`](/system-variables.md#tidb_enable_tiflash_pipeline_model-new-in-v720) | Newly added | Controls whether to enable the new execution model of TiFlash, the [pipeline model](/tiflash/tiflash-pipeline-model.md). The default value is `OFF`, which means the pipeline model is disabled. | +| [`tidb_expensive_txn_time_threshold`](/system-variables.md#tidb_expensive_txn_time_threshold-new-in-v720) | Newly added | Controls the threshold for logging expensive transactions, which is 600 seconds by default. When the duration of a transaction exceeds the threshold, and the transaction is neither committed nor rolled back, it is considered an expensive transaction and will be logged. | + +### Configuration file parameters + +| Configuration file | Configuration parameter | Change type | Description | +| -------- | -------- | -------- | -------- | +| TiDB | [`lite-init-stats`](/tidb-configuration-file.md#lite-init-stats-new-in-v710) | Modified | Changes the default value from `false` to `true` after further tests, meaning that TiDB uses lightweight statistics initialization by default during TiDB startup to improve the initialization efficiency. | +| TiDB | [`force-init-stats`](/tidb-configuration-file.md#force-init-stats-new-in-v710) | Modified | Changes the default value from `false` to `true` to align with [`lite-init-stats`](/tidb-configuration-file.md#lite-init-stats-new-in-v710), meaning that TiDB waits for statistics initialization to finish before providing services during TiDB startup. | +| TiKV | [rocksdb.\[defaultcf\|writecf\|lockcf\].compaction-guard-min-output-file-size](/tikv-configuration-file.md#compaction-guard-min-output-file-size) | Modified | Changes the default value from `"8MB"` to `"1MB"` to reduce the data volume of compaction tasks in RocksDB. 
| +| TiKV | [rocksdb.\[defaultcf\|writecf\|lockcf\].optimize-filters-for-memory](/tikv-configuration-file.md#optimize-filters-for-memory-new-in-v720) | Newly added | Controls whether to generate Bloom/Ribbon filters that minimize memory internal fragmentation. | +| TiKV | [rocksdb.\[defaultcf\|writecf\|lockcf\].periodic-compaction-seconds](/tikv-configuration-file.md#periodic-compaction-seconds-new-in-v720) | Newly added | Controls the time interval for periodic compaction. SST files with updates older than this value will be selected for compaction and rewritten to the same level where these SST files originally reside. | +| TiKV | [rocksdb.\[defaultcf\|writecf\|lockcf\].ribbon-filter-above-level](/tikv-configuration-file.md#ribbon-filter-above-level-new-in-v720) | Newly added | Controls whether to use Ribbon filters for levels greater than or equal to this value and use non-block-based bloom filters for levels less than this value. | +| TiKV | [rocksdb.\[defaultcf\|writecf\|lockcf\].ttl](/tikv-configuration-file.md#ttl-new-in-v720) | Newly added | SST files with updates older than the TTL will be automatically selected for compaction. | +| TiDB Lightning | `send-kv-pairs` | Deprecated | Starting from v7.2.0, the parameter `send-kv-pairs` is deprecated. You can use [`send-kv-size`](/tidb-lightning/tidb-lightning-configuration.md) to control the maximum size of one request when sending data to TiKV in physical import mode. | +| TiDB Lightning | [`character-set`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task) | Modified | Introduces a new value option `latin1` for the supported character sets of data import. You can use this option to import source files with the Latin-1 character set. | +| TiDB Lightning | [`send-kv-size`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | Specifies the maximum size of one request when sending data to TiKV in physical import mode. 
When the size of key-value pairs reaches the specified threshold, TiDB Lightning will immediately send them to TiKV. This avoids the OOM problems caused by TiDB Lightning nodes accumulating too many key-value pairs in memory when importing large wide tables. By adjusting this parameter, you can find a balance between memory usage and import speed, improving the stability and efficiency of the import process. | +| Data Migration | [`strict-optimistic-shard-mode`](/dm/feature-shard-merge-optimistic.md) | Newly added | This configuration item is used to be compatible with the DDL shard merge behavior in TiDB Data Migration v2.0. You can enable this configuration item in optimistic mode. After this is enabled, the replication task will be interrupted when it encounters a Type 2 DDL statement. In scenarios where there are dependencies between DDL changes in multiple tables, a timely interruption can be made. You need to manually process the DDL statements of each table before resuming the replication task to ensure data consistency between the upstream and the downstream. | +| TiCDC | [`sink.protocol`](/ticdc/ticdc-changefeed-config.md) | Modified | Introduces a new value option `"open-protocol"` when the downstream is Kafka. Specifies the protocol format used for encoding messages. | +| TiCDC | [`sink.delete-only-output-handle-key-columns`](/ticdc/ticdc-changefeed-config.md) | Newly added | Specifies the output of DELETE events. This parameter is valid only for `"canal-json"` and `"open-protocol"` protocols. The default value is `false`, which means outputting all columns. When you set it to `true`, only primary key columns or unique index columns are output. 
| + +## Improvements + ++ TiDB + + - Optimize the logic of constructing index scan range so that it supports converting complex conditions into index scan range [#41572](https://github.com/pingcap/tidb/issues/41572) [#44389](https://github.com/pingcap/tidb/issues/44389) @[xuyifangreeneyes](https://github.com/xuyifangreeneyes) + - Add new monitoring metrics `Stale Read OPS` and `Stale Read Traffic` [#43325](https://github.com/pingcap/tidb/issues/43325) @[you06](https://github.com/you06) + - When the retry leader of stale read encounters a lock, TiDB forcibly retries with the leader after resolving the lock, which avoids unnecessary overhead [#43659](https://github.com/pingcap/tidb/issues/43659) @[you06](https://github.com/you06) + - Use estimated time to calculate stale read ts and reduce the overhead of stale read [#44215](https://github.com/pingcap/tidb/issues/44215) @[you06](https://github.com/you06) + - Add logs and system variables for long-running transactions [#41471](https://github.com/pingcap/tidb/issues/41471) @[crazycs520](https://github.com/crazycs520) + - Support connecting to TiDB through the compressed MySQL protocol, which improves the performance of data-intensive queries under low bandwidth networks and saves bandwidth costs. This supports both `zlib` and `zstd` based compression. 
[#22605](https://github.com/pingcap/tidb/issues/22605) @[dveeden](https://github.com/dveeden) + - Recognize both `utf8` and `utf8mb3` as the legacy three-byte UTF-8 character set encodings, which facilitates the migration of tables with legacy UTF-8 encodings from MySQL 8.0 to TiDB [#26226](https://github.com/pingcap/tidb/issues/26226) @[dveeden](https://github.com/dveeden) + - Support using `:=` for assignment in `UPDATE` statements [#44751](https://github.com/pingcap/tidb/issues/44751) @[CbcWestwolf](https://github.com/CbcWestwolf) + ++ TiKV + + - Support configuring the retry interval of PD connections in scenarios such as connection request failures using `pd.retry-interval` [#14964](https://github.com/tikv/tikv/issues/14964) @[rleungx](https://github.com/rleungx) + - Optimize the resource control scheduling algorithm by incorporating the global resource usage [#14604](https://github.com/tikv/tikv/issues/14604) @[Connor1996](https://github.com/Connor1996) + - Use gzip compression for `check_leader` requests to reduce traffic [#14553](https://github.com/tikv/tikv/issues/14553) @[you06](https://github.com/you06) + - Add related metrics for `check_leader` requests [#14658](https://github.com/tikv/tikv/issues/14658) @[you06](https://github.com/you06) + - Provide detailed time information during TiKV handling write commands [#12362](https://github.com/tikv/tikv/issues/12362) @[cfzjywxk](https://github.com/cfzjywxk) + ++ PD + + - Use a separate gRPC connection for PD leader election to prevent the impact of other requests [#6403](https://github.com/tikv/pd/issues/6403) @[rleungx](https://github.com/rleungx) + - Enable the bucket splitting by default to mitigate hotspot issues in multi-Region scenarios [#6433](https://github.com/tikv/pd/issues/6433) @[bufferflies](https://github.com/bufferflies) + ++ Tools + + + Backup & Restore (BR) + + - Support access to Azure Blob Storage by shared access signature (SAS) [#44199](https://github.com/pingcap/tidb/issues/44199) 
@[Leavrth](https://github.com/Leavrth) + + + TiCDC + + - Optimize the structure of the directory where data files are stored when a DDL operation occurs in the scenario of replication to an object storage service [#8891](https://github.com/pingcap/tiflow/issues/8891) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Support the OAUTHBEARER authentication in the scenario of replication to Kafka [#8865](https://github.com/pingcap/tiflow/issues/8865) @[hi-rustin](https://github.com/hi-rustin) + - Add the option of outputting only the handle keys for the `DELETE` operation in the scenario of replication to Kafka [#9143](https://github.com/pingcap/tiflow/issues/9143) @[3AceShowHand](https://github.com/3AceShowHand) + + + TiDB Data Migration (DM) + + - Support reading compressed binlogs in MySQL 8.0 as a data source for incremental replication [#6381](https://github.com/pingcap/tiflow/issues/6381) @[dveeden](https://github.com/dveeden) + + + TiDB Lightning + + - Optimize the retry mechanism during import to avoid errors caused by leader switching [#44478](https://github.com/pingcap/tidb/pull/44478) @[lance6716](https://github.com/lance6716) + - Verify checksum through SQL after the import to improve stability of verification [#41941](https://github.com/pingcap/tidb/issues/41941) @[GMHDBJD](https://github.com/GMHDBJD) + - Optimize TiDB Lightning OOM issues when importing wide tables [#43853](https://github.com/pingcap/tidb/issues/43853) @[D3Hunter](https://github.com/D3Hunter) + +## Bug fixes + ++ TiDB + + - Fix the issue that the query with CTE causes TiDB to hang [#43749](https://github.com/pingcap/tidb/issues/43749) [#36896](https://github.com/pingcap/tidb/issues/36896) @[guo-shaoge](https://github.com/guo-shaoge) + - Fix the issue that the `min, max` query result is incorrect [#43805](https://github.com/pingcap/tidb/issues/43805) @[wshwsh12](https://github.com/wshwsh12) + - Fix the issue that the `SHOW PROCESSLIST` statement cannot display the TxnStart of the 
transaction of the statement with a long subquery time [#40851](https://github.com/pingcap/tidb/issues/40851) @[crazycs520](https://github.com/crazycs520) + - Fix the issue that the stale read global optimization does not take effect due to the lack of `TxnScope` in Coprocessor tasks [#43365](https://github.com/pingcap/tidb/issues/43365) @[you06](https://github.com/you06) + - Fix the issue that follower read does not handle flashback errors before retrying, which causes query errors [#43673](https://github.com/pingcap/tidb/issues/43673) @[you06](https://github.com/you06) + - Fix the issue that data and indexes are inconsistent when the `ON UPDATE` statement does not correctly update the primary key [#44565](https://github.com/pingcap/tidb/issues/44565) @[zyguan](https://github.com/zyguan) + - Modify the upper limit of the `UNIX_TIMESTAMP()` function to `3001-01-19 03:14:07.999999 UTC` to be consistent with that of MySQL 8.0.28 or later versions [#43987](https://github.com/pingcap/tidb/issues/43987) @[YangKeao](https://github.com/YangKeao) + - Fix the issue that adding an index fails in the ingest mode [#44137](https://github.com/pingcap/tidb/issues/44137) @[tangenta](https://github.com/tangenta) + - Fix the issue that canceling a DDL task in the rollback state causes errors in related metadata [#44143](https://github.com/pingcap/tidb/issues/44143) @[wjhuang2016](https://github.com/wjhuang2016) + - Fix the issue that using `memTracker` with cursor fetch causes memory leaks [#44254](https://github.com/pingcap/tidb/issues/44254) @[YangKeao](https://github.com/YangKeao) + - Fix the issue that dropping a database causes slow GC progress [#33069](https://github.com/pingcap/tidb/issues/33069) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that TiDB returns an error when the corresponding rows in partitioned tables cannot be found in the probe phase of index join [#43686](https://github.com/pingcap/tidb/issues/43686) 
@[AilinKid](https://github.com/AilinKid) @[mjonss](https://github.com/mjonss) + - Fix the issue that there is no warning when using `SUBPARTITION` to create partitioned tables [#41198](https://github.com/pingcap/tidb/issues/41198) [#41200](https://github.com/pingcap/tidb/issues/41200) @[mjonss](https://github.com/mjonss) + - Fix the issue that when a query is killed because it exceeds `MAX_EXECUTION_TIME`, the returned error message is inconsistent with that of MySQL [#43031](https://github.com/pingcap/tidb/issues/43031) @[dveeden](https://github.com/dveeden) + - Fix the issue that the `LEADING` hint does not support querying block aliases [#44645](https://github.com/pingcap/tidb/issues/44645) @[qw4990](https://github.com/qw4990) + - Modify the return type of the `LAST_INSERT_ID()` function from VARCHAR to LONGLONG to be consistent with that of MySQL [#44574](https://github.com/pingcap/tidb/issues/44574) @[Defined2014](https://github.com/Defined2014) + - Fix the issue that incorrect results might be returned when using a common table expression (CTE) in statements with non-correlated subqueries [#44051](https://github.com/pingcap/tidb/issues/44051) @[winoros](https://github.com/winoros) + - Fix the issue that Join Reorder might cause incorrect outer join results [#44314](https://github.com/pingcap/tidb/issues/44314) @[AilinKid](https://github.com/AilinKid) + - Fix the issue that `PREPARE stmt FROM "ANALYZE TABLE xxx"` might be killed by `tidb_mem_quota_query` [#44320](https://github.com/pingcap/tidb/issues/44320) @[chrysan](https://github.com/chrysan) + ++ TiKV + + - Fix the issue that the transaction returns an incorrect value when TiKV handles stale pessimistic lock conflicts [#13298](https://github.com/tikv/tikv/issues/13298) @[cfzjywxk](https://github.com/cfzjywxk) + - Fix the issue that in-memory pessimistic lock might cause flashback failures and data inconsistency [#13303](https://github.com/tikv/tikv/issues/13303) @[JmPotato](https://github.com/JmPotato) + 
- Fix the issue that the fair lock might be incorrect when TiKV handles stale requests [#13298](https://github.com/tikv/tikv/issues/13298) @[cfzjywxk](https://github.com/cfzjywxk) + - Fix the issue that `autocommit` and `point get replica read` might break linearizability [#14715](https://github.com/tikv/tikv/issues/14715) @[cfzjywxk](https://github.com/cfzjywxk) + ++ PD + + - Fix the issue that redundant replicas cannot be automatically repaired in some corner cases [#6573](https://github.com/tikv/pd/issues/6573) @[nolouch](https://github.com/nolouch) + ++ TiFlash + + - Fix the issue that queries might consume more memory than needed when the data on the Join build side is very large and contains many small string type columns [#7416](https://github.com/pingcap/tiflash/issues/7416) @[yibin87](https://github.com/yibin87) + ++ Tools + + + Backup & Restore (BR) + + - Fix the issue that `checksum mismatch` is falsely reported in some cases [#44472](https://github.com/pingcap/tidb/issues/44472) @[Leavrth](https://github.com/Leavrth) + - Fix the issue that `resolved lock timeout` is falsely reported in some cases [#43236](https://github.com/pingcap/tidb/issues/43236) @[YuJuncen](https://github.com/YuJuncen) + - Fix the issue that TiDB might panic when restoring statistics information [#44490](https://github.com/pingcap/tidb/issues/44490) @[tangenta](https://github.com/tangenta) + + + TiCDC + + - Fix the issue that Resolved TS does not advance properly in some cases [#8963](https://github.com/pingcap/tiflow/issues/8963) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the issue that the `UPDATE` operation cannot output old values when the Avro or CSV protocol is used [#9086](https://github.com/pingcap/tiflow/issues/9086) @[3AceShowHand](https://github.com/3AceShowHand) + - Fix the issue of excessive downstream pressure caused by reading downstream metadata too frequently when replicating data to Kafka [#8959](https://github.com/pingcap/tiflow/issues/8959) 
@[hi-rustin](https://github.com/hi-rustin) + - Fix the issue of too many downstream logs caused by frequently setting the downstream bidirectional replication-related variables when replicating data to TiDB or MySQL [#9180](https://github.com/pingcap/tiflow/issues/9180) @[asddongmen](https://github.com/asddongmen) + - Fix the issue that the PD node crashing causes the TiCDC node to restart [#8868](https://github.com/pingcap/tiflow/issues/8868) @[asddongmen](https://github.com/asddongmen) + - Fix the issue that TiCDC cannot create a changefeed with a downstream Kafka-on-Pulsar [#8892](https://github.com/pingcap/tiflow/issues/8892) @[hi-rustin](https://github.com/hi-rustin) + + + TiDB Lightning + + - Fix the TiDB Lightning panic issue when `experimental.allow-expression-index` is enabled and the default value is UUID [#44497](https://github.com/pingcap/tidb/issues/44497) @[lichunzhu](https://github.com/lichunzhu) + - Fix the TiDB Lightning panic issue when a task exits while dividing a data file [#43195](https://github.com/pingcap/tidb/issues/43195) @[lance6716](https://github.com/lance6716) + +## Contributors + +We would like to thank the following contributors from the TiDB community: + +- [asjdf](https://github.com/asjdf) +- [blacktear23](https://github.com/blacktear23) +- [Cavan-xu](https://github.com/Cavan-xu) +- [darraes](https://github.com/darraes) +- [demoManito](https://github.com/demoManito) +- [dhysum](https://github.com/dhysum) +- [HappyUncle](https://github.com/HappyUncle) +- [jiyfhust](https://github.com/jiyfhust) +- [L-maple](https://github.com/L-maple) +- [nyurik](https://github.com/nyurik) +- [SeigeC](https://github.com/SeigeC) +- [tangjingyu97](https://github.com/tangjingyu97) \ No newline at end of file diff --git a/releases/release-7.3.0.md b/releases/release-7.3.0.md new file mode 100644 index 0000000000000..828736d7e5ad5 --- /dev/null +++ b/releases/release-7.3.0.md @@ -0,0 +1,338 @@ +--- +title: TiDB 7.3.0 Release Notes +summary: Learn about 
the new features, compatibility changes, improvements, and bug fixes in TiDB 7.3.0. +--- + +# TiDB 7.3.0 Release Notes + +Release date: August 14, 2023 + +TiDB version: 7.3.0 + +Quick access: [Quick start](https://docs.pingcap.com/tidb/v7.3/quick-start-with-tidb) | [Installation packages](https://www.pingcap.com/download/?version=v7.3.0#version-list) + +7.3.0 introduces the following major features. In addition to that, 7.3.0 also includes a series of enhancements (described in the [Feature details](#feature-details) section) to query stability in TiDB server and TiFlash. These enhancements are more miscellaneous in nature and not user-facing so they are not included in the following table. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CategoryFeatureDescription
Scalability and PerformanceTiDB Lightning supports Partitioned Raft KV (experimental)TiDB Lightning now supports the new Partitioned Raft KV architecture, as part of the near-term GA of the architecture. +
Reliability and AvailabilityAdd automatic conflict detection and resolution on data importsThe TiDB Lightning Physical Import Mode supports a new version of conflict detection, which implements the semantics of replacing (replace) or ignoring (ignore) conflict data when encountering conflicts. It automatically handles conflict data for you while improving the performance of conflict resolution.
Manual management of runaway queries (experimental)Queries might take longer than you expect. With the new watch list of resource groups, you can now manage queries more effectively and either deprioritize or kill them. Allowing operators to mark target queries by exact SQL text, SQL digest, or plan digest and deal with the queries at a resource group level, this feature gives you much more control over the potential impact of unexpected large queries on a cluster.
SQLEnhance operator control over query stability by adding more optimizer hints to the query plannerAdded hints: NO_INDEX_JOIN(), NO_MERGE_JOIN(), NO_INDEX_MERGE_JOIN(), NO_HASH_JOIN(), NO_INDEX_HASH_JOIN() +
DB Operations and ObservabilityShow the progress of statistics collection tasksSupport viewing the progress of ANALYZE tasks using the SHOW ANALYZE STATUS statement or through the mysql.analyze_jobs system table.
+ +## Feature details + +### Performance + +* TiFlash supports the replica selection strategy [#44106](https://github.com/pingcap/tidb/issues/44106) @[XuHuaiyu](https://github.com/XuHuaiyu) + + Before v7.3.0, TiFlash uses replicas from all its nodes for data scanning and MPP calculations to maximize performance. Starting from v7.3.0, TiFlash introduces the replica selection strategy and lets you configure it using the [`tiflash_replica_read`](/system-variables.md#tiflash_replica_read-new-in-v730) system variable. This strategy supports selecting specific replicas based on the [zone attributes](/schedule-replicas-by-topology-labels.md#optional-configure-labels-for-tidb) of nodes and scheduling specific nodes for data scanning and MPP calculations. + + For a cluster that is deployed in multiple data centers and each data center has complete TiFlash data replicas, you can configure this strategy to only select TiFlash replicas from the current data center. This means data scanning and MPP calculations are performed only on TiFlash nodes in the current data center, which avoids excessive network data transmission across data centers. + + For more information, see [documentation](/system-variables.md#tiflash_replica_read-new-in-v730). + +* TiFlash supports Runtime Filter within nodes [#40220](https://github.com/pingcap/tidb/issues/40220) @[elsa0520](https://github.com/elsa0520) + + Runtime Filter is a **dynamic predicate** generated during the query planning phase. In the process of table joining, these dynamic predicates can effectively filter out rows that do not meet the join conditions, reducing scan time and network overhead, and improving the efficiency of table joining. Starting from v7.3.0, TiFlash supports Runtime Filter within nodes, improving the overall performance of analytical queries. In some TPC-DS workloads, the performance can be improved by 10% to 50%. + + This feature is disabled by default in v7.3.0. 
To enable this feature, set the system variable [`tidb_runtime_filter_mode`](/system-variables.md#tidb_runtime_filter_mode-new-in-v720) to `LOCAL`. + + For more information, see [documentation](/runtime-filter.md). + +* TiFlash supports executing common table expressions (CTEs) (experimental) [#43333](https://github.com/pingcap/tidb/issues/43333) @[winoros](https://github.com/winoros) + + Before v7.3.0, the MPP engine of TiFlash cannot execute queries that contain CTEs by default. To achieve the best execution performance within the MPP framework, you need to use the system variable [`tidb_opt_force_inline_cte`](/system-variables.md#tidb_opt_force_inline_cte-new-in-v630) to enforce inlining CTE. + + Starting from v7.3.0, TiFlash's MPP engine supports executing queries with CTEs without inlining them, allowing for optimal query execution within the MPP framework. In TPC-DS benchmark tests, compared with inlining CTEs, this feature has shown a 20% improvement in overall query execution speed for queries containing CTE. + + This feature is experimental and is disabled by default. It is controlled by the system variable [`tidb_opt_enable_mpp_shared_cte_execution`](/system-variables.md#tidb_opt_enable_mpp_shared_cte_execution-new-in-v720). + +### Reliability + +* Add new optimizer hints [#45520](https://github.com/pingcap/tidb/issues/45520) @[qw4990](https://github.com/qw4990) + + In v7.3.0, TiDB introduces several new optimizer hints to control the join methods between tables, including: + + - [`NO_MERGE_JOIN()`](/optimizer-hints.md#no_merge_joint1_name--tl_name-) selects join methods other than merge join. + - [`NO_INDEX_JOIN()`](/optimizer-hints.md#no_index_joint1_name--tl_name-) selects join methods other than index nested loop join. + - [`NO_INDEX_MERGE_JOIN()`](/optimizer-hints.md#no_index_merge_joint1_name--tl_name-) selects join methods other than index nested loop merge join. 
+ - [`NO_HASH_JOIN()`](/optimizer-hints.md#no_hash_joint1_name--tl_name-) selects join methods other than hash join. + - [`NO_INDEX_HASH_JOIN()`](/optimizer-hints.md#no_index_hash_joint1_name--tl_name-) selects join methods other than [index nested loop hash join](/optimizer-hints.md#inl_hash_join). + + For more information, see [documentation](/optimizer-hints.md). + +* Manually mark queries that use resources more than expected (experimental) [#43691](https://github.com/pingcap/tidb/issues/43691) @[Connor1996](https://github.com/Connor1996) @[CabinfeverB](https://github.com/CabinfeverB) + + In v7.2.0, TiDB automatically manages queries that use resources more than expected (Runaway Query) by automatically downgrading or canceling runaway queries. In actual practice, rules alone cannot cover all cases. Therefore, TiDB v7.3.0 introduces the ability to manually mark runaway queries. With the new command [`QUERY WATCH`](/sql-statements/sql-statement-query-watch.md), you can mark runaway queries based on SQL text, SQL Digest, or execution plan, and the marked runaway queries can be downgraded or cancelled. + + This feature provides an effective intervention method for sudden performance issues in the database. For performance issues caused by queries, before identifying the root cause, this feature can quickly alleviate its impact on overall performance, thereby improving system service quality. + + For more information, see [documentation](/tidb-resource-control.md#query-watch-parameters). + +### SQL + +* List and List COLUMNS partitioned tables support default partitions [#20679](https://github.com/pingcap/tidb/issues/20679) @[mjonss](https://github.com/mjonss) @[bb7133](https://github.com/bb7133) + + Before v7.3.0, when you use the `INSERT` statement to insert data into a List or List COLUMNS partitioned table, the data needs to meet the specified partitioning conditions of the table. 
If the data to be inserted does not meet any of these conditions, either the execution of the statement will fail or the non-compliant data will be ignored. + + Starting from v7.3.0, List and List COLUMNS partitioned tables support default partitions. After a default partition is created, if the data to be inserted does not meet any partitioning condition, it will be written to the default partition. This feature improves the usability of List and List COLUMNS partitioning, avoiding the execution failure of the `INSERT` statement or data being ignored due to data that does not meet partitioning conditions. + + Note that this feature is a TiDB extension to MySQL syntax. For a partitioned table with a default partition, the data in the table cannot be directly replicated to MySQL. + + For more information, see [documentation](/partitioned-table.md#list-partitioning). + +### Observability + +* Show the progress of collecting statistics [#44033](https://github.com/pingcap/tidb/issues/44033) @[hawkingrei](https://github.com/hawkingrei) + + Collecting statistics for large tables often takes a long time. In previous versions, you cannot see the progress of collecting statistics, and therefore cannot predict the completion time. TiDB v7.3.0 introduces a feature to show the progress of collecting statistics. You can view the overall workload, current progress, and estimated completion time for each subtask using the system table `mysql.analyze_jobs` or `SHOW ANALYZE STATUS`. In scenarios such as large-scale data import and SQL performance optimization, this feature helps you understand the overall task progress and improves the user experience. + + For more information, see [documentation](/sql-statements/sql-statement-show-analyze-status.md). 
+ +* Plan Replayer supports exporting historical statistics [#45038](https://github.com/pingcap/tidb/issues/45038) @[time-and-fate](https://github.com/time-and-fate) + +  Starting from v7.3.0, with the newly added [`dump with stats as of timestamp`](/sql-plan-replayer.md) clause, you can use Plan Replayer to export the statistics of specified SQL-related objects at a specific point in time. During the diagnosis of execution plan issues, accurately capturing historical statistics can help analyze more precisely how the execution plan was generated at the time when the issue occurred. This helps identify the root cause of the issue and greatly improves efficiency in diagnosing execution plan issues. + +  For more information, see [documentation](/sql-plan-replayer.md). + +### Data migration + +* TiDB Lightning introduces a new version of conflict data detection and handling strategy [#41629](https://github.com/pingcap/tidb/issues/41629) @[lance6716](https://github.com/lance6716) + +  In previous versions, TiDB Lightning uses different conflict detection and handling methods for Logical Import Mode and Physical Import Mode, which are complex to configure and not easy for users to understand. In addition, Physical Import Mode cannot handle conflicts using the `replace` or `ignore` strategy. Starting from v7.3.0, TiDB Lightning introduces a unified conflict detection and handling strategy for both Logical Import Mode and Physical Import Mode. You can choose to report an error (`error`), replace (`replace`) or ignore (`ignore`) conflicting data when encountering conflicts. You can limit the number of conflict records; for example, the task can be interrupted and terminated after processing a specified number of conflict records. Furthermore, the system can record conflicting data for troubleshooting. + +  When importing data with many conflicts, it is recommended to use the new version of the conflict detection and handling strategy for better performance.
In the lab environment, the new version of the strategy can make conflict detection and handling up to three times faster than the old version. This performance value is for reference only. The actual performance might vary depending on your configuration, table structure, and the percentage of conflicting data. Note that the new version and the old version of the conflict strategy cannot be used at the same time. The old conflict detection and handling strategy will be deprecated in the future. + +  For more information, see [documentation](/tidb-lightning/tidb-lightning-physical-import-mode-usage.md#conflict-detection). + +* TiDB Lightning supports Partitioned Raft KV (experimental) [#14916](https://github.com/tikv/tikv/issues/14916) @[GMHDBJD](https://github.com/GMHDBJD) + +  TiDB Lightning now supports Partitioned Raft KV. This feature helps improve the data import performance of TiDB Lightning. + +* TiDB Lightning introduces a new parameter `enable-diagnose-logs` to enhance troubleshooting by printing more diagnostic logs [#45497](https://github.com/pingcap/tidb/issues/45497) @[D3Hunter](https://github.com/D3Hunter) + +  By default, this feature is disabled and TiDB Lightning only prints logs containing `lightning/main`. When enabled, TiDB Lightning prints logs for all packages (including `client-go` and `tidb`) to help diagnose issues related to `client-go` and `tidb`. + +  For more information, see [documentation](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-global). + +## Compatibility changes + +> **Note:** +> +> This section provides compatibility changes you need to know when you upgrade from v7.2.0 to the current version (v7.3.0). If you are upgrading from v7.1.0 or earlier versions to the current version, you might also need to check the compatibility changes introduced in intermediate versions. + +### Behavior changes + +* Backup & Restore (BR) + +  - BR adds an empty cluster check before performing a full data restoration.
By default, restoring data to a non-empty cluster is not allowed. If you want to force the restoration, you can use the `--filter` option to specify the corresponding table name to restore data to. + +* TiDB Lightning + + - `tikv-importer.on-duplicate` is deprecated and replaced by [`conflict.strategy`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task). + - The `max-error` parameter, which controls the maximum number of non-fatal errors that TiDB Lightning can tolerate before stopping the migration task, no longer limits import data conflicts. The [`conflict.threshold`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task) parameter now controls the maximum number of conflicting records that can be tolerated. + +* TiCDC + + - When Kafka sink uses Avro protocol, if the `force-replicate` parameter is set to `true`, TiCDC reports an error when creating a changefeed. + - Due to incompatibility between `delete-only-output-handle-key-columns` and `force-replicate` parameters, when both parameters are enabled, TiCDC reports an error when creating a changefeed. + - When the output protocol is Open Protocol, the `UPDATE` events only output the changed columns. + +### System variables + +| Variable name | Change type | Description | +|--------|------------------------------|------| +| [`tidb_opt_enable_mpp_shared_cte_execution`](/system-variables.md#tidb_opt_enable_mpp_shared_cte_execution-new-in-v720) | Modified | This system variable takes effect starting from v7.3.0. It controls whether non-recursive Common Table Expressions (CTEs) can be executed in TiFlash MPP. | +| [`tidb_lock_unchanged_keys`](/system-variables.md#tidb_lock_unchanged_keys-new-in-v711-and-v730) | Newly added | This variable is used to control in certain scenarios whether to lock the keys that are involved but not modified in a transaction. 
| +| [`tidb_opt_enable_non_eval_scalar_subquery`](/system-variables.md#tidb_opt_enable_non_eval_scalar_subquery-new-in-v730) | Newly added | Controls whether the `EXPLAIN` statement disables the execution of constant subqueries that can be expanded at the optimization stage. | +| [`tidb_skip_missing_partition_stats`](/system-variables.md#tidb_skip_missing_partition_stats-new-in-v730) | Newly added | This variable controls the generation of GlobalStats when partition statistics are missing. | +| [`tiflash_replica_read`](/system-variables.md#tiflash_replica_read-new-in-v730) | Newly added | Controls the strategy for selecting TiFlash replicas when a query requires the TiFlash engine. | + +### Configuration file parameters + +| Configuration file | Configuration parameter | Change type | Description | +| -------- | -------- | -------- | -------- | +| TiDB | [`enable-32bits-connection-id`](/tidb-configuration-file.md#enable-32bits-connection-id-new-in-v730) | Newly added | Controls whether to enable the 32-bit connection ID feature. | +| TiDB | [`in-mem-slow-query-recent-num`](/tidb-configuration-file.md#in-mem-slow-query-recent-num-new-in-v730) | Newly added | Controls the number of recently used slow queries that are cached in memory. | +| TiDB | [`in-mem-slow-query-topn-num`](/tidb-configuration-file.md#in-mem-slow-query-topn-num-new-in-v730) | Newly added | Controls the number of slowest queries that are cached in memory. | +| TiKV | [`coprocessor.region-bucket-size`](/tikv-configuration-file.md#region-bucket-size-new-in-v610) | Modified | Changes the default value from `96MiB` to `50MiB`. | +| TiKV | [`raft-engine.format-version`](/tikv-configuration-file.md#format-version-new-in-v630) | Modified | When using Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`), Ribbon filter is used. Therefore, TiKV changes the default value from `2` to `5`. 
| +| TiKV | [`raftdb.max-total-wal-size`](/tikv-configuration-file.md#max-total-wal-size-1) | Modified | When using Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`), TiKV skips writing WAL. Therefore, TiKV changes the default value from `"4GB"` to `1`, meaning that WAL is disabled. | +| TiKV | [rocksdb.\[defaultcf\|writecf\|lockcf\].compaction-guard-min-output-file-size](/tikv-configuration-file.md#compaction-guard-min-output-file-size) | Modified | Changes the default value from `"1MB"` to `"8MB"` to resolve the issue that compaction speed cannot keep up with the write speed during large data writes. | +| TiKV | [rocksdb.\[defaultcf\|writecf\|lockcf\].format-version](/tikv-configuration-file.md#format-version-new-in-v620) | Modified | When using Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`), Ribbon filter is used. Therefore, TiKV changes the default value from `2` to `5`. | +| TiKV | [`rocksdb.lockcf.write-buffer-size`](/tikv-configuration-file.md#write-buffer-size) | Modified | When using Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`), to speed up compaction on lockcf, TiKV changes the default value from `"32MB"` to `"4MB"`. | +| TiKV | [`rocksdb.max-total-wal-size`](/tikv-configuration-file.md#max-total-wal-size) | Modified | When using Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`), TiKV skips writing WAL. Therefore, TiKV changes the default value from `"4GB"` to `1`, meaning that WAL is disabled. | +| TiKV | [`rocksdb.stats-dump-period`](/tikv-configuration-file.md#stats-dump-period) | Modified | When using Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`), to disable redundant log printing, changes the default value from `"10m"` to `"0"`. 
| +| TiKV | [`rocksdb.write-buffer-limit`](/tikv-configuration-file.md#write-buffer-limit-new-in-v660) | Modified | To reduce the memory overhead of memtables, when `storage.engine="raft-kv"`, TiKV changes the default value from 25% of the memory of the machine to `0`, which means no limit. When using Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`), TiKV changes the default value from 25% to 20% of the memory of the machine. | +| TiKV | [`storage.block-cache.capacity`](/tikv-configuration-file.md#capacity) | Modified | When using Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`), to compensate for the memory overhead of memtables, TiKV changes the default value from 45% to 30% of the size of total system memory. | +| TiFlash | [`storage.format_version`](/tiflash/tiflash-configuration.md) | Modified | Introduces a new DTFile format `format_version = 5` to reduce the number of physical files by merging smaller files. Note that this format is experimental and not enabled by default. | +| TiDB Lightning | `tikv-importer.incremental-import` | Deleted | TiDB Lightning parallel import parameter. Because it could easily be mistaken as an incremental import parameter, this parameter is now renamed to `tikv-importer.parallel-import`. If a user passes in the old parameter name, it will be automatically converted to the new one. | +| TiDB Lightning | `tikv-importer.on-duplicate` | Deprecated | Controls action to do when trying to insert a conflicting record in the logical import mode. Starting from v7.3.0, this parameter is replaced by [`conflict.strategy`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task). | +| TiDB Lightning | [`conflict.max-record-rows`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | The new version of strategy to handle conflicting data. It controls the maximum number of rows in the `conflict_records` table. The default value is 100. 
| +| TiDB Lightning | [`conflict.strategy`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | The new version of strategy to handle conflicting data. It includes the following options: "" (TiDB Lightning does not detect and process conflicting data), `error` (terminate the import and report an error if a primary or unique key conflict is detected in the imported data), `replace` (when encountering data with conflicting primary or unique keys, the new data is retained and the old data is overwritten.), `ignore` (when encountering data with conflicting primary or unique keys, the old data is retained and the new data is ignored.). The default value is "", that is, TiDB Lightning does not detect and process conflicting data. | +| TiDB Lightning | [`conflict.threshold`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | Controls the upper limit of the conflicting data. When `conflict.strategy="error"`, the default value is `0`. When `conflict.strategy="replace"` or `conflict.strategy="ignore"`, you can set it as a maxint. | +| TiDB Lightning | [`enable-diagnose-logs`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | Controls whether to enable the diagnostic logs. The default value is `false`, that is, only the logs related to the import are output, and the logs of other dependent components are not output. When you set it to `true`, logs from both the import process and other dependent components are output, and GRPC debugging is enabled, which can be used for diagnosis. | +|TiDB Lightning | [`tikv-importer.parallel-import`](/tidb-lightning/tidb-lightning-configuration.md) | Newly added | TiDB Lightning parallel import parameter. It replaces the existing `tikv-importer.incremental-import` parameter, which could be mistaken as an incremental import parameter and misused. | +|BR | `azblob.encryption-scope` | Newly added | BR provides encryption scope support for Azure Blob Storage. 
| +|BR | `azblob.encryption-key` | Newly added | BR provides encryption key support for Azure Blob Storage. | +| TiCDC | [`large-message-handle-option`](/ticdc/ticdc-sink-to-kafka.md#handle-messages-that-exceed-the-kafka-topic-limit) | Newly added | Empty by default, which means that when the message size exceeds the limit of Kafka topic, the changefeed fails. When this configuration is set to `"handle-key-only"`, if the message exceeds the size limit, only the handle key will be sent to reduce the message size; if the reduced message still exceeds the limit, then the changefeed fails. | +| TiCDC | [`sink.csv.binary-encoding-method`](/ticdc/ticdc-changefeed-config.md#changefeed-configuration-parameters) | Newly added | The encoding method of binary data, which can be `'base64'` or `'hex'`. The default value is `'base64'`. | + +### System tables + +- Add a new system table `mysql.tidb_timers` to store the metadata of internal timers. + +## Deprecated features + +* TiDB + + - The [`Fast Analyze`](/system-variables.md#tidb_enable_fast_analyze) feature (experimental) for statistics will be deprecated in v7.5.0. + - The [incremental collection](/statistics.md#incremental-collection) feature for statistics will be deprecated in v7.5.0. 
+ +## Improvements + ++ TiDB + + - Introduce a new system variable [`tidb_opt_enable_non_eval_scalar_subquery`](/system-variables.md#tidb_opt_enable_non_eval_scalar_subquery-new-in-v730) to control whether the `EXPLAIN` statement executes subqueries in advance during the optimization phase [#22076](https://github.com/pingcap/tidb/issues/22076) @[winoros](https://github.com/winoros) + - When [Global Kill](/tidb-configuration-file.md#enable-global-kill-new-in-v610) is enabled, you can terminate the current session by pressing Control+C [#8854](https://github.com/pingcap/tidb/issues/8854) @[pingyu](https://github.com/pingyu) + - Support the `IS_FREE_LOCK()` and `IS_USED_LOCK()` locking functions [#44493](https://github.com/pingcap/tidb/issues/44493) @[dveeden](https://github.com/dveeden) + - Optimize the performance of reading the dumped chunks from disk [#45125](https://github.com/pingcap/tidb/issues/45125) @[YangKeao](https://github.com/YangKeao) + - Optimize the overestimation issue of the inner table of Index Join by using Optimizer Fix Controls [#44855](https://github.com/pingcap/tidb/issues/44855) @[time-and-fate](https://github.com/time-and-fate) + ++ TiKV + + - Add the `Max gap of safe-ts` and `Min safe ts region` metrics and introduce the `tikv-ctl get_region_read_progress` command to better observe and diagnose the status of resolved-ts and safe-ts [#15082](https://github.com/tikv/tikv/issues/15082) @[ekexium](https://github.com/ekexium) + ++ PD + + - Support blocking the Swagger API by default when the Swagger server is not enabled [#6786](https://github.com/tikv/pd/issues/6786) @[bufferflies](https://github.com/bufferflies) + - Improve the high availability of etcd [#6554](https://github.com/tikv/pd/issues/6554) [#6442](https://github.com/tikv/pd/issues/6442) @[lhy1024](https://github.com/lhy1024) + - Reduce the memory consumption of `GetRegions` requests [#6835](https://github.com/tikv/pd/issues/6835) @[lhy1024](https://github.com/lhy1024) + ++ TiFlash + 
+ - Support a new DTFile format version [`storage.format_version = 5`](/tiflash/tiflash-configuration.md) to reduce the number of physical files (experimental) [#7595](https://github.com/pingcap/tiflash/issues/7595) @[hongyunyan](https://github.com/hongyunyan) + ++ Tools + +  + Backup & Restore (BR) + +    - When backing up data to Azure Blob Storage using BR, you can specify either an encryption scope or an encryption key for server-side encryption [#45025](https://github.com/pingcap/tidb/issues/45025) @[Leavrth](https://github.com/Leavrth) + +  + TiCDC + +    - Optimize the message size of the Open Protocol output to make it include only the updated column values when sending `UPDATE` events [#9336](https://github.com/pingcap/tiflow/issues/9336) @[3AceShowHand](https://github.com/3AceShowHand) +    - Storage Sink now supports hexadecimal encoding for HEX formatted data, making it compatible with AWS DMS format specifications [#9373](https://github.com/pingcap/tiflow/issues/9373) @[CharlesCheung96](https://github.com/CharlesCheung96) +    - Kafka Sink supports [sending only handle key data](/ticdc/ticdc-sink-to-kafka.md#handle-messages-that-exceed-the-kafka-topic-limit) when the message is too large, reducing the size of the message [#9382](https://github.com/pingcap/tiflow/issues/9382) @[3AceShowHand](https://github.com/3AceShowHand) + +## Bug fixes + ++ TiDB + +  - Fix the issue that when the MySQL Cursor Fetch protocol is used, the memory consumption of result sets might exceed the `tidb_mem_quota_query` limit and cause TiDB OOM.
After the fix, TiDB will automatically write result sets to the disk to release memory [#43233](https://github.com/pingcap/tidb/issues/43233) @[YangKeao](https://github.com/YangKeao) +  - Fix the TiDB panic issue caused by data race [#45561](https://github.com/pingcap/tidb/issues/45561) @[gengliqi](https://github.com/gengliqi) +  - Fix the hang-up issue that occurs when queries with `indexMerge` are killed [#45279](https://github.com/pingcap/tidb/issues/45279) @[xzhangxian1008](https://github.com/xzhangxian1008) +  - Fix the issue that query results in MPP mode are incorrect when `tidb_enable_parallel_apply` is enabled [#45299](https://github.com/pingcap/tidb/issues/45299) @[windtalker](https://github.com/windtalker) +  - Fix the issue that `resolve lock` might hang when there is a sudden change in PD time [#44822](https://github.com/pingcap/tidb/issues/44822) @[zyguan](https://github.com/zyguan) +  - Fix the issue that the GC Resolve Locks step might miss some pessimistic locks [#45134](https://github.com/pingcap/tidb/issues/45134) @[MyonKeminta](https://github.com/MyonKeminta) +  - Fix the issue that the query with `ORDER BY` returns incorrect results in dynamic pruning mode [#45007](https://github.com/pingcap/tidb/issues/45007) @[Defined2014](https://github.com/Defined2014) +  - Fix the issue that `AUTO_INCREMENT` can be specified on the same column with the `DEFAULT` column value [#45136](https://github.com/pingcap/tidb/issues/45136) @[Defined2014](https://github.com/Defined2014) +  - Fix the issue that querying the system table `INFORMATION_SCHEMA.TIKV_REGION_STATUS` returns incorrect results in some cases [#45531](https://github.com/pingcap/tidb/issues/45531) @[Defined2014](https://github.com/Defined2014) +  - Fix the issue of incorrect partition table pruning in some cases [#42273](https://github.com/pingcap/tidb/issues/42273) @[jiyfhust](https://github.com/jiyfhust) +  - Fix the issue that global indexes are not cleared when truncating a partition of a partitioned table
[#42435](https://github.com/pingcap/tidb/issues/42435) @[L-maple](https://github.com/L-maple) + - Fix the issue that other TiDB nodes do not take over TTL tasks after failures in one TiDB node [#45022](https://github.com/pingcap/tidb/issues/45022) @[lcwangchao](https://github.com/lcwangchao) + - Fix the memory leak issue when TTL is running [#45510](https://github.com/pingcap/tidb/issues/45510) @[lcwangchao](https://github.com/lcwangchao) + - Fix the issue of inaccurate error messages when inserting data into partitioned tables [#44966](https://github.com/pingcap/tidb/issues/44966) @[lilinghai](https://github.com/lilinghai) + - Fix the read permission issue on the `INFORMATION_SCHEMA.TIFLASH_REPLICA` table [#7795](https://github.com/pingcap/tiflash/issues/7795) @[Lloyd-Pottiger](https://github.com/Lloyd-Pottiger) + - Fix the issue that an error occurs when using a wrong partition table name [#44967](https://github.com/pingcap/tidb/issues/44967) @[River2000i](https://github.com/River2000i) + - Fix the issue that creating indexes gets stuck when `tidb_enable_dist_task` is enabled in some cases [#44440](https://github.com/pingcap/tidb/issues/44440) @[tangenta](https://github.com/tangenta) + - Fix the `duplicate entry` error that occurs when restoring a table with `AUTO_ID_CACHE=1` using BR [#44716](https://github.com/pingcap/tidb/issues/44716) @[tiancaiamao](https://github.com/tiancaiamao) + - Fix the issue that the time consumed for executing `TRUNCATE TABLE` is inconsistent with the task execution time shown in `ADMIN SHOW DDL JOBS` [#44785](https://github.com/pingcap/tidb/issues/44785) @[tangenta](https://github.com/tangenta) + - Fix the issue that upgrading TiDB gets stuck when reading metadata takes longer than one DDL lease [#45176](https://github.com/pingcap/tidb/issues/45176) @[zimulala](https://github.com/zimulala) + - Fix the issue that the query result of the `SELECT CAST(n AS CHAR)` statement is incorrect when `n` in the statement is a negative number 
[#44786](https://github.com/pingcap/tidb/issues/44786) @[xhebox](https://github.com/xhebox) + - Fix the issue that queries might return incorrect results when `tidb_opt_agg_push_down` is enabled [#44795](https://github.com/pingcap/tidb/issues/44795) @[AilinKid](https://github.com/AilinKid) + - Fix the issue of wrong results that occurs when a query with `current_date()` uses plan cache [#45086](https://github.com/pingcap/tidb/issues/45086) @[qw4990](https://github.com/qw4990) + ++ TiKV + + - Fix the issue that reading data during GC might cause TiKV panic in some rare cases [#15109](https://github.com/tikv/tikv/issues/15109) @[MyonKeminta](https://github.com/MyonKeminta) + ++ PD + + - Fix the issue that restarting PD might cause the `default` resource group to be reinitialized [#6787](https://github.com/tikv/pd/issues/6787) @[glorv](https://github.com/glorv) + - Fix the issue that when etcd is already started but the client has not yet connected to it, calling the client might cause PD to panic [#6860](https://github.com/tikv/pd/issues/6860) @[HuSharp](https://github.com/HuSharp) + - Fix the issue that the `health-check` output of a Region is inconsistent with the Region information returned by querying the Region ID [#6560](https://github.com/tikv/pd/issues/6560) @[JmPotato](https://github.com/JmPotato) + - Fix the issue that failed learner peers in `unsafe recovery` are ignored in `auto-detect` mode [#6690](https://github.com/tikv/pd/issues/6690) @[v01dstar](https://github.com/v01dstar) + - Fix the issue that Placement Rules select TiFlash learners that do not meet the rules [#6662](https://github.com/tikv/pd/issues/6662) @[rleungx](https://github.com/rleungx) + - Fix the issue that unhealthy peers cannot be removed when rule checker selects peers [#6559](https://github.com/tikv/pd/issues/6559) @[nolouch](https://github.com/nolouch) + ++ TiFlash + + - Fix the issue that TiFlash cannot replicate partitioned tables successfully due to deadlocks 
[#7758](https://github.com/pingcap/tiflash/issues/7758) @[hongyunyan](https://github.com/hongyunyan) + - Fix the issue that the `INFORMATION_SCHEMA.TIFLASH_REPLICA` system table contains tables that users do not have privileges to access [#7795](https://github.com/pingcap/tiflash/issues/7795) @[Lloyd-Pottiger](https://github.com/Lloyd-Pottiger) + - Fix the issue that when there are multiple HashAgg operators within the same MPP task, the compilation of the MPP task might take an excessively long time, severely affecting query performance [#7810](https://github.com/pingcap/tiflash/issues/7810) @[SeaRise](https://github.com/SeaRise) + ++ Tools + + + TiCDC + + - Fix the issue that changefeeds would fail due to the temporary unavailability of PD [#9294](https://github.com/pingcap/tiflow/issues/9294) @[asddongmen](https://github.com/asddongmen) + - Fix the data inconsistency issue that might occur when some TiCDC nodes are isolated from the network [#9344](https://github.com/pingcap/tiflow/issues/9344) @[CharlesCheung96](https://github.com/CharlesCheung96) + - Fix the issue that when Kafka Sink encounters errors it might indefinitely block changefeed progress [#9309](https://github.com/pingcap/tiflow/issues/9309) @[hicqu](https://github.com/hicqu) + - Fix the panic issue that might occur when the TiCDC node status changes [#9354](https://github.com/pingcap/tiflow/issues/9354) @[sdojjy](https://github.com/sdojjy) + - Fix the encoding error for the default `ENUM` values [#9259](https://github.com/pingcap/tiflow/issues/9259) @[3AceShowHand](https://github.com/3AceShowHand) + + + TiDB Lightning + + - Fix the issue that executing checksum after TiDB Lightning completes import might get SSL errors [#45462](https://github.com/pingcap/tidb/issues/45462) @[D3Hunter](https://github.com/D3Hunter) + - Fix the issue that in Logical Import Mode, deleting tables downstream during import might cause TiDB Lightning metadata not to be updated in time 
[#44614](https://github.com/pingcap/tidb/issues/44614) @[dsdashun](https://github.com/dsdashun) + +## Contributors + +We would like to thank the following contributors from the TiDB community: + +- [charleszheng44](https://github.com/charleszheng44) +- [dhysum](https://github.com/dhysum) +- [haiyux](https://github.com/haiyux) +- [Jiang-Hua](https://github.com/Jiang-Hua) +- [Jille](https://github.com/Jille) +- [jiyfhust](https://github.com/jiyfhust) +- [krishnaduttPanchagnula](https://github.com/krishnaduttPanchagnula) +- [L-maple](https://github.com/L-maple) +- [pingandb](https://github.com/pingandb) +- [testwill](https://github.com/testwill) +- [tisonkun](https://github.com/tisonkun) +- [xuyifangreeneyes](https://github.com/xuyifangreeneyes) +- [yumchina](https://github.com/yumchina) diff --git a/releases/release-notes.md b/releases/release-notes.md index b7be63c202d69..1be217dadcb2b 100644 --- a/releases/release-notes.md +++ b/releases/release-notes.md @@ -5,6 +5,19 @@ aliases: ['/docs/dev/releases/release-notes/','/docs/dev/releases/rn/'] # TiDB Release Notes +## 7.3 + +- [7.3.0-DMR](/releases/release-7.3.0.md): 2023-08-14 + +## 7.2 + +- [7.2.0-DMR](/releases/release-7.2.0.md): 2023-06-29 + +## 7.1 + +- [7.1.1](/releases/release-7.1.1.md): 2023-07-24 +- [7.1.0](/releases/release-7.1.0.md): 2023-05-31 + ## 7.0 - [7.0.0-DMR](/releases/release-7.0.0.md): 2023-03-30 @@ -15,6 +28,7 @@ aliases: ['/docs/dev/releases/release-notes/','/docs/dev/releases/rn/'] ## 6.5 +- [6.5.3](/releases/release-6.5.3.md): 2023-06-14 - [6.5.2](/releases/release-6.5.2.md): 2023-04-21 - [6.5.1](/releases/release-6.5.1.md): 2023-03-10 - [6.5.0](/releases/release-6.5.0.md): 2022-12-29 @@ -33,6 +47,7 @@ aliases: ['/docs/dev/releases/release-notes/','/docs/dev/releases/rn/'] ## 6.1 +- [6.1.7](/releases/release-6.1.7.md): 2023-07-12 - [6.1.6](/releases/release-6.1.6.md): 2023-04-12 - [6.1.5](/releases/release-6.1.5.md): 2023-02-28 - [6.1.4](/releases/release-6.1.4.md): 2023-02-08 diff --git 
a/releases/release-timeline.md b/releases/release-timeline.md index 98583193dbb0e..d00e28f916296 100644 --- a/releases/release-timeline.md +++ b/releases/release-timeline.md @@ -9,6 +9,12 @@ This document shows all the released TiDB versions in reverse chronological orde | Version | Release Date | | :--- | :--- | +| [7.3.0-DMR](/releases/release-7.3.0.md) | 2023-08-14 | +| [7.1.1](/releases/release-7.1.1.md) | 2023-07-24 | +| [6.1.7](/releases/release-6.1.7.md) | 2023-07-12 | +| [7.2.0-DMR](/releases/release-7.2.0.md) | 2023-06-29 | +| [6.5.3](/releases/release-6.5.3.md) | 2023-06-14 | +| [7.1.0](/releases/release-7.1.0.md) | 2023-05-31 | | [6.5.2](/releases/release-6.5.2.md) | 2023-04-21 | | [6.1.6](/releases/release-6.1.6.md) | 2023-04-12 | | [7.0.0-DMR](/releases/release-7.0.0.md) | 2023-03-30 | diff --git a/resources/doc-templates/patch_release_note_template_zh.md b/resources/doc-templates/patch_release_note_template_zh.md new file mode 100644 index 0000000000000..6ead5f552bd7c --- /dev/null +++ b/resources/doc-templates/patch_release_note_template_zh.md @@ -0,0 +1,161 @@ +--- +title: TiDB x.y.z Release Notes +summary: 了解 TiDB x.y.z 版本的兼容性变更、改进提升,以及错误修复。 +--- + +# TiDB x.y.z Release Notes + +发版日期:2023 年 x 月 x 日 + +TiDB 版本:x.y.z + +试用链接:[快速体验](https://docs.pingcap.com/zh/tidb/vx.y/quick-start-with-tidb) | [生产部署](https://docs.pingcap.com/zh/tidb/vx.y/production-deployment-using-tiup) | [下载离线包](https://cn.pingcap.com/product-community/?version=vx.y.z#version-list) + +## 兼容性变更 + +- note [#issue](https://github.com/pingcap/${repo-name}/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) +- placeholder + +## 改进提升 + ++ TiDB + + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + ++ TiKV + + - note 
[#issue](https://github.com/tikv/tikv/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/tikv/tikv/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + ++ PD + + - note [#issue](https://github.com/tikv/pd/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/tikv/pd/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + ++ TiFlash + + - note [#issue](https://github.com/pingcap/tiflash/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tiflash/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + ++ Tools + + + Backup & Restore (BR) + + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiCDC + + - note [#issue](https://github.com/pingcap/tiflow/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tiflow/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiDB Data Migration (DM) + + - note [#issue](https://github.com/pingcap/tiflow/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tiflow/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiDB Lightning + + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + Dumpling + + - note 
[#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiUP + + - note [#issue](https://github.com/pingcap/tiup/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tiup/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiDB Binlog + + - note [#issue](https://github.com/pingcap/tidb-binlog/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb-binlog/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + +## 错误修复 + ++ TiDB + + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + ++ TiKV + + - note [#issue](https://github.com/tikv/tikv/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/tikv/tikv/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + ++ PD + + - note [#issue](https://github.com/tikv/pd/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/tikv/pd/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + ++ TiFlash + + - note [#issue](https://github.com/pingcap/tiflash/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tiflash/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + ++ Tools + + + Backup & Restore (BR) + + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 
GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiCDC + + - note [#issue](https://github.com/pingcap/tiflow/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tiflow/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiDB Data Migration (DM) + + - note [#issue](https://github.com/pingcap/tiflow/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tiflow/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiDB Lightning + + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + Dumpling + + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiUP + + - note [#issue](https://github.com/pingcap/tiup/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tiup/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + + + TiDB Binlog + + - note [#issue](https://github.com/pingcap/tidb-binlog/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - note [#issue](https://github.com/pingcap/tidb-binlog/issues/${issue-id}) @[贡献者 GitHub ID](https://github.com/${github-id}) + - placeholder + +## Other dup notes + +- placeholder \ No newline at end of file diff --git a/runtime-filter.md b/runtime-filter.md new file mode 100644 
index 0000000000000..c4d01012a35d2 --- /dev/null +++ b/runtime-filter.md @@ -0,0 +1,258 @@ +--- +title: Runtime Filter +summary: Learn the working principles of Runtime Filter and how to use it. +--- + +# Runtime Filter + +Runtime Filter is a new feature introduced in TiDB v7.3, which aims to improve the performance of hash join in MPP scenarios. By generating filters dynamically to filter the data of hash join in advance, TiDB can reduce the amount of data scanning and the amount of calculation of hash join at runtime, ultimately improving the query performance. + +## Concepts + +- Hash join: a way to implement the join relational algebra. It gets the result of Join by building a hash table on one side and continuously matching the hash table on the other side. +- Build side: one side of hash join used to build a hash table. In this document, the right table of hash join is called the build side by default. +- Probe side: one side of hash join used to continuously match the hash table. In this document, the left table of hash join is called the probe side by default. +- Filter: also known as predicate, which refers to the filter condition in this document. + +## Working principles of Runtime Filter + +Hash join performs the join operation by building a hash table based on the right table and continuously probing the hash table using the left table. If some join key values cannot hit the hash table during the probing process, it means that the data does not exist in the right table and will not appear in the final join result. Therefore, if TiDB can **filter out the join key data in advance** during scanning, it will reduce the scanning time and network overhead, thereby greatly improving the join efficiency. + +Runtime Filter is a **dynamic predicate** generated during the query planning phase. This predicate has the same function as other predicates in the TiDB Selection operator. 
These predicates are all applied to the Table Scan operation to filter out rows that do not match the predicate. The only difference is that the parameter values in Runtime Filter come from the results generated during the hash join build process. + +### Example + +Assume that there is a join query between the `store_sales` table and the `date_dim` table, and the join method is hash join. `store_sales` is a fact table that mainly stores the sales data of stores, and the number of rows is 1 million. `date_dim` is a time dimension table that mainly stores date information. You want to query the sales data of the year 2001, so 365 rows of the `date_dim` table are involved in the join operation. + +```sql +SELECT * FROM store_sales, date_dim +WHERE ss_date_sk = d_date_sk + AND d_year = 2001; +``` + +The execution plan of hash join is usually as follows: + +``` + +-------------------+ + | PhysicalHashJoin | + +------->| |<------+ + | +-------------------+ | + | | + | | + 100w | | 365 + | | + | | ++-------+-------+ +--------+-------+ +| TableFullScan | | TableFullScan | +| store_sales | | date_dim | ++---------------+ +----------------+ +``` + +*(The above figure omits the exchange node and other nodes.)* + +The execution process of Runtime Filter is as follows: + +1. Scan the data of the `date_dim` table. +2. `PhysicalHashJoin` calculates a filter condition based on the data of the build side, such as `date_dim in (2001/01/01~2001/12/31)`. +3. Send the filter condition to the `TableFullScan` operator that is waiting to scan `store_sales`. +4. The filter condition is applied to `store_sales`, and the filtered data is passed to `PhysicalHashJoin`, thereby reducing the amount of data scanned by the probe side and the amount of calculation of matching the hash table. + +``` + 2. Build RF values + +-------->+-------------------+ + | |PhysicalHashJoin |<-----+ + | +----+ | | +4. After RF | | +-------------------+ | 1. Scan T2 + 5000 | |3. 
Send RF | 365 + | | filter data | + | | | + +-----+----v------+ +-------+--------+ + | TableFullScan | | TableFullScan | + | store_sales | | date_dim | + +-----------------+ +----------------+ +``` + +*(RF is short for Runtime Filter)* + +From the above two figures, you can see that the amount of data scanned by `store_sales` is reduced from 1 million to 5000. By reducing the amount of data scanned by `TableFullScan`, Runtime Filter can reduce the number of times to match the hash table, avoiding unnecessary I/O and network transmission, thus significantly improving the efficiency of the join operation. + +## Use Runtime Filter + +To use Runtime Filter, you need to create a table with TiFlash replicas and set [`tidb_runtime_filter_mode`](/system-variables.md#tidb_runtime_filter_mode-new-in-v720) to `LOCAL`. + +Taking the TPC-DS dataset as an example, this section uses the `catalog_sales` table and the `date_dim` table for join operations to illustrate how Runtime Filter improves query efficiency. + +### Step 1. Create TiFlash replicas for tables to be joined + +Add a TiFlash replica to each of the `catalog_sales` table and the `date_dim` table. + +```sql +ALTER TABLE catalog_sales SET tiflash REPLICA 1; +ALTER TABLE date_dim SET tiflash REPLICA 1; +``` + +Wait until the TiFlash replicas of the two tables are ready, that is, the `AVAILABLE` and `PROGRESS` fields of the replicas are both `1`. 
+ +```sql +SELECT * FROM INFORMATION_SCHEMA.TIFLASH_REPLICA WHERE TABLE_NAME='catalog_sales'; ++--------------+---------------+----------+---------------+-----------------+-----------+----------+ +| TABLE_SCHEMA | TABLE_NAME | TABLE_ID | REPLICA_COUNT | LOCATION_LABELS | AVAILABLE | PROGRESS | ++--------------+---------------+----------+---------------+-----------------+-----------+----------+ +| tpcds50 | catalog_sales | 1055 | 1 | | 1 | 1 | ++--------------+---------------+----------+---------------+-----------------+-----------+----------+ + +SELECT * FROM INFORMATION_SCHEMA.TIFLASH_REPLICA WHERE TABLE_NAME='date_dim'; ++--------------+------------+----------+---------------+-----------------+-----------+----------+ +| TABLE_SCHEMA | TABLE_NAME | TABLE_ID | REPLICA_COUNT | LOCATION_LABELS | AVAILABLE | PROGRESS | ++--------------+------------+----------+---------------+-----------------+-----------+----------+ +| tpcds50 | date_dim | 1015 | 1 | | 1 | 1 | ++--------------+------------+----------+---------------+-----------------+-----------+----------+ +``` + +### Step 2. Enable Runtime Filter + +To enable Runtime Filter, set the value of the system variable [`tidb_runtime_filter_mode`](/system-variables.md#tidb_runtime_filter_mode-new-in-v720 ) to `LOCAL`. + +```sql +SET tidb_runtime_filter_mode="LOCAL"; +``` + +Check whether the change is successful: + +```sql +SHOW VARIABLES LIKE "tidb_runtime_filter_mode"; ++--------------------------+-------+ +| Variable_name | Value | ++--------------------------+-------+ +| tidb_runtime_filter_mode | LOCAL | ++--------------------------+-------+ +``` + +If the value of the system variable is `LOCAL`, Runtime Filter is enabled. + +### Step 3. Execute the query + +Before executing the query, use the [`EXPLAIN` statement](/sql-statements/sql-statement-explain.md) to show the execution plan and check whether Runtime Filter has taken effect. 
+ +```sql +EXPLAIN SELECT cs_ship_date_sk FROM catalog_sales, date_dim +WHERE d_date = '2002-2-01' AND + cs_ship_date_sk = d_date_sk; +``` + +When Runtime Filter takes effect, the corresponding Runtime Filter is mounted on the `HashJoin` node and the `TableScan` node, indicating that Runtime Filter is applied successfully. + +``` +TableFullScan: runtime filter:0[IN] -> tpcds50.catalog_sales.cs_ship_date_sk +HashJoin: runtime filter:0[IN] <- tpcds50.date_dim.d_date_sk | +``` + +The complete query execution plan is as follows: + +``` ++----------------------------------------+-------------+--------------+---------------------+-----------------------------------------------------------------------------------------------------------------------------------------------+ +| id | estRows | task | access object | operator info | ++----------------------------------------+-------------+--------------+---------------------+-----------------------------------------------------------------------------------------------------------------------------------------------+ +| TableReader_53 | 37343.19 | root | | MppVersion: 1, data:ExchangeSender_52 | +| └─ExchangeSender_52 | 37343.19 | mpp[tiflash] | | ExchangeType: PassThrough | +| └─Projection_51 | 37343.19 | mpp[tiflash] | | tpcds50.catalog_sales.cs_ship_date_sk | +| └─HashJoin_48 | 37343.19 | mpp[tiflash] | | inner join, equal:[eq(tpcds50.date_dim.d_date_sk, tpcds50.catalog_sales.cs_ship_date_sk)], runtime filter:0[IN] <- tpcds50.date_dim.d_date_sk | +| ├─ExchangeReceiver_29(Build) | 1.00 | mpp[tiflash] | | | +| │ └─ExchangeSender_28 | 1.00 | mpp[tiflash] | | ExchangeType: Broadcast, Compression: FAST | +| │ └─TableFullScan_26 | 1.00 | mpp[tiflash] | table:date_dim | pushed down filter:eq(tpcds50.date_dim.d_date, 2002-02-01 00:00:00.000000), keep order:false | +| └─Selection_31(Probe) | 71638034.00 | mpp[tiflash] | | not(isnull(tpcds50.catalog_sales.cs_ship_date_sk)) | +| └─TableFullScan_30 | 71997669.00 | mpp[tiflash] | 
table:catalog_sales | pushed down filter:empty, keep order:false, runtime filter:0[IN] -> tpcds50.catalog_sales.cs_ship_date_sk | ++----------------------------------------+-------------+--------------+---------------------+-----------------------------------------------------------------------------------------------------------------------------------------------+ +9 rows in set (0.01 sec) +``` + +Now, execute the SQL query, and Runtime Filter is applied. + +```sql +SELECT cs_ship_date_sk FROM catalog_sales, date_dim +WHERE d_date = '2002-2-01' AND + cs_ship_date_sk = d_date_sk; +``` + +### Step 4. Performance comparison + +This example uses the 50 GB TPC-DS data. After Runtime Filter is enabled, the query time is reduced from 0.38 seconds to 0.17 seconds, and efficiency is improved by 50%. You can use the `ANALYZE` statement to view the execution time of each operator after Runtime Filter takes effect. + +The following is the execution information of the query when Runtime Filter is not enabled: + +```sql +EXPLAIN ANALYZE SELECT cs_ship_date_sk FROM catalog_sales, date_dim WHERE d_date = '2002-2-01' AND cs_ship_date_sk = d_date_sk; ++----------------------------------------+-------------+----------+--------------+---------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------+---------+------+ +| id | estRows | actRows | task | access object | execution info | operator info | memory | disk | 
++----------------------------------------+-------------+----------+--------------+---------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------+---------+------+ +| TableReader_53 | 37343.19 | 59574 | root | | time:379.7ms, loops:83, RU:0.000000, cop_task: {num: 48, max: 0s, min: 0s, avg: 0s, p95: 0s, copr_cache_hit_ratio: 0.00} | MppVersion: 1, data:ExchangeSender_52 | 12.0 KB | N/A | +| └─ExchangeSender_52 | 37343.19 | 59574 | mpp[tiflash] | | tiflash_task:{proc max:377ms, min:375.3ms, avg: 376.1ms, p80:377ms, p95:377ms, iters:1160, tasks:2, threads:16} | ExchangeType: PassThrough | N/A | N/A | +| └─Projection_51 | 37343.19 | 59574 | mpp[tiflash] | | tiflash_task:{proc max:377ms, min:375.3ms, avg: 376.1ms, p80:377ms, p95:377ms, iters:1160, tasks:2, threads:16} | tpcds50.catalog_sales.cs_ship_date_sk | N/A | N/A | +| └─HashJoin_48 | 37343.19 | 59574 | mpp[tiflash] | | tiflash_task:{proc max:377ms, min:375.3ms, avg: 376.1ms, p80:377ms, p95:377ms, iters:1160, tasks:2, threads:16} | inner join, equal:[eq(tpcds50.date_dim.d_date_sk, tpcds50.catalog_sales.cs_ship_date_sk)] | N/A | N/A | +| ├─ExchangeReceiver_29(Build) | 1.00 | 2 | mpp[tiflash] | | tiflash_task:{proc max:291.3ms, min:290ms, avg: 290.6ms, p80:291.3ms, p95:291.3ms, iters:2, tasks:2, threads:16} | | N/A | N/A | +| │ └─ExchangeSender_28 | 1.00 | 1 | mpp[tiflash] | | tiflash_task:{proc max:290.9ms, min:0s, avg: 145.4ms, p80:290.9ms, p95:290.9ms, iters:1, tasks:2, threads:1} | ExchangeType: Broadcast, Compression: FAST | N/A | N/A | +| │ └─TableFullScan_26 | 1.00 | 1 | 
mpp[tiflash] | table:date_dim | tiflash_task:{proc max:3.88ms, min:0s, avg: 1.94ms, p80:3.88ms, p95:3.88ms, iters:1, tasks:2, threads:1}, tiflash_scan:{dtfile:{total_scanned_packs:2, total_skipped_packs:12, total_scanned_rows:16384, total_skipped_rows:97625, total_rs_index_load_time: 0ms, total_read_time: 0ms}, total_create_snapshot_time: 0ms, total_local_region_num: 1, total_remote_region_num: 0} | pushed down filter:eq(tpcds50.date_dim.d_date, 2002-02-01 00:00:00.000000), keep order:false | N/A | N/A | +| └─Selection_31(Probe) | 71638034.00 | 71638034 | mpp[tiflash] | | tiflash_task:{proc max:47ms, min:34.3ms, avg: 40.6ms, p80:47ms, p95:47ms, iters:1160, tasks:2, threads:16} | not(isnull(tpcds50.catalog_sales.cs_ship_date_sk)) | N/A | N/A | +| └─TableFullScan_30 | 71997669.00 | 71997669 | mpp[tiflash] | table:catalog_sales | tiflash_task:{proc max:34ms, min:17.3ms, avg: 25.6ms, p80:34ms, p95:34ms, iters:1160, tasks:2, threads:16}, tiflash_scan:{dtfile:{total_scanned_packs:8893, total_skipped_packs:4007, total_scanned_rows:72056474, total_skipped_rows:32476901, total_rs_index_load_time: 8ms, total_read_time: 579ms}, total_create_snapshot_time: 0ms, total_local_region_num: 194, total_remote_region_num: 0} | pushed down filter:empty, keep order:false | N/A | N/A | ++----------------------------------------+-------------+----------+--------------+---------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+----------------------------------------------------------------------------------------------+---------+------+ +9 rows in set (0.38 sec) +``` + +The following is the execution information of the query when Runtime Filter 
is enabled: + +```sql +EXPLAIN ANALYZE SELECT cs_ship_date_sk FROM catalog_sales, date_dim + -> WHERE d_date = '2002-2-01' AND + -> cs_ship_date_sk = d_date_sk; ++----------------------------------------+-------------+---------+--------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------+---------+------+ +| id | estRows | actRows | task | access object | execution info | operator info | memory | disk | ++----------------------------------------+-------------+---------+--------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------+---------+------+ +| TableReader_53 | 37343.19 | 59574 | root | | time:162.1ms, loops:82, RU:0.000000, cop_task: {num: 47, max: 0s, min: 0s, avg: 0s, p95: 0s, copr_cache_hit_ratio: 0.00} | MppVersion: 1, data:ExchangeSender_52 | 12.7 KB | N/A | +| └─ExchangeSender_52 | 37343.19 | 59574 | mpp[tiflash] | | tiflash_task:{proc max:160.8ms, min:154.3ms, avg: 157.6ms, p80:160.8ms, p95:160.8ms, iters:86, tasks:2, threads:16} | ExchangeType: 
PassThrough | N/A | N/A | +| └─Projection_51 | 37343.19 | 59574 | mpp[tiflash] | | tiflash_task:{proc max:160.8ms, min:154.3ms, avg: 157.6ms, p80:160.8ms, p95:160.8ms, iters:86, tasks:2, threads:16} | tpcds50.catalog_sales.cs_ship_date_sk | N/A | N/A | +| └─HashJoin_48 | 37343.19 | 59574 | mpp[tiflash] | | tiflash_task:{proc max:160.8ms, min:154.3ms, avg: 157.6ms, p80:160.8ms, p95:160.8ms, iters:86, tasks:2, threads:16} | inner join, equal:[eq(tpcds50.date_dim.d_date_sk, tpcds50.catalog_sales.cs_ship_date_sk)], runtime filter:0[IN] <- tpcds50.date_dim.d_date_sk | N/A | N/A | +| ├─ExchangeReceiver_29(Build) | 1.00 | 2 | mpp[tiflash] | | tiflash_task:{proc max:132.3ms, min:130.8ms, avg: 131.6ms, p80:132.3ms, p95:132.3ms, iters:2, tasks:2, threads:16} | | N/A | N/A | +| │ └─ExchangeSender_28 | 1.00 | 1 | mpp[tiflash] | | tiflash_task:{proc max:131ms, min:0s, avg: 65.5ms, p80:131ms, p95:131ms, iters:1, tasks:2, threads:1} | ExchangeType: Broadcast, Compression: FAST | N/A | N/A | +| │ └─TableFullScan_26 | 1.00 | 1 | mpp[tiflash] | table:date_dim | tiflash_task:{proc max:3.01ms, min:0s, avg: 1.51ms, p80:3.01ms, p95:3.01ms, iters:1, tasks:2, threads:1}, tiflash_scan:{dtfile:{total_scanned_packs:2, total_skipped_packs:12, total_scanned_rows:16384, total_skipped_rows:97625, total_rs_index_load_time: 0ms, total_read_time: 0ms}, total_create_snapshot_time: 0ms, total_local_region_num: 1, total_remote_region_num: 0} | pushed down filter:eq(tpcds50.date_dim.d_date, 2002-02-01 00:00:00.000000), keep order:false | N/A | N/A | +| └─Selection_31(Probe) | 71638034.00 | 5308995 | mpp[tiflash] | | tiflash_task:{proc max:39.8ms, min:24.3ms, avg: 32.1ms, p80:39.8ms, p95:39.8ms, iters:86, tasks:2, threads:16} | not(isnull(tpcds50.catalog_sales.cs_ship_date_sk)) | N/A | N/A | +| └─TableFullScan_30 | 71997669.00 | 5335549 | mpp[tiflash] | table:catalog_sales | tiflash_task:{proc max:36.8ms, min:23.3ms, avg: 30.1ms, p80:36.8ms, p95:36.8ms, iters:86, tasks:2, threads:16}, 
tiflash_scan:{dtfile:{total_scanned_packs:660, total_skipped_packs:12451, total_scanned_rows:5335549, total_skipped_rows:100905778, total_rs_index_load_time: 2ms, total_read_time: 47ms}, total_create_snapshot_time: 0ms, total_local_region_num: 194, total_remote_region_num: 0} | pushed down filter:empty, keep order:false, runtime filter:0[IN] -> tpcds50.catalog_sales.cs_ship_date_sk | N/A | N/A | ++----------------------------------------+-------------+---------+--------------+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------+---------+------+ +9 rows in set (0.17 sec) +``` + +By comparing the execution information of the two queries, you can find the following improvements: + +* IO reduction: by comparing the `total_scanned_rows` of the TableFullScan operator, you can see that the scan volume of `TableFullScan` is reduced by 2/3 after Runtime Filter is enabled. +* Hash join performance improvement: the execution duration of the `HashJoin` operator is reduced from 376.1 ms to 157.6 ms. + +### Best practices + +Runtime Filter is applicable to the scenario where a large table and a small table are joined, such as a join query of a fact table and a dimension table. When the dimension table has a small amount of hit data, it means that the filter has fewer values, so the fact table can filter out the data that does not meet the conditions more effectively. 
Compared with the default scenario where the entire fact table is scanned, this significantly improves the query performance. + +The join operation of the `Sales` table and the `date_dim` table in TPC-DS is a typical example. + +## Configure Runtime Filter + +When using Runtime Filter, you can configure the mode and predicate type of Runtime Filter. + +### Runtime Filter mode + +The mode of Runtime Filter is the relationship between the **Filter Sender operator** and **Filter Receiver operator**. There are three modes: `OFF`, `LOCAL`, and `GLOBAL`. In v7.3.0, only `OFF` and `LOCAL` modes are supported. The Runtime Filter mode is controlled by the system variable [`tidb_runtime_filter_mode`](/system-variables.md#tidb_runtime_filter_mode-new-in-v720). + +- `OFF`: Runtime Filter is disabled. After it is disabled, the query behavior is the same as in previous versions. +- `LOCAL`: Runtime Filter is enabled in the local mode. In the local mode, the **Filter Sender operator** and **Filter Receiver operator** are in the same MPP task. In other words, Runtime Filter can be applied to the scenario where the HashJoin operator and TableScan operator are in the same task. Currently, Runtime Filter only supports the local mode. To enable this mode, set it to `LOCAL`. +- `GLOBAL`: currently, the global mode is not supported. You cannot set Runtime Filter to this mode. + +### Runtime Filter type + +The type of Runtime Filter is the type of the predicate used by the generated Filter operator. Currently, only one type is supported: `IN`, which means that the generated predicate is similar to `k1 in (xxx)`. The Runtime Filter type is controlled by the system variable [`tidb_runtime_filter_type`](/system-variables.md#tidb_runtime_filter_type-new-in-v720). + +- `IN`: the default type. It means that the generated Runtime Filter uses the `IN` type predicate. 
+ +## Limitations + +- Runtime Filter is an optimization in the MPP architecture and can only be applied to queries pushed down to TiFlash. +- Join type: Left outer, Full outer, and Anti join (when the left table is the probe side) do not support Runtime Filter. Because Runtime Filter filters the data involved in the join in advance, the preceding types of join do not discard the unmatched data, so Runtime Filter cannot be used. +- Equal join expression: When the probe column in the equal join expression is a complex expression, or when the probe column type is JSON, Blob, Array, or other complex data types, Runtime Filter is not generated. The main reason is that the preceding types of columns are rarely used as the join column. Even if the Filter is generated, the filtering rate is usually low. + +For the preceding limitations, if you need to confirm whether Runtime Filter is generated correctly, you can use the [`EXPLAIN` statement](/sql-statements/sql-statement-explain.md) to verify the execution plan. diff --git a/scale-tidb-using-tiup.md b/scale-tidb-using-tiup.md index 1810abd9d71c1..0bde471415de5 100644 --- a/scale-tidb-using-tiup.md +++ b/scale-tidb-using-tiup.md @@ -274,9 +274,9 @@ This section exemplifies how to remove a TiKV node from the `10.0.1.5` host. 
``` ``` - Starting /root/.tiup/components/cluster/v1.11.3/cluster display + Starting /root/.tiup/components/cluster/v1.12.3/cluster display TiDB Cluster: - TiDB Version: v7.0.0 + TiDB Version: v7.3.0 ID Role Host Ports Status Data Dir Deploy Dir -- ---- ---- ----- ------ -------- ---------- 10.0.1.3:8300 cdc 10.0.1.3 8300 Up data/cdc-8300 deploy/cdc-8300 diff --git a/schedule-replicas-by-topology-labels.md b/schedule-replicas-by-topology-labels.md index c8506f7e94cce..682a112b915a4 100644 --- a/schedule-replicas-by-topology-labels.md +++ b/schedule-replicas-by-topology-labels.md @@ -24,7 +24,7 @@ Assume that the topology has four layers: zone > data center (dc) > rack > host, + Use the command-line flag to start a TiKV instance: - ```shell + ```shell tikv-server --labels zone=,dc=,rack=,host= ``` @@ -41,14 +41,14 @@ Assume that the topology has four layers: zone > data center (dc) > rack > host, To set labels for TiFlash, you can use the `tiflash-learner.toml` file, which is the configuration file of tiflash-proxy: - ```toml - [server] - [server.labels] - zone = "" - dc = "" - rack = "" - host = "" - ``` +```toml +[server] +[server.labels] +zone = "" +dc = "" +rack = "" +host = "" +``` ### (Optional) Configure `labels` for TiDB diff --git a/scripts/filterUpdateFiles.js b/scripts/filterUpdateFiles.js new file mode 100644 index 0000000000000..2dd50c222e2e4 --- /dev/null +++ b/scripts/filterUpdateFiles.js @@ -0,0 +1,119 @@ +import * as fs from "fs"; +import path from "path"; +import axios from "axios"; +import { Octokit } from "octokit"; + +const GH_TOKEN = process.env.GH_TOKEN || ""; + +const octokit = GH_TOKEN + ? 
new Octokit({ + auth: GH_TOKEN, + }) + : new Octokit(); + +const getLocalCfg = () => { + const fileContent = fs.readFileSync("./latest_translation_commit.json"); + const data = JSON.parse(fileContent); + return data; +}; + +const writeLocalCfg = (cfg) => { + const data = JSON.stringify(cfg); + fs.writeFileSync("./latest_translation_commit.json", data); +}; + +const ghGetBranch = async (branchName = "master") => { + const result = await octokit.request( + `GET /repos/pingcap/docs/branches/${branchName}`, + { + owner: "pingcap", + repo: "docs", + branch: branchName, + } + ); + if (result.status === 200) { + const data = result.data; + return data; + } + throw new Error(`ghGetBranch error: ${result}`); +}; + +const ghCompareCommits = async (base = "", head = "") => { + const basehead = `${base}...${head}`; + const result = await octokit.request( + `GET /repos/pingcap/docs/compare/${basehead}`, + { + owner: "pingcap", + repo: "docs", + basehead, + } + ); + if (result.status === 200) { + const data = result.data; + return data; + } + throw new Error(`ghGetBranch error: ${result}`); +}; + +const downloadFile = async (url, targetPath) => { + const response = await axios({ + method: "GET", + url, + responseType: "stream", + }); + const dir = path.dirname(targetPath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + // pipe the result stream into a file on disc + response.data.pipe(fs.createWriteStream(targetPath)); + // return a promise and resolve when download finishes + return new Promise((resolve, reject) => { + response.data.on("end", () => { + resolve(); + }); + + response.data.on("error", () => { + reject(); + }); + }); +}; + +const deleteFile = (targetFile) => { + fs.rmSync(targetFile); +}; + +const handleFiles = async (fileList = []) => { + console.log(fileList); + for (let file of fileList) { + const { status, raw_url, filename, previous_filename } = file; + switch (status) { + case "added": + case "modified": + await 
downloadFile(raw_url, `tmp/${filename}`); + break; + case "removed": + deleteFile(filename); + break; + case "renamed": + deleteFile(previous_filename); + await downloadFile(raw_url, `tmp/${filename}`); + break; + } + } +}; + +const main = async () => { + const { target: branchName, sha: base } = getLocalCfg(); + const targetBranchData = await ghGetBranch(branchName); + const head = targetBranchData?.commit?.sha; + const comparedDetails = await ghCompareCommits(base, head); + const files = comparedDetails?.files || []; + handleFiles(files); + writeLocalCfg({ + target: branchName, + sha: head, + }); +}; + +main(); diff --git a/scripts/release_notes_update_pr_author_info_add_dup.py b/scripts/release_notes_update_pr_author_info_add_dup.py index 021537ef56488..92039bf89380d 100644 --- a/scripts/release_notes_update_pr_author_info_add_dup.py +++ b/scripts/release_notes_update_pr_author_info_add_dup.py @@ -1,32 +1,45 @@ -# This script can replace the bot author info in the release note table with the actual PR authors and add the history duplicated release notes based on issue links and author info. +# This script can automate the following things: +# 1. Replace the bot author info in the release note table with the actual PR authors. +# 2. Add the history duplicated release notes based on issue links and author info. The duplicate release notes in the same series will not be added. For example, if you are working on v6.5.4 release notes, the notes from other v6.5.x with the same issue number will not be counted and added as duplicated notes. +# 3. Make a copy of the patch release note template file and write the duplicated release notes to the copy. + # Before running this script, you need to get a GitHub personal access token (https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token) and save it in a text file. 
+# -*- coding: utf-8 -*- + from github import Github import re import openpyxl import os +import shutil + +version = '6.5.3' # Specifies the target TiDB version +release_note_excel = r'/Users/userid/Downloads/download_tirelease_tmp_patch_6.5.3_release_note_2023-06-06.xlsx' # Specifies the path of release note table with PR links and issue links +ext_path = r'/Users/userid/Documents/GitHub/mygithubid/docs-cn/releases' # Specifies the path of the existing release notes +template_file = r'/Users/userid/Documents/GitHub/mygithubid/docs/resources/doc-templates/patch_release_note_template_zh.md' # Specifies the path of the release note template file -release_note_excel = r'/Users/userid/Downloads/patch_6.5.1_release_note_test copy.xlsx' -ext_path = r'/Users/userid/Documents/GitHub/githubid/docs-cn/releases' # The path of the existing release notes -main_path = r'/Users/githubid/Documents/GitHub/githubid/docs-cn/releases/release-6.5.1.md' # The path of the release notes in preparation -with open("/Users/githubid/Documents/gh_token.txt", "r") as f: # Read the GitHub personal access token from the gh_token.txt file +with open("/Users/userid/Documents/PingCAP/Python_scripts/GitHub/gh_token2.txt", "r") as f: # Read the GitHub personal access token from the token.txt file access_token = f.read().strip() # Get the issue info of the existing release notes -def store_exst_rn(ext_path,main_path): +def store_exst_rn(ext_path, version): exst_notes = [] exst_issue_nums = [] exst_note_levels = [] + release_file = os.path.join(ext_path, f'release-{version}.md') + + version_parts = version.split('.') + major_minor_version = '.'.join(version_parts[:2]) for maindir, subdir, files in os.walk(ext_path): for afile in files: file_path = (os.path.join(maindir, afile)) - if file_path.endswith('.md') and not os.path.samefile(file_path,main_path): + if file_path.endswith('.md') and major_minor_version not in afile: # Exclude duplicate notes that are in the same major or minor releases. 
For example, excluding 6.5.x dup release notes for v6.5.3 with open(file_path,'r', encoding='utf-8') as fp: level1 = level2 = level3 = "" for line in fp: - exst_issue_num = re.search(r'https://github.com/(pingcap|tikv)/\w+/(issues|pull)/\d+', line) + exst_issue_num = re.search(r'https://github.com/(pingcap|tikv)/[\w-]+/(issues|pull)/\d+', line) authors = re.findall(r'@\[([^\]]+)\]', line) # Get the list of authors in this line if exst_issue_num: if exst_issue_num.group() not in exst_issue_nums: @@ -39,10 +52,10 @@ def store_exst_rn(ext_path,main_path): elif line.startswith("##"): level1 = "> " + line.replace("##","").strip() level2 = level3 = "" - elif line.startswith ("+") or line.startswith ("-"): + elif (line.startswith ("+") or line.startswith ("-")) and (not authors): level2 = "> " + line.replace("+","").replace("-","").strip() level3 = "" - elif line.startswith (" +") or line.startswith (" -"): + elif (line.startswith (" +") or line.startswith (" -")) and (not authors): level3 = "> " + line.replace(" +","").replace(" -","").strip() else: continue @@ -54,28 +67,42 @@ def store_exst_rn(ext_path,main_path): else: return 0 -def get_pr_info_from_github(cp_pr_link,cp_pr_title): +def get_pr_info_from_github(cp_pr_link,cp_pr_title, current_pr_author): + g = Github(access_token, timeout=30)# Create a Github object with the access token + target_pr_number_existence = 1 target_repo_pr_link= cp_pr_link.rsplit('/', 1)[0] - target_pr_number = re.findall(r'\(#(\d+)\)$', cp_pr_title) - - if len(target_pr_number) > 1: - print ("There is more than one match result of original PR number from the cherry-pick title: " + cp_pr_title ) - - target_pr_link = target_repo_pr_link + '/' + target_pr_number[0] - - # Create a Github object with the access token - g = Github(access_token) - - pr_info = target_pr_link.split("/") - - # Extract the owner, repository name, and pull request number from the link - owner, repo, pr_number = pr_info[-4], pr_info[-3], pr_info[-1] - - #print ('Getting 
the PR info from GitHub: ' + target_pr_link) - repo_obj = g.get_repo(f"{owner}/{repo}")# Get the repository object - pr_obj = repo_obj.get_pull(int(pr_number))# Get the pull request object - pr_author = pr_obj.user.login # Get the author of the pull request + target_pr_number = re.findall(r'\(#(\d+)\)$', cp_pr_title) # Match the original PR number in the end of the cherry-pick PR + + if target_pr_number: # Match the original PR number according to the title of the cherry-pick PR + pass + else: # Match the original PR number according to the head branch name of the cherry-pick PR + cp_pr_info = cp_pr_link.split("/") + owner, repo, cp_pr_number = cp_pr_info[-4], cp_pr_info[-3], cp_pr_info[-1] + repo_obj = g.get_repo(f"{owner}/{repo}") + try: + cp_pr_obj = repo_obj.get_pull(int(cp_pr_number)) + cp_head_branch = cp_pr_obj.head.ref + target_pr_number = re.findall(r'cherry-pick-(\d+)', cp_head_branch) + if target_pr_number: + pass + else: + target_pr_number_existence = 0 + except: + target_pr_number_existence = 0 + + if target_pr_number_existence == 1: + target_pr_link = target_repo_pr_link + '/' + target_pr_number[0] + pr_info = target_pr_link.split("/") + owner, repo, target_pr_number = pr_info[-4], pr_info[-3], pr_info[-1] + repo_obj = g.get_repo(f"{owner}/{repo}")# Get the repository object + try: + pr_obj = repo_obj.get_pull(int(target_pr_number))# Get the pull request object + pr_author = pr_obj.user.login # Get the author of the pull request + except: + print("Failed to get the original PR information for this PR: " + cp_pr_link) + else: + pr_author = current_pr_author # Use the current author if the cherry-pick PR cannot be found return(pr_author) @@ -98,13 +125,15 @@ def update_pr_author_and_release_notes(excel_path): sheet.insert_cols(pr_last_col_index + 1) # Insert a new column for the dup release notes sheet.cell(row=1, column=pr_last_col_index + 1, value='published_release_notes') # Set a column name # Go through each row + dup_notes = [] + dup_notes_levels 
= [] for row_index, row in enumerate(sheet.iter_rows(min_row=2, values_only=True), start=2): # If pr_author is ti-chi-bot or ti-srebot current_pr_author = row[pr_author_index] current_formated_rn= row[pr_formated_rn_index] if current_pr_author in ['ti-chi-bot', 'ti-srebot']: print ("Replacing the author info for row " + str(row_index) + ".") - actual_pr_author = get_pr_info_from_github(row[pr_link_index], row[pr_title_index]) #Get the PR author according to the cherry-pick PR + actual_pr_author = get_pr_info_from_github(row[pr_link_index], row[pr_title_index], current_pr_author) # Get the PR author according to the cherry-pick PR pr_author_cell = sheet.cell(row=row_index, column=pr_author_index+1, value = actual_pr_author)#Fill in the pr_author_cell updated_formated_rn = current_formated_rn.replace("[{}](https://github.com/{}".format(current_pr_author, current_pr_author),"[{}](https://github.com/{}".format(actual_pr_author, actual_pr_author)) formated_release_note_cell = sheet.cell(row=row_index, column=pr_formated_rn_index+1, value = updated_formated_rn) # Fill in the formated_release_note_cell @@ -113,19 +142,94 @@ def update_pr_author_and_release_notes(excel_path): pass ## Add the dup release note info - issue_link = re.search('https://github.com/(pingcap|tikv)/\w+/issues/\d+', current_formated_rn) - for note_pair in note_pairs: - if (issue_link.group() == note_pair[0]) and ((current_pr_author in note_pair[4]) or len(note_pair[4]) == 0): # Add the dup release notes only if the issues link is the same as the existing one and the current author is in the existing author list - print('A duplicated note is found in row ' + str(row_index) + " from " + note_pair[2] + note_pair[1]) - dup_formated_rn = '- (dup): {} {} {}'.format(note_pair[2], note_pair[3], note_pair[1]) - sheet.cell(row=row_index, column=pr_last_col_index+1, value=dup_formated_rn) - break - else: - pass + issue_link = re.search('https://github.com/(pingcap|tikv)/[\w-]+/issues/\d+', current_formated_rn) 
+ if issue_link: + for note_pair in note_pairs: + if (issue_link.group() == note_pair[0]) and ((current_pr_author in note_pair[4]) or len(note_pair[4]) == 0): # Add the dup release notes only if the issues link is the same as the existing one and the current author is in the existing author list + #print('A duplicated note is found in row ' + str(row_index) + " from " + note_pair[2] + note_pair[1]) + dup_formated_rn = '- (dup): {} {} {}'.format(note_pair[2], note_pair[3], note_pair[1]) + #print (note_pair) + sheet.cell(row=row_index, column=pr_last_col_index+1, value=dup_formated_rn) + if dup_formated_rn not in dup_notes: # Collect the dup release note if it is not collected before + dup_notes.append(dup_formated_rn) + print ("-----") + print (dup_formated_rn) + dup_notes_level = note_pair[3].replace("Bug 修复", "错误修复") + dup_notes_levels.append(dup_notes_level) + else: + pass + else: + pass + elif (not issue_link) and ("/issue/" in current_formated_rn): + print(current_formated_rn) + else: + pass workbook.save(release_note_excel) + return dup_notes, dup_notes_levels + +# Add the dup release notes to the release note file +def create_release_file(version, dup_notes_levels, dup_notes): + + release_file = os.path.join(ext_path, f'release-{version}.md') + shutil.copyfile(template_file, release_file) + # Replace the file content + with open(release_file, 'r+') as file: + content = file.read() + content = content.replace('x.y.z', version) + version_parts = version.split('.') + major_minor_version = '.'.join(version_parts[:2]) + content = content.replace('x.y', major_minor_version) + level1 = level2 = level3 = "" + lines = content.splitlines() + newlines = [] + note_level = "" + note_levels = [] + other_dup_notes = [] + for line in lines: + if "placeholder" in line: + note_level = level1 + level2 + level3 + note_levels.append(note_level) + newline = line.replace("- placeholder", "") + for dup_note_level, dup_note in zip(dup_notes_levels, dup_notes): # Add the dup release 
notes to the release note file + if dup_note_level == note_level: + newlines.append(newline+dup_note) + else: + continue + if "Other dup notes" in note_level: # Add the dup release notes without corresponding categories to the release note file + for dup_note_level, dup_note in zip(dup_notes_levels, dup_notes): + if dup_note_level not in note_levels: + newlines.append(newline+dup_note) + other_dup_notes.append(dup_note) + if len(other_dup_notes) == 0: + newlines = newlines[:-2] # Remove the last two lines if other dup notes do not exist + else: + pass + elif line.startswith("##"): + level1 = "> " + line.replace("##","").strip() + level2 = level3 = "" + newlines.append(line) + elif (line.startswith ("+") or line.startswith ("-")) and "GitHub ID" not in line: + level2 = "> " + line.replace("+","").replace("-","").strip() + level3 = "" + newlines.append(line) + elif (line.startswith (" +") or line.startswith (" -")) and "GitHub ID" not in line: + level3 = "> " + line.replace(" +","").replace(" -","").strip() + newlines.append(line) + else: + newlines.append(line) + + #print(note_levels) + content = "\n".join(newlines) + file.seek(0) + file.write(content) + file.truncate() + print(f'The v{version} release note is now created in the following directory: \n {release_file}') if __name__ == '__main__': - note_pairs = store_exst_rn(ext_path,main_path) - update_pr_author_and_release_notes(release_note_excel) - print ("The bot author info in the excel is now replaced with the actual authors.") \ No newline at end of file + note_pairs = store_exst_rn(ext_path, version) + dup_notes, dup_notes_levels = update_pr_author_and_release_notes(release_note_excel) + print ("The bot author info in the excel is now replaced with the actual authors.") + version_parts = version.split('.') + if len(version_parts) >= 2: + create_release_file(version, dup_notes_levels, dup_notes) \ No newline at end of file diff --git a/smooth-upgrade-tidb.md b/smooth-upgrade-tidb.md index 
61ef9c782f0fe..f73a2a62b1bf1 100644 --- a/smooth-upgrade-tidb.md +++ b/smooth-upgrade-tidb.md @@ -39,10 +39,12 @@ When using the smooth upgrade feature, note the following limitations. * Before the upgrade, if there is a canceling DDL job in the cluster, that is, an ongoing DDL job is being canceled by a user, because the job in the canceling state cannot be paused, TiDB will retry canceling the job. If the retry fails, an error is reported and the upgrade is exited. +* In scenarios of using TiUP to upgrade TiDB, because TiUP upgrade has a timeout period, if the cluster has a large number of DDL jobs (more than 300) waiting in queues before the upgrade, the upgrade might fail. + * During the upgrade, the following operations are not allowed: * Run DDL operations on system tables (`mysql.*`, `information_schema.*`, `performance_schema.*`, and `metrics_schema.*`). - * Manually cancel, pause, or resume DDL jobs: `ADMIN CANCEL/PAUSE/RESUME DDL JOBS job_id [, job_id] ...;`. + * Manually cancel DDL jobs: `ADMIN CANCEL DDL JOBS job_id [, job_id] ...;`. * Import data. ### Limitations on tools diff --git a/sql-non-prepared-plan-cache.md b/sql-non-prepared-plan-cache.md index c265fcab71b57..4feafee98d97d 100644 --- a/sql-non-prepared-plan-cache.md +++ b/sql-non-prepared-plan-cache.md @@ -88,7 +88,7 @@ Due to the preceding risks and the fact that the execution plan cache only provi - Queries that contain numbers or expressions directly after `ORDER BY` or `GROUP BY` are not supported, such as `ORDER BY 1` and `GROUP BY a+1`. Only `ORDER BY column_name` and `GROUP BY column_name` are supported. - Queries that filter on columns of `JSON`, `ENUM`, `SET`, or `BIT` type are not supported, such as `SELECT * FROM t WHERE json_col = '{}'`. - Queries that filter on `NULL` values are not supported, such as `SELECT * FROM t WHERE a is NULL`. -- Queries with more than 200 parameters after parameterization are not supported, such as `SELECT * FROM t WHERE a in (1, 2, 3, ... 201)`. 
+- Queries with more than 200 parameters after parameterization are not supported by default, such as `SELECT * FROM t WHERE a in (1, 2, 3, ... 201)`. Starting from v7.3.0, you can modify this limit by setting the [`44823`](/optimizer-fix-controls.md#44823-new-in-v730) fix in the [`tidb_opt_fix_control`](/system-variables.md#tidb_opt_fix_control-new-in-v710) system variable. - Queries that access partitioned tables, virtual columns, temporary tables, views, or memory tables are not supported, such as `SELECT * FROM INFORMATION_SCHEMA.COLUMNS`, where `COLUMNS` is a TiDB memory table. - Queries with hints or bindings are not supported. - DML statements or `SELECT` statements with the `FOR UPDATE` clause are not supported by default. To remove this restriction, you can execute `SET tidb_enable_non_prepared_plan_cache_for_dml = ON`. diff --git a/sql-plan-management.md b/sql-plan-management.md index 2510725a3d08b..442bb2c717244 100644 --- a/sql-plan-management.md +++ b/sql-plan-management.md @@ -168,15 +168,15 @@ The original SQL statement and the bound statement must have the same text after - This binding can be created successfully because the texts before and after parameterization and hint removal are the same: `SELECT * FROM test . t WHERE a > ?` - ```sql - CREATE BINDING FOR SELECT * FROM t WHERE a > 1 USING SELECT * FROM t use index (idx) WHERE a > 2 - ``` + ```sql + CREATE BINDING FOR SELECT * FROM t WHERE a > 1 USING SELECT * FROM t use index (idx) WHERE a > 2 + ``` - This binding will fail because the original SQL statement is processed as `SELECT * FROM test . t WHERE a > ?`, while the bound SQL statement is processed differently as `SELECT * FROM test . t WHERE b > ?`. 
- ```sql - CREATE BINDING FOR SELECT * FROM t WHERE a > 1 USING SELECT * FROM t use index(idx) WHERE b > 2 - ``` + ```sql + CREATE BINDING FOR SELECT * FROM t WHERE a > 1 USING SELECT * FROM t use index(idx) WHERE b > 2 + ``` > **Note:** > @@ -186,10 +186,11 @@ The original SQL statement and the bound statement must have the same text after To make the execution plan of a SQL statement fixed to a historical execution plan, you can use `plan_digest` to bind that historical execution plan to the SQL statement, which is more convenient than binding it according to a SQL statement. -Currently, this feature has the following limitations: +When using this feature, note the following: - The feature generates hints according to historical execution plans and uses the generated hints for binding. Because historical execution plans are stored in [Statement Summary Tables](/statement-summary-tables.md), before using this feature, you need to enable the [`tidb_enable_stmt_summary`](/system-variables.md#tidb_enable_stmt_summary-new-in-v304) system variable first. -- Currently, this feature only supports binding historical execution plans in the `statements_summary` and `statements_summary_history` tables of the current TiDB node. If you get a `can't find any plans` error, you can connect to another TiDB node in the cluster and retry the binding. +- This feature does not support TiFlash queries, Join queries with three or more tables, and queries that contain subqueries. +- If a historical execution plan is for a SQL statement with hints, the hints will be added to the binding. For example, after executing `SELECT /*+ max_execution_time(1000) */ * FROM t`, the binding created with its `plan_digest` will include `max_execution_time(1000)`. 
The SQL statement of this binding method is as follows: diff --git a/sql-plan-replayer.md b/sql-plan-replayer.md index 1eed4c2766174..d849aafc9e3e3 100644 --- a/sql-plan-replayer.md +++ b/sql-plan-replayer.md @@ -19,7 +19,7 @@ You can use `PLAN REPLAYER` to save the on-site information of a TiDB cluster. T {{< copyable "sql" >}} ```sql -PLAN REPLAYER DUMP EXPLAIN [ANALYZE] sql-statement; +PLAN REPLAYER DUMP EXPLAIN [ANALYZE] [WITH STATS AS OF TIMESTAMP expression] sql-statement; ``` Based on `sql-statement`, TiDB sorts out and exports the following on-site information: @@ -33,6 +33,10 @@ Based on `sql-statement`, TiDB sorts out and exports the following on-site infor - The result of `EXPLAIN [ANALYZE] sql-statement` - Some internal procudures of query optimization +If historical statistics are [enabled](/system-variables.md#tidb_enable_historical_stats), you can specify a time in the `PLAN REPLAYER` statement to get the historical statistics for the corresponding time. You can directly specify a time and date or specify a timestamp. TiDB looks for the historical statistics before the specified time and exports the latest one among them. + +If there are no historical statistics before the specified time, TiDB exports the latest statistics, which is consistent with the behavior when no time is specified. In addition, TiDB prints the error messages in the `errors.txt` file within the exported `ZIP` file. + > **Note:** > > `PLAN REPLAYER` **DOES NOT** export any table data. @@ -48,6 +52,8 @@ insert into t values(1,1), (2, 2), (3, 3); analyze table t; plan replayer dump explain select * from t; +plan replayer dump with stats as of timestamp '2023-07-17 12:00:00' explain select * from t; +plan replayer dump with stats as of timestamp '442012134592479233' explain select * from t; ``` `PLAN REPLAYER DUMP` packages the table information above into a `ZIP` file and returns the file identifier as the execution result. 
diff --git a/sql-statements/sql-statement-add-index.md b/sql-statements/sql-statement-add-index.md index 315aaa0b9aee5..c85653df8f98f 100644 --- a/sql-statements/sql-statement-add-index.md +++ b/sql-statements/sql-statement-add-index.md @@ -8,11 +8,17 @@ aliases: ['/docs/dev/sql-statements/sql-statement-add-index/','/docs/dev/referen The `ALTER TABLE.. ADD INDEX` statement adds an index to an existing table. This operation is online in TiDB, which means that neither reads or writes to the table are blocked by adding an index. + + > **Warning:** > > - **DO NOT** upgrade a TiDB cluster when a DDL statement is being executed in the cluster (usually for the time-consuming DDL statements such as `ADD INDEX` and the column type changes). > - Before the upgrade, it is recommended to use the [`ADMIN SHOW DDL`](/sql-statements/sql-statement-admin-show-ddl.md) command to check whether the TiDB cluster has an ongoing DDL job. If the cluster has a DDL job, to upgrade the cluster, wait until the DDL execution is finished or use the [`ADMIN CANCEL DDL`](/sql-statements/sql-statement-admin-cancel-ddl.md) command to cancel the DDL job before you upgrade the cluster. > - In addition, during the cluster upgrade, **DO NOT** execute any DDL statement. Otherwise, the issue of undefined behavior might occur. +> +> When you upgrade TiDB from v7.1.0 to a later version, you can ignore the preceding limitations. For details, see [the limitations of TiDB smooth upgrade](/smooth-upgrade-tidb.md). + + ## Synopsis diff --git a/sql-statements/sql-statement-admin-pause-ddl.md b/sql-statements/sql-statement-admin-pause-ddl.md new file mode 100644 index 0000000000000..75fd004a424a6 --- /dev/null +++ b/sql-statements/sql-statement-admin-pause-ddl.md @@ -0,0 +1,65 @@ +--- +title: ADMIN PAUSE DDL JOBS +summary: An overview of the usage of ADMIN PAUSE DDL JOBS for the TiDB database. +--- + +# ADMIN PAUSE DDL JOBS + +`ADMIN PAUSE DDL` allows you to pause a running DDL job. 
The `job_id` can be found by running [`ADMIN SHOW DDL JOBS`](/sql-statements/sql-statement-admin-show-ddl.md). + +You can use this statement to pause a DDL job that is issued but not yet completed executing. After the pause, the SQL statement that executes the DDL job does not return immediately, but looks like it is still running. If you try to pause a DDL job that has already been completed, you will see the `DDL Job:90 not found` error in the `RESULT` column, which indicates that the job has been removed from the DDL waiting queue. + +> **Warning:** +> +> This feature is an experimental feature. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. + +## Synopsis + +```ebnf+diagram +AdminStmt ::= + 'ADMIN' ( 'SHOW' ( 'DDL' ( 'JOBS' Int64Num? WhereClauseOptional | 'JOB' 'QUERIES' NumList )? | TableName 'NEXT_ROW_ID' | 'SLOW' AdminShowSlow ) | 'CHECK' ( 'TABLE' TableNameList | 'INDEX' TableName Identifier ( HandleRange ( ',' HandleRange )* )? ) | 'RECOVER' 'INDEX' TableName Identifier | 'CLEANUP' ( 'INDEX' TableName Identifier | 'TABLE' 'LOCK' TableNameList ) | 'CHECKSUM' 'TABLE' TableNameList | 'CANCEL' 'DDL' 'JOBS' NumList | 'PAUSE' 'DDL' 'JOBS' NumList | 'RESUME' 'DDL' 'JOBS' NumList | 'RELOAD' ( 'EXPR_PUSHDOWN_BLACKLIST' | 'OPT_RULE_BLACKLIST' | 'BINDINGS' ) | 'PLUGINS' ( 'ENABLE' | 'DISABLE' ) PluginNameList | 'REPAIR' 'TABLE' TableName CreateTableStmt | ( 'FLUSH' | 'CAPTURE' | 'EVOLVE' ) 'BINDINGS' ) + +NumList ::= + Int64Num ( ',' Int64Num )* +``` + +## Examples + +`ADMIN PAUSE DDL JOBS` pauses the currently running DDL job and returns whether the job is paused successfully. The job can be resumed by `ADMIN RESUME DDL JOBS`. + +```sql +ADMIN PAUSE DDL JOBS job_id [, job_id] ...; +``` + +If the pause fails, the specific reason for the failure is displayed. 
+ + + +> **Note:** +> +> + This statement can pause a DDL job, but other operations and environment changes (such as machine restarts and cluster restarts) do not pause DDL jobs except for cluster upgrades. +> + During the cluster upgrade, the ongoing DDL jobs are paused, and the DDL jobs initiated during the upgrade are also paused. After the upgrade, all paused DDL jobs will resume. The pause and resume operations during the upgrade are taken automatically. For details, see [TiDB Smooth Upgrade](/smooth-upgrade-tidb.md). +> + This statement can pause multiple DDL jobs. You can use the [`ADMIN SHOW DDL JOBS`](/sql-statements/sql-statement-admin-show-ddl.md) statement to obtain the `job_id` of a DDL job. +> + If the job to be paused has already been completed or is about to be completed, the pause operation will fail. + + + + +> **Note:** +> +> + This statement can pause a DDL job, but other operations and environment changes (such as machine restarts and cluster restarts) do not pause DDL jobs except for cluster upgrades. +> + During the cluster upgrade, the ongoing DDL jobs are paused, and the DDL jobs initiated during the upgrade are also paused. After the upgrade, all paused DDL jobs will resume. The pause and resume operations during the upgrade are taken automatically. For details, see [TiDB Smooth Upgrade](https://docs.pingcap.com/tidb/stable/smooth-upgrade-tidb). +> + This statement can pause multiple DDL jobs. You can use the [`ADMIN SHOW DDL JOBS`](/sql-statements/sql-statement-admin-show-ddl.md) statement to obtain the `job_id` of a DDL job. +> + If the job to be paused has already been completed or is about to be completed, the pause operation will fail. + + + +## MySQL compatibility + +This statement is a TiDB extension to MySQL syntax. 
+ +## See also + +* [`ADMIN SHOW DDL [JOBS|QUERIES]`](/sql-statements/sql-statement-admin-show-ddl.md) +* [`ADMIN CANCEL DDL`](/sql-statements/sql-statement-admin-cancel-ddl.md) +* [`ADMIN RESUME DDL`](/sql-statements/sql-statement-admin-resume-ddl.md) diff --git a/sql-statements/sql-statement-admin-resume-ddl.md b/sql-statements/sql-statement-admin-resume-ddl.md new file mode 100644 index 0000000000000..a68a5ee135a7e --- /dev/null +++ b/sql-statements/sql-statement-admin-resume-ddl.md @@ -0,0 +1,65 @@ +--- +title: ADMIN RESUME DDL JOBS +summary: An overview of the usage of ADMIN RESUME DDL for the TiDB database. +--- + +# ADMIN RESUME DDL JOBS + +`ADMIN RESUME DDL` allows you to resume a paused DDL job. You can find the `job_id` by running [`ADMIN SHOW DDL JOBS`](/sql-statements/sql-statement-admin-show-ddl.md). + +You can use this statement to resume a paused DDL job. After the resume is completed, the SQL statement that executes the DDL job continues to show as being executed. If you try to resume a DDL job that has already been completed, you will see the `DDL Job:90 not found` error in the `RESULT` column, which indicates that the job has been removed from the DDL waiting queue. + +> **Warning:** +> +> This feature is an experimental feature. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. + +## Synopsis + +```ebnf+diagram +AdminStmt ::= + 'ADMIN' ( 'SHOW' ( 'DDL' ( 'JOBS' Int64Num? WhereClauseOptional | 'JOB' 'QUERIES' NumList )? | TableName 'NEXT_ROW_ID' | 'SLOW' AdminShowSlow ) | 'CHECK' ( 'TABLE' TableNameList | 'INDEX' TableName Identifier ( HandleRange ( ',' HandleRange )* )? 
) | 'RECOVER' 'INDEX' TableName Identifier | 'CLEANUP' ( 'INDEX' TableName Identifier | 'TABLE' 'LOCK' TableNameList ) | 'CHECKSUM' 'TABLE' TableNameList | 'CANCEL' 'DDL' 'JOBS' NumList | 'PAUSE' 'DDL' 'JOBS' NumList | 'RESUME' 'DDL' 'JOBS' NumList | 'RELOAD' ( 'EXPR_PUSHDOWN_BLACKLIST' | 'OPT_RULE_BLACKLIST' | 'BINDINGS' ) | 'PLUGINS' ( 'ENABLE' | 'DISABLE' ) PluginNameList | 'REPAIR' 'TABLE' TableName CreateTableStmt | ( 'FLUSH' | 'CAPTURE' | 'EVOLVE' ) 'BINDINGS' ) + +NumList ::= + Int64Num ( ',' Int64Num )* +``` + +## Examples + +`ADMIN RESUME DDL JOBS` resumes the currently paused DDL job and returns whether the job is resumed successfully. + +```sql +ADMIN RESUME DDL JOBS job_id [, job_id] ...; +``` + +If the resume fails, the specific reason for the failure is displayed. + + + +> **Note:** +> +> + During the cluster upgrade, the ongoing DDL jobs are paused, and the DDL jobs initiated during the upgrade are also paused. After the upgrade, all paused DDL jobs will resume. The pause and resume operations during the upgrade are taken automatically. For details, see [TiDB Smooth Upgrade](/smooth-upgrade-tidb.md). +> + This statement can resume multiple DDL jobs. You can use the [`ADMIN SHOW DDL JOBS`](/sql-statements/sql-statement-admin-show-ddl.md) statement to obtain the `job_id` of a DDL job. +> + A DDL job in other status (other than `paused`) cannot be resumed and the resume operation will fail. +> + If you try to resume a job more than once, TiDB reports an error `Error Number: 8261`. + + + + +> **Note:** +> +> + During the cluster upgrade, the ongoing DDL jobs are paused, and the DDL jobs initiated during the upgrade are also paused. After the upgrade, all paused DDL jobs will resume. The pause and resume operations during the upgrade are taken automatically. For details, see [TiDB Smooth Upgrade](https://docs.pingcap.com/tidb/stable/smooth-upgrade-tidb). +> + This statement can resume multiple DDL jobs. 
You can use the [`ADMIN SHOW DDL JOBS`](/sql-statements/sql-statement-admin-show-ddl.md) statement to obtain the `job_id` of a DDL job. +> + A DDL job in other status (other than `paused`) cannot be resumed and the resume operation will fail. +> + If you try to resume a job more than once, TiDB reports an error `Error Number: 8261`. + + + +## MySQL compatibility + +This statement is a TiDB extension to MySQL syntax. + +## See also + +* [`ADMIN SHOW DDL [JOBS|QUERIES]`](/sql-statements/sql-statement-admin-show-ddl.md) +* [`ADMIN CANCEL DDL`](/sql-statements/sql-statement-admin-cancel-ddl.md) +* [`ADMIN PAUSE DDL`](/sql-statements/sql-statement-admin-pause-ddl.md) diff --git a/sql-statements/sql-statement-admin-show-ddl.md b/sql-statements/sql-statement-admin-show-ddl.md index ecbad399bcac9..9da7821eac9b2 100644 --- a/sql-statements/sql-statement-admin-show-ddl.md +++ b/sql-statements/sql-statement-admin-show-ddl.md @@ -51,7 +51,10 @@ The `ADMIN SHOW DDL JOBS` statement is used to view all the results in the curre - `JOB_ID`: each DDL operation corresponds to a DDL job. `JOB_ID` is globally unique. - `DB_NAME`: the name of the database where the DDL operation is performed. - `TABLE_NAME`: the name of the table where the DDL operation is performed. -- `JOB_TYPE`: the type of DDL operation. +- `JOB_TYPE`: the type of DDL operation. Common job types include the following: + - `ingest`: Ingestion with accelerated index backfilling as configured by [`tidb_ddl_enable_fast_reorg`](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630). + - `txn`: Basic transactional backfill. + - `txn-merge`: Transactional backfill with a temporary index that gets merged with the original index when the backfill is finished. - `SCHEMA_STATE`: the current state of the schema object that the DDL operates on. 
If `JOB_TYPE` is `ADD INDEX`, it is the state of the index; if `JOB_TYPE` is `ADD COLUMN`, it is the state of the column; if `JOB_TYPE` is `CREATE TABLE`, it is the state of the table. Common states include the following: - `none`: indicates that it does not exist. Generally, after the `DROP` operation or after the `CREATE` operation fails and rolls back, it will become the `none` state. - `delete only`, `write only`, `delete reorganization`, `write reorganization`: these four states are intermediate states. For their specific meanings, see [How the Online DDL Asynchronous Change Works in TiDB](/ddl-introduction.md#how-the-online-ddl-asynchronous-change-works-in-tidb). As the intermediate state conversion is fast, these states are generally not visible during operation. Only when performing `ADD INDEX` operation can the `write reorganization` state be seen, indicating that index data is being added. @@ -66,7 +69,8 @@ The `ADMIN SHOW DDL JOBS` statement is used to view all the results in the curre - `synced`: indicates that the operation has been executed successfully and all TiDB instances have been synchronized to this state. - `rollback done`: indicates that the operation has failed and the rollback has been completed. - `rollingback`: indicates that the operation has failed and is rolling back. - - `cancelling`: indicates that the operation is being canceled. This state only appears when you use the `ADMIN CANCEL DDL JOBS` command to cancel the DDL job. + - `cancelling`: indicates that the operation is being canceled. This state only appears when you use the [`ADMIN CANCEL DDL JOBS`](/sql-statements/sql-statement-admin-cancel-ddl.md) command to cancel the DDL job. + - `paused`: indicates that the operation has been paused. This state only appears when you use the [`ADMIN PAUSED DDL JOBS`](/sql-statements/sql-statement-admin-pause-ddl.md) command to pause the DDL job. 
You can use the [`ADMIN RESUME DDL JOBS`](/sql-statements/sql-statement-admin-resume-ddl.md) command to resume the DDL job. @@ -90,7 +94,8 @@ The `ADMIN SHOW DDL JOBS` statement is used to view all the results in the curre - `synced`: indicates that the operation has been executed successfully and all TiDB instances have been synchronized to this state. - `rollback done`: indicates that the operation has failed and the rollback has been completed. - `rollingback`: indicates that the operation has failed and is rolling back. - - `cancelling`: indicates that the operation is being canceled. This state only appears when you use the `ADMIN CANCEL DDL JOBS` command to cancel the DDL job. + - `cancelling`: indicates that the operation is being canceled. This state only appears when you use the [`ADMIN CANCEL DDL JOBS`](/sql-statements/sql-statement-admin-cancel-ddl.md) command to cancel the DDL job. + - `paused`: indicates that the operation has been paused. This state only appears when you use the [`ADMIN PAUSE DDL JOBS`](/sql-statements/sql-statement-admin-pause-ddl.md) command to pause the DDL job. You can use the [`ADMIN RESUME DDL JOBS`](/sql-statements/sql-statement-admin-resume-ddl.md) command to resume the DDL job. @@ -221,3 +226,5 @@ This statement is a TiDB extension to MySQL syntax. 
## See also * [ADMIN CANCEL DDL](/sql-statements/sql-statement-admin-cancel-ddl.md) +* [ADMIN PAUSE DDL](/sql-statements/sql-statement-admin-pause-ddl.md) +* [ADMIN RESUME DDL](/sql-statements/sql-statement-admin-resume-ddl.md) diff --git a/sql-statements/sql-statement-admin.md b/sql-statements/sql-statement-admin.md index ed3143031aafe..ba84ab5d59eac 100644 --- a/sql-statements/sql-statement-admin.md +++ b/sql-statements/sql-statement-admin.md @@ -20,7 +20,9 @@ This statement is a TiDB extension syntax, used to view the status of TiDB and c | Statement | Description | |------------------------------------------------------------------------------------------|-----------------------------| -| [`ADMIN CANCEL DDL JOBS`](/sql-statements/sql-statement-admin-cancel-ddl.md) | Cancels a currently running DDL jobs. | +| [`ADMIN CANCEL DDL JOBS`](/sql-statements/sql-statement-admin-cancel-ddl.md) | Cancels the currently running DDL jobs. | +| [`ADMIN PAUSE DDL JOBS`](/sql-statements/sql-statement-admin-pause-ddl.md) | Pauses the currently running DDL jobs. | +| [`ADMIN RESUME DDL JOBS`](/sql-statements/sql-statement-admin-resume-ddl.md) | Resumes the paused DDL jobs. | | [`ADMIN CHECKSUM TABLE`](/sql-statements/sql-statement-admin-checksum-table.md) | Calculates the CRC64 of all rows + indexes of a table. | | [ADMIN CHECK [TABLE\|INDEX]](/sql-statements/sql-statement-admin-check-table-index.md) | Checks for consistency of a table or index. | | [ADMIN SHOW DDL [JOBS\|QUERIES]](/sql-statements/sql-statement-admin-show-ddl.md) | Shows details about currently running or recently completed DDL jobs. | @@ -245,7 +247,8 @@ ADMIN SHOW DDL JOBS 5 WHERE state != 'synced' AND db_name = 'test'; * `synced`: it indicates that the operation has been performed successfully and all TiDB instances have been synced to this state. * `rollback done`: it indicates that the operation has failed and has finished rolling back. 
* `rollingback`: it indicates that the operation has failed and is rolling back. - * `cancelling`: it indicates that the operation is being cancelled. This state only occurs when you cancel DDL jobs using the `ADMIN CANCEL DDL JOBS` command. + * `cancelling`: it indicates that the operation is being cancelled. This state only occurs when you cancel DDL jobs using the [`ADMIN CANCEL DDL JOBS`](/sql-statements/sql-statement-admin-cancel-ddl.md) command. + * `paused`: it indicates that the operation has been paused. This state only appears when you use the [`ADMIN PAUSE DDL JOBS`](/sql-statements/sql-statement-admin-pause-ddl.md) command to pause the DDL job. You can use the [`ADMIN RESUME DDL JOBS`](/sql-statements/sql-statement-admin-resume-ddl.md) command to resume the DDL job. ## MySQL compatibility diff --git a/sql-statements/sql-statement-alter-database.md b/sql-statements/sql-statement-alter-database.md index 9420182e2fd0b..baba06511d986 100644 --- a/sql-statements/sql-statement-alter-database.md +++ b/sql-statements/sql-statement-alter-database.md @@ -36,7 +36,7 @@ Currently, TiDB only supports some character sets and collations. See [Character ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `ALTER DATABASE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-alter-resource-group.md b/sql-statements/sql-statement-alter-resource-group.md index f8f8ae3528b08..fd0880cd9bb93 100644 --- a/sql-statements/sql-statement-alter-resource-group.md +++ b/sql-statements/sql-statement-alter-resource-group.md @@ -9,7 +9,7 @@ summary: Learn the usage of ALTER RESOURCE GROUP in TiDB. 
> **Note:** > -> This feature is not available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). +> This feature is not available on [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). @@ -25,7 +25,8 @@ IfExists ::= ('IF' 'EXISTS')? ResourceGroupName ::= - Identifier + Identifier +| "DEFAULT" ResourceGroupOptionList ::= DirectResourceGroupOption @@ -36,11 +37,34 @@ DirectResourceGroupOption ::= "RU_PER_SEC" EqOpt stringLit | "PRIORITY" EqOpt ResourceGroupPriorityOption | "BURSTABLE" +| "BURSTABLE" EqOpt Boolean +| "QUERY_LIMIT" EqOpt '(' ResourceGroupRunawayOptionList ')' +| "QUERY_LIMIT" EqOpt '(' ')' +| "QUERY_LIMIT" EqOpt "NULL" + ResourceGroupPriorityOption ::= LOW | MEDIUM | HIGH +ResourceGroupRunawayOptionList ::= + DirectResourceGroupRunawayOption +| ResourceGroupRunawayOptionList DirectResourceGroupRunawayOption +| ResourceGroupRunawayOptionList ',' DirectResourceGroupRunawayOption + +DirectResourceGroupRunawayOption ::= + "EXEC_ELAPSED" EqOpt stringLit +| "ACTION" EqOpt ResourceGroupRunawayActionOption +| "WATCH" EqOpt ResourceGroupRunawayWatchOption "DURATION" EqOpt stringLit + +ResourceGroupRunawayWatchOption ::= + EXACT +| SIMILAR + +ResourceGroupRunawayActionOption ::= + DRYRUN +| COOLDOWN +| KILL ``` TiDB supports the following `DirectResourceGroupOption`, where [Request Unit (RU)](/tidb-resource-control.md#what-is-request-unit-ru) is a unified abstraction unit in TiDB for CPU, IO, and other system resources. @@ -50,10 +74,12 @@ TiDB supports the following `DirectResourceGroupOption`, where [Request Unit (RU | `RU_PER_SEC` | Rate of RU backfilling per second | `RU_PER_SEC = 500` indicates that this resource group is backfilled with 500 RUs per second | | `PRIORITY` | The absolute priority of tasks to be processed on TiKV | `PRIORITY = HIGH` indicates that the priority is high. If not specified, the default value is `MEDIUM`. 
| | `BURSTABLE` | If the `BURSTABLE` attribute is set, TiDB allows the corresponding resource group to use the available system resources when the quota is exceeded. | +| `QUERY_LIMIT` | When the query execution meets this condition, the query is identified as a runaway query and the corresponding action is executed. | `QUERY_LIMIT=(EXEC_ELAPSED='60s', ACTION=KILL, WATCH=EXACT DURATION='10m')` indicates that the query is identified as a runaway query when the execution time exceeds 60 seconds. The query is terminated. All SQL statements with the same SQL text will be terminated immediately in the coming 10 minutes. `QUERY_LIMIT=()` or `QUERY_LIMIT=NULL` means that runaway control is not enabled. See [Runaway Queries](/tidb-resource-control.md#manage-queries-that-consume-more-resources-than-expected-runaway-queries). | > **Note:** > > The `ALTER RESOURCE GROUP` statement can only be executed when the global variable [`tidb_enable_resource_control`](/system-variables.md#tidb_enable_resource_control-new-in-v660) is set to `ON`. +> The `ALTER RESOURCE GROUP` statement supports incremental changes, leaving unspecified parameters unchanged. However, `QUERY_LIMIT`, as a whole, cannot be partially modified. 
## Examples @@ -79,18 +105,19 @@ Query OK, 0 rows affected (0.08 sec) ```sql SELECT * FROM information_schema.resource_groups WHERE NAME ='rg1'; -+------+------------+----------+-----------+ -| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | -+------+------------+----------+-----------+ -| rg1 | 100 | MEDIUM | YES | -+------+------------+----------+-----------+ ++------+------------+----------+-----------+-------------+ +| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | QUERY_LIMIT | ++------+------------+----------+-----------+-------------+ +| rg1 | 100 | MEDIUM | YES | NULL | ++------+------------+----------+-----------+-------------+ 1 rows in set (1.30 sec) ``` ```sql ALTER RESOURCE GROUP rg1 RU_PER_SEC = 200 - PRIORITY = LOW; + PRIORITY = LOW + QUERY_LIMIT = (EXEC_ELAPSED='1s' ACTION=COOLDOWN WATCH=EXACT DURATION='30s'); ``` ```sql @@ -102,11 +129,11 @@ SELECT * FROM information_schema.resource_groups WHERE NAME ='rg1'; ``` ```sql -+------+------------+----------+-----------+ -| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | -+------+------------+----------+-----------+ -| rg1 | 200 | LOW | NO | -+------+------------+----------+-----------+ ++------+------------+----------+-----------+----------------------------------------------------+ +| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | QUERY_LIMIT | ++------+------------+----------+-----------+----------------------------------------------------+ +| rg1 | 200 | LOW | YES | EXEC_ELAPSED=1s, ACTION=COOLDOWN, WATCH=EXACT[30s] | ++------+------------+----------+-----------+----------------------------------------------------+ 1 rows in set (1.30 sec) ``` diff --git a/sql-statements/sql-statement-alter-table.md b/sql-statements/sql-statement-alter-table.md index 45eb326a61b0f..a3b28c54cc46c 100644 --- a/sql-statements/sql-statement-alter-table.md +++ b/sql-statements/sql-statement-alter-table.md @@ -186,11 +186,11 @@ The following major restrictions apply to `ALTER TABLE` in TiDB: - `ALTER TABLE t CACHE | NOCACHE` is a TiDB 
extension to MySQL syntax. For details, see [Cached Tables](/cached-tables.md). -For further restrictions, see [MySQL Compatibility](/mysql-compatibility.md#ddl). +For further restrictions, see [MySQL Compatibility](/mysql-compatibility.md#ddl-operations). ## See also -- [MySQL Compatibility](/mysql-compatibility.md#ddl) +- [MySQL Compatibility](/mysql-compatibility.md#ddl-operations) - [ADD COLUMN](/sql-statements/sql-statement-add-column.md) - [DROP COLUMN](/sql-statements/sql-statement-drop-column.md) - [ADD INDEX](/sql-statements/sql-statement-add-index.md) diff --git a/sql-statements/sql-statement-alter-user.md b/sql-statements/sql-statement-alter-user.md index edadffc812047..62cacea484759 100644 --- a/sql-statements/sql-statement-alter-user.md +++ b/sql-statements/sql-statement-alter-user.md @@ -172,10 +172,10 @@ SELECT USER, JSON_EXTRACT(User_attributes, "$.resource_group") FROM mysql.user W 1 row in set (0.02 sec) ``` -Unbind the user to a resource group, that is, set the resource group to which the user is bound to be empty. After unbinding, the user will be bound to the `default` resource group. +Unbind the user to a resource group, that is, bind the user to the `default` resource group. 
```sql -ALTER USER 'newuser' RESOURCE GROUP ``; +ALTER USER 'newuser' RESOURCE GROUP `default`; SELECT USER, JSON_EXTRACT(User_attributes, "$.resource_group") FROM mysql.user WHERE user = "newuser"; ``` @@ -183,7 +183,7 @@ SELECT USER, JSON_EXTRACT(User_attributes, "$.resource_group") FROM mysql.user W +---------+---------------------------------------------------+ | USER | JSON_EXTRACT(User_attributes, "$.resource_group") | +---------+---------------------------------------------------+ -| newuser | "" | +| newuser | "default" | +---------+---------------------------------------------------+ 1 row in set (0.02 sec) ``` diff --git a/sql-statements/sql-statement-analyze-table.md b/sql-statements/sql-statement-analyze-table.md index 8ad8aae8fad5f..d921dd825fccc 100644 --- a/sql-statements/sql-statement-analyze-table.md +++ b/sql-statements/sql-statement-analyze-table.md @@ -22,7 +22,7 @@ AnalyzeOptionListOpt ::= ( WITH AnalyzeOptionList )? AnalyzeOptionList ::= -AnalyzeOption ( ',' AnlyzeOption )* +AnalyzeOption ( ',' AnalyzeOption )* AnalyzeOption ::= ( NUM ( 'BUCKETS' | 'TOPN' | ( 'CMSKETCH' ( 'DEPTH' | 'WIDTH' ) ) | 'SAMPLES' ) ) | ( FLOATNUM 'SAMPLERATE' ) @@ -51,14 +51,20 @@ PartitionNameList ::= ```sql mysql> CREATE TABLE t1 (id INT NOT NULL PRIMARY KEY AUTO_INCREMENT, c1 INT NOT NULL); Query OK, 0 rows affected (0.11 sec) +``` +```sql mysql> INSERT INTO t1 (c1) VALUES (1),(2),(3),(4),(5); Query OK, 5 rows affected (0.03 sec) Records: 5 Duplicates: 0 Warnings: 0 +``` +```sql mysql> ALTER TABLE t1 ADD INDEX (c1); Query OK, 0 rows affected (0.30 sec) +``` +```sql mysql> EXPLAIN SELECT * FROM t1 WHERE c1 = 3; +------------------------+---------+-----------+------------------------+---------------------------------------------+ | id | estRows | task | access object | operator info | @@ -67,8 +73,12 @@ mysql> EXPLAIN SELECT * FROM t1 WHERE c1 = 3; | └─IndexRangeScan_5 | 10.00 | cop[tikv] | table:t1, index:c1(c1) | range:[3,3], keep order:false, stats:pseudo | 
+------------------------+---------+-----------+------------------------+---------------------------------------------+ 2 rows in set (0.00 sec) +``` + +The status of the current statistics is `pseudo`, which means the statistics is inaccurate. -mysql> analyze table t1; +```sql +mysql> ANALYZE TABLE t1; Query OK, 0 rows affected (0.13 sec) mysql> EXPLAIN SELECT * FROM t1 WHERE c1 = 3; @@ -81,6 +91,8 @@ mysql> EXPLAIN SELECT * FROM t1 WHERE c1 = 3; 2 rows in set (0.00 sec) ``` +The statistics is now correctly updated and loaded. + ## MySQL compatibility TiDB differs from MySQL in **both** the statistics it collects and how it makes use of statistics during query execution. While this statement is syntactically similar to MySQL, the following differences apply: diff --git a/sql-statements/sql-statement-calibrate-resource.md b/sql-statements/sql-statement-calibrate-resource.md index 9dca783224589..d6345e2a314e1 100644 --- a/sql-statements/sql-statement-calibrate-resource.md +++ b/sql-statements/sql-statement-calibrate-resource.md @@ -11,7 +11,7 @@ The `CALIBRATE RESOURCE` statement is used to estimate and output the ['Request > **Note:** > -> This feature is not available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). +> This feature is not available on [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). @@ -34,7 +34,7 @@ To execute this command, make sure that the following requirements are met: - The user has `SUPER` or `RESOURCE_GROUP_ADMIN` privilege. - The user has the `SELECT` privilege for all tables in the `METRICS_SCHEMA` schema. -## Methods for estimating capacity +## Methods for estimating capacity TiDB provides two methods for estimation: @@ -47,6 +47,10 @@ If your application is already running in a production environment, or you can r - The time window ranges from 10 minutes to 24 hours. 
- In the specified time window, if the CPU utilization of TiDB and TiKV is too low, you cannot estimate the capacity. +> **Note:** +> +> TiKV does not monitor CPU usage metrics on macOS. It does not support capacity estimation based on the actual workload on macOS. + ### Estimate capacity based on hardware deployment This method mainly estimates capacity based on the current cluster configuration, combined with the empirical values observed for different workloads. Because different types of workloads require different ratios of hardware, the output capacity of the same configuration of hardware might be different. The `WORKLOAD` parameter here accepts the following different workload types. The default value is `TPCC`. @@ -95,7 +99,14 @@ CALIBRATE RESOURCE START_TIME '2023-04-18 08:00:00' DURATION '9m'; ERROR 1105 (HY000): the duration of calibration is too short, which could lead to inaccurate output. Please make the duration between 10m0s and 24h0m0s ``` -When the workload within the time window is too low, an error occurs. +The monitoring metrics for the [capacity estimation based on the actual workload](#estimate-capacity-based-on-actual-workload) feature include `tikv_cpu_quota`, `tidb_server_maxprocs`, `resource_manager_resource_unit`, and `process_cpu_usage`. If the CPU quota monitoring data is empty, there will be an error with the corresponding monitoring metric name, as shown in the following example: + +```sql +CALIBRATE RESOURCE START_TIME '2023-04-18 08:00:00' DURATION '60m'; +Error 1105 (HY000): There is no CPU quota metrics, metrics 'tikv_cpu_quota' is empty +``` + +If the workload in the time window is too low, or the `resource_manager_resource_unit` and `process_cpu_usage` monitoring data is missing, the following error will be reported. In addition, because TiKV does not monitor CPU utilization on macOS, it does not support capacity estimation based on the actual workload, and will also report this error. 
```sql CALIBRATE RESOURCE START_TIME '2023-04-18 08:00:00' DURATION '60m'; diff --git a/sql-statements/sql-statement-cancel-import-job.md b/sql-statements/sql-statement-cancel-import-job.md new file mode 100644 index 0000000000000..fe0c632b72dda --- /dev/null +++ b/sql-statements/sql-statement-cancel-import-job.md @@ -0,0 +1,48 @@ +--- +title: CANCEL IMPORT +summary: An overview of the usage of CANCEL IMPORT in TiDB. +--- + +# CANCEL IMPORT + +The `CANCEL IMPORT` statement is used to cancel a data import job created in TiDB. + + + +## Required privileges + +To cancel a data import job, you need to be the creator of the import job or have the `SUPER` privilege. + +## Synopsis + +```ebnf+diagram +CancelImportJobsStmt ::= + 'CANCEL' 'IMPORT' 'JOB' JobID +``` + +## Example + +To cancel an import job with the ID as `1`, execute the following statement: + +```sql +CANCEL IMPORT JOB 1; +``` + +The output is as follows: + +``` +Query OK, 0 rows affected (0.01 sec) +``` + +## MySQL compatibility + +This statement is a TiDB extension to MySQL syntax. 
+ +## See also + +* [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md) +* [`SHOW IMPORT JOB`](/sql-statements/sql-statement-show-import-job.md) diff --git a/sql-statements/sql-statement-create-binding.md b/sql-statements/sql-statement-create-binding.md index 56783c8d22534..fcf3dc8ce70d8 100644 --- a/sql-statements/sql-statement-create-binding.md +++ b/sql-statements/sql-statement-create-binding.md @@ -38,11 +38,11 @@ The following example shows how to create a binding according to a SQL statement ```sql mysql> CREATE TABLE t1 ( - -> id INT NOT NULL PRIMARY KEY auto_increment, - -> b INT NOT NULL, - -> pad VARBINARY(255), - -> INDEX(b) - -> ); + id INT NOT NULL PRIMARY KEY auto_increment, + b INT NOT NULL, + pad VARBINARY(255), + INDEX(b) + ); Query OK, 0 rows affected (0.07 sec) mysql> INSERT INTO t1 SELECT NULL, FLOOR(RAND()*1000), RANDOM_BYTES(255) FROM dual; @@ -95,9 +95,9 @@ mysql> EXPLAIN ANALYZE SELECT * FROM t1 WHERE b = 123; 3 rows in set (0.02 sec) mysql> CREATE SESSION BINDING FOR - -> SELECT * FROM t1 WHERE b = 123 - -> USING - -> SELECT * FROM t1 IGNORE INDEX (b) WHERE b = 123; + SELECT * FROM t1 WHERE b = 123 + USING + SELECT * FROM t1 IGNORE INDEX (b) WHERE b = 123; Query OK, 0 rows affected (0.00 sec) mysql> EXPLAIN ANALYZE SELECT * FROM t1 WHERE b = 123; diff --git a/sql-statements/sql-statement-create-database.md b/sql-statements/sql-statement-create-database.md index 62f5c367ce014..20a912f6e8cf6 100644 --- a/sql-statements/sql-statement-create-database.md +++ b/sql-statements/sql-statement-create-database.md @@ -74,7 +74,7 @@ mysql> SHOW TABLES; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `CREATE DATABASE` statement in TiDB is fully compatible with MySQL. 
If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-create-index.md b/sql-statements/sql-statement-create-index.md index 27da4d1beb229..501d4ffe19a0c 100644 --- a/sql-statements/sql-statement-create-index.md +++ b/sql-statements/sql-statement-create-index.md @@ -232,9 +232,9 @@ Therefore, when the query performance outweighs the insert and update performanc Expression indexes have the same syntax and limitations as in MySQL. They are implemented by creating indexes on generated virtual columns that are invisible, so the supported expressions inherit all [limitations of virtual generated columns](/generated-columns.md#limitations). -## Multi-valued index +## Multi-valued indexes -Multi-valued index is a kind of secondary index defined on an array column. In a normal index, one index record corresponds to one data record (1:1). In a multi-valued index, multiple index records correspond to one data record (N:1). Multi-valued indexes are used to index JSON arrays. For example, a multi-valued index defined on the `zipcode` field will generate one index record for each element in the `zipcode` array. +Multi-valued indexes are a kind of secondary index defined on an array column. In a normal index, one index record corresponds to one data record (1:1). In a multi-valued index, multiple index records correspond to one data record (N:1). Multi-valued indexes are used to index JSON arrays. For example, a multi-valued index defined on the `zipcode` field will generate one index record for each element in the `zipcode` array. ```json { @@ -244,9 +244,9 @@ Multi-valued index is a kind of secondary index defined on an array column. In a } ``` -### Create a multi-valued index +### Create multi-valued indexes -You can create a multi-valued index by using the `CAST(... AS ... 
ARRAY)` expression in the index definition, as creating an expression index. +You can create multi-valued indexes by using the `CAST(... AS ... ARRAY)` expression in the index definition, as creating an expression index. ```sql mysql> CREATE TABLE customers ( @@ -327,21 +327,23 @@ mysql> INSERT INTO customers VALUES (1, 'pingcap', '{"zipcode": [1]}'); Query OK, 1 row affected (0.00 sec) ``` -### Use a multi-valued index +### Use multi-valued indexes -See [Index Selection - Use multi-valued indexes](/choose-index.md#use-a-multi-valued-index) for more details. +See [Index Selection - Use multi-valued indexes](/choose-index.md#use-multi-valued-indexes) for more details. ### Limitations - For an empty JSON array, no corresponding index record is generated. - The target type in `CAST(... AS ... ARRAY)` cannot be any of `BINARY`, `JSON`, `YEAR`, `FLOAT`, and `DECIMAL`. The source type must be JSON. -- You cannot use a multi-valued index for sorting. -- You can only create a multi-valued index on a JSON array. +- You cannot use multi-valued indexes for sorting. +- You can only create multi-valued indexes on a JSON array. - A multi-valued index cannot be a primary key or a foreign key. - The extra storage space used by a multi-valued index = the average number of array elements per row * the space used by a normal secondary index. - Compared with normal indexes, DML operations will modify more index records for multi-valued indexes, so multi-valued indexes will have a greater performance impact than normal indexes. - Because multi-valued indexes are a special type of expression index, multi-valued indexes have the same limitations as expression indexes. - If a table uses multi-valued indexes, you cannot back up, replicate, or import the table using BR, TiCDC, or TiDB Lightning to a TiDB cluster earlier than v6.6.0. +- Due to the lack of collected statistics for multi-valued indexes, the selection rate of multi-valued indexes is currently based on fixed assumptions. 
When a query hits multiple multi-valued indexes, TiDB might not be able to select the optimal index. In such cases, it is recommended to use the [`use_index_merge`](/optimizer-hints.md#use_index_merget1_name-idx1_name--idx2_name-) optimizer hint to enforce a fixed execution plan. +- For a query with complex conditions, TiDB might not be able to select multi-valued indexes. For information on the condition patterns supported by multi-valued indexes, refer to [Use multi-valued indexes](/choose-index.md#use-multi-valued-indexes). ## Invisible index @@ -365,7 +367,7 @@ The system variables associated with the `CREATE INDEX` statement are `tidb_ddl_ * Adding the primary key of the `CLUSTERED` type to a table is not supported. For more details about the primary key of the `CLUSTERED` type, refer to [clustered index](/clustered-indexes.md). * Expression indexes are incompatible with views. When a query is executed using a view, the expression index cannot be used at the same time. * Expression indexes have compatibility issues with bindings. When the expression of an expression index has a constant, the binding created for the corresponding query expands its scope. For example, suppose that the expression in the expression index is `a+1`, and the corresponding query condition is `a+1 > 2`. In this case, the created binding is `a+? > ?`, which means that the query with the condition such as `a+2 > 2` is also forced to use the expression index and results in a poor execution plan. In addition, this also affects the baseline capturing and baseline evolution in SQL Plan Management (SPM). -* The data written with the multi-valued index must exactly match the defined data type. Otherwise, data writes fail. For details, see [Creat a multi-valued index](/sql-statements/sql-statement-create-index.md#create-a-multi-valued-index). +* The data written with multi-valued indexes must exactly match the defined data type. Otherwise, data writes fail. 
For details, see [create multi-valued indexes](/sql-statements/sql-statement-create-index.md#create-multi-valued-indexes). ## See also diff --git a/sql-statements/sql-statement-create-resource-group.md b/sql-statements/sql-statement-create-resource-group.md index 9beb133fb4408..675062b007c31 100644 --- a/sql-statements/sql-statement-create-resource-group.md +++ b/sql-statements/sql-statement-create-resource-group.md @@ -9,7 +9,7 @@ summary: Learn the usage of CREATE RESOURCE GROUP in TiDB. > **Note:** > -> This feature is not available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). +> This feature is not available on [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). @@ -25,7 +25,8 @@ IfNotExists ::= ('IF' 'NOT' 'EXISTS')? ResourceGroupName ::= - Identifier + Identifier +| "DEFAULT" ResourceGroupOptionList ::= DirectResourceGroupOption @@ -36,10 +37,38 @@ DirectResourceGroupOption ::= "RU_PER_SEC" EqOpt stringLit | "PRIORITY" EqOpt ResourceGroupPriorityOption | "BURSTABLE" +| "BURSTABLE" EqOpt Boolean +| "QUERY_LIMIT" EqOpt '(' ResourceGroupRunawayOptionList ')' +| "QUERY_LIMIT" EqOpt '(' ')' +| "QUERY_LIMIT" EqOpt "NULL" + ResourceGroupPriorityOption ::= LOW | MEDIUM | HIGH + +ResourceGroupRunawayOptionList ::= + DirectResourceGroupRunawayOption +| ResourceGroupRunawayOptionList DirectResourceGroupRunawayOption +| ResourceGroupRunawayOptionList ',' DirectResourceGroupRunawayOption + +DirectResourceGroupRunawayOption ::= + "EXEC_ELAPSED" EqOpt stringLit +| "ACTION" EqOpt ResourceGroupRunawayActionOption +| "WATCH" EqOpt ResourceGroupRunawayWatchOption WatchDurationOption + +WatchDurationOption ::= + ("DURATION" EqOpt stringLit | "DURATION" EqOpt "UNLIMITED")? + +ResourceGroupRunawayWatchOption ::= + EXACT +| SIMILAR +| PLAN + +ResourceGroupRunawayActionOption ::= + DRYRUN +| COOLDOWN +| KILL ``` The resource group name parameter (`ResourceGroupName`) must be globally unique. 
@@ -51,6 +80,7 @@ TiDB supports the following `DirectResourceGroupOption`, where [Request Unit (RU | `RU_PER_SEC` | Rate of RU backfilling per second | `RU_PER_SEC = 500` indicates that this resource group is backfilled with 500 RUs per second | | `PRIORITY` | The absolute priority of tasks to be processed on TiKV | `PRIORITY = HIGH` indicates that the priority is high. If not specified, the default value is `MEDIUM`. | | `BURSTABLE` | If the `BURSTABLE` attribute is set, TiDB allows the corresponding resource group to use the available system resources when the quota is exceeded. | +| `QUERY_LIMIT` | When the query execution meets this condition, the query is identified as a runaway query and the corresponding action is executed. | `QUERY_LIMIT=(EXEC_ELAPSED='60s', ACTION=KILL, WATCH=EXACT DURATION='10m')` indicates that the query is identified as a runaway query when the execution time exceeds 60 seconds. The query is terminated. All SQL statements with the same SQL text will be terminated immediately in the coming 10 minutes. `QUERY_LIMIT=()` or `QUERY_LIMIT=NULL` means that runaway control is not enabled. See [Runaway Queries](/tidb-resource-control.md#manage-queries-that-consume-more-resources-than-expected-runaway-queries). 
| > **Note:** > @@ -82,7 +112,7 @@ Query OK, 0 rows affected (0.08 sec) ```sql CREATE RESOURCE GROUP IF NOT EXISTS rg2 - RU_PER_SEC = 200; + RU_PER_SEC = 200 QUERY_LIMIT=(EXEC_ELAPSED='100ms', ACTION=KILL); ``` ```sql @@ -94,12 +124,12 @@ SELECT * FROM information_schema.resource_groups WHERE NAME ='rg1' or NAME = 'rg ``` ```sql -+------+------------+----------+-----------+ -| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | -+------+------------+----------+-----------+ -| rg1 | 100 | HIGH | YES | -| rg2 | 200 | MEDIUM | NO | -+------+------------+----------+-----------+ ++------+------------+----------+-----------+---------------------------------+ +| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | QUERY_LIMIT | ++------+------------+----------+-----------+---------------------------------+ +| rg1 | 100 | HIGH | YES | NULL | +| rg2 | 200 | MEDIUM | NO | EXEC_ELAPSED=100ms, ACTION=KILL | ++------+------------+----------+-----------+---------------------------------+ 2 rows in set (1.30 sec) ``` diff --git a/sql-statements/sql-statement-create-role.md b/sql-statements/sql-statement-create-role.md index 7b5cd58ba4f29..4376a57c5f4cb 100644 --- a/sql-statements/sql-statement-create-role.md +++ b/sql-statements/sql-statement-create-role.md @@ -129,7 +129,7 @@ SHOW TABLES IN test; ## MySQL compatibility -This statement is understood to be fully compatible with roles, which are a feature of MySQL 8.0. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `CREATE ROLE` statement in TiDB is fully compatible with the roles feature in MySQL 8.0. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). 
## See also diff --git a/sql-statements/sql-statement-create-table-like.md b/sql-statements/sql-statement-create-table-like.md index c8efc87ada7b6..6ae771b07a1a6 100644 --- a/sql-statements/sql-statement-create-table-like.md +++ b/sql-statements/sql-statement-create-table-like.md @@ -60,7 +60,7 @@ If the table to be copied is defined with the `PRE_SPLIT_REGIONS` attribute, the ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `CREATE TABLE LIKE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-create-table.md b/sql-statements/sql-statement-create-table.md index a52031a224732..ecaf89fd5bf0c 100644 --- a/sql-statements/sql-statement-create-table.md +++ b/sql-statements/sql-statement-create-table.md @@ -221,9 +221,9 @@ mysql> DROP TABLE IF EXISTS t1; Query OK, 0 rows affected (0.22 sec) mysql> CREATE TABLE IF NOT EXISTS t1 ( - -> id BIGINT NOT NULL PRIMARY KEY auto_increment, - -> b VARCHAR(200) NOT NULL - -> ); + id BIGINT NOT NULL PRIMARY KEY auto_increment, + b VARCHAR(200) NOT NULL + ); Query OK, 0 rows affected (0.08 sec) mysql> DESC t1; @@ -257,7 +257,6 @@ mysql> DESC t1; * The `COMMENT` attribute does not support the `WITH PARSER` option. * TiDB supports 1017 columns in a single table by default and 4096 columns at most. The corresponding number limit in InnoDB is 1017 columns, and the hard limit in MySQL is 4096 columns. For details, see [TiDB Limitations](/tidb-limitations.md). * For partitioned tables, only Range, Hash and Range Columns (single column) are supported. For details, see [partitioned table](/partitioned-table.md). -* `CHECK` constraints are parsed but ignored (MySQL 5.7 compatible behavior). 
For details, see [Constraints](/constraints.md). ## See also diff --git a/sql-statements/sql-statement-deallocate.md b/sql-statements/sql-statement-deallocate.md index 7681a85bb1b3d..2de1eefe207dd 100644 --- a/sql-statements/sql-statement-deallocate.md +++ b/sql-statements/sql-statement-deallocate.md @@ -48,7 +48,7 @@ Query OK, 0 rows affected (0.00 sec) ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `DEALLOCATE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-delete.md b/sql-statements/sql-statement-delete.md index 3fc54b1779f4f..af55f5833c9c6 100644 --- a/sql-statements/sql-statement-delete.md +++ b/sql-statements/sql-statement-delete.md @@ -54,7 +54,7 @@ mysql> SELECT * FROM t1; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `DELETE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-do.md b/sql-statements/sql-statement-do.md index 6babd12a94748..1f514ad5c3792 100644 --- a/sql-statements/sql-statement-do.md +++ b/sql-statements/sql-statement-do.md @@ -54,7 +54,7 @@ Query OK, 0 rows affected (2.50 sec) ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. 
+The `DO` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-drop-binding.md b/sql-statements/sql-statement-drop-binding.md index 804803b81bcd7..b240e6293ea3a 100644 --- a/sql-statements/sql-statement-drop-binding.md +++ b/sql-statements/sql-statement-drop-binding.md @@ -34,11 +34,11 @@ The following example shows how to remove a binding according to a SQL statement ```sql mysql> CREATE TABLE t1 ( - -> id INT NOT NULL PRIMARY KEY auto_increment, - -> b INT NOT NULL, - -> pad VARBINARY(255), - -> INDEX(b) - -> ); + id INT NOT NULL PRIMARY KEY auto_increment, + b INT NOT NULL, + pad VARBINARY(255), + INDEX(b) + ); Query OK, 0 rows affected (0.07 sec) mysql> INSERT INTO t1 SELECT NULL, FLOOR(RAND()*1000), RANDOM_BYTES(255) FROM dual; @@ -91,9 +91,9 @@ mysql> EXPLAIN ANALYZE SELECT * FROM t1 WHERE b = 123; 3 rows in set (0.02 sec) mysql> CREATE SESSION BINDING FOR - -> SELECT * FROM t1 WHERE b = 123 - -> USING - -> SELECT * FROM t1 IGNORE INDEX (b) WHERE b = 123; + SELECT * FROM t1 WHERE b = 123 + USING + SELECT * FROM t1 IGNORE INDEX (b) WHERE b = 123; Query OK, 0 rows affected (0.00 sec) mysql> EXPLAIN ANALYZE SELECT * FROM t1 WHERE b = 123; diff --git a/sql-statements/sql-statement-drop-database.md b/sql-statements/sql-statement-drop-database.md index b8a822f3ea392..506960218cf1a 100644 --- a/sql-statements/sql-statement-drop-database.md +++ b/sql-statements/sql-statement-drop-database.md @@ -47,7 +47,7 @@ mysql> SHOW DATABASES; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `DROP DATABASE` statement in TiDB is fully compatible with MySQL. 
If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-drop-resource-group.md b/sql-statements/sql-statement-drop-resource-group.md index 2aa8de47500f6..a92fa125d95dd 100644 --- a/sql-statements/sql-statement-drop-resource-group.md +++ b/sql-statements/sql-statement-drop-resource-group.md @@ -9,7 +9,7 @@ summary: Learn the usage of DROP RESOURCE GROUP in TiDB. > **Note:** > -> This feature is not available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). +> This feature is not available on [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). @@ -26,6 +26,7 @@ IfExists ::= ResourceGroupName ::= Identifier +| "DEFAULT" ``` > **Note:** @@ -58,11 +59,11 @@ SELECT * FROM information_schema.resource_groups WHERE NAME ='rg1'; ``` ```sql -+------+------------+----------+-----------+ -| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | -+------+------------+----------+-----------+ -| rg1 | 500 | MEDIUM | YES | -+------+------------+----------+-----------+ ++------+------------+----------+-----------+-------------+ +| NAME | RU_PER_SEC | PRIORITY | BURSTABLE | QUERY_LIMIT | ++------+------------+----------+-----------+-------------+ +| rg1 | 500 | MEDIUM | YES | NULL | ++------+------------+----------+-----------+-------------+ 1 row in set (0.01 sec) ``` @@ -90,4 +91,4 @@ MySQL also supports [DROP RESOURCE GROUP](https://dev.mysql.com/doc/refman/8.0/e * [ALTER RESOURCE GROUP](/sql-statements/sql-statement-alter-resource-group.md) * [CREATE RESOURCE GROUP](/sql-statements/sql-statement-create-resource-group.md) -* [Request Unit (RU)](/tidb-resource-control.md#what-is-request-unit-ru) \ No newline at end of file +* [Request Unit (RU)](/tidb-resource-control.md#what-is-request-unit-ru) diff --git a/sql-statements/sql-statement-drop-role.md 
b/sql-statements/sql-statement-drop-role.md index 7fcbf62ef2d98..3e52cfc49ea42 100644 --- a/sql-statements/sql-statement-drop-role.md +++ b/sql-statements/sql-statement-drop-role.md @@ -162,7 +162,7 @@ ERROR 3530 (HY000): `analyticsteam`@`%` is is not granted to jennifer@% ## MySQL compatibility -This statement is understood to be fully compatible with roles, which are a feature of MySQL 8.0. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `DROP ROLE` statement in TiDB is fully compatible with the roles feature in MySQL 8.0. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-drop-view.md b/sql-statements/sql-statement-drop-view.md index 9685072629239..cd9c030a9dd39 100644 --- a/sql-statements/sql-statement-drop-view.md +++ b/sql-statements/sql-statement-drop-view.md @@ -74,7 +74,7 @@ mysql> SELECT * FROM t1; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `DROP VIEW` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-execute.md b/sql-statements/sql-statement-execute.md index 8c76b17055ed7..8b1c463de0341 100644 --- a/sql-statements/sql-statement-execute.md +++ b/sql-statements/sql-statement-execute.md @@ -38,7 +38,7 @@ Query OK, 0 rows affected (0.00 sec) ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. 
+The `EXECUTE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-explain-analyze.md b/sql-statements/sql-statement-explain-analyze.md index b7c1fef3c90bf..3954c9ed4dbc0 100644 --- a/sql-statements/sql-statement-explain-analyze.md +++ b/sql-statements/sql-statement-explain-analyze.md @@ -282,6 +282,18 @@ commit_txn: {prewrite:48.564544ms, wait_prewrite_binlog:47.821579, get_commit_ts - `write_keys`: The total `keys` written in the transaction. - `write_byte`: The total bytes of `key-value` written in the transaction, and the unit is byte. +### RU (Request Unit) consumption + +[Request Unit (RU)](/tidb-resource-control.md#what-is-request-unit-ru) is a unified abstraction unit of system resources, which is defined in TiDB resource control. The `execution info` of the top-level operator shows the overall RU consumption of this particular SQL statement. + +``` +RU:273.842670 +``` + +> **Note:** +> +> This value shows the actual RUs consumed by this execution. The same SQL statement might consume different amounts of RUs each time it is executed due to the effects of caching (for example, [coprocessor cache](/coprocessor-cache.md)). + ### Other common execution information The Coprocessor operators usually contain two parts of execution time information: `cop_task` and `tikv_task`. `cop_task` is the time recorded by TiDB, and it is from the moment that the request is sent to the server to the moment that the response is received. `tikv_task` is the time recorded by TiKV Coprocessor itself. If there is much difference between the two, it might indicate that the time spent waiting for the response is too long, or the time spent on gRPC or network is too long. 
diff --git a/sql-statements/sql-statement-flashback-to-timestamp.md b/sql-statements/sql-statement-flashback-to-timestamp.md index 3843499c35fce..8165316870a35 100644 --- a/sql-statements/sql-statement-flashback-to-timestamp.md +++ b/sql-statements/sql-statement-flashback-to-timestamp.md @@ -11,7 +11,17 @@ TiDB v6.4.0 introduces the `FLASHBACK CLUSTER TO TIMESTAMP` syntax. You can use > **Warning:** > -> The `FLASHBACK CLUSTER TO TIMESTAMP` syntax is not applicable to TiDB Cloud [Serverless Tier](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta) clusters. Do not execute this statement on Serverless Tier clusters to avoid unexpected results. +> The `FLASHBACK CLUSTER TO TIMESTAMP` syntax is not applicable to [TiDB Serverless](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless) clusters. Do not execute this statement on TiDB Serverless clusters to avoid unexpected results. + + + + + +> **Warning:** +> +> When you use this feature in TiDB v7.1.0, some Regions might remain in the FLASHBACK process even after the completion of the FLASHBACK operation. It is recommended to avoid using this feature in v7.1.0. For more information, see issue [#44292](https://github.com/pingcap/tidb/issues/44292). +> +> If you have encountered this issue, you can use the [TiDB snapshot backup and restore](/br/br-snapshot-guide.md) feature to restore data. diff --git a/sql-statements/sql-statement-flush-privileges.md b/sql-statements/sql-statement-flush-privileges.md index aa1ece3d1117f..525377b55624c 100644 --- a/sql-statements/sql-statement-flush-privileges.md +++ b/sql-statements/sql-statement-flush-privileges.md @@ -6,7 +6,7 @@ aliases: ['/docs/dev/sql-statements/sql-statement-flush-privileges/','/docs/dev/ # FLUSH PRIVILEGES -This statement triggers TiDB to reload the in-memory copy of privileges from the privilege tables. You should execute `FLUSH PRIVILEGES` after making manual edits to tables such as `mysql.user`. 
Executing this statement is not required after using privilege statements such as `GRANT` or `REVOKE`. Executing this statement requires the `RELOAD` privilege. +The statement `FLUSH PRIVILEGES` instructs TiDB to reload the in-memory copy of privileges from the privilege tables. You must execute this statement after manually editing tables such as `mysql.user`. However, executing this statement is not necessary after using privilege statements like `GRANT` or `REVOKE`. To execute this statement, the `RELOAD` privilege is required. ## Synopsis @@ -35,7 +35,7 @@ Query OK, 0 rows affected (0.01 sec) ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `FLUSH PRIVILEGES` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-grant-role.md b/sql-statements/sql-statement-grant-role.md index 02e8ac7f40720..e62a832576092 100644 --- a/sql-statements/sql-statement-grant-role.md +++ b/sql-statements/sql-statement-grant-role.md @@ -129,7 +129,7 @@ SHOW TABLES IN test; ## MySQL compatibility -This statement is understood to be fully compatible with roles, which are a feature of MySQL 8.0. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `GRANT <role>` statement in TiDB is fully compatible with the roles feature in MySQL 8.0. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). 
## See also diff --git a/sql-statements/sql-statement-import-into.md b/sql-statements/sql-statement-import-into.md new file mode 100644 index 0000000000000..2ab27bece9ffa --- /dev/null +++ b/sql-statements/sql-statement-import-into.md @@ -0,0 +1,266 @@ +--- +title: IMPORT INTO +summary: An overview of the usage of IMPORT INTO in TiDB. +--- + +# IMPORT INTO + +The `IMPORT INTO` statement is used to import data in formats such as `CSV`, `SQL`, and `PARQUET` into an empty table in TiDB via the [Physical Import Mode](/tidb-lightning/tidb-lightning-physical-import-mode.md) of TiDB Lightning. + + + +> **Warning:** +> +> Currently, this statement is experimental. It is not recommended to use it in production environments. + +`IMPORT INTO` supports importing data from files stored in Amazon S3, GCS, and the TiDB local storage. + +- For data files stored in Amazon S3 or GCS, `IMPORT INTO` supports running in the [TiDB backend task distributed execution framework](/tidb-distributed-execution-framework.md). + + - When this framework is enabled ([tidb_enable_dist_task](/system-variables.md#tidb_enable_dist_task-new-in-v710) is `ON`), `IMPORT INTO` splits a data import job into multiple sub-jobs and distributes these sub-jobs to different TiDB nodes for execution to improve the import efficiency. + - When this framework is disabled, `IMPORT INTO` only supports running on the TiDB node where the current user is connected. + +- For data files stored locally in TiDB, `IMPORT INTO` only supports running on the TiDB node where the current user is connected. Therefore, the data files need to be placed on the TiDB node where the current user is connected. If you access TiDB through a proxy or load balancer, you cannot import data files stored locally in TiDB. + +## Known issue + +After starting a data import job, TiDB sorts the data to be imported locally. 
During the sorting, in the case that the disk space used by TiDB exceeds the specified value of [`DISK_QUOTA`](#withoptions) or reaches 80% of the local disk space and TiDB has already started writing data to TiKV, if you cancel the import job or the import job fails, the background import thread will continue running for a while before exiting completely. For more information, see [#45048](https://github.com/pingcap/tidb/issues/45048). + +## Restrictions + +- Currently, `IMPORT INTO` only supports importing data within 1 TiB. +- `IMPORT INTO` only supports importing data into existing empty tables in the database. +- `IMPORT INTO` does not support transactions or rollback. Executing `IMPORT INTO` within an explicit transaction (`BEGIN`/`END`) will return an error. +- The execution of `IMPORT INTO` blocks the current connection until the import is completed. To execute the statement asynchronously, you can add the `DETACHED` option. +- `IMPORT INTO` does not support working simultaneously with features such as [Backup & Restore](/br/backup-and-restore-overview.md), [`FLASHBACK CLUSTER TO TIMESTAMP`](/sql-statements/sql-statement-flashback-to-timestamp.md), [acceleration of adding indexes](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630), data import using TiDB Lightning, data replication using TiCDC, or [Point-in-Time Recovery (PITR)](/br/br-log-architecture.md). +- Only one `IMPORT INTO` job can run on a cluster at a time. Although `IMPORT INTO` performs a precheck for running jobs, it is not a hard limit. Starting multiple import jobs might work when multiple clients execute `IMPORT INTO` simultaneously, but you need to avoid that because it might result in data inconsistency or import failures. +- During the data import process, do not perform DDL or DML operations on the target table, and do not execute [`FLASHBACK DATABASE`](/sql-statements/sql-statement-flashback-database.md) for the target database. 
These operations can lead to import failures or data inconsistencies. In addition, it is **NOT** recommended to perform read operations during the import process, as the data being read might be inconsistent. Perform read and write operations only after the import is completed. +- The import process consumes system resources significantly. To get better performance, it is recommended to use TiDB nodes with at least 32 cores and 64 GiB of memory. TiDB writes sorted data to the TiDB [temporary directory](/tidb-configuration-file.md#temp-dir-new-in-v630) during import, so it is recommended to configure high-performance storage media such as flash memory. For more information, see [Physical Import Mode limitations](/tidb-lightning/tidb-lightning-physical-import-mode.md#requirements-and-restrictions). +- The TiDB [temporary directory](/tidb-configuration-file.md#temp-dir-new-in-v630) is expected to have at least 90 GiB of available space. It is recommended to allocate storage space that is equal to or greater than the volume of data to be imported. +- One import job supports importing data into one target table only. To import data into multiple target tables, after the import for a target table is completed, you need to create a new job for the next target table. + +## Prerequisites for import + +Before using `IMPORT INTO` to import data, make sure the following requirements are met: + +- The target table to be imported is already created in TiDB and it is empty. +- The target cluster has sufficient space to store the data to be imported. +- The [temporary directory](/tidb-configuration-file.md#temp-dir-new-in-v630) of the TiDB node connected to the current session has at least 90 GiB of available space. If [`tidb_enable_dist_task`](/system-variables.md#tidb_enable_dist_task-new-in-v710) is enabled, also make sure that the temporary directory of each TiDB node in the cluster has sufficient disk space. 
+ +## Required privileges + +Executing `IMPORT INTO` requires the `SELECT`, `UPDATE`, `INSERT`, `DELETE`, and `ALTER` privileges on the target table. To import files in TiDB local storage, the `FILE` privilege is also required. + +## Synopsis + +```ebnf+diagram +ImportIntoStmt ::= + 'IMPORT' 'INTO' TableName ColumnNameOrUserVarList? SetClause? FROM fileLocation Format? WithOptions? + +ColumnNameOrUserVarList ::= + '(' ColumnNameOrUserVar (',' ColumnNameOrUserVar)* ')' + +SetClause ::= + 'SET' SetItem (',' SetItem)* + +SetItem ::= + ColumnName '=' Expr + +Format ::= + 'CSV' | 'SQL' | 'PARQUET' + +WithOptions ::= + 'WITH' OptionItem (',' OptionItem)* + +OptionItem ::= + optionName '=' optionVal | optionName +``` + +## Parameter description + +### ColumnNameOrUserVarList + +It specifies how each field in the data file corresponds to the columns in the target table. You can also use it to map fields to variables to skip certain fields for the import, or use it in `SetClause`. + +- If this parameter is not specified, the number of fields in each row of the data file must match the number of columns in the target table, and the fields will be imported to the corresponding columns in order. +- If this parameter is specified, the number of specified columns or variables must match the number of fields in each row of the data file. + +### SetClause + +It specifies how the values of target columns are calculated. In the right side of the `SET` expression, you can reference the variables specified in `ColumnNameOrUserVarList`. + +In the left side of the `SET` expression, you can only reference a column name that is not included in `ColumnNameOrUserVarList`. If the target column name already exists in `ColumnNameOrUserVarList`, the `SET` expression is invalid. + +### fileLocation + +It specifies the storage location of the data file, which can be an Amazon S3 or GCS URI path, or a TiDB local file path. 
+ +- Amazon S3 or GCS URI path: for URI configuration details, see [External storage](/br/backup-and-restore-storages.md#uri-format). +- TiDB local file path: it must be an absolute path, and the file extension must be `.csv`, `.sql`, or `.parquet`. Make sure that the files corresponding to this path are stored on the TiDB node connected by the current user, and the user has the `FILE` privilege. + +> **Note:** +> +> If [SEM](/system-variables.md#tidb_enable_enhanced_security) is enabled in the target cluster, the `fileLocation` cannot be specified as a local file path. + +In the `fileLocation` parameter, you can specify a single file or use the `*` wildcard to match multiple files for import. Note that the wildcard can only be used in the file name, because it does not match directories or recursively match files in subdirectories. Taking files stored on Amazon S3 as examples, you can configure the parameter as follows: + +- Import a single file: `s3://<bucket-name>/path/to/data/foo.csv` +- Import all files in a specified path: `s3://<bucket-name>/path/to/data/*` +- Import all files with the `.csv` suffix in a specified path: `s3://<bucket-name>/path/to/data/*.csv` +- Import all files with the `foo` prefix in a specified path: `s3://<bucket-name>/path/to/data/foo*` +- Import all files with the `foo` prefix and the `.csv` suffix in a specified path: `s3://<bucket-name>/path/to/data/foo*.csv` + +### Format + +The `IMPORT INTO` statement supports three data file formats: `CSV`, `SQL`, and `PARQUET`. If not specified, the default format is `CSV`. + +### WithOptions + +You can use `WithOptions` to specify import options and control the data import process. For example, to execute the import asynchronously in the backend, you can enable the `DETACHED` mode for the import by adding the `WITH DETACHED` option to the `IMPORT INTO` statement. 
+ +The supported options are described as follows: + +| Option name | Supported data formats | Description | +|:---|:---|:---| +| `CHARACTER_SET='<string>'` | CSV | Specifies the character set of the data file. The default character set is `utf8mb4`. The supported character sets include `binary`, `utf8`, `utf8mb4`, `gb18030`, `gbk`, `latin1`, and `ascii`. | +| `FIELDS_TERMINATED_BY='<string>'` | CSV | Specifies the field separator. The default separator is `,`. | +| `FIELDS_ENCLOSED_BY='<char>'` | CSV | Specifies the field delimiter. The default delimiter is `"`. | +| `FIELDS_ESCAPED_BY='<char>'` | CSV | Specifies the escape character for fields. The default escape character is `\`. | +| `FIELDS_DEFINED_NULL_BY='<string>'` | CSV | Specifies the value that represents `NULL` in the fields. The default value is `\N`. | +| `LINES_TERMINATED_BY='<string>'` | CSV | Specifies the line terminator. By default, `IMPORT INTO` automatically identifies `\n`, `\r`, or `\r\n` as line terminators. If the line terminator is one of these three, you do not need to explicitly specify this option. | +| `SKIP_ROWS=<number>` | CSV | Specifies the number of rows to skip. The default value is `0`. You can use this option to skip the header in a CSV file. If you use a wildcard to specify the source files for import, this option applies to all source files that are matched by the wildcard in `fileLocation`. | +| `DISK_QUOTA='<string>'` | All formats | Specifies the disk space threshold that can be used during data sorting. The default value is 80% of the disk space in the TiDB [temporary directory](/tidb-configuration-file.md#temp-dir-new-in-v630). If the total disk size cannot be obtained, the default value is 50 GiB. When specifying `DISK_QUOTA` explicitly, make sure that the value does not exceed 80% of the disk space in the TiDB temporary directory. | +| `DISABLE_TIKV_IMPORT_MODE` | All formats | Specifies whether to disable switching TiKV to import mode during the import process. By default, switching TiKV to import mode is not disabled. 
If there are ongoing read-write operations in the cluster, you can enable this option to avoid impact from the import process. | +| `THREAD=<number>` | All formats | Specifies the concurrency for import. The default value is 50% of the CPU cores, with a minimum value of 1. You can explicitly specify this option to control the resource usage, but make sure that the value does not exceed the number of CPU cores. To import data into a new cluster without any data, it is recommended to increase this concurrency appropriately to improve import performance. If the target cluster is already used in a production environment, it is recommended to adjust this concurrency according to your application requirements. | +| `MAX_WRITE_SPEED='<string>'` | All formats | Controls the write speed to a TiKV node. By default, there is no speed limit. For example, you can specify this option as `1MiB` to limit the write speed to 1 MiB/s. | +| `CHECKSUM_TABLE='<option>'` | All formats | Configures whether to perform a checksum check on the target table after the import to validate the import integrity. The supported values include `"required"` (default), `"optional"`, and `"off"`. `"required"` means performing a checksum check after the import. If the checksum check fails, TiDB will return an error and the import will exit. `"optional"` means performing a checksum check after the import. If an error occurs, TiDB will return a warning and ignore the error. `"off"` means not performing a checksum check after the import. | +| `DETACHED` | All formats | Controls whether to execute `IMPORT INTO` asynchronously. When this option is enabled, executing `IMPORT INTO` immediately returns the information of the import job (such as the `Job_ID`), and the job is executed asynchronously in the backend. |
For the description of each field, see [`SHOW IMPORT JOB(s)`](/sql-statements/sql-statement-show-import-job.md). + +When `IMPORT INTO` completes the import, the example output is as follows: + +```sql +IMPORT INTO t FROM '/path/to/small.csv'; ++--------+--------------------+--------------+----------+-------+----------+------------------+---------------+----------------+----------------------------+----------------------------+----------------------------+------------+ +| Job_ID | Data_Source | Target_Table | Table_ID | Phase | Status | Source_File_Size | Imported_Rows | Result_Message | Create_Time | Start_Time | End_Time | Created_By | ++--------+--------------------+--------------+----------+-------+----------+------------------+---------------+----------------+----------------------------+----------------------------+----------------------------+------------+ +| 60002 | /path/to/small.csv | `test`.`t` | 363 | | finished | 16B | 2 | | 2023-06-08 16:01:22.095698 | 2023-06-08 16:01:22.394418 | 2023-06-08 16:01:26.531821 | root@% | ++--------+--------------------+--------------+----------+-------+----------+------------------+---------------+----------------+----------------------------+----------------------------+----------------------------+------------+ +``` + +When the `DETACHED` mode is enabled, executing the `IMPORT INTO` statement will immediately return the job information in the output. From the output, you can see that the status of the job is `pending`, which means waiting for execution. 
+ +```sql +IMPORT INTO t FROM '/path/to/small.csv' WITH DETACHED; ++--------+--------------------+--------------+----------+-------+---------+------------------+---------------+----------------+----------------------------+------------+----------+------------+ +| Job_ID | Data_Source | Target_Table | Table_ID | Phase | Status | Source_File_Size | Imported_Rows | Result_Message | Create_Time | Start_Time | End_Time | Created_By | ++--------+--------------------+--------------+----------+-------+---------+------------------+---------------+----------------+----------------------------+------------+----------+------------+ +| 60001 | /path/to/small.csv | `test`.`t` | 361 | | pending | 16B | NULL | | 2023-06-08 15:59:37.047703 | NULL | NULL | root@% | ++--------+--------------------+--------------+----------+-------+---------+------------------+---------------+----------------+----------------------------+------------+----------+------------+ +``` + +## View and manage import jobs + +For an import job with the `DETACHED` mode enabled, you can use [`SHOW IMPORT`](/sql-statements/sql-statement-show-import-job.md) to view its current job progress. + +After an import job is started, you can cancel it using [`CANCEL IMPORT JOB `](/sql-statements/sql-statement-cancel-import-job.md). + +## Examples + +### Import a CSV file with headers + +```sql +IMPORT INTO t FROM '/path/to/file.csv' WITH skip_rows=1; +``` + +### Import a file asynchronously in the `DETACHED` mode + +```sql +IMPORT INTO t FROM '/path/to/file.csv' WITH DETACHED; +``` + +### Skip importing a specific field in your data file + +Assume that your data file is in the CSV format and its content is as follows: + +``` +id,name,age +1,Tom,23 +2,Jack,44 +``` + +And assume that the target table schema for the import is `CREATE TABLE t(id int primary key, name varchar(100))`. 
To skip importing the `age` field in the data file to the table `t`, you can execute the following SQL statement: + +```sql +IMPORT INTO t(id, name, @1) FROM '/path/to/file.csv' WITH skip_rows=1; +``` + +### Import multiple data files using the wildcard `*` + +Assume that there are three files named `file-01.csv`, `file-02.csv`, and `file-03.csv` in the `/path/to/` directory. To import these three files into a target table `t` using `IMPORT INTO`, you can execute the following SQL statement: + +```sql +IMPORT INTO t FROM '/path/to/file-*.csv' +``` + +### Import data files from Amazon S3 or GCS + +- Import data files from Amazon S3: + + ```sql + IMPORT INTO t FROM 's3://bucket-name/test.csv?access-key=XXX&secret-access-key=XXX'; + ``` + +- Import data files from GCS: + + ```sql + IMPORT INTO t FROM 'gs://bucket-name/test.csv'; + ``` + +For details about the URI path configuration for Amazon S3 or GCS, see [External storage](/br/backup-and-restore-storages.md#uri-format). + +### Calculate column values using SetClause + +Assume that your data file is in the CSV format and its content is as follows: + +``` +id,name,val +1,phone,230 +2,book,440 +``` + +And assume that the target table schema for the import is `CREATE TABLE t(id int primary key, name varchar(100), val int)`. If you want to multiply the `val` column values by 100 during the import, you can execute the following SQL statement: + +```sql +IMPORT INTO t(id, name, @1) SET val=@1*100 FROM '/path/to/file.csv' WITH skip_rows=1; +``` + +### Import a data file in the SQL format + +```sql +IMPORT INTO t FROM '/path/to/file.sql' FORMAT 'sql'; +``` + +### Limit the write speed to TiKV + +To limit the write speed to a TiKV node to 10 MiB/s, execute the following SQL statement: + +```sql +IMPORT INTO t FROM 's3://bucket/path/to/file.parquet?access-key=XXX&secret-access-key=XXX' FORMAT 'parquet' WITH MAX_WRITE_SPEED='10MiB'; +``` + +## MySQL compatibility + +This statement is a TiDB extension to MySQL syntax. 
+ +## See also + +* [`SHOW IMPORT JOB(s)`](/sql-statements/sql-statement-show-import-job.md) +* [`CANCEL IMPORT JOB`](/sql-statements/sql-statement-cancel-import-job.md) diff --git a/sql-statements/sql-statement-insert.md b/sql-statements/sql-statement-insert.md index 7cc8a30e417f2..cac13069ca412 100644 --- a/sql-statements/sql-statement-insert.md +++ b/sql-statements/sql-statement-insert.md @@ -98,7 +98,7 @@ mysql> SELECT * FROM t2; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `INSERT` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-kill.md b/sql-statements/sql-statement-kill.md index a1858613ea1c3..dba75b9f2a4f2 100644 --- a/sql-statements/sql-statement-kill.md +++ b/sql-statements/sql-statement-kill.md @@ -47,18 +47,26 @@ Query OK, 0 rows affected (0.00 sec) ## MySQL compatibility - The `KILL` statement of MySQL can only terminate a connection in the currently connected MySQL instance, while the `KILL` statement of TiDB can terminate a connection in any TiDB instance in the entire cluster. -- Currently, using the MySQL command line ctrl+c to terminate a query or connection in TiDB is not supported. +- In v7.2.0 and earlier versions, using the MySQL command line Control+C to terminate a query or connection in TiDB is not supported. ## Behavior change descriptions +Starting from v7.3.0, TiDB supports generating 32-bit connection IDs, which is enabled by default and controlled by the [`enable-32bits-connection-id`](/tidb-configuration-file.md#enable-32bits-connection-id-new-in-v730) configuration item. 
When both the Global Kill feature and 32-bit connection ID are enabled, TiDB generates a 32-bit connection ID and you can terminate queries or connections in the MySQL command-line using Control+C. + +> **Warning:** +> +> When the number of TiDB instances in the cluster exceeds 2048 or the concurrent connection count of a single TiDB instance exceeds 1048576, the 32-bit connection ID space becomes insufficient and is automatically upgraded to 64-bit connection IDs. During the upgrade process, existing business and established connections are unaffected. However, subsequent new connections cannot be terminated using Control+C in the MySQL command-line. + Starting from v6.1.0, TiDB supports the Global Kill feature, which is enabled by default and controlled by the [`enable-global-kill`](/tidb-configuration-file.md#enable-global-kill-new-in-v610) configuration. +Starting from v7.3.0, TiDB supports generating 32-bit connection IDs, which is enabled by default. When both the Global Kill feature and 32-bit connection ID are enabled, you can terminate queries or connections in the MySQL command-line using Control+C. + Starting from v6.1.0, TiDB supports the Global Kill feature, which is enabled by default. @@ -71,11 +79,11 @@ If the Global Kill feature is not enabled or you are using a TiDB version earlie
+- It is **STRONGLY NOT RECOMMENDED** to set [`compatible-kill-query = true`](/tidb-configuration-file.md#compatible-kill-query) in your configuration file UNLESS you are certain that clients will be always connected to the same TiDB instance. This is because pressing Control+C in the default MySQL client opens a new connection in which `KILL` is executed. If there is a proxy between the client and the TiDB cluster, the new connection might be routed to a different TiDB instance, which possibly kills a different session by mistake. -- The `KILL TIDB` statement is a TiDB extension. The feature of this statement is similar to the MySQL `KILL [CONNECTION|QUERY]` command and the MySQL command line ctrl+c. It is safe to use `KILL TIDB` on the same TiDB instance. +- The `KILL TIDB` statement is a TiDB extension. The feature of this statement is similar to the MySQL `KILL [CONNECTION|QUERY]` command and the MySQL command line Control+C. It is safe to use `KILL TIDB` on the same TiDB instance. ## See also diff --git a/sql-statements/sql-statement-load-data.md b/sql-statements/sql-statement-load-data.md index 7eface00451e9..16bca2c1b4dad 100644 --- a/sql-statements/sql-statement-load-data.md +++ b/sql-statements/sql-statement-load-data.md @@ -8,22 +8,14 @@ aliases: ['/docs/dev/sql-statements/sql-statement-load-data/','/docs/dev/referen The `LOAD DATA` statement batch loads data into a TiDB table. -In TiDB v7.0.0, the `LOAD DATA` SQL statement supports the following features: +Starting from TiDB v7.0.0, the `LOAD DATA` SQL statement supports the following features: - Support importing data from S3 and GCS - Add a new parameter `FIELDS DEFINED NULL BY` > **Warning:** > -> The new parameter `FIELDS DEFINED NULL BY` and support for importing data from S3 and GCS in v7.0.0 are experimental. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. 
If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. - - - -> **Note:** -> -> This feature is only available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). - - +> The new parameter `FIELDS DEFINED NULL BY` and support for importing data from S3 and GCS are experimental. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. ## Synopsis @@ -46,6 +38,20 @@ Fields ::= You can use `LOCAL` to specify data files on the client to be imported, where the file parameter must be the file system path on the client. +If you are using TiDB Cloud, to use the `LOAD DATA` statement to load local data files, you need to add the `--local-infile` option to the connection string when you connect to TiDB Cloud. + +- The following is an example connection string for TiDB Serverless: + + ``` + mysql --connect-timeout 15 -u '' -h -P 4000 -D test --ssl-mode=VERIFY_IDENTITY --ssl-ca=/etc/ssl/cert.pem -p --local-infile + ``` + +- The following is an example connection string for TiDB Dedicated: + + ``` + mysql --connect-timeout 15 --ssl-mode=VERIFY_IDENTITY --ssl-ca= --tls-version="TLSv1.2" -u root -h -P 4000 -D test -p --local-infile + ``` + ### S3 and GCS storage diff --git a/sql-statements/sql-statement-lock-tables-and-unlock-tables.md b/sql-statements/sql-statement-lock-tables-and-unlock-tables.md index 60dbf50e0d665..1d1c0c306e365 100644 --- a/sql-statements/sql-statement-lock-tables-and-unlock-tables.md +++ b/sql-statements/sql-statement-lock-tables-and-unlock-tables.md @@ -17,9 +17,12 @@ TiDB enables client sessions to acquire table locks for the purpose of cooperati A table lock protects against reads or writes by other sessions. 
A session that holds a `WRITE` lock can perform table-level operations such as `DROP TABLE` or `TRUNCATE TABLE`. -> **Warning:** +> **Note:** > -> To enable table locks, you need to set [`enable-table-lock`](/tidb-configuration-file.md#enable-table-lock-new-in-v400) to `true` in the configuration files of all TiDB instances. +> The table locks feature is disabled by default. +> +> - For TiDB Self-Hosted, to enable the table locks feature, you need to set [`enable-table-lock`](https://docs.pingcap.com/tidb/stable/tidb-configuration-file#enable-table-lock-new-in-v400) to `true` in the configuration files of all TiDB instances. +> - For TiDB Cloud, to enable the table locks feature, you need to contact [TiDB Cloud Support](https://docs.pingcap.com/tidbcloud/tidb-cloud-support) to set [`enable-table-lock`](https://docs.pingcap.com/tidb/stable/tidb-configuration-file#enable-table-lock-new-in-v400) to `true`. ## Synopsis diff --git a/sql-statements/sql-statement-prepare.md b/sql-statements/sql-statement-prepare.md index 85ce7fffe0b20..9916f3732df90 100644 --- a/sql-statements/sql-statement-prepare.md +++ b/sql-statements/sql-statement-prepare.md @@ -42,7 +42,7 @@ Query OK, 0 rows affected (0.00 sec) ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `PREPARE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-query-watch.md b/sql-statements/sql-statement-query-watch.md new file mode 100644 index 0000000000000..a635d54c231a6 --- /dev/null +++ b/sql-statements/sql-statement-query-watch.md @@ -0,0 +1,55 @@ +--- +title: QUERY WATCH +summary: An overview of the usage of QUERY WATCH for the TiDB database. 
+--- + +# QUERY WATCH + +The `QUERY WATCH` statement is used to manually manage the watch list of runaway queries in a resource group. + +> **Warning:** +> +> This feature is experimental. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. + +## Synopsis + +```ebnf+diagram +AddQueryWatchStmt ::= + "QUERY" "WATCH" "ADD" QueryWatchOptionList +QueryWatchOptionList ::= + QueryWatchOption +| QueryWatchOptionList QueryWatchOption +| QueryWatchOptionList ',' QueryWatchOption +QueryWatchOption ::= + "RESOURCE" "GROUP" ResourceGroupName +| "RESOURCE" "GROUP" UserVariable +| "ACTION" EqOpt ResourceGroupRunawayActionOption +| QueryWatchTextOption +ResourceGroupName ::= + Identifier +| "DEFAULT" +QueryWatchTextOption ::= + "SQL" "DIGEST" SimpleExpr +| "PLAN" "DIGEST" SimpleExpr +| "SQL" "TEXT" ResourceGroupRunawayWatchOption "TO" SimpleExpr + +ResourceGroupRunawayWatchOption ::= + "EXACT" +| "SIMILAR" +| "PLAN" + +DropQueryWatchStmt ::= + "QUERY" "WATCH" "REMOVE" NUM +``` + +## Parameters + +See [`QUERY WATCH` parameters](/tidb-resource-control.md#query-watch-parameters). + +## MySQL compatibility + +This statement is a TiDB extension to MySQL syntax. + +## See also + +* [Runaway Queries](/tidb-resource-control.md#manage-queries-that-consume-more-resources-than-expected-runaway-queries) diff --git a/sql-statements/sql-statement-rename-index.md b/sql-statements/sql-statement-rename-index.md index c227fb0566152..b450aacde6c5e 100644 --- a/sql-statements/sql-statement-rename-index.md +++ b/sql-statements/sql-statement-rename-index.md @@ -52,7 +52,7 @@ Create Table: CREATE TABLE `t1` ( ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. 
+The `RENAME INDEX` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-rename-table.md b/sql-statements/sql-statement-rename-table.md index 857d8ad5ba3ad..933eaef86ab46 100644 --- a/sql-statements/sql-statement-rename-table.md +++ b/sql-statements/sql-statement-rename-table.md @@ -46,7 +46,7 @@ mysql> SHOW TABLES; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `RENAME TABLE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-replace.md b/sql-statements/sql-statement-replace.md index f95340a839051..d45bec6b30327 100644 --- a/sql-statements/sql-statement-replace.md +++ b/sql-statements/sql-statement-replace.md @@ -69,7 +69,7 @@ mysql> SELECT * FROM t1; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `REPLACE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). 
## See also diff --git a/sql-statements/sql-statement-revoke-role.md b/sql-statements/sql-statement-revoke-role.md index e3765cfca91ae..389276d19477e 100644 --- a/sql-statements/sql-statement-revoke-role.md +++ b/sql-statements/sql-statement-revoke-role.md @@ -160,7 +160,7 @@ SHOW GRANTS; ## MySQL compatibility -This statement is understood to be fully compatible with roles, which are a feature of MySQL 8.0. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `REVOKE <role>` statement in TiDB is fully compatible with the roles feature in MySQL 8.0. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-select.md b/sql-statements/sql-statement-select.md index 779811c32922c..e63d2223629dc 100644 --- a/sql-statements/sql-statement-select.md +++ b/sql-statements/sql-statement-select.md @@ -116,7 +116,7 @@ TableSampleOpt ::= |`ORDER BY` | The `ORDER BY` clause is used to sort the data in ascending or descending order, based on columns, expressions or items in the `select_expr` list.| |`LIMIT` | The `LIMIT` clause can be used to constrain the number of rows. `LIMIT` takes one or two numeric arguments. With one argument, the argument specifies the maximum number of rows to return, the first row to return is the first row of the table by default; with two arguments, the first argument specifies the offset of the first row to return, and the second specifies the maximum number of rows to return. TiDB also supports the `FETCH FIRST/NEXT n ROW/ROWS ONLY` syntax, which has the same effect as `LIMIT n`. You can omit `n` in this syntax and its effect is the same as `LIMIT 1`. | |`Window window_definition`| This is the syntax for window function, which is usually used to do some analytical computation.
For more information, refer to [Window Function](/functions-and-operators/window-functions.md). | -| `FOR UPDATE` | The `SELECT FOR UPDATE` clause locks all the data in the result sets to detect concurrent updates from other transactions. Data that match the query conditions but do not exist in the result sets are not read-locked, such as the row data written by other transactions after the current transaction is started. TiDB uses the [Optimistic Transaction Model](/optimistic-transaction.md). The transaction conflicts are not detected in the statement execution phase. Therefore, the current transaction does not block other transactions from executing `UPDATE`, `DELETE` or `SELECT FOR UPDATE` like other databases such as PostgreSQL. In the committing phase, the rows read by `SELECT FOR UPDATE` are committed in two phases, which means they can also join the conflict detection. If write conflicts occur, the commit fails for all transactions that include the `SELECT FOR UPDATE` clause. If no conflict is detected, the commit succeeds. And a new version is generated for the locked rows, so that write conflicts can be detected when other uncommitted transactions are being committed later. When using pessimistic transaction mode, the behavior is basically the same as other databases. Refer to [Difference with MySQL InnoDB](/pessimistic-transaction.md#difference-with-mysql-innodb) to see the details. TiDB supports the `NOWAIT` modifier for `FOR UPDATE`. See [TiDB Pessimistic Transaction Mode](/pessimistic-transaction.md) for details. | +| `FOR UPDATE` | The `SELECT FOR UPDATE` clause locks all the data in the result sets to detect concurrent updates from other transactions. Data that match the query conditions but do not exist in the result sets are not read-locked, such as the row data written by other transactions after the current transaction is started. 
When TiDB uses the [Optimistic Transaction Mode](/optimistic-transaction.md), the transaction conflicts are not detected in the statement execution phase. Therefore, the current transaction does not block other transactions from executing `UPDATE`, `DELETE` or `SELECT FOR UPDATE` like other databases such as PostgreSQL. In the committing phase, the rows read by `SELECT FOR UPDATE` are committed in two phases, which means they can also join the conflict detection. If write conflicts occur, the commit fails for all transactions that include the `SELECT FOR UPDATE` clause. If no conflict is detected, the commit succeeds. And a new version is generated for the locked rows, so that write conflicts can be detected when other uncommitted transactions are being committed later. When TiDB uses the [Pessimistic Transaction Mode](/pessimistic-transaction.md), the behavior is basically the same as other databases. Refer to [Difference with MySQL InnoDB](/pessimistic-transaction.md#difference-with-mysql-innodb) to see the details. TiDB supports the `NOWAIT` modifier for `FOR UPDATE`. See [TiDB Pessimistic Transaction Mode](/pessimistic-transaction.md#behaviors) for details. | |`LOCK IN SHARE MODE` | To guarantee compatibility, TiDB parses these three modifiers, but will ignore them. | | `TABLESAMPLE` | To get a sample of rows from the table. | diff --git a/sql-statements/sql-statement-set-default-role.md b/sql-statements/sql-statement-set-default-role.md index 1b114a8c05d46..a901fb53ae512 100644 --- a/sql-statements/sql-statement-set-default-role.md +++ b/sql-statements/sql-statement-set-default-role.md @@ -141,7 +141,7 @@ ERROR 3530 (HY000): `analyticsteam`@`%` is is not granted to jennifer@% ## MySQL compatibility -This statement is understood to be fully compatible with roles, which are a feature of MySQL 8.0. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. 
+The `SET DEFAULT ROLE` statement in TiDB is fully compatible with the roles feature in MySQL 8.0. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-set-names.md b/sql-statements/sql-statement-set-names.md index b453681c4e990..4eb3f9e85ad93 100644 --- a/sql-statements/sql-statement-set-names.md +++ b/sql-statements/sql-statement-set-names.md @@ -95,7 +95,7 @@ mysql> SHOW VARIABLES LIKE 'character_set%'; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SET [NAMES|CHARACTER SET]` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-set-password.md b/sql-statements/sql-statement-set-password.md index 08904c5240eb5..a75258df5bafa 100644 --- a/sql-statements/sql-statement-set-password.md +++ b/sql-statements/sql-statement-set-password.md @@ -56,7 +56,7 @@ mysql> SHOW CREATE USER 'newuser'; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SET PASSWORD` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). 
## See also diff --git a/sql-statements/sql-statement-set-resource-group.md b/sql-statements/sql-statement-set-resource-group.md index 1275cf6ee1aba..6765d31da08bf 100644 --- a/sql-statements/sql-statement-set-resource-group.md +++ b/sql-statements/sql-statement-set-resource-group.md @@ -11,7 +11,7 @@ summary: An overview of the usage of SET RESOURCE GROUP in the TiDB database. > **Note:** > -> This feature is not available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). +> This feature is not available on [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). @@ -21,10 +21,11 @@ summary: An overview of the usage of SET RESOURCE GROUP in the TiDB database. ```ebnf+diagram SetResourceGroupStmt ::= - "SET" "RESOURCE" "GROUP" ResourceGroupName + "SET" "RESOURCE" "GROUP" ResourceGroupName ResourceGroupName ::= - Identifier + Identifier +| "DEFAULT" ``` ## Examples @@ -71,7 +72,7 @@ SELECT CURRENT_RESOURCE_GROUP(); Execute `SET RESOURCE GROUP` to specify the current session to use the default resource group. ```sql -SET RESOURCE GROUP ``; +SET RESOURCE GROUP `default`; SELECT CURRENT_RESOURCE_GROUP(); ``` diff --git a/sql-statements/sql-statement-set-role.md b/sql-statements/sql-statement-set-role.md index dee5238082196..788ca0bfbacec 100644 --- a/sql-statements/sql-statement-set-role.md +++ b/sql-statements/sql-statement-set-role.md @@ -109,7 +109,7 @@ SELECT CURRENT_ROLE(); ## MySQL compatibility -This statement is understood to be fully compatible with roles, which are a feature of MySQL 8.0. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SET ROLE` statement in TiDB is fully compatible with the roles feature in MySQL 8.0. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). 
## See also diff --git a/sql-statements/sql-statement-show-analyze-status.md b/sql-statements/sql-statement-show-analyze-status.md index bba1da6b6dac1..087a6e8819176 100644 --- a/sql-statements/sql-statement-show-analyze-status.md +++ b/sql-statements/sql-statement-show-analyze-status.md @@ -12,6 +12,8 @@ Starting from TiDB v6.1.0, the `SHOW ANALYZE STATUS` statement supports showing Starting from TiDB v6.1.0, you can view the history tasks within the last 7 days through the system table `mysql.analyze_jobs`. +Starting from TiDB v7.3.0, you can view the progress of the current `ANALYZE` task through the system table `mysql.analyze_jobs` or `SHOW ANALYZE STATUS`. + ## Synopsis ```ebnf+diagram @@ -22,8 +24,6 @@ ShowLikeOrWhereOpt ::= 'LIKE' SimpleExpr | 'WHERE' Expression ## Examples -{{< copyable "sql" >}} - ```sql mysql> create table t(x int, index idx(x)) partition by hash(x) partitions 2; Query OK, 0 rows affected (0.69 sec) @@ -35,14 +35,15 @@ mysql> analyze table t; Query OK, 0 rows affected (0.20 sec) mysql> show analyze status; -+--------------+------------+----------------+-------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ -| Table_schema | Table_name | Partition_name | Job_info | Processed_rows | Start_time | End_time | State | Fail_reason | Instance | Process_ID | -+--------------+------------+----------------+-------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ -| test | t | p1 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p0 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p1 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p0 | analyze columns | 0 | 
2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -+--------------+------------+----------------+-------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ ++--------------+------------+----------------+-------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+------------------+----------+---------------------+ +| Table_schema | Table_name | Partition_name | Job_info | Processed_rows | Start_time | End_time | State | Fail_reason | Instance | Process_ID | Remaining_seconds| Progress | Estimated_total_rows| ++--------------+------------+----------------+-------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+------------------+----------+---------------------+ +| test | t | p1 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p0 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p1 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p0 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t1 | p0 | analyze columns | 28523259 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | running | NULL | 127.0.0.1:4000 | 690208308 | 0s | 0.9843 | 28978290 | ++--------------+------------+----------------+-------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+------------------+----------+---------------------+ 4 rows in set (0.01 sec) mysql> set @@tidb_analyze_version = 2; 
@@ -52,16 +53,16 @@ mysql> analyze table t; Query OK, 0 rows affected, 2 warnings (0.03 sec) mysql> show analyze status; -+--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ -| Table_schema | Table_name | Partition_name | Job_info | Processed_rows | Start_time | End_time | State | Fail_reason | Instance | Process_ID | -+--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ -| test | t | p1 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 0 | 2022-05-27 11:30:12 | 2022-05-27 11:30:12 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p0 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 0 | 2022-05-27 11:30:12 | 2022-05-27 11:30:12 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p1 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p0 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p1 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -| test | t | p0 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | -+--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+ 
++--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+--------------------+----------+----------------------+ +| Table_schema | Table_name | Partition_name | Job_info | Processed_rows | Start_time | End_time | State | Fail_reason | Instance | Process_ID | Remaining_seconds | Progress | Estimated_total_rows | ++--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+--------------------+----------+----------------------+ +| test | t | p1 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 0 | 2022-05-27 11:30:12 | 2022-05-27 11:30:12 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p0 | analyze table all columns with 256 buckets, 500 topn, 1 samplerate | 0 | 2022-05-27 11:30:12 | 2022-05-27 11:30:12 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p1 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p0 | analyze index idx | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p1 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | +| test | t | p0 | analyze columns | 0 | 2022-05-27 11:29:46 | 2022-05-27 11:29:46 | finished | NULL | 127.0.0.1:4000 | NULL | NULL | NULL | NULL | 
++--------------+------------+----------------+--------------------------------------------------------------------+----------------+---------------------+---------------------+----------+-------------+----------------+------------+--------------------+----------+----------------------+ 6 rows in set (0.00 sec) ``` diff --git a/sql-statements/sql-statement-show-bindings.md b/sql-statements/sql-statement-show-bindings.md index 4fe2548dd6d77..53d2dcb001e76 100644 --- a/sql-statements/sql-statement-show-bindings.md +++ b/sql-statements/sql-statement-show-bindings.md @@ -54,11 +54,11 @@ This statement outputs the execution plan bindings at the GLOBAL or SESSION leve ```sql mysql> CREATE TABLE t1 ( - -> id INT NOT NULL PRIMARY KEY auto_increment, - -> b INT NOT NULL, - -> pad VARBINARY(255), - -> INDEX(b) - -> ); + id INT NOT NULL PRIMARY KEY auto_increment, + b INT NOT NULL, + pad VARBINARY(255), + INDEX(b) + ); Query OK, 0 rows affected (0.07 sec) mysql> INSERT INTO t1 SELECT NULL, FLOOR(RAND()*1000), RANDOM_BYTES(255) FROM dual; @@ -111,9 +111,9 @@ mysql> EXPLAIN ANALYZE SELECT * FROM t1 WHERE b = 123; 3 rows in set (0.02 sec) mysql> CREATE SESSION BINDING FOR - -> SELECT * FROM t1 WHERE b = 123 - -> USING - -> SELECT * FROM t1 IGNORE INDEX (b) WHERE b = 123; + SELECT * FROM t1 WHERE b = 123 + USING + SELECT * FROM t1 IGNORE INDEX (b) WHERE b = 123; Query OK, 0 rows affected (0.00 sec) mysql> EXPLAIN ANALYZE SELECT * FROM t1 WHERE b = 123; diff --git a/sql-statements/sql-statement-show-character-set.md b/sql-statements/sql-statement-show-character-set.md index 4a5098cc7f860..77659b7e96152 100644 --- a/sql-statements/sql-statement-show-character-set.md +++ b/sql-statements/sql-statement-show-character-set.md @@ -36,7 +36,7 @@ mysql> SHOW CHARACTER SET; ## MySQL compatibility -The usage of this statement is understood to be fully compatible with MySQL. However, charsets in TiDB may have different default collations compared with MySQL. 
For details, refer to [Compatibility with MySQL](/mysql-compatibility.md). Any other compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The usage of the `SHOW CHARACTER SET` statement in TiDB is fully compatible with MySQL. However, charsets in TiDB might have different default collations compared with MySQL. For details, refer to [Compatibility with MySQL](/mysql-compatibility.md). If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-collation.md b/sql-statements/sql-statement-show-collation.md index ecbce91476f51..7a253300785f2 100644 --- a/sql-statements/sql-statement-show-collation.md +++ b/sql-statements/sql-statement-show-collation.md @@ -60,7 +60,7 @@ mysql> SHOW COLLATION; ## MySQL compatibility -The usage of this statement is understood to be fully compatible with MySQL. However, charsets in TiDB may have different default collations compared with MySQL. For details, refer to [Compatibility with MySQL](/mysql-compatibility.md). Any other compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The usage of the `SHOW COLLATION` statement in TiDB is fully compatible with MySQL. However, charsets in TiDB might have different default collations compared with MySQL. For details, refer to [Compatibility with MySQL](/mysql-compatibility.md). If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). 
## See also diff --git a/sql-statements/sql-statement-show-columns-from.md b/sql-statements/sql-statement-show-columns-from.md index 79e15bb6a9f8b..166c998d7f555 100644 --- a/sql-statements/sql-statement-show-columns-from.md +++ b/sql-statements/sql-statement-show-columns-from.md @@ -158,7 +158,7 @@ mysql> show full columns from mysql.user; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW [FULL] COLUMNS FROM` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-create-resource-group.md b/sql-statements/sql-statement-show-create-resource-group.md index f6a3173ba7482..8d3b94d150018 100644 --- a/sql-statements/sql-statement-show-create-resource-group.md +++ b/sql-statements/sql-statement-show-create-resource-group.md @@ -9,7 +9,7 @@ summary: Learn the usage of SHOW CREATE RESOURCE GROUP in TiDB. > **Note:** > -> This feature is not available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). +> This feature is not available on [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). @@ -23,6 +23,7 @@ ShowCreateResourceGroupStmt ::= ResourceGroupName ::= Identifier +| "DEFAULT" ``` ## Examples diff --git a/sql-statements/sql-statement-show-create-table.md b/sql-statements/sql-statement-show-create-table.md index 27f404c856e30..eff43d499701f 100644 --- a/sql-statements/sql-statement-show-create-table.md +++ b/sql-statements/sql-statement-show-create-table.md @@ -37,7 +37,7 @@ mysql> SHOW CREATE TABLE t1; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. 
Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW CREATE TABLE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-databases.md b/sql-statements/sql-statement-show-databases.md index 0760cef967797..fc422e8fd2271 100644 --- a/sql-statements/sql-statement-show-databases.md +++ b/sql-statements/sql-statement-show-databases.md @@ -52,7 +52,7 @@ mysql> SHOW DATABASES; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW DATABASES` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-errors.md b/sql-statements/sql-statement-show-errors.md index 0d7875b9ab460..27bf0dc76a35a 100644 --- a/sql-statements/sql-statement-show-errors.md +++ b/sql-statements/sql-statement-show-errors.md @@ -48,7 +48,7 @@ Empty set (0.00 sec) ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW ERRORS` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). 
## See also diff --git a/sql-statements/sql-statement-show-grants.md b/sql-statements/sql-statement-show-grants.md index 0821858083b52..739a7e08d4435 100644 --- a/sql-statements/sql-statement-show-grants.md +++ b/sql-statements/sql-statement-show-grants.md @@ -61,7 +61,7 @@ mysql> SHOW GRANTS FOR u1; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW GRANTS` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-import-job.md b/sql-statements/sql-statement-show-import-job.md new file mode 100644 index 0000000000000..f392edb3df1c2 --- /dev/null +++ b/sql-statements/sql-statement-show-import-job.md @@ -0,0 +1,84 @@ +--- +title: SHOW IMPORT +summary: An overview of the usage of SHOW IMPORT in TiDB. +--- + +# SHOW IMPORT + +The `SHOW IMPORT` statement is used to show the IMPORT jobs created in TiDB. This statement can only show jobs created by the current user. + + + +## Required privileges + +- `SHOW IMPORT JOBS`: if a user has the `SUPER` privilege, this statement shows all import jobs in TiDB. Otherwise, this statement only shows jobs created by the current user. +- `SHOW IMPORT JOB <job_id>`: only the creator of an import job or users with the `SUPER` privilege can use this statement to view a specific job. 
+ +## Synopsis + +```ebnf+diagram +ShowImportJobsStmt ::= + 'SHOW' 'IMPORT' 'JOBS' + +ShowImportJobStmt ::= + 'SHOW' 'IMPORT' 'JOB' JobID +``` + +The output fields of the `SHOW IMPORT` statement are described as follows: + +| Column | Description | +|------------------|-------------------------| +| Job_ID | The ID of the task | +| Data_Source | Information about the data source | +| Target_Table | The name of the target table | +| Phase | The current phase of the job, including `importing`, `validating`, and `add-index` | +| Status | The current status of the job, including `pending` (means created but not started yet), `running`, `canceled`, `failed`, and `finished` | +| Source_File_Size | The size of the source file | +| Imported_Rows | The number of data rows that have been read and written to the target table | +| Result_Message | If the import fails, this field returns the error message. Otherwise, it is empty.| +| Create_Time | The time when the task is created | +| Start_Time | The time when the task is started | +| End_Time | The time when the task is ended | +| Created_By | The name of the database user who creates the task | + +## Example + +```sql +SHOW IMPORT JOBS; +``` + +``` ++--------+-------------------+--------------+----------+-------+----------+------------------+---------------+----------------+----------------------------+----------------------------+----------------------------+------------+ +| Job_ID | Data_Source | Target_Table | Table_ID | Phase | Status | Source_File_Size | Imported_Rows | Result_Message | Create_Time | Start_Time | End_Time | Created_By | ++--------+-------------------+--------------+----------+-------+----------+------------------+---------------+----------------+----------------------------+----------------------------+----------------------------+------------+ +| 1 | /path/to/file.csv | `test`.`foo` | 116 | | finished | 11GB | 950000 | | 2023-06-26 11:23:59.281257 | 2023-06-26 11:23:59.484932 | 2023-06-26 
13:04:30.622952 | root@% | +| 2 | /path/to/file.csv | `test`.`bar` | 130 | | finished | 1.194TB | 49995000 | | 2023-06-26 15:42:45.079237 | 2023-06-26 15:42:45.388108 | 2023-06-26 17:29:43.023568 | root@% | ++--------+-------------------+--------------+----------+-------+----------+------------------+---------------+----------------+----------------------------+----------------------------+----------------------------+------------+ +2 rows in set (0.01 sec) +``` + +```sql +SHOW IMPORT JOB 60001; +``` + +``` ++--------+--------------------+--------------+----------+-------+---------+------------------+---------------+----------------+----------------------------+------------+----------+------------+ +| Job_ID | Data_Source | Target_Table | Table_ID | Phase | Status | Source_File_Size | Imported_Rows | Result_Message | Create_Time | Start_Time | End_Time | Created_By | ++--------+--------------------+--------------+----------+-------+---------+------------------+---------------+----------------+----------------------------+------------+----------+------------+ +| 60001 | /path/to/small.csv | `test`.`t` | 361 | | pending | 16B | NULL | | 2023-06-08 15:59:37.047703 | NULL | NULL | root@% | ++--------+--------------------+--------------+----------+-------+---------+------------------+---------------+----------------+----------------------------+------------+----------+------------+ +1 row in set (0.01 sec) +``` + +## MySQL compatibility + +This statement is a TiDB extension to MySQL syntax. 
+ +## See also + +* [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md) +* [`CANCEL IMPORT JOB`](/sql-statements/sql-statement-cancel-import-job.md) diff --git a/sql-statements/sql-statement-show-indexes.md b/sql-statements/sql-statement-show-indexes.md index 8c99a9b0c5f16..5616f807e5722 100644 --- a/sql-statements/sql-statement-show-indexes.md +++ b/sql-statements/sql-statement-show-indexes.md @@ -72,7 +72,7 @@ mysql> SHOW KEYS FROM t1; ## MySQL compatibility -The `Cardinality` column in MySQL shows the number of different values on the index. In TiDB, the `Cardinality` column always shows `0`. +The `SHOW INDEXES [FROM|IN]` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-plugins.md b/sql-statements/sql-statement-show-plugins.md index 55685ff20ba72..3cc97a08ffae8 100644 --- a/sql-statements/sql-statement-show-plugins.md +++ b/sql-statements/sql-statement-show-plugins.md @@ -52,4 +52,4 @@ SHOW PLUGINS LIKE 'a%'; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW PLUGINS` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). diff --git a/sql-statements/sql-statement-show-privileges.md b/sql-statements/sql-statement-show-privileges.md index 986e1260b71fe..9054f19985145 100644 --- a/sql-statements/sql-statement-show-privileges.md +++ b/sql-statements/sql-statement-show-privileges.md @@ -76,7 +76,7 @@ mysql> show privileges; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. 
Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW PRIVILEGES` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-table-status.md b/sql-statements/sql-statement-show-table-status.md index e663ed7907ff9..b2327d3987461 100644 --- a/sql-statements/sql-statement-show-table-status.md +++ b/sql-statements/sql-statement-show-table-status.md @@ -82,7 +82,7 @@ Max_data_length: 0 ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW TABLE STATUS` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-tables.md b/sql-statements/sql-statement-show-tables.md index 97cf3e6ccfeb2..80530f2ff4dcf 100644 --- a/sql-statements/sql-statement-show-tables.md +++ b/sql-statements/sql-statement-show-tables.md @@ -85,7 +85,7 @@ mysql> SHOW TABLES IN mysql; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW [FULL] TABLES` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). 
## See also diff --git a/sql-statements/sql-statement-show-variables.md b/sql-statements/sql-statement-show-variables.md index 223c5fe00ca0e..68ea60dddeed3 100644 --- a/sql-statements/sql-statement-show-variables.md +++ b/sql-statements/sql-statement-show-variables.md @@ -151,7 +151,7 @@ mysql> SHOW GLOBAL VARIABLES LIKE 'time_zone%'; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW [GLOBAL|SESSION] VARIABLES` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-show-warnings.md b/sql-statements/sql-statement-show-warnings.md index 45a317d37c656..6b43d6d0484d1 100644 --- a/sql-statements/sql-statement-show-warnings.md +++ b/sql-statements/sql-statement-show-warnings.md @@ -76,7 +76,7 @@ mysql> SELECT * FROM t1; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `SHOW WARNINGS` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-truncate.md b/sql-statements/sql-statement-truncate.md index e293ad391723a..b45f7674798a0 100644 --- a/sql-statements/sql-statement-truncate.md +++ b/sql-statements/sql-statement-truncate.md @@ -62,7 +62,7 @@ Query OK, 0 rows affected (0.11 sec) ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. 
Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `TRUNCATE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/sql-statements/sql-statement-use.md b/sql-statements/sql-statement-use.md index 6a6507cc522ad..802b8b478711a 100644 --- a/sql-statements/sql-statement-use.md +++ b/sql-statements/sql-statement-use.md @@ -75,7 +75,7 @@ mysql> SHOW TABLES; ## MySQL compatibility -This statement is understood to be fully compatible with MySQL. Any compatibility differences should be [reported via an issue](https://github.com/pingcap/tidb/issues/new/choose) on GitHub. +The `USE` statement in TiDB is fully compatible with MySQL. If you find any compatibility differences, report them via [an issue on GitHub](https://github.com/pingcap/tidb/issues/new/choose). ## See also diff --git a/statement-summary-tables.md b/statement-summary-tables.md index d03e283deac4c..fb75efdafa18c 100644 --- a/statement-summary-tables.md +++ b/statement-summary-tables.md @@ -20,8 +20,8 @@ Therefore, starting from v4.0.0-rc.1, TiDB provides system tables in `informatio > **Note:** > -> The following tables are unavailable for [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta): `statements_summary`, `statements_summary_history`, `cluster_statements_summary`, and `cluster_statements_summary_history`. - +> The following tables are unavailable for [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless): `statements_summary`, `statements_summary_history`, `cluster_statements_summary`, and `cluster_statements_summary_history`. + This document details these tables and introduces how to use them to troubleshoot SQL performance issues. 
@@ -201,7 +201,7 @@ To address this issue, TiDB v6.6.0 experimentally introduces the [statement summ -This section is only applicable to on-premises TiDB. For TiDB Cloud, the value of the `tidb_stmt_summary_enable_persistent` parameter is `false` by default and does not support dynamic modification. +This section is only applicable to TiDB Self-Hosted. For TiDB Cloud, the value of the `tidb_stmt_summary_enable_persistent` parameter is `false` by default and does not support dynamic modification. diff --git a/statistics.md b/statistics.md index 0b50fc916f9ea..7728ee73a7397 100644 --- a/statistics.md +++ b/statistics.md @@ -12,7 +12,7 @@ TiDB uses statistics to decide [which index to choose](/choose-index.md). The `tidb_analyze_version` variable controls the statistics collected by TiDB. Currently, two versions of statistics are supported: `tidb_analyze_version = 1` and `tidb_analyze_version = 2`. -- For on-premises TiDB, the default value of this variable is `1` before v5.1.0. In v5.3.0 and later versions, the default value of this variable is `2`. If your cluster is upgraded from a version earlier than v5.3.0 to v5.3.0 or later, the default value of `tidb_analyze_version` does not change. +- For TiDB Self-Hosted, the default value of this variable is `1` before v5.1.0. In v5.3.0 and later versions, the default value of this variable is `2`. If your cluster is upgraded from a version earlier than v5.3.0 to v5.3.0 or later, the default value of `tidb_analyze_version` does not change. - For TiDB Cloud, the default value of this variable is `1`. Compared to Version 1, Version 2 statistics avoids the potential inaccuracy caused by hash collision when the data volume is huge. It also maintains the estimate precision in most scenarios. @@ -37,22 +37,22 @@ When `tidb_analyze_version = 2`, if memory overflow occurs after `ANALYZE` is ex - If the `ANALYZE` statement is executed manually, manually analyze every table to be analyzed. 
- ```sql - SELECT DISTINCT(CONCAT('ANALYZE TABLE ', table_schema, '.', table_name, ';')) FROM information_schema.tables, mysql.stats_histograms WHERE stats_ver = 2 AND table_id = tidb_table_id; - ``` + ```sql + SELECT DISTINCT(CONCAT('ANALYZE TABLE ', table_schema, '.', table_name, ';')) FROM information_schema.tables, mysql.stats_histograms WHERE stats_ver = 2 AND table_id = tidb_table_id; + ``` - If TiDB automatically executes the `ANALYZE` statement because the auto-analysis has been enabled, execute the following statement that generates the `DROP STATS` statement: - ```sql - SELECT DISTINCT(CONCAT('DROP STATS ', table_schema, '.', table_name, ';')) FROM information_schema.tables, mysql.stats_histograms WHERE stats_ver = 2 AND table_id = tidb_table_id; - ``` + ```sql + SELECT DISTINCT(CONCAT('DROP STATS ', table_schema, '.', table_name, ';')) FROM information_schema.tables, mysql.stats_histograms WHERE stats_ver = 2 AND table_id = tidb_table_id; + ``` - If the result of the preceding statement is too long to copy and paste, you can export the result to a temporary text file and then perform execution from the file like this: - ```sql - SELECT DISTINCT ... INTO OUTFILE '/tmp/sql.txt'; - mysql -h ${TiDB_IP} -u user -P ${TIDB_PORT} ... < '/tmp/sql.txt' - ``` + ```sql + SELECT DISTINCT ... INTO OUTFILE '/tmp/sql.txt'; + mysql -h ${TiDB_IP} -u user -P ${TIDB_PORT} ... < '/tmp/sql.txt' + ``` This document briefly introduces the histogram, Count-Min Sketch, and Top-N, and details the collection and maintenance of statistics. @@ -147,7 +147,8 @@ If a table has many columns, collecting statistics on all the columns can cause > **Note:** > -> Collecting statistics on some columns is only applicable for `tidb_analyze_version = 2`. +> - Collecting statistics on some columns is only applicable for [`tidb_analyze_version = 2`](/system-variables.md#tidb_analyze_version-new-in-v510). 
+> - Starting from TiDB v7.2.0, TiDB introduces the [`tidb_analyze_skip_column_types`](/system-variables.md#tidb_analyze_skip_column_types-new-in-v720) system variable, indicating which types of columns are skipped for statistics collection when executing the `ANALYZE` command to collect statistics. The system variable is only applicable for `tidb_analyze_version = 2`. - To collect statistics on specific columns, use the following syntax: @@ -322,11 +323,15 @@ When accessing partitioned tables in [dynamic pruning mode](/partitioned-table.m > **Note:** > -> - When GlobalStats update is triggered: +> - When GlobalStats update is triggered and [`tidb_skip_missing_partition_stats`](/system-variables.md#tidb_skip_missing_partition_stats-new-in-v730) is `OFF`: > > - If some partitions have no statistics (such as a new partition that has never been analyzed), GlobalStats generation is interrupted and a warning message is displayed saying that no statistics are available on partitions. > - If statistics of some columns are absent in specific partitions (different columns are specified for analyzing in these partitions), GlobalStats generation is interrupted when statistics of these columns are aggregated, and a warning message is displayed saying that statistics of some columns are absent in specific partitions. > +> - When GlobalStats update is triggered and [`tidb_skip_missing_partition_stats`](/system-variables.md#tidb_skip_missing_partition_stats-new-in-v730) is `ON`: +> +> If statistics of all or some columns are missing for some partitions, TiDB skips these missing partition statistics when generating GlobalStats so the generation of GlobalStats is not affected. +> > - In dynamic pruning mode, the Analyze configurations of partitions and tables should be the same. 
Therefore, if you specify the `COLUMNS` configuration following the `ANALYZE TABLE TableName PARTITION PartitionNameList` statement or the `OPTIONS` configuration following `WITH`, TiDB will ignore them and return a warning. #### Incremental collection @@ -743,18 +748,14 @@ After enabling the synchronously loading statistics feature, you can further con - To specify the maximum number of columns that the synchronously loading statistics feature can process concurrently, modify the value of the [`stats-load-concurrency`](/tidb-configuration-file.md#stats-load-concurrency-new-in-v540) option in the TiDB configuration file. The default value is `5`. - To specify the maximum number of column requests that the synchronously loading statistics feature can cache, modify the value of the [`stats-load-queue-size`](/tidb-configuration-file.md#stats-load-queue-size-new-in-v540) option in the TiDB configuration file. The default value is `1000`. -During TiDB startup, SQL statements executed before the initial statistics are fully loaded might have suboptimal execution plans, thus causing performance issues. To avoid such issues, TiDB v7.1.0 introduces the configuration parameter [`force-init-stats`](/tidb-configuration-file.md#force-init-stats-new-in-v710). With this option, you can control whether TiDB provides services only after statistics initialization has been finished during startup. This parameter is disabled by default. - -> **Warning:** -> -> Lightweight statistics initialization is an experimental feature. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. +During TiDB startup, SQL statements executed before the initial statistics are fully loaded might have suboptimal execution plans, thus causing performance issues. 
To avoid such issues, TiDB v7.1.0 introduces the configuration parameter [`force-init-stats`](/tidb-configuration-file.md#force-init-stats-new-in-v710). With this option, you can control whether TiDB provides services only after statistics initialization has been finished during startup. Starting from v7.2.0, this parameter is enabled by default. Starting from v7.1.0, TiDB introduces [`lite-init-stats`](/tidb-configuration-file.md#lite-init-stats-new-in-v710) for lightweight statistics initialization. - When the value of `lite-init-stats` is `true`, statistics initialization does not load any histogram, TopN, or Count-Min Sketch of indexes or columns into memory. - When the value of `lite-init-stats` is `false`, statistics initialization loads histograms, TopN, and Count-Min Sketch of indexes and primary keys into memory but does not load any histogram, TopN, or Count-Min Sketch of non-primary key columns into memory. When the optimizer needs the histogram, TopN, and Count-Min Sketch of a specific index or column, the necessary statistics are loaded into memory synchronously or asynchronously. -The default value of `lite-init-stats` is `false`, which means to disable lightweight statistics initialization. Setting `lite-init-stats` to `true` speeds up statistics initialization and reduces TiDB memory usage by avoiding unnecessary statistics loading. +The default value of `lite-init-stats` is `true`, which means to enable lightweight statistics initialization. Setting `lite-init-stats` to `true` speeds up statistics initialization and reduces TiDB memory usage by avoiding unnecessary statistics loading. diff --git a/system-variables.md b/system-variables.md index 8915f21e846c1..eb61b5b49bef0 100644 --- a/system-variables.md +++ b/system-variables.md @@ -521,7 +521,7 @@ This variable is an alias for [`last_insert_id`](#last_insert_id). 
- Scope: SESSION - Type: Integer - Default value: `0` -- Range: `[0, 9223372036854775807]` +- Range: `[0, 18446744073709551615]` - This variable returns the last `AUTO_INCREMENT` or `AUTO_RANDOM` value generated by an insert statement. - The value of `last_insert_id` is the same as the value returned by the function `LAST_INSERT_ID()`. @@ -631,7 +631,7 @@ This variable is an alias for [`last_insert_id`](#last_insert_id). > **Note:** > -> Unlike in MySQL, the `max_execution_time` system variable currently works on all kinds of statements in TiDB, not only restricted to the `SELECT` statement. The precision of the timeout value is roughly 100ms. This means the statement might not be terminated in accurate milliseconds as you specify. +> The `max_execution_time` system variable currently only controls the maximum execution time for read-only SQL statements. The precision of the timeout value is roughly 100ms. This means the statement might not be terminated in accurate milliseconds as you specify. @@ -941,7 +941,7 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a - Scope: SESSION | GLOBAL - Persists to cluster: Yes - Type: Integer -- Default value: `2` for on-premises TiDB and `1` for TiDB Cloud +- Default value: `2` for TiDB Self-Hosted and `1` for TiDB Cloud - Range: `[1, 2]` - Controls how TiDB collects statistics. @@ -957,6 +957,60 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a +### tidb_analyze_skip_column_types New in v7.2.0 + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Default value: "json,blob,mediumblob,longblob" +- Possible values: "json,blob,mediumblob,longblob,text,mediumtext,longtext" +- This variable controls which types of columns are skipped for statistics collection when executing the `ANALYZE` command to collect statistics. The variable is only applicable for `tidb_analyze_version = 2`. Even if you specify a column using `ANALYZE TABLE t COLUMNS c1, ... 
, cn`, no statistics will be collected for the specified column if its type is in `tidb_analyze_skip_column_types`. + +``` +mysql> SHOW CREATE TABLE t; ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +| t | CREATE TABLE `t` ( + `a` int(11) DEFAULT NULL, + `b` varchar(10) DEFAULT NULL, + `c` json DEFAULT NULL, + `d` blob DEFAULT NULL, + `e` longblob DEFAULT NULL +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin | ++-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.00 sec) + +mysql> SELECT @@tidb_analyze_skip_column_types; ++----------------------------------+ +| @@tidb_analyze_skip_column_types | ++----------------------------------+ +| json,blob,mediumblob,longblob | ++----------------------------------+ +1 row in set (0.00 sec) + +mysql> ANALYZE TABLE t; +Query OK, 0 rows affected, 1 warning (0.05 sec) + +mysql> SELECT job_info FROM mysql.analyze_jobs ORDER BY end_time DESC LIMIT 1; ++---------------------------------------------------------------------+ +| job_info | ++---------------------------------------------------------------------+ +| analyze table columns a, b with 256 buckets, 500 topn, 1 samplerate | ++---------------------------------------------------------------------+ +1 row in set (0.00 sec) + +mysql> ANALYZE TABLE t COLUMNS a, c; +Query OK, 0 rows affected, 1 warning (0.04 sec) + +mysql> SELECT job_info FROM 
mysql.analyze_jobs ORDER BY end_time DESC LIMIT 1; ++------------------------------------------------------------------+ +| job_info | ++------------------------------------------------------------------+ +| analyze table columns a with 256 buckets, 500 topn, 1 samplerate | ++------------------------------------------------------------------+ +1 row in set (0.00 sec) +``` + ### tidb_auto_analyze_end_time - Scope: GLOBAL @@ -1310,7 +1364,7 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a > **Note:** > -> To improve the speed for index creation using this variable, make sure that your TiDB cluster is hosted on AWS and your TiDB node size is at least 8 vCPU. For [Serverless Tier](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta) clusters, this feature is unavailable. +> To improve the speed for index creation using this variable, make sure that your TiDB cluster is hosted on AWS and your TiDB node size is at least 8 vCPU. For [TiDB Serverless](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless) clusters, this feature is unavailable. @@ -1330,7 +1384,9 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a > **Note:** > -> Before you upgrade TiDB to v6.5.0 or later, it is recommended that you check whether the [`temp-dir`](/tidb-configuration-file.md#temp-dir-new-in-v630) path of TiDB is correctly mounted to an SSD disk. This path is a TiDB configuration item, which takes effect after TiDB is restarted. Therefore, setting this configuration item before upgrading can avoid another restart. +> * Index acceleration requires a [`temp-dir`](/tidb-configuration-file.md#temp-dir-new-in-v630) that is writable and has enough free space. If the `temp-dir` is unusable, TiDB falls back to non-accelerated index building. It is recommended to put the `temp-dir` on a SSD disk. 
+>
+> * Before you upgrade TiDB to v6.5.0 or later, it is recommended that you check whether the [`temp-dir`](/tidb-configuration-file.md#temp-dir-new-in-v630) path of TiDB is correctly mounted to an SSD disk. Make sure that the operating system user that runs TiDB has the read and write permissions for this directory. Otherwise, the DDL operations might experience unpredictable issues. This path is a TiDB configuration item, which takes effect after TiDB is restarted. Therefore, setting this configuration item before upgrading can avoid another restart. @@ -1354,7 +1410,8 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a - Persists to cluster: Yes - Default value: `OFF` - This variable is used to control whether to enable the [TiDB backend task distributed execution framework](/tidb-distributed-execution-framework.md). After the framework is enabled, backend tasks such as DDL and import will be distributedly executed and completed by multiple TiDB nodes in the cluster. -- In TiDB v7.1.0, the framework supports distributedly executing only the `ADD INDEX` statement for partitioned tables. +- Starting from TiDB v7.1.0, the framework supports distributedly executing the [`ADD INDEX`](/sql-statements/sql-statement-add-index.md) statement for partitioned tables. +- Starting from TiDB v7.2.0, the framework supports distributedly executing the [`IMPORT INTO`](https://docs.pingcap.com/tidb/v7.2/sql-statement-import-into) statement for import jobs of TiDB Self-Hosted. For TiDB Cloud, the `IMPORT INTO` statement is not applicable. - This variable is renamed from `tidb_ddl_distribute_reorg`. ### tidb_ddl_error_count_limit @@ -1560,6 +1617,14 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a - Default value: `OFF` - This variable is used to control whether to enable the cascades planner.
+### tidb_enable_check_constraint New in v7.2.0 + +- Scope: GLOBAL +- Persists to cluster: Yes +- Type: Boolean +- Default value: `OFF` +- This variable is used to control whether to enable the [`CHECK` constraint](/constraints.md#check) feature. + ### tidb_enable_chunk_rpc New in v4.0 - Scope: SESSION @@ -1686,6 +1751,19 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a - This variable is used to set whether to enable the statistics `Fast Analyze` feature. - If the statistics `Fast Analyze` feature is enabled, TiDB randomly samples about 10,000 rows of data as statistics. When the data is distributed unevenly or the data size is small, the statistics accuracy is low. This might lead to a non-optimal execution plan, for example, selecting a wrong index. If the execution time of the regular `Analyze` statement is acceptable, it is recommended to disable the `Fast Analyze` feature. +### tidb_enable_fast_table_check New in v7.2.0 + +> **Note:** +> +> This variable does not work for [multi-valued indexes](/sql-statements/sql-statement-create-index.md#multi-valued-indexes) and prefix indexes. + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Boolean +- Default value: `ON` +- This variable is used to control whether to use a checksum-based approach to quickly check the integrity of data and indexes in a table. The default value `ON` means this feature is enabled by default. +- When this variable is enabled, TiDB can execute the [`ADMIN CHECK [TABLE|INDEX]`](/sql-statements/sql-statement-admin-check-table-index.md) statement in a faster way.
+ ### tidb_enable_foreign_key New in v6.3.0 - Scope: GLOBAL @@ -1715,7 +1793,7 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a - Scope: SESSION | GLOBAL - Persists to cluster: Yes - Type: Boolean -- Default value: `OFF` +- Default value: `ON` - This variable controls whether to enable the [Non-prepared plan cache](/sql-non-prepared-plan-cache.md) feature. ### tidb_enable_non_prepared_plan_cache_for_dml New in v7.1.0 @@ -1841,7 +1919,7 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a - Persists to cluster: Yes - Type: Boolean - Default value: `OFF` -- This variable controls the behavior when TiDB performs the `ONLY_FULL_GOUP_BY` check. For detailed information about `ONLY_FULL_GROUP_BY`, see the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sqlmode_only_full_group_by). In v6.1.0, TiDB handles this check more strictly and correctly. +- This variable controls the behavior when TiDB performs the `ONLY_FULL_GROUP_BY` check. For detailed information about `ONLY_FULL_GROUP_BY`, see the [MySQL documentation](https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sqlmode_only_full_group_by). In v6.1.0, TiDB handles this check more strictly and correctly. - To avoid potential compatibility issues caused by version upgrades, the default value of this variable is `OFF` in v6.1.0. ### tidb_enable_noop_functions New in v4.0 @@ -2067,9 +2145,9 @@ MPP is a distributed computing framework provided by the TiFlash engine, which a ### tidb_enable_resource_control New in v6.6.0 -> **Warning:** +> **Note:** > -> This feature is experimental and its form and usage might change in subsequent versions. +> This variable is read-only for [TiDB Serverless](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). 
- Scope: GLOBAL - Persists to cluster: Yes @@ -2183,6 +2261,48 @@ Query OK, 0 rows affected (0.09 sec) +### tidb_enable_tiflash_pipeline_model New in v7.2.0 + + + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Boolean +- Default value: `OFF` +- This variable controls whether to enable the new [Pipeline Execution Model](/tiflash/tiflash-pipeline-model.md) in TiFlash. +- When this variable is set to `OFF` to disable the pipeline execution model, the query pushed down to TiFlash will be executed using the original stream model. +- When this variable is set to `ON` to enable the pipeline execution model, the query pushed down to TiFlash will be executed using the new pipeline execution model. + +> **Note:** +> +> - The pipeline execution model is currently an experimental feature and is not recommended to use in production environments. +> - The pipeline execution model does not support the following features. When the following features are enabled, even if `tidb_enable_tiflash_pipeline_model` is set to `ON`, the query pushed down to TiFlash will still be executed using the original stream model. +> +> - [Join operator spill to disk](#tidb_max_bytes_before_tiflash_external_join-new-in-v700) +> - [TiFlash Disaggregated Storage and Compute Architecture and S3 Support](/tiflash/tiflash-disaggregated-and-s3.md) + + + + + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Boolean +- Default value: `OFF` +- This variable controls whether to enable the new [Pipeline Execution Model](https://docs.pingcap.com/tidb/v7.2/tiflash-pipeline-model) in TiFlash. +- When this variable is set to `OFF` to disable the pipeline execution model, the query pushed down to TiFlash will be executed using the original stream model. +- When this variable is set to `ON` to enable the pipeline execution model, the query pushed down to TiFlash will be executed using the new pipeline execution model. 
+ +> **Note:** +> +> - The pipeline execution model is currently an experimental feature and is not recommended to use in production environments. +> - The pipeline execution model does not support the following features. When the following features are enabled, even if `tidb_enable_tiflash_pipeline_model` is set to `ON`, the query pushed down to TiFlash will still be executed using the original stream model. +> +> - [Join operator spill to disk](#tidb_max_bytes_before_tiflash_external_join-new-in-v700) +> - [TiFlash Disaggregated Storage and Compute Architecture and S3 Support](https://docs.pingcap.com/tidb/v7.2/tiflash-disaggregated-and-s3) + + + ### tidb_enable_tiflash_read_for_write_stmt New in v6.3.0 - Scope: SESSION | GLOBAL @@ -2391,6 +2511,24 @@ For a system upgraded to v5.0 from an earlier version, if you have not modified - Slow logs are printed after the statement is executed. - Expensive query logs print the statements that are being executed, with execution time exceeding the threshold value, and their related information. +### tidb_expensive_txn_time_threshold New in v7.2.0 + + + +> **Note:** +> +> This TiDB variable is not applicable to TiDB Cloud. + + + +- Scope: GLOBAL +- Persists to cluster: No, only applicable to the current TiDB instance that you are connecting to. +- Type: Integer +- Default value: `600` +- Range: `[60, 2147483647]` +- Unit: Seconds +- This variable controls the threshold for logging expensive transactions, which is 600 seconds by default. When the duration of a transaction exceeds the threshold, and the transaction is neither committed nor rolled back, it is considered an expensive transaction and will be logged. 
+ ### tidb_force_priority @@ -2800,6 +2938,17 @@ For a system upgraded to v5.0 from an earlier version, if you have not modified +### `tidb_lock_unchanged_keys` New in v7.1.1 and v7.3.0 + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Boolean +- Default value: `ON` +- This variable is used to control whether to lock specific keys in the following scenarios. When the value is set to `ON`, these keys are locked. When the value is set to `OFF`, these keys are not locked. + - Duplicate keys in `INSERT IGNORE` and `REPLACE` statements. Before v6.1.6, these keys were not locked. This issue has been fixed in [#42121](https://github.com/pingcap/tidb/issues/42121). + - Unique keys in `UPDATE` statements when the values of the keys are not changed. Before v6.5.2, these keys were not locked. This issue has been fixed in [#36438](https://github.com/pingcap/tidb/issues/36438). +- To maintain the consistency and rationality of the transaction, it is not recommended to change this value. If upgrading TiDB causes severe performance issues due to these two fixes, and the behavior without locks is acceptable (see the preceding issues), you can set this variable to `OFF`. + ### tidb_log_file_max_days New in v5.3.0 - Scope: GLOBAL @@ -3368,6 +3517,14 @@ mysql> desc select count(distinct a) from test.t; - Default value: `ON` - This variable is used to control whether the optimizer estimates the number of rows based on column order correlation +### `tidb_opt_enable_non_eval_scalar_subquery` New in v7.3.0 + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Boolean +- Default value: `OFF` +- This variable is used to control whether the `EXPLAIN` statement disables the execution of constant subqueries that can be expanded at the optimization stage. When this variable is set to `OFF`, the `EXPLAIN` statement expands the subquery in advance at the optimization stage. 
When this variable is set to `ON`, the `EXPLAIN` statement does not expand the subquery at the optimization stage. For more information, see [Disable subquery expansion](/explain-walkthrough.md#disable-the-early-execution-of-subqueries). + ### tidb_opt_enable_late_materialization New in v7.0.0 - Scope: SESSION | GLOBAL @@ -3377,15 +3534,43 @@ mysql> desc select count(distinct a) from test.t; - This variable is used to control whether to enable the [TiFlash late materialization](/tiflash/tiflash-late-materialization.md) feature. Note that TiFlash late materialization does not take effect in the [fast scan mode](/tiflash/use-fastscan.md). - When this variable is set to `OFF` to disable the TiFlash late materialization feature, to process a `SELECT` statement with filter conditions (`WHERE` clause), TiFlash scans all the data of the required columns before filtering. When this variable is set to `ON` to enable the TiFlash late materialization feature, TiFlash can first scan the column data related to the filter conditions that are pushed down to the TableScan operator, filter the rows that meet the conditions, and then scan the data of other columns of these rows for further calculations, thereby reducing IO scans and computations of data processing. +### tidb_opt_enable_mpp_shared_cte_execution New in v7.2.0 + +> **Warning:** +> +> The feature controlled by this variable is experimental. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Boolean +- Default value: `OFF` +- This variable controls whether the non-recursive [Common Table Expressions (CTE)](/sql-statements/sql-statement-with.md) can be executed on TiFlash MPP. 
By default, when this variable is disabled, CTE is executed on TiDB, which has a large performance gap compared with enabling this feature. + ### tidb_opt_fix_control New in v7.1.0 + + - Scope: SESSION | GLOBAL - Persists to cluster: Yes - Type: String - Default value: `""` - This variable is used to control some internal behaviors of the optimizer. - The optimizer's behavior might vary depending on user scenarios or SQL statements. This variable provides a more fine-grained control over the optimizer and helps to prevent performance regression after upgrading caused by behavior changes in the optimizer. -- This variable supports multiple control items, separated by commas (`,`). The format is `"<#issue1>:,<#issue2>:,...,<#issueN>:"`, where `<#issueN>` represents the control number, corresponding to the link `https://github.com/pingcap/tidb/issues/<#issue>` that describes the control item, and `` is the target value set for the control item. +- For a more detailed introduction, see [Optimizer Fix Controls](/optimizer-fix-controls.md). + + + + + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: String +- Default value: `""` +- This variable is used to control some internal behaviors of the optimizer. +- The optimizer's behavior might vary depending on user scenarios or SQL statements. This variable provides a more fine-grained control over the optimizer and helps to prevent performance regression after upgrading caused by behavior changes in the optimizer. +- For a more detailed introduction, see [Optimizer Fix Controls](https://docs.pingcap.com/tidb/v7.2/optimizer-fix-controls). + + ### tidb_opt_force_inline_cte New in v6.3.0 @@ -4071,7 +4256,7 @@ SHOW WARNINGS; - Scope: SESSION | GLOBAL - Persists to cluster: Yes - Type: Boolean -- Default value: `OFF` +- Default value: Before v7.2.0, the default value is `OFF`. Starting from v7.2.0, the default value is `ON`. - Specifies whether to remove `ORDER BY` clause in a subquery. 
### tidb_replica_read New in v4.0 @@ -4142,6 +4327,24 @@ SHOW WARNINGS; - If you upgrade from a TiDB version earlier than v4.0.0 to v4.0.0 or later versions, the format version is not changed, and TiDB continues to use the old format of version `1` to write data to the table, which means that **only newly created clusters use the new data format by default**. - Note that modifying this variable does not affect the old data that has been saved, but applies the corresponding version format only to the newly written data after modifying this variable. +### tidb_runtime_filter_mode New in v7.2.0 + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Enumeration +- Default value: `OFF` +- Possible values: `OFF`, `LOCAL` +- Controls the mode of Runtime Filter, that is, the relationship between the **Filter Sender operator** and **Filter Receiver operator**. There are two modes: `OFF` and `LOCAL`. `OFF` means disabling Runtime Filter. `LOCAL` means enabling Runtime Filter in the local mode. For more information, see [Runtime Filter mode](/runtime-filter.md#runtime-filter-mode). + +### tidb_runtime_filter_type New in v7.2.0 + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Enumeration +- Default value: `IN` +- Possible values: `IN` +- Controls the type of predicate used by the generated Filter operator. Currently, only one type is supported: `IN`. For more information, see [Runtime Filter type](/runtime-filter.md#runtime-filter-type). + ### tidb_scatter_region - Scope: GLOBAL @@ -4234,6 +4437,17 @@ tidb> set tx_isolation='serializable'; Query OK, 0 rows affected, 1 warning (0.00 sec) ``` +### tidb_skip_missing_partition_stats New in v7.3.0 + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Boolean +- Default value: `ON` +- When accessing a partitioned table in [dynamic pruning mode](/partitioned-table.md#dynamic-pruning-mode), TiDB aggregates the statistics of each partition to generate GlobalStats. 
This variable controls the generation of GlobalStats when partition statistics are missing. + + - If this variable is `ON`, TiDB skips missing partition statistics when generating GlobalStats so the generation of GlobalStats is not affected. + - If this variable is `OFF`, TiDB stops generating GlobalStats when it detects any missing partition statistics. + ### tidb_skip_utf8_check - Scope: SESSION | GLOBAL @@ -4754,6 +4968,10 @@ For details, see [Identify Slow Queries](/identify-slow-queries.md). ### tidb_ttl_running_tasks New in v7.0.0 +> **Note:** +> +> This variable is read-only for [TiDB Serverless](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). + - Scope: GLOBAL - Persists to cluster: Yes - Type: Integer @@ -4879,6 +5097,31 @@ For details, see [Identify Slow Queries](/identify-slow-queries.md). * Integer greater than 0: the Fine Grained Shuffle feature is enabled. The window function pushed down to TiFlash is executed in multiple threads. The concurrency level is: min(`tiflash_fine_grained_shuffle_stream_count`, the number of physical threads on TiFlash nodes). - Theoretically, the performance of the window function increases linearly with this value. However, if the value exceeds the actual number of physical threads, it instead leads to performance degradation. +### tiflash_replica_read New in v7.3.0 + +> **Note:** +> +> This TiDB variable is not applicable to TiDB Cloud. + +- Scope: SESSION | GLOBAL +- Persists to cluster: Yes +- Type: Enumeration +- Default value: `all_replicas` +- Value options: `all_replicas`, `closest_adaptive`, or `closest_replicas` +- This variable is used to set the strategy for selecting TiFlash replicas when a query requires the TiFlash engine. + - `all_replicas` means using all available TiFlash replicas for analytical computing. + - `closest_adaptive` means preferring to use TiFlash replicas in the same zone as the TiDB node initiating the query. 
If replicas in this zone do not contain all the required data, the query will involve TiFlash replicas from other zones along with their corresponding TiFlash nodes. + - `closest_replicas` means using only TiFlash replicas in the same zone as the TiDB node initiating the query. If replicas in this zone do not contain all the required data, the query will return an error. + + + +> **Note:** +> +> - If TiDB nodes do not have [zone attributes](/schedule-replicas-by-topology-labels.md#optional-configure-labels-for-tidb) configured and `tiflash_replica_read` is not set to `all_replicas`, TiFlash ignores the replica selection strategy. Instead, it uses all TiFlash replicas for queries and returns the `The variable tiflash_replica_read is ignored.` warning. +> - If TiFlash nodes do not have [zone attributes](/schedule-replicas-by-topology-labels.md#configure-labels-for-tikv-and-tiflash) configured, they are treated as nodes not belonging to any zone. + + + ### time_zone - Scope: SESSION | GLOBAL @@ -5015,7 +5258,7 @@ Internally, the TiDB parser transforms the `SET TRANSACTION ISOLATION LEVEL [REA - Scope: NONE - Default value: `5.7.25-TiDB-`(tidb version) -- This variable returns the MySQL version, followed by the TiDB version. For example '5.7.25-TiDB-v4.0.0-beta.2-716-g25e003253'. +- This variable returns the MySQL version, followed by the TiDB version. For example '5.7.25-TiDB-v7.3.0'. ### version_comment diff --git a/telemetry.md b/telemetry.md index 0de8d1c778bdf..6c1cb5bf87112 100644 --- a/telemetry.md +++ b/telemetry.md @@ -69,7 +69,7 @@ TIUP_CLUSTER_DEBUG=enable tiup cluster list > **Note:** > -> Starting from v3.3, the telemetry collection is disabled by default in TiSpark, and usage information is not collected and shared with PingCAP. +> Starting from v3.0.3, the telemetry collection is disabled by default in TiSpark, and usage information is not collected and shared with PingCAP. 
When the telemetry collection feature is enabled for TiSpark, the Spark module will share the usage details of TiSpark, including (but not limited to): diff --git a/ticdc/deploy-ticdc.md b/ticdc/deploy-ticdc.md index 5933a1d990366..55a902c7b1926 100644 --- a/ticdc/deploy-ticdc.md +++ b/ticdc/deploy-ticdc.md @@ -60,7 +60,7 @@ The method of scaling out a TiCDC cluster is similar to that of deploying one. I - host: 10.1.1.2 gc-ttl: 86400 data_dir: /tidb-data/cdc-8300 - - host: 10.0.1.4:8300 + - host: 10.0.1.4 gc-ttl: 86400 data_dir: /tidb-data/cdc-8300 ``` @@ -95,7 +95,7 @@ tiup cluster upgrade --transfer-timeout 600 > **Note:** > -> In the preceding command, you need to replace `` and `` with the actual cluster name and cluster version. For example, the version can be v7.0.0. +> In the preceding command, you need to replace `` and `` with the actual cluster name and cluster version. For example, the version can be v7.3.0. ### Upgrade cautions @@ -115,7 +115,7 @@ This section describes how to use the [`tiup cluster edit-config`](/tiup/tiup-co 1. Run the `tiup cluster edit-config` command. Replace `` with the actual cluster name: - ```shell + ```shell tiup cluster edit-config ``` @@ -152,7 +152,7 @@ See [Enable TLS Between TiDB Components](/enable-tls-between-components.md). ## View TiCDC status using the command-line tool -Run the following command to view the TiCDC cluster status. Note that you need to replace `v` with the TiCDC cluster version, such as `v6.5.0`: +Run the following command to view the TiCDC cluster status. 
Note that you need to replace `v` with the TiCDC cluster version, such as `v7.3.0`: ```shell tiup ctl:v cdc capture list --server=http://10.0.10.25:8300 diff --git a/ticdc/integrate-confluent-using-ticdc.md b/ticdc/integrate-confluent-using-ticdc.md index 3d940ea2e5810..43b71808f7492 100644 --- a/ticdc/integrate-confluent-using-ticdc.md +++ b/ticdc/integrate-confluent-using-ticdc.md @@ -71,7 +71,7 @@ The preceding steps are performed in a lab environment. You can also deploy a cl After creation, a key pair file is generated, as shown below: - ``` + ``` === Confluent Cloud API key: yyy-yyyyy === API key: 7NBH2CAFM2LMGTH7 @@ -229,7 +229,7 @@ create or replace TABLE TIDB_TEST_ITEM ( ); ``` -2. Create a stream for `TIDB_TEST_ITEM` and set `append_only` to `true` as follows. +2. Create a stream for `TIDB_TEST_ITEM` and set `append_only` to `true` as follows. ``` create or replace stream TEST_ITEM_STREAM on table TIDB_TEST_ITEM append_only=true; diff --git a/ticdc/ticdc-alert-rules.md b/ticdc/ticdc-alert-rules.md index 44d586fbce8f0..7e245b53e4c61 100644 --- a/ticdc/ticdc-alert-rules.md +++ b/ticdc/ticdc-alert-rules.md @@ -16,7 +16,7 @@ For critical alerts, you need to pay close attention to abnormal monitoring metr - Alert rule: - (time() - ticdc_processor_checkpoint_ts / 1000) > 600 + (time() - ticdc_owner_checkpoint_ts / 1000) > 600 - Description: @@ -30,7 +30,7 @@ For critical alerts, you need to pay close attention to abnormal monitoring metr - Alert rule: - (time() - ticdc_processor_resolved_ts / 1000) > 300 + (time() - ticdc_owner_resolved_ts / 1000) > 300 - Description: @@ -72,25 +72,11 @@ Warning alerts are a reminder for an issue or error. Collect TiCDC logs to locate the root cause. 
-### `ticdc_mounter_unmarshal_and_mount_time_more_than_1s` +### `cdc_sink_flush_duration_time_more_than_10s` - Alert rule: -`histogram_quantile(0.9, rate(ticdc_mounter_unmarshal_and_mount_bucket[1m])) * 1000 > 1000` - -- Description: - - It takes a replication task more than 1 second to unmarshal the data changes. - -- Solution: - - Collect TiCDC logs to locate the root cause. - -### `cdc_sink_execute_duration_time_more_than_10s` - -- Alert rule: - - `histogram_quantile(0.9, rate(ticdc_sink_txn_exec_duration_bucket[1m])) > 10` + `histogram_quantile(0.9, rate(ticdc_sink_txn_worker_flush_duration[1m])) > 10` - Description: diff --git a/ticdc/ticdc-avro-checksum-verification.md b/ticdc/ticdc-avro-checksum-verification.md new file mode 100644 index 0000000000000..1cc9e89a290c8 --- /dev/null +++ b/ticdc/ticdc-avro-checksum-verification.md @@ -0,0 +1,514 @@ +--- +title: TiCDC Row Data Checksum Verification Based on Avro +summary: Introduce the detailed implementation of TiCDC row data checksum verification. +--- + +# TiCDC Row Data Checksum Verification Based on Avro + +This document introduces how to consume data sent to Kafka by TiCDC and encoded by Avro protocol using Golang, and how to perform data verification using the [Single-row data checksum feature](/ticdc/ticdc-integrity-check.md). + +The source code of this example is available in the [`avro-checksum-verification`](https://github.com/pingcap/tiflow/tree/master/examples/golang/avro-checksum-verification) directory. + +The example in this document uses [kafka-go](https://github.com/segmentio/kafka-go) to create a simple Kafka consumer program. This program continuously reads data from a specified topic, calculates the checksum, and verifies its value. 
+ +```go +package main + +import ( + "context" + "encoding/binary" + "encoding/json" + "hash/crc32" + "io" + "math" + "net/http" + "strconv" + "strings" + + "github.com/linkedin/goavro/v2" + "github.com/pingcap/log" + "github.com/pingcap/tidb/parser/mysql" + "github.com/pingcap/tidb/types" + "github.com/pingcap/tiflow/pkg/errors" + "github.com/segmentio/kafka-go" + "go.uber.org/zap" +) + +const ( + // The first byte of the Confluent Avro wire format is always 0. + // For more details, see https://docs.confluent.io/platform/current/schema-registry/fundamentals/serdes-develop/index.html#wire-format. + magicByte = uint8(0) +) + +func main() { + var ( + kafkaAddr = "127.0.0.1:9092" + schemaRegistryURL = "http://127.0.0.1:8081" + + topic = "avro-checksum-test" + consumerGroupID = "avro-checksum-test" + ) + + consumer := kafka.NewReader(kafka.ReaderConfig{ + Brokers: []string{kafkaAddr}, + GroupID: consumerGroupID, + Topic: topic, + MaxBytes: 10e6, // 10MB + }) + defer consumer.Close() + + ctx := context.Background() + log.Info("start consuming ...", zap.String("kafka", kafkaAddr), zap.String("topic", topic), zap.String("groupID", consumerGroupID)) + for { + // 1. Fetch the kafka message. + message, err := consumer.FetchMessage(ctx) + if err != nil { + log.Error("read kafka message failed", zap.Error(err)) + } + + value := message.Value + if len(value) == 0 { + log.Info("delete event does not have value, skip checksum verification", zap.String("topic", topic)) + } + + // 2. Decode the value to get the corresponding value map and schema map. + valueMap, valueSchema, err := getValueMapAndSchema(value, schemaRegistryURL) + if err != nil { + log.Panic("decode kafka value failed", zap.String("topic", topic), zap.ByteString("value", value), zap.Error(err)) + } + + // 3. Calculate and verify checksum value using the value map and schema map obtained in the previous step. 
+ err = CalculateAndVerifyChecksum(valueMap, valueSchema) + if err != nil { + log.Panic("calculate checksum failed", zap.String("topic", topic), zap.ByteString("value", value), zap.Error(err)) + } + + // 4. Commit offset after the data is successfully consumed. + if err := consumer.CommitMessages(ctx, message); err != nil { + log.Error("commit kafka message failed", zap.Error(err)) + break + } + } +} +``` + +The key steps for calculating the checksum value are `getValueMapAndSchema()` and `CalculateAndVerifyChecksum()`. The following sections describe the implementation of these two functions. + +## Decode data and get the corresponding schema + +The `getValueMapAndSchema()` method decodes data and gets the corresponding schema. This method returns both the data and schema as a `map[string]interface{}` type. + +```go +// data is the key or value of the received kafka message, and url is the schema registry url. +// This function returns the decoded value and corresponding schema as map. 
+func getValueMapAndSchema(data []byte, url string) (map[string]interface{}, map[string]interface{}, error) { + schemaID, binary, err := extractSchemaIDAndBinaryData(data) + if err != nil { + return nil, nil, err + } + + codec, err := GetSchema(url, schemaID) + if err != nil { + return nil, nil, err + } + + native, _, err := codec.NativeFromBinary(binary) + if err != nil { + return nil, nil, err + } + + result, ok := native.(map[string]interface{}) + if !ok { + return nil, nil, errors.New("raw avro message is not a map") + } + + schema := make(map[string]interface{}) + if err := json.Unmarshal([]byte(codec.Schema()), &schema); err != nil { + return nil, nil, errors.Trace(err) + } + + return result, schema, nil +} + +// extractSchemaIDAndBinaryData +func extractSchemaIDAndBinaryData(data []byte) (int, []byte, error) { + if len(data) < 5 { + return 0, nil, errors.ErrAvroInvalidMessage.FastGenByArgs() + } + if data[0] != magicByte { + return 0, nil, errors.ErrAvroInvalidMessage.FastGenByArgs() + } + return int(binary.BigEndian.Uint32(data[1:5])), data[5:], nil +} + +// GetSchema fetches the schema from the schema registry by the schema ID. +// This function returns a goavro.Codec that can be used to encode and decode the data. 
+func GetSchema(url string, schemaID int) (*goavro.Codec, error) { + requestURI := url + "/schemas/ids/" + strconv.Itoa(schemaID) + + req, err := http.NewRequest("GET", requestURI, nil) + if err != nil { + log.Error("Cannot create the request to look up the schema", zap.Error(err)) + return nil, errors.WrapError(errors.ErrAvroSchemaAPIError, err) + } + req.Header.Add( + "Accept", + "application/vnd.schemaregistry.v1+json, application/vnd.schemaregistry+json, "+ + "application/json", + ) + + httpClient := &http.Client{} + resp, err := httpClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + log.Error("Cannot parse the lookup schema response", zap.Error(err)) + return nil, errors.WrapError(errors.ErrAvroSchemaAPIError, err) + } + + if resp.StatusCode == 404 { + log.Warn("Specified schema not found in Registry", zap.String("requestURI", requestURI), zap.Int("schemaID", schemaID)) + return nil, errors.ErrAvroSchemaAPIError.GenWithStackByArgs("Schema not found in Registry") + } + + if resp.StatusCode != 200 { + log.Error("Failed to query schema from the Registry, HTTP error", + zap.Int("status", resp.StatusCode), zap.String("uri", requestURI), zap.ByteString("responseBody", body)) + return nil, errors.ErrAvroSchemaAPIError.GenWithStack("Failed to query schema from the Registry, HTTP error") + } + + var jsonResp lookupResponse + err = json.Unmarshal(body, &jsonResp) + if err != nil { + log.Error("Failed to parse result from Registry", zap.Error(err)) + return nil, errors.WrapError(errors.ErrAvroSchemaAPIError, err) + } + + codec, err := goavro.NewCodec(jsonResp.Schema) + if err != nil { + return nil, errors.WrapError(errors.ErrAvroSchemaAPIError, err) + } + return codec, nil +} + +type lookupResponse struct { + Name string `json:"name"` + SchemaID int `json:"id"` + Schema string `json:"schema"` +} + +``` + +## Calculate and verify the checksum value + +The `valueMap` and `valueSchema` 
obtained in the previous step contain all the elements used for checksum calculation and verification. + +The checksum calculation and verification process on the consumer side includes the following steps: + +1. Get the expected checksum value. +2. Iterate over each column, generate a byte slice according to the column value and the corresponding MySQL type, and update the checksum value continuously. +3. Compare the checksum value calculated in the previous step with the checksum value obtained from the received message. If they are not the same, the checksum verification fails and the data might be corrupted. + +The sample code is as follows: + +```go +func CalculateAndVerifyChecksum(valueMap, valueSchema map[string]interface{}) error { + // The fields variable stores the column type information for each data change event. The column IDs are used to sort the fields, which is the same as the order in which the checksum is calculated. + fields, ok := valueSchema["fields"].([]interface{}) + if !ok { + return errors.New("schema fields should be a map") + } + + // 1. Get the expected checksum value from valueMap, which is encoded as a string. + // If the expected checksum value is not found, it means that the checksum feature is not enabled when TiCDC sends the data. In this case, this function returns directly. + o, ok := valueMap["_tidb_row_level_checksum"] + if !ok { + return nil + } + expected := o.(string) + if expected == "" { + return nil + } + + // expectedChecksum is the expected checksum value passed from TiCDC. + expectedChecksum, err := strconv.ParseUint(expected, 10, 64) + if err != nil { + return errors.Trace(err) + } + + // 2. Iterate over each field and calculate the checksum value. + var actualChecksum uint32 + // buf stores the byte slice used to update the checksum value each time. 
+ buf := make([]byte, 0) + for _, item := range fields { + field, ok := item.(map[string]interface{}) + if !ok { + return errors.New("schema field should be a map") + } + + // The tidbOp and subsequent columns are not involved in the checksum calculation, because they are used to assist data consumption and not real TiDB column data. + colName := field["name"].(string) + if colName == "_tidb_op" { + break + } + + // The holder variable stores the type information of each column. + var holder map[string]interface{} + switch ty := field["type"].(type) { + case []interface{}: + for _, item := range ty { + if m, ok := item.(map[string]interface{}); ok { + holder = m["connect.parameters"].(map[string]interface{}) + break + } + } + case map[string]interface{}: + holder = ty["connect.parameters"].(map[string]interface{}) + default: + log.Panic("type info is anything else", zap.Any("typeInfo", field["type"])) + } + tidbType := holder["tidb_type"].(string) + + mysqlType := mysqlTypeFromTiDBType(tidbType) + + // Get the value of each column from the decoded value map according to the name of each column. + value, ok := valueMap[colName] + if !ok { + return errors.New("value not found") + } + value, err := getColumnValue(value, holder, mysqlType) + if err != nil { + return errors.Trace(err) + } + + if len(buf) > 0 { + buf = buf[:0] + } + + // Generate a byte slice used to update the checksum according to the value and mysqlType of each column, and then update the checksum value. 
+ buf, err = buildChecksumBytes(buf, value, mysqlType) + if err != nil { + return errors.Trace(err) + } + actualChecksum = crc32.Update(actualChecksum, crc32.IEEETable, buf) + } + + if uint64(actualChecksum) != expectedChecksum { + log.Error("checksum mismatch", + zap.Uint64("expected", expectedChecksum), + zap.Uint64("actual", uint64(actualChecksum))) + return errors.New("checksum mismatch") + } + + log.Info("checksum verified", zap.Uint64("checksum", uint64(actualChecksum))) + return nil +} + +func mysqlTypeFromTiDBType(tidbType string) byte { + var result byte + switch tidbType { + case "INT", "INT UNSIGNED": + result = mysql.TypeLong + case "BIGINT", "BIGINT UNSIGNED": + result = mysql.TypeLonglong + case "FLOAT": + result = mysql.TypeFloat + case "DOUBLE": + result = mysql.TypeDouble + case "BIT": + result = mysql.TypeBit + case "DECIMAL": + result = mysql.TypeNewDecimal + case "TEXT": + result = mysql.TypeVarchar + case "BLOB": + result = mysql.TypeLongBlob + case "ENUM": + result = mysql.TypeEnum + case "SET": + result = mysql.TypeSet + case "JSON": + result = mysql.TypeJSON + case "DATE": + result = mysql.TypeDate + case "DATETIME": + result = mysql.TypeDatetime + case "TIMESTAMP": + result = mysql.TypeTimestamp + case "TIME": + result = mysql.TypeDuration + case "YEAR": + result = mysql.TypeYear + default: + log.Panic("this should not happen, unknown TiDB type", zap.String("type", tidbType)) + } + return result +} + +// The value is an interface type, which needs to be converted according to the type information provided by the holder. +func getColumnValue(value interface{}, holder map[string]interface{}, mysqlType byte) (interface{}, error) { + switch t := value.(type) { + // The column with nullable is encoded as a map, and there is only one key-value pair. The key is the type, and the value is the real value. Only the real value is concerned here. 
+ case map[string]interface{}: + for _, v := range t { + value = v + } + } + + switch mysqlType { + case mysql.TypeEnum: + // Enum is encoded as a string, which is converted to the int value corresponding to the Enum definition here. + allowed := strings.Split(holder["allowed"].(string), ",") + switch t := value.(type) { + case string: + enum, err := types.ParseEnum(allowed, t, "") + if err != nil { + return nil, errors.Trace(err) + } + value = enum.Value + case nil: + value = nil + } + case mysql.TypeSet: + // Set is encoded as a string, which is converted to the int value corresponding to the Set definition here. + elems := strings.Split(holder["allowed"].(string), ",") + switch t := value.(type) { + case string: + s, err := types.ParseSet(elems, t, "") + if err != nil { + return nil, errors.Trace(err) + } + value = s.Value + case nil: + value = nil + } + } + return value, nil +} + +// buildChecksumBytes generates a byte slice used to update the checksum, refer to https://github.com/pingcap/tidb/blob/e3417913f58cdd5a136259b902bf177eaf3aa637/util/rowcodec/common.go#L308 +func buildChecksumBytes(buf []byte, value interface{}, mysqlType byte) ([]byte, error) { + if value == nil { + return buf, nil + } + + switch mysqlType { + // TypeTiny, TypeShort, and TypeInt32 are encoded as int32. + // TypeLong is encoded as int32 if signed, otherwise, it is encoded as int64. + // TypeLongLong is encoded as int64 if signed, otherwise, it is encoded as uint64. + // When the checksum feature is enabled, bigintUnsignedHandlingMode must be set to string, which is encoded as string. 
+ case mysql.TypeTiny, mysql.TypeShort, mysql.TypeLong, mysql.TypeLonglong, mysql.TypeInt24, mysql.TypeYear: + switch a := value.(type) { + case int32: + buf = binary.LittleEndian.AppendUint64(buf, uint64(a)) + case uint32: + buf = binary.LittleEndian.AppendUint64(buf, uint64(a)) + case int64: + buf = binary.LittleEndian.AppendUint64(buf, uint64(a)) + case uint64: + buf = binary.LittleEndian.AppendUint64(buf, a) + case string: + v, err := strconv.ParseUint(a, 10, 64) + if err != nil { + return nil, errors.Trace(err) + } + buf = binary.LittleEndian.AppendUint64(buf, v) + default: + log.Panic("unknown golang type for the integral value", + zap.Any("value", value), zap.Any("mysqlType", mysqlType)) + } + // Encode float type as float64 and encode double type as float64. + case mysql.TypeFloat, mysql.TypeDouble: + var v float64 + switch a := value.(type) { + case float32: + v = float64(a) + case float64: + v = a + } + if math.IsInf(v, 0) || math.IsNaN(v) { + v = 0 + } + buf = binary.LittleEndian.AppendUint64(buf, math.Float64bits(v)) + // getColumnValue encodes Enum and Set to uint64 type. + case mysql.TypeEnum, mysql.TypeSet: + buf = binary.LittleEndian.AppendUint64(buf, value.(uint64)) + case mysql.TypeBit: + // Encode bit type as []byte and convert it to uint64. + v, err := binaryLiteralToInt(value.([]byte)) + if err != nil { + return nil, errors.Trace(err) + } + buf = binary.LittleEndian.AppendUint64(buf, v) + // Non-binary types are encoded as string, and binary types are encoded as []byte. 
+ case mysql.TypeVarchar, mysql.TypeVarString, mysql.TypeString, mysql.TypeTinyBlob, mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob: + switch a := value.(type) { + case string: + buf = appendLengthValue(buf, []byte(a)) + case []byte: + buf = appendLengthValue(buf, a) + default: + log.Panic("unknown golang type for the string value", + zap.Any("value", value), zap.Any("mysqlType", mysqlType)) + } + case mysql.TypeTimestamp, mysql.TypeDatetime, mysql.TypeDate, mysql.TypeDuration, mysql.TypeNewDate: + v := value.(string) + buf = appendLengthValue(buf, []byte(v)) + // When the checksum feature is enabled, decimalHandlingMode must be set to string. + case mysql.TypeNewDecimal: + buf = appendLengthValue(buf, []byte(value.(string))) + case mysql.TypeJSON: + buf = appendLengthValue(buf, []byte(value.(string))) + // Null and Geometry are not involved in the checksum calculation. + case mysql.TypeNull, mysql.TypeGeometry: + // do nothing + default: + return buf, errors.New("invalid type for the checksum calculation") + } + return buf, nil +} + +func appendLengthValue(buf []byte, val []byte) []byte { + buf = binary.LittleEndian.AppendUint32(buf, uint32(len(val))) + buf = append(buf, val...) 
+ return buf +} + +// Convert []byte to uint64, refer to https://github.com/pingcap/tidb/blob/e3417913f58cdd5a136259b902bf177eaf3aa637/types/binary_literal.go#L105 +func binaryLiteralToInt(bytes []byte) (uint64, error) { + bytes = trimLeadingZeroBytes(bytes) + length := len(bytes) + + if length > 8 { + log.Error("invalid bit value found", zap.ByteString("value", bytes)) + return math.MaxUint64, errors.New("invalid bit value") + } + + if length == 0 { + return 0, nil + } + + val := uint64(bytes[0]) + for i := 1; i < length; i++ { + val = (val << 8) | uint64(bytes[i]) + } + return val, nil +} + +func trimLeadingZeroBytes(bytes []byte) []byte { + if len(bytes) == 0 { + return bytes + } + pos, posMax := 0, len(bytes)-1 + for ; pos < posMax; pos++ { + if bytes[pos] != 0 { + break + } + } + return bytes[pos:] +} +``` diff --git a/ticdc/ticdc-avro-protocol.md b/ticdc/ticdc-avro-protocol.md index 240aa6d22e013..553cefe57033f 100644 --- a/ticdc/ticdc-avro-protocol.md +++ b/ticdc/ticdc-avro-protocol.md @@ -7,6 +7,14 @@ summary: Learn the concept of TiCDC Avro Protocol and how to use it. Avro is a data exchange format protocol defined by [Apache Avro™](https://avro.apache.org/) and chosen by [Confluent Platform](https://docs.confluent.io/platform/current/platform.html) as the default data exchange format. This document describes the implementation of the Avro data format in TiCDC, including TiDB extension fields, definition of the Avro data format, and the interaction between Avro and [Confluent Schema Registry](https://docs.confluent.io/platform/current/schema-registry/index.html). +> **Warning:** +> +> When the [Old Value feature](/ticdc/ticdc-manage-changefeed.md#output-the-historical-value-of-a-row-changed-event) is enabled (`enable-old-value = true`), the Avro data format cannot output the old value of change events. 
+> +> For more information, see [What changes occur to the change event format when TiCDC enables the Old Value feature?](/ticdc/ticdc-faq.md#what-changes-occur-to-the-change-event-format-when-ticdc-enables-the-old-value-feature). +> +> Starting from v7.3.0, if you enable TiCDC to [replicate tables without a valid index](/ticdc/ticdc-manage-changefeed.md#replicate-tables-without-a-valid-index), TiCDC will report an error when you create a changefeed that uses the Avro protocol. + ## Use Avro When using Message Queue (MQ) as a downstream sink, you can specify Avro in `sink-uri`. TiCDC captures TiDB DML events, creates Avro messages from these events, and sends the messages downstream. When Avro detects a schema change, it registers the latest schema with Schema Registry. diff --git a/ticdc/ticdc-bidirectional-replication.md b/ticdc/ticdc-bidirectional-replication.md index d010d67add19e..b098f5dbd815a 100644 --- a/ticdc/ticdc-bidirectional-replication.md +++ b/ticdc/ticdc-bidirectional-replication.md @@ -38,16 +38,36 @@ After the configuration takes effect, the clusters can perform bi-directional re ## Execute DDL -Bi-directional replication does not support replicating DDL statements. - -If you need to execute DDL statements, take the following steps: - -1. Pause the write operations in the tables that need to execute DDL in all clusters. If the DDL statement is adding a non-unique index, skip this step. +After the bidirectional replication is enabled, TiCDC does not replicate any DDL statements. You need to execute DDL statements in the upstream and downstream clusters respectively. + +Note that some DDL statements might cause table structure changes or data change time sequence problems, which might lead to data inconsistency after the replication. Therefore, after enabling bidirectional replication, only the DDL statements in the following table can be executed without stopping the write operations of the application. 
+ +| Event | Does it cause changefeed errors | Note | +|---|---|---| +| create database | Yes | After you manually execute the DDL statements in the upstream and downstream clusters, the errors can be automatically recovered. | +| drop database | Yes | You need to manually restart the changefeed and specify `--overwrite-checkpoint-ts` as the `commitTs` of the DDL statement to recover the errors. | +| create table | Yes | After you manually execute the DDL statements in the upstream and downstream clusters, the errors can be automatically recovered. | +| drop table | Yes | You need to manually restart the changefeed and specify `--overwrite-checkpoint-ts` as the `commitTs` of the DDL statement to recover the errors. | +| alter table comment | No | | +| rename index | No | | +| alter table index visibility | No | | +| add partition | Yes | After you manually execute the DDL statements in the upstream and downstream clusters, the errors can be automatically recovered. | +| drop partition | No | | +| create view | No | | +| drop view | No | | +| alter column default value | No | | +| reorganize partition | Yes | After you manually execute the DDL statements in the upstream and downstream clusters, the errors can be automatically recovered. | +| alter table ttl | No | | +| alter table remove ttl | No | | +| add **not unique** index | No | | +| drop **not unique** index | No | | + +If you need to execute DDL statements that are not in the preceding table, take the following steps: + +1. Pause the write operations in the tables that need to execute DDL in all clusters. 2. After the write operations of the corresponding tables in all clusters have been replicated to other clusters, manually execute all DDL statements in each TiDB cluster. 3. After the DDL statements are executed, resume the write operations. 
-Note that a DDL statement that adds non-unique index does not break bi-directional replication, so you do not need to pause the write operations in the corresponding table. - ## Stop bi-directional replication After the application has stopped writing data, you can insert a special record into each cluster. By checking the two special records, you can make sure that data in two clusters are consistent. diff --git a/ticdc/ticdc-canal-json.md b/ticdc/ticdc-canal-json.md index fd0c816a7131a..acc59c5a2fe07 100644 --- a/ticdc/ticdc-canal-json.md +++ b/ticdc/ticdc-canal-json.md @@ -258,6 +258,45 @@ The following table shows the mapping relationships between Java SQL Types in Ti For more information about Java SQL Types, see [Java SQL Class Types](https://docs.oracle.com/javase/8/docs/api/java/sql/Types.html). +## Binary and Blob types + +TiCDC encodes [binary types](/data-type-string.md#binary-type) in the Canal-JSON format by converting each byte to its character representation as follows: + +- Printable characters are represented using the ISO/IEC 8859-1 character encodings. +- Non-printable characters and certain characters with special meaning in HTML are represented using their UTF-8 escape sequence. + +The following table shows the detailed representation information. 
+
+| Character type | Value range | Character representation |
+| :---------------------------| :-----------| :---------------------|
+| Control characters | [0, 31] | UTF-8 escape (such as `\u0000` through `\u001F`) |
+| Horizontal tab | [9] | `\t` |
+| Line feed | [10] | `\n` |
+| Carriage return | [13] | `\r` |
+| Printable characters | [32, 127] | Literal character (such as `A`) |
+| Ampersand | [38] | `\u0026` |
+| Less-than sign | [60] | `\u003C` |
+| Greater-than sign | [62] | `\u003E` |
+| Extended control characters | [128, 159] | Literal character |
+| ISO 8859-1 (Latin-1) | [160, 255] | Literal character |
+
+### Example of the encoding
+
+For example, the following 16 bytes `[5 7 10 15 36 50 43 99 120 60 38 255 254 45 55 70]` stored in a `VARBINARY` column called `c_varbinary` are encoded in a Canal-JSON `Update` event as follows:
+
+```json
+{
+    ...
+    "data": [
+        {
+            ...
+            "c_varbinary": "\u0005\u0007\n\u000f$2+cx\u003c\u0026ÿþ-7F"
+        }
+    ]
+    ...
+}
+```
+
 ## Comparison of TiCDC Canal-JSON and the official Canal
 
 The way that TiCDC implements the Canal-JSON data format, including the `Update` Event and the `mysqlType` field, differs from the official Canal. The following table shows the main differences.
 
diff --git a/ticdc/ticdc-changefeed-config.md b/ticdc/ticdc-changefeed-config.md
index 69bd344527c2a..cd1919c500e42 100644
--- a/ticdc/ticdc-changefeed-config.md
+++ b/ticdc/ticdc-changefeed-config.md
@@ -129,12 +129,19 @@ write-key-threshold = 0
 #   {matcher = ['test6.*'], partition = "ts"}
 # ]
 
-# The protocol configuration item specifies the protocol format of the messages sent to the downstream.
-# When the downstream is Kafka, the protocol can only be canal-json or avro.
+# The protocol configuration item specifies the protocol format used for encoding messages.
+# When the downstream is Kafka, the protocol can only be canal-json, avro, or open-protocol.
 # When the downstream is a storage service, the protocol can only be canal-json or csv. 
# Note: This configuration item only takes effect if the downstream is Kafka or a storage service.
 # protocol = "canal-json"
 
+# Starting from v7.2.0, the `delete-only-output-handle-key-columns` parameter specifies the output of DELETE events. This parameter is valid only for canal-json and open-protocol protocols.
+# This parameter is incompatible with `force-replicate`. If both this parameter and `force-replicate` are set to `true`, TiCDC reports an error when creating a changefeed.
+# The default value is false, which means outputting all columns. When you set it to true, only primary key columns or unique index columns are output.
+# The Avro protocol is not controlled by this parameter and always outputs only the primary key columns or unique index columns.
+# The CSV protocol is not controlled by this parameter and always outputs all columns.
+delete-only-output-handle-key-columns = false
+
 # The following three configuration items are only used when you replicate data to storage sinks and can be ignored when replicating data to MQ or MySQL sinks.
 # Row terminator, used for separating two data change events. The default value is an empty string, which means "\r\n" is used.
 # terminator = ''
@@ -159,7 +166,7 @@ enable-partition-separator = true
 # The default value is false.
 # enable-kafka-sink-v2 = false
 
-# Specifies whether to only output the updated columns.
+# Starting from v7.1.0, this configuration item specifies whether to only output the updated columns.
 # Note: This configuration item only applies to the MQ downstream using the open-protocol and canal-json.
 # The default value is false.
 # only-output-updated-columns = false
@@ -174,10 +181,12 @@ enable-partition-separator = true
 # null = '\N'
 # Whether to include commit-ts in CSV rows. The default value is false.
 # include-commit-ts = false
+# The encoding method of binary data, which can be 'base64' or 'hex'. The default value is 'base64'. 
+# binary-encoding-method = 'base64' # Specifies the replication consistency configurations for a changefeed when using the redo log. For more information, see https://docs.pingcap.com/tidb/stable/ticdc-sink-to-mysql#eventually-consistent-replication-in-disaster-scenarios. # Note: The consistency-related configuration items only take effect when the downstream is a database and the redo log feature is enabled. -[sink.consistent] +[consistent] # The data consistency level. Available options are "none" and "eventual". "none" means that the redo log is disabled. # The default value is "none". level = "none" @@ -198,4 +207,21 @@ use-file-backend = false integrity-check-level = "none" # Specifies the log level of the Changefeed when the checksum validation for single-row data fails. The default value is "warn". Value options are "warn" and "error". corruption-handle-level = "warn" + +# The following configuration items only take effect when the downstream is Kafka. +[sink.kafka-config] +# The mechanism of Kafka SASL authentication. The default value is empty, indicating that SASL authentication is not used. +sasl-mechanism = "OAUTHBEARER" +# The client-id in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is required when the OAUTHBEARER authentication is used. +sasl-oauth-client-id = "producer-kafka" +# The client-secret in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is required when the OAUTHBEARER authentication is used. +sasl-oauth-client-secret = "cHJvZHVjZXIta2Fma2E=" +# The token-url in the Kafka SASL OAUTHBEARER authentication to obtain the token. The default value is empty. This parameter is required when the OAUTHBEARER authentication is used. +sasl-oauth-token-url = "http://127.0.0.1:4444/oauth2/token" +# The scopes in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is optional when the OAUTHBEARER authentication is used. 
+sasl-oauth-scopes = ["producer.kafka", "consumer.kafka"] +# The grant-type in the Kafka SASL OAUTHBEARER authentication. The default value is "client_credentials". This parameter is optional when the OAUTHBEARER authentication is used. +sasl-oauth-grant-type = "client_credentials" +# The audience in the Kafka SASL OAUTHBEARER authentication. The default value is empty. This parameter is optional when the OAUTHBEARER authentication is used. +sasl-oauth-audience = "kafka" ``` diff --git a/ticdc/ticdc-changefeed-overview.md b/ticdc/ticdc-changefeed-overview.md index 463e318682d9e..ceae1d20bc993 100644 --- a/ticdc/ticdc-changefeed-overview.md +++ b/ticdc/ticdc-changefeed-overview.md @@ -19,7 +19,7 @@ The states in the preceding state transfer diagram are described as follows: - `Stopped`: The replication task is stopped, because the user manually pauses the changefeed. The changefeed in this state blocks GC operations. - `Error`: The replication task returns an error. The replication cannot continue due to some recoverable errors. The changefeed in this state keeps trying to resume until the state transfers to `Normal`. The changefeed in this state blocks GC operations. - `Finished`: The replication task is finished and has reached the preset `TargetTs`. The changefeed in this state does not block GC operations. -- `Failed`: The replication task fails. Due to some unrecoverable errors, the replication task cannot resume and cannot be recovered. The changefeed in this state does not block GC operations. +- `Failed`: The replication task fails. Due to some unrecoverable errors, the replication task cannot resume and cannot be recovered. TiCDC retains the data associated with the failed changefeed for 24 hours, preventing it from being garbage collected. The numbers in the preceding state transfer diagram are described as follows. 
@@ -38,4 +38,4 @@ You can manage a TiCDC cluster and its replication tasks using the command-line You can also use the HTTP interface (the TiCDC OpenAPI feature) to manage a TiCDC cluster and its replication tasks. For details, see [TiCDC OpenAPI](/ticdc/ticdc-open-api.md). -If your TiCDC is deployed using TiUP, you can start `cdc cli` by running the `tiup ctl:v cdc` command. Replace `v` with the TiCDC cluster version, such as `v6.5.0`. You can also run `cdc cli` directly. +If your TiCDC is deployed using TiUP, you can start `cdc cli` by running the `tiup ctl:v cdc` command. Replace `v` with the TiCDC cluster version, such as `v7.3.0`. You can also run `cdc cli` directly. diff --git a/ticdc/ticdc-csv.md b/ticdc/ticdc-csv.md index 1a746be5b0e64..5fcbe297380fc 100644 --- a/ticdc/ticdc-csv.md +++ b/ticdc/ticdc-csv.md @@ -7,6 +7,12 @@ summary: Learn the concept of TiCDC CSV Protocol and how to use it. When using a cloud storage service as the downstream sink, you can send DML events to the cloud storage service in CSV format. +> **Warning:** +> +> When the [Old Value feature](/ticdc/ticdc-manage-changefeed.md#output-the-historical-value-of-a-row-changed-event) is enabled (`enable-old-value = true`), the CSV data format cannot output the old value of change events. +> +> For more information, see [What changes occur to the change event format when TiCDC enables the Old Value feature?](/ticdc/ticdc-faq.md#what-changes-occur-to-the-change-event-format-when-ticdc-enables-the-old-value-feature). 
+ ## Use CSV The following is an example of the configuration when using the CSV protocol: @@ -83,7 +89,7 @@ The DML events of this table are stored in the CSV format as follows: | `TIME` | String | `"23:59:59"` | Format: `yyyy-MM-dd` | | `YEAR` | Integer | `1970` | - | | `VARCHAR`/`JSON`/`TINYTEXT`/`MEDIUMTEXT`/`LONGTEXT`/`TEXT`/`CHAR` | String | `"test"` | UTF-8 encoded | -| `VARBINARY`/`TINYBLOB`/`MEDIUMBLOB`/`LONGBLOB`/`BLOB`/`BINARY` | String | `"6Zi/5pav"` | base64 encoded | +| `VARBINARY`/`TINYBLOB`/`MEDIUMBLOB`/`LONGBLOB`/`BLOB`/`BINARY` | String | `"6Zi/5pav"` or `"e998bfe696af"` | Base64 or hex encoded | | `BIT` | Integer | `81` | - | | `DECIMAL` | String | `"129012.1230000"` | - | | `ENUM` | String | `"a"` | - | diff --git a/ticdc/ticdc-faq.md b/ticdc/ticdc-faq.md index 572e474320d8b..bc8cf33a0f5c1 100644 --- a/ticdc/ticdc-faq.md +++ b/ticdc/ticdc-faq.md @@ -280,7 +280,7 @@ When a changefeed is resumed, TiCDC needs to scan the historical versions of dat ## How should I deploy TiCDC to replicate data between two TiDB cluster located in different regions? -It is recommended that you deploy TiCDC in the downstream TiDB cluster. If the network latency between the upstream and downstream is high, for example, more than 100 ms, the latency produced when TiCDC executes SQL statements to the downstream might increase dramatically due to the MySQL transmission protocol issues. This results in a decrease in system throughput. However, deploying TiCDC in the downstream can greatly ease this problem. +For TiCDC versions earlier than v6.5.2, it is recommended that you deploy TiCDC in the downstream TiDB cluster. If the network latency between the upstream and downstream is high, for example, more than 100 ms, the latency produced when TiCDC executes SQL statements to the downstream might increase dramatically due to the MySQL transmission protocol issues. This results in a decrease in system throughput. 
However, deploying TiCDC in the downstream can greatly ease this problem. After optimization, starting from TiCDC v6.5.2, it is recommended that you deploy TiCDC in the upstream TiDB cluster. ## What is the order of executing DML and DDL statements? @@ -297,3 +297,99 @@ This feature is currently not supported, which might be supported in a future re ## Does TiCDC replication get stuck if the upstream has long-running uncommitted transactions? TiDB has a transaction timeout mechanism. When a transaction runs for a period longer than [`max-txn-ttl`](/tidb-configuration-file.md#max-txn-ttl), TiDB forcibly rolls it back. TiCDC waits for the transaction to be committed before proceeding with the replication, which causes replication delay. + +## What changes occur to the change event format when TiCDC enables the Old Value feature? + +In the following description, the definition of a valid index is as follows: + +- A primary key (`PRIMARY KEY`) is a valid index. +- A unique index (`UNIQUE INDEX`) is valid if every column of the index is explicitly defined as non-nullable (`NOT NULL`) and the index does not have a virtual generated column (`VIRTUAL GENERATED COLUMNS`). + +TiDB supports the clustered index feature starting from v5.0. This feature controls how data is stored in tables containing primary keys. For more information, see [Clustered indexes](/clustered-indexes.md). + +After you enable the [Old Value feature](/ticdc/ticdc-manage-changefeed.md#output-the-historical-value-of-a-row-changed-event), TiCDC behaves as follows: + +- For change events on invalid index columns, the output contains both new and old values. +- For change events on valid index columns, the output varies based on certain conditions: + - If a unique index column (`UNIQUE INDEX`) is updated and the table has no primary key, the output contains both new and old values. 
+ - If the clustered index is disabled in the upstream TiDB cluster, and a non-INT type primary key column is updated, the output contains both new and old values. + - Otherwise, the change event is split into a delete event for the old value and an insert event for the new value. + +The preceding behavior change might lead to the following issues. + +### When change events on a valid index column contain both new and old values, the distribution behavior of Kafka Sink might not guarantee that change events with the same index columns are distributed to the same partition + +The index-value mode of Kafka Sink distributes events according to the value of the index column. When change events contain both new and old values, the value of the index column changes, which might cause change events with the same index column to be distributed to different partitions. The following is an example: + +Create table `t` when the TiDB clustered index feature is disabled: + +```sql +CREATE TABLE t (a VARCHAR(255) PRIMARY KEY NONCLUSTERED); +``` + +Execute the following DML statements: + +```sql +INSERT INTO t VALUES ("2"); +UPDATE t SET a="1" WHERE a="2"; +INSERT INTO t VALUES ("2"); +UPDATE t SET a="3" WHERE a="2"; +``` + +- When the Old Value feature is disabled, the change event is split into a delete event for the old value and an insert event for the new value. The index-value dispatcher of Kafka Sink calculates the corresponding partition for each event. The preceding DML events will be distributed to the following partitions: + + | partition-1 | partition-2 | partition-3 | + | ------------ | ------------ | ------------ | + | INSERT a = 2 | INSERT a = 1 | INSERT a = 3 | + | DELETE a = 2 | | | + | INSERT a = 2 | | | + | DELETE a = 2 | | | + + Because Kafka guarantees message order in each partition, consumers can independently process data in each partition, and get the same result as the DML execution order. 
+ +- When the Old Value feature is enabled, the index-value dispatcher of Kafka Sink distributes change events with the same index columns to different partitions. Therefore, the preceding DML will be distributed to the following partitions (change events contain both new and old values): + + | partition-1 | partition-2 | partition-3 | + | ------------ | ------------------------ | ------------------------ | + | INSERT a = 2 | UPDATE a = 1 WHERE a = 2 | UPDATE a = 3 WHERE a = 2 | + | INSERT a = 2 | | | + + Because Kafka does not guarantee message order between partitions, the preceding DML might not preserve the update order of the index column during consumption. To maintain the order of index column updates when the output contains both new and old values, you can use the default dispatcher when enabling the Old Value feature. + +### When change events on an invalid index column and change events on a valid index column both contain new and old values, the Avro format of Kafka Sink cannot correctly output the old value + +In the Avro implementation, Kafka message values only contain the current column values. Therefore, old values cannot be output correctly when an event contains both new and old values. To output the old value, you can disable the Old Value feature to get the split delete and insert events. + +### When change events on an invalid index column and change events on a valid index column both contain new and old values, the CSV format of Cloud Storage Sink cannot correctly output the old value + +Because a CSV file has a fixed number of columns, old values cannot be output correctly when an event contains both new and old values. To output the old value, you can use the Canal-JSON format. + +## Why can't I use the `cdc cli` command to operate a TiCDC cluster deployed by TiDB Operator? 
+ +This is because the default port number of the TiCDC cluster deployed by TiDB Operator is `8301`, while the default port number of the `cdc cli` command to connect to the TiCDC server is `8300`. When using the `cdc cli` command to operate the TiCDC cluster deployed by TiDB Operator, you need to explicitly specify the `--server` parameter, as follows: + +```shell +./cdc cli changefeed list --server "127.0.0.1:8301" +[ + { + "id": "4k-table", + "namespace": "default", + "summary": { + "state": "stopped", + "tso": 441832628003799353, + "checkpoint": "2023-05-30 22:41:57.910", + "error": null + } + }, + { + "id": "big-table", + "namespace": "default", + "summary": { + "state": "normal", + "tso": 441872834546892882, + "checkpoint": "2023-06-01 17:18:13.700", + "error": null + } + } +] +``` diff --git a/ticdc/ticdc-filter.md b/ticdc/ticdc-filter.md index 6ad1c17ec9250..2bed375030d28 100644 --- a/ticdc/ticdc-filter.md +++ b/ticdc/ticdc-filter.md @@ -87,3 +87,40 @@ Description of configuration parameters: > > - When TiDB updates a value in the column of the clustered index, TiDB splits an `UPDATE` event into a `DELETE` event and an `INSERT` event. TiCDC does not identify such events as an `UPDATE` event and thus cannot correctly filter out such events. > - When you configure a SQL expression, make sure all tables that matches `matcher` contain all the columns specified in the SQL expression. Otherwise, the replication task cannot be created. In addition, if the table schema changes during the replication, which results in a table no longer containing a required column, the replication task fails and cannot be resumed automatically. In such a situation, you must manually modify the configuration and resume the task. + +## DDL allow list + +Currently, TiCDC uses an allow list to replicate DDL statements. Only the DDL statements in the allow list are replicated to the downstream. The DDL statements not in the allow list are not replicated to the downstream. 
+ +The allow list of DDL statements supported by TiCDC is as follows: + +- create database +- drop database +- create table +- drop table +- add column +- drop column +- create index / add index +- drop index +- truncate table +- modify column +- rename table +- alter column default value +- alter table comment +- rename index +- add partition +- drop partition +- truncate partition +- create view +- drop view +- alter table character set +- alter database character set +- recover table +- add primary key +- drop primary key +- rebase auto id +- alter table index visibility +- exchange partition +- reorganize partition +- alter table ttl +- alter table remove ttl diff --git a/ticdc/ticdc-integrity-check.md b/ticdc/ticdc-integrity-check.md index 1142d93548b4b..36a271dbc942a 100644 --- a/ticdc/ticdc-integrity-check.md +++ b/ticdc/ticdc-integrity-check.md @@ -90,15 +90,15 @@ fn checksum(columns) { * BIT, ENUM, and SET types are converted to UINT64. * BIT type is converted to UINT64 in binary format. - * ENUM and SET types are converted to their corresponding INT values in UINT64. For example, if the data value of a `SET('a','b','c')` type column is `'a,c'`, the value is encoded as `0b101`. + * ENUM and SET types are converted to their corresponding INT values in UINT64. For example, if the data value of a `SET('a','b','c')` type column is `'a,c'`, the value is encoded as `0b101`, which is `5` in decimal. - * TIMESTAMP, DATE, DURATION, DATETIME, JSON, and DECIMAL types are converted to STRING and then encoded as UTF8 bytes. - * VARBIANRY, BINARY, and BLOB types (including TINY, MEDIUM, and LONG) are directly encoded as bytes. - * VARCHAR, CHAR, and TEXT types (including TINY, MEDIUM, and LONG) are encoded as UTF8 bytes. + * TIMESTAMP, DATE, DURATION, DATETIME, JSON, and DECIMAL types are first converted to STRING and then converted to bytes. + * CHAR, VARCHAR, VARSTRING, STRING, TEXT, and BLOB types (including TINY, MEDIUM, and LONG) are directly converted to bytes. 
* NULL and GEOMETRY types are excluded from the checksum calculation and this function returns empty bytes. +For more information about the implementation of data consumption and checksum verification using Golang, see [TiCDC row data checksum verification](/ticdc/ticdc-avro-checksum-verification.md). + > **Note:** > -> After enabling the checksum validation feature, DECIMAL and UNSIGNED BIGINT types data will be converted to string types. Therefore, in the downstream consumer code, you need to convert them back to their corresponding numerical types before calculating checksum values. - -The consumer code written in Golang implements steps such as decoding data read from Kafka, sorting by schema fields, and calculating the checksum value. For more information, see [`avro/decoder.go`](https://github.com/pingcap/tiflow/blob/master/pkg/sink/codec/avro/decoder.go). +> - After enabling the checksum validation feature, DECIMAL and UNSIGNED BIGINT types data will be converted to STRING types. Therefore, in the downstream consumer code, you need to convert them back to their corresponding numerical types before calculating checksum values. +> - The checksum verification process does not include DELETE events. This is because DELETE events only contain the handle key column, while the checksum is calculated based on all columns. diff --git a/ticdc/ticdc-manage-changefeed.md b/ticdc/ticdc-manage-changefeed.md index c605046ba8b27..52ccb3483f180 100644 --- a/ticdc/ticdc-manage-changefeed.md +++ b/ticdc/ticdc-manage-changefeed.md @@ -284,7 +284,7 @@ force-replicate = true > **Warning:** > -> For tables without a valid index, operations such as `INSERT` and `REPLACE` are not reentrant, so there is a risk of data redundancy. TiCDC guarantees that data is distributed only at least once during the replication process. Therefore, enabling this feature to replicate tables without a valid index will definitely cause data redundancy. 
If you do not accept data redundancy, it is recommended to add an effective index, such as adding a primary key column with the `AUTO RANDOM` attribute. +> When `force-replicate` is set to `true`, data consistency is not guaranteed. For tables without a valid index, operations such as `INSERT` and `REPLACE` are not reentrant, so there is a risk of data redundancy. TiCDC guarantees that data is distributed only at least once during the replication process. Therefore, enabling this feature to replicate tables without a valid index will definitely cause data redundancy. If you do not accept data redundancy, it is recommended to add an effective index, such as adding a primary key column with the `AUTO RANDOM` attribute. ## Unified Sorter diff --git a/ticdc/ticdc-open-api-v2.md b/ticdc/ticdc-open-api-v2.md index 545af21dfda5d..f8e5ac77c6b84 100644 --- a/ticdc/ticdc-open-api-v2.md +++ b/ticdc/ticdc-open-api-v2.md @@ -92,7 +92,7 @@ curl -X GET http://127.0.0.1:8300/api/v2/status ```json { - "version": "v7.0.0-master-dirty", + "version": "v7.3.0", "git_hash": "10413bded1bdb2850aa6d7b94eb375102e9c44dc", "id": "d2912e63-3349-447c-90ba-72a4e04b5e9e", "pid": 1447, diff --git a/ticdc/ticdc-open-api.md b/ticdc/ticdc-open-api.md index 2359abf8a77dd..38b36c95846c2 100644 --- a/ticdc/ticdc-open-api.md +++ b/ticdc/ticdc-open-api.md @@ -167,7 +167,7 @@ The configuration parameters of sink are as follows: ### Example -The following request creates a replication task with an ID of `test5` and a `sink_uri` of `blackhome://`. +The following request creates a replication task with an ID of `test5` and a `sink_uri` of `blackhole://`. 
{{< copyable "shell-regular" >}} diff --git a/ticdc/ticdc-overview.md b/ticdc/ticdc-overview.md index 5e426a37fef4d..e91c7281d63e6 100644 --- a/ticdc/ticdc-overview.md +++ b/ticdc/ticdc-overview.md @@ -6,88 +6,88 @@ aliases: ['/docs/dev/ticdc/ticdc-overview/','/docs/dev/reference/tools/ticdc/ove # TiCDC Overview -[TiCDC](https://github.com/pingcap/tiflow/tree/master/cdc) is a tool used for replicating incremental data of TiDB. Specifically, TiCDC pulls TiKV change logs, sorts captured data, and exports row-based incremental data to downstream databases. +[TiCDC](https://github.com/pingcap/tiflow/tree/master/cdc) is a tool used to replicate incremental data from TiDB. Specifically, TiCDC pulls TiKV change logs, sorts captured data, and exports row-based incremental data to downstream databases. ## Usage scenarios -- Provides data high availability and disaster recovery solutions for multiple TiDB clusters, ensuring eventual data consistency between primary and secondary clusters in case of disaster. -- Replicates real-time data changes to homogeneous systems so as to provide data sources for various scenarios such as monitoring, caching, global indexing, data analysis, and primary-secondary replication between heterogeneous databases. +TiCDC has multiple usage scenarios, including: + +- Providing high availability and disaster recovery solutions for multiple TiDB clusters. TiCDC ensures eventual data consistency between primary and secondary clusters in case of a disaster. +- Replicating real-time data changes to homogeneous systems. This provides data sources for various scenarios, such as monitoring, caching, global indexing, data analysis, and primary-secondary replication between heterogeneous databases. ## Major features ### Key capabilities -- Replicate incremental data from one TiDB cluster to another TiDB cluster with second-level RPO and minute-level RTO. 
-- Replicate data bidirectionally between TiDB clusters, based on which you can create a multi-active TiDB solution using TiCDC. -- Replicate incremental data from a TiDB cluster to a MySQL database (or other MySQL-compatible databases) with low latency. -- Replicate incremental data from a TiDB cluster to a Kafka cluster. The recommended data format includes [Canal-JSON](/ticdc/ticdc-canal-json.md) and [Avro](/ticdc/ticdc-avro-protocol.md). -- Replicate tables with the ability to filter databases, tables, DMLs, and DDLs. -- Be highly available with no single point of failure. Supports dynamically adding and deleting TiCDC nodes. -- Support cluster management through [Open API](/ticdc/ticdc-open-api.md), including querying task status, dynamically modifying task configuration, and creating or deleting tasks. +TiCDC has the following key capabilities: + +- Replicating incremental data between TiDB clusters with second-level RPO and minute-level RTO. +- Bidirectional replication between TiDB clusters, allowing the creation of a multi-active TiDB solution using TiCDC. +- Replicating incremental data from a TiDB cluster to a MySQL database or other MySQL-compatible databases with low latency. +- Replicating incremental data from a TiDB cluster to a Kafka cluster. The recommended data format includes [Canal-JSON](/ticdc/ticdc-canal-json.md) and [Avro](/ticdc/ticdc-avro-protocol.md). +- Replicating tables with the ability to filter databases, tables, DMLs, and DDLs. +- High availability with no single point of failure, supporting dynamically adding and deleting TiCDC nodes. +- Cluster management through [Open API](/ticdc/ticdc-open-api.md), including querying task status, dynamically modifying task configuration, and creating or deleting tasks. ### Replication order - For all DDL or DML statements, TiCDC outputs them **at least once**. - When the TiKV or TiCDC cluster encounters a failure, TiCDC might send the same DDL/DML statement repeatedly. 
For duplicated DDL/DML statements: - - MySQL sink can execute DDL statements repeatedly. For DDL statements that can be executed repeatedly in the downstream, such as `truncate table`, the statement is executed successfully. For those that cannot be executed repeatedly, such as `create table`, the execution fails, and TiCDC ignores the error and continues the replication. - - Kafka sink - - Kafka sink provides different strategies for data distribution. You can distribute data to different Kafka partitions based on the table, primary key, or timestamp. This ensures that the updated data of a row is sent to the same partition in order. - - All these distribution strategies send Resolved TS messages to all topics and partitions periodically. This indicates that all messages earlier than the Resolved TS have been sent to the topics and partitions. The Kafka consumer can use the Resolved TS to sort the messages received. - - Kafka sink sends duplicated messages sometimes, but these duplicated messages do not affect the constraints of `Resolved Ts`. For example, if a changefeed is paused and then resumed, Kafka sink might send `msg1`, `msg2`, `msg3`, `msg2`, and `msg3` in order. You can filter the duplicated messages from Kafka consumers. + - The MySQL sink can execute DDL statements repeatedly. For DDL statements that can be executed repeatedly in the downstream, such as `TRUNCATE TABLE`, the statement is executed successfully. For those that cannot be executed repeatedly, such as `CREATE TABLE`, the execution fails, and TiCDC ignores the error and continues with the replication process. + - The Kafka sink provides different strategies for data distribution. + - You can distribute data to different Kafka partitions based on the table, primary key, or timestamp. This ensures that the updated data of a row is sent to the same partition in order. + - All these distribution strategies send `Resolved TS` messages to all topics and partitions periodically. 
This indicates that all messages earlier than the `Resolved TS` have already been sent to the topics and partitions. The Kafka consumer can use the `Resolved TS` to sort the messages received. + - The Kafka sink sometimes sends duplicated messages, but these duplicated messages do not affect the constraints of `Resolved Ts`. For example, if a changefeed is paused and then resumed, the Kafka sink might send `msg1`, `msg2`, `msg3`, `msg2`, and `msg3` in order. You can filter out the duplicated messages from Kafka consumers. ### Replication consistency - MySQL sink - - TiCDC enables redo log to ensure eventual consistency of data replication. - - TiCDC **ensures** that the order of single-row updates is consistent with that in the upstream. - - TiCDC does **not ensure** that the execution order of downstream transactions is the same as that of upstream transactions. + - TiCDC enables the redo log to ensure eventual consistency of data replication. + - TiCDC ensures that the order of single-row updates is consistent with the upstream. + - TiCDC does not ensure that the downstream transactions are executed in the same order as the upstream transactions. > **Note:** > - > Since v6.2, you can use the sink uri parameter [`transaction-atomicity`](/ticdc/ticdc-sink-to-mysql.md#configure-sink-uri-for-mysql-or-tidb) to control whether to split single-table transactions. Splitting single-table transactions can greatly reduce the latency and memory consumption of replicating large transactions. + > Since v6.2, you can use the sink URI parameter [`transaction-atomicity`](/ticdc/ticdc-sink-to-mysql.md#configure-sink-uri-for-mysql-or-tidb) to control whether to split single-table transactions. Splitting single-table transactions can greatly reduce the latency and memory consumption of replicating large transactions. ## TiCDC architecture -As an incremental data replication tool for TiDB, TiCDC is highly available through PD's etcd. 
The replication process is as follows: +TiCDC is an incremental data replication tool for TiDB, which is highly available through PD's etcd. The replication process consists of the following steps: 1. Multiple TiCDC processes pull data changes from TiKV nodes. -2. Data changes pulled from TiKV are sorted and merged internally. -3. Data changes are replicated to multiple downstream systems through multiple replication tasks (changefeeds). +2. TiCDC sorts and merges the data changes. +3. TiCDC replicates the data changes to multiple downstream systems through multiple replication tasks (changefeeds). -The architecture of TiCDC is shown in the following figure: +The architecture of TiCDC is illustrated in the following figure: ![TiCDC architecture](/media/ticdc/cdc-architecture.png) -The components in the preceding architecture diagram are described as follows: +The components in the architecture diagram are described as follows: -- TiKV Server: TiKV nodes in a TiDB cluster. When data changes, TiKV nodes send the changes as change logs (KV change logs) to TiCDC nodes. If TiCDC nodes find the change logs not continuous, they will actively request the TiKV nodes to provide change logs. -- TiCDC: TiCDC nodes where the TiCDC processes run. Each node runs a TiCDC process. Each process pulls data changes from one or more tables in TiKV nodes, and replicates the changes to the downstream system through the sink component. -- PD: The scheduling module in a TiDB cluster. This module is in charge of scheduling cluster data and usually consists of three PD nodes. PD provides high availability through the etcd cluster. In the etcd cluster, TiCDC stores its metadata, such as node status information and changefeed configurations. +- TiKV Server: TiKV nodes in a TiDB cluster. When data changes occur, TiKV nodes send the changes as change logs (KV change logs) to TiCDC nodes. 
If TiCDC nodes detect that the change logs are not continuous, they will actively request the TiKV nodes to provide change logs. +- TiCDC: TiCDC nodes where TiCDC processes run. Each node runs a TiCDC process. Each process pulls data changes from one or more tables in TiKV nodes and replicates the changes to the downstream system through the sink component. +- PD: The scheduling module in a TiDB cluster. This module is responsible for scheduling cluster data and usually consists of three PD nodes. PD provides high availability through the etcd cluster. In the etcd cluster, TiCDC stores its metadata, such as node status information and changefeed configurations. -As shown in the preceding architecture diagram, TiCDC supports replicating data to TiDB, MySQL, and Kafka databases. +As shown in the architecture diagram, TiCDC supports replicating data to TiDB, MySQL, and Kafka databases. ## Best practices -- When you use TiCDC to replicate data between two TiDB clusters and the network latency between the clusters is higher than 100 ms, it is recommended that you deploy TiCDC in the region (IDC) where the downstream TiDB cluster is located. -- TiCDC only replicates the table that has at least one **valid index**. A **valid index** is defined as follows: +- If the network latency between two TiDB clusters is higher than 100 ms, it is recommended to deploy TiCDC in the region (IDC) where the downstream TiDB cluster is located when replicating data between the two clusters. +- TiCDC only replicates tables that have at least one valid index. A valid index is defined as follows: - A primary key (`PRIMARY KEY`) is a valid index. - - A unique index (`UNIQUE INDEX`) is valid if every column of the index is explicitly defined as non-nullable (`NOT NULL`) and the index does not have the virtual generated column (`VIRTUAL GENERATED COLUMNS`). 
+ - A unique index (`UNIQUE INDEX`) is valid if every column of the index is explicitly defined as non-nullable (`NOT NULL`) and the index does not have a virtual generated column (`VIRTUAL GENERATED COLUMNS`). - To use TiCDC in disaster recovery scenarios, you need to configure [redo log](/ticdc/ticdc-sink-to-mysql.md#eventually-consistent-replication-in-disaster-scenarios). -- When you replicate a wide table with a large single row (greater than 1K), it is recommended that you configure [`per-table-memory-quota`](/ticdc/ticdc-server-config.md) so that `per-table-memory-quota` = `ticdcTotalMemory`/(`tableCount` * 2). `ticdcTotalMemory` is the memory of a TiCDC node, and `tableCount` is the number of target tables that a TiCDC node replicates. - -> **Note:** -> -> Since v4.0.8, TiCDC supports replicating tables **without a valid index** by modifying the task configuration. However, this compromises the guarantee of data consistency to some extent. For more details, see [Replicate tables without a valid index](/ticdc/ticdc-manage-changefeed.md#replicate-tables-without-a-valid-index). +- When you replicate a wide table with a large single row (greater than 1K), it is recommended to configure the [`per-table-memory-quota`](/ticdc/ticdc-server-config.md) so that `per-table-memory-quota` = `ticdcTotalMemory`/(`tableCount` * 2). `ticdcTotalMemory` is the memory of a TiCDC node, and `tableCount` is the number of target tables that a TiCDC node replicates. -### Unsupported scenarios +## Unsupported scenarios Currently, the following scenarios are not supported: -- The TiKV cluster that uses RawKV alone. -- The [DDL operation `CREATE SEQUENCE`](/sql-statements/sql-statement-create-sequence.md) and the [SEQUENCE function](/sql-statements/sql-statement-create-sequence.md#sequence-function) in TiDB. When the upstream TiDB uses `SEQUENCE`, TiCDC ignores `SEQUENCE` DDL operations/functions performed upstream. 
However, DML operations using `SEQUENCE` functions can be correctly replicated. +- A TiKV cluster that uses RawKV alone. +- The [`CREATE SEQUENCE` DDL operation](/sql-statements/sql-statement-create-sequence.md) and the [`SEQUENCE` function](/sql-statements/sql-statement-create-sequence.md#sequence-function) in TiDB. When the upstream TiDB uses `SEQUENCE`, TiCDC ignores `SEQUENCE` DDL operations/functions performed upstream. However, DML operations using `SEQUENCE` functions can be correctly replicated. -TiCDC only provides partial support for scenarios of large transactions in the upstream. For details, refer to [Does TiCDC support replicating large transactions? Is there any risk?](/ticdc/ticdc-faq.md#does-ticdc-support-replicating-large-transactions-is-there-any-risk). +TiCDC only partially supports scenarios involving large transactions in the upstream. For details, refer to the [TiCDC FAQ](/ticdc/ticdc-faq.md#does-ticdc-support-replicating-large-transactions-is-there-any-risk), where you can find details on whether TiCDC supports replicating large transactions and any associated risks. diff --git a/ticdc/ticdc-server-config.md b/ticdc/ticdc-server-config.md index 3135a7cee30aa..4f48585b3e2ad 100644 --- a/ticdc/ticdc-server-config.md +++ b/ticdc/ticdc-server-config.md @@ -23,7 +23,7 @@ The following are descriptions of options available in a `cdc server` command: - `cert`: Specifies the path of the certificate file in PEM format for TLS connection (optional). - `cert-allowed-cn`: Specifies the path of the common name in PEM format for TLS connection (optional). - `key`: Specifies the path of the private key file in PEM format for TLS connection (optional). -- `tz`: Time zone used by the TiCDC service. TiCDC uses this time zone when it internally converts time data types such as `TIMESTAMP` or when it replicates data to the downstream. The default is the local time zone in which the process runs. 
If you specify `time-zone` (in `sink-uri`) and `tz` at the time, the internal TiCDC processes use the time zone specified by `tz`, and the sink uses the time zone specified by `time-zone` for replicating data to the downstream. +- `tz`: Time zone used by the TiCDC service. TiCDC uses this time zone when it internally converts time data types such as `TIMESTAMP` or when it replicates data to the downstream. The default is the local time zone in which the process runs. If you specify `time-zone` (in `sink-uri`) and `tz` at the same time, the internal TiCDC processes use the time zone specified by `tz`, and the sink uses the time zone specified by `time-zone` for replicating data to the downstream. Make sure that the time zone specified by `tz` is the same as that specified by `time-zone` (in `sink-uri`). - `cluster-id`: (optional) The ID of the TiCDC cluster. The default value is `default`. `cluster-id` is the unique identifier of a TiCDC cluster. TiCDC nodes with the same `cluster-id` belong to the same cluster. The length of a `cluster-id` is 128 characters at most. `cluster-id` must follow the pattern of `^[a-zA-Z0-9]+(-[a-zA-Z0-9]+)*$` and cannot be one of the following: `owner`, `capture`, `task`, `changefeed`, `job`, and `meta`. 
## `cdc server` configuration file parameters diff --git a/ticdc/ticdc-sink-to-cloud-storage.md b/ticdc/ticdc-sink-to-cloud-storage.md index 8315b6bb57747..eda0e55e5b51f 100644 --- a/ticdc/ticdc-sink-to-cloud-storage.md +++ b/ticdc/ticdc-sink-to-cloud-storage.md @@ -27,6 +27,7 @@ The output is as follows: Info: {"upstream_id":7171388873935111376,"namespace":"default","id":"simple-replication-task","sink_uri":"s3://logbucket/storage_test?protocol=canal-json","create_time":"2022-11-29T18:52:05.566016967+08:00","start_ts":437706850431664129,"engine":"unified","config":{"case_sensitive":true,"enable_old_value":true,"force_replicate":false,"ignore_ineligible_table":false,"check_gc_safe_point":true,"enable_sync_point":false,"sync_point_interval":600000000000,"sync_point_retention":86400000000000,"filter":{"rules":["*.*"],"event_filters":null},"mounter":{"worker_num":16},"sink":{"protocol":"canal-json","schema_registry":"","csv":{"delimiter":",","quote":"\"","null":"\\N","include_commit_ts":false},"column_selectors":null,"transaction_atomicity":"none","encoder_concurrency":16,"terminator":"\r\n","date_separator":"none","enable_partition_separator":false},"consistent":{"level":"none","max_log_size":64,"flush_interval":2000,"storage":""}},"state":"normal","creator_version":"v6.5.0-master-dirty"} ``` +- `--server`: The address of any TiCDC server in the TiCDC cluster. - `--changefeed-id`: The ID of the changefeed. The format must match the `^[a-zA-Z0-9]+(\-[a-zA-Z0-9]+)*$` regular expression. If this ID is not specified, TiCDC automatically generates a UUID (the version 4 format) as the ID. - `--sink-uri`: The downstream address of the changefeed. For details, see [Configure sink URI](#configure-sink-uri). - `--start-ts`: The starting TSO of the changefeed. TiCDC starts pulling data from this TSO. The default value is the current time. 
diff --git a/ticdc/ticdc-sink-to-kafka.md b/ticdc/ticdc-sink-to-kafka.md index dbf06d54e8b81..c7942a1c44eaf 100644 --- a/ticdc/ticdc-sink-to-kafka.md +++ b/ticdc/ticdc-sink-to-kafka.md @@ -24,6 +24,7 @@ ID: simple-replication-task Info: {"sink-uri":"kafka://127.0.0.1:9092/topic-name?protocol=canal-json&kafka-version=2.4.0&partition-num=6&max-message-bytes=67108864&replication-factor=1","opts":{},"create-time":"2020-03-12T22:04:08.103600025+08:00","start-ts":415241823337054209,"target-ts":0,"admin-job-type":0,"sort-engine":"unified","sort-dir":".","config":{"case-sensitive":true,"filter":{"rules":["*.*"],"ignore-txn-start-ts":null,"ddl-allow-list":null},"mounter":{"worker-num":16},"sink":{"dispatchers":null},"scheduler":{"type":"table-number","polling-time":-1}},"state":"normal","history":null,"error":null} ``` +- `--server`: The address of any TiCDC server in the TiCDC cluster. - `--changefeed-id`: The ID of the replication task. The format must match the `^[a-zA-Z0-9]+(\-[a-zA-Z0-9]+)*$` regular expression. If this ID is not specified, TiCDC automatically generates a UUID (the version 4 format) as the ID. - `--sink-uri`: The downstream address of the replication task. For details, see [Configure sink URI with `kafka`](#configure-sink-uri-for-kafka). - `--start-ts`: Specifies the starting TSO of the changefeed. From this TSO, the TiCDC cluster starts pulling data. The default value is the current time. @@ -66,15 +67,16 @@ The following are descriptions of sink URI parameters and values that can be con | `ca` | The path of the CA certificate file needed to connect to the downstream Kafka instance (optional). | | `cert` | The path of the certificate file needed to connect to the downstream Kafka instance (optional). | | `key` | The path of the certificate key file needed to connect to the downstream Kafka instance (optional). 
| +| `insecure-skip-verify` | Whether to skip certificate verification when connecting to the downstream Kafka instance (optional, `false` by default). | | `sasl-user` | The identity (authcid) of SASL/PLAIN or SASL/SCRAM authentication needed to connect to the downstream Kafka instance (optional). | -| `sasl-password` | The password of SASL/PLAIN or SASL/SCRAM authentication needed to connect to the downstream Kafka instance (optional). | +| `sasl-password` | The password of SASL/PLAIN or SASL/SCRAM authentication needed to connect to the downstream Kafka instance (optional). If it contains special characters, they need to be URL encoded. | | `sasl-mechanism` | The name of SASL authentication needed to connect to the downstream Kafka instance. The value can be `plain`, `scram-sha-256`, `scram-sha-512`, or `gssapi`. | | `sasl-gssapi-auth-type` | The gssapi authentication type. Values can be `user` or `keytab` (optional). | | `sasl-gssapi-keytab-path` | The gssapi keytab path (optional).| | `sasl-gssapi-kerberos-config-path` | The gssapi kerberos configuration path (optional). | | `sasl-gssapi-service-name` | The gssapi service name (optional). | | `sasl-gssapi-user` | The user name of gssapi authentication (optional). | -| `sasl-gssapi-password` | The password of gssapi authentication (optional). | +| `sasl-gssapi-password` | The password of gssapi authentication (optional). If it contains special characters, they need to be URL encoded. | | `sasl-gssapi-realm` | The gssapi realm name (optional). | | `sasl-gssapi-disable-pafxfast` | Whether to disable the gssapi PA-FX-FAST (optional). | | `dial-timeout` | The timeout in establishing a connection with the downstream Kafka. The default value is `10s`. | @@ -133,7 +135,7 @@ The following are examples when using Kafka SASL authentication: The minimum set of permissions required for TiCDC to function properly is as follows. 
- - The `Create` and `Write` permissions for the Topic [resource type](https://docs.confluent.io/platform/current/kafka/authorization.html#resources). + - The `Create`, `Write`, and `Describe` permissions for the Topic [resource type](https://docs.confluent.io/platform/current/kafka/authorization.html#resources). - The `DescribeConfigs` permission for the Cluster resource type. ### Integrate TiCDC with Kafka Connect (Confluent Platform) @@ -159,7 +161,17 @@ For detailed integration guide, see [Quick Start Guide on Integrating TiDB with ### Matcher rules -In the example of the previous section: +Take the following configuration of `dispatchers` as an example: + +```toml +[sink] +dispatchers = [ + {matcher = ['test1.*', 'test2.*'], topic = "Topic expression 1", partition = "ts" }, + {matcher = ['test3.*', 'test4.*'], topic = "Topic expression 2", partition = "index-value" }, + {matcher = ['test1.*', 'test5.*'], topic = "Topic expression 3", partition = "table"}, + {matcher = ['test6.*'], partition = "ts"} +] +``` - For the tables that match the matcher rule, they are dispatched according to the policy specified by the corresponding topic expression. For example, the `test3.aa` table is dispatched according to "Topic expression 2"; the `test5.aa` table is dispatched according to "Topic expression 3". - For a table that matches multiple matcher rules, it is dispatched according to the first matching topic expression. For example, the `test1.aa` table is distributed according to "Topic expression 1". @@ -234,6 +246,12 @@ You can use `partition = "xxx"` to specify a partition dispatcher. It supports f > {matcher = ['*.*'], dispatcher = "ts", partition = "table"}, > ``` +> **Warning:** +> +> When the [Old Value feature](/ticdc/ticdc-manage-changefeed.md#output-the-historical-value-of-a-row-changed-event) is enabled (`enable-old-value = true`), using the index-value dispatcher might fail to ensure the order of row changes with the same index value. 
Therefore, it is recommended to use the default dispatcher. +> +> For more information, see [What changes occur to the change event format when TiCDC enables the Old Value feature?](/ticdc/ticdc-faq.md#what-changes-occur-to-the-change-event-format-when-ticdc-enables-the-old-value-feature). + ## Scale out the load of a single large table to multiple TiCDC nodes This feature splits the data replication range of a single large table into multiple ranges, according to the data volume and the number of modified rows per minute, and it makes the data volume and the number of modified rows replicated in each range approximately the same. This feature distributes these ranges to multiple TiCDC nodes for replication, so that multiple TiCDC nodes can replicate a large single table at the same time. This feature can solve the following two problems: @@ -268,3 +286,68 @@ You can query the number of Regions a table contains by the following SQL statem ```sql SELECT COUNT(*) FROM INFORMATION_SCHEMA.TIKV_REGION_STATUS WHERE DB_NAME="database1" AND TABLE_NAME="table1" AND IS_INDEX=0; ``` + +## Handle messages that exceed the Kafka topic limit + +Kafka topic sets a limit on the size of messages it can receive. This limit is controlled by the [`max.message.bytes`](https://kafka.apache.org/documentation/#topicconfigs_max.message.bytes) parameter. If TiCDC Kafka sink sends data that exceeds this limit, the changefeed reports an error and cannot proceed to replicate data. To solve this problem, TiCDC provides the following solution. + +### Send handle keys only + +Starting from v7.3.0, TiCDC Kafka sink supports sending only the handle keys when the message size exceeds the limit. This can significantly reduce the message size and avoid changefeed errors and task failures caused by the message size exceeding the Kafka topic limit. A handle key refers to the following: + +* If the table to be replicated has a primary key, the primary key is the handle key.
+* If the table does not have a primary key but has a NOT NULL unique key, the NOT NULL unique key is the handle key. + +Currently, this feature supports two encoding protocols: Canal-JSON and Open Protocol. When using the Canal-JSON protocol, you must specify `enable-tidb-extension=true` in `sink-uri`. + +The sample configuration is as follows: + +```toml +[sink.kafka-config.large-message-handle] +# This configuration is introduced in v7.3.0. +# Empty by default, which means when the message size exceeds the limit, the changefeed fails. +# If this configuration is set to "handle-key-only", when the message size exceeds the limit, only the handle key is sent in the data field. If the message size still exceeds the limit, the changefeed fails. +large-message-handle-option = "handle-key-only" +``` + +### Consume messages with handle keys only + +The message format with handle keys only is as follows: + +```json +{ + "id": 0, + "database": "test", + "table": "tp_int", + "pkNames": [ + "id" + ], + "isDdl": false, + "type": "INSERT", + "es": 1639633141221, + "ts": 1639633142960, + "sql": "", + "sqlType": { + "id": 4 + }, + "mysqlType": { + "id": "int" + }, + "data": [ + { + "id": "2" + } + ], + "old": null, + "_tidb": { // TiDB extension fields + "commitTs": 163963314122145239, + "onlyHandleKey": true + } +} +``` + +When a Kafka consumer receives a message, it first checks the `onlyHandleKey` field. If this field exists and is `true`, it means that the message only contains the handle key of the complete data. In this case, to get the complete data, you need to query the upstream TiDB and use [`tidb_snapshot` to read historical data](/read-historical-data.md). + +> **Warning:** +> +> When the Kafka consumer processes data and queries TiDB, the data might have been deleted by GC. You need to [modify the GC Lifetime of the TiDB cluster](/system-variables.md#tidb_gc_life_time-new-in-v50) to a larger value to avoid this situation.
diff --git a/ticdc/ticdc-sink-to-mysql.md b/ticdc/ticdc-sink-to-mysql.md index 6a7f5f1098f6a..9a53ec331ab29 100644 --- a/ticdc/ticdc-sink-to-mysql.md +++ b/ticdc/ticdc-sink-to-mysql.md @@ -24,6 +24,7 @@ ID: simple-replication-task Info: {"sink-uri":"mysql://root:123456@127.0.0.1:3306/","opts":{},"create-time":"2020-03-12T22:04:08.103600025+08:00","start-ts":415241823337054209,"target-ts":0,"admin-job-type":0,"sort-engine":"unified","sort-dir":".","config":{"case-sensitive":true,"filter":{"rules":["*.*"],"ignore-txn-start-ts":null,"ddl-allow-list":null},"mounter":{"worker-num":16},"sink":{"dispatchers":null},"scheduler":{"type":"table-number","polling-time":-1}},"state":"normal","history":null,"error":null} ``` +- `--server`: The address of any TiCDC server in the TiCDC cluster. - `--changefeed-id`: The ID of the replication task. The format must match the `^[a-zA-Z0-9]+(\-[a-zA-Z0-9]+)*$` regular expression. If this ID is not specified, TiCDC automatically generates a UUID (the version 4 format) as the ID. - `--sink-uri`: The downstream address of the replication task. For details, see [Configure sink URI with `mysql`/`tidb`](#configure-sink-uri-for-mysql-or-tidb). - `--start-ts`: Specifies the starting TSO of the changefeed. From this TSO, the TiCDC cluster starts pulling data. The default value is the current time. @@ -62,7 +63,7 @@ The following are descriptions of sink URI parameters and parameter values that | `ssl-ca` | The path of the CA certificate file needed to connect to the downstream MySQL instance (optional). | | `ssl-cert` | The path of the certificate file needed to connect to the downstream MySQL instance (optional). | | `ssl-key` | The path of the certificate key file needed to connect to the downstream MySQL instance (optional). | -| `time-zone` | The time zone used when connecting to the downstream MySQL instance, which is effective since v4.0.8. This is an optional parameter. 
If this parameter is not specified, the time zone of TiCDC service processes is used. If this parameter is set to an empty value, no time zone is specified when TiCDC connects to the downstream MySQL instance and the default time zone of the downstream is used. | +| `time-zone` | The time zone used when connecting to the downstream MySQL instance, which is effective since v4.0.8. This is an optional parameter. If this parameter is not specified, the time zone of TiCDC service processes is used. If this parameter is set to an empty value, such as `time-zone=""`, no time zone is specified when TiCDC connects to the downstream MySQL instance and the default time zone of the downstream is used. | | `transaction-atomicity` | The atomicity level of a transaction. This is an optional parameter, with the default value of `none`. When the value is `table`, TiCDC ensures the atomicity of a single-table transaction. When the value is `none`, TiCDC splits the single-table transaction. | To encode the database password in the sink URI using Base64, use the following command: diff --git a/ticdc/troubleshoot-ticdc.md b/ticdc/troubleshoot-ticdc.md index cfc39b51876b7..360868060d7a3 100644 --- a/ticdc/troubleshoot-ticdc.md +++ b/ticdc/troubleshoot-ticdc.md @@ -87,43 +87,9 @@ Warning: Unable to load '/usr/share/zoneinfo/zone.tab' as time zone. Skipping it Warning: Unable to load '/usr/share/zoneinfo/zone1970.tab' as time zone. Skipping it. ``` -If the downstream is a special MySQL environment (a public cloud RDS or some MySQL derivative versions) and importing the time zone using the above method fails, you need to specify the MySQL time zone of the downstream using the `time-zone` parameter in `sink-uri`. 
You can first query the time zone used by MySQL: +If the downstream is a special MySQL environment (a public cloud RDS or some MySQL derivative versions) and importing the time zone using the preceding method fails, you can use the default time zone of the downstream by setting `time-zone` to an empty value, such as `time-zone=""`. -1. Query the time zone used by MySQL: - - {{< copyable "sql" >}} - - ```sql - show variables like '%time_zone%'; - ``` - - ``` - +------------------+--------+ - | Variable_name | Value | - +------------------+--------+ - | system_time_zone | CST | - | time_zone | SYSTEM | - +------------------+--------+ - ``` - -2. Specify the time zone when you create the replication task and create the TiCDC service: - - {{< copyable "shell-regular" >}} - - ```shell - cdc cli changefeed create --sink-uri="mysql://root@127.0.0.1:3306/?time-zone=CST" --server=http://127.0.0.1:8300 - ``` - - > **Note:** - > - > CST might be an abbreviation for the following four different time zones: - > - > - Central Standard Time (USA) UT-6:00 - > - Central Standard Time (Australia) UT+9:30 - > - China Standard Time UT+8:00 - > - Cuba Standard Time UT-4:00 - > - > In China, CST usually stands for China Standard Time. +When using time zones in TiCDC, it is recommended to explicitly specify the time zone, such as `time-zone="Asia/Shanghai"`. Also, make sure that the `tz` specified in TiCDC server configurations and the `time-zone` specified in Sink URI are consistent with the time zone configuration of the downstream database. This prevents data inconsistency caused by inconsistent time zones. ## How do I handle the incompatibility issue of configuration files caused by TiCDC upgrade? 
diff --git a/tidb-binlog/get-started-with-tidb-binlog.md b/tidb-binlog/get-started-with-tidb-binlog.md index 652c495975c5b..0007ea0c43fde 100644 --- a/tidb-binlog/get-started-with-tidb-binlog.md +++ b/tidb-binlog/get-started-with-tidb-binlog.md @@ -43,7 +43,7 @@ sudo yum install -y mariadb-server ``` ```bash -curl -L https://download.pingcap.org/tidb-community-server-v7.0.0-linux-amd64.tar.gz | tar xzf - +curl -L https://download.pingcap.org/tidb-community-server-v7.3.0-linux-amd64.tar.gz | tar xzf - cd tidb-latest-linux-amd64 ``` diff --git a/tidb-cloud/changefeed-sink-to-mysql.md b/tidb-cloud/changefeed-sink-to-mysql.md index 3afc0ffdcec7a..e8f7fa872bd2d 100644 --- a/tidb-cloud/changefeed-sink-to-mysql.md +++ b/tidb-cloud/changefeed-sink-to-mysql.md @@ -24,11 +24,11 @@ Make sure that your TiDB Cluster can connect to the MySQL service. If your MySQL service is in an AWS VPC that has no public internet access, take the following steps: 1. [Set up a VPC peering connection](/tidb-cloud/set-up-vpc-peering-connections.md) between the VPC of the MySQL service and your TiDB cluster. -2. Modify the inbound rules of the security group that the MySQL service is associated with. +2. Modify the inbound rules of the security group that the MySQL service is associated with. You must add [the CIDR of the region where your TiDB Cloud cluster is located](/tidb-cloud/set-up-vpc-peering-connections.md#prerequisite-set-a-project-cidr) to the inbound rules. Doing so allows the traffic to flow from your TiDB Cluster to the MySQL instance. -3. If the MySQL URL contains a hostname, you need to allow TiDB Cloud to be able to resolve the DNS hostname of the MySQL service. +3. If the MySQL URL contains a hostname, you need to allow TiDB Cloud to be able to resolve the DNS hostname of the MySQL service. 1. Follow the steps in [Enable DNS resolution for a VPC peering connection](https://docs.aws.amazon.com/vpc/latest/peering/modify-peering-connections.html#vpc-peering-dns). 2. 
Enable the **Accepter DNS resolution** option. @@ -36,10 +36,10 @@ If your MySQL service is in an AWS VPC that has no public internet access, take If your MySQL service is in a GCP VPC that has no public internet access, take the following steps: 1. If your MySQL service is Google Cloud SQL, you must expose a MySQL endpoint in the associated VPC of the Google Cloud SQL instance. You may need to use the [**Cloud SQL Auth proxy**](https://cloud.google.com/sql/docs/mysql/sql-proxy) which is developed by Google. -2. [Set up a VPC peering connection](/tidb-cloud/set-up-vpc-peering-connections.md) between the VPC of the MySQL service and your TiDB cluster. +2. [Set up a VPC peering connection](/tidb-cloud/set-up-vpc-peering-connections.md) between the VPC of the MySQL service and your TiDB cluster. 3. Modify the ingress firewall rules of the VPC where MySQL is located. - You must add [the CIDR of the region where your TiDB Cloud cluster is located](/tidb-cloud/set-up-vpc-peering-connections.md#prerequisite-set-a-project-cidr) to the ingress firewall rules. Doing so allows the traffic to flow from your TiDB Cluster to the MySQL endpoint. + You must add [the CIDR of the region where your TiDB Cloud cluster is located](/tidb-cloud/set-up-vpc-peering-connections.md#prerequisite-set-a-project-cidr) to the ingress firewall rules. Doing so allows the traffic to flow from your TiDB Cluster to the MySQL endpoint. ### Full load data @@ -70,7 +70,7 @@ The **Sink to MySQL** connector can only sink incremental data from your TiDB cl Log: tidb-binlog Pos: 420747102018863124 Finished dump at: 2020-11-10 10:40:20 - ``` + ``` ## Create a MySQL sink @@ -102,7 +102,7 @@ After completing the prerequisites, you can sink your data to MySQL. 7. Click **Next** to review the Changefeed configuration. If you confirm all configurations are correct, check the compliance of cross-region replication, and click **Create**. 
- + If you want to modify some configurations, click **Previous** to go back to the previous configuration page. 8. The sink starts soon, and you can see the status of the sink changes from "**Creating**" to "**Running**". diff --git a/tidb-cloud/integrate-tidbcloud-with-dbt.md b/tidb-cloud/integrate-tidbcloud-with-dbt.md index 3946c22a39339..1f4b6f62da682 100644 --- a/tidb-cloud/integrate-tidbcloud-with-dbt.md +++ b/tidb-cloud/integrate-tidbcloud-with-dbt.md @@ -77,7 +77,7 @@ To configure the project, take the following steps: In the editor, add the following configuration: - ```yaml + ```yaml jaffle_shop_tidb: # Project name target: dev # Target outputs: diff --git a/tidb-cloud/integrate-tidbcloud-with-zapier.md b/tidb-cloud/integrate-tidbcloud-with-zapier.md index 1af0879314d73..4b3442919251f 100644 --- a/tidb-cloud/integrate-tidbcloud-with-zapier.md +++ b/tidb-cloud/integrate-tidbcloud-with-zapier.md @@ -119,15 +119,15 @@ In the editor page, you can see the trigger and action. Click the trigger to set Click **Test action** to create a new row in the table. If you check your TiDB Cloud cluster, you can find the data is written successfully. 
- ```sql - mysql> SELECT * FROM test.github_global_event; - +-------------+-------------+------------+-----------------+----------------------------------------------+--------+---------------------+ - | id | type | actor | repo_name | repo_url | public | created_at | - +-------------+-------------+------------+-----------------+----------------------------------------------+--------+---------------------+ - | 25324462424 | CreateEvent | shiyuhang0 | shiyuhang0/docs | https://api.github.com/repos/shiyuhang0/docs | True | 2022-11-18 08:03:14 | - +-------------+-------------+------------+-----------------+----------------------------------------------+--------+---------------------+ - 1 row in set (0.17 sec) - ``` + ```sql + mysql> SELECT * FROM test.github_global_event; + +-------------+-------------+------------+-----------------+----------------------------------------------+--------+---------------------+ + | id | type | actor | repo_name | repo_url | public | created_at | + +-------------+-------------+------------+-----------------+----------------------------------------------+--------+---------------------+ + | 25324462424 | CreateEvent | shiyuhang0 | shiyuhang0/docs | https://api.github.com/repos/shiyuhang0/docs | True | 2022-11-18 08:03:14 | + +-------------+-------------+------------+-----------------+----------------------------------------------+--------+---------------------+ + 1 row in set (0.17 sec) + ``` ### Step 5: Publish your zap diff --git a/tidb-cloud/migrate-sql-shards.md b/tidb-cloud/migrate-sql-shards.md index 5d996a5ff4290..7d8fb24360aa3 100644 --- a/tidb-cloud/migrate-sql-shards.md +++ b/tidb-cloud/migrate-sql-shards.md @@ -125,12 +125,12 @@ In this example, the column IDs of the upstream tables `sale_01` and `sale_02` a ```sql mysql> CREATE TABLE `sales` ( - -> `id` bigint(20) NOT NULL , - -> `uid` varchar(40) NOT NULL, - -> `sale_num` bigint DEFAULT NULL, - -> INDEX (`id`), - -> UNIQUE KEY `ind_uid` (`uid`) - -> ); + `id` bigint(20) NOT 
NULL , + `uid` varchar(40) NOT NULL, + `sale_num` bigint DEFAULT NULL, + INDEX (`id`), + UNIQUE KEY `ind_uid` (`uid`) + ); Query OK, 0 rows affected (0.17 sec) ``` @@ -406,7 +406,7 @@ The TiDB Cloud console does not provide any feature about incremental data repli host: "tidb.xxxxxxx.xxxxxxxxx.ap-northeast-1.prod.aws.tidbcloud.com" port: 4000 user: "root" - password: "${password}" # If the password is not empty, it is recommended to use a dmctl-encrypted cipher. + password: "${password}" # If the password is not empty, it is recommended to use a dmctl-encrypted cipher. ## ******** Function Configuration ********** routes: diff --git a/tidb-cloud/notification-2023-08-31-console-maintenance b/tidb-cloud/notification-2023-08-31-console-maintenance new file mode 100644 index 0000000000000..1ab04aea1fbea --- /dev/null +++ b/tidb-cloud/notification-2023-08-31-console-maintenance @@ -0,0 +1,89 @@ +--- +title: "[2023-08-31] TiDB Cloud Console Maintenance Notification" +summary: Disruptions for functionalities involving the creation and updating within the TiDB Cloud Console UI and Open API due to updates to the meta database services. +--- + +# [2023-08-31] TiDB Cloud Console Maintenance Notification + +## Maintenance window + +- Date: 2023-08-31 +- Start time: 8:00 (UTC+0) +- End time: 10:00 (UTC+0) +- Duration: Approximately 2 hours + +> **Note:** +> +> Currently, the overall maintenance schedule for the TiDB Cloud Console does not support user modifications to the maintenance timing. + +## Reason for maintenance + +We're upgrading the meta database services of the TiDB Cloud Console to enhance performance and efficiency, delivering a better experience for all users. This is part of our ongoing commitment to providing high-quality services. + +## Impact + +During the maintenance window, you might experience intermittent disruptions for functionalities involving the creation and updating within the TiDB Cloud Console UI and Open API.
However, your TiDB cluster will maintain its regular operations for data read and write, ensuring no adverse effects on your online business. + +### Affected features on Console UI + +- Cluster level + - Cluster management + - Create clusters + - Delete clusters + - Scale clusters + - Pause or Resume clusters + - Change cluster password + - Change cluster traffic filter + - Import + - Create an import job + - Data Migration + - Create a migration job + - Changefeed + - Create a changefeed job + - Backup + - Create a manual backup job + - Auto backup job + - Restore + - Create a restore job + - Database audit log + - Test connectivity + - Add or delete access record + - Enable or disable Database audit logging + - Restart database audit logging +- Project level + - Network access + - Create a private endpoint + - Delete a private endpoint + - Add VPC Peering + - Delete VPC Peering + - Maintenance + - Change maintenance window + - Defer task + - Recycle Bin + - Delete clusters + - Delete backups + - Restore clusters + +### Affected features on Console Open API + +- Cluster management + - [CreateCluster](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Cluster/operation/CreateCluster) + - [DeleteCluster](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Cluster/operation/DeleteCluster) + - [UpdateCluster](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Cluster/operation/UpdateCluster) + - [CreateAwsCmek](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Cluster/operation/CreateAwsCmek) +- Backup + - [CreateBackup](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Backup/operation/CreateBackup) + - [DeleteBackup](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Backup/operation/DeleteBackup) +- Restore + - [CreateRestoreTask](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Restore/operation/CreateRestoreTask) +- Import + - [CreateImportTask](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Import/operation/CreateImportTask) + -
[UpdateImportTask](https://docs.pingcap.com/tidbcloud/api/v1beta#tag/Import/operation/UpdateImportTask) + +## Completion and resumption + +Once the maintenance is successfully completed, the affected functionalities will be reinstated, offering you an even better experience. + +## Get support + +If you have any questions or need assistance, contact our [support team](https://docs.pingcap.com/tidbcloud/tidb-cloud-support#tidb-cloud-support). We are here to address your concerns and provide any necessary guidance. diff --git a/tidb-cloud/terraform-get-tidbcloud-provider.md b/tidb-cloud/terraform-get-tidbcloud-provider.md index d3d0d80b8e6b0..4a0dd59ca150b 100644 --- a/tidb-cloud/terraform-get-tidbcloud-provider.md +++ b/tidb-cloud/terraform-get-tidbcloud-provider.md @@ -43,43 +43,43 @@ For detailed steps, see [TiDB Cloud API documentation](https://docs.pingcap.com/ 1. Create a `main.tf` file: - ``` - terraform { - required_providers { - tidbcloud = { - source = "tidbcloud/tidbcloud" - version = "~> 0.1.0" - } - } - required_version = ">= 1.0.0" - } - ``` - - - The `source` attribute specifies the target Terraform provider to be downloaded from [Terraform Registry](https://registry.terraform.io/). - - The `version` attribute is optional, which specifies the version of the Terraform provider. If it is not specified, the latest provider version is used by default. - - The `required_version` is optional, which specifies the version of Terraform. If it is not specified, the latest Terraform version is used by default. + ``` + terraform { + required_providers { + tidbcloud = { + source = "tidbcloud/tidbcloud" + version = "~> 0.1.0" + } + } + required_version = ">= 1.0.0" + } + ``` + + - The `source` attribute specifies the target Terraform provider to be downloaded from [Terraform Registry](https://registry.terraform.io/). + - The `version` attribute is optional, which specifies the version of the Terraform provider. 
If it is not specified, the latest provider version is used by default. + - The `required_version` is optional, which specifies the version of Terraform. If it is not specified, the latest Terraform version is used by default. 2. Run the `terraform init` command to download TiDB Cloud Terraform Provider from Terraform Registry. - ``` - $ terraform init + ``` + $ terraform init - Initializing the backend... + Initializing the backend... - Initializing provider plugins... - - Reusing previous version of tidbcloud/tidbcloud from the dependency lock file - - Using previously-installed tidbcloud/tidbcloud v0.1.0 + Initializing provider plugins... + - Reusing previous version of tidbcloud/tidbcloud from the dependency lock file + - Using previously-installed tidbcloud/tidbcloud v0.1.0 - Terraform has been successfully initialized! + Terraform has been successfully initialized! - You may now begin working with Terraform. Try running "terraform plan" to see - any changes that are required for your infrastructure. All Terraform commands - should now work. + You may now begin working with Terraform. Try running "terraform plan" to see + any changes that are required for your infrastructure. All Terraform commands + should now work. - If you ever set or change modules or backend configuration for Terraform, - rerun this command to reinitialize your working directory. If you forget, other - commands will detect it and remind you to do so if necessary. - ``` + If you ever set or change modules or backend configuration for Terraform, + rerun this command to reinitialize your working directory. If you forget, other + commands will detect it and remind you to do so if necessary. + ``` ## Step 4. 
Configure TiDB Cloud Terraform Provider with the API key diff --git a/tidb-cloud/terraform-use-cluster-resource.md b/tidb-cloud/terraform-use-cluster-resource.md index a672b723effa7..2cd11ac54a499 100644 --- a/tidb-cloud/terraform-use-cluster-resource.md +++ b/tidb-cloud/terraform-use-cluster-resource.md @@ -21,96 +21,96 @@ To view the information of all available projects, you can use the `tidbcloud_pr 1. In the `main.tf` file that is created when you [Get TiDB Cloud Terraform Provider](/tidb-cloud/terraform-get-tidbcloud-provider.md), add the `data` and `output` blocks as follows: - ``` - terraform { - required_providers { - tidbcloud = { - source = "tidbcloud/tidbcloud" - version = "~> 0.1.0" - } - } - required_version = ">= 1.0.0" - } - - provider "tidbcloud" { - public_key = "fake_public_key" - private_key = "fake_private_key" - } - - data "tidbcloud_projects" "example_project" { - page = 1 - page_size = 10 - } - - output "projects" { - value = data.tidbcloud_projects.example_project.items - } - ``` - - - Use the `data` block to define the data source of TiDB Cloud, including the data source type and the data source name. - - - To use the projects data source, set the data source type as `tidbcloud_projects`. - - For the data source name, you can define it according to your need. For example, "example_project". - - For the `tidbcloud_projects` data source, you can use the `page` and `page_size` attributes to limit the maximum number of projects you want to check. - - - Use the `output` block to define the data source information to be displayed in the output, and expose the information for other Terraform configurations to use. 
+ ``` + terraform { + required_providers { + tidbcloud = { + source = "tidbcloud/tidbcloud" + version = "~> 0.1.0" + } + } + required_version = ">= 1.0.0" + } + + provider "tidbcloud" { + public_key = "fake_public_key" + private_key = "fake_private_key" + } + + data "tidbcloud_projects" "example_project" { + page = 1 + page_size = 10 + } + + output "projects" { + value = data.tidbcloud_projects.example_project.items + } + ``` + + - Use the `data` block to define the data source of TiDB Cloud, including the data source type and the data source name. + + - To use the projects data source, set the data source type as `tidbcloud_projects`. + - For the data source name, you can define it according to your need. For example, "example_project". + - For the `tidbcloud_projects` data source, you can use the `page` and `page_size` attributes to limit the maximum number of projects you want to check. + + - Use the `output` block to define the data source information to be displayed in the output, and expose the information for other Terraform configurations to use. The `output` block works similarly to returned values in programming languages. See [Terraform documentation](https://www.terraform.io/language/values/outputs) for more details. - To get all the available configurations for the resources and data sources, see this [configuration documentation](https://registry.terraform.io/providers/tidbcloud/tidbcloud/latest/docs). + To get all the available configurations for the resources and data sources, see this [configuration documentation](https://registry.terraform.io/providers/tidbcloud/tidbcloud/latest/docs). 2. Run the `terraform apply` command to apply the configurations. You need to type `yes` at the confirmation prompt to proceed. 
- To skip the prompt, use `terraform apply --auto-approve`: - - ``` - $ terraform apply --auto-approve - - Changes to Outputs: - + projects = [ - + { - + cluster_count = 0 - + create_timestamp = "1649154426" - + id = "1372813089191121286" - + name = "test1" - + org_id = "1372813089189921287" - + user_count = 1 - }, - + { - + cluster_count = 1 - + create_timestamp = "1640602740" - + id = "1372813089189561287" - + name = "default project" - + org_id = "1372813089189921287" - + user_count = 1 - }, - ] - - You can apply this plan to save these new output values to the Terraform state, without changing any real infrastructure. - - Apply complete! Resources: 0 added, 0 changed, 0 destroyed. - - Outputs: - - projects = tolist([ - { - "cluster_count" = 0 - "create_timestamp" = "1649154426" - "id" = "1372813089191121286" - "name" = "test1" - "org_id" = "1372813089189921287" - "user_count" = 1 - }, - { - "cluster_count" = 1 - "create_timestamp" = "1640602740" - "id" = "1372813089189561287" - "name" = "default project" - "org_id" = "1372813089189921287" - "user_count" = 1 - }, - ]) - ``` + To skip the prompt, use `terraform apply --auto-approve`: + + ``` + $ terraform apply --auto-approve + + Changes to Outputs: + + projects = [ + + { + + cluster_count = 0 + + create_timestamp = "1649154426" + + id = "1372813089191121286" + + name = "test1" + + org_id = "1372813089189921287" + + user_count = 1 + }, + + { + + cluster_count = 1 + + create_timestamp = "1640602740" + + id = "1372813089189561287" + + name = "default project" + + org_id = "1372813089189921287" + + user_count = 1 + }, + ] + + You can apply this plan to save these new output values to the Terraform state, without changing any real infrastructure. + + Apply complete! Resources: 0 added, 0 changed, 0 destroyed. 
+ + Outputs: + + projects = tolist([ + { + "cluster_count" = 0 + "create_timestamp" = "1649154426" + "id" = "1372813089191121286" + "name" = "test1" + "org_id" = "1372813089189921287" + "user_count" = 1 + }, + { + "cluster_count" = 1 + "create_timestamp" = "1640602740" + "id" = "1372813089189561287" + "name" = "default project" + "org_id" = "1372813089189921287" + "user_count" = 1 + }, + ]) + ``` Now, you can get all the available projects from the output. Copy one of the project IDs that you need. @@ -149,7 +149,7 @@ To get the cluster specification information, you can use the `tidbcloud_cluster
Cluster specification - + ``` { "cloud_provider" = "AWS" @@ -294,20 +294,20 @@ The following example shows how to create a Dedicated Tier cluster. 2. Create a `cluster.tf` file: ``` - terraform { - required_providers { - tidbcloud = { - source = "tidbcloud/tidbcloud" - version = "~> 0.1.0" - } - } - required_version = ">= 1.0.0" - } + terraform { + required_providers { + tidbcloud = { + source = "tidbcloud/tidbcloud" + version = "~> 0.1.0" + } + } + required_version = ">= 1.0.0" + } - provider "tidbcloud" { - public_key = "fake_public_key" - private_key = "fake_private_key" - } + provider "tidbcloud" { + public_key = "fake_public_key" + private_key = "fake_private_key" + } resource "tidbcloud_cluster" "example_cluster" { project_id = "1372813089189561287" @@ -343,7 +343,7 @@ The following example shows how to create a Dedicated Tier cluster. ```shell $ terraform apply - + Terraform will perform the following actions: # tidbcloud_cluster.example_cluster will be created @@ -387,11 +387,11 @@ The following example shows how to create a Dedicated Tier cluster. Enter a value: ``` - As in the above result, Terraform generates an execution plan for you, which describes the actions Terraform will take: + As in the above result, Terraform generates an execution plan for you, which describes the actions Terraform will take: - - You can check the difference between the configurations and the states. - - You can also see the results of this `apply`. It will add a new resource, and no resource will be changed or destroyed. - - The `known after apply` shows that you will get the value after `apply`. + - You can check the difference between the configurations and the states. + - You can also see the results of this `apply`. It will add a new resource, and no resource will be changed or destroyed. + - The `known after apply` shows that you will get the value after `apply`. 4. 
If everything in your plan looks fine, type `yes` to continue: @@ -624,75 +624,75 @@ You can scale a TiDB cluster when its status is `AVAILABLE`. For example, to add one more node for TiDB, 3 more nodes for TiKV (The number of TiKV nodes needs to be a multiple of 3 for its step is 3. You can [get this information from the cluster specification](#get-cluster-specification-information-using-the-tidbcloud_cluster_specs-data-source)), and one more node for TiFlash, you can edit the configurations as follows: - ``` - components = { - tidb = { - node_size : "8C16G" - node_quantity : 2 - } - tikv = { - node_size : "8C32G" - storage_size_gib : 500 - node_quantity : 6 - } - tiflash = { - node_size : "8C64G" - storage_size_gib : 500 - node_quantity : 2 - } - } - ``` + ``` + components = { + tidb = { + node_size : "8C16G" + node_quantity : 2 + } + tikv = { + node_size : "8C32G" + storage_size_gib : 500 + node_quantity : 6 + } + tiflash = { + node_size : "8C64G" + storage_size_gib : 500 + node_quantity : 2 + } + } + ``` 2. Run the `terraform apply` command and type `yes` for confirmation: - ``` - $ terraform apply - - tidbcloud_cluster.example_cluster: Refreshing state... [id=1379661944630234067] - - Terraform used the selected providers to generate the following execution plan. 
Resource actions are indicated with the following symbols: - ~ update in-place - - Terraform will perform the following actions: - - # tidbcloud_cluster.example_cluster will be updated in-place - ~ resource "tidbcloud_cluster" "example_cluster" { - ~ config = { - ~ components = { - ~ tidb = { - ~ node_quantity = 1 -> 2 - # (1 unchanged attribute hidden) - } - ~ tiflash = { - ~ node_quantity = 1 -> 2 - # (2 unchanged attributes hidden) - } - ~ tikv = { - ~ node_quantity = 3 -> 6 - # (2 unchanged attributes hidden) - } - } - # (3 unchanged attributes hidden) - } - id = "1379661944630234067" - name = "firstCluster" - ~ status = "AVAILABLE" -> (known after apply) - # (4 unchanged attributes hidden) - } - - Plan: 0 to add, 1 to change, 0 to destroy. - - Do you want to perform these actions? - Terraform will perform the actions described above. - Only 'yes' will be accepted to approve. - - Enter a value: yes - - tidbcloud_cluster.example_cluster: Modifying... [id=1379661944630234067] - tidbcloud_cluster.example_cluster: Modifications complete after 2s [id=1379661944630234067] - - Apply complete! Resources: 0 added, 1 changed, 0 destroyed. - ``` + ``` + $ terraform apply + + tidbcloud_cluster.example_cluster: Refreshing state... [id=1379661944630234067] + + Terraform used the selected providers to generate the following execution plan. 
Resource actions are indicated with the following symbols: + ~ update in-place + + Terraform will perform the following actions: + + # tidbcloud_cluster.example_cluster will be updated in-place + ~ resource "tidbcloud_cluster" "example_cluster" { + ~ config = { + ~ components = { + ~ tidb = { + ~ node_quantity = 1 -> 2 + # (1 unchanged attribute hidden) + } + ~ tiflash = { + ~ node_quantity = 1 -> 2 + # (2 unchanged attributes hidden) + } + ~ tikv = { + ~ node_quantity = 3 -> 6 + # (2 unchanged attributes hidden) + } + } + # (3 unchanged attributes hidden) + } + id = "1379661944630234067" + name = "firstCluster" + ~ status = "AVAILABLE" -> (known after apply) + # (4 unchanged attributes hidden) + } + + Plan: 0 to add, 1 to change, 0 to destroy. + + Do you want to perform these actions? + Terraform will perform the actions described above. + Only 'yes' will be accepted to approve. + + Enter a value: yes + + tidbcloud_cluster.example_cluster: Modifying... [id=1379661944630234067] + tidbcloud_cluster.example_cluster: Modifications complete after 2s [id=1379661944630234067] + + Apply complete! Resources: 0 added, 1 changed, 0 destroyed. + ``` Wait for the status to turn from `MODIFYING` to `AVAILABLE`. @@ -705,143 +705,143 @@ You can pause a cluster when its status is `AVAILABLE` or resume a cluster when 1. In the `cluster.tf` file that is used when you [create the cluster](#create-a-cluster-using-the-cluster-resource), add `pause = true` to the `config` configurations: - ``` - config = { - paused = true - root_password = "Your_root_password1." - port = 4000 - ... - } - ``` + ``` + config = { + paused = true + root_password = "Your_root_password1." + port = 4000 + ... + } + ``` 2. Run the `terraform apply` command and type `yes` after check: - ``` - $ terraform apply + ``` + $ terraform apply - tidbcloud_cluster.example_cluster: Refreshing state... [id=1379661944630234067] + tidbcloud_cluster.example_cluster: Refreshing state... 
[id=1379661944630234067] - Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols: - ~ update in-place + Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols: + ~ update in-place - Terraform will perform the following actions: + Terraform will perform the following actions: - # tidbcloud_cluster.example_cluster will be updated in-place - ~ resource "tidbcloud_cluster" "example_cluster" { - ~ config = { - + paused = true - # (4 unchanged attributes hidden) - } - id = "1379661944630234067" - name = "firstCluster" - ~ status = "AVAILABLE" -> (known after apply) - # (4 unchanged attributes hidden) - } + # tidbcloud_cluster.example_cluster will be updated in-place + ~ resource "tidbcloud_cluster" "example_cluster" { + ~ config = { + + paused = true + # (4 unchanged attributes hidden) + } + id = "1379661944630234067" + name = "firstCluster" + ~ status = "AVAILABLE" -> (known after apply) + # (4 unchanged attributes hidden) + } - Plan: 0 to add, 1 to change, 0 to destroy. + Plan: 0 to add, 1 to change, 0 to destroy. - Do you want to perform these actions? - Terraform will perform the actions described above. - Only 'yes' will be accepted to approve. + Do you want to perform these actions? + Terraform will perform the actions described above. + Only 'yes' will be accepted to approve. - Enter a value: yes + Enter a value: yes - tidbcloud_cluster.example_cluster: Modifying... [id=1379661944630234067] - tidbcloud_cluster.example_cluster: Modifications complete after 2s [id=1379661944630234067] + tidbcloud_cluster.example_cluster: Modifying... [id=1379661944630234067] + tidbcloud_cluster.example_cluster: Modifications complete after 2s [id=1379661944630234067] - Apply complete! Resources: 0 added, 1 changed, 0 destroyed. - ``` + Apply complete! Resources: 0 added, 1 changed, 0 destroyed. + ``` 3. 
Use the `terraform state show tidbcloud_cluster.${resource-name}` command to check the status: - ``` - $ terraform state show tidbcloud_cluster.example_cluster - - # tidbcloud_cluster.example_cluster: - resource "tidbcloud_cluster" "example_cluster" { - cloud_provider = "AWS" - cluster_type = "DEDICATED" - config = { - components = { - tidb = { - node_quantity = 2 - node_size = "8C16G" - } - tiflash = { - node_quantity = 2 - node_size = "8C64G" - storage_size_gib = 500 - } - tikv = { - node_quantity = 6 - node_size = "8C32G" - storage_size_gib = 500 - } - } - ip_access_list = [ - # (1 unchanged element hidden) - ] - paused = true - port = 4000 - root_password = "Your_root_password1." - } - id = "1379661944630234067" - name = "firstCluster" - project_id = "1372813089189561287" - region = "eu-central-1" - status = "PAUSED" - } - ``` + ``` + $ terraform state show tidbcloud_cluster.example_cluster + + # tidbcloud_cluster.example_cluster: + resource "tidbcloud_cluster" "example_cluster" { + cloud_provider = "AWS" + cluster_type = "DEDICATED" + config = { + components = { + tidb = { + node_quantity = 2 + node_size = "8C16G" + } + tiflash = { + node_quantity = 2 + node_size = "8C64G" + storage_size_gib = 500 + } + tikv = { + node_quantity = 6 + node_size = "8C32G" + storage_size_gib = 500 + } + } + ip_access_list = [ + # (1 unchanged element hidden) + ] + paused = true + port = 4000 + root_password = "Your_root_password1." + } + id = "1379661944630234067" + name = "firstCluster" + project_id = "1372813089189561287" + region = "eu-central-1" + status = "PAUSED" + } + ``` 4. When you need to resume the cluster, set `paused = false`: - ``` - config = { - paused = false - root_password = "Your_root_password1." - port = 4000 - ... - } - ``` + ``` + config = { + paused = false + root_password = "Your_root_password1." + port = 4000 + ... + } + ``` 5. Run the `terraform apply` command and type `yes` for confirmation. 
If you use the `terraform state show tidbcloud_cluster.${resource-name}` command to check the status, you will find it turns to `RESUMING`: - ``` - # tidbcloud_cluster.example_cluster: - resource "tidbcloud_cluster" "example_cluster" { - cloud_provider = "AWS" - cluster_type = "DEDICATED" - config = { - components = { - tidb = { - node_quantity = 2 - node_size = "8C16G" - } - tiflash = { - node_quantity = 2 - node_size = "8C64G" - storage_size_gib = 500 - } - tikv = { - node_quantity = 6 - node_size = "8C32G" - storage_size_gib = 500 - } - } - ip_access_list = [ - # (1 unchanged element hidden) - ] - paused = false - port = 4000 - root_password = "Your_root_password1." - } - id = "1379661944630234067" - name = "firstCluster" - project_id = "1372813089189561287" - region = "eu-central-1" - status = "RESUMING" - } - ``` + ``` + # tidbcloud_cluster.example_cluster: + resource "tidbcloud_cluster" "example_cluster" { + cloud_provider = "AWS" + cluster_type = "DEDICATED" + config = { + components = { + tidb = { + node_quantity = 2 + node_size = "8C16G" + } + tiflash = { + node_quantity = 2 + node_size = "8C64G" + storage_size_gib = 500 + } + tikv = { + node_quantity = 6 + node_size = "8C32G" + storage_size_gib = 500 + } + } + ip_access_list = [ + # (1 unchanged element hidden) + ] + paused = false + port = 4000 + root_password = "Your_root_password1." + } + id = "1379661944630234067" + name = "firstCluster" + project_id = "1372813089189561287" + region = "eu-central-1" + status = "RESUMING" + } + ``` 6. Wait for a moment, then use the `terraform refresh` command to update the state. The status will be changed to `AVAILABLE` finally. 
@@ -857,20 +857,20 @@ For example, you can import a cluster that is not created by Terraform or import ``` terraform { - required_providers { - tidbcloud = { - source = "tidbcloud/tidbcloud" - version = "~> 0.1.0" - } - } - required_version = ">= 1.0.0" - } + required_providers { + tidbcloud = { + source = "tidbcloud/tidbcloud" + version = "~> 0.1.0" + } + } + required_version = ">= 1.0.0" + } resource "tidbcloud_cluster" "import_cluster" {} ``` 2. Import the cluster by `terraform import tidbcloud_cluster.import_cluster projectId,clusterId`: - For example: + For example: ``` $ terraform import tidbcloud_cluster.import_cluster 1372813089189561287,1379661944630264072 @@ -973,7 +973,7 @@ For example, you can import a cluster that is not created by Terraform or import Apply complete! Resources: 0 added, 0 changed, 0 destroyed. ``` -Now you can use Terraform to manage the cluster. +Now you can use Terraform to manage the cluster. ## Delete a cluster diff --git a/tidb-cloud/tidb-cloud-guide-sample-application-java.md b/tidb-cloud/tidb-cloud-guide-sample-application-java.md index 043cbf8791f73..f467df97fc9ba 100644 --- a/tidb-cloud/tidb-cloud-guide-sample-application-java.md +++ b/tidb-cloud/tidb-cloud-guide-sample-application-java.md @@ -23,9 +23,9 @@ This document describes how to use TiDB and Java to build a simple CRUD applicat The following introduces how to start a TiDB cluster. -**Use a TiDB Cloud Serverless Tier cluster** +**Use a TiDB Serverless cluster** -For detailed steps, see [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +For detailed steps, see [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). 
**Use a local cluster** @@ -35,7 +35,7 @@ For detailed steps, see [Deploy a local test cluster](/quick-start-with-tidb.md# -See [Create a Serverless Tier cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-serverless-tier-cluster). +See [Create a TiDB Serverless cluster](/develop/dev-guide-build-cluster-in-cloud.md#step-1-create-a-tidb-serverless-cluster). @@ -294,7 +294,7 @@ public interface PlayerMapper { id, coins, goods - select + select from player where `id` = #{id,jdbcType=VARCHAR} @@ -1449,7 +1449,7 @@ When using JDBC, you need to connect to your cluster and run the statement in th
-If you are using a TiDB Cloud Serverless Tier cluster, modify the `dataSource.url`, `dataSource.username`, `dataSource.password` in `mybatis-config.xml`. +If you are using a TiDB Serverless cluster, modify the `dataSource.url`, `dataSource.username`, `dataSource.password` in `mybatis-config.xml`. ```xml @@ -1524,7 +1524,7 @@ In this case, you can modify the parameters in `dataSource` node as follows:
-If you are using a TiDB Cloud Serverless Tier cluster, modify the `hibernate.connection.url`, `hibernate.connection.username`, `hibernate.connection.password` in `hibernate.cfg.xml`. +If you are using a TiDB Serverless cluster, modify the `hibernate.connection.url`, `hibernate.connection.username`, `hibernate.connection.password` in `hibernate.cfg.xml`. ```xml @@ -1590,7 +1590,7 @@ In this case, you can modify the parameters as follows:
-If you are using a TiDB Cloud Serverless Tier cluster, modify the parameters of the host, port, user, and password in `JDBCExample.java`: +If you are using a TiDB Serverless cluster, modify the parameters of the host, port, user, and password in `JDBCExample.java`: ```java mysqlDataSource.setServerName("localhost"); diff --git a/tidb-cloud/tidb-cloud-quickstart.md b/tidb-cloud/tidb-cloud-quickstart.md index 96768465f92df..313afc2459e43 100644 --- a/tidb-cloud/tidb-cloud-quickstart.md +++ b/tidb-cloud/tidb-cloud-quickstart.md @@ -11,6 +11,8 @@ aliases: ['/tidbcloud/beta/tidb-cloud-quickstart'] This tutorial guides you through an easy way to get started with your TiDB Cloud. +In addition, you can try out TiDB features on [TiDB Playground](https://play.tidbcloud.com/?utm_source=docs&utm_medium=tidb_cloud_quick_start). + ## Step 1. Create a TiDB cluster TiDB Cloud [Serverless Tier](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta) (Beta) is the best way to get started with TiDB Cloud. To create a free Serverless Tier cluster, take the following steps: diff --git a/tidb-cloud/tune-performance.md b/tidb-cloud/tune-performance.md index 425200239f58e..7861e2805e912 100644 --- a/tidb-cloud/tune-performance.md +++ b/tidb-cloud/tune-performance.md @@ -16,7 +16,7 @@ TiDB Cloud provides [Statement Analysis](#statement-analysis), [Slow Query](#slo > **Note:** > > Currently, these three features are unavailable for [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). - + ## Statement Analysis To use the statement analysis, perform the following steps: @@ -37,7 +37,7 @@ For more information, see [Statement Execution Details in TiDB Dashboard](https: ## Slow Query -By default, SQL queries that take more than 300 milliseconds are considered as slow queries. +By default, SQL queries that take more than 300 milliseconds are considered as slow queries. 
To view slow queries in a cluster, perform the following steps: diff --git a/tidb-configuration-file.md b/tidb-configuration-file.md index 275fbf68892d6..842fe8cef9bf8 100644 --- a/tidb-configuration-file.md +++ b/tidb-configuration-file.md @@ -48,8 +48,13 @@ The TiDB configuration file supports more options than command-line parameters. + File system location used by TiDB to store temporary data. If a feature requires local storage in TiDB nodes, TiDB stores the corresponding temporary data in this location. + When creating an index, if [`tidb_ddl_enable_fast_reorg`](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630) is enabled, data that needs to be backfilled for a newly created index will be at first stored in the TiDB local temporary directory, and then imported into TiKV in batches, thus accelerating the index creation. ++ When [`IMPORT INTO`](/sql-statements/sql-statement-import-into.md) is used to import data, the sorted data is first stored in the TiDB local temporary directory, and then imported into TiKV in batches. + Default value: `"/tmp/tidb"` +> **Note:** +> +> If the directory does not exist, TiDB will automatically create it upon startup. If the directory creation fails or TiDB does not have the read and write permissions on that directory, [`Fast Online DDL`](/system-variables.md#tidb_ddl_enable_fast_reorg-new-in-v630) might experience unpredictable issues. + ### `oom-use-tmp-storage` > **Warning:** @@ -115,10 +120,14 @@ The TiDB configuration file supports more options than command-line parameters. + Modifies the version string returned by TiDB in the following situations: - When the built-in `VERSION()` function is used. - - When TiDB establishes the initial connection to the client and returns the initial handshake packet with version string of the server. For details, see [MySQL Initial Handshake Packet](https://dev.mysql.com/doc/internals/en/connection-phase-packets.html#packet-Protocol::Handshake). 
+ - When TiDB establishes the initial connection to the client and returns the initial handshake packet with version string of the server. For details, see [MySQL Initial Handshake Packet](https://dev.mysql.com/doc/dev/mysql-server/latest/page_protocol_connection_phase.html#sect_protocol_connection_phase_initial_handshake). + Default value: "" + By default, the format of the TiDB version string is `5.7.${mysql_latest_minor_version}-TiDB-${tidb_version}`. +> **Note:** +> +> TiDB nodes use the value of `server-version` to verify the current TiDB version. Therefore, to avoid unexpected behaviors, before upgrading the TiDB cluster, you need to set the value of `server-version` to empty or the real version of the current TiDB cluster. + ### `repair-mode` - Determines whether to enable the untrusted repair mode. When the `repair-mode` is set to `true`, bad tables in the `repair-table-list` cannot be loaded. @@ -192,7 +201,18 @@ The TiDB configuration file supports more options than command-line parameters. + Controls whether to enable the Global Kill (terminating queries or connections across instances) feature. + Default value: `true` -+ When the value is `true`, both `KILL` and `KILL TIDB` statements can terminate queries or connections across instances so you do not need to worry about erroneously terminating queries or connections. When you use a client to connect to any TiDB instance and execute the `KILL` or `KILL TIDB` statement, the statement will be forwarded to the target TiDB instance. If there is a proxy between the client and the TiDB cluster, the `KILL` and `KILL TIDB` statements will also be forwarded to the target TiDB instance for execution. Currently, using the MySQL command line ctrl+c to terminate a query or connection in TiDB is not supported when `enable-global-kill` is `true`. For more information on the `KILL` statement, see [KILL](/sql-statements/sql-statement-kill.md). 
++ When the value is `true`, both `KILL` and `KILL TIDB` statements can terminate queries or connections across instances so you do not need to worry about erroneously terminating queries or connections. When you use a client to connect to any TiDB instance and execute the `KILL` or `KILL TIDB` statement, the statement will be forwarded to the target TiDB instance. If there is a proxy between the client and the TiDB cluster, the `KILL` and `KILL TIDB` statements will also be forwarded to the target TiDB instance for execution. ++ Starting from v7.3.0, you can terminate a query or connection using the MySQL command line Control+C when both `enable-global-kill` and [`enable-32bits-connection-id`](#enable-32bits-connection-id-new-in-v730) are set to `true`. For more information, see [`KILL`](/sql-statements/sql-statement-kill.md). + +### `enable-32bits-connection-id` New in v7.3.0 + ++ Controls whether to enable the 32-bit connection ID feature. ++ Default value: `true` ++ When both this configuration item and [`enable-global-kill`](#enable-global-kill-new-in-v610) are set to `true`, TiDB generates 32-bit connection IDs. This enables you to terminate queries or connections by the MySQL command-line Control+C. + +> **Warning:** +> +> When the number of TiDB instances in the cluster exceeds 2048 or the concurrent connection count of a single TiDB instance exceeds 1048576, the 32-bit connection ID space becomes insufficient and is automatically upgraded to 64-bit connection IDs. During the upgrade process, existing business and established connections are unaffected. However, subsequent new connections cannot be terminated using Control+C in the MySQL command-line. ### `initialize-sql-file` New in v6.6.0 @@ -286,6 +306,10 @@ Configuration items related to log. 
### `expensive-threshold` +> **Warning:** +> +> Starting from v5.4.0, the `expensive-threshold` configuration item is deprecated and replaced by the system variable [`tidb_expensive_query_time_threshold`](/system-variables.md#tidb_expensive_query_time_threshold). + - Outputs the threshold value of the number of rows for the `expensive` operation. - Default value: `10000` - When the number of query rows (including the intermediate results based on statistics) is larger than this value, it is an `expensive` operation and outputs log with the `[EXPENSIVE_QUERY]` prefix. @@ -545,40 +569,28 @@ Configuration items related to performance. ### `stats-load-concurrency` New in v5.4.0 -> **Warning:** -> -> Currently, synchronously loading statistics is an experimental feature. It is not recommended that you use it in production environments. - + The maximum number of columns that the TiDB synchronously loading statistics feature can process concurrently. + Default value: `5` + Currently, the valid value range is `[1, 128]`. ### `stats-load-queue-size` New in v5.4.0 -> **Warning:** -> -> Currently, synchronously loading statistics is an experimental feature. It is not recommended that you use it in production environments. - + The maximum number of column requests that the TiDB synchronously loading statistics feature can cache. + Default value: `1000` + Currently, the valid value range is `[1, 100000]`. ### `lite-init-stats` New in v7.1.0 -> **Warning:** -> -> This variable is an experimental feature. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. - + Controls whether to use lightweight statistics initialization during TiDB startup. -+ Default value: false ++ Default value: `false` for versions earlier than v7.2.0, `true` for v7.2.0 and later versions. 
+ When the value of `lite-init-stats` is `true`, statistics initialization does not load any histogram, TopN, or Count-Min Sketch of indexes or columns into memory. When the value of `lite-init-stats` is `false`, statistics initialization loads histograms, TopN, and Count-Min Sketch of indexes and primary keys into memory but does not load any histogram, TopN, or Count-Min Sketch of non-primary key columns into memory. When the optimizer needs the histogram, TopN, and Count-Min Sketch of a specific index or column, the necessary statistics are loaded into memory synchronously or asynchronously (controlled by [`tidb_stats_load_sync_wait`](/system-variables.md#tidb_stats_load_sync_wait-new-in-v540)). + Setting `lite-init-stats` to `true` speeds up statistics initialization and reduces TiDB memory usage by avoiding unnecessary statistics loading. For details, see [Load statistics](/statistics.md#load-statistics). ### `force-init-stats` New in v7.1.0 + Controls whether to wait for statistics initialization to finish before providing services during TiDB startup. -+ Default value: false -+ When the value of `force-init-stats` is `true`, TiDB needs to wait until statistics initialization is finished before providing services upon startup. If there are a large number of tables and partitions, setting `force-init-stats` to `true` might prolong the time it takes for TiDB to start providing services. ++ Default value: `false` for versions earlier than v7.2.0, `true` for v7.2.0 and later versions. ++ When the value of `force-init-stats` is `true`, TiDB needs to wait until statistics initialization is finished before providing services upon startup. Note that if there are a large number of tables and partitions and the value of [`lite-init-stats`](/tidb-configuration-file.md#lite-init-stats-new-in-v710) is `false`, setting `force-init-stats` to `true` might prolong the time it takes for TiDB to start providing services. 
+ When the value of `force-init-stats` is `false`, TiDB can still provide services before statistics initialization is finished, but the optimizer uses pseudo statistics to make decisions, which might result in suboptimal execution plans. ## opentracing @@ -842,6 +854,26 @@ Configuration items related to read isolation. - Unit: Milliseconds - Before v6.1.0, this configuration is set by `slow-threshold`. +### `in-mem-slow-query-topn-num` New in v7.3.0 + ++ The configuration controls the number of slowest queries that are cached in memory. ++ Default value: 30 + +### `in-mem-slow-query-recent-num` New in v7.3.0 + ++ The configuration controls the number of recently used slow queries that are cached in memory. ++ Default value: 500 + +### `tidb_expensive_query_time_threshold` + +- This configuration is used to set the threshold value that determines whether to print expensive query logs. The difference between expensive query logs and slow query logs is: + - Slow logs are printed after the statement is executed. + - Expensive query logs print the statements that are being executed, with execution time exceeding the threshold value, and their related information. +- Default value: `60` +- Range: `[10, 2147483647]` +- Unit: Seconds +- Before v5.4.0, this configuration is set by `expensive-threshold`. + ### `tidb_record_plan_in_slow_log` - This configuration is used to control whether to include the execution plan of slow queries in the slow log. @@ -940,6 +972,11 @@ Configuration items related to the PROXY protocol. > > Use `*` with caution because it might introduce security risks by allowing a client of any IP address to report its IP address. In addition, using `*` might also cause the internal component that directly connects to TiDB (such as TiDB Dashboard) to be unavailable. +### `fallbackable` New in v6.5.1 + ++ Controls whether to enable the PROXY protocol fallback mode. 
If this configuration item is set to `true`, TiDB can accept clients that belong to `proxy-protocol.networks` to connect to TiDB without using the PROXY protocol specification or without sending the PROXY protocol header. By default, TiDB only accepts client connections that belong to `proxy-protocol.networks` and send a PROXY protocol header. ++ Default value: `false` + ## experimental The `experimental` section, introduced in v3.1.0, describes the configurations related to the experimental features of TiDB. diff --git a/tidb-control.md b/tidb-control.md index 6447ffe67eb4c..921a27a761ded 100644 --- a/tidb-control.md +++ b/tidb-control.md @@ -8,6 +8,10 @@ aliases: ['/docs/dev/tidb-control/','/docs/dev/reference/tools/tidb-control/'] TiDB Control is a command-line tool of TiDB, usually used to obtain the status information of TiDB for debugging. This document introduces the features of TiDB Control and how to use these features. +> **Note:** +> +> TiDB Control is specifically designed for debugging purposes and might not be fully compatible with future capabilities introduced in TiDB. It's not recommended to include this tool in applications or utilities development to get information. + ## Get TiDB Control You can get TiDB Control by installing it using TiUP or by compiling it from source code. diff --git a/tidb-distributed-execution-framework.md b/tidb-distributed-execution-framework.md index 74dd8a8071f7e..5d8574aee9d2d 100644 --- a/tidb-distributed-execution-framework.md +++ b/tidb-distributed-execution-framework.md @@ -13,7 +13,7 @@ summary: Learn the use cases, limitations, usage, and implementation principles > **Note:** > -> Currently, this feature is only applicable to Dedicated Tier clusters. You cannot use it on Serverless Tier clusters. +> Currently, this feature is only applicable to TiDB Dedicated clusters. You cannot use it on TiDB Serverless clusters. 
@@ -27,7 +27,17 @@ This document describes the use cases, limitations, usage, and implementation pr ## Use cases and limitations -In a database management system, in addition to the core transactional processing (TP) and analytical processing (AP) workloads, there are other important tasks, such as DDL operations, Load Data, TTL, Analyze, and Backup/Restore, which are called **backend tasks**. These backend tasks need to process a large amount of data in database objects (tables), so they typically have the following characteristics: + + +In a database management system, in addition to the core transactional processing (TP) and analytical processing (AP) workloads, there are other important tasks, such as DDL operations, IMPORT INTO, TTL, Analyze, and Backup/Restore, which are called **backend tasks**. These backend tasks need to process a large amount of data in database objects (tables), so they typically have the following characteristics: + + + + + +In a database management system, in addition to the core transactional processing (TP) and analytical processing (AP) workloads, there are other important tasks, such as DDL operations, TTL, Analyze, and Backup/Restore, which are called **backend tasks**. These backend tasks need to process a large amount of data in database objects (tables), so they typically have the following characteristics: + + - Need to process all data in a schema or a database object (table). - Might need to be executed periodically, but at a low frequency. @@ -39,12 +49,16 @@ Enabling the TiDB backend task distributed execution framework can solve the abo - The framework supports distributed execution of backend tasks, which can flexibly schedule the available computing resources of the entire TiDB cluster, thereby better utilizing the computing resources in a TiDB cluster. - The framework provides unified resource usage and management capabilities for both overall and individual backend tasks. 
-Currently, the TiDB backend task distributed execution framework only supports the distributed execution of `ADD INDEX` statements, that is, the DDL statements for creating indexes. For example, the following SQL statements are supported: +Currently, for TiDB Self-Hosted, the TiDB backend task distributed execution framework supports the distributed execution of the `ADD INDEX` and `IMPORT INTO` statements. For TiDB Cloud, the `IMPORT INTO` statement is not applicable. + +- `ADD INDEX` is a DDL statement used to create indexes. For example: + + ```sql + ALTER TABLE t1 ADD INDEX idx1(c1); + CREATE INDEX idx1 ON table t1(c1); + ``` -```sql -ALTER TABLE t1 ADD INDEX idx1(c1); -CREATE INDEX idx1 ON table t1(c1); -``` +- `IMPORT INTO` is used to import data in formats such as `CSV`, `SQL`, and `PARQUET` into an empty table. For more information, see [`IMPORT INTO`](https://docs.pingcap.com/tidb/v7.2/sql-statement-import-into). ## Prerequisites @@ -63,7 +77,7 @@ Before using the distributed framework, you need to enable the [Fast Online DDL] > **Note:** > -> Before you upgrade TiDB to v6.5.0 or later, it is recommended that you check whether the [`temp-dir`](/tidb-configuration-file.md#temp-dir-new-in-v630) path of TiDB is correctly mounted to an SSD disk. This path is a TiDB configuration item, which takes effect after TiDB is restarted. Therefore, setting this configuration item in advance before upgrading can avoid another restart. +> Before you upgrade TiDB to v6.5.0 or later, it is recommended that you check whether the [`temp-dir`](/tidb-configuration-file.md#temp-dir-new-in-v630) path of TiDB is correctly mounted to an SSD disk. Make sure that the operating system user that runs TiDB has the read and write permissions for this directory. Otherwise, the DDL operations might experience unpredictable issues. This path is a TiDB configuration item, which takes effect after TiDB is restarted.
Therefore, setting this configuration item in advance before upgrading can avoid another restart. diff --git a/tidb-lightning/data-import-best-practices.md b/tidb-lightning/data-import-best-practices.md new file mode 100644 index 0000000000000..fedfa093ad805 --- /dev/null +++ b/tidb-lightning/data-import-best-practices.md @@ -0,0 +1,155 @@ +--- +title: Best Practices for Importing 50 TiB Data +summary: Learn best practices for importing large volumes of data. +--- + +# Best Practices for Importing 50 TiB Data + +This document provides best practices for importing large volumes of data into TiDB, including some key factors and steps that affect data import. We have successfully imported data of a large single table over 50 TiB into both the internal environment and customer's environment, and have accumulated best practices based on these real application scenarios, which can help you import data more smoothly and efficiently. + +TiDB Lightning ([Physical Import Mode](/tidb-lightning/tidb-lightning-physical-import-mode.md)) is a comprehensive and efficient data import tool used for importing data into empty tables and initializing empty clusters, and uses files as the data source. TiDB Lightning provides two running modes: a single instance and [parallel import](/tidb-lightning/tidb-lightning-distributed-import.md). You can import source files of different sizes. + +- If the data size of the source files is within 10 TiB, it is recommended to use a single instance of TiDB Lightning for the import. +- If the data size of the source files exceeds 10 TiB, it is recommended to use multiple instances of TiDB Lightning for [Parallel Import](/tidb-lightning/tidb-lightning-distributed-import.md). 
+- If the source file data scale is exceptionally large (larger than 50 TiB), in addition to parallel importing, you need to make certain preparations and optimizations based on the characteristics of the source data, table definitions, and parameter configurations to achieve smoother and faster large-scale data import. + +The following sections apply to both importing multiple tables and importing large single tables: + +- [Key factors](#key-factors) +- [Prepare source files](#prepare-source-files) +- [Estimate storage space](#estimate-storage-space) +- [Change configuration parameters](#change-configuration-parameters) +- [Resolve the "checksum mismatch" error](#resolve-the-checksum-mismatch-error) +- [Enable checkpoint](#enable-checkpoint) +- [Troubleshooting](#troubleshooting) + +The best practices for importing large single tables are described separately in the following section because of its special requirements: + +- [Best practices for importing a large single table](#best-practices-for-importing-a-large-single-table) + +## Key factors + +When you import data, some key factors can affect import performance and might even cause import to fail. Some common critical factors are as follows: + +- Source files + + - Whether the data within a single file is sorted by the primary key. Sorted data can achieve optimal import performance. + - Whether overlapping primary keys or non-null unique indexes exist between source files imported by multiple TiDB Lightning instances. The smaller the overlap is, the better the import performance. + +- Table definitions + + - The number and size of secondary indexes per table can affect the import speed. Fewer indexes result in faster imports and less space consumption after import. + - Index data size = Number of indexes \* Index size \* Number of rows. + +- Compression ratio + + - Data imported into a TiDB cluster is stored in a compressed format. The compression ratio cannot be calculated in advance. 
It can only be determined after the data is actually imported into the TiKV cluster. + - As a best practice, you can first import a small portion of the data (for example, 10%) to obtain the corresponding compression ratio of the cluster, and then use it to estimate the compression ratio of the entire data import. + +- Configuration parameters + + - `region-concurrency`: The concurrency of TiDB Lightning main logical processing. + - `send-kv-pairs`: The number of Key-Value pairs sent by TiDB Lightning to TiKV in a single request. + - `disk-quota`: The disk quota used by TiDB Lightning local temp files when using the physical import mode. + - `GOMEMLIMIT`: TiDB Lightning is implemented in the Go language. [Configure `GOMEMLIMIT` properly.](#change-configuration-parameters) + +- Data validation + + After data and index import is completed, the [`ADMIN CHECKSUM`](/sql-statements/sql-statement-admin-checksum-table.md) statement is executed on each table, and the checksum value is compared with the local checksum value of TiDB Lightning. When many tables exist, or an individual table has a large number of rows, the checksum phase can take a long time. + +- The analyze operation + + After the checksum is successfully completed, the [`ANALYZE TABLE`](/sql-statements/sql-statement-analyze-table.md) statement is executed on each table to generate the optimal execution plan. The analyze operation can be time-consuming when dealing with a large number of tables or an individual table with a significant amount of data. + +- Relevant issues + + During the actual process of importing 50 TiB of data, certain issues might occur that are only exposed when dealing with a massive number of source files and large-scale clusters. When choosing a product version, it is recommended to check whether the corresponding issues have been fixed. 
+ + The following issues have been resolved in v6.5.3, v7.1.0, and later versions: + + - [Issue-14745](https://github.com/tikv/tikv/issues/14745): After the import is completed, a large number of temporary files are left in the TiKV import directory. + - [Issue-6426](https://github.com/tikv/pd/issues/6426): The PD [range scheduling](/tidb-lightning/tidb-lightning-physical-import-mode-usage.md#scope-of-pausing-scheduling-during-import) interface might fail to scatter regions, resulting in timeout issues. Before v6.2.0, global scheduling is disabled by default, which can avoid triggering this problem. + - [Issue-43079](https://github.com/pingcap/tidb/pull/43079): TiDB Lightning fails to refresh the Region Peers information during retry for NotLeader errors. + - [Issue-43291](https://github.com/pingcap/tidb/issues/43291): TiDB Lightning does not retry in cases where temporary files are not found (the "No such file or directory" error). + +## Prepare source files + +- When generating source files, it is preferable to sort them by the primary key within a single file. If the table definition does not have a primary key, you can add an auto-increment primary key. In this case, the order of the file content does not matter. +- When assigning source files to multiple TiDB Lightning instances, try to avoid the situation where overlapping primary keys or non-null unique indexes exist between multiple source files. If the generated files are globally sorted, they can be distributed into different TiDB Lightning instances based on ranges to achieve optimal import performance. +- Control each file to be less than 96 MiB in size during file generation. +- If a file is exceptionally large and exceeds 256 MiB, enable [`strict-format`](/migrate-from-csv-files-to-tidb.md#step-4-tune-the-import-performance-optional). 
+ +## Estimate storage space + +You can use either of the following two methods to estimate the storage space required for importing data: + +- Assuming the total data size is **A**, the total index size is **B**, the replication factor is **3**, and the compression ratio is **α** (typically around 2.5), the overall occupied space can be calculated as: **(A+B)\*3/α**. This method is primarily used for estimating without performing any data import, to plan the cluster topology. +- Import only 10% of the data and multiply the actual occupied space by 10 to estimate the final space usage for that batch of data. This method is more accurate, especially when you import a large amount of data. + +Note that it is recommended to reserve 20% of storage space, because background tasks such as compaction and snapshot replication also consume a portion of the storage space. + +## Change configuration parameters + +- `region-concurrency`: The concurrency of TiDB Lightning main logical processing. During parallel importing, it is recommended to set it to 75% of the CPU cores to prevent resource overload and potential OOM issues. +- `send-kv-pairs`: The number of Key-Value pairs sent by TiDB Lightning to TiKV in a single request. It is recommended to adjust this value based on the formula send-kv-pairs \* row-size < 1 MiB. Starting from v7.2.0, this parameter is replaced by `send-kv-size`, and no additional setting is required. +- `disk-quota`: It is recommended to ensure that the sorting directory space of TiDB Lightning is larger than the size of the data source. If you cannot ensure that, you can set `disk-quota` to 80% of the sorting directory space of TiDB Lightning. In this way, TiDB Lightning will sort and write data in batches according to the specified `disk-quota`, but note that this approach might result in lower import performance compared to a complete sorting process. +- `GOMEMLIMIT`: TiDB Lightning is implemented in the Go language. 
Set `GOMEMLIMIT` to 80% of the instance memory to reduce the probability of OOM caused by the Go GC mechanism. + +For more information about TiDB Lightning parameters, see [TiDB Lightning configuration parameters](/tidb-lightning/tidb-lightning-configuration.md). + +## Resolve the "checksum mismatch" error + +Conflicts might occur during data validation. The error message is "checksum mismatch". To resolve this issue, take the following steps as needed: + +1. In the source data, check for conflicted primary keys or unique keys, and resolve the conflicts before reimporting. In most cases, this is the most common cause. +2. Check if the table primary key or unique key definition is reasonable. If not, modify the table definition and reimport data. +3. If the issue persists after following the preceding two steps, further examination is required to determine whether a small amount (less than 10%) of unexpected conflicting data exists in the source data. To let TiDB Lightning detect and resolve conflicting data, enable [conflict detection](/tidb-lightning/tidb-lightning-physical-import-mode-usage.md#conflict-detection). + +## Enable checkpoint + +For importing a large volume of data, it is essential to refer to [Lightning Checkpoints](/tidb-lightning/tidb-lightning-checkpoints.md) and enable checkpoints. It is recommended to prioritize using MySQL as the driver to avoid losing the checkpoint information if TiDB Lightning is running in a container environment where the container might exit and delete the checkpoint information. + +If you encounter insufficient space in downstream TiKV during import, you can manually run the `kill` command (without the `-9` option) on all TiDB Lightning instances. After scaling up the capacity, you can resume the import based on the checkpoint information.
+ +## Best practices for importing a large single table + +Importing multiple tables can increase the time required for checksum and analyze operations, sometimes exceeding the time required for data import itself. However, it is generally not necessary to adjust the configuration. If one or more large tables exist among the multiple tables, it is recommended to separate the source files of these large tables and import them separately. + +This section provides the best practices for importing large single tables. There is no strict definition for a large single table, but it is generally considered to meet one of the following criteria: + +- The table size exceeds 10 TiB. +- The number of rows exceeds 1 billion and the number of columns exceeds 50 in a wide table. + +### Generate source files + +Follow the steps outlined in the [Prepare source files](#prepare-source-files). + +For a large single table, if global sorting is not achievable but sorting within each file based on the primary key is possible, and the file is a standard CSV file, it is recommended to generate large single files with each around 20 GiB. + +Then, enable `strict-format`. This approach reduces the overlap of primary and unique keys in the imported files between TiDB Lightning instances, and TiDB Lightning instances can split the large files before importing to achieve optimal import performance. + +### Plan cluster topology + +Prepare TiDB Lightning instances to make each instance process 5 TiB to 10 TiB of source data. Deploy one TiDB Lightning instance on each node. For the specifications of the nodes, refer to the [environment requirements](/tidb-lightning/tidb-lightning-physical-import-mode.md#environment-requirements) of TiDB Lightning instances. + +### Change configuration parameters + +- Set `region-concurrency` to 75% of the number of cores of the TiDB Lightning instance. +- Set `send-kv-pairs` to `3200`. This method applies to TiDB v7.1.0 and earlier versions. 
Starting from v7.2.0, this parameter is replaced by `send-kv-size`, and no additional setting is required. +- Adjust `GOMEMLIMIT` to 80% of the memory on the node where the instance is located. + +If the PD Scatter Region latency during the import process exceeds 30 minutes, consider the following optimizations: + +- Check whether the TiKV cluster encounters any I/O bottlenecks. +- Increase TiKV `raftstore.apply-pool-size` from the default value of `2` to `4` or `8`. +- Reduce TiDB Lightning `region-split-concurrency` to half the number of CPU cores, with a minimum value of `1`. + +### Disable the analyze operation + +In the case of a large single table (for example, with over 1 billion rows and more than 50 columns), it is recommended to disable the `analyze` operation (`analyze="off"`) during the import process, and manually execute the [`ANALYZE TABLE`](/sql-statements/sql-statement-analyze-table.md) statement after the import is completed. + +For more information about the configuration of `analyze`, see [TiDB Lightning task configuration](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task). + +## Troubleshooting + +If you encounter problems while using TiDB Lightning, see [Troubleshoot TiDB Lightning](/tidb-lightning/troubleshoot-tidb-lightning.md). diff --git a/tidb-lightning/tidb-lightning-command-line-full.md b/tidb-lightning/tidb-lightning-command-line-full.md index 1b3affeb3889a..6450a9a9d5aa2 100644 --- a/tidb-lightning/tidb-lightning-command-line-full.md +++ b/tidb-lightning/tidb-lightning-command-line-full.md @@ -33,7 +33,7 @@ You can configure the following parameters using `tidb-lightning`: | `--enable-checkpoint ` | Whether to enable checkpoints (default = true) | `checkpoint.enable` | | `--analyze ` | Analyze tables after importing. Available values are "required", "optional" (default value), and "off". | `post-restore.analyze` | | `--checksum ` | Compare checksum after importing.
Available values are "required" (default value), "optional", and "off". | `post-restore.checksum` | -| `--check-requirements ` | Check cluster version compatibility before starting (default = true) | `lightning.check-requirements` | +| `--check-requirements ` | Check cluster version compatibility before starting the task, and check whether TiKV has more than 10% free space left during running time. (default = true) | `lightning.check-requirements` | | `--ca ` | CA certificate path for TLS connection | `security.ca-path` | | `--cert ` | Certificate path for TLS connection | `security.cert-path` | | `--key ` | Private key path for TLS connection | `security.key-path` | diff --git a/tidb-lightning/tidb-lightning-configuration.md b/tidb-lightning/tidb-lightning-configuration.md index 928010cdacf48..443c8900df6f2 100644 --- a/tidb-lightning/tidb-lightning-configuration.md +++ b/tidb-lightning/tidb-lightning-configuration.md @@ -18,11 +18,13 @@ TiDB Lightning has two configuration classes: "global" and "task", and they have ### tidb-lightning global configuration [lightning] -# The HTTP port for displaying the web interface, pulling Prometheus metrics, exposing debug data, and submitting import tasks (in server mode). Setting it to 0 disables the port. +# The HTTP port for displaying the web interface, pulling Prometheus metrics, exposing debug data, +# and submitting import tasks (in server mode). Setting it to 0 disables the port. status-addr = ':8289' # Server mode. Defaults to false, which means an import task starts immediately after you execute the command. -# If this value is set to true, after you execute the command, TiDB Lightning waits until you submit an import task in the web interface. +# If this value is set to true, after you execute the command, +# TiDB Lightning waits until you submit an import task in the web interface. # See the "TiDB Lightning Web Interface" section for details. 
server-mode = false @@ -32,6 +34,11 @@ file = "tidb-lightning.log" max-size = 128 # MB max-days = 28 max-backups = 14 + +# Controls whether to enable the diagnostic logs. The default value is false, that is, only the logs related to the import are output, and the logs of other dependent components are not output. +# When you set it to true, logs from both the import process and other dependent components are output, and gRPC debugging is enabled, which can be used for diagnosis. +# This parameter is introduced in v7.3.0. +enable-diagnose-logs = false ``` ### TiDB Lightning (Task) @@ -40,7 +47,7 @@ max-backups = 14 ### tidb-lightning task configuration [lightning] -# Checks whether the cluster satisfies the minimum requirement before starting. +# Checks whether the cluster satisfies the minimum requirement before starting the task, and checks whether TiKV has more than 10% free space left during running time. #check-requirements = true # The maximum number of engines to be opened concurrently. @@ -74,12 +81,16 @@ max-error = 0 # To disable error recording, set this to an empty string. # task-info-schema-name = 'lightning_task_info' -# In parallel import mode, the schema name that stores the meta information for each TiDB Lightning instance in the target cluster. By default, the value is "lightning_metadata". +# In parallel import mode, the schema name that stores the meta information for each TiDB Lightning instance in the target cluster. +# By default, the value is "lightning_metadata". # Configure this parameter only if parallel import is enabled. # **Note:** -# - The value set for this parameter must be the same for each TiDB Lightning instance that participates in the same parallel import; otherwise, the correctness of the imported data cannot be ensured. -# - If parallel import mode is enabled, make sure that the user used for import (for the tidb.user configuration) has permissions to create and access the databases corresponding to this configuration.
-# - TiDB Lightning removes this schema after the import is completed. So do not use any existing schema name to configure this parameter. +# - The value set for this parameter must be the same for each TiDB Lightning instance +# that participates in the same parallel import; otherwise, the correctness of the imported data cannot be ensured. +# - If parallel import mode is enabled, make sure that the user used for import (for the tidb.user configuration) +# has permissions to create and access the databases corresponding to this configuration. +# - TiDB Lightning removes this schema after the import is completed. +# So do not use any existing schema name to configure this parameter. meta-schema-name = "lightning_metadata" [security] @@ -116,31 +127,55 @@ driver = "file" # will leak metadata about the data source. # keep-after-success = false +[conflict] +# Starting from v7.3.0, a new version of strategy is introduced to handle conflicting data. The default value is "". +# - "": TiDB Lightning does not detect or handle conflicting data. If the source file contains conflicting primary or unique key records, the subsequent step reports an error. +# - "error": when detecting conflicting primary or unique key records in the imported data, TiDB Lightning terminates the import and reports an error. +# - "replace": when encountering conflicting primary or unique key records, TiDB Lightning retains the new data and overwrites the old data. +# - "ignore": when encountering conflicting primary or unique key records, TiDB Lightning retains the old data and ignores the new data. +# The new version strategy cannot be used together with tikv-importer.duplicate-resolution (the old version of conflict detection). +strategy = "" +# Controls the upper limit of the conflicting data that can be handled when strategy is "replace" or "ignore". You can set it only when strategy is "replace" or "ignore". 
The default value is 9223372036854775807, which means that almost all errors are tolerated. +# threshold = 9223372036854775807 +# Controls the maximum number of records in the conflict_records table. The default value is 100. If the strategy is "ignore", the conflict records that are ignored are recorded; if the strategy is "replace", the conflict records that are overwritten are recorded. However, the "replace" strategy cannot record the conflict records in the logical import mode. +# max-record-rows = 100 + [tikv-importer] -# "local": Physical import mode, used by default. It applies to large dataset import, for example, greater than 1 TiB. However, during the import, downstream TiDB is not available to provide services. -# "tidb": Logical import mode. You can use this mode for small dataset import, for example, smaller than 1 TiB. During the import, downstream TiDB is available to provide services. +# "local": Physical import mode, used by default. It applies to large dataset import, +# for example, greater than 1 TiB. However, during the import, downstream TiDB is not available to provide services. +# "tidb": Logical import mode. You can use this mode for small dataset import, +# for example, smaller than 1 TiB. During the import, downstream TiDB is available to provide services. # backend = "local" -# Whether to enable multiple TiDB Lightning instances (in physical import mode) to import data to one or more target tables in parallel. The default value is `false`. -# When you use parallel import mode, you must set the parameter to `true`, but the premise is that no data exists in the target table, that is, all data can only be imported by TiDB Lightning. Note that this parameter **is not for incremental data import** and is only used in scenarios where the target table is empty. -# incremental-import = false - -# The listening address of tikv-importer when backend is "importer". Change it to the actual address.
-addr = "172.16.31.10:8287" -# Action to do when trying to insert a conflicting record in the logical import mode. For more information on the conflict detection, see the document: https://docs.pingcap.com/tidb/dev/tidb-lightning-logical-import-mode-usage#conflict-detection -# - replace: use new entry to replace the existing entry -# - ignore: keep the existing entry, and ignore the new entry -# - error: report error and quit the program -# on-duplicate = "replace" +# Whether to enable multiple TiDB Lightning instances (in physical import mode) to import data to one or more target tables in parallel. +# The default value is `false`. +# When you use parallel import mode, you must set the parameter to `true`, +# but the premise is that no data exists in the target table, that is, all data can only be imported by TiDB Lightning. +# Note that this parameter is only used in scenarios where the target table is empty. +# parallel-import = false # Whether to detect and resolve duplicate records (unique key conflict) in the physical import mode. # The following resolution algorithms are supported: -# - record: After the data is written to the target table, add the duplicate records from the target table to the `lightning_task_info.conflict_error_v1` table in the target TiDB. Note that the required version of the target TiKV is no earlier than v5.2.0; otherwise it falls back to 'none'. -# - none: does not detect duplicate records, which has the best performance of the three algorithms. But if there are duplicate records in the data source, it might lead to inconsistent data in the target TiDB. -# - remove: records all duplicate records in the target table to the lightning_task_info database, like the 'record' algorithm. But it removes all duplicate records from the target table to ensure a consistent state in the target TiDB. +# - none: does not detect duplicate records, which has the best performance of the two algorithms. 
+# But if there are duplicate records in the data source, it might lead to inconsistent data in the target TiDB. +# - remove: if there are primary key or unique key conflicts between the inserting data A and B, +# A and B will be removed from the target table and recorded +# in the `lightning_task_info.conflict_error_v1` table in the target TiDB. +# You can manually insert the correct records into the target table based on your business requirements. +# Note that the target TiKV must be v5.2.0 or later versions; otherwise it falls back to 'none'. +# The default value is 'none'. # duplicate-resolution = 'none' -# The number of KV pairs sent in one request in the physical import mode. +# The maximum number of KV pairs in one request when sending data to TiKV in physical import mode. +# Starting from v7.2.0, this parameter is deprecated and no longer takes effect after it is set. +# If you want to adjust the amount of data sent to TiKV in one request, use the `send-kv-size` parameter instead. # send-kv-pairs = 32768 -# Whether to enable compression when sending KV pairs to TiKV in the physical import mode. Currently, only the Gzip compression algorithm is supported. To use this algorithm, you can fill in either "gzip" or "gz" for this parameter. By default, the compression is not enabled. +# The maximum size of one request when sending data to TiKV in physical import mode. +# The default value is "16K". It is not recommended to adjust this parameter. +# This parameter is introduced in v7.2.0. +# send-kv-size = "16K" +# Whether to enable compression when sending KV pairs to TiKV in the physical import mode. +# Currently, only the Gzip compression algorithm is supported. +# To use this algorithm, you can fill in either "gzip" or "gz" for this parameter. +# By default, the compression is not enabled. # compress-kv-pairs = "" # The directory of local KV sorting in the physical import mode. 
If the disk # performance is low (such as in HDD), it is recommended to set the directory @@ -162,28 +197,43 @@ addr = "172.16.31.10:8287" # The default value is `MaxInt64` bytes, that is, 9223372036854775807 bytes. # disk-quota = "10GB" -# Specifies whether Physical Import Mode adds indexes via SQL. The default value is `false`, which means that TiDB Lightning will encode both row data and index data into KV pairs and import them into TiKV together. This mechanism is consistent with that of the historical versions. If you set it to `true`, it means that TiDB Lightning adds indexes via SQL after importing the row data. -# The benefit of adding indexes via SQL is that you can separately import data and import indexes, and import data more quickly. After the data is imported, even if the indexes fail to be added, it does not affect the consistency of the imported data. +# Specifies whether Physical Import Mode adds indexes via SQL. +# The default value is `false`, which means that TiDB Lightning will encode both row data and index data +# into KV pairs and import them into TiKV together. +# This mechanism is consistent with that of the historical versions. +# If you set it to `true`, it means that TiDB Lightning adds indexes via SQL after importing the row data. +# The benefit of adding indexes via SQL is that you can separately import data and import indexes, +# and import data more quickly. After the data is imported, even if the indexes fail to be added, +# it does not affect the consistency of the imported data. # add-index-by-sql = false -# When you use TiDB Lightning to import a multi-tenant TiDB cluster, use this parameter to specify the corresponding key space name. The default value is an empty string, which means TiDB Lightning will automatically get the key space name of the corresponding tenant to import data. If you specify a value, the specified key space name will be used to import data. 
+# When you use TiDB Lightning to import a multi-tenant TiDB cluster, use this parameter to specify the corresponding key space name. +# The default value is an empty string, which means TiDB Lightning will automatically get the key space name of the corresponding tenant to import data. +# If you specify a value, the specified key space name will be used to import data. # keyspace-name = "" -# In Physical Import Mode, this parameter controls the scope in which TiDB Lightning stops PD scheduling. The value options are as follows: +# In Physical Import Mode, this parameter controls the scope in which TiDB Lightning stops PD scheduling. +# The value options are as follows: # - "table": pause scheduling only for the Region that stores the target table data. The default value is "table". -# - "global": pause global scheduling. When importing data to a cluster without any business traffic, it is recommended to set this parameter to "global" to avoid interference from other scheduling. +# - "global": pause global scheduling. When importing data to a cluster without any business traffic, +# it is recommended to set this parameter to "global" to avoid interference from other scheduling. # pause-pd-scheduler-scope = "table" -# In Physical Import Mode, this parameter controls the number of Regions when splitting Regions in a batch. The maximum number of Regions that can be split at the same time per TiDB Lightning instance is: +# In Physical Import Mode, this parameter controls the number of Regions when splitting Regions in a batch. +# The maximum number of Regions that can be split at the same time per TiDB Lightning instance is: # region-split-batch-size * region-split-concurrency * table-concurrency # This parameter is introduced in v7.1.0. The default value is `4096`. # region-split-batch-size = 4096 -# In Physical Import Mode, this parameter controls the concurrency when splitting Regions. The default value is the number of CPU cores. 
+# In Physical Import Mode, this parameter controls the concurrency when splitting Regions. +# The default value is the number of CPU cores. # This parameter is introduced in v7.1.0. # region-split-concurrency = -# In Physical Import Mode, this parameter controls the number of retries to wait for the Region to come online after the split and scatter operations. The default value is `1800` and the maximum retry interval is two seconds. The number of retries will not be increased if any Region becomes online between retries. +# In Physical Import Mode, this parameter controls the number of retries to wait for the Region to come online +# after the split and scatter operations. +# The default value is `1800` and the maximum retry interval is two seconds. +# The number of retries will not be increased if any Region becomes online between retries. # This parameter is introduced in v7.1.0. # region-check-backoff-limit = 1800 @@ -204,7 +254,8 @@ read-block-size = "64KiB" # default value # This value should be in the range (0 <= batch-import-ratio < 1). batch-import-ratio = 0.75 -# Local source data directory or the URI of the external storage. For more information about the URI of the external storage, see https://docs.pingcap.com/tidb/v6.6/backup-and-restore-storages#uri-format. +# Local source data directory or the URI of the external storage. +# For more information about the URI of the external storage, see https://docs.pingcap.com/tidb/v6.6/backup-and-restore-storages#uri-format. data-source-dir = "/data/my_database" # The character set of the schema files, containing CREATE TABLE statements; @@ -214,18 +265,23 @@ data-source-dir = "/data/my_database" # an error is reported # - auto: (default) automatically detects whether the schema is UTF-8 or # GB-18030. An error is reported if the encoding is neither. +# - latin1: the schema files use MySQL latin1 encoding, also known as Code Page 1252. 
# - binary: do not try to decode the schema files character-set = "auto" -# Specifies the character set of the source data file. Lightning converts the source file from the specified character set to UTF-8 encoding when importing. +# Specifies the character set of the source data file. +# Lightning converts the source file from the specified character set to UTF-8 encoding when importing. # Currently, this configuration only specifies the character set of the CSV files with the following options supported: # - utf8mb4: Indicates that the source data file uses UTF-8 encoding. # - GB18030: Indicates that the source data file uses the GB-18030 encoding. # - GBK: The source data file uses GBK encoding (GBK encoding is an extension of the GB-2312 character set, also known as Code Page 936). +# - latin1: The source data file uses MySQL latin1 encoding, also known as Code Page 1252. # - binary: Indicates that Lightning does not convert the encoding (by default). # If left blank, the default value "binary" is used, that is to say, Lightning does not convert the encoding. -# Note that Lightning does not predict about the character set of the source data file and only converts the source file and import the data based on this configuration. -# If the value of this configuration is not the same as the actual encoding of the source data file, a failed import, data loss or data disorder might appear. +# Note that Lightning does not predict about the character set of the source data file +# and only converts the source file and import the data based on this configuration. +# If the value of this configuration is not the same as the actual encoding of the source data file, +# a failed import, data loss or data disorder might appear. data-character-set = "binary" # Specifies the replacement character in case of incompatible characters during the character set conversion of the source data file. 
# This configuration must not be duplicated with field separators, quote definers, and line breaks. @@ -259,14 +315,25 @@ delimiter = '"' # Line terminator. Empty value means both "\n" (LF) and "\r\n" (CRLF) are line terminators. terminator = '' # Whether the CSV files contain a header. -# If `header` is true, TiDB Lightning treats the first row as a table header and does not import it as data. If `header` is false, the first row is also imported as CSV data. +# If `header` is true, TiDB Lightning treats the first row as a table header and does not import it as data. +# If `header` is false, the first row is also imported as CSV data. header = true # Whether the column names in the CSV file header are matched to those defined in the target table. -# The default value is `true`, which means that you have confirmed that the column names in the CSV header are consistent with those in the target table, so that even if the order of the columns is different between the two, TiDB Lightning can still import the data successfully by mapping the column names. -# If the column names between the CSV table header and the target table do not match (for example, some column names in the CSV table header cannot be found in the target table) but the column order is the same, set this configuration to `false`. -# In this scenario, TiDB Lightning will ignore the CSV header to avoid errors and import the data directly in the order of the columns in the target table. -# Therefore, if the columns are not in the same order, you need to manually adjust the order of the columns in the CSV file to be consistent with that in the target table before importing; otherwise data discrepancies might occur. -# It is important to note that this parameter only applies if the `header` parameter is set to `true`. If `header` is set to `false`, it means that the CSV file does not contain a header, so this parameter is not relevant. 
+# The default value is `true`, which means that you have confirmed that the column names in the CSV header +# are consistent with those in the target table, so that even if the order of the columns is different between the two, +# TiDB Lightning can still import the data successfully by mapping the column names. +# If the column names between the CSV table header and the target table do not match +# (for example, some column names in the CSV table header cannot be found in the target table) +# but the column order is the same, set this configuration to `false`. +# In this scenario, TiDB Lightning will ignore the CSV header to avoid errors and import the data +# directly in the order of the columns in the target table. +# Therefore, if the columns are not in the same order, +# you need to manually adjust the order of the columns in the CSV file to be consistent with that +# in the target table before importing; +# otherwise data discrepancies might occur. +# It is important to note that this parameter only applies if the `header` parameter is set to `true`. +# If `header` is set to `false`, it means that the CSV file does not contain a header, +# so this parameter is not relevant. header-schema-match = true # Whether the CSV contains any NULL value. # If `not-null` is true, all columns from CSV cannot be NULL. @@ -301,8 +368,8 @@ pd-addr = "172.16.31.4:2379" # This setting controls the log level of the TiDB library. log-level = "error" -# Sets the TiDB session variable to speed up the Checksum and Analyze operations. -# See https://pingcap.com/docs/dev/reference/performance/statistics/#control-analyze-concurrency +# Sets the TiDB session variable to speed up the Checksum and Analyze operations. Note that if checksum-via-sql is set to "true", TiDB Lightning will execute the ADMIN CHECKSUM TABLE SQL statement to perform the Checksum operation on TiDB. 
In this case, the following parameters `distsql-scan-concurrency` and `checksum-table-concurrency` will not take effect. +# See https://docs.pingcap.com/tidb/stable/statistics#control-analyze-concurrency # for the meaning of each setting build-stats-concurrency = 20 distsql-scan-concurrency = 15 @@ -349,6 +416,11 @@ max-allowed-packet = 67_108_864 # For backward compatibility, bool values "true" and "false" are also allowed for this field. # "true" is equivalent to "required" and "false" is equivalent to "off". checksum = "required" +# Specifies whether the ADMIN CHECKSUM TABLE
operation is executed via TiDB. +# The default value is "false", which means that the ADMIN CHECKSUM TABLE
command is sent to TiKV for execution via TiDB Lightning. +# It is recommended that you set this value to "true" to make it easier to locate the problem if checksum fails. +# Meanwhile, if you want to adjust concurrency when this value is "true", you need to set the `tidb_checksum_table_concurrency` variable in TiDB (https://docs.pingcap.com/tidb/stable/system-variables#tidb_checksum_table_concurrency). +checksum-via-sql = "false" # Specifies whether to perform `ANALYZE TABLE
` for each table after checksum is done. # Options available for this field are the same as `checksum`. However, the default value for this field is "optional". analyze = "optional" @@ -400,7 +472,7 @@ log-progress = "5m" | --enable-checkpoint *bool* | Whether to enable checkpoints (default = true) | `checkpoint.enable` | | --analyze *level* | Analyze tables after importing. Available values are "required", "optional" (default value), and "off" | `post-restore.analyze` | | --checksum *level* | Compare checksum after importing. Available values are "required" (default value), "optional", and "off" | `post-restore.checksum` | -| --check-requirements *bool* | Check cluster version compatibility before starting (default = true) | `lightning.check-requirements` | +| --check-requirements *bool* | Check cluster version compatibility before starting the task, and check whether TiKV has more than 10% free space left during running time. (default = true) | `lightning.check-requirements` | | --ca *file* | CA certificate path for TLS connection | `security.ca-path` | | --cert *file* | Certificate path for TLS connection | `security.cert-path` | | --key *file* | Private key path for TLS connection | `security.key-path` | diff --git a/tidb-lightning/tidb-lightning-distributed-import.md b/tidb-lightning/tidb-lightning-distributed-import.md index 16145f6d9b9d6..5ca0828cbd5b0 100644 --- a/tidb-lightning/tidb-lightning-distributed-import.md +++ b/tidb-lightning/tidb-lightning-distributed-import.md @@ -18,13 +18,13 @@ You can use TiDB Lightning to import data in parallel in the following scenarios > > - Parallel import only supports initialized empty tables in TiDB and does not support migrating data to tables with data written by existing services. Otherwise, data inconsistencies may occur. > -> - Parallel import is usually used in the physical import mode. You need to configure `incremental-import = true`. +> - Parallel import is usually used in the physical import mode. 
You need to configure `parallel-import = true`. > > - Apply only one backend at a time when using multiple TiDB Lightning instances to import data to the same target. For example, you cannot import data to the same TiDB cluster in both the physical and logical import modes at the same time. ## Considerations -To use parallel import, you need to configure `incremental-import = true`. When TiDB Lightning is started, it registers meta data in the downstream TiDB cluster and automatically detects whether there are other instances migrating data to the target cluster at the same time. If there is, it automatically enters the parallel import mode. +To use parallel import, you need to configure `parallel-import = true`. When TiDB Lightning is started, it registers meta data in the downstream TiDB cluster and automatically detects whether there are other instances migrating data to the target cluster at the same time. If there is, it automatically enters the parallel import mode. But when migrating data in parallel, you need to take the following into consideration: @@ -95,7 +95,7 @@ data-source-dir = "/path/to/source-dir" [tikv-importer] # Whether to allow importing data into tables that already have data. The default value is `false`. # When using parallel import, because multiple TiDB Lightning instances import a table at the same time, this configuration item must be set to `true`. -incremental-import = true +parallel-import = true # "local": The default mode. It applies to large dataset import, for example, greater than 1 TiB. However, during the import, downstream TiDB is not available to provide services. # "tidb": You can use this mode for small dataset import, for example, smaller than 1 TiB. During the import, downstream TiDB is available to provide services. backend = "local" @@ -175,7 +175,7 @@ type = "sql" [tikv-importer] # Whether to allow importing data into tables that already have data. The default value is `false`. 
# When using parallel import, because multiple TiDB Lightning instances import a table at the same time, this configuration item must be set to `true`. -incremental-import = true +parallel-import = true ``` You can modify the configuration of the other instance to only import the `05001 ~ 10000` data files. diff --git a/tidb-lightning/tidb-lightning-error-resolution.md b/tidb-lightning/tidb-lightning-error-resolution.md index f9a1d49ee1519..0c27bd53f6dc9 100644 --- a/tidb-lightning/tidb-lightning-error-resolution.md +++ b/tidb-lightning/tidb-lightning-error-resolution.md @@ -7,11 +7,18 @@ summary: Learn how to resolve type conversion and duplication errors during data Starting from v5.4.0, you can configure TiDB Lightning to skip errors like invalid type conversion and unique key conflicts, and to continue the data processing as if those wrong row data does not exist. A report will be generated for you to read and manually fix errors afterward. This is ideal for importing from a slightly dirty data source, where locating the errors manually is difficult and restarting TiDB Lightning on every encounter is costly. -This document introduces how to use the type error feature (`lightning.max-error`) and the duplicate resolution feature (`tikv-importer.duplicate-resolution`). It also introduces the database where these errors are stored (`lightning.task-info-schema-name`). At the end of this document, an example is provided. +This document introduces TiDB Lightning error types, how to query the errors, and provides an example. 
The following configuration items are involved: + +- `lightning.max-error`: the tolerance threshold of type error +- `conflict.strategy`, `conflict.threshold`, and `conflict.max-record-rows`: configurations related to conflicting data +- `tikv-importer.duplicate-resolution`: the conflict handling configuration that can only be used in the physical import mode +- `lightning.task-info-schema-name`: the database where conflicting data is stored when TiDB Lightning detects conflicts + +For more information, see [TiDB Lightning (Task)](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task). ## Type error -You can use the `lightning.max-error` configuration to increase the tolerance of errors related to data types. If this configuration is set to *N*, TiDB Lightning allows and skips up to *N* errors from the data source before it exists. The default value `0` means that no error is allowed. +You can use the `lightning.max-error` configuration to increase the tolerance of errors related to data types. If this configuration is set to *N*, TiDB Lightning allows and skips up to *N* type errors from the data source before it exits. The default value `0` means that no error is allowed. These errors are recorded in a database. After the import is completed, you can view the errors in the database and process them manually. For more information, see [Error Report](#error-report). @@ -31,15 +38,18 @@ The above configuration covers the following errors: * Set NULL to a NOT NULL column. * Failed to evaluate a generated column expression. * Column count mismatch. The number of values in the row does not match the number of columns of the table. -* Unique/Primary key conflict in TiDB-backend, when `on-duplicate = "error"`. * Any other SQL errors.
-The following errors are always fatal, and cannot be skipped by changing `max-error`: +The following errors are always fatal, and cannot be skipped by changing `lightning.max-error`: * Syntax error (such as unclosed quotation marks) in the original CSV, SQL or Parquet file. * I/O, network or system permission errors. -Unique/Primary key conflict in the physical import mode is handled separately and explained in the next section. +## Conflict errors + +You can use the [`conflict.threshold`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task) configuration item to increase the tolerance of errors related to data conflict. If this configuration item is set to *N*, TiDB Lightning allows and skips up to *N* conflict errors from the data source before it exits. The default value is `9223372036854775807`, which means that almost all errors are tolerated. + +These errors are recorded in a table. After the import is completed, you can view the errors in the database and process them manually.
For more information, see [Error Report](#error-report) ## Error report @@ -71,16 +81,6 @@ task-info-schema-name = 'lightning_task_info' TiDB Lightning creates 3 tables in this database: ```sql -CREATE TABLE syntax_error_v1 ( - task_id bigint NOT NULL, - create_time datetime(6) NOT NULL DEFAULT now(6), - table_name varchar(261) NOT NULL, - path varchar(2048) NOT NULL, - offset bigint NOT NULL, - error text NOT NULL, - context text -); - CREATE TABLE type_error_v1 ( task_id bigint NOT NULL, create_time datetime(6) NOT NULL DEFAULT now(6), @@ -104,15 +104,24 @@ CREATE TABLE conflict_error_v1 ( raw_row mediumblob NOT NULL, KEY (task_id, table_name) ); +CREATE TABLE conflict_records ( + task_id bigint NOT NULL, + create_time datetime(6) NOT NULL DEFAULT now(6), + table_name varchar(261) NOT NULL, + path varchar(2048) NOT NULL, + offset bigint NOT NULL, + error text NOT NULL, + row_id bigint NOT NULL COMMENT 'the row id of the conflicting row', + row_data text NOT NULL COMMENT 'the row data of the conflicting row', + KEY (task_id, table_name) +); ``` - +`type_error_v1` records all [type errors](#type-error) managed by `lightning.max-error`. Each error corresponds to one row. -**type_error_v1** records all [type errors](#type-error) managed by the `max-error` configuration. There is one row per error. +`conflict_error_v1` records all unique and primary key conflicts managed by `tikv-importer.duplicate-resolution` in the physical import mode. Each pair of conflicts corresponds to two rows. -**conflict_error_v1** records all unique/primary key conflict in the Local-backend. There are 2 rows per pair of conflicts. +`conflict_records` records all unique and primary key conflicts managed by the `conflict` configuration group in logical import mode and physical import mode. Each error corresponds to one row. 
| Column | Syntax | Type | Conflict | Description | | ------------ | ------ | ---- | -------- | ----------------------------------------------------------------------------------------------------------------------------------- | @@ -317,4 +326,4 @@ In this example, a data source is prepared with some known errors. raw_value: 0x000000000000002A raw_handle: 0x7480000000000000C15F72800000000000002A raw_row: 0x800001000000020A0066696674792D666F7572 - ``` \ No newline at end of file + ``` diff --git a/tidb-lightning/tidb-lightning-faq.md b/tidb-lightning/tidb-lightning-faq.md index 1b5e9bdf56831..5debd62d27127 100644 --- a/tidb-lightning/tidb-lightning-faq.md +++ b/tidb-lightning/tidb-lightning-faq.md @@ -78,14 +78,18 @@ TiDB Lightning supports: Starting from v5.1, TiDB Lightning can automatically recognize the schema and tables in the downstream. If you use TiDB Lightning earlier than v5.1, you need to set `no-schema = true` in the `[mydumper]` section in `tidb-lightning.toml`. This makes TiDB Lightning skip the `CREATE TABLE` invocations and fetch the metadata directly from the target database. TiDB Lightning will exit with error if a table is actually missing. -## Can the Strict SQL Mode be disabled to allow importing invalid data? +## How to prohibit importing invalid data? -Yes. By default, the [`sql_mode`](https://dev.mysql.com/doc/refman/5.7/en/sql-mode.html) used by TiDB Lightning is `"STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION"`, which disallows invalid data such as the date `1970-00-00`. The mode can be changed by modifying the `sql-mode` setting in the `[tidb]` section in `tidb-lightning.toml`. +You can prohibit importing invalid data by enabling Strict SQL Mode. + +By default, the [`sql_mode`](https://dev.mysql.com/doc/refman/5.7/en/sql-mode.html) used by TiDB Lightning is `"ONLY_FULL_GROUP_BY,NO_AUTO_CREATE_USER"`, which allows invalid data such as the date `1970-00-00`. 
+ +To prohibit importing invalid data, you need to change the `sql-mode` setting to `"STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION"` in the `[tidb]` section in `tidb-lightning.toml`. ```toml ... [tidb] -sql-mode = "" +sql-mode = "STRICT_TRANS_TABLES,NO_ENGINE_SUBSTITUTION" ... ``` diff --git a/tidb-lightning/tidb-lightning-logical-import-mode-usage.md b/tidb-lightning/tidb-lightning-logical-import-mode-usage.md index db3225b13d28d..ee2ac84758707 100644 --- a/tidb-lightning/tidb-lightning-logical-import-mode-usage.md +++ b/tidb-lightning/tidb-lightning-logical-import-mode-usage.md @@ -31,12 +31,6 @@ data-source-dir = "/data/my_database" # Import mode. "tidb" means using the logical import mode. backend = "tidb" -# The operation of inserting duplicate data in the logical import mode. -# - replace: replace existing data with new data -# - ignore: keep existing data and ignore new data -# - error: pause the import and report an error -on-duplicate = "replace" - [tidb] # The information of the target cluster. The address of any tidb-server from the cluster. host = "172.16.31.1" @@ -53,15 +47,18 @@ For the complete configuration file, refer to [TiDB Lightning Configuration](/ti ## Conflict detection -Conflicting data refers to two or more records with the same data in the PK or UK column. When the data source contains conflicting data, the actual number of rows in the table is different from the total number of rows returned by the query using the unique index. - -In the logical import mode, you can configure the strategy for resolving conflicting data by setting the `on-duplicate` configuration item. Based on the strategy, TiDB Lightning imports data with different SQL statements. +Conflicting data refers to two or more records with the same data in the PK or UK column. 
In the logical import mode, you can configure the strategy for handling conflicting data by setting the [`conflict.strategy`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task) configuration item. Based on the strategy, TiDB Lightning imports data with different SQL statements. | Strategy | Default behavior of conflicting data | The corresponding SQL statement | | :-- | :-- | :-- | -| `replace` | Replacing existing data with new data. | `REPLACE INTO ...` | -| `ignore` | Keeping existing data and ignoring new data. | `INSERT IGNORE INTO ...` | -| `error` | Pausing the import and reporting an error. | `INSERT INTO ...` | +| `"replace"` | Replacing existing data with new data. | `REPLACE INTO ...` | +| `"ignore"` | Keeping existing data and ignoring new data. | `INSERT IGNORE INTO ...` | +| `"error"` | Pausing the import and reporting an error. | `INSERT INTO ...` | +| `""` | TiDB Lightning does not detect or handle conflicting data. If data with primary and unique key conflicts exists, the subsequent step reports an error. | None | + +When the strategy is `"error"`, errors caused by conflicting data directly terminate the import task. When the strategy is `"replace"` or `"ignore"`, you can control the maximum number of tolerated conflicts by configuring [`conflict.threshold`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task). The default value is `9223372036854775807`, which means that almost all errors are tolerated. + +When the strategy is `"ignore"`, conflicting data is recorded in the downstream `conflict_records` table. For further details, see [Error report](/tidb-lightning/tidb-lightning-error-resolution.md#error-report). In this case, you can limit the records by configuring [`conflict.max-record-rows`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task), and conflicting data that exceeds the limit is skipped and not recorded. The default value is `100`.
## Performance tuning diff --git a/tidb-lightning/tidb-lightning-physical-import-mode-usage.md b/tidb-lightning/tidb-lightning-physical-import-mode-usage.md index 0412e6eb69903..5c86465dcc7e0 100644 --- a/tidb-lightning/tidb-lightning-physical-import-mode-usage.md +++ b/tidb-lightning/tidb-lightning-physical-import-mode-usage.md @@ -27,6 +27,17 @@ check-requirements = true # The local data source directory or the URI of the external storage. For more information about the URI of the external storage, see https://docs.pingcap.com/tidb/v6.6/backup-and-restore-storages#uri-format. data-source-dir = "/data/my_database" +[conflict] +# Starting from v7.3.0, a new version of strategy is introduced to handle conflicting data. The default value is "". +# - "": TiDB Lightning does not detect or handle conflicting data. If the source file contains conflicting primary or unique key records, the subsequent step reports an error. +# - "error": when detecting conflicting primary or unique key records in the imported data, TiDB Lightning terminates the import and reports an error. +# - "replace": when encountering conflicting primary or unique key records, TiDB Lightning retains the new data and overwrites the old data. +# - "ignore": when encountering conflicting primary or unique key records, TiDB Lightning retains the old data and ignores the new data. +# The new version strategy cannot be used together with tikv-importer.duplicate-resolution (the old version of conflict detection). +strategy = "" +# threshold = 9223372036854775807 +# max-record-rows = 100 + [tikv-importer] # Import mode. "local" means using the physical import mode. backend = "local" @@ -83,15 +94,48 @@ For the complete configuration file, refer to [the configuration file and comman ## Conflict detection -Conflicting data refers to two or more records with the same PK/UK column data. 
When the data source contains conflicting data, the actual number of rows in the table is different from the total number of rows returned by the query using unique index. +Conflicting data refers to two or more records with the same primary key or unique key column data. When the data source contains conflicting data and the conflict detection feature is not enabled, the actual number of rows in the table is different from the total number of rows returned by the query using unique index. + +There are two versions for conflict detection: + +- The new version of conflict detection, controlled by the `conflict` configuration item. +- The old version of conflict detection, controlled by the `tikv-importer.duplicate-resolution` configuration item. + +### The new version of conflict detection + +The meanings of the configuration values are as follows: + +| Strategy | Default behavior of conflicting data | The corresponding SQL statement | +| :-- | :-- | :-- | +| `"replace"` | Replacing existing data with new data. | `REPLACE INTO ...` | +| `"ignore"` | Keeping existing data and ignoring new data. | `INSERT IGNORE INTO ...` | +| `"error"` | Pausing the import and reporting an error. | `INSERT INTO ...` | +| `""` | TiDB Lightning does not detect or handle conflicting data. If data with primary and unique key conflicts exists, the subsequent step reports an error. | None | + +> **Note:** +> +> The conflict detection result in the physical import mode might differ from SQL-based import due to internal implementation and limitations of TiDB Lightning. + +When the strategy is `"replace"` or `"ignore"`, conflicting data is treated as [conflict errors](/tidb-lightning/tidb-lightning-error-resolution.md#conflict-errors). If the [`conflict.threshold`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task) value is greater than `0`, TiDB Lightning tolerates the specified number of conflict errors.
The default value is `9223372036854775807`, which means that almost all errors are tolerated. For more information, see [error resolution](/tidb-lightning/tidb-lightning-error-resolution.md). + +The new version of conflict detection has the following limitations: + +- Before importing, TiDB Lightning prechecks potential conflicting data by reading all data and encoding it. During the detection process, TiDB Lightning uses `tikv-importer.sorted-kv-dir` to store temporary files. After the detection is complete, TiDB Lightning retains the results for the import phase. This introduces additional overhead for time consumption, disk space usage, and API requests to read the data. +- The new version of conflict detection only works in a single node, and does not apply to parallel imports and scenarios where the `disk-quota` parameter is enabled. +- The new version (`conflict`) and old version (`tikv-importer.duplicate-resolution`) conflict detection cannot be used at the same time. The new version of conflict detection is enabled when the configuration [`conflict.strategy`](/tidb-lightning/tidb-lightning-configuration.md#tidb-lightning-task) is set. + +Compared with the old version of conflict detection, the new version takes less time when the imported data contains a large amount of conflicting data. It is recommended that you use the new version of conflict detection in non-parallel import tasks when the data contains conflicting data and there is sufficient local disk space. + +### The old version of conflict detection + +The old version of conflict detection is enabled when `tikv-importer.duplicate-resolution` is not an empty string. In v7.2.0 and earlier versions, TiDB Lightning only supports this conflict detection method.
-TiDB Lightning offers three strategies for detecting conflicting data: +In the old version of conflict detection, TiDB Lightning offers two strategies: -- `record`: only records conflicting records to the `lightning_task_info.conflict_error_v1` table on the target TiDB. Note that the required version of the target TiKV is v5.2.0 or later versions; otherwise, it falls back to 'none'. -- `remove` (recommended): records all conflicting records, like the `record` strategy. But it removes all conflicting records from the target table to ensure a consistent state in the target TiDB. -- `none`: does not detect duplicate records. `none` has the best performance in the three strategies, but might lead to inconsistent data in the target TiDB. +- `remove` (recommended): records and removes all conflicting records from the target table to ensure a consistent state in the target TiDB. +- `none`: does not detect duplicate records. `none` has the best performance in the two strategies, but might lead to inconsistent data in the target TiDB. -Before v5.3, Lightning does not support conflict detection. If there is conflicting data, the import process fails at the checksum step. When conflict detection is enabled, regardless of the `record` or `remove` strategy, if there is conflicting data, Lightning skips the checksum step (because it always fails). +Before v5.3, TiDB Lightning does not support conflict detection. If there is conflicting data, the import process fails at the checksum step. When conflict detection is enabled, if there is conflicting data, TiDB Lightning skips the checksum step (because it always fails). 
Suppose an `order_line` table has the following schema: diff --git a/tidb-lightning/tidb-lightning-physical-import-mode.md b/tidb-lightning/tidb-lightning-physical-import-mode.md index 3ad4f9e441f90..30ad504d6a9a3 100644 --- a/tidb-lightning/tidb-lightning-physical-import-mode.md +++ b/tidb-lightning/tidb-lightning-physical-import-mode.md @@ -9,7 +9,13 @@ Physical import mode is an efficient and fast import mode that inserts data dire Before you use the physical import mode, make sure to read [Requirements and restrictions](#requirements-and-restrictions). -The backend for the physical import mode is `local`. +The backend for the physical import mode is `local`. You can modify it in `tidb-lightning.toml`: + + ```toml + [tikv-importer] + # Set the import mode to "local" to use the physical import mode. + backend = "local" + ``` ## Implementation @@ -21,7 +27,7 @@ The backend for the physical import mode is `local`. 2. TiDB Lightning creates table schemas in the target database and fetches the metadata. - If you set `add-index-by-sql` to `true`, `tidb-lightning` adds indexes via the SQL interface, and drops all secondary indexes from the target table before importing the data. + If you set `add-index-by-sql` to `true`, `tidb-lightning` adds indexes via the SQL interface, and drops all secondary indexes from the target table before importing the data. The default value is `false`, which is consistent with earlier versions. 3. Each table is divided into multiple contiguous **blocks**, so that TiDB Lightning can import data from large tables (greater than 200 GB) in parallel. @@ -70,7 +76,7 @@ It is recommended that you allocate CPU more than 32 cores and memory greater th - Do not use the physical import mode to directly import data to TiDB clusters in production. It has severe performance implications. 
If you need to do so, refer to [Pause scheduling on the table level](/tidb-lightning/tidb-lightning-physical-import-mode-usage.md#scope-of-pausing-scheduling-during-import). - Do not use multiple TiDB Lightning instances to import data to the same TiDB cluster by default. Use [Parallel Import](/tidb-lightning/tidb-lightning-distributed-import.md) instead. - When you use multiple TiDB Lightning to import data to the same target cluster, do not mix the import modes. That is, do not use the physical import mode and the logical import mode at the same time. -- During the process of importing data, do not perform write operations in the target table. Otherwise the import will fail or the data will be inconsistent. At the same time, it is not recommended to perform read operations, because the data you read might be inconsistent. You can perform read and write operations after the import operation is completed. +- During the process of importing data, do not perform DDL and DML operations in the target table. Otherwise the import will fail or the data will be inconsistent. At the same time, it is not recommended to perform read operations, because the data you read might be inconsistent. You can perform read and write operations after the import operation is completed. - A single Lightning process can import a single table of 10 TB at most. Parallel import can use 10 Lightning instances at most. ### Tips for using with other components diff --git a/tidb-lightning/tidb-lightning-prechecks.md b/tidb-lightning/tidb-lightning-prechecks.md index bb092bc6379ef..c5ae306246c71 100644 --- a/tidb-lightning/tidb-lightning-prechecks.md +++ b/tidb-lightning/tidb-lightning-prechecks.md @@ -18,4 +18,4 @@ The following table describes each check item and detailed explanation. 
| Exceedingly Large CSV files in the data file | >= 5.3.0 | When there are CSV files larger than 10 GiB in the backup file and auto-slicing is not enabled (StrictFormat=false), it will impact the import performance. The purpose of this check is to remind you to ensure the data is in the right format and to enable auto-slicing. | | Recovery from breakpoints | >= 5.3.0 | This check ensures that no changes are made to the source file or schema in the database during the breakpoint recovery process that would result in importing the wrong data. | | Import into an existing table | >= 5.3.0 | When importing into an already created table, it checks, as much as possible, whether the source file matches the existing table. Check if the number of columns matches. If the source file has column names, check if the column names match. When there are default columns in the source file, it checks if the default columns have Default Value, and if they have, the check passes. | -| Whether the target table is empty | >= 5.3.1 | TiDB Lightning automatically exits with an error if the target table is not empty. If parallel import mode is enabled (`incremental-import = true`), this check item will be skipped. | +| Whether the target table is empty | >= 5.3.1 | TiDB Lightning automatically exits with an error if the target table is not empty. If parallel import mode is enabled (`parallel-import = true`), this check item will be skipped. 
| diff --git a/tidb-limitations.md b/tidb-limitations.md index fe51de5114f81..519ad878c201f 100644 --- a/tidb-limitations.md +++ b/tidb-limitations.md @@ -21,7 +21,7 @@ This document describes the common usage limitations of TiDB, including the maxi ## Limitations on the total number of databases, tables, views, and connections -| Identifier type | Maximum number | +| Type | Maximum number | |:----------|:----------| | Databases | unlimited | | Tables | unlimited | @@ -63,18 +63,6 @@ You can adjust the size limit via the [`txn-entry-size-limit`](/tidb-configurati -## Limitation on a single column - -| Type | Upper limit (default value) | -|:----------|:----------| -| Size | Defaults to 6 MiB and can be adjusted to 120 MiB | - - - -You can adjust the size limit via the [`txn-entry-size-limit`](/tidb-configuration-file.md#txn-entry-size-limit-new-in-v50) configuration item. - - - ## Limitations on data types | Type | Upper limit | diff --git a/tidb-monitoring-api.md b/tidb-monitoring-api.md index cc0843a76ac31..e6e2feebe0417 100644 --- a/tidb-monitoring-api.md +++ b/tidb-monitoring-api.md @@ -28,7 +28,7 @@ The following example uses `http://${host}:${port}/status` to get the current st curl http://127.0.0.1:10080/status { connections: 0, # The current number of clients connected to the TiDB server. - version: "5.7.25-TiDB-v3.0.0-beta-250-g778c3f4a5", # The TiDB version number. + version: "5.7.25-TiDB-v7.3.0", # The TiDB version number. git_hash: "778c3f4a5a716880bcd1d71b257c8165685f0d70" # The Git Hash of the current TiDB code. 
} ``` @@ -45,13 +45,13 @@ curl http://127.0.0.1:10080/schema_storage/mysql/stats_histograms ``` { - "table_schema": "mysql", - "table_name": "stats_histograms", - "table_rows": 0, - "avg_row_length": 0, - "data_length": 0, - "max_data_length": 0, - "index_length": 0, + "table_schema": "mysql", + "table_name": "stats_histograms", + "table_rows": 0, + "avg_row_length": 0, + "data_length": 0, + "max_data_length": 0, + "index_length": 0, "data_free": 0 } ``` @@ -63,13 +63,13 @@ curl http://127.0.0.1:10080/schema_storage/test ``` [ { - "table_schema": "test", - "table_name": "test", - "table_rows": 0, - "avg_row_length": 0, - "data_length": 0, - "max_data_length": 0, - "index_length": 0, + "table_schema": "test", + "table_name": "test", + "table_rows": 0, + "avg_row_length": 0, + "data_length": 0, + "max_data_length": 0, + "index_length": 0, "data_free": 0 } ] diff --git a/tidb-resource-control.md b/tidb-resource-control.md index 7264bb66835a8..b669af2f9fd10 100644 --- a/tidb-resource-control.md +++ b/tidb-resource-control.md @@ -9,7 +9,7 @@ summary: Learn how to use the resource control feature to control and schedule a > **Note:** > -> This feature is not available on [Serverless Tier clusters](/tidb-cloud/select-cluster-tier.md#serverless-tier-beta). +> This feature is not available on [TiDB Serverless clusters](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). @@ -43,21 +43,52 @@ Currently, the resource control feature has the following limitations: ## What is Request Unit (RU) -Request Unit (RU) is a unified abstraction unit in TiDB for system resources, which currently includes CPU, IOPS, and IO bandwidth metrics. The consumption of these three metrics is represented by RU according to a certain ratio. +Request Unit (RU) is a unified abstraction unit in TiDB for system resources, which currently includes CPU, IOPS, and IO bandwidth metrics. It is used to indicate the amount of resources consumed by a single request to the database. 
The number of RUs consumed by a request depends on a variety of factors, such as the type of operations, and the amount of data being queried or modified. Currently, the RU contains consumption statistics for the resources in the following table: + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Resource typeRU consumption
Read2 storage read batches consume 1 RU
8 storage read requests consume 1 RU
64 KiB read request payload consumes 1 RU
Write1 storage write batch consumes 1 RU for each replica
1 storage write request consumes 1 RU
1 KiB write request payload consumes 1 RU
SQL CPU 3 ms consumes 1 RU
-The following table shows the consumption of TiKV storage layer CPU and IO resources by user requests and the corresponding RU weights. - -| Resource | RU Weight | -|:----------------|:-----------------| -| CPU | 1/3 RU per millisecond | -| Read IO | 1/64 RU per KB | -| Write IO | 1 RU/KB | -| Basic overhead of a read request | 0.25 RU | -| Basic overhead of a write request | 1.5 RU | +> **Note:** +> +> - Each write operation is eventually replicated to all replicas (by default, TiKV has 3 replicas). Each replication operation is considered a different write operation. +> - In addition to queries executed by users, RU can be consumed by background tasks, such as automatic statistics collection. +> - The preceding table lists only the resources involved in RU calculation for TiDB Self-Hosted clusters, excluding the network and storage consumption. For TiDB Serverless RUs, see [TiDB Serverless Pricing Details](https://www.pingcap.com/tidb-cloud-serverless-pricing-details/). -Based on the above table, assuming that the TiKV time consumed by a resource group is `c` milliseconds, `r1` times of requests read `r2` KB data, `w1` times of write requests write `w2` KB data, and the number of non-witness TiKV nodes in the cluster is `n`. Then, the formula for the total RUs consumed by the resource group is as follows: +## Estimate RU consumption of SQL statements -`c`\* 1/3 + (`r1` \* 0.25 + `r2` \* 1/64) + (1.5 \* `w1` + `w2` \* 1 \* `n`) +You can use the [`EXPLAIN ANALYZE`](/sql-statements/sql-statement-explain-analyze.md#ru-request-unit-consumption) statement to get the amount of RUs consumed during SQL execution. Note that the amount of RUs is affected by the cache (for example, [coprocessor cache](/coprocessor-cache.md)). When the same SQL is executed multiple times, the amount of RUs consumed by each execution might be different. The RU value does not represent the exact value for each execution, but can be used as a reference for estimation. 
## Parameters for resource control @@ -73,7 +104,7 @@ The resource control feature introduces two new global variables. -* TiKV: For on-premises TiDB, you can use the `resource-control.enabled` parameter to control whether to use request scheduling based on resource group quotas. For TiDB Cloud, the value of the `resource-control.enabled` parameter is `true` by default and does not support dynamic modification. +* TiKV: For TiDB Self-Hosted, you can use the `resource-control.enabled` parameter to control whether to use request scheduling based on resource group quotas. For TiDB Cloud, the value of the `resource-control.enabled` parameter is `true` by default and does not support dynamic modification. @@ -81,7 +112,7 @@ Starting from TiDB v7.0.0, both parameters are enabled by default. The results o | `resource-control.enabled` | `tidb_enable_resource_control`= ON | `tidb_enable_resource_control`= OFF | |:----------------------------|:-------------------------------------|:-------------------------------------| -| `resource-control.enabled`= true | Flow control and scheduling (recommended) | Invalid combination | +| `resource-control.enabled`= true | Flow control and scheduling (recommended) | Invalid combination | | `resource-control.enabled`= false | Only flow control (not recommended) | The feature is disabled. | For more information about the resource control mechanism and parameters, see [RFC: Global Resource Control in TiDB](https://github.com/pingcap/tidb/blob/master/docs/design/2022-11-25-global-resource-control.md). @@ -92,20 +123,22 @@ This section describes how to use the resource control feature to manage resourc ### Estimate cluster capacity + + Before resource planning, you need to know the overall capacity of the cluster. TiDB provides the statement [`CALIBRATE RESOURCE`](/sql-statements/sql-statement-calibrate-resource.md) to estimate the cluster capacity. 
You can use one of the following methods: - [Estimate capacity based on actual workload](/sql-statements/sql-statement-calibrate-resource.md#estimate-capacity-based-on-actual-workload) - [Estimate capacity based on hardware deployment](/sql-statements/sql-statement-calibrate-resource.md#estimate-capacity-based-on-hardware-deployment) - - You can view the [Resource Manager page](/dashboard/dashboard-resource-manager.md) in TiDB Dashboard. For more information, see [`CALIBRATE RESOURCE`](/sql-statements/sql-statement-calibrate-resource.md#methods-for-estimating-capacity). -For more information, see [`CALIBRATE RESOURCE`](/sql-statements/sql-statement-calibrate-resource.md#methods-for-estimating-capacity). +For TiDB Self-Hosted, you can use the [`CALIBRATE RESOURCE`](https://docs.pingcap.com/zh/tidb/stable/sql-statement-calibrate-resource) statement to estimate the cluster capacity. + +For TiDB Cloud, the [`CALIBRATE RESOURCE`](https://docs.pingcap.com/zh/tidb/stable/sql-statement-calibrate-resource) statement is inapplicable. @@ -192,6 +225,148 @@ The following example binds the current statement to the resource group `rg1`. SELECT /*+ RESOURCE_GROUP(rg1) */ * FROM t limit 10; ``` +### Manage queries that consume more resources than expected (Runaway Queries) + +> **Warning:** +> +> This feature is experimental. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. + +A runaway query is a query that consumes more time or resources than expected. The term **runaway queries** is used in the following to describe the feature of managing the runaway query. + +- Starting from v7.2.0, the resource control feature introduces the management of runaway queries. 
You can set criteria for a resource group to identify runaway queries and automatically take actions to prevent them from exhausting resources and affecting other queries. You can manage runaway queries for a resource group by including the `QUERY_LIMIT` field in [`CREATE RESOURCE GROUP`](/sql-statements/sql-statement-create-resource-group.md) or [`ALTER RESOURCE GROUP`](/sql-statements/sql-statement-alter-resource-group.md). +- Starting from v7.3.0, the resource control feature introduces manual management of runaway watches, enabling quick identification of runaway queries for a given SQL statement or Digest. You can execute the statement [`QUERY WATCH`](/sql-statements/sql-statement-query-watch.md) to manually manage the runaway queries watch list in the resource group. + +#### `QUERY_LIMIT` parameters + +Supported condition setting: + +- `EXEC_ELAPSED`: a query is identified as a runaway query when the query execution time exceeds this limit. + +Supported operations (`ACTION`): + +- `DRYRUN`: no action is taken. The records are appended for the runaway queries. This is mainly used to observe whether the condition setting is reasonable. +- `COOLDOWN`: the execution priority of the query is lowered to the lowest level. The query continues to execute with the lowest priority and does not occupy resources of other operations. +- `KILL`: the identified query is automatically terminated and reports an error `Query execution was interrupted, identified as runaway query`. + +To avoid too many concurrent runaway queries that exhaust system resources, the resource control feature introduces a quick identification mechanism, which can quickly identify and isolate runaway queries. You can use this feature through the `WATCH` clause. When a query is identified as a runaway query, this mechanism extracts the matching feature (defined by the parameter after `WATCH`) of the query. 
In the next period of time (defined by `DURATION`), the matching feature of the runaway query is added to the watch list, and the TiDB instance matches queries with the watch list. The matching queries are directly marked as runaway queries and isolated according to the corresponding action, instead of waiting for them to be identified by conditions. The `KILL` operation terminates the query and reports an error `Quarantined and interrupted because of being in runaway watch list`. + +There are three methods for `WATCH` to match for quick identification: + +- `EXACT` indicates that only SQL statements with exactly the same SQL text are quickly identified. +- `SIMILAR` indicates all SQL statements with the same pattern are matched by SQL Digest, and the literal values are ignored. +- `PLAN` indicates all SQL statements with the same pattern are matched by Plan Digest. + +The `DURATION` option in `WATCH` indicates the duration of the identification item, which is infinite by default. + +After a watch item is added, neither the matching feature nor the `ACTION` is changed or deleted whenever the `QUERY_LIMIT` configuration is changed or deleted. You can use `QUERY WATCH REMOVE` to remove a watch item. + +The parameters of `QUERY_LIMIT` are as follows: + +| Parameter | Description | Note | +|---------------|--------------|--------------------------------------| +| `EXEC_ELAPSED` | When the query execution time exceeds this value, it is identified as a runaway query | `EXEC_ELAPSED='60s'` means the query is identified as a runaway query if it takes more than 60 seconds to execute. | +| `ACTION` | Action taken when a runaway query is identified | The optional values are `DRYRUN`, `COOLDOWN`, and `KILL`. | +| `WATCH` | Quickly match the identified runaway query. If the same or similar query is encountered again within a certain period of time, the corresponding action is performed immediately. | Optional.
For example, `WATCH=SIMILAR DURATION '60s'`, `WATCH=EXACT DURATION '1m'`, and `WATCH=PLAN`. | + +#### Examples + +1. Create a resource group `rg1` with a quota of 500 RUs per second, and define a runaway query as one that exceeds 60 seconds, and lower the priority of the runaway query. + + ```sql + CREATE RESOURCE GROUP IF NOT EXISTS rg1 RU_PER_SEC = 500 QUERY_LIMIT=(EXEC_ELAPSED='60s', ACTION=COOLDOWN); + ``` + +2. Change the `rg1` resource group to terminate the runaway queries, and mark the queries with the same pattern as runaway queries immediately in the next 10 minutes. + + ```sql + ALTER RESOURCE GROUP rg1 QUERY_LIMIT=(EXEC_ELAPSED='60s', ACTION=KILL, WATCH=SIMILAR DURATION='10m'); + ``` + +3. Change the `rg1` resource group to cancel the runaway query check. + + ```sql + ALTER RESOURCE GROUP rg1 QUERY_LIMIT=NULL; + ``` + +#### `QUERY WATCH` parameters + +For more information about the synopsis of `QUERY WATCH`, see [`QUERY WATCH`](/sql-statements/sql-statement-query-watch.md). + +The parameters are as follows: + +- The `RESOURCE GROUP` specifies a resource group. The matching features of runaway queries added by this statement are added to the watch list of the resource group. This parameter can be omitted. If omitted, it applies to the `default` resource group. +- The meaning of `ACTION` is the same as `QUERY LIMIT`. This parameter can be omitted. If omitted, the corresponding action after identification adopts the `ACTION` configured by `QUERY LIMIT` in the resource group, and the action does not change with the `QUERY LIMIT` configuration. If there is no `ACTION` configured in the resource group, an error is reported. +- The `QueryWatchTextOption` parameter has three options: `SQL DIGEST`, `PLAN DIGEST`, and `SQL TEXT`. + - `SQL DIGEST` is the same as that of `SIMILAR`. The following parameters accept strings, user-defined variables, or other expressions that yield string result. 
The string length must be 64, which is the same as the Digest definition in TiDB. + - `PLAN DIGEST` is the same as `PLAN`. The following parameter is a Digest string. + - `SQL TEXT` matches the input SQL as a raw string (`EXACT`), or parses and compiles it into `SQL DIGEST` (`SIMILAR`) or `PLAN DIGEST` (`PLAN`), depending on the following parameter. + +- Add a matching feature to the runaway query watch list for the default resource group (you need to set `QUERY LIMIT` for the default resource group in advance). + + ```sql + QUERY WATCH ADD ACTION KILL SQL TEXT EXACT TO 'select * from test.t2'; + ``` + +- Add a matching feature to the runaway query watch list for the `rg1` resource group by parsing the SQL into SQL Digest. When `ACTION` is not specified, the `ACTION` option already configured for the `rg1` resource group is used. + + ```sql + QUERY WATCH ADD RESOURCE GROUP rg1 SQL TEXT SIMILAR TO 'select * from test.t2'; + ``` + +- Add a matching feature to the runaway query watch list for the `rg1` resource group using `PLAN DIGEST`. + + ```sql + QUERY WATCH ADD RESOURCE GROUP rg1 ACTION KILL PLAN DIGEST 'd08bc323a934c39dc41948b0a073725be3398479b6fa4f6dd1db2a9b115f7f57'; + ``` + +- Get the watch item ID by querying `INFORMATION_SCHEMA.RUNAWAY_WATCHES` and delete the watch item. + + ```sql + SELECT * from information_schema.runaway_watches ORDER BY id; + ``` + + ```sql + *************************** 1. 
row *************************** + ID: 20003 + RESOURCE_GROUP_NAME: rg2 + START_TIME: 2023-07-28 13:06:08 + END_TIME: UNLIMITED + WATCH: Similar + WATCH_TEXT: 5b7fd445c5756a16f910192ad449c02348656a5e9d2aa61615e6049afbc4a82e + SOURCE: 127.0.0.1:4000 + ACTION: Kill + 1 row in set (0.00 sec) + ``` + + ```sql + QUERY WATCH REMOVE 20003; + ``` + +#### Observability + +You can get more information about runaway queries from the following system tables and `INFORMATION_SCHEMA`: + ++ The `mysql.tidb_runaway_queries` table contains the history records of all runaway queries identified in the past 7 days. Take one of the rows as an example: + + ```sql + MySQL [(none)]> SELECT * FROM mysql.tidb_runaway_queries LIMIT 1\G; + *************************** 1. row *************************** + resource_group_name: rg1 + time: 2023-06-16 17:40:22 + match_type: identify + action: kill + original_sql: select * from sbtest.sbtest1 + plan_digest: 5b7d445c5756a16f910192ad449c02348656a5e9d2aa61615e6049afbc4a82e + tidb_server: 127.0.0.1:4000 + ``` + + In the preceding output,`match_type` indicates how the runaway query is identified. The value can be one of the following: + + - `identify` means that it matches the condition of the runaway query. + - `watch` means that it matches the quick identification rule in the watch list. + ++ The `information_schema.runaway_watches` table contains records of quick identification rules for runaway queries. For more information, see [`RUNAWAY_WATCHES`](/information-schema/information-schema-runaway-watches.md). + ## Disable resource control @@ -214,7 +389,7 @@ SELECT /*+ RESOURCE_GROUP(rg1) */ * FROM t limit 10; SET GLOBAL tidb_enable_resource_control = 'OFF'; ``` -2. For on-premises TiDB, you can use the `resource-control.enabled` parameter to control whether to use request scheduling based on resource group quotas. For TiDB Cloud, the value of the `resource-control.enabled` parameter is `true` by default and does not support dynamic modification. 
If you need to disable it for TiDB Cloud Dedicated Tier clusters, contact [TiDB Cloud Support](/tidb-cloud/tidb-cloud-support.md). +2. For TiDB Self-Hosted, you can use the `resource-control.enabled` parameter to control whether to use request scheduling based on resource group quotas. For TiDB Cloud, the value of the `resource-control.enabled` parameter is `true` by default and does not support dynamic modification. If you need to disable it for TiDB Dedicated clusters, contact [TiDB Cloud Support](/tidb-cloud/tidb-cloud-support.md). @@ -234,7 +409,7 @@ You can view the data of resource groups in the current [`RESOURCE_GROUPS`](/inf > **Note:** > -> This section is only applicable to on-premises TiDB. Currently, TiDB Cloud does not provide resource control metrics. +> This section is only applicable to TiDB Self-Hosted. Currently, TiDB Cloud does not provide resource control metrics. TiDB regularly collects runtime information about resource control and provides visual charts of the metrics in Grafana's **TiDB** > **Resource Control** dashboard. diff --git a/tidb-roadmap.md b/tidb-roadmap.md index 2d2e150a34c3f..e0ccf433f3134 100644 --- a/tidb-roadmap.md +++ b/tidb-roadmap.md @@ -5,16 +5,18 @@ summary: Learn about what's coming in the future for TiDB. # TiDB Roadmap -This roadmap brings you what's coming in the future, so you can see the new features or improvements in advance, follow the progress, learn about the key milestones on the way, and give feedback as the development work goes on. In the course of development, this roadmap is subject to change based on user needs and feedback. If you have a feature request or want to prioritize a feature, please file an issue on [GitHub](https://github.com/pingcap/tidb/issues). +This roadmap provides a look into the proposed future. This will be continually updated as we release long-term stable (LTS) versions. 
The purpose is to provide visibility into what is coming, so that you can more closely follow the progress, learn about the key milestones on the way, and give feedback as the development work goes on. -## Highlights of what we are planning +In the course of development, this roadmap is subject to change based on user needs and feedback. As expected, as the columns move right, the items under them are less committed. If you have a feature request or want to prioritize a feature, please file an issue on [GitHub](https://github.com/pingcap/tidb/issues). + +## Rolling roadmap highlights - - + + @@ -26,29 +28,14 @@ This roadmap brings you what's coming in the future, so you can see the new feat @@ -101,55 +74,18 @@ This roadmap brings you what's coming in the future, so you can see the new feat @@ -175,29 +106,20 @@ This roadmap brings you what's coming in the future, so you can see the new feat @@ -373,8 +255,25 @@ This roadmap brings you what's coming in the future, so you can see the new feat These are non-exhaustive plans and are subject to change. Features might differ per service subscriptions. +## Previously delivered roadmap items + +You might have been waiting on some items from the last version. The following lists some previously delivered features. For more details, refer to the [v7.1.0 release notes](/releases/release-7.1.0.md). 
+
+- Foundation of multi-tenancy framework: resource control quotas and scheduling for resource groups
+- TiCDC supports object storage sink, including Amazon S3 and Azure Blob Storage (GA)
+- Fastest online `ADD INDEX` (GA)
+- TiFlash late materialization (GA)
+- TiFlash supports spill to disk (GA)
+- LDAP authentication
+- SQL audit log enhancement (Enterprise-only)
+- Partitioned Raft KV storage engine (experimental)
+- General session-level plan cache (experimental)
+- TiCDC distributed per table with Kafka downstream (experimental)
+
 ## Recently shipped
 
+- [TiDB 7.2.0 Release Notes](https://docs.pingcap.com/tidb/v7.2/release-7.2.0)
+- [TiDB 7.1.0 Release Notes](https://docs.pingcap.com/tidb/v7.1/release-7.1.0)
 - [TiDB 7.0.0 Release Notes](https://docs.pingcap.com/tidb/v7.0/release-7.0.0)
 - [TiDB 6.6.0 Release Notes](https://docs.pingcap.com/tidb/v6.6/release-6.6.0)
 - [TiDB 6.5.0 Release Notes](https://docs.pingcap.com/tidb/v6.5/release-6.5.0)
diff --git a/tiflash-620-upgrade-guide.md b/tiflash-upgrade-guide.md
similarity index 83%
rename from tiflash-620-upgrade-guide.md
rename to tiflash-upgrade-guide.md
index 2b478e04cf14d..88744043f9ad6 100644
--- a/tiflash-620-upgrade-guide.md
+++ b/tiflash-upgrade-guide.md
@@ -1,11 +1,12 @@
 ---
-title: TiFlash v6.2 Upgrade Guide
-summary: Learn the precautions when you upgrade TiFlash to v6.2.
+title: TiFlash Upgrade Guide
+summary: Learn the precautions when you upgrade TiFlash.
+aliases: ['/tidb/dev/tiflash-620-upgrade-guide']
 ---
 
-# TiFlash v6.2 Upgrade Guide
+# TiFlash Upgrade Guide
 
-This document describes the functional changes in TiFlash modules you need to pay attention to when you upgrade TiFlash from earlier versions to v6.2, and recommended actions for you to take.
+This document describes the functional changes and recommended actions that you need to learn when you upgrade TiFlash.
To learn the standard upgrade process, see the following documents: @@ -84,3 +85,9 @@ You can forcibly scale in the target TiFlash node and then replicate data from T ## From v6.1 to v6.2 When you upgrade TiFlash from v6.1 to v6.2, pay attention to the change in data storage format. For details, see [PageStorage](#pagestorage). + +## From v6.x or v7.x to v7.3 with `storage.format_version = 5` configured + +Starting from v7.3, TiFlash introduces a new DTFile version: DTFile V3 (experimental). This new DTFile version can merge multiple small files into a single larger file to reduce the total number of files. In v7.3, the default DTFile version is still V2. To use V3, you can set the [TiFlash configuration parameter](/tiflash/tiflash-configuration.md) `storage.format_version = 5`. After the setting, TiFlash can still read V2 DTFiles and will gradually rewrite existing V2 DTFiles to V3 DTFiles during subsequent data compaction. + +After upgrading TiFlash to v7.3 and configuring TiFlash to use V3 DTFiles, if you need to revert TiFlash to an earlier version, you can use the DTTool offline to rewrite V3 DTFiles back to V2 DTFiles. For more information, see [DTTool Migration Tool](/tiflash/tiflash-command-line-flags.md#dttool-migrate). \ No newline at end of file diff --git a/tiflash/create-tiflash-replicas.md b/tiflash/create-tiflash-replicas.md index 14cfe875f0afc..4bad7300e3073 100644 --- a/tiflash/create-tiflash-replicas.md +++ b/tiflash/create-tiflash-replicas.md @@ -127,13 +127,13 @@ Before TiFlash replicas are added, each TiKV instance performs a full table scan 1. Temporarily increase the snapshot write speed limit for each TiKV and TiFlash instance by using the [Dynamic Config SQL statement](https://docs.pingcap.com/tidb/stable/dynamic-config): - ```sql - -- The default value for both configurations are 100MiB, i.e. the maximum disk bandwidth used for writing snapshots is no more than 100MiB/s. 
- SET CONFIG tikv `server.snap-io-max-bytes-per-sec` = '300MiB'; - SET CONFIG tiflash `raftstore-proxy.server.snap-max-write-bytes-per-sec` = '300MiB'; - ``` + ```sql + -- The default value for both configurations are 100MiB, i.e. the maximum disk bandwidth used for writing snapshots is no more than 100MiB/s. + SET CONFIG tikv `server.snap-io-max-bytes-per-sec` = '300MiB'; + SET CONFIG tiflash `raftstore-proxy.server.snap-max-write-bytes-per-sec` = '300MiB'; + ``` - After executing these SQL statements, the configuration changes take effect immediately without restarting the cluster. However, since the replication speed is still restricted by the PD limit globally, you cannot observe the acceleration for now. + After executing these SQL statements, the configuration changes take effect immediately without restarting the cluster. However, since the replication speed is still restricted by the PD limit globally, you cannot observe the acceleration for now. 2. Use [PD Control](https://docs.pingcap.com/tidb/stable/pd-control) to progressively ease the new replica speed limit. @@ -143,10 +143,10 @@ Before TiFlash replicas are added, each TiKV instance performs a full table scan tiup ctl:v pd -u http://:2379 store limit all engine tiflash 60 add-peer ``` - > In the preceding command, you need to replace `v` with the actual cluster version, such as `v6.5.0` and `:2379` with the address of any PD node. For example: + > In the preceding command, you need to replace `v` with the actual cluster version, such as `v7.3.0` and `:2379` with the address of any PD node. For example: > > ```shell - > tiup ctl:v6.1.1 pd -u http://192.168.1.4:2379 store limit all engine tiflash 60 add-peer + > tiup ctl:v7.3.0 pd -u http://192.168.1.4:2379 store limit all engine tiflash 60 add-peer > ``` Within a few minutes, you will observe a significant increase in CPU and disk IO resource usage of the TiFlash nodes, and TiFlash should create replicas faster. 
At the same time, the TiKV nodes' CPU and disk IO resource usage increases as well. @@ -159,18 +159,18 @@ Before TiFlash replicas are added, each TiKV instance performs a full table scan 3. After the TiFlash replication is complete, revert to the default configuration to reduce the impact on online services. - Execute the following PD Control command to restore the default new replica speed limit: + Execute the following PD Control command to restore the default new replica speed limit: - ```shell - tiup ctl:v pd -u http://:2379 store limit all engine tiflash 30 add-peer - ``` + ```shell + tiup ctl:v pd -u http://:2379 store limit all engine tiflash 30 add-peer + ``` - Execute the following SQL statements to restore the default snapshot write speed limit: + Execute the following SQL statements to restore the default snapshot write speed limit: - ```sql - SET CONFIG tikv `server.snap-io-max-bytes-per-sec` = '100MiB'; - SET CONFIG tiflash `raftstore-proxy.server.snap-max-write-bytes-per-sec` = '100MiB'; - ``` + ```sql + SET CONFIG tikv `server.snap-io-max-bytes-per-sec` = '100MiB'; + SET CONFIG tiflash `raftstore-proxy.server.snap-max-write-bytes-per-sec` = '100MiB'; + ``` ## Set available zones @@ -256,4 +256,6 @@ When configuring replicas, if you need to distribute TiFlash replicas to multipl For more information about scheduling replicas by using labels, see [Schedule Replicas by Topology Labels](/schedule-replicas-by-topology-labels.md), [Multiple Data Centers in One City Deployment](/multi-data-centers-in-one-city-deployment.md), and [Three Data Centers in Two Cities Deployment](/three-data-centers-in-two-cities-deployment.md). +TiFlash supports configuring the replica selection strategy for different zones. For more information, see [`tiflash_replica_read`](/system-variables.md#tiflash_replica_read-new-in-v730). 
+ diff --git a/tiflash/tiflash-command-line-flags.md b/tiflash/tiflash-command-line-flags.md index 00b7abd4b1bd7..c76e0b4f2735d 100644 --- a/tiflash/tiflash-command-line-flags.md +++ b/tiflash/tiflash-command-line-flags.md @@ -23,10 +23,11 @@ This document introduces the command-line flags that you can use when you launch - If you need to downgrade TiFlash from a version >= v5.4.0 that has enabled data validation to a version < v5.4.0, you can use this tool to downgrade the data format of the DTFile. - If you upgrade TiFlash to a version >= v5.4.0, and if you hope to enable data validation for existing data, you can use this tool to upgrade the data format of the DTFile. - Test the space usage and read speed of the DTFile in different configurations. + - If you need to downgrade TiFlash from a version >= v7.3.0 that has enabled small file merging (that is, `storage.format_version` >= 5) to a version < v7.3.0, you can use this tool to downgrade the data format of the DTFile. - Parameters: - `--imitative`: When you do not use the encryption feature of the DTFile, you can use this flag to avoid using the configuration file and connecting to PD. - - `--version`: The version of DTFile. The value options are `1` and `2` (default). `1` is the old version, and `2` is the version corresponding to the new checksum. + - `--version`: The target version of DTFile. The value options are `1`, `2` (default), and `3`. `1` is the old version, `2` is the version corresponding to the new checksum, and `3` is the version that supports merging small files. - `--algorithm`: The hash algorithm used for data validation. The value options are `xxh3` (default), `city128`, `crc32`, `crc64`, and `none`. This parameter is effective only when `version` is `2`. - `--frame`: The size of the validation frame. The default value is `1048576`. This parameter is effective only when `version` is `2`. - `--compression`: The target compression algorithm. 
The value options are `LZ4` (default), `LZ4HC`, `zstd`, and `none`. diff --git a/tiflash/tiflash-configuration.md b/tiflash/tiflash-configuration.md index 179aaba4b3f10..229f5911b8436 100644 --- a/tiflash/tiflash-configuration.md +++ b/tiflash/tiflash-configuration.md @@ -43,8 +43,9 @@ This section introduces the configuration parameters of TiFlash. ```toml ## The listening host for supporting services such as TPC/HTTP. It is recommended to configure it as "0.0.0.0", which means to listen on all IP addresses of this machine. listen_host = "0.0.0.0" -## The TiFlash TCP service port. -tcp_port = 9000 +## The TiFlash TCP service port. This port is used for internal testing and is set to 9000 by default. Before TiFlash v7.1.0, this port is enabled by default with a security risk. To enhance security, it is recommended to apply access control on this port to only allow access from whitelisted IP addresses. Starting from TiFlash v7.1.0, you can avoid the security risk by commenting out the configuration of this port. When the TiFlash configuration file does not specify this port, it will be disabled. +## It is **NOT** recommended to configure this port in any TiFlash deployment. (Note: Starting from TiFlash v7.1.0, TiFlash deployed by TiUP >= v1.12.5 or TiDB Operator >= v1.5.0 disables the port by default and is more secure.) +# tcp_port = 9000 ## The cache size limit of the metadata of a data block. Generally, you do not need to change this value. mark_cache_size = 5368709120 ## The cache size limit of the min-max index of a data block. Generally, you do not need to change this value. @@ -76,6 +77,7 @@ delta_index_cache_size = 0 ## * format_version = 2, the default format for versions < v6.0.0. ## * format_version = 3, the default format for v6.0.0 and v6.1.x, which provides more data validation features. 
## * format_version = 4, the default format for v6.2.0 and later versions, which reduces write amplification and background task resource consumption + ## * format_version = 5, a new format introduced in v7.3.0 (not the default format for v7.3.0) that reduces the number of physical files by merging smaller files. Note that this format is experimental and not recommended to be used in a production environment. # format_version = 4 [storage.main] diff --git a/tiflash/tiflash-pipeline-model.md b/tiflash/tiflash-pipeline-model.md new file mode 100644 index 0000000000000..0cc5008dc77f5 --- /dev/null +++ b/tiflash/tiflash-pipeline-model.md @@ -0,0 +1,120 @@ +--- +title: TiFlash Pipeline Execution Model +summary: Learn about the TiFlash Pipeline Execution Model. +--- + +# TiFlash Pipeline Execution Model + +This document introduces the TiFlash pipeline execution model. + +Starting from v7.2.0, TiFlash supports a new execution model, the pipeline execution model. You can control whether to enable the TiFlash pipeline execution model by modifying the system variable [`tidb_enable_tiflash_pipeline_model`](/system-variables.md#tidb_enable_tiflash_pipeline_model-new-in-v720). + +Inspired by the paper [Morsel-Driven Parallelism: A NUMA-Aware Query Evaluation Framework for the Many-Core Age](https://dl.acm.org/doi/10.1145/2588555.2610507), the TiFlash pipeline execution model provides a fine-grained task scheduling model, which is different from the traditional thread scheduling model. It reduces the overhead of operating system thread scheduling and provides a fine-grained scheduling mechanism. + +> **Note:** +> +> - The pipeline execution model is currently an experimental feature and is not recommended to use in production environments. +> - The pipeline execution model does not support the following features. 
When the following features are enabled, even if `tidb_enable_tiflash_pipeline_model` is set to `ON`, the query pushed down to TiFlash will still be executed using the original stream model. +> +> - [Join operator spill to disk](/system-variables.md#tidb_max_bytes_before_tiflash_external_join-new-in-v700) +> - [TiFlash Disaggregated Storage and Compute Architecture and S3 Support](/tiflash/tiflash-disaggregated-and-s3.md) + +## Enable and disable the pipeline execution model + +To enable or disable the pipeline execution model, you can use the [`tidb_enable_tiflash_pipeline_model`](/system-variables.md#tidb_enable_tiflash_pipeline_model-new-in-v720) system variable. This variable can take effect at the session level and global level. By default, `tidb_enable_tiflash_pipeline_model` is set to `OFF`, which means that the TiFlash pipeline execution model is disabled. You can use the following statement to view the variable value: + +```sql +SHOW VARIABLES LIKE 'tidb_enable_tiflash_pipeline_model'; +``` + +``` ++------------------------------------+-------+ +| Variable_name | Value | ++------------------------------------+-------+ +| tidb_enable_tiflash_pipeline_model | OFF | ++------------------------------------+-------+ +``` + +```sql +SHOW GLOBAL VARIABLES LIKE 'tidb_enable_tiflash_pipeline_model'; +``` + +``` ++------------------------------------+-------+ +| Variable_name | Value | ++------------------------------------+-------+ +| tidb_enable_tiflash_pipeline_model | OFF | ++------------------------------------+-------+ +``` + +You can modify the `tidb_enable_tiflash_pipeline_model` variable at the session level and global level. 
+ +- To enable the pipeline execution model in the current session, use the following statement: + + ```sql + SET SESSION tidb_enable_tiflash_pipeline_model=ON; + ``` + +- To enable the pipeline execution model at the global level, use the following statement: + + ```sql + SET GLOBAL tidb_enable_tiflash_pipeline_model=ON; + ``` + + If you set `tidb_enable_tiflash_pipeline_model` to `ON` at the global level, the `tidb_enable_tiflash_pipeline_model` variable at the session level and global level in the new session will be enabled by default. + +To disable the pipeline execution model, use the following statement: + +```sql +SET SESSION tidb_enable_tiflash_pipeline_model=OFF; +``` + +```sql +SET GLOBAL tidb_enable_tiflash_pipeline_model=OFF; +``` + +## Design and implementation + +The original TiFlash stream model is a thread scheduling execution model. Each query independently applies for several threads to execute in coordination. + +The thread scheduling model has the following two defects: + +- In high-concurrency scenarios, too many threads cause a large number of context switches, resulting in high thread scheduling costs. +- The thread scheduling model cannot accurately measure the resource usage of queries or do fine-grained resource control. + +The new pipeline execution model makes the following optimizations: + +- The queries are divided into multiple pipelines and executed in sequence. In each pipeline, the data blocks are kept in the cache as much as possible to achieve better temporal locality and improve the efficiency of the entire execution process. +- To get rid of the native thread scheduling model of the operating system and implement a more fine-grained scheduling mechanism, each pipeline is instantiated into several tasks and uses the task scheduling model. At the same time, a fixed thread pool is used to reduce the overhead of operating system thread scheduling. 
+
+The architecture of the pipeline execution model is as follows:
+
+![TiFlash pipeline execution model design](/media/tiflash/tiflash-pipeline-model.png)
+
+As shown in the preceding figure, the pipeline execution model consists of two main components: the pipeline query executor and the task scheduler.
+
+- The pipeline query executor
+
+    The pipeline query executor converts the query request sent from the TiDB node into a pipeline directed acyclic graph (DAG).
+
+    It will find the pipeline breaker operators in the query and split the query into several pipelines according to the pipeline breakers. Then, it assembles the pipelines into a DAG according to the dependency relationship between the pipelines.
+
+    A pipeline breaker is an operator that has a pause/blocking logic. This type of operator continuously receives data blocks from the upstream operator until all data blocks are received, and then returns the processing result to the downstream operator. This type of operator breaks the data processing pipeline, so it is called a pipeline breaker. One of the pipeline breakers is the Aggregation operator, which writes all the data of the upstream operator into a hash table before calculating the data in the hash table and returning the result to the downstream operator.
+
+    After the query is converted into a pipeline DAG, the pipeline query executor executes each pipeline in sequence according to the dependency relationship. The pipeline is instantiated into several tasks according to the query concurrency and submitted to the task scheduler for execution.
+
+- Task scheduler
+
+    The task scheduler executes the tasks submitted by the pipeline query executor. The tasks are dynamically switched between different components in the task scheduler according to the different execution logic.
+
+    - CPU task thread pool
+
+        Executes the CPU-intensive calculation logic in the task, such as data filtering and function calculation.
+ + - IO task thread pool + + Executes the IO-intensive calculation logic in the task, such as writing intermediate results to disk. + + - Wait reactor + + Executes the wait logic in the task, such as waiting for the network layer to transfer the data packet to the calculation layer. diff --git a/tiflash/tiflash-supported-pushdown-calculations.md b/tiflash/tiflash-supported-pushdown-calculations.md index 1f9042e4e42fe..6740d6c5a95f1 100644 --- a/tiflash/tiflash-supported-pushdown-calculations.md +++ b/tiflash/tiflash-supported-pushdown-calculations.md @@ -22,7 +22,7 @@ TiFlash supports the push-down of the following operators: * The operator can be pushed down only in the [MPP mode](/tiflash/use-tiflash-mpp-mode.md). * Supported joins are Inner Join, Left Join, Semi Join, Anti Semi Join, Left Semi Join, and Anti Left Semi Join. * The preceding joins support both Equi Join and Non-Equi Join (Cartesian Join or Null-aware Semi Join). When calculating Cartesian Join or Null-aware Semi Join, the Broadcast algorithm, instead of the Shuffle Hash Join algorithm, is used. -* [Window functions](/functions-and-operators/window-functions.md): Currently, TiFlash supports `ROW_NUMBER()`, `RANK()`, `DENSE_RANK()`, `LEAD()`, and `LAG()`. +* [Window functions](/functions-and-operators/window-functions.md): Currently, TiFlash supports `ROW_NUMBER()`, `RANK()`, `DENSE_RANK()`, `LEAD()`, `LAG()`, `FIRST_VALUE()`, and `LAST_VALUE()`. In TiDB, operators are organized in a tree structure. For an operator to be pushed down to TiFlash, all of the following prerequisites must be met: diff --git a/tiflash/troubleshoot-tiflash.md b/tiflash/troubleshoot-tiflash.md index c014f403d8329..d2c0b6c139e8c 100644 --- a/tiflash/troubleshoot-tiflash.md +++ b/tiflash/troubleshoot-tiflash.md @@ -14,21 +14,21 @@ The issue might occur due to different reasons. It is recommended that you troub 1. Check whether your system is RedHat Enterprise Linux 8. 
- RedHat Enterprise Linux 8 does not have the `libnsl.so` system library. You can manually install it via the following command: + RedHat Enterprise Linux 8 does not have the `libnsl.so` system library. You can manually install it via the following command: - {{< copyable "shell-regular" >}} + {{< copyable "shell-regular" >}} - ```shell - dnf install libnsl - ``` + ```shell + dnf install libnsl + ``` 2. Check your system's `ulimit` parameter setting. - {{< copyable "shell-regular" >}} + {{< copyable "shell-regular" >}} - ```shell - ulimit -n 1000000 - ``` + ```shell + ulimit -n 1000000 + ``` 3. Use the PD Control tool to check whether there is any TiFlash instance that failed to go offline on the node (same IP and Port) and force the instance(s) to go offline. For detailed steps, refer to [Scale in a TiFlash cluster](/scale-tidb-using-tiup.md#scale-in-a-tiflash-cluster). diff --git a/tiflash/use-fastscan.md b/tiflash/use-fastscan.md index 097a8b705269c..7683dc8026d1b 100644 --- a/tiflash/use-fastscan.md +++ b/tiflash/use-fastscan.md @@ -12,6 +12,41 @@ By default, TiFlash guarantees the precision of query results and data consisten Some OLAP scenarios allow for some tolerance to the accuracy of the query results. In these cases, if you need higher query performance, you can enable the FastScan feature at the session or global level. You can choose whether to enable the FastScan feature by configuring the variable `tiflash_fastscan`. +## Restrictions + +When the FastScan feature is enabled, your query results might include old data of a table. This means that you might get multiple historical versions of data with the same primary key or data that has been deleted. 
+ +For example: + +```sql +CREATE TABLE t1 (a INT PRIMARY KEY, b INT); +ALTER TABLE t1 SET TIFLASH REPLICA 1; +INSERT INTO t1 VALUES(1,2); +INSERT INTO t1 VALUES(10,20); +UPDATE t1 SET b = 4 WHERE a = 1; +DELETE FROM t1 WHERE a = 10; +SET SESSION tidb_isolation_read_engines='tiflash'; + +SELECT * FROM t1; ++------+------+ +| a | b | ++------+------+ +| 1 | 4 | ++------+------+ + +SET SESSION tiflash_fastscan=ON; +SELECT * FROM t1; ++------+------+ +| a | b | ++------+------+ +| 1 | 2 | +| 1 | 4 | +| 10 | 20 | ++------+------+ +``` + +Although TiFlash can automatically initiate compaction of old data in the background, the old data will not be cleaned up physically until it has been compacted and its data versions are older than the GC safe point. After the physical cleaning, the cleaned old data will no longer be returned in FastScan mode. The timing of data compaction is automatically triggered by various factors. You can also manually trigger data compaction using the [`ALTER TABLE ... COMPACT`](/sql-statements/sql-statement-alter-table-compact.md) statement. + ## Enable and disable FastScan By default, the variable is `tiflash_fastscan=OFF` at the session level and global level, that is, the FastScan feature is not enabled. You can view the variable information by using the following statement. @@ -62,8 +97,8 @@ Data in the storage layer of TiFlash is stored in two layers: Delta layer and St By default, FastScan is not enabled, and the TableScan operator processes data in the following steps: 1. Read data: create separate data streams in the Delta layer and Stable layer to read the respective data. -2. Sort Merge: merge the data streams created in step 1. Then return the data after sorting in (handle, version) order. +2. Sort Merge: merge the data streams created in step 1. Then return the data after sorting in the order of (primary key column, timestamp column). 3. 
Range Filter: according to the data range, filter the data generated in step 2, and then return the data. -4. MVCC + Column Filter: filter the data generated in step 3 through MVCC and filter out unneeded columns, and then return the data. +4. MVCC + Column Filter: filter the data generated in step 3 through MVCC (that is, filtering the data version according to the primary key column and the timestamp column) and through columns (that is, filtering out unneeded columns), and then return the data. FastScan gains faster query speed by sacrificing some data consistency. Step 2 and the MVCC part in step 4 in the normal scan process are omitted in FastScan, thus improving query performance. diff --git a/tikv-configuration-file.md b/tikv-configuration-file.md index 71e339f0c1621..2788ce908f0f9 100644 --- a/tikv-configuration-file.md +++ b/tikv-configuration-file.md @@ -516,7 +516,11 @@ Configuration items related to the sharing of block cache among multiple RocksDB ### `capacity` + The size of the shared block cache. -+ Default value: 45% of the size of total system memory ++ Default value: + + + When `storage.engine="raft-kv"`, the default value is 45% of the size of total system memory. + + When `storage.engine="partitioned-raft-kv"`, the default value is 30% of the size of total system memory. + + Unit: KB|MB|GB ## storage.flow-control @@ -572,7 +576,7 @@ Configuration items related to the I/O rate limiter. ### `retry-interval` -+ The interval for retrying to initialize the PD connection ++ The interval for retrying the PD connection. + Default value: `"300ms"` ### `retry-log-every` @@ -760,7 +764,10 @@ Configuration items related to Raftstore. ### `region-compact-check-step` + The number of Regions checked at one time for each round of manual compaction -+ Default value: `100` ++ Default value: + + + When `storage.engine="raft-kv"`, the default value is `100`. + + When `storage.engine="partitioned-raft-kv"`, the default value is `5`. 
+ Minimum value: `0` ### `region-compact-min-tombstones` @@ -776,6 +783,28 @@ Configuration items related to Raftstore. + Minimum value: `1` + Maximum value: `100` +### `region-compact-min-redundant-rows` New in v7.1.0 + ++ The number of redundant MVCC rows required to trigger RocksDB compaction. This configuration only takes effect for Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`). ++ Default value: `50000` ++ Minimum value: `0` + +### `region-compact-redundant-rows-percent` New in v7.1.0 + ++ The percentage of redundant MVCC rows required to trigger RocksDB compaction. This configuration only takes effect for Partitioned Raft KV (`storage.engine="partitioned-raft-kv"`). ++ Default value: `20` ++ Minimum value: `1` ++ Maximum value: `100` + +### `report-region-buckets-tick-interval` New in v6.1.0 + +> **Warning:** +> +> `report-region-buckets-tick-interval` is an experimental feature introduced in TiDB v6.1.0. It is not recommended that you use it in production environments. + ++ The interval at which TiKV reports bucket information to PD when `enable-region-bucket` is true. ++ Default value: `10s` + ### `pd-heartbeat-tick-interval` + The time interval at which a Region's heartbeat to PD is triggered. `0` means that this feature is disabled. @@ -988,10 +1017,10 @@ Configuration items related to Raftstore. + Default value: `1MB` + Minimum value: `0` -### `report-min-resolved-ts-interval` +### `report-min-resolved-ts-interval` New in v6.0.0 -+ Determines the minimum interval at which the resolved timestamp is reported to the PD leader. If this value is set to `0`, it means that the reporting is disabled. -+ Default value: `"1s"`, which is the smallest positive value ++ Determines the interval at which the minimum resolved timestamp is reported to the PD leader. If this value is set to `0`, it means that the reporting is disabled. ++ Default value: Before v6.3.0, the default value is `"0s"`. 
Starting from v6.3.0, the default value is `"1s"`, which is the smallest positive value. + Minimum value: `0` + Unit: second @@ -1060,21 +1089,12 @@ Configuration items related to Coprocessor. ### `region-bucket-size` New in v6.1.0 + The size of a bucket when `enable-region-bucket` is true. -+ Default value: `96MiB` ++ Default value: Starting from v7.3.0, the default value is changed from `96MiB` to `50MiB`. > **Warning:** > > `region-bucket-size` is an experimental feature introduced in TiDB v6.1.0. It is not recommended that you use it in production environments. -### `report-region-buckets-tick-interval` New in v6.1.0 - -> **Warning:** -> -> `report-region-buckets-tick-interval` is an experimental feature introduced in TiDB v6.1.0. It is not recommended that you use it in production environments. - -+ The interval at which TiKV reports bucket information to PD when `enable-region-bucket` is true. -+ Default value: `10s` - ## rocksdb Configuration items related to RocksDB @@ -1153,12 +1173,18 @@ Configuration items related to RocksDB ### `max-total-wal-size` + The maximum RocksDB WAL size in total, which is the size of `*.log` files in the `data-dir`. -+ Default value: `"4GB"` ++ Default value: + + + When `storage.engine="raft-kv"`, the default value is `"4GB"`. + + When `storage.engine="partitioned-raft-kv"`, the default value is `1`. ### `stats-dump-period` + The interval at which statistics are output to the log. -+ Default value: `10m` ++ Default value: + + + When `storage.engine="raft-kv"`, the default value is `"10m"`. + + When `storage.engine="partitioned-raft-kv"`, the default value is `"0"`. ### `compaction-readahead-size` @@ -1268,8 +1294,12 @@ Configuration items related to RocksDB > > This feature is experimental. It is not recommended that you use it in the production environment. This feature might be changed or removed without prior notice. If you find a bug, you can report an [issue](https://github.com/pingcap/tidb/issues) on GitHub. 
-+ Specifies the total memory limit of `memtable` for all RocksDB instances in a single TiKV. The default value is 25% of the memory of the machine. It is recommended to configure a memory of at least 5 GiB. This configuration only takes effect for Partitioned Raft KV (`storage.engine`=`"partitioned-raft-kv"`). -+ Default value: 25% ++ Specifies the total memory limit of `memtable` for all RocksDB instances in a single TiKV. `0` means no limit. ++ Default value: + + + When `storage.engine="raft-kv"`, the default value is `0`, which means no limit. + + When `storage.engine="partitioned-raft-kv"`, the default value is 20% of the size of total system memory. + + Unit: KiB|MiB|GiB ## rocksdb.titan @@ -1348,7 +1378,7 @@ Configuration items related to `rocksdb.defaultcf`, `rocksdb.writecf`, and `rock + Default value for `defaultcf`: `true` + Default value for `writecf` and `lockcf`: `false` -### `optimize-filters-for-memory` New in v7.1.0 +### `optimize-filters-for-memory` New in v7.2.0 + Determines whether to generate Bloom/Ribbon filters that minimize memory internal fragmentation. + Note that this configuration item takes effect only when [`format-version`](#format-version-new-in-v620) >= 5. @@ -1371,7 +1401,7 @@ Configuration items related to `rocksdb.defaultcf`, `rocksdb.writecf`, and `rock + Determines whether each block creates a bloom filter + Default value: `false` -### `ribbon-filter-above-level` New in v7.1.0 +### `ribbon-filter-above-level` New in v7.2.0 + Determines whether to use Ribbon filters for levels greater than or equal to this value and use non-block-based bloom filters for levels less than this value. When this configuration item is set, [`block-based-bloom-filter`](#block-based-bloom-filter) will be ignored. + Note that this configuration item takes effect only when [`format-version`](#format-version-new-in-v620) >= 5. 
@@ -1402,7 +1432,9 @@ Configuration items related to `rocksdb.defaultcf`, `rocksdb.writecf`, and `rock + Memtable size + Default value for `defaultcf` and `writecf`: `"128MB"` -+ Default value for `lockcf`: `"32MB"` ++ Default value for `lockcf`: + + When `storage.engine="raft-kv"`, the default value is `"32MB"`. + + When `storage.engine="partitioned-raft-kv"`, the default value is `"4MB"`. + Minimum value: `0` + Unit: KB|MB|GB @@ -1537,7 +1569,22 @@ Configuration items related to `rocksdb.defaultcf`, `rocksdb.writecf`, and `rock - `3`: Can be read by TiKV v2.1 and later versions. Changes the encoding of the keys in index blocks. - `4`: Can be read by TiKV v3.0 and later versions. Changes the encoding of the values in index blocks. - `5`: Can be read by TiKV v6.1 and later versions. Full and partitioned filters use a faster and more accurate Bloom filter implementation with a different schema. -+ Default value: `2` ++ Default value: + + + When `storage.engine="raft-kv"`, the default value is `2`. + + When `storage.engine="partitioned-raft-kv"`, the default value is `5`. + +### `ttl` New in v7.2.0 + ++ SST files with updates older than the TTL will be automatically selected for compaction. These SST files will go through the compaction in a cascading way so that they can be compacted to the bottommost level or file. ++ Default value: `"30d"` ++ Unit: s(second)|h(hour)|d(day) + +### `periodic-compaction-seconds` New in v7.2.0 + ++ The time interval for periodic compaction. SST files with updates older than this value will be selected for compaction and rewritten to the same level where these SST files originally reside. ++ Default value: `"30d"` ++ Unit: s(second)|h(hour)|d(day) ## rocksdb.defaultcf.titan @@ -1676,6 +1723,8 @@ Configuration items related to `raftdb` + The maximum RocksDB WAL size in total + Default value: `"4GB"` + + When `storage.engine="raft-kv"`, the default value is `"4GB"`. 
+ + When `storage.engine="partitioned-raft-kv"`, the default value is `1`. ### `compaction-readahead-size` @@ -1830,7 +1879,9 @@ Configuration items related to Raft Engine. + Value Options: + `1`: Default log file version for TiKV earlier than v6.3.0. Can be read by TiKV >= v6.1.0. + `2`: Supports log recycling. Can be read by TiKV >= v6.3.0. -+ Default value: `2` ++ Default value: + + When `storage.engine="raft-kv"`, the default value is `2`. + + When `storage.engine="partitioned-raft-kv"`, the default value is `5`. ### `enable-log-recycle` New in v6.3.0 diff --git a/time-to-live.md b/time-to-live.md index 163cff8d71e48..6754e999d19cd 100644 --- a/time-to-live.md +++ b/time-to-live.md @@ -159,7 +159,7 @@ The preceding statement allows TTL jobs to be scheduled only between 1:00 and 5: > **Note:** > -> This section is only applicable to on-premises TiDB. Currently, TiDB Cloud does not provide TTL metrics. +> This section is only applicable to TiDB Self-Hosted. Currently, TiDB Cloud does not provide TTL metrics. @@ -252,7 +252,7 @@ Currently, the TTL feature has the following limitations: * A table with the TTL attribute does not support being referenced by other tables as the primary table in a foreign key constraint. * It is not guaranteed that all expired data is deleted immediately. The time when expired data is deleted depends on the scheduling interval and scheduling window of the background cleanup job. * For tables that use [clustered indexes](/clustered-indexes.md), if the primary key is neither an integer nor a binary string type, the TTL job cannot be split into multiple tasks. This will cause the TTL job to be executed sequentially on a single TiDB node. If the table contains a large amount of data, the execution of the TTL job might become slow. -* TTL is not available for [TiDB Cloud Serverless Tier](https://docs.pingcap.com/tidbcloud/select-cluster-tier#serverless-tier-beta). 
+* TTL is not available for [TiDB Serverless](https://docs.pingcap.com/tidbcloud/select-cluster-tier#tidb-serverless). ## FAQs diff --git a/tiup/tiup-bench.md b/tiup/tiup-bench.md index 11850720ec83e..c69b483af73eb 100644 --- a/tiup/tiup-bench.md +++ b/tiup/tiup-bench.md @@ -229,6 +229,6 @@ You can write an arbitrary query in a SQL file, and then use it for the test by 2. Run the RawSQL test: - ```shell - tiup bench rawsql run --count 60 --query-files demo.sql - ``` + ```shell + tiup bench rawsql run --count 60 --query-files demo.sql + ``` diff --git a/tiup/tiup-cluster.md b/tiup/tiup-cluster.md index 009db0bc420f8..257966223fa75 100644 --- a/tiup/tiup-cluster.md +++ b/tiup/tiup-cluster.md @@ -17,7 +17,7 @@ tiup cluster ``` ``` -Starting component `cluster`: /home/tidb/.tiup/components/cluster/v1.11.3/cluster +Starting component `cluster`: /home/tidb/.tiup/components/cluster/v1.12.3/cluster Deploy a TiDB cluster for production Usage: @@ -62,7 +62,7 @@ To deploy the cluster, run the `tiup cluster deploy` command. The usage of the c tiup cluster deploy [flags] ``` -This command requires you to provide the cluster name, the TiDB cluster version (such as `v6.5.0`), and a topology file of the cluster. +This command requires you to provide the cluster name, the TiDB cluster version (such as `v7.3.0`), and a topology file of the cluster. To write a topology file, refer to [the example](https://github.com/pingcap/tiup/blob/master/embed/examples/cluster/topology.example.yaml). The following file is an example of the simplest topology: @@ -119,12 +119,12 @@ tidb_servers: ... ``` -Save the file as `/tmp/topology.yaml`. If you want to use TiDB v7.0.0 and your cluster name is `prod-cluster`, run the following command: +Save the file as `/tmp/topology.yaml`. 
If you want to use TiDB v7.3.0 and your cluster name is `prod-cluster`, run the following command: {{< copyable "shell-regular" >}} ```shell -tiup cluster deploy -p prod-cluster v7.0.0 /tmp/topology.yaml +tiup cluster deploy -p prod-cluster v7.3.0 /tmp/topology.yaml ``` During the execution, TiUP asks you to confirm your topology again and requires the root password of the target machine (the `-p` flag means inputting password): @@ -132,7 +132,7 @@ During the execution, TiUP asks you to confirm your topology again and requires ```bash Please confirm your topology: TiDB Cluster: prod-cluster -TiDB Version: v7.0.0 +TiDB Version: v7.3.0 Type Host Ports OS/Arch Directories ---- ---- ----- ------- ----------- pd 172.16.5.134 2379/2380 linux/x86_64 deploy/pd-2379,data/pd-2379 @@ -172,10 +172,10 @@ tiup cluster list ``` ``` -Starting /root/.tiup/components/cluster/v1.11.3/cluster list +Starting /root/.tiup/components/cluster/v1.12.3/cluster list Name User Version Path PrivateKey ---- ---- ------- ---- ---------- -prod-cluster tidb v7.0.0 /root/.tiup/storage/cluster/clusters/prod-cluster /root/.tiup/storage/cluster/clusters/prod-cluster/ssh/id_rsa +prod-cluster tidb v7.3.0 /root/.tiup/storage/cluster/clusters/prod-cluster /root/.tiup/storage/cluster/clusters/prod-cluster/ssh/id_rsa ``` ## Start the cluster @@ -203,9 +203,9 @@ tiup cluster display prod-cluster ``` ``` -Starting /root/.tiup/components/cluster/v1.11.3/cluster display prod-cluster +Starting /root/.tiup/components/cluster/v1.12.3/cluster display prod-cluster TiDB Cluster: prod-cluster -TiDB Version: v7.0.0 +TiDB Version: v7.3.0 ID Role Host Ports OS/Arch Status Data Dir Deploy Dir -- ---- ---- ----- ------- ------ -------- ---------- 172.16.5.134:3000 grafana 172.16.5.134 3000 linux/x86_64 Up - deploy/grafana-3000 @@ -277,9 +277,9 @@ tiup cluster display prod-cluster ``` ``` -Starting /root/.tiup/components/cluster/v1.11.3/cluster display prod-cluster +Starting /root/.tiup/components/cluster/v1.12.3/cluster 
display prod-cluster TiDB Cluster: prod-cluster -TiDB Version: v7.0.0 +TiDB Version: v7.3.0 ID Role Host Ports OS/Arch Status Data Dir Deploy Dir -- ---- ---- ----- ------- ------ -------- ---------- 172.16.5.134:3000 grafana 172.16.5.134 3000 linux/x86_64 Up - deploy/grafana-3000 @@ -390,12 +390,12 @@ Global Flags: -y, --yes Skip all confirmations and assumes 'yes' ``` -For example, the following command upgrades the cluster to v7.0.0: +For example, the following command upgrades the cluster to v7.3.0: {{< copyable "shell-regular" >}} ```bash -tiup cluster upgrade tidb-test v7.0.0 +tiup cluster upgrade tidb-test v7.3.0 ``` ## Update configuration @@ -577,14 +577,14 @@ tiup cluster audit ``` ``` -Starting component `cluster`: /home/tidb/.tiup/components/cluster/v1.11.3/cluster audit +Starting component `cluster`: /home/tidb/.tiup/components/cluster/v1.12.3/cluster audit ID Time Command -- ---- ------- -4BLhr0 2022-03-29T23:55:09+08:00 /home/tidb/.tiup/components/cluster/v1.11.3/cluster deploy test v7.0.0 /tmp/topology.yaml -4BKWjF 2022-03-029T23:36:57+08:00 /home/tidb/.tiup/components/cluster/v1.11.3/cluster deploy test v7.0.0 /tmp/topology.yaml -4BKVwH 2022-03-29T23:02:08+08:00 /home/tidb/.tiup/components/cluster/v1.11.3/cluster deploy test v7.0.0 /tmp/topology.yaml -4BKKH1 2022-03-29T16:39:04+08:00 /home/tidb/.tiup/components/cluster/v1.11.3/cluster destroy test -4BKKDx 2022-03-29T16:36:57+08:00 /home/tidb/.tiup/components/cluster/v1.11.3/cluster deploy test v7.0.0 /tmp/topology.yaml +4BLhr0 2023-08-10T23:55:09+08:00 /home/tidb/.tiup/components/cluster/v1.12.3/cluster deploy test v7.3.0 /tmp/topology.yaml +4BKWjF 2023-08-10T23:36:57+08:00 /home/tidb/.tiup/components/cluster/v1.12.3/cluster deploy test v7.3.0 /tmp/topology.yaml +4BKVwH 2023-08-10T23:02:08+08:00 /home/tidb/.tiup/components/cluster/v1.12.3/cluster deploy test v7.3.0 /tmp/topology.yaml +4BKKH1 2023-08-10T16:39:04+08:00 /home/tidb/.tiup/components/cluster/v1.12.3/cluster destroy test +4BKKDx 
2023-08-10T16:36:57+08:00 /home/tidb/.tiup/components/cluster/v1.12.3/cluster deploy test v7.3.0 /tmp/topology.yaml ``` The first column is `audit-id`. To view the execution log of a certain command, pass the `audit-id` of a command as the flag as follows: @@ -700,7 +700,7 @@ All operations above performed on the cluster machine use the SSH client embedde Then you can use the `--ssh=system` command-line flag to enable the system-native command-line tool: -- Deploy a cluster: `tiup cluster deploy --ssh=system`. Fill in the name of your cluster for ``, the TiDB version to be deployed (such as `v6.5.0`) for ``, and the topology file for ``. +- Deploy a cluster: `tiup cluster deploy --ssh=system`. Fill in the name of your cluster for ``, the TiDB version to be deployed (such as `v7.3.0`) for ``, and the topology file for ``. - Start a cluster: `tiup cluster start --ssh=system` - Upgrade a cluster: `tiup cluster upgrade ... --ssh=system` diff --git a/tiup/tiup-component-cluster-deploy.md b/tiup/tiup-component-cluster-deploy.md index e5edb38846dab..0498f2a1a1e60 100644 --- a/tiup/tiup-component-cluster-deploy.md +++ b/tiup/tiup-component-cluster-deploy.md @@ -13,7 +13,7 @@ tiup cluster deploy [flags] ``` - ``: the name of the new cluster, which cannot be the same as the existing cluster names. -- ``: the version number of the TiDB cluster to deploy, such as `v7.0.0`. +- ``: the version number of the TiDB cluster to deploy, such as `v7.3.0`. - ``: the prepared [topology file](/tiup/tiup-cluster-topology-reference.md). ## Options diff --git a/tiup/tiup-component-cluster-patch.md b/tiup/tiup-component-cluster-patch.md index 758a061edaf07..227387d6d418e 100644 --- a/tiup/tiup-component-cluster-patch.md +++ b/tiup/tiup-component-cluster-patch.md @@ -28,7 +28,7 @@ Before running the `tiup cluster patch` command, you need to pack the binary pac 1. Determine the following variables: - `${component}`: the name of the component to be replaced (such as `tidb`, `tikv`, or `pd`). 
- - `${version}`: the version of the component (such as `v7.0.0` or `v6.5.1`). + - `${version}`: the version of the component (such as `v7.3.0` or `v6.5.3`). - `${os}`: the operating system (`linux`). - `${arch}`: the platform on which the component runs (`amd64`, `arm64`). diff --git a/tiup/tiup-component-cluster-upgrade.md b/tiup/tiup-component-cluster-upgrade.md index 99fe75ed76de0..5b2c509356864 100644 --- a/tiup/tiup-component-cluster-upgrade.md +++ b/tiup/tiup-component-cluster-upgrade.md @@ -13,7 +13,7 @@ tiup cluster upgrade [flags] ``` - ``: the cluster name to operate on. If you forget the cluster name, you can check it with the [cluster list](/tiup/tiup-component-cluster-list.md) command. -- ``: the target version to upgrade to, such as `v6.5.0`. Currently, it is only allowed to upgrade to a version higher than the current cluster, that is, no downgrade is allowed. It is also not allowed to upgrade to the nightly version. +- ``: the target version to upgrade to, such as `v7.3.0`. Currently, it is only allowed to upgrade to a version higher than the current cluster, that is, no downgrade is allowed. It is also not allowed to upgrade to the nightly version. ## Options diff --git a/tiup/tiup-component-dm-upgrade.md b/tiup/tiup-component-dm-upgrade.md index b040f82a14ea1..4f8714a611a53 100644 --- a/tiup/tiup-component-dm-upgrade.md +++ b/tiup/tiup-component-dm-upgrade.md @@ -13,7 +13,7 @@ tiup dm upgrade [flags] ``` - `` is the name of the cluster to be operated on. If you forget the cluster name, you can check it using the [`tiup dm list`](/tiup/tiup-component-dm-list.md) command. -- `` is the target version to be upgraded to, such as `v6.5.0`. Currently, only upgrading to a later version is allowed, and upgrading to an earlier version is not allowed, which means the downgrade is not allowed. Upgrading to a nightly version is not allowed either. +- `` is the target version to be upgraded to, such as `v7.3.0`. 
Currently, only upgrading to a later version is allowed, and upgrading to an earlier version is not allowed, which means the downgrade is not allowed. Upgrading to a nightly version is not allowed either. ## Options diff --git a/tiup/tiup-component-management.md b/tiup/tiup-component-management.md index 8f924b137b83a..6e0782a40ce73 100644 --- a/tiup/tiup-component-management.md +++ b/tiup/tiup-component-management.md @@ -70,12 +70,12 @@ Example 2: Use TiUP to install the nightly version of TiDB. tiup install tidb:nightly ``` -Example 3: Use TiUP to install TiKV v7.0.0. +Example 3: Use TiUP to install TiKV v7.3.0. {{< copyable "shell-regular" >}} ```shell -tiup install tikv:v7.0.0 +tiup install tikv:v7.3.0 ``` ## Upgrade components @@ -128,12 +128,12 @@ Before the component is started, TiUP creates a directory for it, and then puts If you want to start the same component multiple times and reuse the previous working directory, you can use `--tag` to specify the same name when the component is started. After the tag is specified, the working directory will *not be automatically deleted* when the instance is terminated, which makes it convenient to reuse the working directory. -Example 1: Operate TiDB v7.0.0. +Example 1: Operate TiDB v7.3.0. {{< copyable "shell-regular" >}} ```shell -tiup tidb:v7.0.0 +tiup tidb:v7.3.0 ``` Example 2: Specify the tag with which TiKV operates. @@ -219,12 +219,12 @@ The following flags are supported in this command: - If the version is ignored, adding `--all` means to uninstall all versions of this component. - If the version and the component are both ignored, adding `--all` means to uninstall all components of all versions. -Example 1: Uninstall TiDB v7.0.0. +Example 1: Uninstall TiDB v7.3.0. {{< copyable "shell-regular" >}} ```shell -tiup uninstall tidb:v7.0.0 +tiup uninstall tidb:v7.3.0 ``` Example 2: Uninstall TiKV of all versions. 
diff --git a/tiup/tiup-mirror.md b/tiup/tiup-mirror.md index 36e530f4827fe..d57529b6a7707 100644 --- a/tiup/tiup-mirror.md +++ b/tiup/tiup-mirror.md @@ -87,9 +87,9 @@ The `tiup mirror clone` command provides many optional flags (might provide more If you want to clone only one version (not all versions) of a component, use `--=` to specify this version. For example: - - Execute the `tiup mirror clone --tidb v7.0.0` command to clone the v7.0.0 version of the TiDB component. - - Run the `tiup mirror clone --tidb v7.0.0 --tikv all` command to clone the v7.0.0 version of the TiDB component and all versions of the TiKV component. - - Run the `tiup mirror clone v7.0.0` command to clone the v7.0.0 version of all components in a cluster. + - Execute the `tiup mirror clone --tidb v7.3.0` command to clone the v7.3.0 version of the TiDB component. + - Run the `tiup mirror clone --tidb v7.3.0 --tikv all` command to clone the v7.3.0 version of the TiDB component and all versions of the TiKV component. + - Run the `tiup mirror clone v7.3.0` command to clone the v7.3.0 version of all components in a cluster. After cloning, signing keys are set up automatically. diff --git a/tiup/tiup-playground.md b/tiup/tiup-playground.md index c26028b47c73e..1bd608cb7f08c 100644 --- a/tiup/tiup-playground.md +++ b/tiup/tiup-playground.md @@ -20,9 +20,9 @@ If you directly execute the `tiup playground` command, TiUP uses the locally ins This command actually performs the following operations: -- Because this command does not specify the version of the playground component, TiUP first checks the latest version of the installed playground component. Assume that the latest version is v1.11.3, then this command works the same as `tiup playground:v1.11.3`. +- Because this command does not specify the version of the playground component, TiUP first checks the latest version of the installed playground component. 
Assume that the latest version is v1.12.3, then this command works the same as `tiup playground:v1.12.3`. - If you have not used TiUP playground to install the TiDB, TiKV, and PD components, the playground component installs the latest stable version of these components, and then start these instances. -- Because this command does not specify the version of the TiDB, PD, and TiKV component, TiUP playground uses the latest version of each component by default. Assume that the latest version is v7.0.0, then this command works the same as `tiup playground:v1.11.3 v7.0.0`. +- Because this command does not specify the version of the TiDB, PD, and TiKV component, TiUP playground uses the latest version of each component by default. Assume that the latest version is v7.3.0, then this command works the same as `tiup playground:v1.12.3 v7.3.0`. - Because this command does not specify the number of each component, TiUP playground, by default, starts a smallest cluster that consists of one TiDB instance, one TiKV instance, one PD instance, and one TiFlash instance. - After starting each TiDB component, TiUP playground reminds you that the cluster is successfully started and provides you some useful information, such as how to connect to the TiDB cluster through the MySQL client and how to access the [TiDB Dashboard](/dashboard/dashboard-intro.md). diff --git a/transaction-overview.md b/transaction-overview.md index 3f5860cc0e80b..e12e891f22dcf 100644 --- a/transaction-overview.md +++ b/transaction-overview.md @@ -92,9 +92,9 @@ For example: ```sql mysql> CREATE TABLE t1 ( - -> id INT NOT NULL PRIMARY KEY auto_increment, - -> pad1 VARCHAR(100) - -> ); + id INT NOT NULL PRIMARY KEY auto_increment, + pad1 VARCHAR(100) + ); Query OK, 0 rows affected (0.09 sec) mysql> SELECT @@autocommit; @@ -132,9 +132,9 @@ Autocommit will not apply if a transaction has been explicitly started. 
In the f ```sql mysql> CREATE TABLE t2 ( - -> id INT NOT NULL PRIMARY KEY auto_increment, - -> pad1 VARCHAR(100) - -> ); + id INT NOT NULL PRIMARY KEY auto_increment, + pad1 VARCHAR(100) + ); Query OK, 0 rows affected (0.10 sec) mysql> SELECT @@autocommit; diff --git a/tune-region-performance.md b/tune-region-performance.md index 8650370589bda..323cfaa565916 100644 --- a/tune-region-performance.md +++ b/tune-region-performance.md @@ -35,4 +35,4 @@ When the Dumpling tool is used, the Region size should not exceed 1 GiB. In this > > Currently, this is an experimental feature introduced in TiDB v6.1.0. It is not recommended that you use it in production environments. -When Regions are set to a larger size, you need to set [`coprocessor.enable-region-bucket`](/tikv-configuration-file.md#enable-region-bucket-new-in-v610) to `true` to increase the query concurrency. When you use this configuration, Regions are divided into buckets. Buckets are smaller ranges within a Region and are used as the unit of concurrent query to improve the scan concurrency. You can control the bucket size using [`coprocessor.region-bucket-size`](/tikv-configuration-file.md#region-bucket-size-new-in-v610). The default value is `96MiB`. \ No newline at end of file +When Regions are set to a larger size, you need to set [`coprocessor.enable-region-bucket`](/tikv-configuration-file.md#enable-region-bucket-new-in-v610) to `true` to increase the query concurrency. When you use this configuration, Regions are divided into buckets. Buckets are smaller ranges within a Region and are used as the unit of concurrent query to improve the scan concurrency. You can control the bucket size using [`coprocessor.region-bucket-size`](/tikv-configuration-file.md#region-bucket-size-new-in-v610). 
\ No newline at end of file diff --git a/upgrade-tidb-using-tiup.md b/upgrade-tidb-using-tiup.md index 79a63fb931a4b..0629dc5f5390a 100644 --- a/upgrade-tidb-using-tiup.md +++ b/upgrade-tidb-using-tiup.md @@ -8,15 +8,10 @@ aliases: ['/docs/dev/upgrade-tidb-using-tiup/','/docs/dev/how-to/upgrade/using-t This document is targeted for the following upgrade paths: -- Upgrade from TiDB 4.0 versions to TiDB 7.0. -- Upgrade from TiDB 5.0-5.4 versions to TiDB 7.0. -- Upgrade from TiDB 6.0 to TiDB 7.0. -- Upgrade from TiDB 6.1 to TiDB 7.0. -- Upgrade from TiDB 6.2 to TiDB 7.0. -- Upgrade from TiDB 6.3 to TiDB 7.0. -- Upgrade from TiDB 6.4 to TiDB 7.0. -- Upgrade from TiDB 6.5 to TiDB 7.0. -- Upgrade from TiDB 6.6 to TiDB 7.0. +- Upgrade from TiDB 4.0 versions to TiDB 7.3. +- Upgrade from TiDB 5.0-5.4 versions to TiDB 7.3. +- Upgrade from TiDB 6.0-6.6 to TiDB 7.3. +- Upgrade from TiDB 7.0-7.2 to TiDB 7.3. > **Warning:** > @@ -28,17 +23,18 @@ This document is targeted for the following upgrade paths: > **Note:** > -> If your cluster to be upgraded is v3.1 or an earlier version (v3.0 or v2.1), the direct upgrade to v7.0.0 is not supported. You need to upgrade your cluster first to v4.0 and then to v7.0.0. +> - If your cluster to be upgraded is v3.1 or an earlier version (v3.0 or v2.1), the direct upgrade to v7.3.0 is not supported. You need to upgrade your cluster first to v4.0 and then to v7.3.0. +> - TiDB nodes use the value of the [`server-version`](/tidb-configuration-file.md#server-version) configuration item to verify the current TiDB version. Therefore, to avoid unexpected behaviors, before upgrading the TiDB cluster, you need to set the value of `server-version` to empty or the real version of the current TiDB cluster. ## Upgrade caveat - TiDB currently does not support version downgrade or rolling back to an earlier version after the upgrade. 
-- For the v4.0 cluster managed using TiDB Ansible, you need to import the cluster to TiUP (`tiup cluster`) for new management according to [Upgrade TiDB Using TiUP (v4.0)](https://docs.pingcap.com/tidb/v4.0/upgrade-tidb-using-tiup#import-tidb-ansible-and-the-inventoryini-configuration-to-tiup). Then you can upgrade the cluster to v7.0.0 according to this document. -- To update versions earlier than v3.0 to v7.0.0: +- For the v4.0 cluster managed using TiDB Ansible, you need to import the cluster to TiUP (`tiup cluster`) for new management according to [Upgrade TiDB Using TiUP (v4.0)](https://docs.pingcap.com/tidb/v4.0/upgrade-tidb-using-tiup#import-tidb-ansible-and-the-inventoryini-configuration-to-tiup). Then you can upgrade the cluster to v7.3.0 according to this document. +- To update versions earlier than v3.0 to v7.3.0: 1. Update this version to 3.0 using [TiDB Ansible](https://docs.pingcap.com/tidb/v3.0/upgrade-tidb-using-ansible). 2. Use TiUP (`tiup cluster`) to import the TiDB Ansible configuration. 3. Update the 3.0 version to 4.0 according to [Upgrade TiDB Using TiUP (v4.0)](https://docs.pingcap.com/tidb/v4.0/upgrade-tidb-using-tiup#import-tidb-ansible-and-the-inventoryini-configuration-to-tiup). - 4. Upgrade the cluster to v7.0.0 according to this document. + 4. Upgrade the cluster to v7.3.0 according to this document. - Support upgrading the versions of TiDB Binlog, TiCDC, TiFlash, and other components. - When upgrading TiFlash from versions earlier than v6.3.0 to v6.3.0 and later versions, note that the CPU must support the AVX2 instruction set under the Linux AMD64 architecture and the ARMv8 instruction set architecture under the Linux ARM64 architecture. For details, see the description in [v6.3.0 Release Notes](/releases/release-6.3.0.md#others). - For detailed compatibility changes of different versions, see the [Release Notes](/releases/release-notes.md) of each version. 
Modify your cluster configuration according to the "Compatibility Changes" section of the corresponding release notes. @@ -50,7 +46,7 @@ This section introduces the preparation works needed before upgrading your TiDB ### Step 1: Review compatibility changes -Review [the compatibility changes](/releases/release-7.0.0.md#compatibility-changes) in TiDB v7.0.0 release notes. If any changes affect your upgrade, take actions accordingly. +Review [the compatibility changes](/releases/release-7.3.0.md#compatibility-changes) in TiDB v7.3.0 release notes. If any changes affect your upgrade, take actions accordingly. ### Step 2: Upgrade TiUP or TiUP offline mirror @@ -125,7 +121,7 @@ Now, the offline mirror has been upgraded successfully. If an error occurs durin > Skip this step if one of the following situations applies: > > + You have not modified the configuration parameters of the original cluster. Or you have modified the configuration parameters using `tiup cluster` but no more modification is needed. -> + After the upgrade, you want to use v7.0.0's default parameter values for the unmodified configuration items. +> + After the upgrade, you want to use v7.3.0's default parameter values for the unmodified configuration items. 1. Enter the `vi` editing mode to edit the topology file: @@ -141,7 +137,7 @@ Now, the offline mirror has been upgraded successfully. If an error occurs durin > **Note:** > -> Before you upgrade the cluster to v6.6.0, make sure that the parameters you have modified in v4.0 are compatible in v7.0.0. For details, see [TiKV Configuration File](/tikv-configuration-file.md). +> Before you upgrade the cluster to v6.6.0, make sure that the parameters you have modified in v4.0 are compatible in v7.3.0. For details, see [TiKV Configuration File](/tikv-configuration-file.md). 
### Step 4: Check the health status of the current cluster @@ -185,12 +181,12 @@ If your application has a maintenance window for the database to be stopped for tiup cluster upgrade ``` -For example, if you want to upgrade the cluster to v7.0.0: +For example, if you want to upgrade the cluster to v7.3.0: {{< copyable "shell-regular" >}} ```shell -tiup cluster upgrade v7.0.0 +tiup cluster upgrade v7.3.0 ``` > **Note:** @@ -218,7 +214,7 @@ tiup cluster upgrade v7.0.0 tiup cluster stop ``` -2. Use the `upgrade` command with the `--offline` option to perform the offline upgrade. Fill in the name of your cluster for `` and the version to upgrade to for ``, such as `v6.5.0`. +2. Use the `upgrade` command with the `--offline` option to perform the offline upgrade. Fill in the name of your cluster for `` and the version to upgrade to for ``, such as `v7.3.0`. {{< copyable "shell-regular" >}} @@ -247,7 +243,7 @@ tiup cluster display ``` Cluster type: tidb Cluster name: -Cluster version: v7.0.0 +Cluster version: v7.3.0 ``` ## FAQ @@ -278,7 +274,7 @@ Re-execute the `tiup cluster upgrade` command to resume the upgrade. The upgrade ### The evict leader has waited too long during the upgrade. How to skip this step for a quick upgrade? -You can specify `--force`. Then the processes of transferring PD leader and evicting TiKV leader are skipped during the upgrade. The cluster is directly restarted to update the version, which has a great impact on the cluster that runs online. In the following command, `` is the version to upgrade to, such as `v6.5.0`. +You can specify `--force`. Then the processes of transferring PD leader and evicting TiKV leader are skipped during the upgrade. The cluster is directly restarted to update the version, which has a great impact on the cluster that runs online. In the following command, `` is the version to upgrade to, such as `v7.3.0`. 
{{< copyable "shell-regular" >}} @@ -293,5 +289,5 @@ You can upgrade the tool version by using TiUP to install the `ctl` component of {{< copyable "shell-regular" >}} ```shell -tiup install ctl:v7.0.0 +tiup install ctl:v7.3.0 ```
CategoryMid-calendar-year LTS releaseEnd-calendar-year LTS releaseEnd of CY23 LTS releaseMid of CY24 LTS release Future releases
  • - General plan cache
    Improve general read performanceGA of Partitioned Raft KV storage engine
    PB-scale clusters, increased write velocity, faster scaling operations, and improved compaction stability

  • - Partitioned Raft KV storage engine
    - Provide increased write velocity, faster scaling operations, - and larger clusters - -
  • -
    -
  • - TiFlash performance boost
    - Implement TiFlash optimization such as late materialization - and runtime filter - -
  • -
    -
  • - Fastest online DDL distributed framework
    - Complete the distributed framework to support the fastest - online DDL + Augmented replica read
    + Reduced cross-AZ data transfer costs in TiKV

  • @@ -57,38 +44,24 @@ This roadmap brings you what's coming in the future, so you can see the new feat
  • - Stability at PB scale
    - Provide reliable and consistent performance for tremendous - data + Performance optimization framework for all applicable background tasks, like DDL, TTL, and cluster analysis
    + This distributes the workload of these operations throughout the cluster, leading to accelerated performance and reduced resource utilization on individual nodes. This framework already applies to the ADD INDEX operation

  • - Disaggregate TiFlash compute and storage (auto-scaling)
    - Realize elastic HTAP resource utilization + GA of disaggregated storage and compute architecture and S3 shared storage in TiFlash
    + Enable more cost-effective and elastic HTAP

  • -
  • - TiFlash S3 based storage engine -
    Provide shared storage at lower cost -

    -
  • - Next generation, more powerful storage engine -
  • -
  • Unlimited transaction size

  • -
  • - Multi-model support -

  • - Resource control: quotas and scheduling for resource groups - and background tasks
    + Resource control for background tasks
    - Reliably and efficiently manage workloads and applications - that share the same cluster - -
  • -
    -
  • - TiCDC/PITR recovery objective enhancements -
    - Increase business continuity and minimize the impact of system - failures + Control over how background tasks, such as imports, DDL, TTL, auto-analyze, and compactions, can affect foreground traffic
  • -
    -
  • - TiProxy -
    - Keep database connections during cluster upgrade and scale - in/out, and avoid impact on applications - -
  • -
    -
  • - End-to-end data correctness check -
    Prevent data error or corruption through TiCDC -
  • -
  • Multi-tenancy -
    Provide fine-grained resource control and isolation to reduce - cost -
  • -
    -
  • - Improved cluster/node level fault tolerance -
    Enhance cluster resilience -
  • -
    -
  • - TiFlash spill to disk -
    Avoid TiFlash OOM +
    Resource isolation on top of resource control

@@ -159,11 +95,6 @@ This roadmap brings you what's coming in the future, so you can see the new feat
  • Enhanced TiDB memory management
  • -
    -
  • - Global table -
  • -
    • - Production-ready TTL (time-to-live) data management -
      - Manage database size and improve performance by automatically - expiring outdated data - + MySQL 8.0 compatibility

    • -
    • - Table level flashback -
      - Support traveling a single table to a specific time via SQL - +
    • + Unified SQL interface for import, Backup & Restore, and PITR
    • -
    • - Materialized views -
      Support pre-calculation to boost query performance + Cascades framework for optimizer +
      Improved framework for query optimization, and make the optimizer more extensible and future-proof
    • -
    @@ -206,10 +128,6 @@ This roadmap brings you what's coming in the future, so you can see the new feat Federated query
    -
  • - Cascades optimizer -
  • -
  • Full text search & GIS support
  • @@ -231,60 +149,32 @@ This roadmap brings you what's coming in the future, so you can see the new feat
  • Distributed TiCDC single table replication
    - Dramatically improve TiCDC throughput by distributing the - workload to multiple nodes + Dramatically improve TiDB-TiDB replication throughput

  • Production-ready TiCDC sink to Amazon S3 and Azure object - storage -
    Enhance ecosystem to better work with big data -
  • -
    -
  • - TiDB Operator fast scale-in -
    Improve from scaling in one by one to scaling in at onceAutomatic pause/resume DDL during upgrade +
    Ensure a smooth upgrade experience

  • - SQL-based data import + TiCDC native integrations with big data systems
    Improve user-friendliness through operational enhancementsSuch as Snowflake and Iceburg
  • -
      -
    • - Major performance boost for data import -
      Expect 3-4 times of improvements -
    • -
    • Multiple upstreams for TiCDC
      Support N:1 TiDB to TiCDC

    • -
    • - SQL-based data management -
      Improve data management for TiCDC, data migration, and backup - and restore tools -
    • -
      -
    • - Automatic pause/resume DDL during upgrade -
      Ensure a smooth upgrade experience -
    • -
    @@ -311,48 +201,41 @@ This roadmap brings you what's coming in the future, so you can see the new feat
    • - JWT authentication -
      Provide secure and standard authentication + Key management via Azure Key Vault +
      Static encryption managed by Azure Key Vault

    • - LDAP integration -
      Authenticate via LDAP server over TLS + Column-level access control +
      Grant and restrict access to specific columns

    • - Audit log enhancement -
      - Enhance logs with greater details + Database-level encryption +
      At-rest encryption configured at database level
    • -
    • - Column-level/row-level access control -
      - Provide finer-grained control -
    • -
      -
    • - Database encryption -
      Encryption at rest with more granularity at the table level and column level + IAM authentication for AWS +
      TiDB as AWS third-party ARN for AWS IAM access

    • Unified TLS CA/Key rotation policy -
      - Enhance security and operational efficiency for all TiDB - components - +
      Unified certificate management mechanism for all TiDB components
    • -
      +
    • + Label-based access control +
      Access permissions granted by configured labels +
    • +
    • Enhanced client-side encryption
    • @@ -364,7 +247,6 @@ This roadmap brings you what's coming in the future, so you can see the new feat
    • Enhanced data lifecycle management
    • -