diff --git a/.github/workflows/assign_milestone.yml b/.github/workflows/assign_milestone.yml index e0c0b100a74..293e56556b2 100644 --- a/.github/workflows/assign_milestone.yml +++ b/.github/workflows/assign_milestone.yml @@ -14,7 +14,7 @@ env: jobs: build: name: Assign Milestone - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Checkout code diff --git a/.github/workflows/auto_approve_pr.yml b/.github/workflows/auto_approve_pr.yml index e76142c659f..fbd9ea9bd57 100644 --- a/.github/workflows/auto_approve_pr.yml +++ b/.github/workflows/auto_approve_pr.yml @@ -9,7 +9,7 @@ permissions: jobs: auto_approve: name: Auto Approve Pull Request - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 permissions: pull-requests: write # only given on local PRs, forks run with `read` access diff --git a/.github/workflows/check_label.yml b/.github/workflows/check_label.yml index 2c6439a29f9..cc5227eaa57 100644 --- a/.github/workflows/check_label.yml +++ b/.github/workflows/check_label.yml @@ -9,7 +9,7 @@ jobs: check_pull_request_labels: name: Check Pull Request labels timeout-minutes: 10 - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 if: github.repository == 'vitessio/vitess' steps: - name: Release Notes label @@ -70,11 +70,11 @@ jobs: exit 1 fi fi - + - name: Do Not Merge label run: | if [[ "${{contains( github.event.pull_request.labels.*.name, 'Do Not Merge')}}" == "true" ]]; then echo "This PR should not be merged. The 'Do Not Merge' label is set. Please unset it if you wish to merge this PR." exit 1 - fi \ No newline at end of file + fi diff --git a/.github/workflows/check_make_vtadmin_authz_testgen.yml b/.github/workflows/check_make_vtadmin_authz_testgen.yml index 57c850635a9..40ab2a4b4fa 100644 --- a/.github/workflows/check_make_vtadmin_authz_testgen.yml +++ b/.github/workflows/check_make_vtadmin_authz_testgen.yml @@ -6,7 +6,7 @@ permissions: read-all jobs: build: name: Check Make vtadmin_authz_testgen - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI run: | diff --git a/.github/workflows/check_make_vtadmin_web_proto.yml b/.github/workflows/check_make_vtadmin_web_proto.yml index 9493e3d7c6d..00a1cbc9e0b 100644 --- a/.github/workflows/check_make_vtadmin_web_proto.yml +++ b/.github/workflows/check_make_vtadmin_web_proto.yml @@ -6,7 +6,7 @@ permissions: read-all jobs: build: name: Check Make VTAdmin Web Proto - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI run: | diff --git a/.github/workflows/close_stale_pull_requests.yml b/.github/workflows/close_stale_pull_requests.yml index 1edda7cee91..def1af79d27 100644 --- a/.github/workflows/close_stale_pull_requests.yml +++ b/.github/workflows/close_stale_pull_requests.yml @@ -9,7 +9,7 @@ permissions: read-all jobs: close_stale_pull_requests: - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 permissions: pull-requests: write @@ -31,4 +31,3 @@ jobs: close-pr-message: "This PR was closed because it has been stale for 7 days with no activity." 
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - diff --git a/.github/workflows/cluster_endtoend_12.yml b/.github/workflows/cluster_endtoend_12.yml index e4feb45d4c8..2434752c1cd 100644 --- a/.github/workflows/cluster_endtoend_12.yml +++ b/.github/workflows/cluster_endtoend_12.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (12) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_13.yml b/.github/workflows/cluster_endtoend_13.yml index 249e7533bbf..0b60eacc934 100644 --- a/.github/workflows/cluster_endtoend_13.yml +++ b/.github/workflows/cluster_endtoend_13.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (13) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_15.yml b/.github/workflows/cluster_endtoend_15.yml index 18cea7d200c..a5a36eb4f9e 100644 --- a/.github/workflows/cluster_endtoend_15.yml +++ b/.github/workflows/cluster_endtoend_15.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (15) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_18.yml b/.github/workflows/cluster_endtoend_18.yml index 2d260a20de1..ae3586908e7 100644 --- a/.github/workflows/cluster_endtoend_18.yml +++ b/.github/workflows/cluster_endtoend_18.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (18) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_21.yml b/.github/workflows/cluster_endtoend_21.yml index e6773b5ddf9..acd61ff669d 100644 --- a/.github/workflows/cluster_endtoend_21.yml +++ b/.github/workflows/cluster_endtoend_21.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (21) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_22.yml b/.github/workflows/cluster_endtoend_22.yml index 950ba2d844b..418b52cba69 100644 --- a/.github/workflows/cluster_endtoend_22.yml +++ b/.github/workflows/cluster_endtoend_22.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (22) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_backup_pitr.yml b/.github/workflows/cluster_endtoend_backup_pitr.yml index aeb805a5615..c64839275a8 100644 --- a/.github/workflows/cluster_endtoend_backup_pitr.yml +++ b/.github/workflows/cluster_endtoend_backup_pitr.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (backup_pitr) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_backup_pitr_xtrabackup.yml b/.github/workflows/cluster_endtoend_backup_pitr_xtrabackup.yml index 7a7d7752b73..df257e51bcb 100644 --- a/.github/workflows/cluster_endtoend_backup_pitr_xtrabackup.yml +++ b/.github/workflows/cluster_endtoend_backup_pitr_xtrabackup.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (backup_pitr_xtrabackup) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_ers_prs_newfeatures_heavy.yml b/.github/workflows/cluster_endtoend_ers_prs_newfeatures_heavy.yml index ec4a5c9c897..f3793d96208 100644 --- 
a/.github/workflows/cluster_endtoend_ers_prs_newfeatures_heavy.yml +++ b/.github/workflows/cluster_endtoend_ers_prs_newfeatures_heavy.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (ers_prs_newfeatures_heavy) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_mysql80.yml b/.github/workflows/cluster_endtoend_mysql80.yml index 5ac64acd95d..7c8dd919b11 100644 --- a/.github/workflows/cluster_endtoend_mysql80.yml +++ b/.github/workflows/cluster_endtoend_mysql80.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (mysql80) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_mysql_server_vault.yml b/.github/workflows/cluster_endtoend_mysql_server_vault.yml index 9531d7a6008..acc0fc09449 100644 --- a/.github/workflows/cluster_endtoend_mysql_server_vault.yml +++ b/.github/workflows/cluster_endtoend_mysql_server_vault.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (mysql_server_vault) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_ghost.yml b/.github/workflows/cluster_endtoend_onlineddl_ghost.yml index 0ca55431696..460f880136c 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_ghost.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_ghost.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (onlineddl_ghost) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_revert.yml b/.github/workflows/cluster_endtoend_onlineddl_revert.yml index 8afd97ffa69..a30fb8e0de7 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_revert.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_revert.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (onlineddl_revert) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_scheduler.yml b/.github/workflows/cluster_endtoend_onlineddl_scheduler.yml index 4bfe5dcb16f..80f9e9fa2fc 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_scheduler.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_scheduler.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (onlineddl_scheduler) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml index 2daa3307089..dd585ab8482 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (onlineddl_vrepl) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl_mysql57.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl_mysql57.yml index 42c3ada0f37..c028e5b95ef 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_vrepl_mysql57.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl_mysql57.yml @@ -16,7 +16,7 @@ env: jobs: build: name: 
Run endtoend tests on Cluster (onlineddl_vrepl) mysql57 - runs-on: gh-hosted-runners-16cores-1 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress.yml index edca4514c20..8c806816404 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (onlineddl_vrepl_stress) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_mysql57.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_mysql57.yml index 597685894cc..8bc54aecd9b 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_mysql57.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_mysql57.yml @@ -16,7 +16,7 @@ env: jobs: build: name: Run endtoend tests on Cluster (onlineddl_vrepl_stress) mysql57 - runs-on: gh-hosted-runners-16cores-1 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_suite.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_suite.yml index 9bf80b64241..2a689860e84 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_suite.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_suite.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (onlineddl_vrepl_stress_suite) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_suite_mysql57.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_suite_mysql57.yml index 7f1f625ac26..a60b5c5f864 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_suite_mysql57.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl_stress_suite_mysql57.yml @@ -16,7 +16,7 @@ env: jobs: build: name: Run endtoend tests on Cluster (onlineddl_vrepl_stress_suite) mysql57 - runs-on: gh-hosted-runners-16cores-1 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl_suite.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl_suite.yml index 6c8b88d1e37..738d0c26269 100644 --- a/.github/workflows/cluster_endtoend_onlineddl_vrepl_suite.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl_suite.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (onlineddl_vrepl_suite) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_onlineddl_vrepl_suite_mysql57.yml b/.github/workflows/cluster_endtoend_onlineddl_vrepl_suite_mysql57.yml index da2dfdb3882..93206e43f55 100644 --- 
a/.github/workflows/cluster_endtoend_onlineddl_vrepl_suite_mysql57.yml +++ b/.github/workflows/cluster_endtoend_onlineddl_vrepl_suite_mysql57.yml @@ -16,7 +16,7 @@ env: jobs: build: name: Run endtoend tests on Cluster (onlineddl_vrepl_suite) mysql57 - runs-on: gh-hosted-runners-16cores-1 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_schemadiff_vrepl.yml b/.github/workflows/cluster_endtoend_schemadiff_vrepl.yml index 2b781656a32..9beba455f8d 100644 --- a/.github/workflows/cluster_endtoend_schemadiff_vrepl.yml +++ b/.github/workflows/cluster_endtoend_schemadiff_vrepl.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (schemadiff_vrepl) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_tabletmanager_consul.yml b/.github/workflows/cluster_endtoend_tabletmanager_consul.yml index 6a030a8b7f2..9dfa4c43436 100644 --- a/.github/workflows/cluster_endtoend_tabletmanager_consul.yml +++ b/.github/workflows/cluster_endtoend_tabletmanager_consul.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (tabletmanager_consul) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_tabletmanager_tablegc.yml b/.github/workflows/cluster_endtoend_tabletmanager_tablegc.yml index 40fe5141aac..351c70373b4 100644 --- a/.github/workflows/cluster_endtoend_tabletmanager_tablegc.yml +++ b/.github/workflows/cluster_endtoend_tabletmanager_tablegc.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (tabletmanager_tablegc) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_tabletmanager_throttler_topo.yml b/.github/workflows/cluster_endtoend_tabletmanager_throttler_topo.yml index af851bd2bf7..467dc3b7f34 100644 --- a/.github/workflows/cluster_endtoend_tabletmanager_throttler_topo.yml +++ b/.github/workflows/cluster_endtoend_tabletmanager_throttler_topo.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (tabletmanager_throttler_topo) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_topo_connection_cache.yml b/.github/workflows/cluster_endtoend_topo_connection_cache.yml index 4b724d8fe7a..7c7c0e740ca 100644 --- a/.github/workflows/cluster_endtoend_topo_connection_cache.yml +++ b/.github/workflows/cluster_endtoend_topo_connection_cache.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (topo_connection_cache) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vreplication_across_db_versions.yml b/.github/workflows/cluster_endtoend_vreplication_across_db_versions.yml index b1305a13f8a..1d3b0387289 100644 --- a/.github/workflows/cluster_endtoend_vreplication_across_db_versions.yml +++ b/.github/workflows/cluster_endtoend_vreplication_across_db_versions.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vreplication_across_db_versions) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vreplication_basic.yml b/.github/workflows/cluster_endtoend_vreplication_basic.yml 
index 1594113cb45..5ff95ce9d6b 100644 --- a/.github/workflows/cluster_endtoend_vreplication_basic.yml +++ b/.github/workflows/cluster_endtoend_vreplication_basic.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vreplication_basic) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vreplication_cellalias.yml b/.github/workflows/cluster_endtoend_vreplication_cellalias.yml index 49138b690c4..5a502a6957c 100644 --- a/.github/workflows/cluster_endtoend_vreplication_cellalias.yml +++ b/.github/workflows/cluster_endtoend_vreplication_cellalias.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vreplication_cellalias) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vreplication_foreign_key_stress.yml b/.github/workflows/cluster_endtoend_vreplication_foreign_key_stress.yml index c97243bb670..b4bbdca2c8f 100644 --- a/.github/workflows/cluster_endtoend_vreplication_foreign_key_stress.yml +++ b/.github/workflows/cluster_endtoend_vreplication_foreign_key_stress.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vreplication_foreign_key_stress) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vreplication_migrate_vdiff2_convert_tz.yml b/.github/workflows/cluster_endtoend_vreplication_migrate_vdiff2_convert_tz.yml index 6b89cab99e1..20e5facf714 100644 --- a/.github/workflows/cluster_endtoend_vreplication_migrate_vdiff2_convert_tz.yml +++ b/.github/workflows/cluster_endtoend_vreplication_migrate_vdiff2_convert_tz.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vreplication_migrate_vdiff2_convert_tz) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vreplication_partial_movetables_and_materialize.yml b/.github/workflows/cluster_endtoend_vreplication_partial_movetables_and_materialize.yml index 9ef3c2a918d..45dfa0567c6 100644 --- a/.github/workflows/cluster_endtoend_vreplication_partial_movetables_and_materialize.yml +++ b/.github/workflows/cluster_endtoend_vreplication_partial_movetables_and_materialize.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vreplication_partial_movetables_and_materialize) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vreplication_v2.yml b/.github/workflows/cluster_endtoend_vreplication_v2.yml index 6a7990435eb..d340f526b05 100644 --- a/.github/workflows/cluster_endtoend_vreplication_v2.yml +++ b/.github/workflows/cluster_endtoend_vreplication_v2.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vreplication_v2) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vstream.yml b/.github/workflows/cluster_endtoend_vstream.yml index 5b6e9bc257b..cc3d33fbc85 100644 --- a/.github/workflows/cluster_endtoend_vstream.yml +++ b/.github/workflows/cluster_endtoend_vstream.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 
name: Run endtoend tests on Cluster (vstream) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtbackup.yml b/.github/workflows/cluster_endtoend_vtbackup.yml index b2bc5fcc510..e7d4a2e6f63 100644 --- a/.github/workflows/cluster_endtoend_vtbackup.yml +++ b/.github/workflows/cluster_endtoend_vtbackup.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtbackup) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtctlbackup_sharded_clustertest_heavy.yml b/.github/workflows/cluster_endtoend_vtctlbackup_sharded_clustertest_heavy.yml index a2e4656dae9..e918fb28a7a 100644 --- a/.github/workflows/cluster_endtoend_vtctlbackup_sharded_clustertest_heavy.yml +++ b/.github/workflows/cluster_endtoend_vtctlbackup_sharded_clustertest_heavy.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtctlbackup_sharded_clustertest_heavy) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_concurrentdml.yml b/.github/workflows/cluster_endtoend_vtgate_concurrentdml.yml index 54bdac42d5a..6954b3ca398 100644 --- a/.github/workflows/cluster_endtoend_vtgate_concurrentdml.yml +++ b/.github/workflows/cluster_endtoend_vtgate_concurrentdml.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_concurrentdml) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_foreignkey_stress.yml b/.github/workflows/cluster_endtoend_vtgate_foreignkey_stress.yml index dd79ccbc92f..2721332912a 100644 --- a/.github/workflows/cluster_endtoend_vtgate_foreignkey_stress.yml +++ b/.github/workflows/cluster_endtoend_vtgate_foreignkey_stress.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_foreignkey_stress) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_gen4.yml b/.github/workflows/cluster_endtoend_vtgate_gen4.yml index 1330adf3d25..c34dc9b2626 100644 --- a/.github/workflows/cluster_endtoend_vtgate_gen4.yml +++ b/.github/workflows/cluster_endtoend_vtgate_gen4.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_gen4) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_general_heavy.yml b/.github/workflows/cluster_endtoend_vtgate_general_heavy.yml index f9d31f6dded..f515e550065 100644 --- a/.github/workflows/cluster_endtoend_vtgate_general_heavy.yml +++ b/.github/workflows/cluster_endtoend_vtgate_general_heavy.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_general_heavy) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_godriver.yml b/.github/workflows/cluster_endtoend_vtgate_godriver.yml index 4d3ff0e0bbf..ff8d30a83c7 100644 --- a/.github/workflows/cluster_endtoend_vtgate_godriver.yml +++ b/.github/workflows/cluster_endtoend_vtgate_godriver.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_godriver) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git 
a/.github/workflows/cluster_endtoend_vtgate_partial_keyspace.yml b/.github/workflows/cluster_endtoend_vtgate_partial_keyspace.yml index 385fce1b9c8..34fd9969e28 100644 --- a/.github/workflows/cluster_endtoend_vtgate_partial_keyspace.yml +++ b/.github/workflows/cluster_endtoend_vtgate_partial_keyspace.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_partial_keyspace) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_queries.yml b/.github/workflows/cluster_endtoend_vtgate_queries.yml index fbfa01ec743..d9163e2ab56 100644 --- a/.github/workflows/cluster_endtoend_vtgate_queries.yml +++ b/.github/workflows/cluster_endtoend_vtgate_queries.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_queries) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_readafterwrite.yml b/.github/workflows/cluster_endtoend_vtgate_readafterwrite.yml index 2e470778fa2..d8ac18cc595 100644 --- a/.github/workflows/cluster_endtoend_vtgate_readafterwrite.yml +++ b/.github/workflows/cluster_endtoend_vtgate_readafterwrite.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_readafterwrite) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_reservedconn.yml b/.github/workflows/cluster_endtoend_vtgate_reservedconn.yml index 4e6096033e5..eeb1fe3f8ff 100644 --- a/.github/workflows/cluster_endtoend_vtgate_reservedconn.yml +++ b/.github/workflows/cluster_endtoend_vtgate_reservedconn.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_reservedconn) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_schema.yml b/.github/workflows/cluster_endtoend_vtgate_schema.yml index 8e07af66dce..d6c66448b72 100644 --- a/.github/workflows/cluster_endtoend_vtgate_schema.yml +++ b/.github/workflows/cluster_endtoend_vtgate_schema.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_schema) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_schema_tracker.yml b/.github/workflows/cluster_endtoend_vtgate_schema_tracker.yml index 3ebf5b631f7..ac4050b3898 100644 --- a/.github/workflows/cluster_endtoend_vtgate_schema_tracker.yml +++ b/.github/workflows/cluster_endtoend_vtgate_schema_tracker.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_schema_tracker) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_tablet_healthcheck_cache.yml b/.github/workflows/cluster_endtoend_vtgate_tablet_healthcheck_cache.yml index ebd29dd27b9..19251bc2410 100644 --- a/.github/workflows/cluster_endtoend_vtgate_tablet_healthcheck_cache.yml +++ b/.github/workflows/cluster_endtoend_vtgate_tablet_healthcheck_cache.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_tablet_healthcheck_cache) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_topo.yml b/.github/workflows/cluster_endtoend_vtgate_topo.yml index 5dc738d0bf4..826b41464d2 
100644 --- a/.github/workflows/cluster_endtoend_vtgate_topo.yml +++ b/.github/workflows/cluster_endtoend_vtgate_topo.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_topo) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_topo_consul.yml b/.github/workflows/cluster_endtoend_vtgate_topo_consul.yml index bdfa7bb5895..48023793402 100644 --- a/.github/workflows/cluster_endtoend_vtgate_topo_consul.yml +++ b/.github/workflows/cluster_endtoend_vtgate_topo_consul.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_topo_consul) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_topo_etcd.yml b/.github/workflows/cluster_endtoend_vtgate_topo_etcd.yml index 28440f8c0d2..dce7dc44306 100644 --- a/.github/workflows/cluster_endtoend_vtgate_topo_etcd.yml +++ b/.github/workflows/cluster_endtoend_vtgate_topo_etcd.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_topo_etcd) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_transaction.yml b/.github/workflows/cluster_endtoend_vtgate_transaction.yml index 82c1cbc8f46..4834f493310 100644 --- a/.github/workflows/cluster_endtoend_vtgate_transaction.yml +++ b/.github/workflows/cluster_endtoend_vtgate_transaction.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_transaction) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_unsharded.yml b/.github/workflows/cluster_endtoend_vtgate_unsharded.yml index 07519c15cbf..81f079be04f 100644 --- a/.github/workflows/cluster_endtoend_vtgate_unsharded.yml +++ b/.github/workflows/cluster_endtoend_vtgate_unsharded.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_unsharded) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_vindex_heavy.yml b/.github/workflows/cluster_endtoend_vtgate_vindex_heavy.yml index bd499d3f714..10ffff837f5 100644 --- a/.github/workflows/cluster_endtoend_vtgate_vindex_heavy.yml +++ b/.github/workflows/cluster_endtoend_vtgate_vindex_heavy.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_vindex_heavy) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtgate_vschema.yml b/.github/workflows/cluster_endtoend_vtgate_vschema.yml index fd6fe0c7254..702b75f8c15 100644 --- a/.github/workflows/cluster_endtoend_vtgate_vschema.yml +++ b/.github/workflows/cluster_endtoend_vtgate_vschema.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtgate_vschema) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_vtorc.yml b/.github/workflows/cluster_endtoend_vtorc.yml index 7c114105121..acbc83487dc 100644 --- a/.github/workflows/cluster_endtoend_vtorc.yml +++ b/.github/workflows/cluster_endtoend_vtorc.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vtorc) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git 
a/.github/workflows/cluster_endtoend_vttablet_prscomplex.yml b/.github/workflows/cluster_endtoend_vttablet_prscomplex.yml index 2f0d5c04250..8d33eed850c 100644 --- a/.github/workflows/cluster_endtoend_vttablet_prscomplex.yml +++ b/.github/workflows/cluster_endtoend_vttablet_prscomplex.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (vttablet_prscomplex) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_xb_backup.yml b/.github/workflows/cluster_endtoend_xb_backup.yml index c179bee2447..1f92edef9e5 100644 --- a/.github/workflows/cluster_endtoend_xb_backup.yml +++ b/.github/workflows/cluster_endtoend_xb_backup.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (xb_backup) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/cluster_endtoend_xb_recovery.yml b/.github/workflows/cluster_endtoend_xb_recovery.yml index 8fbfaba920e..6e7e00e1054 100644 --- a/.github/workflows/cluster_endtoend_xb_recovery.yml +++ b/.github/workflows/cluster_endtoend_xb_recovery.yml @@ -17,7 +17,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on Cluster (xb_recovery) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/code_freeze.yml b/.github/workflows/code_freeze.yml index a66fb6e8b2b..5640bc01b69 100644 --- a/.github/workflows/code_freeze.yml +++ b/.github/workflows/code_freeze.yml @@ -7,7 +7,7 @@ permissions: read-all jobs: build: name: Code Freeze - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Fail if Code Freeze is enabled run: | diff --git a/.github/workflows/codeql_analysis.yml b/.github/workflows/codeql_analysis.yml index 0e9ce81f0b4..633f3353151 100644 --- a/.github/workflows/codeql_analysis.yml +++ b/.github/workflows/codeql_analysis.yml @@ -14,7 +14,7 @@ permissions: read-all jobs: analyze: name: Analyze - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 permissions: actions: read contents: read @@ -74,10 +74,10 @@ jobs: sudo bash -c "echo '/usr/sbin/mysqld { }' > /etc/apparmor.d/usr.sbin.mysqld" # https://bugs.launchpad.net/ubuntu/+source/mariadb-10.1/+bug/1806263 sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/ sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld || echo "could not remove mysqld profile" - + # install JUnit report formatter go install github.com/vitessio/go-junit-report@HEAD - + - name: Building binaries timeout-minutes: 30 run: | diff --git a/.github/workflows/create_release.yml b/.github/workflows/create_release.yml index 55f5d954bec..bbebb181624 100644 --- a/.github/workflows/create_release.yml +++ b/.github/workflows/create_release.yml @@ -14,7 +14,7 @@ permissions: jobs: build: name: Create Release - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Check out code uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 diff --git a/.github/workflows/docker_build_base.yml b/.github/workflows/docker_build_base.yml index 9e93ef800ae..edfe7de4eca 100644 --- a/.github/workflows/docker_build_base.yml +++ b/.github/workflows/docker_build_base.yml @@ -15,7 +15,7 @@ permissions: read-all jobs: build_and_push_base: name: Build and push vitess/base Docker images - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} if: github.repository == 'vitessio/vitess' strategy: @@ 
-79,7 +79,7 @@ jobs: build_and_push_k8s: needs: build_and_push_base name: Build and push vitess/k8s image - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} if: github.repository == 'vitessio/vitess' strategy: @@ -174,7 +174,7 @@ jobs: build_and_push_components: needs: build_and_push_k8s name: Build and push vitess components Docker images - runs-on: gh-hosted-runners-16cores-1 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} if: github.repository == 'vitessio/vitess' strategy: diff --git a/.github/workflows/docker_build_lite.yml b/.github/workflows/docker_build_lite.yml index d09fd3bc1b7..f5bd6b2ac32 100644 --- a/.github/workflows/docker_build_lite.yml +++ b/.github/workflows/docker_build_lite.yml @@ -15,7 +15,7 @@ permissions: read-all jobs: build_and_push: name: Build and push vitess/lite Docker images - runs-on: gh-hosted-runners-16cores-1 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} if: github.repository == 'vitessio/vitess' strategy: @@ -72,4 +72,4 @@ jobs: context: . file: ${{ env.DOCKERFILE }} push: true - tags: ${{ env.DOCKER_TAG }} \ No newline at end of file + tags: ${{ env.DOCKER_TAG }} diff --git a/.github/workflows/docker_build_vttestserver.yml b/.github/workflows/docker_build_vttestserver.yml index 82e09456c9d..f8ad06418e9 100644 --- a/.github/workflows/docker_build_vttestserver.yml +++ b/.github/workflows/docker_build_vttestserver.yml @@ -15,7 +15,7 @@ permissions: read-all jobs: build_and_push: name: Build and push vitess/vttestserver Docker images - runs-on: gh-hosted-runners-16cores-1 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} if: github.repository == 'vitessio/vitess' strategy: @@ -64,4 +64,4 @@ jobs: context: . 
file: ${{ env.DOCKERFILE }} push: true - tags: ${{ env.DOCKER_TAG }} \ No newline at end of file + tags: ${{ env.DOCKER_TAG }} diff --git a/.github/workflows/docker_test_cluster_10.yml b/.github/workflows/docker_test_cluster_10.yml index 0fca1d18a58..3119ca3f8aa 100644 --- a/.github/workflows/docker_test_cluster_10.yml +++ b/.github/workflows/docker_test_cluster_10.yml @@ -5,7 +5,7 @@ jobs: build: name: Docker Test Cluster 10 - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/docker_test_cluster_25.yml b/.github/workflows/docker_test_cluster_25.yml index 3e304033d47..2e5bfe18669 100644 --- a/.github/workflows/docker_test_cluster_25.yml +++ b/.github/workflows/docker_test_cluster_25.yml @@ -5,7 +5,7 @@ jobs: build: name: Docker Test Cluster 25 - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI diff --git a/.github/workflows/e2e_race.yml b/.github/workflows/e2e_race.yml index e7aa09278d0..cb4b068f9df 100644 --- a/.github/workflows/e2e_race.yml +++ b/.github/workflows/e2e_race.yml @@ -5,7 +5,7 @@ jobs: build: name: End-to-End Test (Race) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI run: | @@ -71,7 +71,7 @@ jobs: echo mysql-apt-config mysql-apt-config/select-server select mysql-8.0 | sudo debconf-set-selections sudo DEBIAN_FRONTEND="noninteractive" dpkg -i mysql-apt-config* sudo apt-get update - + # Install everything else we need, and configure sudo apt-get install -y mysql-server mysql-client make unzip g++ etcd-client etcd-server curl git wget eatmydata xz-utils sudo service mysql stop diff --git a/.github/workflows/endtoend.yml b/.github/workflows/endtoend.yml index bee9f1c5a2c..0b77f9ee8a8 100644 --- a/.github/workflows/endtoend.yml +++ b/.github/workflows/endtoend.yml @@ -5,7 +5,7 @@ jobs: build: name: End-to-End Test - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI run: | diff --git a/.github/workflows/local_example.yml b/.github/workflows/local_example.yml index 338ef72624c..b8924c932f4 100644 --- a/.github/workflows/local_example.yml +++ b/.github/workflows/local_example.yml @@ -4,8 +4,8 @@ permissions: read-all jobs: build: - name: Local example using ${{ matrix.topo }} on Ubuntu - runs-on: gh-hosted-runners-16cores-1-24.04 + name: Local example using ${{ matrix.topo }} on ubuntu-22.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} strategy: matrix: topo: [consul,etcd,zk2] @@ -75,6 +75,24 @@ jobs: - name: Get dependencies if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.examples == 'true' run: | + if [ ${{matrix.os}} = "ubuntu-22.04" ]; then + # Get key to latest MySQL repo + sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys A8D3785C + # Setup MySQL 8.0 + wget -c https://dev.mysql.com/get/mysql-apt-config_0.8.29-1_all.deb + echo mysql-apt-config mysql-apt-config/select-server select mysql-8.0 | sudo debconf-set-selections + sudo DEBIAN_FRONTEND="noninteractive" dpkg -i mysql-apt-config* + sudo apt-get update + + # Install everything else we need, and configure + sudo apt-get install -y mysql-server mysql-client make unzip g++ etcd curl git wget eatmydata + sudo service mysql stop + sudo service etcd stop + sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/ + sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld + elif [ ${{matrix.os}} = "macos-latest" ]; then + brew install mysql@5.7 make unzip etcd curl git wget + fi go mod download - name: Run 
make minimaltools diff --git a/.github/workflows/region_example.yml b/.github/workflows/region_example.yml index 3463657f428..3b204c40eaa 100644 --- a/.github/workflows/region_example.yml +++ b/.github/workflows/region_example.yml @@ -4,8 +4,8 @@ permissions: read-all jobs: build: - name: Region Sharding example using ${{ matrix.topo }} on Ubuntu - runs-on: gh-hosted-runners-16cores-1-24.04 + name: Region Sharding example using ${{ matrix.topo }} on ubuntu-22.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} strategy: matrix: topo: [etcd] diff --git a/.github/workflows/static_checks_etc.yml b/.github/workflows/static_checks_etc.yml index 669644c34f2..785e91aced6 100644 --- a/.github/workflows/static_checks_etc.yml +++ b/.github/workflows/static_checks_etc.yml @@ -9,7 +9,7 @@ permissions: read-all jobs: build: name: Static Code Checks Etc - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI @@ -27,7 +27,7 @@ jobs: skip='true' fi echo Skip ${skip} - echo "skip-workflow=${skip}" >> $GITHUB_OUTPUT + echo "skip-workflow=${skip}" >> $GITHUB_OUTPUT - name: Checkout code if: steps.skip-workflow.outputs.skip-workflow == 'false' diff --git a/.github/workflows/unit_race.yml b/.github/workflows/unit_race.yml index ae3800b652b..8b1d97123fb 100644 --- a/.github/workflows/unit_race.yml +++ b/.github/workflows/unit_race.yml @@ -10,7 +10,7 @@ jobs: build: name: Unit Test (Race) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI run: | diff --git a/.github/workflows/unit_test_mysql57.yml b/.github/workflows/unit_test_mysql57.yml index a6b52835e9b..ba246e96b33 100644 --- a/.github/workflows/unit_test_mysql57.yml +++ b/.github/workflows/unit_test_mysql57.yml @@ -16,7 +16,7 @@ env: jobs: test: name: Unit Test (mysql57) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI @@ -132,7 +132,7 @@ jobs: go mod download go install golang.org/x/tools/cmd/goimports@latest - + # install JUnit report formatter go install github.com/vitessio/go-junit-report@HEAD diff --git a/.github/workflows/unit_test_mysql80.yml b/.github/workflows/unit_test_mysql80.yml index e7108b2be15..a3f90a29cc3 100644 --- a/.github/workflows/unit_test_mysql80.yml +++ b/.github/workflows/unit_test_mysql80.yml @@ -16,7 +16,7 @@ env: jobs: test: name: Unit Test (mysql80) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI @@ -124,7 +124,7 @@ jobs: go mod download go install golang.org/x/tools/cmd/goimports@latest - + # install JUnit report formatter go install github.com/vitessio/go-junit-report@HEAD diff --git a/.github/workflows/update_golang_dependencies.yml b/.github/workflows/update_golang_dependencies.yml index 3e40b111459..4c672e435b5 100644 --- a/.github/workflows/update_golang_dependencies.yml +++ b/.github/workflows/update_golang_dependencies.yml @@ -14,7 +14,7 @@ jobs: contents: write pull-requests: write name: Update Golang Dependencies - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Set up Go uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2 diff --git a/.github/workflows/update_golang_version.yml b/.github/workflows/update_golang_version.yml index 9c472035c17..a42389de2fb 100644 --- a/.github/workflows/update_golang_version.yml +++ b/.github/workflows/update_golang_version.yml @@ -17,7 +17,7 @@ jobs: matrix: branch: [ main, release-19.0, release-18.0, release-17.0, 
release-16.0 ] name: Update Golang Version - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Set up Go uses: actions/setup-go@v5 @@ -47,7 +47,7 @@ jobs: if [ -z "${output}" ]; then exit 0 fi - + go_version=$(go run ./go/tools/go-upgrade/go-upgrade.go get go-version) bootstrap_version=$(go run ./go/tools/go-upgrade/go-upgrade.go get bootstrap-version) echo "go-version=${go_version}" >> $GITHUB_OUTPUT diff --git a/.github/workflows/upgrade_downgrade_test_backups_e2e.yml b/.github/workflows/upgrade_downgrade_test_backups_e2e.yml index 0a0531658d7..068cc91e01d 100644 --- a/.github/workflows/upgrade_downgrade_test_backups_e2e.yml +++ b/.github/workflows/upgrade_downgrade_test_backups_e2e.yml @@ -13,7 +13,7 @@ jobs: upgrade_downgrade_test_e2e: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Backups - E2E - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_backups_e2e_next_release.yml b/.github/workflows/upgrade_downgrade_test_backups_e2e_next_release.yml index 8e910e8e1db..3f9375de68f 100644 --- a/.github/workflows/upgrade_downgrade_test_backups_e2e_next_release.yml +++ b/.github/workflows/upgrade_downgrade_test_backups_e2e_next_release.yml @@ -14,7 +14,7 @@ jobs: upgrade_downgrade_test_e2e: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Backups - E2E - Next Release - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_backups_manual.yml b/.github/workflows/upgrade_downgrade_test_backups_manual.yml index 4318df3dd3f..5fdf268ff93 100644 --- a/.github/workflows/upgrade_downgrade_test_backups_manual.yml +++ b/.github/workflows/upgrade_downgrade_test_backups_manual.yml @@ -15,7 +15,7 @@ jobs: upgrade_downgrade_test_manual: timeout-minutes: 40 name: Run Upgrade Downgrade Test - Backups - Manual - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_backups_manual_next_release.yml b/.github/workflows/upgrade_downgrade_test_backups_manual_next_release.yml index 3f6547f54ee..d2fc1bc1d3e 100644 --- a/.github/workflows/upgrade_downgrade_test_backups_manual_next_release.yml +++ b/.github/workflows/upgrade_downgrade_test_backups_manual_next_release.yml @@ -15,7 +15,7 @@ jobs: upgrade_downgrade_test_manual: timeout-minutes: 40 name: Run Upgrade Downgrade Test - Backups - Manual - Next Release - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_query_serving_queries.yml b/.github/workflows/upgrade_downgrade_test_query_serving_queries.yml index b7633ddecb7..2e089741e45 100644 --- a/.github/workflows/upgrade_downgrade_test_query_serving_queries.yml +++ b/.github/workflows/upgrade_downgrade_test_query_serving_queries.yml @@ -17,7 +17,7 @@ jobs: upgrade_downgrade_test: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Query Serving (Queries) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} 
steps: - name: Skip CI @@ -197,7 +197,7 @@ jobs: if: steps.skip-workflow.outputs.skip-workflow == 'false' && steps.changes.outputs.end_to_end == 'true' run: | source build.env - + rm -Rf bin/* cp -r /tmp/vitess-build-current/bin/* $PWD/bin/ diff --git a/.github/workflows/upgrade_downgrade_test_query_serving_queries_next_release.yml b/.github/workflows/upgrade_downgrade_test_query_serving_queries_next_release.yml index b59075f0831..a1c967cbfef 100644 --- a/.github/workflows/upgrade_downgrade_test_query_serving_queries_next_release.yml +++ b/.github/workflows/upgrade_downgrade_test_query_serving_queries_next_release.yml @@ -17,7 +17,7 @@ jobs: upgrade_downgrade_test: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Query Serving (Queries) Next Release - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_query_serving_schema.yml b/.github/workflows/upgrade_downgrade_test_query_serving_schema.yml index 352d5df2549..d99b74b866f 100644 --- a/.github/workflows/upgrade_downgrade_test_query_serving_schema.yml +++ b/.github/workflows/upgrade_downgrade_test_query_serving_schema.yml @@ -17,7 +17,7 @@ jobs: upgrade_downgrade_test: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Query Serving (Schema) - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_query_serving_schema_next_release.yml b/.github/workflows/upgrade_downgrade_test_query_serving_schema_next_release.yml index 1db4cb0180d..ee94ac5d567 100644 --- a/.github/workflows/upgrade_downgrade_test_query_serving_schema_next_release.yml +++ b/.github/workflows/upgrade_downgrade_test_query_serving_schema_next_release.yml @@ -17,7 +17,7 @@ jobs: upgrade_downgrade_test: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Query Serving (Schema) Next Release - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_reparent_new_vtctl.yml b/.github/workflows/upgrade_downgrade_test_reparent_new_vtctl.yml index 1a523b1139f..dd9b2bc59e1 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_new_vtctl.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_new_vtctl.yml @@ -17,7 +17,7 @@ jobs: upgrade_downgrade_test: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Reparent New Vtctl - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_reparent_new_vttablet.yml b/.github/workflows/upgrade_downgrade_test_reparent_new_vttablet.yml index e59ce370a70..44858e4daef 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_new_vttablet.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_new_vttablet.yml @@ -17,7 +17,7 @@ jobs: upgrade_downgrade_test: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Reparent New VTTablet - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git 
a/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml b/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml index a67c78938c4..bb81a1af175 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_old_vtctl.yml @@ -17,7 +17,7 @@ jobs: upgrade_downgrade_test: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Reparent Old Vtctl - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml b/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml index 3dc47d27532..b38d48c168f 100644 --- a/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml +++ b/.github/workflows/upgrade_downgrade_test_reparent_old_vttablet.yml @@ -17,7 +17,7 @@ jobs: upgrade_downgrade_test: timeout-minutes: 60 name: Run Upgrade Downgrade Test - Reparent Old VTTablet - runs-on: gh-hosted-runners-16cores-1-24.04 + runs-on: ${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }} steps: - name: Skip CI diff --git a/.github/workflows/vitess_tester_vtgate.yml b/.github/workflows/vitess_tester_vtgate.yml index 1d96907c6c1..f861f03b13b 100644 --- a/.github/workflows/vitess_tester_vtgate.yml +++ b/.github/workflows/vitess_tester_vtgate.yml @@ -16,7 +16,7 @@ env: jobs: build: name: Run endtoend tests on Vitess Tester (vtgate) - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI @@ -112,7 +112,7 @@ jobs: # install JUnit report formatter go install github.com/vitessio/go-junit-report@HEAD - + # install vitess tester go install github.com/vitessio/vt/go/vt@e43009309f599378504905d4b804460f47822ac5 @@ -141,12 +141,12 @@ jobs: set -exo pipefail i=1 - for dir in ./go/test/endtoend/vtgate/vitess_tester/*/; do + for dir in ./go/test/endtoend/vtgate/vitess_tester/*/; do # We go over all the directories in the given path. # If there is a vschema file there, we use it, otherwise we let vt tester autogenerate it. if [ -f $dir/vschema.json ]; then vt tester --xunit --vschema "$dir"vschema.json $dir/*.test - else + else vt tester --sharded --xunit $dir/*.test fi # Number the reports by changing their file names. diff --git a/.github/workflows/vtadmin_web_build.yml b/.github/workflows/vtadmin_web_build.yml index 334d78d2ffa..0a686ac8485 100644 --- a/.github/workflows/vtadmin_web_build.yml +++ b/.github/workflows/vtadmin_web_build.yml @@ -1,6 +1,6 @@ name: vtadmin-web build -# In specifying the 'paths' property, we need to include the path to this workflow .yml file. +# In specifying the 'paths' property, we need to include the path to this workflow .yml file. # See https://github.community/t/trigger-a-workflow-on-change-to-the-yml-file-itself/17792/4) on: push: @@ -16,7 +16,7 @@ permissions: read-all jobs: build: - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI run: | @@ -55,6 +55,6 @@ jobs: run: cd ./web/vtadmin && npm run build # Cancel pending and in-progress runs of this workflow if a newer ref is pushed to CI. 
- concurrency: + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/vtadmin_web_lint.yml b/.github/workflows/vtadmin_web_lint.yml index 9b05a44a4c2..3ff915d77dc 100644 --- a/.github/workflows/vtadmin_web_lint.yml +++ b/.github/workflows/vtadmin_web_lint.yml @@ -1,6 +1,6 @@ name: vtadmin-web linting + formatting -# In specifying the 'paths' property, we need to include the path to this workflow .yml file. +# In specifying the 'paths' property, we need to include the path to this workflow .yml file. # See https://github.community/t/trigger-a-workflow-on-change-to-the-yml-file-itself/17792/4) on: push: @@ -16,7 +16,7 @@ permissions: read-all jobs: lint: - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI run: | @@ -51,13 +51,13 @@ jobs: run: cd ./web/vtadmin && npm ci # Using "if: always()" means each step will run, even if a previous - # step fails. This is nice because, for example, we want stylelint and - # prettier to run even if eslint fails. + # step fails. This is nice because, for example, we want stylelint and + # prettier to run even if eslint fails. # # An undesirable secondary effect of this is these steps # will run even if the install, etc. steps fail, which is... weird. # A nice enhancement is to parallelize these steps into jobs, with the - # trade-off of more complexity around sharing npm install artifacts. + # trade-off of more complexity around sharing npm install artifacts. - name: Run eslint if: steps.skip-workflow.outputs.skip-workflow == 'false' && always() run: cd ./web/vtadmin && npm run lint:eslint @@ -65,12 +65,12 @@ jobs: - name: Run stylelint if: steps.skip-workflow.outputs.skip-workflow == 'false' && always() run: cd ./web/vtadmin && npm run lint:stylelint -- -f verbose - + - name: Run prettier if: steps.skip-workflow.outputs.skip-workflow == 'false' && always() run: cd ./web/vtadmin && npm run lint:prettier # Cancel pending and in-progress runs of this workflow if a newer ref is pushed to CI. - concurrency: + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/vtadmin_web_unit_tests.yml b/.github/workflows/vtadmin_web_unit_tests.yml index 08e8dcd8806..2e285d736f1 100644 --- a/.github/workflows/vtadmin_web_unit_tests.yml +++ b/.github/workflows/vtadmin_web_unit_tests.yml @@ -1,6 +1,6 @@ name: vtadmin-web unit tests -# In specifying the 'paths' property, we need to include the path to this workflow .yml file. +# In specifying the 'paths' property, we need to include the path to this workflow .yml file. # See https://github.community/t/trigger-a-workflow-on-change-to-the-yml-file-itself/17792/4) on: push: @@ -16,7 +16,7 @@ permissions: read-all jobs: unit-tests: - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI run: | @@ -53,8 +53,8 @@ jobs: - name: Run unit tests if: steps.skip-workflow.outputs.skip-workflow == 'false' run: cd ./web/vtadmin && CI=true npm run test - + # Cancel pending and in-progress runs of this workflow if a newer ref is pushed to CI. 
- concurrency: + concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true diff --git a/go.mod b/go.mod index f6575f0faa9..e37a1d38031 100644 --- a/go.mod +++ b/go.mod @@ -51,7 +51,7 @@ require ( github.com/planetscale/pargzip v0.0.0-20201116224723-90c7fc03ea8a github.com/planetscale/vtprotobuf v0.5.0 github.com/prometheus/client_golang v1.19.0 - github.com/prometheus/common v0.49.0 // indirect + github.com/prometheus/common v0.49.0 github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 github.com/sjmudd/stopwatch v0.1.1 github.com/soheilhy/cmux v0.1.5 diff --git a/go/cmd/vtadmin/main.go b/go/cmd/vtadmin/main.go index 2548986c2ba..6cc3b9065b5 100644 --- a/go/cmd/vtadmin/main.go +++ b/go/cmd/vtadmin/main.go @@ -221,8 +221,6 @@ func main() { if err := rootCmd.Execute(); err != nil { log.Fatal(err) } - - log.Flush() } type noopCloser struct{} diff --git a/go/cmd/vtcombo/cli/main.go b/go/cmd/vtcombo/cli/main.go index 7b9143f1384..5b2ac09a09f 100644 --- a/go/cmd/vtcombo/cli/main.go +++ b/go/cmd/vtcombo/cli/main.go @@ -40,6 +40,7 @@ import ( "vitess.io/vitess/go/vt/srvtopo" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/topo/memorytopo" + "vitess.io/vitess/go/vt/topo/topoproto" "vitess.io/vitess/go/vt/topotools" "vitess.io/vitess/go/vt/vtcombo" "vitess.io/vitess/go/vt/vtctld" @@ -77,9 +78,10 @@ In particular, it contains: plannerName string vschemaPersistenceDir string - tpb vttestpb.VTTestTopology - ts *topo.Server - resilientServer *srvtopo.ResilientServer + tpb vttestpb.VTTestTopology + ts *topo.Server + resilientServer *srvtopo.ResilientServer + tabletTypesToWait []topodatapb.TabletType env *vtenv.Environment ) @@ -113,6 +115,7 @@ func init() { Main.Flags().Var(vttest.TextTopoData(&tpb), "proto_topo", "vttest proto definition of the topology, encoded in compact text format. See vttest.proto for more information.") Main.Flags().Var(vttest.JSONTopoData(&tpb), "json_topo", "vttest proto definition of the topology, encoded in json format. See vttest.proto for more information.") + Main.Flags().Var((*topoproto.TabletTypeListFlag)(&tabletTypesToWait), "tablet_types_to_wait", "Wait till connected for specified tablet types during Gateway initialization. Should be provided as a comma-separated set of tablet types.") // We're going to force the value later, so don't even bother letting the // user know about this flag. 
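The hunk that follows wires the parsed `--tablet_types_to_wait` value into vtgate initialization: the list is filtered down to serving tablet types, and startup aborts if nothing serving remains. A minimal, self-contained sketch of that validation step, reusing the `topoproto` helper the patch itself calls (the `filterServing` wrapper and `main` function here are illustrative, not part of the patch):

```go
package main

import (
	"fmt"
	"log"

	topodatapb "vitess.io/vitess/go/vt/proto/topodata"
	"vitess.io/vitess/go/vt/topo/topoproto"
)

// filterServing keeps only tablet types that can serve queries
// (PRIMARY, REPLICA, RDONLY), mirroring the check added to run() below.
func filterServing(types []topodatapb.TabletType) []topodatapb.TabletType {
	out := make([]topodatapb.TabletType, 0, len(types))
	for _, tt := range types {
		if topoproto.IsServingType(tt) {
			out = append(out, tt)
		}
	}
	return out
}

func main() {
	// DRAINED is not a serving type, so it is filtered out.
	in := []topodatapb.TabletType{topodatapb.TabletType_PRIMARY, topodatapb.TabletType_DRAINED}
	serving := filterServing(in)
	if len(serving) == 0 {
		log.Fatal("tablet_types_to_wait should contain at least one serving tablet type")
	}
	fmt.Println(serving) // [PRIMARY]
}
```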
@@ -294,11 +297,22 @@ func run(cmd *cobra.Command, args []string) (err error) { // vtgate configuration and init resilientServer = srvtopo.NewResilientServer(context.Background(), ts, "ResilientSrvTopoServer") - tabletTypesToWait := []topodatapb.TabletType{ - topodatapb.TabletType_PRIMARY, - topodatapb.TabletType_REPLICA, - topodatapb.TabletType_RDONLY, + + tabletTypes := make([]topodatapb.TabletType, 0, 1) + if len(tabletTypesToWait) != 0 { + for _, tt := range tabletTypesToWait { + if topoproto.IsServingType(tt) { + tabletTypes = append(tabletTypes, tt) + } + } + + if len(tabletTypes) == 0 { + log.Exitf("tablet_types_to_wait should contain at least one serving tablet type") + } + } else { + tabletTypes = append(tabletTypes, topodatapb.TabletType_PRIMARY, topodatapb.TabletType_REPLICA, topodatapb.TabletType_RDONLY) } + plannerVersion, _ := plancontext.PlannerNameToVersion(plannerName) vtgate.QueryLogHandler = "/debug/vtgate/querylog" @@ -306,7 +320,7 @@ func run(cmd *cobra.Command, args []string) (err error) { vtgate.QueryzHandler = "/debug/vtgate/queryz" // pass nil for healthcheck, it will get created - vtg := vtgate.Init(context.Background(), env, nil, resilientServer, tpb.Cells[0], tabletTypesToWait, plannerVersion) + vtg := vtgate.Init(context.Background(), env, nil, resilientServer, tpb.Cells[0], tabletTypes, plannerVersion) // vtctld configuration and init err = vtctld.InitVtctld(env, ts) diff --git a/go/flags/endtoend/vtcombo.txt b/go/flags/endtoend/vtcombo.txt index 8b62d4e9edf..0371ce26aa9 100644 --- a/go/flags/endtoend/vtcombo.txt +++ b/go/flags/endtoend/vtcombo.txt @@ -349,6 +349,7 @@ Flags: --tablet_manager_protocol string Protocol to use to make tabletmanager RPCs to vttablets. (default "grpc") --tablet_refresh_interval duration Tablet refresh interval. (default 1m0s) --tablet_refresh_known_tablets Whether to reload the tablet's address/port map from topo in case they change. (default true) + --tablet_types_to_wait strings Wait till connected for specified tablet types during Gateway initialization. Should be provided as a comma-separated set of tablet types. --tablet_url_template string Format string describing debug tablet url formatting. See getTabletDebugURL() for how to customize this. (default "http://{{ "{{.GetTabletHostPort}}" }}") --throttle_tablet_types string Comma separated VTTablet types to be considered by the throttler. default: 'replica'. example: 'replica,rdonly'. 'replica' always implicitly included (default "replica") --topo_consul_lock_delay duration LockDelay for consul session. (default 15s) diff --git a/go/logstats/logger.go b/go/logstats/logger.go new file mode 100644 index 00000000000..90e208e7703 --- /dev/null +++ b/go/logstats/logger.go @@ -0,0 +1,217 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package logstats + +import ( + "io" + "slices" + "strconv" + "strings" + "sync" + "time" + + "vitess.io/vitess/go/hack" + "vitess.io/vitess/go/sqltypes" + querypb "vitess.io/vitess/go/vt/proto/query" +) + +type logbv struct { + Name string + BVar *querypb.BindVariable +} + +// Logger is a zero-allocation logger for logstats. +// It can output logs as JSON or as plaintext, following the commonly used +// logstats format that is shared between the tablets and the gates. +type Logger struct { + b []byte + bvars []logbv + n int + json bool +} + +func sortBVars(sorted []logbv, bvars map[string]*querypb.BindVariable) []logbv { + for k, bv := range bvars { + sorted = append(sorted, logbv{k, bv}) + } + slices.SortFunc(sorted, func(a, b logbv) int { + return strings.Compare(a.Name, b.Name) + }) + return sorted +} + +func (log *Logger) appendBVarsJSON(b []byte, bvars map[string]*querypb.BindVariable, full bool) []byte { + log.bvars = sortBVars(log.bvars[:0], bvars) + + b = append(b, '{') + for i, bv := range log.bvars { + if i > 0 { + b = append(b, ',', ' ') + } + b = strconv.AppendQuote(b, bv.Name) + b = append(b, `: {"type": `...) + b = strconv.AppendQuote(b, querypb.Type_name[int32(bv.BVar.Type)]) + b = append(b, `, "value": `...) + + if sqltypes.IsIntegral(bv.BVar.Type) || sqltypes.IsFloat(bv.BVar.Type) { + b = append(b, bv.BVar.Value...) + } else if bv.BVar.Type == sqltypes.Tuple { + b = append(b, '"') + b = strconv.AppendInt(b, int64(len(bv.BVar.Values)), 10) + b = append(b, ` items"`...) + } else { + if full { + b = strconv.AppendQuote(b, hack.String(bv.BVar.Value)) + } else { + b = append(b, '"') + b = strconv.AppendInt(b, int64(len(bv.BVar.Value)), 10) + b = append(b, ` bytes"`...) + } + } + b = append(b, '}') + } + return append(b, '}') +} + +func (log *Logger) Init(json bool) { + log.n = 0 + log.json = json + if log.json { + log.b = append(log.b, '{') + } +} + +func (log *Logger) Redacted() { + log.String("[REDACTED]") +} + +func (log *Logger) Key(key string) { + if log.json { + if log.n > 0 { + log.b = append(log.b, ',', ' ') + } + log.b = append(log.b, '"') + log.b = append(log.b, key...) + log.b = append(log.b, '"', ':', ' ') + } else { + if log.n > 0 { + log.b = append(log.b, '\t') + } + } + log.n++ +} + +func (log *Logger) StringUnquoted(value string) { + if log.json { + log.b = strconv.AppendQuote(log.b, value) + } else { + log.b = append(log.b, value...) + } +} + +func (log *Logger) TabTerminated() { + if !log.json { + log.b = append(log.b, '\t') + } +} + +func (log *Logger) String(value string) { + log.b = strconv.AppendQuote(log.b, value) +} + +func (log *Logger) StringSingleQuoted(value string) { + if log.json { + log.b = strconv.AppendQuote(log.b, value) + } else { + log.b = append(log.b, '\'') + log.b = append(log.b, value...)
+ log.b = append(log.b, '\'') + } +} + +func (log *Logger) Time(t time.Time) { + const timeFormat = "2006-01-02 15:04:05.000000" + if log.json { + log.b = append(log.b, '"') + log.b = t.AppendFormat(log.b, timeFormat) + log.b = append(log.b, '"') + } else { + log.b = t.AppendFormat(log.b, timeFormat) + } +} + +func (log *Logger) Duration(t time.Duration) { + log.b = strconv.AppendFloat(log.b, t.Seconds(), 'f', 6, 64) +} + +func (log *Logger) BindVariables(bvars map[string]*querypb.BindVariable, full bool) { + // the bind variables are printed as JSON in text mode because the original + // printing syntax, which was simply `fmt.Sprintf("%v")`, is not stable or + // safe to parse + log.b = log.appendBVarsJSON(log.b, bvars, full) +} + +func (log *Logger) Int(i int64) { + log.b = strconv.AppendInt(log.b, i, 10) +} + +func (log *Logger) Uint(u uint64) { + log.b = strconv.AppendUint(log.b, u, 10) +} + +func (log *Logger) Bool(b bool) { + log.b = strconv.AppendBool(log.b, b) +} + +func (log *Logger) Strings(strs []string) { + log.b = append(log.b, '[') + for i, t := range strs { + if i > 0 { + log.b = append(log.b, ',') + } + log.b = strconv.AppendQuote(log.b, t) + } + log.b = append(log.b, ']') +} + +func (log *Logger) Flush(w io.Writer) (err error) { + if log.json { + log.b = append(log.b, '}') + } + log.b = append(log.b, '\n') + _, err = w.Write(log.b) + + clear(log.bvars) + log.bvars = log.bvars[:0] + log.b = log.b[:0] + log.n = 0 + + loggerPool.Put(log) + return err +} + +var loggerPool = sync.Pool{New: func() any { + return &Logger{} +}} + +// NewLogger returns a new Logger instance to perform logstats logging. +// The logger must be initialized with (*Logger).Init before usage and +// flushed with (*Logger).Flush once all the key-values have been written +// to it. 
+func NewLogger() *Logger { + return loggerPool.Get().(*Logger) +} diff --git a/go/test/endtoend/vreplication/cluster_test.go b/go/test/endtoend/vreplication/cluster_test.go index 7f06dc87680..33449462503 100644 --- a/go/test/endtoend/vreplication/cluster_test.go +++ b/go/test/endtoend/vreplication/cluster_test.go @@ -56,8 +56,9 @@ var ( sidecarDBIdentifier = sqlparser.String(sqlparser.NewIdentifierCS(sidecarDBName)) mainClusterConfig *ClusterConfig externalClusterConfig *ClusterConfig - extraVTGateArgs = []string{"--tablet_refresh_interval", "10ms", "--enable_buffer", "--buffer_window", loadTestBufferingWindowDurationStr, - "--buffer_size", "100000", "--buffer_min_time_between_failovers", "0s", "--buffer_max_failover_duration", loadTestBufferingWindowDurationStr} + extraVTGateArgs = []string{"--tablet_refresh_interval", "10ms", "--enable_buffer", "--buffer_window", loadTestBufferingWindowDuration.String(), + "--buffer_size", "250000", "--buffer_min_time_between_failovers", "1s", "--buffer_max_failover_duration", loadTestBufferingWindowDuration.String(), + "--buffer_drain_concurrency", "10"} extraVtctldArgs = []string{"--remote_operation_timeout", "600s", "--topo_etcd_lease_ttl", "120"} // This variable can be used within specific tests to alter vttablet behavior extraVTTabletArgs = []string{} diff --git a/go/test/endtoend/vreplication/fk_config_test.go b/go/test/endtoend/vreplication/fk_config_test.go index 97d868cb371..822d44105b4 100644 --- a/go/test/endtoend/vreplication/fk_config_test.go +++ b/go/test/endtoend/vreplication/fk_config_test.go @@ -67,7 +67,7 @@ insert into t2 values(1, 1, 't21'), (2, 1, 't22'), (3, 2, 't23'); } ] }, - "t1": { + "t1": { "column_vindexes": [ { "column": "id", @@ -75,7 +75,7 @@ insert into t2 values(1, 1, 't21'), (2, 1, 't22'), (3, 2, 't23'); } ] }, - "t2": { + "t2": { "column_vindexes": [ { "column": "t1id", diff --git a/go/test/endtoend/vreplication/fk_test.go b/go/test/endtoend/vreplication/fk_test.go index 64587a21da8..00842ca7f34 100644 --- a/go/test/endtoend/vreplication/fk_test.go +++ b/go/test/endtoend/vreplication/fk_test.go @@ -285,10 +285,10 @@ func (ls *fkLoadSimulator) exec(query string) *sqltypes.Result { // constraints, where the parent table is lexicographically sorted before the child table and // thus may be dropped first, can be successfully cancelled. 
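Stepping back to the new go/logstats/logger.go above: the intended call pattern is pool-acquire, Init, a sequence of Key/value writes, then Flush, which both emits the record and returns the Logger to the pool. A hypothetical caller sketch (the key names such as "Method" are illustrative; the package does not mandate a schema):

```go
package main

import (
	"os"
	"time"

	"vitess.io/vitess/go/logstats"
)

func main() {
	logger := logstats.NewLogger()
	logger.Init(true) // true selects JSON output; false selects tab-separated plaintext

	logger.Key("Method")
	logger.String("Execute")
	logger.Key("Start")
	logger.Time(time.Now())
	logger.Key("TotalTime")
	logger.Duration(1500 * time.Microsecond) // printed as seconds: 0.001500
	logger.Key("RowsAffected")
	logger.Uint(3)

	// Flush terminates the record, writes it out, resets the buffers,
	// and puts the Logger back into the pool; don't reuse it afterwards.
	if err := logger.Flush(os.Stdout); err != nil {
		panic(err)
	}
}
```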
func testFKCancel(t *testing.T, vc *VitessCluster) { - var targetKeyspace = "fktarget" - var sourceKeyspace = "fksource" - var workflowName = "wf2" - var ksWorkflow = fmt.Sprintf("%s.%s", targetKeyspace, workflowName) + targetKeyspace := "fktarget" + sourceKeyspace := "fksource" + workflowName := "wf2" + ksWorkflow := fmt.Sprintf("%s.%s", targetKeyspace, workflowName) mt := newMoveTables(vc, &moveTablesWorkflow{ workflowInfo: &workflowInfo{ vc: vc, diff --git a/go/test/endtoend/vreplication/helper_test.go b/go/test/endtoend/vreplication/helper_test.go index f5e89e157da..75a459c3f6d 100644 --- a/go/test/endtoend/vreplication/helper_test.go +++ b/go/test/endtoend/vreplication/helper_test.go @@ -18,17 +18,18 @@ package vreplication import ( "context" - "crypto/rand" "encoding/hex" "encoding/json" "fmt" "io" + "math/rand" "net/http" "os" "os/exec" "regexp" "sort" "strings" + "sync" "sync/atomic" "testing" "time" @@ -121,9 +122,10 @@ func getConnectionNoError(t *testing.T, hostname string, port int) *mysql.Conn { func getConnection(t *testing.T, hostname string, port int) *mysql.Conn { vtParams := mysql.ConnParams{ - Host: hostname, - Port: port, - Uname: "vt_dba", + Host: hostname, + Port: port, + Uname: "vt_dba", + ConnectTimeoutMs: 1000, } ctx := context.Background() conn, err := mysql.Connect(ctx, &vtParams) @@ -781,92 +783,111 @@ func getRowCount(t *testing.T, vtgateConn *mysql.Conn, table string) int { } const ( - loadTestBufferingWindowDurationStr = "30s" - loadTestPostBufferingInsertWindow = 60 * time.Second // should be greater than loadTestBufferingWindowDurationStr - loadTestWaitForCancel = 30 * time.Second - loadTestWaitBetweenQueries = 2 * time.Millisecond + loadTestBufferingWindowDuration = 10 * time.Second + loadTestAvgWaitBetweenQueries = 500 * time.Microsecond + loadTestDefaultConnections = 100 ) type loadGenerator struct { - t *testing.T - vc *VitessCluster - ctx context.Context - cancel context.CancelFunc + t *testing.T + vc *VitessCluster + ctx context.Context + cancel context.CancelFunc + connections int + wg sync.WaitGroup } func newLoadGenerator(t *testing.T, vc *VitessCluster) *loadGenerator { return &loadGenerator{ - t: t, - vc: vc, + t: t, + vc: vc, + connections: loadTestDefaultConnections, } } func (lg *loadGenerator) stop() { - time.Sleep(loadTestPostBufferingInsertWindow) // wait for buffering to stop and additional records to be inserted by startLoad after traffic is switched + // Wait for buffering to stop and additional records to be inserted by start + // after traffic is switched. 
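The constants and struct above, together with the rewritten start() in the next hunk, replace fixed post-cancel sleeps with a context-plus-WaitGroup handshake: stop() cancels the context and then blocks until every worker goroutine has exited. The same shutdown pattern in isolation (a generic sketch, not the test harness itself):

```go
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

type generator struct {
	ctx    context.Context
	cancel context.CancelFunc
	wg     sync.WaitGroup
}

func (g *generator) start(workers int) {
	g.ctx, g.cancel = context.WithCancel(context.Background())
	for i := 0; i < workers; i++ {
		g.wg.Add(1)
		go func() {
			defer g.wg.Done()
			for {
				select {
				case <-g.ctx.Done():
					return // exit promptly on cancellation
				default:
					time.Sleep(time.Millisecond) // stand-in for issuing one query
				}
			}
		}()
	}
}

func (g *generator) stop() {
	g.cancel()
	g.wg.Wait() // deterministic: all workers have exited by this point
	fmt.Println("all workers drained")
}

func main() {
	g := &generator{}
	g.start(4)
	time.Sleep(10 * time.Millisecond)
	g.stop()
}
```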
+ time.Sleep(loadTestBufferingWindowDuration * 2) log.Infof("Canceling load") lg.cancel() - time.Sleep(loadTestWaitForCancel) // wait for cancel to take effect + lg.wg.Wait() } func (lg *loadGenerator) start() { t := lg.t lg.ctx, lg.cancel = context.WithCancel(context.Background()) + var connectionCount atomic.Int64 var id int64 - log.Infof("startLoad: starting") + log.Infof("loadGenerator: starting") queryTemplate := "insert into loadtest(id, name) values (%d, 'name-%d')" var totalQueries, successfulQueries int64 var deniedErrors, ambiguousErrors, reshardedErrors, tableNotFoundErrors, otherErrors int64 + lg.wg.Add(1) defer func() { - - log.Infof("startLoad: totalQueries: %d, successfulQueries: %d, deniedErrors: %d, ambiguousErrors: %d, reshardedErrors: %d, tableNotFoundErrors: %d, otherErrors: %d", + defer lg.wg.Done() + log.Infof("loadGenerator: totalQueries: %d, successfulQueries: %d, deniedErrors: %d, ambiguousErrors: %d, reshardedErrors: %d, tableNotFoundErrors: %d, otherErrors: %d", totalQueries, successfulQueries, deniedErrors, ambiguousErrors, reshardedErrors, tableNotFoundErrors, otherErrors) }() - logOnce := true for { select { case <-lg.ctx.Done(): - log.Infof("startLoad: context cancelled") - log.Infof("startLoad: deniedErrors: %d, ambiguousErrors: %d, reshardedErrors: %d, tableNotFoundErrors: %d, otherErrors: %d", + log.Infof("loadGenerator: context cancelled") + log.Infof("loadGenerator: deniedErrors: %d, ambiguousErrors: %d, reshardedErrors: %d, tableNotFoundErrors: %d, otherErrors: %d", deniedErrors, ambiguousErrors, reshardedErrors, tableNotFoundErrors, otherErrors) require.Equal(t, int64(0), deniedErrors) require.Equal(t, int64(0), otherErrors) + require.Equal(t, int64(0), reshardedErrors) require.Equal(t, totalQueries, successfulQueries) return default: - go func() { - conn := vc.GetVTGateConn(t) - defer conn.Close() - atomic.AddInt64(&id, 1) - query := fmt.Sprintf(queryTemplate, id, id) - _, err := conn.ExecuteFetch(query, 1, false) - atomic.AddInt64(&totalQueries, 1) - if err != nil { - sqlErr := err.(*sqlerror.SQLError) - if strings.Contains(strings.ToLower(err.Error()), "denied tables") { - log.Infof("startLoad: denied tables error executing query: %d:%v", sqlErr.Number(), err) - atomic.AddInt64(&deniedErrors, 1) - } else if strings.Contains(strings.ToLower(err.Error()), "ambiguous") { - // this can happen when a second keyspace is setup with the same tables, but there are no routing rules - // set yet by MoveTables. So we ignore these errors. 
- atomic.AddInt64(&ambiguousErrors, 1) - } else if strings.Contains(strings.ToLower(err.Error()), "current keyspace is being resharded") { - atomic.AddInt64(&reshardedErrors, 1) - } else if strings.Contains(strings.ToLower(err.Error()), "not found") { - atomic.AddInt64(&tableNotFoundErrors, 1) - } else { - if logOnce { - log.Infof("startLoad: error executing query: %d:%v", sqlErr.Number(), err) - logOnce = false + if int(connectionCount.Load()) < lg.connections { + connectionCount.Add(1) + lg.wg.Add(1) + go func() { + defer lg.wg.Done() + defer connectionCount.Add(-1) + conn := vc.GetVTGateConn(t) + defer conn.Close() + for { + select { + case <-lg.ctx.Done(): + return + default: } - atomic.AddInt64(&otherErrors, 1) + newID := atomic.AddInt64(&id, 1) + query := fmt.Sprintf(queryTemplate, newID, newID) + _, err := conn.ExecuteFetch(query, 1, false) + atomic.AddInt64(&totalQueries, 1) + if err != nil { + sqlErr := err.(*sqlerror.SQLError) + if strings.Contains(strings.ToLower(err.Error()), "denied tables") { + if debugMode { + t.Logf("loadGenerator: denied tables error executing query: %d:%v", sqlErr.Number(), err) + } + atomic.AddInt64(&deniedErrors, 1) + } else if strings.Contains(strings.ToLower(err.Error()), "ambiguous") { + // This can happen when a second keyspace is setup with the same tables, but + // there are no routing rules set yet by MoveTables. So we ignore these errors. + atomic.AddInt64(&ambiguousErrors, 1) + } else if strings.Contains(strings.ToLower(err.Error()), "current keyspace is being resharded") { + atomic.AddInt64(&reshardedErrors, 1) + } else if strings.Contains(strings.ToLower(err.Error()), "not found") { + atomic.AddInt64(&tableNotFoundErrors, 1) + } else { + if debugMode { + t.Logf("loadGenerator: error executing query: %d:%v", sqlErr.Number(), err) + } + atomic.AddInt64(&otherErrors, 1) + } + } else { + atomic.AddInt64(&successfulQueries, 1) + } + time.Sleep(time.Duration(float64(loadTestAvgWaitBetweenQueries) * rand.Float64())) } - time.Sleep(loadTestWaitBetweenQueries) - } else { - atomic.AddInt64(&successfulQueries, 1) - } - }() - time.Sleep(loadTestWaitBetweenQueries) + }() + } } } } diff --git a/go/test/endtoend/vreplication/movetables_buffering_test.go b/go/test/endtoend/vreplication/movetables_buffering_test.go index a977320ec4a..f456c32bfd5 100644 --- a/go/test/endtoend/vreplication/movetables_buffering_test.go +++ b/go/test/endtoend/vreplication/movetables_buffering_test.go @@ -2,6 +2,7 @@ package vreplication import ( "testing" + "time" "github.com/stretchr/testify/require" @@ -33,8 +34,12 @@ func TestMoveTablesBuffering(t *testing.T) { catchup(t, targetTab2, workflowName, "MoveTables") vdiffSideBySide(t, ksWorkflow, "") waitForLowLag(t, "customer", workflowName) - tstWorkflowSwitchReads(t, "", "") - tstWorkflowSwitchWrites(t) + for i := 0; i < 10; i++ { + tstWorkflowSwitchReadsAndWrites(t) + time.Sleep(loadTestBufferingWindowDuration + 1*time.Second) + tstWorkflowReverseReadsAndWrites(t) + time.Sleep(loadTestBufferingWindowDuration + 1*time.Second) + } log.Infof("SwitchWrites done") lg.stop() diff --git a/go/test/endtoend/vreplication/partial_movetables_test.go b/go/test/endtoend/vreplication/partial_movetables_test.go index 2f0c7c71d29..4236bff95a3 100644 --- a/go/test/endtoend/vreplication/partial_movetables_test.go +++ b/go/test/endtoend/vreplication/partial_movetables_test.go @@ -20,6 +20,7 @@ import ( "fmt" "strings" "testing" + "time" binlogdatapb "vitess.io/vitess/go/vt/proto/binlogdata" @@ -67,10 +68,12 @@ func
testCancel(t *testing.T) { mt.SwitchReadsAndWrites() checkDenyList(targetKeyspace, false) checkDenyList(sourceKeyspace, true) + time.Sleep(loadTestBufferingWindowDuration + 1*time.Second) mt.ReverseReadsAndWrites() checkDenyList(targetKeyspace, true) checkDenyList(sourceKeyspace, false) + time.Sleep(loadTestBufferingWindowDuration + 1*time.Second) mt.Cancel() checkDenyList(targetKeyspace, false) @@ -123,6 +126,7 @@ func testPartialMoveTablesBasic(t *testing.T, flavor workflowFlavor) { catchup(t, targetTab80Dash, workflowName, "MoveTables") vdiff(t, targetKeyspace, workflowName, defaultCellName, false, true, nil) mt.SwitchReadsAndWrites() + time.Sleep(loadTestBufferingWindowDuration + 1*time.Second) mt.Complete() emptyGlobalRoutingRules := "{}\n" @@ -246,6 +250,7 @@ func testPartialMoveTablesBasic(t *testing.T, flavor workflowFlavor) { // Switch all traffic for the shard mt80Dash.SwitchReadsAndWrites() + time.Sleep(loadTestBufferingWindowDuration + 1*time.Second) // Confirm global routing rules -- everything should still be routed // to the source side, customer, globally. @@ -331,6 +336,7 @@ func testPartialMoveTablesBasic(t *testing.T, flavor workflowFlavor) { catchup(t, targetTabDash80, workflowName, "MoveTables") vdiff(t, targetKeyspace, workflowName, defaultCellName, false, true, nil) mtDash80.SwitchReadsAndWrites() + time.Sleep(loadTestBufferingWindowDuration + 1*time.Second) // Confirm global routing rules: everything should still be routed // to the source side, customer, globally. diff --git a/go/test/endtoend/vreplication/vreplication_test.go b/go/test/endtoend/vreplication/vreplication_test.go index 7e3f93b6b20..99a4512c45e 100644 --- a/go/test/endtoend/vreplication/vreplication_test.go +++ b/go/test/endtoend/vreplication/vreplication_test.go @@ -22,6 +22,7 @@ import ( "io" "net/http" "runtime" + "strconv" "strings" "sync" "testing" @@ -57,7 +58,7 @@ var ( targetKsOpts = make(map[string]string) httpClient = throttlebase.SetupHTTPClient(time.Second) sourceThrottlerAppName = throttlerapp.VStreamerName - targetThrottlerAppName = throttlerapp.VReplicationName + targetThrottlerAppName = throttlerapp.VPlayerName ) const ( @@ -1167,7 +1168,7 @@ func materialize(t *testing.T, spec string, useVtctldClient bool) { func materializeProduct(t *testing.T, useVtctldClient bool) { t.Run("materializeProduct", func(t *testing.T) { - // materializing from "product" keyspace to "customer" keyspace + // Materializing from "product" keyspace to "customer" keyspace. workflow := "cproduct" keyspace := "customer" defaultCell := vc.Cells[vc.CellNames[0]] @@ -1181,7 +1182,7 @@ func materializeProduct(t *testing.T, useVtctldClient bool) { productTablets := vc.getVttabletsInKeyspace(t, defaultCell, "product", "primary") t.Run("throttle-app-product", func(t *testing.T) { - // Now, throttle the streamer on source tablets, insert some rows + // Now, throttle the source side component (vstreamer), and insert some rows. for _, tab := range productTablets { body, err := throttleApp(tab, sourceThrottlerAppName) assert.NoError(t, err) @@ -1192,19 +1193,22 @@ func materializeProduct(t *testing.T, useVtctldClient bool) { waitForTabletThrottlingStatus(t, tab, targetThrottlerAppName, throttlerStatusNotThrottled) } insertMoreProductsForSourceThrottler(t) - // To be fair to the test, we give the target time to apply the new changes. 
We expect it to NOT get them in the first place, - // we expect the additional rows to **not appear** in the materialized view + // To be fair to the test, we give the target time to apply the new changes. We + // expect it to NOT get them in the first place, we expect the additional rows + // to **not appear** in the materialized view. for _, tab := range customerTablets { waitForRowCountInTablet(t, tab, keyspace, workflow, 5) + // Confirm that we updated the stats on the target tablets as expected. + confirmVReplicationThrottling(t, tab, sourceKs, workflow, sourceThrottlerAppName) } }) t.Run("unthrottle-app-product", func(t *testing.T) { - // unthrottle on source tablets, and expect the rows to show up + // Unthrottle the vstreamer component, and expect the rows to show up. for _, tab := range productTablets { body, err := unthrottleApp(tab, sourceThrottlerAppName) assert.NoError(t, err) assert.Contains(t, body, sourceThrottlerAppName) - // give time for unthrottling to take effect and for target to fetch data + // Give time for unthrottling to take effect and for targets to fetch data. waitForTabletThrottlingStatus(t, tab, sourceThrottlerAppName, throttlerStatusNotThrottled) } for _, tab := range customerTablets { @@ -1213,8 +1217,8 @@ func materializeProduct(t *testing.T, useVtctldClient bool) { }) t.Run("throttle-app-customer", func(t *testing.T) { - // Now, throttle vreplication (vcopier/vapplier) on target tablets, and - // insert some more rows. + // Now, throttle vreplication on the target side (vplayer), and insert some + // more rows. for _, tab := range customerTablets { body, err := throttleApp(tab, targetThrottlerAppName) assert.NoError(t, err) @@ -1229,6 +1233,8 @@ func materializeProduct(t *testing.T, useVtctldClient bool) { // rows to **not appear** in the materialized view. for _, tab := range customerTablets { waitForRowCountInTablet(t, tab, keyspace, workflow, 8) + // Confirm that we updated the stats on the target tablets as expected. + confirmVReplicationThrottling(t, tab, sourceKs, workflow, targetThrottlerAppName) } }) t.Run("unthrottle-app-customer", func(t *testing.T) { @@ -1784,3 +1790,52 @@ func waitForInnoDBHistoryLength(t *testing.T, tablet *cluster.VttabletProcess, e func releaseInnoDBRowHistory(t *testing.T, dbConn *mysql.Conn) { execQuery(t, dbConn, "rollback") } + +// confirmVReplicationThrottling confirms that the throttling related metrics reflect that +// the workflow is being throttled as expected, via the expected app name, and that this +// is impacting the lag as expected. +// The tablet passed should be a target tablet for the given workflow while the keyspace +// name provided should be the source keyspace as the target tablet stats note the stream's +// source keyspace and shard. 
+func confirmVReplicationThrottling(t *testing.T, tab *cluster.VttabletProcess, keyspace, workflow string, appname throttlerapp.Name) { + const ( + sleepTime = 5 * time.Second + zv = int64(0) + ) + time.Sleep(sleepTime) // To be sure that we accrue some lag + + jsVal, err := getDebugVar(t, tab.Port, []string{"VReplicationThrottledCounts"}) + require.NoError(t, err) + require.NotEqual(t, "{}", jsVal) + // The JSON value looks like this: {"cproduct.4.tablet.vstreamer": 2, "cproduct.4.tablet.vplayer": 4} + throttledCount := gjson.Get(jsVal, fmt.Sprintf(`%s\.*\.tablet\.%s`, workflow, appname)).Int() + require.Greater(t, throttledCount, zv, "JSON value: %s", jsVal) + + val, err := getDebugVar(t, tab.Port, []string{"VReplicationThrottledCountTotal"}) + require.NoError(t, err) + require.NotEqual(t, "", val) + throttledCountTotal, err := strconv.ParseInt(val, 10, 64) + require.NoError(t, err) + require.GreaterOrEqual(t, throttledCountTotal, throttledCount, "Value: %s", val) + + // We do not calculate replication lag for the vcopier as it's not replicating + // events. + if appname != throttlerapp.VCopierName { + jsVal, err = getDebugVar(t, tab.Port, []string{"VReplicationLagSeconds"}) + require.NoError(t, err) + require.NotEqual(t, "{}", jsVal) + // The JSON value looks like this: {"product.0.cproduct.4": 6} + vreplLagSeconds := gjson.Get(jsVal, fmt.Sprintf(`%s\.*\.%s\.*`, keyspace, workflow)).Int() + require.NoError(t, err) + // Take off 1 second to deal with timing issues in the test. + minLagSecs := int64(int64(sleepTime.Seconds()) - 1) + require.GreaterOrEqual(t, vreplLagSeconds, minLagSecs, "JSON value: %s", jsVal) + + val, err = getDebugVar(t, tab.Port, []string{"VReplicationLagSecondsMax"}) + require.NoError(t, err) + require.NotEqual(t, "", val) + vreplLagSecondsMax, err := strconv.ParseInt(val, 10, 64) + require.NoError(t, err) + require.GreaterOrEqual(t, vreplLagSecondsMax, vreplLagSeconds, "Value: %s", val) + } +} diff --git a/go/vt/binlog/binlogplayer/binlog_player.go b/go/vt/binlog/binlogplayer/binlog_player.go index 35f50269a29..d7479a23a08 100644 --- a/go/vt/binlog/binlogplayer/binlog_player.go +++ b/go/vt/binlog/binlogplayer/binlog_player.go @@ -106,6 +106,8 @@ type Stats struct { PartialQueryCount *stats.CountersWithMultiLabels PartialQueryCacheSize *stats.CountersWithMultiLabels + + ThrottledCounts *stats.CountersWithMultiLabels // By throttler and component } // RecordHeartbeat updates the time the last heartbeat from vstreamer was seen @@ -175,6 +177,7 @@ func NewStats() *Stats { bps.TableCopyTimings = stats.NewTimings("", "", "Table") bps.PartialQueryCacheSize = stats.NewCountersWithMultiLabels("", "", []string{"type"}) bps.PartialQueryCount = stats.NewCountersWithMultiLabels("", "", []string{"type"}) + bps.ThrottledCounts = stats.NewCountersWithMultiLabels("", "", []string{"throttler", "component"}) return bps } @@ -370,13 +373,14 @@ func (blp *BinlogPlayer) applyEvents(ctx context.Context) error { if backoff == throttler.NotThrottled { break } + blp.blplStats.ThrottledCounts.Add([]string{"trx", "binlogplayer"}, 1) // We don't bother checking for context cancellation here because the // sleep will block only up to 1 second. (Usually, backoff is 1s / rate // e.g. a rate of 1000 TPS results into a backoff of 1 ms.) time.Sleep(backoff) } - // get the response + // Get the response. response, err := stream.Recv() // Check context before checking error, because canceled // contexts could be wrapped as regular errors. 
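The binlog player hunk above introduces ThrottledCounts, a counter labeled by throttler name and throttled component, and bumps it each time the apply loop backs off. A sketch of that accumulation pattern using the same stats API (the throttle check below is a stub standing in for the real throttler call):

```go
package main

import (
	"fmt"
	"time"

	"vitess.io/vitess/go/stats"
)

func main() {
	// Same shape as the new Stats.ThrottledCounts field.
	throttled := stats.NewCountersWithMultiLabels("", "", []string{"throttler", "component"})

	// Stub: pretend the first three attempts are throttled with a 1ms backoff.
	attempts := 0
	throttleBackoff := func() time.Duration {
		attempts++
		if attempts <= 3 {
			return time.Millisecond
		}
		return 0 // not throttled
	}

	for {
		backoff := throttleBackoff()
		if backoff == 0 {
			break
		}
		throttled.Add([]string{"trx", "binlogplayer"}, 1)
		time.Sleep(backoff)
	}
	fmt.Println(throttled.Counts()) // e.g. map[trx.binlogplayer:3]
}
```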
diff --git a/go/vt/discovery/healthcheck.go b/go/vt/discovery/healthcheck.go index 72b4214d5a8..39b6a0e1ad3 100644 --- a/go/vt/discovery/healthcheck.go +++ b/go/vt/discovery/healthcheck.go @@ -793,30 +793,8 @@ func (hc *HealthCheckImpl) WaitForAllServingTablets(ctx context.Context, targets return hc.waitForTablets(ctx, targets, true) } -// FilterTargetsByKeyspaces only returns the targets that are part of the provided keyspaces -func FilterTargetsByKeyspaces(keyspaces []string, targets []*query.Target) []*query.Target { - filteredTargets := make([]*query.Target, 0) - - // Keep them all if there are no keyspaces to watch - if len(KeyspacesToWatch) == 0 { - return append(filteredTargets, targets...) - } - - // Let's remove from the target shards that are not in the keyspaceToWatch list. - for _, target := range targets { - for _, keyspaceToWatch := range keyspaces { - if target.Keyspace == keyspaceToWatch { - filteredTargets = append(filteredTargets, target) - } - } - } - return filteredTargets -} - // waitForTablets is the internal method that polls for tablets. func (hc *HealthCheckImpl) waitForTablets(ctx context.Context, targets []*query.Target, requireServing bool) error { - targets = FilterTargetsByKeyspaces(KeyspacesToWatch, targets) - for { // We nil targets as we find them. allPresent := true diff --git a/go/vt/discovery/healthcheck_test.go b/go/vt/discovery/healthcheck_test.go index 19722641375..ba79fd56c61 100644 --- a/go/vt/discovery/healthcheck_test.go +++ b/go/vt/discovery/healthcheck_test.go @@ -728,27 +728,6 @@ func TestWaitForAllServingTablets(t *testing.T) { err = hc.WaitForAllServingTablets(ctx, targets) assert.NotNil(t, err, "error should not be nil (there are no tablets on this keyspace") - - targets = []*querypb.Target{ - - { - Keyspace: tablet.Keyspace, - Shard: tablet.Shard, - TabletType: tablet.Type, - }, - { - Keyspace: "newkeyspace", - Shard: tablet.Shard, - TabletType: tablet.Type, - }, - } - - KeyspacesToWatch = []string{tablet.Keyspace} - - err = hc.WaitForAllServingTablets(ctx, targets) - assert.Nil(t, err, "error should be nil. Keyspace with no tablets is filtered") - - KeyspacesToWatch = []string{} } // TestRemoveTablet tests the behavior when a tablet goes away. diff --git a/go/vt/discovery/keyspace_events.go b/go/vt/discovery/keyspace_events.go index 91f9e67ce3e..fea3b1901cb 100644 --- a/go/vt/discovery/keyspace_events.go +++ b/go/vt/discovery/keyspace_events.go @@ -19,8 +19,11 @@ package discovery import ( "context" "fmt" + "slices" "sync" + "time" + "golang.org/x/sync/errgroup" "google.golang.org/protobuf/proto" "vitess.io/vitess/go/vt/key" @@ -36,6 +39,11 @@ import ( vschemapb "vitess.io/vitess/go/vt/proto/vschema" ) +var ( + // waitConsistentKeyspacesCheck is the amount of time to wait for between checks to verify the keyspace is consistent. + waitConsistentKeyspacesCheck = 100 * time.Millisecond +) + // KeyspaceEventWatcher is an auxiliary watcher that watches all availability incidents // for all keyspaces in a Vitess cell and notifies listeners when the events have been resolved. 
// Right now this is capable of detecting the end of failovers, both planned and unplanned, @@ -93,18 +101,8 @@ func NewKeyspaceEventWatcher(ctx context.Context, topoServer srvtopo.Server, hc return kew } -type MoveTablesStatus int - -const ( - MoveTablesUnknown MoveTablesStatus = iota - // MoveTablesSwitching is set when the write traffic is the middle of being switched from the source to the target - MoveTablesSwitching - // MoveTablesSwitched is set when write traffic has been completely switched to the target - MoveTablesSwitched -) - // keyspaceState is the internal state for all the keyspaces that the KEW is -// currently watching +// currently watching. type keyspaceState struct { kew *KeyspaceEventWatcher keyspace string @@ -120,7 +118,7 @@ type keyspaceState struct { moveTablesState *MoveTablesState } -// Format prints the internal state for this keyspace for debug purposes +// Format prints the internal state for this keyspace for debug purposes. func (kss *keyspaceState) Format(f fmt.State, verb rune) { kss.mu.Lock() defer kss.mu.Unlock() @@ -137,9 +135,9 @@ func (kss *keyspaceState) Format(f fmt.State, verb rune) { fmt.Fprintf(f, "]\n") } -// beingResharded returns whether this keyspace is thought to be in the middle of a resharding -// operation. currentShard is the name of the shard that belongs to this keyspace and which -// we are trying to access. currentShard can _only_ be a primary shard. +// beingResharded returns whether this keyspace is thought to be in the middle of a +// resharding operation. currentShard is the name of the shard that belongs to this +// keyspace and which we are trying to access. currentShard can _only_ be a primary shard. func (kss *keyspaceState) beingResharded(currentShard string) bool { kss.mu.Lock() defer kss.mu.Unlock() @@ -183,11 +181,19 @@ type shardState struct { currentPrimary *topodatapb.TabletAlias } -// Subscribe returns a channel that will receive any KeyspaceEvents for all keyspaces in the current cell +// Subscribe returns a channel that will receive any KeyspaceEvents for all keyspaces in the +// current cell. func (kew *KeyspaceEventWatcher) Subscribe() chan *KeyspaceEvent { kew.subsMu.Lock() defer kew.subsMu.Unlock() - c := make(chan *KeyspaceEvent, 2) + // Use a decent size buffer to: + // 1. Avoid blocking the KEW + // 2. While not losing/missing any events + // 3. And processing them in the order received + // TODO: do we care about intermediate events? + // If not, then we could instead e.g. pull the first/oldest event + // from the channel, discard it, and add the current/latest. + c := make(chan *KeyspaceEvent, 10) kew.subs[c] = struct{}{} return c } @@ -199,14 +205,11 @@ func (kew *KeyspaceEventWatcher) Unsubscribe(c chan *KeyspaceEvent) { delete(kew.subs, c) } -func (kew *KeyspaceEventWatcher) broadcast(th *KeyspaceEvent) { +func (kew *KeyspaceEventWatcher) broadcast(ev *KeyspaceEvent) { kew.subsMu.Lock() defer kew.subsMu.Unlock() for c := range kew.subs { - select { - case c <- th: - default: - } + c <- ev } } @@ -244,7 +247,8 @@ func (kew *KeyspaceEventWatcher) run(ctx context.Context) { } // ensureConsistentLocked checks if the current keyspace has recovered from an availability -// event, and if so, returns information about the availability event to all subscribers +// event, and if so, returns information about the availability event to all subscribers. +// Note: you MUST be holding the ks.mu when calling this function. 
func (kss *keyspaceState) ensureConsistentLocked() { // if this keyspace is consistent, there's no ongoing availability event if kss.consistent { @@ -289,7 +293,8 @@ func (kss *keyspaceState) ensureConsistentLocked() { } } - // clone the current moveTablesState, if any, to handle race conditions where it can get updated while we're broadcasting + // Clone the current moveTablesState, if any, to handle race conditions where it can get + // updated while we're broadcasting. var moveTablesState MoveTablesState if kss.moveTablesState != nil { moveTablesState = *kss.moveTablesState @@ -316,8 +321,8 @@ func (kss *keyspaceState) ensureConsistentLocked() { Serving: sstate.serving, }) - log.Infof("keyspace event resolved: %s/%s is now consistent (serving: %v)", - sstate.target.Keyspace, sstate.target.Keyspace, + log.Infof("keyspace event resolved: %s is now consistent (serving: %t)", + topoproto.KeyspaceShardString(sstate.target.Keyspace, sstate.target.Shard), sstate.serving, ) @@ -329,9 +334,10 @@ func (kss *keyspaceState) ensureConsistentLocked() { kss.kew.broadcast(ksevent) } -// onHealthCheck is the callback that updates this keyspace with event data from the HealthCheck stream. -// the HealthCheck stream applies to all the keyspaces in the cluster and emits TabletHealth events to our -// parent KeyspaceWatcher, which will mux them into their corresponding keyspaceState +// onHealthCheck is the callback that updates this keyspace with event data from the HealthCheck +// stream. The HealthCheck stream applies to all the keyspaces in the cluster and emits +// TabletHealth events to our parent KeyspaceWatcher, which will mux them into their +// corresponding keyspaceState. func (kss *keyspaceState) onHealthCheck(th *TabletHealth) { // we only care about health events on the primary if th.Target.TabletType != topodatapb.TabletType_PRIMARY { @@ -401,6 +407,17 @@ func (kss *keyspaceState) onHealthCheck(th *TabletHealth) { kss.ensureConsistentLocked() } +type MoveTablesStatus int + +const ( + MoveTablesUnknown MoveTablesStatus = iota + // MoveTablesSwitching is set when the write traffic is the middle of being switched from + // the source to the target. + MoveTablesSwitching + // MoveTablesSwitched is set when write traffic has been completely switched to the target. + MoveTablesSwitched +) + type MoveTablesType int const ( @@ -414,33 +431,66 @@ type MoveTablesState struct { State MoveTablesStatus } +func (mts MoveTablesState) String() string { + var typ, state string + switch mts.Typ { + case MoveTablesRegular: + typ = "Regular" + case MoveTablesShardByShard: + typ = "ShardByShard" + default: + typ = "None" + } + switch mts.State { + case MoveTablesSwitching: + state = "Switching" + case MoveTablesSwitched: + state = "Switched" + default: + state = "Unknown" + } + return fmt.Sprintf("{Type: %s, State: %s}", typ, state) +} + func (kss *keyspaceState) getMoveTablesStatus(vs *vschemapb.SrvVSchema) (*MoveTablesState, error) { mtState := &MoveTablesState{ Typ: MoveTablesNone, State: MoveTablesUnknown, } - // if there are no routing rules defined, then movetables is not in progress, exit early + // If there are no routing rules defined, then movetables is not in progress, exit early. 
if len(vs.GetRoutingRules().GetRules()) == 0 && len(vs.GetShardRoutingRules().GetRules()) == 0 { return mtState, nil } shortCtx, cancel := context.WithTimeout(context.Background(), topo.RemoteOperationTimeout) defer cancel() - ts, _ := kss.kew.ts.GetTopoServer() - - // collect all current shard information from the topo + ts, err := kss.kew.ts.GetTopoServer() + if err != nil { + return mtState, err + } + // Collect all current shard information from the topo. var shardInfos []*topo.ShardInfo + mu := sync.Mutex{} + eg, ectx := errgroup.WithContext(shortCtx) for _, sstate := range kss.shards { - si, err := ts.GetShard(shortCtx, kss.keyspace, sstate.target.Shard) - if err != nil { - return nil, err - } - shardInfos = append(shardInfos, si) + eg.Go(func() error { + si, err := ts.GetShard(ectx, kss.keyspace, sstate.target.Shard) + if err != nil { + return err + } + mu.Lock() + defer mu.Unlock() + shardInfos = append(shardInfos, si) + return nil + }) + } + if err := eg.Wait(); err != nil { + return mtState, err } - // check if any shard has denied tables and if so, record one of these to check where it currently points to - // using the (shard) routing rules + // Check if any shard has denied tables and if so, record one of these to check where it + // currently points to using the (shard) routing rules. var shardsWithDeniedTables []string var oneDeniedTable string for _, si := range shardInfos { @@ -455,11 +505,11 @@ func (kss *keyspaceState) getMoveTablesStatus(vs *vschemapb.SrvVSchema) (*MoveTa return mtState, nil } - // check if a shard by shard migration is in progress and if so detect if it has been switched - isPartialTables := vs.ShardRoutingRules != nil && len(vs.ShardRoutingRules.Rules) > 0 + // Check if a shard by shard migration is in progress and if so detect if it has been switched. + isPartialTables := vs.GetShardRoutingRules() != nil && len(vs.GetShardRoutingRules().GetRules()) > 0 if isPartialTables { - srr := topotools.GetShardRoutingRulesMap(vs.ShardRoutingRules) + srr := topotools.GetShardRoutingRulesMap(vs.GetShardRoutingRules()) mtState.Typ = MoveTablesShardByShard mtState.State = MoveTablesSwitched for _, shard := range shardsWithDeniedTables { @@ -470,31 +520,32 @@ func (kss *keyspaceState) getMoveTablesStatus(vs *vschemapb.SrvVSchema) (*MoveTa break } } - log.Infof("getMoveTablesStatus: keyspace %s declaring partial move tables %v", kss.keyspace, mtState) + log.Infof("getMoveTablesStatus: keyspace %s declaring partial move tables %s", kss.keyspace, mtState.String()) return mtState, nil } - // it wasn't a shard by shard migration, but since we have denied tables it must be a regular MoveTables + // It wasn't a shard by shard migration, but since we have denied tables it must be a + // regular MoveTables. mtState.Typ = MoveTablesRegular mtState.State = MoveTablesSwitching - rr := topotools.GetRoutingRulesMap(vs.RoutingRules) + rr := topotools.GetRoutingRulesMap(vs.GetRoutingRules()) if rr != nil { r, ok := rr[oneDeniedTable] - // if a rule exists for the table and points to the target keyspace, writes have been switched + // If a rule exists for the table and points to the target keyspace, writes have been switched. 
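Note the concurrency pattern introduced in this hunk: the per-shard GetShard calls now fan out through an errgroup, a mutex guards the shared shardInfos slice, and the first failure cancels the group's context for the remaining fetches. The same pattern in isolation (fetchShard is a stand-in for ts.GetShard):

```go
package main

import (
	"context"
	"fmt"
	"sync"

	"golang.org/x/sync/errgroup"
)

// fetchShard is a stand-in for a remote call such as ts.GetShard.
func fetchShard(ctx context.Context, shard string) (string, error) {
	return "shardinfo-" + shard, nil
}

func main() {
	shards := []string{"-80", "80-"}
	var (
		mu    sync.Mutex
		infos []string
	)
	eg, ectx := errgroup.WithContext(context.Background())
	for _, shard := range shards {
		shard := shard // per-iteration copy (implicit from Go 1.22 onwards)
		eg.Go(func() error {
			si, err := fetchShard(ectx, shard)
			if err != nil {
				return err // the first error cancels ectx for the rest
			}
			mu.Lock()
			defer mu.Unlock()
			infos = append(infos, si)
			return nil
		})
	}
	if err := eg.Wait(); err != nil {
		panic(err)
	}
	fmt.Println(infos)
}
```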
if ok && len(r) > 0 && r[0] != fmt.Sprintf("%s.%s", kss.keyspace, oneDeniedTable) { mtState.State = MoveTablesSwitched log.Infof("onSrvKeyspace:: keyspace %s writes have been switched for table %s, rule %v", kss.keyspace, oneDeniedTable, r[0]) } } - log.Infof("getMoveTablesStatus: keyspace %s declaring regular move tables %v", kss.keyspace, mtState) + log.Infof("getMoveTablesStatus: keyspace %s declaring regular move tables %s", kss.keyspace, mtState.String()) return mtState, nil } -// onSrvKeyspace is the callback that updates this keyspace with fresh topology data from our topology server. -// this callback is called from a Watcher in the topo server whenever a change to the topology for this keyspace -// occurs. this watcher is dedicated to this keyspace, and will only yield topology metadata changes for as -// long as we're interested on this keyspace. +// onSrvKeyspace is the callback that updates this keyspace with fresh topology data from our +// topology server. This callback is called from a Watcher in the topo server whenever a change to +// the topology for this keyspace occurs. This watcher is dedicated to this keyspace, and will +// only yield topology metadata changes for as long as we're interested in this keyspace. func (kss *keyspaceState) onSrvKeyspace(newKeyspace *topodatapb.SrvKeyspace, newError error) bool { kss.mu.Lock() defer kss.mu.Unlock() @@ -508,23 +559,25 @@ func (kss *keyspaceState) onSrvKeyspace(newKeyspace *topodatapb.SrvKeyspace, new return false } - // if there's another kind of error while watching this keyspace, we assume it's temporary and related - // to the topology server, not to the keyspace itself. we'll keep waiting for more topology events. + // If there's another kind of error while watching this keyspace, we assume it's temporary and + // related to the topology server, not to the keyspace itself. We'll keep waiting for more + // topology events. if newError != nil { kss.lastError = newError log.Errorf("error while watching keyspace %q: %v", kss.keyspace, newError) return true } - // if the topology metadata for our keyspace is identical to the last one we saw there's nothing to do - // here. this is a side-effect of the way ETCD watchers work. + // If the topology metadata for our keyspace is identical to the last one we saw there's nothing to + // do here. This is a side-effect of the way ETCD watchers work. if proto.Equal(kss.lastKeyspace, newKeyspace) { // no changes return true } - // we only mark this keyspace as inconsistent if there has been a topology change in the PRIMARY for - // this keyspace, but we store the topology metadata for both primary and replicas for future-proofing. + // We only mark this keyspace as inconsistent if there has been a topology change in the PRIMARY + // for this keyspace, but we store the topology metadata for both primary and replicas for + // future-proofing. var oldPrimary, newPrimary *topodatapb.SrvKeyspace_KeyspacePartition if kss.lastKeyspace != nil { oldPrimary = topoproto.SrvKeyspaceGetPartition(kss.lastKeyspace, topodatapb.TabletType_PRIMARY) @@ -555,20 +608,24 @@ func (kss *keyspaceState) isServing() bool { // onSrvVSchema is called from a Watcher in the topo server whenever the SrvVSchema is updated by Vitess. // For the purposes here, we are interested in updates to the RoutingRules or ShardRoutingRules. -// In addition, the traffic switcher updates SrvVSchema when the DeniedTables attributes in a Shard record is -// modified.
+// In addition, the traffic switcher updates SrvVSchema when the DeniedTables attributes in a Shard +// record is modified. func (kss *keyspaceState) onSrvVSchema(vs *vschemapb.SrvVSchema, err error) bool { - // the vschema can be nil if the server is currently shutting down + // The vschema can be nil if the server is currently shutting down. if vs == nil { return true } kss.mu.Lock() defer kss.mu.Unlock() - kss.moveTablesState, _ = kss.getMoveTablesStatus(vs) + var kerr error + if kss.moveTablesState, kerr = kss.getMoveTablesStatus(vs); kerr != nil { + log.Errorf("onSrvVSchema: keyspace %s failed to get move tables status: %v", kss.keyspace, kerr) + } if kss.moveTablesState != nil && kss.moveTablesState.Typ != MoveTablesNone { - // mark the keyspace as inconsistent. ensureConsistentLocked() checks if the workflow is switched, - // and if so, it will send an event to the buffering subscribers to indicate that buffering can be stopped. + // Mark the keyspace as inconsistent. ensureConsistentLocked() checks if the workflow is + // switched, and if so, it will send an event to the buffering subscribers to indicate that + // buffering can be stopped. kss.consistent = false kss.ensureConsistentLocked() } @@ -590,8 +647,9 @@ func newKeyspaceState(ctx context.Context, kew *KeyspaceEventWatcher, cell, keys return kss } -// processHealthCheck is the callback that is called by the global HealthCheck stream that was initiated -// by this KeyspaceEventWatcher. it redirects the TabletHealth event to the corresponding keyspaceState +// processHealthCheck is the callback that is called by the global HealthCheck stream that was +// initiated by this KeyspaceEventWatcher. It redirects the TabletHealth event to the +// corresponding keyspaceState. func (kew *KeyspaceEventWatcher) processHealthCheck(ctx context.Context, th *TabletHealth) { kss := kew.getKeyspaceStatus(ctx, th.Target.Keyspace) if kss == nil { @@ -601,8 +659,8 @@ func (kew *KeyspaceEventWatcher) processHealthCheck(ctx context.Context, th *Tab kss.onHealthCheck(th) } -// getKeyspaceStatus returns the keyspaceState object for the corresponding keyspace, allocating it -// if we've never seen the keyspace before. +// getKeyspaceStatus returns the keyspaceState object for the corresponding keyspace, allocating +// it if we've never seen the keyspace before. func (kew *KeyspaceEventWatcher) getKeyspaceStatus(ctx context.Context, keyspace string) *keyspaceState { kew.mu.Lock() defer kew.mu.Unlock() @@ -641,28 +699,53 @@ func (kew *KeyspaceEventWatcher) TargetIsBeingResharded(ctx context.Context, tar return ks.beingResharded(target.Shard) } -// PrimaryIsNotServing checks if the reason why the given target is not accessible right now is -// that the primary tablet for that shard is not serving. This is possible during a Planned Reparent Shard -// operation. Just as the operation completes, a new primary will be elected, and it will send its own healthcheck -// stating that it is serving. We should buffer requests until that point. -// There are use cases where people do not run with a Primary server at all, so we must verify that -// we only start buffering when a primary was present, and it went not serving. -// The shard state keeps track of the current primary and the last externally reparented time, which we can use -// to determine that there was a serving primary which now became non serving. This is only possible in a DemotePrimary -// RPC which are only called from ERS and PRS. So buffering will stop when these operations succeed.
-// We return the tablet alias of the primary if it is serving. -func (kew *KeyspaceEventWatcher) PrimaryIsNotServing(ctx context.Context, target *querypb.Target) (*topodatapb.TabletAlias, bool) { +// ShouldStartBufferingForTarget checks if we should be starting buffering for the given target. +// We check the following things before we start buffering - +// 1. The shard must have a primary. +// 2. The primary must be non-serving. +// 3. The keyspace must be marked inconsistent. +// +// This buffering is meant to kick in during a Planned Reparent Shard operation. +// As part of that operation the old primary will become non-serving. At that point +// this code should return true to start buffering requests. +// Just as the PRS operation completes, a new primary will be elected, and +// it will send its own healthcheck stating that it is serving. We should buffer requests until +// that point. +// +// There are use cases where people do not run with a Primary server at all, so we must +// verify that we only start buffering when a primary was present, and it went not serving. +// The shard state keeps track of the current primary and the last externally reparented time, which +// we can use to determine that there was a serving primary which now became non serving. This is +// only possible in a DemotePrimary RPC which are only called from ERS and PRS. So buffering will +// stop when these operations succeed. We also return the tablet alias of the primary if it is serving. +func (kew *KeyspaceEventWatcher) ShouldStartBufferingForTarget(ctx context.Context, target *querypb.Target) (*topodatapb.TabletAlias, bool) { if target.TabletType != topodatapb.TabletType_PRIMARY { + // We don't support buffering for any target tablet type other than the primary. return nil, false } ks := kew.getKeyspaceStatus(ctx, target.Keyspace) if ks == nil { + // If the keyspace status is nil, then the keyspace must be deleted. + // The user query is trying to access a keyspace that has been deleted. + // There is no reason to buffer this query. return nil, false } ks.mu.Lock() defer ks.mu.Unlock() if state, ok := ks.shards[target.Shard]; ok { - // If the primary tablet was present then externallyReparented will be non-zero and currentPrimary will be not nil + // As described in the function comment, we only want to start buffering when all the following conditions are met - + // 1. The shard must have a primary. We check this by checking the currentPrimary and externallyReparented fields being non-empty. + // They are set the first time the shard registers an update from a serving primary and are never cleared out after that. + // If the user has configured vtgates to wait for the primary tablet healthchecks before starting query service, this condition + // will always be true. + // 2. The primary must be non-serving. We check this by checking the serving field in the shard state. + // When a primary becomes non-serving, it also marks the keyspace inconsistent. So the next check is only added + // for being defensive against any bugs. + // 3. The keyspace must be marked inconsistent. We check this by checking the consistent field in the keyspace state. + // + // The reason we need all the three checks is that we want to be very defensive in when we start buffering. + // We don't want to start buffering when we don't know for sure if the primary + // is not serving and we will receive an update that stops buffering soon. 
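Condensed, the three preconditions spelled out above amount to a single boolean predicate; the return statement that follows in the hunk is the actual implementation, and this restatement is only for readability (the standalone function and package name are illustrative):

```go
package buffering

import topodatapb "vitess.io/vitess/go/vt/proto/topodata"

// shouldStartBuffering restates the three checks from
// ShouldStartBufferingForTarget over plain field values.
func shouldStartBuffering(serving, consistent bool, externallyReparented int64, currentPrimary *topodatapb.TabletAlias) bool {
	hadPrimary := currentPrimary != nil && externallyReparented != 0 // 1. the shard has seen a serving primary
	primaryNotServing := !serving                                    // 2. that primary is now non-serving
	keyspaceInconsistent := !consistent                              // 3. the keyspace is marked inconsistent
	return hadPrimary && primaryNotServing && keyspaceInconsistent
}
```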
return state.currentPrimary, !state.serving && !ks.consistent && state.externallyReparented != 0 && state.currentPrimary != nil } return nil, false @@ -714,3 +797,46 @@ func (kew *KeyspaceEventWatcher) MarkShardNotServing(ctx context.Context, keyspa } return true } + +// WaitForConsistentKeyspaces waits for the given set of keyspaces to be marked consistent. +func (kew *KeyspaceEventWatcher) WaitForConsistentKeyspaces(ctx context.Context, ksList []string) error { + // We don't want to change the original keyspace list that we receive so we clone it + // before we empty its elements down below. + keyspaces := slices.Clone(ksList) + for { + // We empty keyspaces as we find them to be consistent. + allConsistent := true + for i, ks := range keyspaces { + if ks == "" { + continue + } + + // Get the keyspace status and see if it is consistent yet or not. + kss := kew.getKeyspaceStatus(ctx, ks) + // If kss is nil, then it must be deleted. In that case too it is fine for us to consider + // it consistent since the keyspace has been deleted. + if kss == nil || kss.consistent { + keyspaces[i] = "" + } else { + allConsistent = false + } + } + + if allConsistent { + // All the keyspaces are consistent. + return nil + } + + // Unblock after the sleep or when the context has expired. + select { + case <-ctx.Done(): + for _, ks := range keyspaces { + if ks != "" { + log.Infof("keyspace %v didn't become consistent", ks) + } + } + return ctx.Err() + case <-time.After(waitConsistentKeyspacesCheck): + } + } +} diff --git a/go/vt/discovery/keyspace_events_test.go b/go/vt/discovery/keyspace_events_test.go index c77f7c4c6e9..21e0167e5cf 100644 --- a/go/vt/discovery/keyspace_events_test.go +++ b/go/vt/discovery/keyspace_events_test.go @@ -20,6 +20,7 @@ import ( "context" "encoding/hex" "sync" + "sync/atomic" "testing" "time" @@ -54,6 +55,67 @@ func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) { require.True(t, kss.onSrvKeyspace(nil, nil)) } +// TestKeyspaceEventConcurrency confirms that the keyspace event watcher +// does not fail to broadcast received keyspace events to subscribers. +// This verifies that no events are lost when there's a high number of +// concurrent keyspace events. +func TestKeyspaceEventConcurrency(t *testing.T) { + cell := "cell1" + factory := faketopo.NewFakeTopoFactory() + factory.AddCell(cell) + sts := &fakeTopoServer{} + hc := NewFakeHealthCheck(make(chan *TabletHealth)) + defer hc.Close() + kew := &KeyspaceEventWatcher{ + hc: hc, + ts: sts, + localCell: cell, + keyspaces: make(map[string]*keyspaceState), + subs: make(map[chan *KeyspaceEvent]struct{}), + } + + // Subscribe to the watcher's broadcasted keyspace events. + receiver := kew.Subscribe() + + updates := atomic.Uint32{} + updates.Store(0) + wg := sync.WaitGroup{} + concurrency := 100 + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + go func() { + for { + select { + case <-ctx.Done(): + return + case <-receiver: + updates.Add(1) + } + } + }() + // Start up concurrent goroutines that will broadcast keyspace events.
diff --git a/go/vt/discovery/keyspace_events_test.go b/go/vt/discovery/keyspace_events_test.go
index c77f7c4c6e9..21e0167e5cf 100644
--- a/go/vt/discovery/keyspace_events_test.go
+++ b/go/vt/discovery/keyspace_events_test.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"encoding/hex"
 	"sync"
+	"sync/atomic"
 	"testing"
 	"time"
 
@@ -54,6 +55,67 @@ func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) {
 	require.True(t, kss.onSrvKeyspace(nil, nil))
 }
 
+// TestKeyspaceEventConcurrency confirms that the keyspace event watcher
+// does not fail to broadcast received keyspace events to subscribers.
+// This verifies that no events are lost when there's a high number of
+// concurrent keyspace events.
+func TestKeyspaceEventConcurrency(t *testing.T) {
+	cell := "cell1"
+	factory := faketopo.NewFakeTopoFactory()
+	factory.AddCell(cell)
+	sts := &fakeTopoServer{}
+	hc := NewFakeHealthCheck(make(chan *TabletHealth))
+	defer hc.Close()
+	kew := &KeyspaceEventWatcher{
+		hc:        hc,
+		ts:        sts,
+		localCell: cell,
+		keyspaces: make(map[string]*keyspaceState),
+		subs:      make(map[chan *KeyspaceEvent]struct{}),
+	}
+
+	// Subscribe to the watcher's broadcasted keyspace events.
+	receiver := kew.Subscribe()
+
+	updates := atomic.Uint32{}
+	updates.Store(0)
+	wg := sync.WaitGroup{}
+	concurrency := 100
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
+	defer cancel()
+	go func() {
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case <-receiver:
+				updates.Add(1)
+			}
+		}
+	}()
+	// Start up concurrent goroutines that will broadcast keyspace events.
+	for i := 1; i <= concurrency; i++ {
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			kew.broadcast(&KeyspaceEvent{})
+		}()
+	}
+	wg.Wait()
+	for {
+		select {
+		case <-ctx.Done():
+			require.Equal(t, concurrency, int(updates.Load()), "expected %d updates, got %d", concurrency, updates.Load())
+			return
+		default:
+			if int(updates.Load()) == concurrency { // Pass
+				cancel()
+				return
+			}
+		}
+	}
+}
+
 // TestKeyspaceEventTypes confirms that the keyspace event watcher determines
 // that the unavailability event is caused by the correct scenario. We should
 // consider it to be caused by a resharding operation when the following
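What the test above guards against, in isolation: a broadcaster that sends to subscriber channels while holding a lock can only avoid dropping events (or deadlocking) if the subscriber channels have enough buffer for the in-flight events. A runnable sketch under that assumption; hub and event here are illustrative, not the real watcher types:

package main

import (
	"fmt"
	"sync"
)

// event is a stand-in for discovery.KeyspaceEvent in this sketch.
type event struct{ keyspace string }

// hub is a minimal broadcaster. This sketch assumes subscriber channels are
// buffered deeply enough for every in-flight event; with unbuffered channels
// a slow subscriber would block each broadcaster while it holds the lock.
type hub struct {
	mu   sync.Mutex
	subs map[chan *event]struct{}
}

func (h *hub) subscribe(buf int) chan *event {
	c := make(chan *event, buf)
	h.mu.Lock()
	defer h.mu.Unlock()
	h.subs[c] = struct{}{}
	return c
}

func (h *hub) broadcast(ev *event) {
	h.mu.Lock()
	defer h.mu.Unlock()
	for c := range h.subs {
		c <- ev // safe only while every subscriber buffer has room
	}
}

func main() {
	h := &hub{subs: make(map[chan *event]struct{})}
	const n = 100
	recv := h.subscribe(n) // buffer sized for every event we will send
	var wg sync.WaitGroup
	for i := 0; i < n; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			h.broadcast(&event{keyspace: "ks1"})
		}()
	}
	wg.Wait()
	fmt.Println(len(recv) == n) // true: nothing was dropped
}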
@@ -86,11 +148,11 @@ func TestKeyspaceEventTypes(t *testing.T) {
 	kew := NewKeyspaceEventWatcher(ctx, ts2, hc, cell)
 
 	type testCase struct {
-		name                    string
-		kss                     *keyspaceState
-		shardToCheck            string
-		expectResharding        bool
-		expectPrimaryNotServing bool
+		name               string
+		kss                *keyspaceState
+		shardToCheck       string
+		expectResharding   bool
+		expectShouldBuffer bool
 	}
 
 	testCases := []testCase{
@@ -127,9 +189,9 @@
 				},
 				consistent: false,
 			},
-			shardToCheck:            "-",
-			expectResharding:        true,
-			expectPrimaryNotServing: false,
+			shardToCheck:       "-",
+			expectResharding:   true,
+			expectShouldBuffer: false,
 		},
 		{
 			name: "two to four resharding in progress",
@@ -188,9 +250,9 @@
 				},
 				consistent: false,
 			},
-			shardToCheck:            "-80",
-			expectResharding:        true,
-			expectPrimaryNotServing: false,
+			shardToCheck:       "-80",
+			expectResharding:   true,
+			expectShouldBuffer: false,
 		},
 		{
 			name: "unsharded primary not serving",
@@ -214,9 +276,9 @@
 				},
 				consistent: false,
 			},
-			shardToCheck:            "-",
-			expectResharding:        false,
-			expectPrimaryNotServing: true,
+			shardToCheck:       "-",
+			expectResharding:   false,
+			expectShouldBuffer: true,
 		},
 		{
 			name: "sharded primary not serving",
@@ -248,9 +310,9 @@
 				},
 				consistent: false,
 			},
-			shardToCheck:            "-80",
-			expectResharding:        false,
-			expectPrimaryNotServing: true,
+			shardToCheck:       "-80",
+			expectResharding:   false,
+			expectShouldBuffer: true,
 		},
 	}
 
@@ -265,8 +327,89 @@
 			resharding := kew.TargetIsBeingResharded(ctx, tc.kss.shards[tc.shardToCheck].target)
 			require.Equal(t, resharding, tc.expectResharding, "TargetIsBeingResharded should return %t", tc.expectResharding)
 
-			_, primaryDown := kew.PrimaryIsNotServing(ctx, tc.kss.shards[tc.shardToCheck].target)
-			require.Equal(t, primaryDown, tc.expectPrimaryNotServing, "PrimaryIsNotServing should return %t", tc.expectPrimaryNotServing)
+			_, shouldBuffer := kew.ShouldStartBufferingForTarget(ctx, tc.kss.shards[tc.shardToCheck].target)
+			require.Equal(t, shouldBuffer, tc.expectShouldBuffer, "ShouldStartBufferingForTarget should return %t", tc.expectShouldBuffer)
+		})
+	}
+}
+
+// TestWaitForConsistentKeyspaces tests the behaviour of WaitForConsistentKeyspaces for different scenarios.
+func TestWaitForConsistentKeyspaces(t *testing.T) {
+	testcases := []struct {
+		name        string
+		ksMap       map[string]*keyspaceState
+		ksList      []string
+		errExpected string
+	}{
+		{
+			name:   "Empty keyspace list",
+			ksList: nil,
+			ksMap: map[string]*keyspaceState{
+				"ks1": {},
+			},
+			errExpected: "",
+		},
+		{
+			name:   "All keyspaces consistent",
+			ksList: []string{"ks1", "ks2"},
+			ksMap: map[string]*keyspaceState{
+				"ks1": {
+					consistent: true,
+				},
+				"ks2": {
+					consistent: true,
+				},
+			},
+			errExpected: "",
+		},
+		{
+			name:   "One keyspace inconsistent",
+			ksList: []string{"ks1", "ks2"},
+			ksMap: map[string]*keyspaceState{
+				"ks1": {
+					consistent: true,
+				},
+				"ks2": {
+					consistent: false,
+				},
+			},
+			errExpected: "context canceled",
+		},
+		{
+			name:   "One deleted keyspace - consistent",
+			ksList: []string{"ks1", "ks2"},
+			ksMap: map[string]*keyspaceState{
+				"ks1": {
+					consistent: true,
+				},
+				"ks2": {
+					deleted: true,
+				},
+			},
+			errExpected: "",
+		},
+	}
+
+	for _, tt := range testcases {
+		t.Run(tt.name, func(t *testing.T) {
+			// We create a cancelable context and immediately cancel it, since we
+			// don't want the unit tests to wait. This way we only test the first
+			// iteration of the loop: whether the keyspace event watcher reports
+			// the keyspaces as consistent or not.
+			ctx, cancel := context.WithCancel(context.Background())
+			cancel()
+			kew := KeyspaceEventWatcher{
+				keyspaces: tt.ksMap,
+				mu:        sync.Mutex{},
+				ts:        &fakeTopoServer{},
+			}
+			err := kew.WaitForConsistentKeyspaces(ctx, tt.ksList)
+			if tt.errExpected != "" {
+				require.ErrorContains(t, err, tt.errExpected)
+			} else {
+				require.NoError(t, err)
+			}
+		})
 	}
 }
 
@@ -531,6 +674,26 @@ func (f *fakeTopoServer) GetSrvKeyspace(ctx context.Context, cell, keyspace stri
 	return ks, nil
 }
 
+// GetSrvVSchema returns the SrvVSchema for a cell.
+func (f *fakeTopoServer) GetSrvVSchema(ctx context.Context, cell string) (*vschemapb.SrvVSchema, error) {
+	vs := &vschemapb.SrvVSchema{
+		Keyspaces: map[string]*vschemapb.Keyspace{
+			"ks1": {
+				Sharded: true,
+			},
+		},
+		RoutingRules: &vschemapb.RoutingRules{
+			Rules: []*vschemapb.RoutingRule{
+				{
+					FromTable: "db1.t1",
+					ToTables:  []string{"db1.t1"},
+				},
+			},
+		},
+	}
+	return vs, nil
+}
+
 func (f *fakeTopoServer) WatchSrvKeyspace(ctx context.Context, cell, keyspace string, callback func(*topodatapb.SrvKeyspace, error) bool) {
 	ks, err := f.GetSrvKeyspace(ctx, cell, keyspace)
 	callback(ks, err)
@@ -540,5 +703,6 @@ func (f *fakeTopoServer) WatchSrvKeyspace(ctx context.Context, cell, keyspace st
 // the provided cell. It will call the callback when
 // a new value or an error occurs.
 func (f *fakeTopoServer) WatchSrvVSchema(ctx context.Context, cell string, callback func(*vschemapb.SrvVSchema, error) bool) {
-
+	sv, err := f.GetSrvVSchema(ctx, cell)
+	callback(sv, err)
 }
diff --git a/go/vt/sqlparser/normalizer.go b/go/vt/sqlparser/normalizer.go
index b1728a47fb1..9dc0fe40410 100644
--- a/go/vt/sqlparser/normalizer.go
+++ b/go/vt/sqlparser/normalizer.go
@@ -47,15 +47,17 @@ type normalizer struct {
 	bindVars  map[string]*querypb.BindVariable
 	reserved  *ReservedVars
 	vals      map[Literal]string
+	tupleVals map[string]string
 	err       error
 	inDerived bool
 }
 
 func newNormalizer(reserved *ReservedVars, bindVars map[string]*querypb.BindVariable) *normalizer {
 	return &normalizer{
-		bindVars: bindVars,
-		reserved: reserved,
-		vals:     make(map[Literal]string),
+		bindVars:  bindVars,
+		reserved:  reserved,
+		vals:      make(map[Literal]string),
+		tupleVals: make(map[string]string),
 	}
 }
 
@@ -311,8 +313,22 @@ func (nz *normalizer) rewriteInComparisons(node *ComparisonExpr) {
 			Value: bval.Value,
 		})
 	}
-	bvname := nz.reserved.nextUnusedVar()
-	nz.bindVars[bvname] = bvals
+
+	var bvname string
+
+	if key, err := bvals.MarshalVT(); err != nil {
+		bvname = nz.reserved.nextUnusedVar()
+		nz.bindVars[bvname] = bvals
+	} else {
+		// Check if we already have a bind variable for this exact tuple in tupleVals.
+		var ok bool
+		if bvname, ok = nz.tupleVals[string(key)]; !ok {
+			bvname = nz.reserved.nextUnusedVar()
+		}
+
+		nz.bindVars[bvname] = bvals
+		nz.tupleVals[string(key)] = bvname
+	}
+
 	// Modify RHS to be a list bindvar.
 	node.Right = ListArg(bvname)
 }
diff --git a/go/vt/sqlparser/normalizer_test.go b/go/vt/sqlparser/normalizer_test.go
index 394968f2893..df7638cb7b2 100644
--- a/go/vt/sqlparser/normalizer_test.go
+++ b/go/vt/sqlparser/normalizer_test.go
@@ -301,6 +301,13 @@ func TestNormalize(t *testing.T) {
 		outbv: map[string]*querypb.BindVariable{
 			"bv1": sqltypes.TestBindVariable([]any{1, "2"}),
 		},
+	}, {
+		// repeated IN clause with vals
+		in:      "select * from t where v1 in (1, '2') OR v2 in (1, '2')",
+		outstmt: "select * from t where v1 in ::bv1 or v2 in ::bv1",
+		outbv: map[string]*querypb.BindVariable{
+			"bv1": sqltypes.TestBindVariable([]any{1, "2"}),
+		},
 	}, {
 		// NOT IN clause
 		in:      "select * from t where v1 not in (1, '2')",
@@ -715,9 +722,9 @@
 JOIN warehouse%d AS w ON c_w_id=w_id
 WHERE w_id = %d
 AND c_d_id = %d
 AND c_id = %d`,
-	`SELECT d_next_o_id, d_tax
-FROM district%d
-WHERE d_w_id = %d
+	`SELECT d_next_o_id, d_tax
+FROM district%d
+WHERE d_w_id = %d
 AND d_id = %d FOR UPDATE`,
 	`UPDATE district%d
 SET d_next_o_id = %d
@@ -727,58 +734,58 @@
 WHERE d_id = %d
 AND d_w_id= %d`,
 VALUES (%d,%d,%d,%d,NOW(),%d,%d)`,
 	`INSERT INTO new_orders%d (no_o_id, no_d_id, no_w_id)
 VALUES (%d,%d,%d)`,
-	`SELECT i_price, i_name, i_data
+	`SELECT i_price, i_name, i_data
 FROM item%d
 WHERE i_id = %d`,
-	`SELECT s_quantity, s_data, s_dist_%s s_dist
-FROM stock%d
+	`SELECT s_quantity, s_data, s_dist_%s s_dist
+FROM stock%d
 WHERE s_i_id = %d
 AND s_w_id= %d FOR UPDATE`,
 	`UPDATE stock%d
 SET s_quantity = %d
-WHERE s_i_id = %d
+WHERE s_i_id = %d
 AND s_w_id= %d`,
 	`INSERT INTO order_line%d
 (ol_o_id, ol_d_id, ol_w_id, ol_number, ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_dist_info)
 VALUES (%d,%d,%d,%d,%d,%d,%d,%d,'%s')`,
 	`UPDATE warehouse%d
-SET w_ytd = w_ytd + %d
+SET w_ytd = w_ytd + %d
 WHERE w_id = %d`,
 	`SELECT w_street_1, w_street_2, w_city, w_state, w_zip, w_name
 FROM warehouse%d
 WHERE w_id = %d`,
-	`UPDATE district%d
-SET d_ytd = d_ytd + %d
-WHERE d_w_id = %d
+	`UPDATE district%d
+SET d_ytd = d_ytd + %d
+WHERE d_w_id = %d
 AND d_id= %d`,
-	`SELECT d_street_1, d_street_2, d_city,
d_state, d_zip, d_name + `SELECT d_street_1, d_street_2, d_city, d_state, d_zip, d_name FROM district%d -WHERE d_w_id = %d +WHERE d_w_id = %d AND d_id = %d`, `SELECT count(c_id) namecnt FROM customer%d -WHERE c_w_id = %d +WHERE c_w_id = %d AND c_d_id= %d AND c_last='%s'`, `SELECT c_first, c_middle, c_last, c_street_1, c_street_2, c_city, c_state, c_zip, c_phone, c_credit, c_credit_lim, c_discount, c_balance, c_ytd_payment, c_since FROM customer%d -WHERE c_w_id = %d +WHERE c_w_id = %d AND c_d_id= %d AND c_id=%d FOR UPDATE`, `SELECT c_data FROM customer%d -WHERE c_w_id = %d +WHERE c_w_id = %d AND c_d_id=%d AND c_id= %d`, `UPDATE customer%d SET c_balance=%f, c_ytd_payment=%f, c_data='%s' -WHERE c_w_id = %d +WHERE c_w_id = %d AND c_d_id=%d AND c_id=%d`, `UPDATE customer%d SET c_balance=%f, c_ytd_payment=%f -WHERE c_w_id = %d +WHERE c_w_id = %d AND c_d_id=%d AND c_id=%d`, `INSERT INTO history%d @@ -786,71 +793,71 @@ AND c_id=%d`, VALUES (%d,%d,%d,%d,%d,NOW(),%d,'%s')`, `SELECT count(c_id) namecnt FROM customer%d -WHERE c_w_id = %d +WHERE c_w_id = %d AND c_d_id= %d AND c_last='%s'`, `SELECT c_balance, c_first, c_middle, c_id FROM customer%d -WHERE c_w_id = %d +WHERE c_w_id = %d AND c_d_id= %d AND c_last='%s' ORDER BY c_first`, `SELECT c_balance, c_first, c_middle, c_last FROM customer%d -WHERE c_w_id = %d +WHERE c_w_id = %d AND c_d_id=%d AND c_id=%d`, `SELECT o_id, o_carrier_id, o_entry_d -FROM orders%d -WHERE o_w_id = %d -AND o_d_id = %d -AND o_c_id = %d +FROM orders%d +WHERE o_w_id = %d +AND o_d_id = %d +AND o_c_id = %d ORDER BY o_id DESC`, `SELECT ol_i_id, ol_supply_w_id, ol_quantity, ol_amount, ol_delivery_d FROM order_line%d WHERE ol_w_id = %d AND ol_d_id = %d AND ol_o_id = %d`, `SELECT no_o_id -FROM new_orders%d -WHERE no_d_id = %d -AND no_w_id = %d +FROM new_orders%d +WHERE no_d_id = %d +AND no_w_id = %d ORDER BY no_o_id ASC LIMIT 1 FOR UPDATE`, `DELETE FROM new_orders%d -WHERE no_o_id = %d -AND no_d_id = %d +WHERE no_o_id = %d +AND no_d_id = %d AND no_w_id = %d`, `SELECT o_c_id -FROM orders%d -WHERE o_id = %d -AND o_d_id = %d +FROM orders%d +WHERE o_id = %d +AND o_d_id = %d AND o_w_id = %d`, - `UPDATE orders%d + `UPDATE orders%d SET o_carrier_id = %d -WHERE o_id = %d -AND o_d_id = %d +WHERE o_id = %d +AND o_d_id = %d AND o_w_id = %d`, - `UPDATE order_line%d + `UPDATE order_line%d SET ol_delivery_d = NOW() -WHERE ol_o_id = %d -AND ol_d_id = %d +WHERE ol_o_id = %d +AND ol_d_id = %d AND ol_w_id = %d`, `SELECT SUM(ol_amount) sm -FROM order_line%d -WHERE ol_o_id = %d -AND ol_d_id = %d +FROM order_line%d +WHERE ol_o_id = %d +AND ol_d_id = %d AND ol_w_id = %d`, - `UPDATE customer%d + `UPDATE customer%d SET c_balance = c_balance + %f, c_delivery_cnt = c_delivery_cnt + 1 -WHERE c_id = %d -AND c_d_id = %d +WHERE c_id = %d +AND c_d_id = %d AND c_w_id = %d`, - `SELECT d_next_o_id + `SELECT d_next_o_id FROM district%d WHERE d_id = %d AND d_w_id= %d`, `SELECT COUNT(DISTINCT(s.s_i_id)) FROM stock%d AS s -JOIN order_line%d AS ol ON ol.ol_w_id=s.s_w_id AND ol.ol_i_id=s.s_i_id -WHERE ol.ol_w_id = %d +JOIN order_line%d AS ol ON ol.ol_w_id=s.s_w_id AND ol.ol_i_id=s.s_i_id +WHERE ol.ol_w_id = %d AND ol.ol_d_id = %d -AND ol.ol_o_id < %d +AND ol.ol_o_id < %d AND ol.ol_o_id >= %d AND s.s_w_id= %d AND s.s_quantity < %d `, @@ -861,7 +868,7 @@ AND ol_o_id < %d AND ol_o_id >= %d`, WHERE s_w_id = %d AND s_i_id = %d AND s_quantity < %d`, `SELECT min(no_o_id) mo -FROM new_orders%d +FROM new_orders%d WHERE no_w_id = %d AND no_d_id = %d`, `SELECT o_id FROM orders%d o, (SELECT o_c_id,o_w_id,o_d_id,count(distinct o_id) 
FROM orders%d WHERE o_w_id=%d AND o_d_id=%d AND o_id > 2100 AND o_id < %d GROUP BY o_c_id,o_d_id,o_w_id having count( distinct o_id) > 1 limit 1) t WHERE t.o_w_id=o.o_w_id and t.o_d_id=o.o_d_id and t.o_c_id=o.o_c_id limit 1 `, `DELETE FROM order_line%d where ol_w_id=%d AND ol_d_id=%d AND ol_o_id=%d`, diff --git a/go/vt/srvtopo/discover.go b/go/vt/srvtopo/discover.go index 91aaea9daf6..2b020e89887 100644 --- a/go/vt/srvtopo/discover.go +++ b/go/vt/srvtopo/discover.go @@ -17,9 +17,8 @@ limitations under the License. package srvtopo import ( - "sync" - "context" + "sync" "vitess.io/vitess/go/vt/concurrency" "vitess.io/vitess/go/vt/log" @@ -29,20 +28,24 @@ import ( topodatapb "vitess.io/vitess/go/vt/proto/topodata" ) -// FindAllTargets goes through all serving shards in the topology -// for the provided tablet types. It returns one Target object per -// keyspace / shard / matching TabletType. -func FindAllTargets(ctx context.Context, ts Server, cell string, tabletTypes []topodatapb.TabletType) ([]*querypb.Target, error) { - ksNames, err := ts.GetSrvKeyspaceNames(ctx, cell, true) - if err != nil { - return nil, err +// FindAllTargetsAndKeyspaces goes through all serving shards in the topology for the provided keyspaces +// and tablet types. If no keyspaces are provided all available keyspaces in the topo are +// fetched. It returns one Target object per keyspace/shard/matching TabletType. +// It also returns all the keyspaces that it found. +func FindAllTargetsAndKeyspaces(ctx context.Context, ts Server, cell string, keyspaces []string, tabletTypes []topodatapb.TabletType) ([]*querypb.Target, []string, error) { + var err error + if len(keyspaces) == 0 { + keyspaces, err = ts.GetSrvKeyspaceNames(ctx, cell, true) + if err != nil { + return nil, nil, err + } } var targets []*querypb.Target var wg sync.WaitGroup var mu sync.Mutex var errRecorder concurrency.AllErrorRecorder - for _, ksName := range ksNames { + for _, ksName := range keyspaces { wg.Add(1) go func(keyspace string) { defer wg.Done() @@ -92,8 +95,8 @@ func FindAllTargets(ctx context.Context, ts Server, cell string, tabletTypes []t } wg.Wait() if errRecorder.HasErrors() { - return nil, errRecorder.Error() + return nil, nil, errRecorder.Error() } - return targets, nil + return targets, keyspaces, nil } diff --git a/go/vt/srvtopo/discover_test.go b/go/vt/srvtopo/discover_test.go index ca4774a1b84..0232bce7a65 100644 --- a/go/vt/srvtopo/discover_test.go +++ b/go/vt/srvtopo/discover_test.go @@ -18,11 +18,12 @@ package srvtopo import ( "context" - "reflect" "sort" "testing" "time" + "github.com/stretchr/testify/assert" + "vitess.io/vitess/go/vt/topo/memorytopo" querypb "vitess.io/vitess/go/vt/proto/query" @@ -47,7 +48,7 @@ func (a TargetArray) Less(i, j int) bool { return a[i].TabletType < a[j].TabletType } -func TestFindAllTargets(t *testing.T) { +func TestFindAllTargetsAndKeyspaces(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() ts := memorytopo.NewServer(ctx, "cell1", "cell2") @@ -62,16 +63,13 @@ func TestFindAllTargets(t *testing.T) { rs := NewResilientServer(ctx, ts, "TestFindAllKeyspaceShards") // No keyspace / shards. - ks, err := FindAllTargets(ctx, rs, "cell1", []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if len(ks) > 0 { - t.Errorf("why did I get anything? 
%v", ks) - } + targets, ksList, err := FindAllTargetsAndKeyspaces(ctx, rs, "cell1", []string{"test_keyspace"}, []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}) + assert.NoError(t, err) + assert.Len(t, targets, 0) + assert.EqualValues(t, []string{"test_keyspace"}, ksList) // Add one. - if err := ts.UpdateSrvKeyspace(ctx, "cell1", "test_keyspace", &topodatapb.SrvKeyspace{ + assert.NoError(t, ts.UpdateSrvKeyspace(ctx, "cell1", "test_keyspace", &topodatapb.SrvKeyspace{ Partitions: []*topodatapb.SrvKeyspace_KeyspacePartition{ { ServedType: topodatapb.TabletType_PRIMARY, @@ -82,28 +80,36 @@ func TestFindAllTargets(t *testing.T) { }, }, }, - }); err != nil { - t.Fatalf("can't add srvKeyspace: %v", err) - } + })) // Get it. - ks, err = FindAllTargets(ctx, rs, "cell1", []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if !reflect.DeepEqual(ks, []*querypb.Target{ + targets, ksList, err = FindAllTargetsAndKeyspaces(ctx, rs, "cell1", []string{"test_keyspace"}, []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}) + assert.NoError(t, err) + assert.EqualValues(t, []*querypb.Target{ { Cell: "cell1", Keyspace: "test_keyspace", Shard: "test_shard0", TabletType: topodatapb.TabletType_PRIMARY, }, - }) { - t.Errorf("got wrong value: %v", ks) - } + }, targets) + assert.EqualValues(t, []string{"test_keyspace"}, ksList) + + // Get any keyspace. + targets, ksList, err = FindAllTargetsAndKeyspaces(ctx, rs, "cell1", nil, []topodatapb.TabletType{topodatapb.TabletType_PRIMARY}) + assert.NoError(t, err) + assert.EqualValues(t, []*querypb.Target{ + { + Cell: "cell1", + Keyspace: "test_keyspace", + Shard: "test_shard0", + TabletType: topodatapb.TabletType_PRIMARY, + }, + }, targets) + assert.EqualValues(t, []string{"test_keyspace"}, ksList) // Add another one. - if err := ts.UpdateSrvKeyspace(ctx, "cell1", "test_keyspace2", &topodatapb.SrvKeyspace{ + assert.NoError(t, ts.UpdateSrvKeyspace(ctx, "cell1", "test_keyspace2", &topodatapb.SrvKeyspace{ Partitions: []*topodatapb.SrvKeyspace_KeyspacePartition{ { ServedType: topodatapb.TabletType_PRIMARY, @@ -122,17 +128,13 @@ func TestFindAllTargets(t *testing.T) { }, }, }, - }); err != nil { - t.Fatalf("can't add srvKeyspace: %v", err) - } + })) - // Get it for all types. - ks, err = FindAllTargets(ctx, rs, "cell1", []topodatapb.TabletType{topodatapb.TabletType_PRIMARY, topodatapb.TabletType_REPLICA}) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - sort.Sort(TargetArray(ks)) - if !reflect.DeepEqual(ks, []*querypb.Target{ + // Get it for any keyspace, all types. + targets, ksList, err = FindAllTargetsAndKeyspaces(ctx, rs, "cell1", nil, []topodatapb.TabletType{topodatapb.TabletType_PRIMARY, topodatapb.TabletType_REPLICA}) + assert.NoError(t, err) + sort.Sort(TargetArray(targets)) + assert.EqualValues(t, []*querypb.Target{ { Cell: "cell1", Keyspace: "test_keyspace", @@ -151,23 +153,46 @@ func TestFindAllTargets(t *testing.T) { Shard: "test_shard2", TabletType: topodatapb.TabletType_REPLICA, }, - }) { - t.Errorf("got wrong value: %v", ks) - } + }, targets) + sort.Strings(ksList) + assert.EqualValues(t, []string{"test_keyspace", "test_keyspace2"}, ksList) + + // Only get 1 keyspace for all types. 
+ targets, ksList, err = FindAllTargetsAndKeyspaces(ctx, rs, "cell1", []string{"test_keyspace2"}, []topodatapb.TabletType{topodatapb.TabletType_PRIMARY, topodatapb.TabletType_REPLICA}) + assert.NoError(t, err) + assert.EqualValues(t, []*querypb.Target{ + { + Cell: "cell1", + Keyspace: "test_keyspace2", + Shard: "test_shard1", + TabletType: topodatapb.TabletType_PRIMARY, + }, + { + Cell: "cell1", + Keyspace: "test_keyspace2", + Shard: "test_shard2", + TabletType: topodatapb.TabletType_REPLICA, + }, + }, targets) + assert.EqualValues(t, []string{"test_keyspace2"}, ksList) - // Only get the REPLICA targets. - ks, err = FindAllTargets(ctx, rs, "cell1", []topodatapb.TabletType{topodatapb.TabletType_REPLICA}) - if err != nil { - t.Errorf("unexpected error: %v", err) - } - if !reflect.DeepEqual(ks, []*querypb.Target{ + // Only get the REPLICA targets for any keyspace. + targets, ksList, err = FindAllTargetsAndKeyspaces(ctx, rs, "cell1", []string{}, []topodatapb.TabletType{topodatapb.TabletType_REPLICA}) + assert.NoError(t, err) + assert.Equal(t, []*querypb.Target{ { Cell: "cell1", Keyspace: "test_keyspace2", Shard: "test_shard2", TabletType: topodatapb.TabletType_REPLICA, }, - }) { - t.Errorf("got wrong value: %v", ks) - } + }, targets) + sort.Strings(ksList) + assert.EqualValues(t, []string{"test_keyspace", "test_keyspace2"}, ksList) + + // Get non-existent keyspace. + targets, ksList, err = FindAllTargetsAndKeyspaces(ctx, rs, "cell1", []string{"doesnt-exist"}, []topodatapb.TabletType{topodatapb.TabletType_PRIMARY, topodatapb.TabletType_REPLICA}) + assert.NoError(t, err) + assert.Len(t, targets, 0) + assert.EqualValues(t, []string{"doesnt-exist"}, ksList) } diff --git a/go/vt/srvtopo/watch_srvvschema.go b/go/vt/srvtopo/watch_srvvschema.go index 1b5536e623d..c758211375d 100644 --- a/go/vt/srvtopo/watch_srvvschema.go +++ b/go/vt/srvtopo/watch_srvvschema.go @@ -21,8 +21,9 @@ import ( "time" "vitess.io/vitess/go/stats" - vschemapb "vitess.io/vitess/go/vt/proto/vschema" "vitess.io/vitess/go/vt/topo" + + vschemapb "vitess.io/vitess/go/vt/proto/vschema" ) type SrvVSchemaWatcher struct { diff --git a/go/vt/tableacl/tableacl.go b/go/vt/tableacl/tableacl.go index 9a6e6eeba4e..66f42b04680 100644 --- a/go/vt/tableacl/tableacl.go +++ b/go/vt/tableacl/tableacl.go @@ -110,6 +110,10 @@ func (tacl *tableACL) init(configFile string, aclCB func()) error { log.Infof("unable to read tableACL config file: %v Error: %v", configFile, err) return err } + if len(data) == 0 { + return errors.New("tableACL config file is empty") + } + config := &tableaclpb.Config{} if err := config.UnmarshalVT(data); err != nil { // try to parse tableacl as json file diff --git a/go/vt/tableacl/tableacl_test.go b/go/vt/tableacl/tableacl_test.go index 388567b62e2..16e5c5d6163 100644 --- a/go/vt/tableacl/tableacl_test.go +++ b/go/vt/tableacl/tableacl_test.go @@ -23,6 +23,7 @@ import ( "reflect" "testing" + "github.com/stretchr/testify/require" "google.golang.org/protobuf/proto" "vitess.io/vitess/go/vt/tableacl/acl" @@ -74,6 +75,19 @@ func TestInitWithValidConfig(t *testing.T) { } } +func TestInitWithEmptyConfig(t *testing.T) { + tacl := tableACL{factory: &simpleacl.Factory{}} + f, err := os.CreateTemp("", "tableacl") + require.NoError(t, err) + + defer os.Remove(f.Name()) + err = f.Close() + require.NoError(t, err) + + err = tacl.init(f.Name(), func() {}) + require.Error(t, err) +} + func TestInitFromProto(t *testing.T) { tacl := tableACL{factory: &simpleacl.Factory{}} readerACL := tacl.Authorized("my_test_table", READER) diff --git 
a/go/vt/topo/etcd2topo/watch.go b/go/vt/topo/etcd2topo/watch.go index cdc9be44b21..2fc58d437ff 100644 --- a/go/vt/topo/etcd2topo/watch.go +++ b/go/vt/topo/etcd2topo/watch.go @@ -51,7 +51,7 @@ func (s *Server) Watch(ctx context.Context, filePath string) (*topo.WatchData, < } wd := &topo.WatchData{ Contents: initial.Kvs[0].Value, - Version: EtcdVersion(initial.Kvs[0].ModRevision), + Version: EtcdVersion(initial.Kvs[0].Version), } // Create an outer context that will be canceled on return and will cancel all inner watches. @@ -76,7 +76,7 @@ func (s *Server) Watch(ctx context.Context, filePath string) (*topo.WatchData, < defer close(notifications) defer outerCancel() - var currVersion = initial.Header.Revision + var rev = initial.Header.Revision var watchRetries int for { select { @@ -107,9 +107,9 @@ func (s *Server) Watch(ctx context.Context, filePath string) (*topo.WatchData, < // Cancel inner context on retry and create new one. watchCancel() watchCtx, watchCancel = context.WithCancel(ctx) - newWatcher := s.cli.Watch(watchCtx, nodePath, clientv3.WithRev(currVersion)) + newWatcher := s.cli.Watch(watchCtx, nodePath, clientv3.WithRev(rev)) if newWatcher == nil { - log.Warningf("watch %v failed and get a nil channel returned, currVersion: %v", nodePath, currVersion) + log.Warningf("watch %v failed and get a nil channel returned, rev: %v", nodePath, rev) } else { watcher = newWatcher } @@ -126,7 +126,7 @@ func (s *Server) Watch(ctx context.Context, filePath string) (*topo.WatchData, < return } - currVersion = wresp.Header.GetRevision() + rev = wresp.Header.GetRevision() for _, ev := range wresp.Events { switch ev.Type { @@ -174,7 +174,7 @@ func (s *Server) WatchRecursive(ctx context.Context, dirpath string) ([]*topo.Wa var wd topo.WatchDataRecursive wd.Path = string(kv.Key) wd.Contents = kv.Value - wd.Version = EtcdVersion(initial.Kvs[0].ModRevision) + wd.Version = EtcdVersion(initial.Kvs[0].Version) initialwd = append(initialwd, &wd) } @@ -200,7 +200,7 @@ func (s *Server) WatchRecursive(ctx context.Context, dirpath string) ([]*topo.Wa defer close(notifications) defer outerCancel() - var currVersion = initial.Header.Revision + var rev = initial.Header.Revision var watchRetries int for { select { @@ -228,9 +228,9 @@ func (s *Server) WatchRecursive(ctx context.Context, dirpath string) ([]*topo.Wa watchCancel() watchCtx, watchCancel = context.WithCancel(ctx) - newWatcher := s.cli.Watch(watchCtx, nodePath, clientv3.WithRev(currVersion), clientv3.WithPrefix()) + newWatcher := s.cli.Watch(watchCtx, nodePath, clientv3.WithRev(rev), clientv3.WithPrefix()) if newWatcher == nil { - log.Warningf("watch %v failed and get a nil channel returned, currVersion: %v", nodePath, currVersion) + log.Warningf("watch %v failed and get a nil channel returned, rev: %v", nodePath, rev) } else { watcher = newWatcher } @@ -247,7 +247,7 @@ func (s *Server) WatchRecursive(ctx context.Context, dirpath string) ([]*topo.Wa return } - currVersion = wresp.Header.GetRevision() + rev = wresp.Header.GetRevision() for _, ev := range wresp.Events { switch ev.Type { diff --git a/go/vt/vtctl/workflow/server.go b/go/vt/vtctl/workflow/server.go index 1658a4e7243..a7f02dd67af 100644 --- a/go/vt/vtctl/workflow/server.go +++ b/go/vt/vtctl/workflow/server.go @@ -2991,7 +2991,9 @@ func (s *Server) WorkflowSwitchTraffic(ctx context.Context, req *vtctldatapb.Wor return nil, err } if hasReplica || hasRdonly { - if rdDryRunResults, err = s.switchReads(ctx, req, ts, startState, timeout, false, direction); err != nil { + // If we're going to switch 
writes immediately after then we don't need to + // rebuild the SrvVSchema here as we will do it after switching writes. + if rdDryRunResults, err = s.switchReads(ctx, req, ts, startState, !hasPrimary /* rebuildSrvVSchema */, direction); err != nil { return nil, err } log.Infof("Switch Reads done for workflow %s.%s", req.Keyspace, req.Workflow) @@ -3045,7 +3047,7 @@ func (s *Server) WorkflowSwitchTraffic(ctx context.Context, req *vtctldatapb.Wor } // switchReads is a generic way of switching read traffic for a workflow. -func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitchTrafficRequest, ts *trafficSwitcher, state *State, timeout time.Duration, cancel bool, direction TrafficSwitchDirection) (*[]string, error) { +func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitchTrafficRequest, ts *trafficSwitcher, state *State, rebuildSrvVSchema bool, direction TrafficSwitchDirection) (*[]string, error) { var roTabletTypes []topodatapb.TabletType // When we are switching all traffic we also get the primary tablet type, which we need to // filter out for switching reads. @@ -3132,7 +3134,7 @@ func (s *Server) switchReads(ctx context.Context, req *vtctldatapb.WorkflowSwitc if ts.MigrationType() == binlogdatapb.MigrationType_TABLES { if ts.isPartialMigration { ts.Logger().Infof("Partial migration, skipping switchTableReads as traffic is all or nothing per shard and overridden for reads AND writes in the ShardRoutingRule created when switching writes.") - } else if err := sw.switchTableReads(ctx, req.Cells, roTabletTypes, direction); err != nil { + } else if err := sw.switchTableReads(ctx, req.Cells, roTabletTypes, rebuildSrvVSchema, direction); err != nil { return handleError("failed to switch read traffic for the tables", err) } return sw.logs(), nil diff --git a/go/vt/vtctl/workflow/switcher.go b/go/vt/vtctl/workflow/switcher.go index 5e95e648299..d0b924016d0 100644 --- a/go/vt/vtctl/workflow/switcher.go +++ b/go/vt/vtctl/workflow/switcher.go @@ -66,8 +66,8 @@ func (r *switcher) switchShardReads(ctx context.Context, cells []string, servedT return r.ts.switchShardReads(ctx, cells, servedTypes, direction) } -func (r *switcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { - return r.ts.switchTableReads(ctx, cells, servedTypes, direction) +func (r *switcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, rebuildSrvVSchema bool, direction TrafficSwitchDirection) error { + return r.ts.switchTableReads(ctx, cells, servedTypes, rebuildSrvVSchema, direction) } func (r *switcher) startReverseVReplication(ctx context.Context) error { diff --git a/go/vt/vtctl/workflow/switcher_dry_run.go b/go/vt/vtctl/workflow/switcher_dry_run.go index b7ad8207574..14075f60dee 100644 --- a/go/vt/vtctl/workflow/switcher_dry_run.go +++ b/go/vt/vtctl/workflow/switcher_dry_run.go @@ -77,7 +77,7 @@ func (dr *switcherDryRun) switchShardReads(ctx context.Context, cells []string, return nil } -func (dr *switcherDryRun) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (dr *switcherDryRun) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, rebuildSrvVSchema bool, direction TrafficSwitchDirection) error { ks := dr.ts.TargetKeyspaceName() if direction == DirectionBackward { ks = dr.ts.SourceKeyspaceName() @@ -89,6 +89,9 @@ func (dr 
*switcherDryRun) switchTableReads(ctx context.Context, cells []string, tables := strings.Join(dr.ts.Tables(), ",") dr.drLog.Logf("Switch reads for tables [%s] to keyspace %s for tablet types [%s]", tables, ks, strings.Join(tabletTypes, ",")) dr.drLog.Logf("Routing rules for tables [%s] will be updated", tables) + if rebuildSrvVSchema { + dr.drLog.Logf("Serving VSchema will be rebuilt for the %s keyspace", ks) + } return nil } diff --git a/go/vt/vtctl/workflow/switcher_interface.go b/go/vt/vtctl/workflow/switcher_interface.go index 9f73fd45ad6..b9b8b6f6126 100644 --- a/go/vt/vtctl/workflow/switcher_interface.go +++ b/go/vt/vtctl/workflow/switcher_interface.go @@ -36,7 +36,7 @@ type iswitcher interface { changeRouting(ctx context.Context) error streamMigraterfinalize(ctx context.Context, ts *trafficSwitcher, workflows []string) error startReverseVReplication(ctx context.Context) error - switchTableReads(ctx context.Context, cells []string, servedType []topodatapb.TabletType, direction TrafficSwitchDirection) error + switchTableReads(ctx context.Context, cells []string, servedType []topodatapb.TabletType, rebuildSrvVSchema bool, direction TrafficSwitchDirection) error switchShardReads(ctx context.Context, cells []string, servedType []topodatapb.TabletType, direction TrafficSwitchDirection) error validateWorkflowHasCompleted(ctx context.Context) error removeSourceTables(ctx context.Context, removalType TableRemovalType) error diff --git a/go/vt/vtctl/workflow/traffic_switcher.go b/go/vt/vtctl/workflow/traffic_switcher.go index f921342fce9..0a0c88198c1 100644 --- a/go/vt/vtctl/workflow/traffic_switcher.go +++ b/go/vt/vtctl/workflow/traffic_switcher.go @@ -577,7 +577,7 @@ func (ts *trafficSwitcher) switchShardReads(ctx context.Context, cells []string, return nil } -func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, direction TrafficSwitchDirection) error { +func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, servedTypes []topodatapb.TabletType, rebuildSrvVSchema bool, direction TrafficSwitchDirection) error { log.Infof("switchTableReads: cells: %s, tablet types: %+v, direction %d", strings.Join(cells, ","), servedTypes, direction) rules, err := topotools.GetRoutingRules(ctx, ts.TopoServer()) if err != nil { @@ -609,7 +609,10 @@ func (ts *trafficSwitcher) switchTableReads(ctx context.Context, cells []string, if err := topotools.SaveRoutingRules(ctx, ts.TopoServer(), rules); err != nil { return err } - return ts.TopoServer().RebuildSrvVSchema(ctx, cells) + if rebuildSrvVSchema { + return ts.TopoServer().RebuildSrvVSchema(ctx, cells) + } + return nil } func (ts *trafficSwitcher) startReverseVReplication(ctx context.Context) error { diff --git a/go/vt/vtgate/buffer/buffer.go b/go/vt/vtgate/buffer/buffer.go index 0900709145f..dec83e2c78c 100644 --- a/go/vt/vtgate/buffer/buffer.go +++ b/go/vt/vtgate/buffer/buffer.go @@ -176,6 +176,10 @@ func New(cfg *Config) *Buffer { } } +func (b *Buffer) GetConfig() *Config { + return b.config +} + // WaitForFailoverEnd blocks until a pending buffering due to a failover for // keyspace/shard is over. // If there is no ongoing failover, "err" is checked. 
If it's caused by a diff --git a/go/vt/vtgate/buffer/flags.go b/go/vt/vtgate/buffer/flags.go index b45f10a6e38..01a3c33e869 100644 --- a/go/vt/vtgate/buffer/flags.go +++ b/go/vt/vtgate/buffer/flags.go @@ -70,6 +70,9 @@ func verifyFlags() error { if bufferSize < 1 { return fmt.Errorf("--buffer_size must be >= 1 (specified value: %d)", bufferSize) } + if bufferMinTimeBetweenFailovers < 1*time.Second { + return fmt.Errorf("--buffer_min_time_between_failovers must be >= 1s (specified value: %v)", bufferMinTimeBetweenFailovers) + } if bufferDrainConcurrency < 1 { return fmt.Errorf("--buffer_drain_concurrency must be >= 1 (specified value: %d)", bufferDrainConcurrency) diff --git a/go/vt/vtgate/buffer/shard_buffer.go b/go/vt/vtgate/buffer/shard_buffer.go index 5a2a6c9ec18..e1f02bb7f0e 100644 --- a/go/vt/vtgate/buffer/shard_buffer.go +++ b/go/vt/vtgate/buffer/shard_buffer.go @@ -25,14 +25,13 @@ import ( "time" "vitess.io/vitess/go/vt/discovery" - - "vitess.io/vitess/go/vt/vtgate/errorsanitizer" - "vitess.io/vitess/go/vt/log" "vitess.io/vitess/go/vt/logutil" - topodatapb "vitess.io/vitess/go/vt/proto/topodata" "vitess.io/vitess/go/vt/topo/topoproto" "vitess.io/vitess/go/vt/vterrors" + "vitess.io/vitess/go/vt/vtgate/errorsanitizer" + + topodatapb "vitess.io/vitess/go/vt/proto/topodata" ) // bufferState represents the different states a shardBuffer object can be in. diff --git a/go/vt/vtgate/executor.go b/go/vt/vtgate/executor.go index b99873ced02..3bb3f84810d 100644 --- a/go/vt/vtgate/executor.go +++ b/go/vt/vtgate/executor.go @@ -30,15 +30,12 @@ import ( "github.com/spf13/pflag" - "vitess.io/vitess/go/cache/theine" - "vitess.io/vitess/go/streamlog" - "vitess.io/vitess/go/vt/vtenv" - "vitess.io/vitess/go/vt/vthash" - "vitess.io/vitess/go/acl" + "vitess.io/vitess/go/cache/theine" "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/stats" + "vitess.io/vitess/go/streamlog" "vitess.io/vitess/go/trace" "vitess.io/vitess/go/vt/callerid" "vitess.io/vitess/go/vt/key" @@ -53,6 +50,7 @@ import ( "vitess.io/vitess/go/vt/srvtopo" "vitess.io/vitess/go/vt/sysvars" "vitess.io/vitess/go/vt/topo/topoproto" + "vitess.io/vitess/go/vt/vtenv" "vitess.io/vitess/go/vt/vterrors" "vitess.io/vitess/go/vt/vtgate/engine" "vitess.io/vitess/go/vt/vtgate/evalengine" @@ -62,6 +60,7 @@ import ( "vitess.io/vitess/go/vt/vtgate/vindexes" "vitess.io/vitess/go/vt/vtgate/vschemaacl" "vitess.io/vitess/go/vt/vtgate/vtgateservice" + "vitess.io/vitess/go/vt/vthash" ) var ( diff --git a/go/vt/vtgate/executor_select_test.go b/go/vt/vtgate/executor_select_test.go index 90118603f46..8b40c8d8636 100644 --- a/go/vt/vtgate/executor_select_test.go +++ b/go/vt/vtgate/executor_select_test.go @@ -3130,33 +3130,17 @@ func TestSelectWithUnionAll(t *testing.T) { bv1, _ := sqltypes.BuildBindVariable([]int64{1, 2}) bv2, _ := sqltypes.BuildBindVariable([]int64{3}) sbc1WantQueries := []*querypb.BoundQuery{{ - Sql: "select id from `user` where id in ::__vals", - BindVariables: map[string]*querypb.BindVariable{ - "__vals": bv1, - "vtg1": bv, - "vtg2": bv, - }, - }, { - Sql: "select id from `user` where id in ::__vals", + Sql: "select id from `user` where id in ::__vals union all select id from `user` where id in ::vtg1", BindVariables: map[string]*querypb.BindVariable{ "__vals": bv1, "vtg1": bv, - "vtg2": bv, }, }} sbc2WantQueries := []*querypb.BoundQuery{{ - Sql: "select id from `user` where id in ::__vals", - BindVariables: map[string]*querypb.BindVariable{ - "__vals": bv2, - "vtg1": bv, - "vtg2": bv, - }, - }, { - Sql: 
"select id from `user` where id in ::__vals", + Sql: "select id from `user` where id in ::__vals union all select id from `user` where id in ::vtg1", BindVariables: map[string]*querypb.BindVariable{ "__vals": bv2, "vtg1": bv, - "vtg2": bv, }, }} session := &vtgatepb.Session{ diff --git a/go/vt/vtgate/executor_test.go b/go/vt/vtgate/executor_test.go index 498d87eca8c..821f5f93001 100644 --- a/go/vt/vtgate/executor_test.go +++ b/go/vt/vtgate/executor_test.go @@ -1317,7 +1317,7 @@ func TestExecutorAlterVSchemaKeyspace(t *testing.T) { session := NewSafeSession(&vtgatepb.Session{TargetString: "@primary", Autocommit: true}) vschemaUpdates := make(chan *vschemapb.SrvVSchema, 2) - executor.serv.WatchSrvVSchema(ctx, "aa", func(vschema *vschemapb.SrvVSchema, err error) bool { + executor.serv.WatchSrvVSchema(ctx, executor.cell, func(vschema *vschemapb.SrvVSchema, err error) bool { vschemaUpdates <- vschema return true }) diff --git a/go/vt/vtgate/executor_vschema_ddl_test.go b/go/vt/vtgate/executor_vschema_ddl_test.go index 1c2813a33c4..1c912ed0d62 100644 --- a/go/vt/vtgate/executor_vschema_ddl_test.go +++ b/go/vt/vtgate/executor_vschema_ddl_test.go @@ -17,26 +17,23 @@ limitations under the License. package vtgate import ( - "context" "reflect" "slices" "testing" "time" - "vitess.io/vitess/go/test/utils" - - "vitess.io/vitess/go/vt/callerid" - querypb "vitess.io/vitess/go/vt/proto/query" - vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/test/utils" + "vitess.io/vitess/go/vt/callerid" "vitess.io/vitess/go/vt/vtgate/vschemaacl" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - + querypb "vitess.io/vitess/go/vt/proto/query" vschemapb "vitess.io/vitess/go/vt/proto/vschema" vtgatepb "vitess.io/vitess/go/vt/proto/vtgate" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" ) func waitForVindex(t *testing.T, ks, name string, watch chan *vschemapb.SrvVSchema, executor *Executor) (*vschemapb.SrvVSchema, *vschemapb.Vindex) { @@ -426,9 +423,7 @@ func TestExecutorDropSequenceDDL(t *testing.T) { _, err = executor.Execute(ctx, nil, "TestExecute", session, stmt, nil) require.NoError(t, err) - ctxWithTimeout, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - if !waitForNewerVSchema(ctxWithTimeout, executor, ts) { + if !waitForNewerVSchema(ctx, executor, ts, 5*time.Second) { t.Fatalf("vschema did not drop the sequene 'test_seq'") } @@ -464,9 +459,7 @@ func TestExecutorDropAutoIncDDL(t *testing.T) { stmt = "alter vschema on test_table add auto_increment id using `db-name`.`test_seq`" _, err = executor.Execute(ctx, nil, "TestExecute", session, stmt, nil) require.NoError(t, err) - ctxWithTimeout, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - if !waitForNewerVSchema(ctxWithTimeout, executor, ts) { + if !waitForNewerVSchema(ctx, executor, ts, 5*time.Second) { t.Fatalf("vschema did not update with auto_increment for 'test_table'") } ts = executor.VSchema().GetCreated() @@ -480,9 +473,7 @@ func TestExecutorDropAutoIncDDL(t *testing.T) { _, err = executor.Execute(ctx, nil, "TestExecute", session, stmt, nil) require.NoError(t, err) - ctxWithTimeout, cancel2 := context.WithTimeout(ctx, 5*time.Second) - defer cancel2() - if !waitForNewerVSchema(ctxWithTimeout, executor, ts) { + if !waitForNewerVSchema(ctx, executor, ts, 5*time.Second) { t.Fatalf("vschema did not drop the auto_increment for 'test_table'") } if 
executor.vm.GetCurrentSrvVschema().Keyspaces[ks].Tables["test_table"].AutoIncrement != nil { diff --git a/go/vt/vtgate/logstats/logstats.go b/go/vt/vtgate/logstats/logstats.go index 5ea7820a72e..8f8ba41e3cd 100644 --- a/go/vt/vtgate/logstats/logstats.go +++ b/go/vt/vtgate/logstats/logstats.go @@ -18,21 +18,16 @@ package logstats import ( "context" - "encoding/json" - "fmt" "io" "net/url" "time" "github.com/google/safehtml" - "vitess.io/vitess/go/sqltypes" + "vitess.io/vitess/go/logstats" "vitess.io/vitess/go/streamlog" - "vitess.io/vitess/go/tb" "vitess.io/vitess/go/vt/callerid" "vitess.io/vitess/go/vt/callinfo" - "vitess.io/vitess/go/vt/log" - querypb "vitess.io/vitess/go/vt/proto/query" ) @@ -128,69 +123,60 @@ func (stats *LogStats) Logf(w io.Writer, params url.Values) error { return nil } - // FormatBindVariables call might panic so we're going to catch it here - // and print out the stack trace for debugging. - defer func() { - if x := recover(); x != nil { - log.Errorf("Uncaught panic:\n%v\n%s", x, tb.Stack(4)) - } - }() - - formattedBindVars := "\"[REDACTED]\"" - if !streamlog.GetRedactDebugUIQueries() { - _, fullBindParams := params["full"] - formattedBindVars = sqltypes.FormatBindVariables( - stats.BindVariables, - fullBindParams, - streamlog.GetQueryLogFormat() == streamlog.QueryLogFormatJSON, - ) - } - - // TODO: remove username here we fully enforce immediate caller id + redacted := streamlog.GetRedactDebugUIQueries() + _, fullBindParams := params["full"] remoteAddr, username := stats.RemoteAddrUsername() - var fmtString string - switch streamlog.GetQueryLogFormat() { - case streamlog.QueryLogFormatText: - fmtString = "%v\t%v\t%v\t'%v'\t'%v'\t%v\t%v\t%.6f\t%.6f\t%.6f\t%.6f\t%v\t%q\t%v\t%v\t%v\t%q\t%q\t%q\t%v\t%v\t%q\n" - case streamlog.QueryLogFormatJSON: - fmtString = "{\"Method\": %q, \"RemoteAddr\": %q, \"Username\": %q, \"ImmediateCaller\": %q, \"Effective Caller\": %q, \"Start\": \"%v\", \"End\": \"%v\", \"TotalTime\": %.6f, \"PlanTime\": %v, \"ExecuteTime\": %v, \"CommitTime\": %v, \"StmtType\": %q, \"SQL\": %q, \"BindVars\": %v, \"ShardQueries\": %v, \"RowsAffected\": %v, \"Error\": %q, \"TabletType\": %q, \"SessionUUID\": %q, \"Cached Plan\": %v, \"TablesUsed\": %v, \"ActiveKeyspace\": %q}\n" - } - - tables := stats.TablesUsed - if tables == nil { - tables = []string{} - } - tablesUsed, marshalErr := json.Marshal(tables) - if marshalErr != nil { - return marshalErr + log := logstats.NewLogger() + log.Init(streamlog.GetQueryLogFormat() == streamlog.QueryLogFormatJSON) + log.Key("Method") + log.StringUnquoted(stats.Method) + log.Key("RemoteAddr") + log.StringUnquoted(remoteAddr) + log.Key("Username") + log.StringUnquoted(username) + log.Key("ImmediateCaller") + log.StringSingleQuoted(stats.ImmediateCaller()) + log.Key("Effective Caller") + log.StringSingleQuoted(stats.EffectiveCaller()) + log.Key("Start") + log.Time(stats.StartTime) + log.Key("End") + log.Time(stats.EndTime) + log.Key("TotalTime") + log.Duration(stats.TotalTime()) + log.Key("PlanTime") + log.Duration(stats.PlanTime) + log.Key("ExecuteTime") + log.Duration(stats.ExecuteTime) + log.Key("CommitTime") + log.Duration(stats.CommitTime) + log.Key("StmtType") + log.StringUnquoted(stats.StmtType) + log.Key("SQL") + log.String(stats.SQL) + log.Key("BindVars") + if redacted { + log.Redacted() + } else { + log.BindVariables(stats.BindVariables, fullBindParams) } - _, err := fmt.Fprintf( - w, - fmtString, - stats.Method, - remoteAddr, - username, - stats.ImmediateCaller(), - stats.EffectiveCaller(), - 
stats.StartTime.Format("2006-01-02 15:04:05.000000"), - stats.EndTime.Format("2006-01-02 15:04:05.000000"), - stats.TotalTime().Seconds(), - stats.PlanTime.Seconds(), - stats.ExecuteTime.Seconds(), - stats.CommitTime.Seconds(), - stats.StmtType, - stats.SQL, - formattedBindVars, - stats.ShardQueries, - stats.RowsAffected, - stats.ErrorStr(), - stats.TabletType, - stats.SessionUUID, - stats.CachedPlan, - string(tablesUsed), - stats.ActiveKeyspace, - ) - - return err + log.Key("ShardQueries") + log.Uint(stats.ShardQueries) + log.Key("RowsAffected") + log.Uint(stats.RowsAffected) + log.Key("Error") + log.String(stats.ErrorStr()) + log.Key("TabletType") + log.String(stats.TabletType) + log.Key("SessionUUID") + log.String(stats.SessionUUID) + log.Key("Cached Plan") + log.Bool(stats.CachedPlan) + log.Key("TablesUsed") + log.Strings(stats.TablesUsed) + log.Key("ActiveKeyspace") + log.String(stats.ActiveKeyspace) + + return log.Flush(w) } diff --git a/go/vt/vtgate/logstats/logstats_test.go b/go/vt/vtgate/logstats/logstats_test.go index dbe49b200b8..ae3c01e0f0b 100644 --- a/go/vt/vtgate/logstats/logstats_test.go +++ b/go/vt/vtgate/logstats/logstats_test.go @@ -79,7 +79,7 @@ func TestLogStatsFormat(t *testing.T) { { // 0 redact: false, format: "text", - expected: "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1\"\tmap[intVal:type:INT64 value:\"1\"]\t0\t0\t\"\"\t\"PRIMARY\"\t\"suuid\"\tfalse\t[\"ks1.tbl1\",\"ks2.tbl2\"]\t\"db\"\n", + expected: "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1\"\t{\"intVal\": {\"type\": \"INT64\", \"value\": 1}}\t0\t0\t\"\"\t\"PRIMARY\"\t\"suuid\"\tfalse\t[\"ks1.tbl1\",\"ks2.tbl2\"]\t\"db\"\n", bindVars: intBindVar, }, { // 1 redact: true, @@ -99,7 +99,7 @@ func TestLogStatsFormat(t *testing.T) { }, { // 4 redact: false, format: "text", - expected: "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1\"\tmap[strVal:type:VARCHAR value:\"abc\"]\t0\t0\t\"\"\t\"PRIMARY\"\t\"suuid\"\tfalse\t[\"ks1.tbl1\",\"ks2.tbl2\"]\t\"db\"\n", + expected: "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1\"\t{\"strVal\": {\"type\": \"VARCHAR\", \"value\": \"abc\"}}\t0\t0\t\"\"\t\"PRIMARY\"\t\"suuid\"\tfalse\t[\"ks1.tbl1\",\"ks2.tbl2\"]\t\"db\"\n", bindVars: stringBindVar, }, { // 5 redact: true, @@ -129,14 +129,13 @@ func TestLogStatsFormat(t *testing.T) { streamlog.SetQueryLogFormat(test.format) if test.format == "text" { got := testFormat(t, logStats, params) + t.Logf("got: %s", got) assert.Equal(t, test.expected, got) - for _, variable := range logStats.BindVariables { - fmt.Println("->" + fmt.Sprintf("%v", variable)) - } return } got := testFormat(t, logStats, params) + t.Logf("got: %s", got) var parsed map[string]any err := json.Unmarshal([]byte(got), &parsed) assert.NoError(t, err) @@ -157,12 +156,12 @@ func TestLogStatsFilter(t *testing.T) { params := map[string][]string{"full": {}} got := testFormat(t, logStats, params) - want := "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1 /* LOG_THIS_QUERY */\"\tmap[intVal:type:INT64 value:\"1\"]\t0\t0\t\"\"\t\"\"\t\"\"\tfalse\t[]\t\"\"\n" + want := "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1 /* 
LOG_THIS_QUERY */\"\t{\"intVal\": {\"type\": \"INT64\", \"value\": 1}}\t0\t0\t\"\"\t\"\"\t\"\"\tfalse\t[]\t\"\"\n" assert.Equal(t, want, got) streamlog.SetQueryLogFilterTag("LOG_THIS_QUERY") got = testFormat(t, logStats, params) - want = "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1 /* LOG_THIS_QUERY */\"\tmap[intVal:type:INT64 value:\"1\"]\t0\t0\t\"\"\t\"\"\t\"\"\tfalse\t[]\t\"\"\n" + want = "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1 /* LOG_THIS_QUERY */\"\t{\"intVal\": {\"type\": \"INT64\", \"value\": 1}}\t0\t0\t\"\"\t\"\"\t\"\"\tfalse\t[]\t\"\"\n" assert.Equal(t, want, got) streamlog.SetQueryLogFilterTag("NOT_THIS_QUERY") @@ -180,12 +179,12 @@ func TestLogStatsRowThreshold(t *testing.T) { params := map[string][]string{"full": {}} got := testFormat(t, logStats, params) - want := "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1 /* LOG_THIS_QUERY */\"\tmap[intVal:type:INT64 value:\"1\"]\t0\t0\t\"\"\t\"\"\t\"\"\tfalse\t[]\t\"\"\n" + want := "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1 /* LOG_THIS_QUERY */\"\t{\"intVal\": {\"type\": \"INT64\", \"value\": 1}}\t0\t0\t\"\"\t\"\"\t\"\"\tfalse\t[]\t\"\"\n" assert.Equal(t, want, got) streamlog.SetQueryLogRowThreshold(0) got = testFormat(t, logStats, params) - want = "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1 /* LOG_THIS_QUERY */\"\tmap[intVal:type:INT64 value:\"1\"]\t0\t0\t\"\"\t\"\"\t\"\"\tfalse\t[]\t\"\"\n" + want = "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t0.000000\t0.000000\t0.000000\t\t\"sql1 /* LOG_THIS_QUERY */\"\t{\"intVal\": {\"type\": \"INT64\", \"value\": 1}}\t0\t0\t\"\"\t\"\"\t\"\"\tfalse\t[]\t\"\"\n" assert.Equal(t, want, got) streamlog.SetQueryLogRowThreshold(1) got = testFormat(t, logStats, params) diff --git a/go/vt/vtgate/plan_execute.go b/go/vt/vtgate/plan_execute.go index 4e2c3bfea4c..199892842ee 100644 --- a/go/vt/vtgate/plan_execute.go +++ b/go/vt/vtgate/plan_execute.go @@ -24,20 +24,20 @@ import ( "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/log" - querypb "vitess.io/vitess/go/vt/proto/query" - vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" "vitess.io/vitess/go/vt/sqlparser" "vitess.io/vitess/go/vt/vterrors" "vitess.io/vitess/go/vt/vtgate/engine" "vitess.io/vitess/go/vt/vtgate/logstats" "vitess.io/vitess/go/vt/vtgate/vtgateservice" + + querypb "vitess.io/vitess/go/vt/proto/query" + vtrpcpb "vitess.io/vitess/go/vt/proto/vtrpc" ) type planExec func(ctx context.Context, plan *engine.Plan, vc *vcursorImpl, bindVars map[string]*querypb.BindVariable, startTime time.Time) error type txResult func(sqlparser.StatementType, *sqltypes.Result) error -func waitForNewerVSchema(ctx context.Context, e *Executor, lastVSchemaCreated time.Time) bool { - timeout := 30 * time.Second +func waitForNewerVSchema(ctx context.Context, e *Executor, lastVSchemaCreated time.Time, timeout time.Duration) bool { pollingInterval := 10 * time.Millisecond waitCtx, cancel := context.WithTimeout(ctx, timeout) ticker := time.NewTicker(pollingInterval) @@ -48,7 +48,7 @@ func waitForNewerVSchema(ctx context.Context, e *Executor, lastVSchemaCreated ti case <-waitCtx.Done(): return false case <-ticker.C: - if 
e.VSchema().GetCreated().After(lastVSchemaCreated) {
+			if e.VSchema() != nil && e.VSchema().GetCreated().After(lastVSchemaCreated) {
 				return true
 			}
 		}
@@ -64,11 +64,11 @@ func (e *Executor) newExecute(
 	logStats *logstats.LogStats,
 	execPlan planExec, // used when there is a plan to execute
 	recResult txResult, // used when it's something simple like begin/commit/rollback/savepoint
-) error {
-	// 1: Prepare before planning and execution
+) (err error) {
+	// 1: Prepare before planning and execution.
 
 	// Start an implicit transaction if necessary.
-	err := e.startTxIfNecessary(ctx, safeSession)
+	err = e.startTxIfNecessary(ctx, safeSession)
 	if err != nil {
 		return err
 	}
@@ -79,21 +79,35 @@
 	query, comments := sqlparser.SplitMarginComments(sql)
 
-	// 2: Parse and Validate query
+	// 2: Parse and Validate query.
 	stmt, reservedVars, err := parseAndValidateQuery(query, e.env.Parser())
 	if err != nil {
 		return err
 	}
 
-	var lastVSchemaCreated time.Time
-	vs := e.VSchema()
-	lastVSchemaCreated = vs.GetCreated()
+	var (
+		vs                 = e.VSchema()
+		lastVSchemaCreated = vs.GetCreated()
+		result             *sqltypes.Result
+		plan               *engine.Plan
+	)
+
 	for try := 0; try < MaxBufferingRetries; try++ {
-		if try > 0 && !vs.GetCreated().After(lastVSchemaCreated) {
-			// There is a race due to which the executor's vschema may not have been updated yet.
-			// Without a wait we fail non-deterministically since the previous vschema will not have the updated routing rules
-			if waitForNewerVSchema(ctx, e, lastVSchemaCreated) {
+		if try > 0 && !vs.GetCreated().After(lastVSchemaCreated) { // We need to wait for a vschema update.
+			// Without a wait we fail non-deterministically, since the previous vschema will not have
+			// the updated routing rules.
+			// We retry MaxBufferingRetries-1 (2) times before giving up. How long we wait before each retry
+			// -- IF we don't see a newer vschema come in -- affects both how long we retry in total and how
+			// quickly we retry the query and (should) succeed when the traffic switch fails, or when we
+			// otherwise hit the max buffer failover time without resolving the keyspace event and marking
+			// it as consistent.
+			// This calculation attempts to ensure that we retry at a sensible interval and number of times
+			// based on the buffering configuration. This way we should be able to perform the max retries
+			// within the given window of time for most queries, and we should not end up waiting too long
+			// after the traffic switch fails or the buffer window has ended, retrying old queries.
+			timeout := e.resolver.scatterConn.gateway.buffer.GetConfig().MaxFailoverDuration / (MaxBufferingRetries - 1)
+			if waitForNewerVSchema(ctx, e, lastVSchemaCreated, timeout) {
 				vs = e.VSchema()
+				lastVSchemaCreated = vs.GetCreated()
 			}
 		}
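The per-retry wait derived above, worked through with concrete numbers; the 20s MaxFailoverDuration below is an assumed value of --buffer_max_failover_duration for illustration, not something this diff sets:

package main

import (
	"fmt"
	"time"
)

func main() {
	// MaxBufferingRetries is 3, per the comment above (MaxBufferingRetries-1 = 2 waits).
	const maxBufferingRetries = 3
	// Assumed buffering window; the real value comes from the buffer config.
	maxFailoverDuration := 20 * time.Second
	timeout := maxFailoverDuration / (maxBufferingRetries - 1)
	fmt.Println(timeout) // 10s per wait: both retries fit roughly inside the buffering window
}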
@@ -102,16 +116,13 @@
 		if err != nil {
 			return err
 		}
 
-		// 3: Create a plan for the query
+		// 3: Create a plan for the query.
 		// If we are retrying, it is likely that the routing rules have changed and hence we need to
 		// replan the query since the target keyspace of the resolved shards may have changed as a
-		// result of MoveTables. So we cannot reuse the plan from the first try.
-		// When buffering ends, many queries might be getting planned at the same time. Ideally we
-		// should be able to reuse plans once the first drained query has been planned. For now, we
-		// punt on this and choose not to prematurely optimize since it is not clear how much caching
-		// will help and if it will result in hard-to-track edge cases.
-
-		var plan *engine.Plan
+		// result of MoveTables SwitchTraffic, which does a RebuildSrvVSchema, which in turn causes
+		// the vtgate to clear the cached plans when processing the new serving vschema.
+		// When buffering ends, many queries might be getting planned at the same time and we then
+		// take full advantage of the cached plan.
 		plan, err = e.getPlan(ctx, vcursor, query, stmt, comments, bindVars, reservedVars, e.normalize, logStats)
 		execStart := e.logPlanningFinished(logStats, plan)
@@ -124,12 +135,12 @@
 			safeSession.ClearWarnings()
 		}
 
-		// add any warnings that the planner wants to add
+		// Add any warnings that the planner wants to add.
 		for _, warning := range plan.Warnings {
 			safeSession.RecordWarning(warning)
 		}
 
-		result, err := e.handleTransactions(ctx, mysqlCtx, safeSession, plan, logStats, vcursor, stmt)
+		result, err = e.handleTransactions(ctx, mysqlCtx, safeSession, plan, logStats, vcursor, stmt)
 		if err != nil {
 			return err
 		}
@@ -137,14 +148,14 @@
 			return recResult(plan.Type, result)
 		}
 
-		// 4: Prepare for execution
+		// 4: Prepare for execution.
 		err = e.addNeededBindVars(vcursor, plan.BindVarNeeds, bindVars, safeSession)
 		if err != nil {
 			logStats.Error = err
 			return err
 		}
 
-		// 5: Execute the plan and retry if needed
+		// 5: Execute the plan.
 		if plan.Instructions.NeedsTransaction() {
 			err = e.insideTransaction(ctx, safeSession, logStats,
 				func() error {
@@ -158,10 +169,39 @@
 			return err
 		}
 
+		// 6: Retry if needed.
 		rootCause := vterrors.RootCause(err)
 		if rootCause != nil && strings.Contains(rootCause.Error(), "enforce denied tables") {
 			log.V(2).Infof("Retry: %d, will retry query %s due to %v", try, query, err)
-			lastVSchemaCreated = vs.GetCreated()
+			if try == 0 { // We are going to retry at least once
+				defer func() {
+					// Prevent any plan cache pollution from queries planned against the wrong keyspace during a MoveTables
+					// traffic switching operation.
+					if err != nil { // The error we're checking here is the return value from the newExecute function
+						cause := vterrors.RootCause(err)
+						if cause != nil && strings.Contains(cause.Error(), "enforce denied tables") {
+							// The executor's VSchemaManager clears the plan cache when it receives a new vschema via its
+							// SrvVSchema watcher (it calls executor.SaveVSchema() in its watch's subscriber callback). This
+							// happens concurrently with the KeyspaceEventWatcher also receiving the new vschema in its
+							// SrvVSchema watcher and in its subscriber callback processing it (which includes getting info
+							// on all shards from the topo), and eventually determining that the keyspace is consistent and
+							// ending the buffering window. So there's a race with query retries such that a query could be
+							// planned against the wrong side just as the keyspace event is getting resolved and the buffers
+							// drained. Then that bad plan is the cached plan for the query until you do another
+							// topo.RebuildSrvVSchema/vtctldclient RebuildVSchemaGraph, which then causes the VSchemaManager
+							// to clear the plan cache. It's essentially a race between the two SrvVSchema watchers and the
+							// work they do when a new one is received. If we DID retry AND the last retry still encountered
+							// the error, we know that the plan used was 1) not valid/correct and going to
+							// the wrong side of the traffic switch, as it failed with the denied tables error, and 2) it will
+							// remain the plan in the cache if we do not clear the plans after it was added to the cache.
+ // So here we clear the plan cache in order to prevent this scenario where the bad plan is + // cached indefinitely and re-used after the buffering window ends and the keyspace event is + // resolved. + e.ClearPlans() + } + } + }() + } continue } diff --git a/go/vt/vtgate/planbuilder/operators/route_planning.go b/go/vt/vtgate/planbuilder/operators/route_planning.go index d0b32e37200..83634f8f7b7 100644 --- a/go/vt/vtgate/planbuilder/operators/route_planning.go +++ b/go/vt/vtgate/planbuilder/operators/route_planning.go @@ -392,8 +392,8 @@ func canMergeOnFilter(ctx *plancontext.PlanningContext, a, b *Route, predicate s if comparison.Operator != sqlparser.EqualOp { return false } - left := comparison.Left - right := comparison.Right + left := getColName(comparison.Left) + right := getColName(comparison.Right) lVindex := findColumnVindex(ctx, a, left) if lVindex == nil { @@ -533,6 +533,18 @@ func gen4ValuesEqual(ctx *plancontext.PlanningContext, a, b []sqlparser.Expr) bo func gen4ValEqual(ctx *plancontext.PlanningContext, a, b sqlparser.Expr) bool { switch a := a.(type) { + case sqlparser.ValTuple: + if b, ok := b.(sqlparser.ValTuple); ok { + return gen4ValuesEqual(ctx, a, b) + } + + return false + + case sqlparser.ListArg: + if b, ok := b.(sqlparser.ListArg); ok { + return a == b + } + case *sqlparser.ColName: if b, ok := b.(*sqlparser.ColName); ok { if !a.Name.Equal(b.Name) { diff --git a/go/vt/vtgate/planbuilder/operators/union_merging.go b/go/vt/vtgate/planbuilder/operators/union_merging.go index c2fd79cd026..1ede79424ad 100644 --- a/go/vt/vtgate/planbuilder/operators/union_merging.go +++ b/go/vt/vtgate/planbuilder/operators/union_merging.go @@ -148,8 +148,6 @@ func tryMergeUnionShardedRouting( scatterA := tblA.RouteOpCode == engine.Scatter scatterB := tblB.RouteOpCode == engine.Scatter - uniqueA := tblA.RouteOpCode == engine.EqualUnique - uniqueB := tblB.RouteOpCode == engine.EqualUnique switch { case scatterA: @@ -158,7 +156,11 @@ func tryMergeUnionShardedRouting( case scatterB: return createMergedUnion(ctx, routeA, routeB, exprsA, exprsB, distinct, tblB) - case uniqueA && uniqueB: + case tblA.RouteOpCode == engine.EqualUnique && tblB.RouteOpCode == engine.EqualUnique: + fallthrough + case tblA.RouteOpCode == engine.Equal && tblB.RouteOpCode == engine.Equal: + fallthrough + case tblA.RouteOpCode == engine.IN && tblB.RouteOpCode == engine.IN: aVdx := tblA.SelectedVindex() bVdx := tblB.SelectedVindex() aExpr := tblA.VindexExpressions() diff --git a/go/vt/vtgate/planbuilder/testdata/select_cases.json b/go/vt/vtgate/planbuilder/testdata/select_cases.json index 212cc3a427d..3b73e7e8382 100644 --- a/go/vt/vtgate/planbuilder/testdata/select_cases.json +++ b/go/vt/vtgate/planbuilder/testdata/select_cases.json @@ -1496,6 +1496,102 @@ ] } }, + { + "comment": "union with the same target shard (using `IN` with multiple matching values)", + "query": "SELECT id FROM music WHERE music.user_id IN (1, 2, 3) UNION SELECT id FROM music WHERE music.user_id IN (1, 2, 3)", + "plan": { + "QueryType": "SELECT", + "Original": "SELECT id FROM music WHERE music.user_id IN (1, 2, 3) UNION SELECT id FROM music WHERE music.user_id IN (1, 2, 3)", + "Instructions": { + "OperatorType": "Distinct", + "Collations": [ + "(0:1)" + ], + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "IN", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select dt.id, weight_string(dt.id) from (select id from music where 1 != 1 union select id from music where 1 != 1) as dt where 1 != 
1", + "Query": "select dt.id, weight_string(dt.id) from (select id from music where music.user_id in ::__vals union select id from music where music.user_id in (1, 2, 3)) as dt", + "Table": "music", + "Values": [ + "(1, 2, 3)" + ], + "Vindex": "user_index" + } + ] + }, + "TablesUsed": [ + "user.music" + ] + } + }, + { + "comment": "union with the same target shard (using `IN` with the same normalized value)", + "query": "SELECT id FROM music WHERE music.user_id IN ::vtg1 UNION SELECT id FROM music WHERE music.user_id IN ::vtg1", + "plan": { + "QueryType": "SELECT", + "Original": "SELECT id FROM music WHERE music.user_id IN ::vtg1 UNION SELECT id FROM music WHERE music.user_id IN ::vtg1", + "Instructions": { + "OperatorType": "Distinct", + "Collations": [ + "(0:1)" + ], + "ResultColumns": 1, + "Inputs": [ + { + "OperatorType": "Route", + "Variant": "IN", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select dt.id, weight_string(dt.id) from (select id from music where 1 != 1 union select id from music where 1 != 1) as dt where 1 != 1", + "Query": "select dt.id, weight_string(dt.id) from (select id from music where music.user_id in ::__vals union select id from music where music.user_id in ::vtg1) as dt", + "Table": "music", + "Values": [ + "::vtg1" + ], + "Vindex": "user_index" + } + ] + }, + "TablesUsed": [ + "user.music" + ] + } + }, + { + "comment": "union with the same target shard (using a mix of `IN` and `=` conditions)", + "query": "SELECT id FROM music WHERE music.user_id IN (1) UNION SELECT id FROM music WHERE music.user_id = 1", + "plan": { + "QueryType": "SELECT", + "Original": "SELECT id FROM music WHERE music.user_id IN (1) UNION SELECT id FROM music WHERE music.user_id = 1", + "Instructions": { + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select id from music where 1 != 1 union select id from music where 1 != 1", + "Query": "select id from music where music.user_id in (1) union select id from music where music.user_id = 1", + "Table": "music", + "Values": [ + "1" + ], + "Vindex": "user_index" + }, + "TablesUsed": [ + "user.music" + ] + } + }, { "comment": "union with the same target shard last_insert_id", "query": "select *, last_insert_id() from music where user_id = 1 union select * from user where id = 1", @@ -3846,46 +3942,19 @@ "QueryType": "SELECT", "Original": "SELECT music.id FROM music WHERE music.id IN (SELECT MAX(music.id) FROM music WHERE music.user_id IN (5, 6) GROUP BY music.user_id)", "Instructions": { - "OperatorType": "UncorrelatedSubquery", - "Variant": "PulloutIn", - "PulloutVars": [ - "__sq_has_values", - "__sq1" + "OperatorType": "Route", + "Variant": "IN", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.id from music where 1 != 1", + "Query": "select music.id from music where music.id in (select max(music.id) from music where music.user_id in (5, 6) group by music.user_id)", + "Table": "music", + "Values": [ + "(5, 6)" ], - "Inputs": [ - { - "InputName": "SubQuery", - "OperatorType": "Route", - "Variant": "IN", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select max(music.id) from music where 1 != 1 group by music.user_id", - "Query": "select max(music.id) from music where music.user_id in ::__vals group by music.user_id", - "Table": "music", - "Values": [ - "(5, 6)" - ], - "Vindex": "user_index" - }, - { - "InputName": "Outer", - "OperatorType": "Route", - "Variant": "IN", - 
"Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select music.id from music where 1 != 1", - "Query": "select music.id from music where :__sq_has_values and music.id in ::__vals", - "Table": "music", - "Values": [ - "::__sq1" - ], - "Vindex": "music_user_map" - } - ] + "Vindex": "user_index" }, "TablesUsed": [ "user.music" @@ -3959,46 +4028,19 @@ "QueryType": "SELECT", "Original": "SELECT music.id FROM music WHERE music.id IN (SELECT MAX(music.id) FROM music WHERE music.user_id = 5)", "Instructions": { - "OperatorType": "UncorrelatedSubquery", - "Variant": "PulloutIn", - "PulloutVars": [ - "__sq_has_values", - "__sq1" + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.id from music where 1 != 1", + "Query": "select music.id from music where music.id in (select max(music.id) from music where music.user_id = 5)", + "Table": "music", + "Values": [ + "5" ], - "Inputs": [ - { - "InputName": "SubQuery", - "OperatorType": "Route", - "Variant": "EqualUnique", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select max(music.id) from music where 1 != 1", - "Query": "select max(music.id) from music where music.user_id = 5", - "Table": "music", - "Values": [ - "5" - ], - "Vindex": "user_index" - }, - { - "InputName": "Outer", - "OperatorType": "Route", - "Variant": "IN", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select music.id from music where 1 != 1", - "Query": "select music.id from music where :__sq_has_values and music.id in ::__vals", - "Table": "music", - "Values": [ - "::__sq1" - ], - "Vindex": "music_user_map" - } - ] + "Vindex": "user_index" }, "TablesUsed": [ "user.music" @@ -4012,46 +4054,19 @@ "QueryType": "SELECT", "Original": "SELECT music.id FROM music WHERE music.id IN (SELECT MAX(music.id) FROM music WHERE music.user_id = 5 LIMIT 10)", "Instructions": { - "OperatorType": "UncorrelatedSubquery", - "Variant": "PulloutIn", - "PulloutVars": [ - "__sq_has_values", - "__sq1" + "OperatorType": "Route", + "Variant": "EqualUnique", + "Keyspace": { + "Name": "user", + "Sharded": true + }, + "FieldQuery": "select music.id from music where 1 != 1", + "Query": "select music.id from music where music.id in (select max(music.id) from music where music.user_id = 5 limit 10)", + "Table": "music", + "Values": [ + "5" ], - "Inputs": [ - { - "InputName": "SubQuery", - "OperatorType": "Route", - "Variant": "EqualUnique", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select max(music.id) from music where 1 != 1", - "Query": "select max(music.id) from music where music.user_id = 5 limit 10", - "Table": "music", - "Values": [ - "5" - ], - "Vindex": "user_index" - }, - { - "InputName": "Outer", - "OperatorType": "Route", - "Variant": "IN", - "Keyspace": { - "Name": "user", - "Sharded": true - }, - "FieldQuery": "select music.id from music where 1 != 1", - "Query": "select music.id from music where :__sq_has_values and music.id in ::__vals", - "Table": "music", - "Values": [ - "::__sq1" - ], - "Vindex": "music_user_map" - } - ] + "Vindex": "user_index" }, "TablesUsed": [ "user.music" diff --git a/go/vt/vtgate/sandbox_test.go b/go/vt/vtgate/sandbox_test.go index 3ceee09d5f7..27be6442cfe 100644 --- a/go/vt/vtgate/sandbox_test.go +++ b/go/vt/vtgate/sandbox_test.go @@ -281,7 +281,10 @@ func (sct *sandboxTopo) WatchSrvVSchema(ctx context.Context, cell string, callba } sct.topoServer.UpdateSrvVSchema(ctx, cell, 
srvVSchema)
-	current, updateChan, _ := sct.topoServer.WatchSrvVSchema(ctx, cell)
+	current, updateChan, err := sct.topoServer.WatchSrvVSchema(ctx, cell)
+	if err != nil {
+		panic(fmt.Sprintf("sandboxTopo WatchSrvVSchema returned an error: %v", err))
+	}
 	if !callback(current.Value, nil) {
 		panic("sandboxTopo callback returned false")
 	}
diff --git a/go/vt/vtgate/tabletgateway.go b/go/vt/vtgate/tabletgateway.go
index 1b4e1f1c0c1..ca07b04b55d 100644
--- a/go/vt/vtgate/tabletgateway.go
+++ b/go/vt/vtgate/tabletgateway.go
@@ -192,11 +192,24 @@ func (gw *TabletGateway) WaitForTablets(ctx context.Context, tabletTypesToWait [
 	}
 
 	// Finds the targets to look for.
-	targets, err := srvtopo.FindAllTargets(ctx, gw.srvTopoServer, gw.localCell, tabletTypesToWait)
+	targets, keyspaces, err := srvtopo.FindAllTargetsAndKeyspaces(ctx, gw.srvTopoServer, gw.localCell, discovery.KeyspacesToWatch, tabletTypesToWait)
 	if err != nil {
 		return err
 	}
-	return gw.hc.WaitForAllServingTablets(ctx, targets)
+	err = gw.hc.WaitForAllServingTablets(ctx, targets)
+	if err != nil {
+		return err
+	}
+	// After having waited for all serving tablets, we should also wait for the keyspace event watcher to have seen
+	// the updates and marked all the keyspaces as consistent (if we want to wait for primary tablets).
+	// Otherwise, we could be in a situation where even though the healthchecks have arrived, the keyspace event watcher hasn't finished processing them.
+	// So, if a primary tablet goes non-serving (because of a PRS or some other reason), we won't be able to start buffering.
+	// Waiting for the keyspaces to become consistent ensures that all the primary tablets for all the shards are serving as seen by the keyspace event watcher,
+	// and any disruption from now on will make sure we start buffering properly.
+	if topoproto.IsTypeInList(topodatapb.TabletType_PRIMARY, tabletTypesToWait) && gw.kev != nil {
+		return gw.kev.WaitForConsistentKeyspaces(ctx, keyspaces)
+	}
+	return nil
 }
 
 // Close shuts down underlying connections.
@@ -283,18 +296,21 @@ func (gw *TabletGateway) withRetry(ctx context.Context, target *querypb.Target,
 		if len(tablets) == 0 {
 			// if we have a keyspace event watcher, check if the reason why our primary is not available is that it's currently being resharded
 			// or if a reparent operation is in progress.
-			if kev := gw.kev; kev != nil {
+			// We check whether a reshard is ongoing, or whether the primary is serving, only when the target is a primary. We don't want to buffer
+			// replica queries, so it doesn't make any sense to check for resharding or reparenting in that case.
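+			// For example (an illustration of the behavior, not new logic): a REPLICA target with no
+			// healthy tablets simply gets the "no healthy tablet available" error below, whereas a
+			// PRIMARY target may buffer instead while a PRS or a resharding cutover completes.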
+ if kev := gw.kev; kev != nil && target.TabletType == topodatapb.TabletType_PRIMARY { if kev.TargetIsBeingResharded(ctx, target) { log.V(2).Infof("current keyspace is being resharded, retrying: %s: %s", target.Keyspace, debug.Stack()) err = vterrors.Errorf(vtrpcpb.Code_CLUSTER_EVENT, buffer.ClusterEventReshardingInProgress) continue } - primary, notServing := kev.PrimaryIsNotServing(ctx, target) - if notServing { + primary, shouldBuffer := kev.ShouldStartBufferingForTarget(ctx, target) + if shouldBuffer { err = vterrors.Errorf(vtrpcpb.Code_CLUSTER_EVENT, buffer.ClusterEventReparentInProgress) continue } - // if primary is serving, but we initially found no tablet, we're in an inconsistent state + // if the keyspace event manager doesn't think we should buffer queries, and also sees a primary tablet, + // but we initially found no tablet, we're in an inconsistent state // we then retry the entire loop if primary != nil { err = vterrors.Errorf(vtrpcpb.Code_UNAVAILABLE, "inconsistent state detected, primary is serving but initially found no available tablet") diff --git a/go/vt/vtgate/tabletgateway_flaky_test.go b/go/vt/vtgate/tabletgateway_flaky_test.go index acd24ecd7db..d136542d176 100644 --- a/go/vt/vtgate/tabletgateway_flaky_test.go +++ b/go/vt/vtgate/tabletgateway_flaky_test.go @@ -67,7 +67,7 @@ func TestGatewayBufferingWhenPrimarySwitchesServingState(t *testing.T) { waitForBuffering := func(enabled bool) { timer := time.NewTimer(bufferingWaitTimeout) defer timer.Stop() - for _, buffering := tg.kev.PrimaryIsNotServing(ctx, target); buffering != enabled; _, buffering = tg.kev.PrimaryIsNotServing(ctx, target) { + for _, buffering := tg.kev.ShouldStartBufferingForTarget(ctx, target); buffering != enabled; _, buffering = tg.kev.ShouldStartBufferingForTarget(ctx, target) { select { case <-timer.C: require.Fail(t, "timed out waiting for buffering of enabled: %t", enabled) @@ -213,8 +213,8 @@ func TestGatewayBufferingWhileReparenting(t *testing.T) { hc.Broadcast(primaryTablet) require.Len(t, tg.hc.GetHealthyTabletStats(target), 0, "GetHealthyTabletStats has tablets even though it shouldn't") - _, isNotServing := tg.kev.PrimaryIsNotServing(ctx, target) - require.True(t, isNotServing) + _, shouldStartBuffering := tg.kev.ShouldStartBufferingForTarget(ctx, target) + require.True(t, shouldStartBuffering) // add a result to the sandbox connection of the new primary sbcReplica.SetResults([]*sqltypes.Result{sqlResult1}) @@ -245,8 +245,8 @@ outer: case <-timeout: require.Fail(t, "timed out - could not verify the new primary") case <-time.After(10 * time.Millisecond): - newPrimary, notServing := tg.kev.PrimaryIsNotServing(ctx, target) - if newPrimary != nil && newPrimary.Uid == replicaTablet.Alias.Uid && !notServing { + newPrimary, shouldBuffer := tg.kev.ShouldStartBufferingForTarget(ctx, target) + if newPrimary != nil && newPrimary.Uid == replicaTablet.Alias.Uid && !shouldBuffer { break outer } } diff --git a/go/vt/vtgate/tabletgateway_test.go b/go/vt/vtgate/tabletgateway_test.go index 32d18dcc9ab..fc86ab358c8 100644 --- a/go/vt/vtgate/tabletgateway_test.go +++ b/go/vt/vtgate/tabletgateway_test.go @@ -26,6 +26,7 @@ import ( "github.com/stretchr/testify/require" "vitess.io/vitess/go/test/utils" + "vitess.io/vitess/go/vt/vttablet/queryservice" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/vt/discovery" @@ -298,3 +299,58 @@ func verifyShardErrors(t *testing.T, err error, wantErrors []string, wantCode vt } require.Equal(t, vterrors.Code(err), wantCode, "wanted error code: %s, got: %v", wantCode, 
vterrors.Code(err)) } + +// TestWithRetry tests the functionality of withRetry function in different circumstances. +func TestWithRetry(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + tg := NewTabletGateway(ctx, discovery.NewFakeHealthCheck(nil), &fakeTopoServer{}, "cell") + tg.kev = discovery.NewKeyspaceEventWatcher(ctx, tg.srvTopoServer, tg.hc, tg.localCell) + defer func() { + cancel() + tg.Close(ctx) + }() + + testcases := []struct { + name string + target *querypb.Target + inTransaction bool + inner func(ctx context.Context, target *querypb.Target, conn queryservice.QueryService) (bool, error) + expectedErr string + }{ + { + name: "Transaction on a replica", + target: &querypb.Target{ + Keyspace: "ks", + Shard: "0", + TabletType: topodatapb.TabletType_REPLICA, + }, + inTransaction: true, + inner: func(ctx context.Context, target *querypb.Target, conn queryservice.QueryService) (bool, error) { + return false, nil + }, + expectedErr: "tabletGateway's query service can only be used for non-transactional queries on replicas", + }, { + name: "No replica tablets available", + target: &querypb.Target{ + Keyspace: "ks", + Shard: "0", + TabletType: topodatapb.TabletType_REPLICA, + }, + inTransaction: false, + inner: func(ctx context.Context, target *querypb.Target, conn queryservice.QueryService) (bool, error) { + return false, nil + }, + expectedErr: `target: ks.0.replica: no healthy tablet available for 'keyspace:"ks" shard:"0" tablet_type:REPLICA'`, + }, + } + for _, tt := range testcases { + t.Run(tt.name, func(t *testing.T) { + err := tg.withRetry(ctx, tt.target, nil, "", tt.inTransaction, tt.inner) + if tt.expectedErr == "" { + require.NoError(t, err) + } else { + require.ErrorContains(t, err, tt.expectedErr) + } + }) + } +} diff --git a/go/vt/vttablet/filelogger/filelogger_test.go b/go/vt/vttablet/filelogger/filelogger_test.go index 1562c9626a8..f747ebba93b 100644 --- a/go/vt/vttablet/filelogger/filelogger_test.go +++ b/go/vt/vttablet/filelogger/filelogger_test.go @@ -60,7 +60,7 @@ func TestFileLog(t *testing.T) { for i := 0; i < 10; i++ { time.Sleep(10 * time.Millisecond) - want := "\t\t\t''\t''\t0001-01-01 00:00:00.000000\t0001-01-01 00:00:00.000000\t0.000000\t\t\"test 1\"\tmap[]\t1\t\"test 1 PII\"\tmysql\t0.000000\t0.000000\t0\t0\t0\t\"\"\t\n\t\t\t''\t''\t0001-01-01 00:00:00.000000\t0001-01-01 00:00:00.000000\t0.000000\t\t\"test 2\"\tmap[]\t1\t\"test 2 PII\"\tmysql\t0.000000\t0.000000\t0\t0\t0\t\"\"\t\n" + want := "\t\t\t''\t''\t0001-01-01 00:00:00.000000\t0001-01-01 00:00:00.000000\t0.000000\t\t\"test 1\"\t{}\t1\t\"test 1 PII\"\tmysql\t0.000000\t0.000000\t0\t0\t0\t\"\"\t\n\t\t\t''\t''\t0001-01-01 00:00:00.000000\t0001-01-01 00:00:00.000000\t0.000000\t\t\"test 2\"\t{}\t1\t\"test 2 PII\"\tmysql\t0.000000\t0.000000\t0\t0\t0\t\"\"\t\n" contents, _ := os.ReadFile(logPath) got := string(contents) if want == got { diff --git a/go/vt/vttablet/sysloglogger/sysloglogger_test.go b/go/vt/vttablet/sysloglogger/sysloglogger_test.go index 7a1678638ca..3a06b98ed1c 100644 --- a/go/vt/vttablet/sysloglogger/sysloglogger_test.go +++ b/go/vt/vttablet/sysloglogger/sysloglogger_test.go @@ -87,7 +87,7 @@ func (fw *failingFakeWriter) Close() error { return nil } // expectedLogStatsText returns the results expected from the plugin processing a dummy message generated by mockLogStats(...). 
func expectedLogStatsText(originalSQL string) string { return fmt.Sprintf("Execute\t\t\t''\t''\t0001-01-01 00:00:00.000000\t0001-01-01 00:00:00.000000\t0.000000\tPASS_SELECT\t"+ - "\"%s\"\t%s\t1\t\"%s\"\tmysql\t0.000000\t0.000000\t0\t0\t0\t\"\"", originalSQL, "map[]", originalSQL) + "\"%s\"\t%s\t1\t\"%s\"\tmysql\t0.000000\t0.000000\t0\t0\t0\t\"\"", originalSQL, "{}", originalSQL) } // expectedRedactedLogStatsText returns the results expected from the plugin processing a dummy message generated by mockLogStats(...) diff --git a/go/vt/vttablet/tabletmanager/vreplication/stats.go b/go/vt/vttablet/tabletmanager/vreplication/stats.go index 892247efee0..5b5b6ede24c 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/stats.go +++ b/go/vt/vttablet/tabletmanager/vreplication/stats.go @@ -59,10 +59,12 @@ type vrStats struct { mu sync.Mutex isOpen bool controllers map[int32]*controller + + ThrottledCount *stats.Counter } func (st *vrStats) register() { - + st.ThrottledCount = stats.NewCounter("", "") stats.NewGaugeFunc("VReplicationStreamCount", "Number of vreplication streams", st.numControllers) stats.NewGaugeFunc("VReplicationLagSecondsMax", "Max vreplication seconds behind primary", st.maxReplicationLagSeconds) stats.NewStringMapFuncWithMultiLabels( @@ -502,6 +504,29 @@ func (st *vrStats) register() { return result }) + stats.NewCounterFunc( + "VReplicationThrottledCountTotal", + "The total number of times that vreplication has been throttled", + func() int64 { + st.mu.Lock() + defer st.mu.Unlock() + return st.ThrottledCount.Get() + }) + stats.NewCountersFuncWithMultiLabels( + "VReplicationThrottledCounts", + "The number of times vreplication was throttled by workflow, id, throttler (trx or tablet), and the sub-component that was throttled", + []string{"workflow", "id", "throttler", "component"}, + func() map[string]int64 { + st.mu.Lock() + defer st.mu.Unlock() + result := make(map[string]int64) + for _, ct := range st.controllers { + for key, val := range ct.blpStats.ThrottledCounts.Counts() { + result[fmt.Sprintf("%s.%d.%s", ct.workflow, ct.id, key)] = val + } + } + return result + }) } func (st *vrStats) numControllers() int64 { diff --git a/go/vt/vttablet/tabletmanager/vreplication/stats_test.go b/go/vt/vttablet/tabletmanager/vreplication/stats_test.go index 79149d34d6d..d94802adb7b 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/stats_test.go +++ b/go/vt/vttablet/tabletmanager/vreplication/stats_test.go @@ -26,6 +26,7 @@ import ( "github.com/stretchr/testify/require" "vitess.io/vitess/go/mysql/replication" + "vitess.io/vitess/go/stats" "vitess.io/vitess/go/vt/binlog/binlogplayer" "vitess.io/vitess/go/vt/proto/binlogdata" @@ -132,7 +133,9 @@ func TestStatusHtml(t *testing.T) { func TestVReplicationStats(t *testing.T) { blpStats := binlogplayer.NewStats() defer blpStats.Stop() - testStats := &vrStats{} + testStats := &vrStats{ + ThrottledCount: stats.NewCounter("", ""), + } testStats.isOpen = true testStats.controllers = map[int32]*controller{ 1: { @@ -184,6 +187,14 @@ func TestVReplicationStats(t *testing.T) { require.Equal(t, int64(100), testStats.status().Controllers[0].CopyLoopCount) require.Equal(t, int64(200), testStats.status().Controllers[0].CopyRowCount) + testStats.ThrottledCount.Add(99) + require.Equal(t, int64(99), testStats.ThrottledCount.Get()) + + blpStats.ThrottledCounts.Add([]string{"tablet", "vcopier"}, 10) + blpStats.ThrottledCounts.Add([]string{"tablet", "vplayer"}, 80) + require.Equal(t, int64(10), 
testStats.controllers[1].blpStats.ThrottledCounts.Counts()["tablet.vcopier"]) + require.Equal(t, int64(80), testStats.controllers[1].blpStats.ThrottledCounts.Counts()["tablet.vplayer"]) + var tm int64 = 1234567890 blpStats.RecordHeartbeat(tm) require.Equal(t, tm, blpStats.Heartbeat()) diff --git a/go/vt/vttablet/tabletmanager/vreplication/vplayer.go b/go/vt/vttablet/tabletmanager/vreplication/vplayer.go index fe0ba3914bb..84f052b3dfc 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/vplayer.go +++ b/go/vt/vttablet/tabletmanager/vreplication/vplayer.go @@ -453,12 +453,18 @@ func (vp *vplayer) recordHeartbeat() error { func (vp *vplayer) applyEvents(ctx context.Context, relay *relayLog) error { defer vp.vr.dbClient.Rollback() + estimateLag := func() { + behind := time.Now().UnixNano() - vp.lastTimestampNs - vp.timeOffsetNs + vp.vr.stats.ReplicationLagSeconds.Store(behind / 1e9) + vp.vr.stats.VReplicationLags.Add(strconv.Itoa(int(vp.vr.id)), time.Duration(behind/1e9)*time.Second) + } + // If we're not running, set ReplicationLagSeconds to be very high. // TODO(sougou): if we also stored the time of the last event, we // can estimate this value more accurately. defer vp.vr.stats.ReplicationLagSeconds.Store(math.MaxInt64) defer vp.vr.stats.VReplicationLags.Add(strconv.Itoa(int(vp.vr.id)), math.MaxInt64) - var sbm int64 = -1 + var lagSecs int64 for { if ctx.Err() != nil { return ctx.Err() @@ -466,6 +472,7 @@ func (vp *vplayer) applyEvents(ctx context.Context, relay *relayLog) error { // Check throttler. if !vp.vr.vre.throttlerClient.ThrottleCheckOKOrWaitAppName(ctx, throttlerapp.Name(vp.throttlerAppName)) { _ = vp.vr.updateTimeThrottled(throttlerapp.VPlayerName) + estimateLag() continue } @@ -473,13 +480,7 @@ func (vp *vplayer) applyEvents(ctx context.Context, relay *relayLog) error { if err != nil { return err } - // No events were received. This likely means that there's a network partition. - // So, we should assume we're falling behind. - if len(items) == 0 { - behind := time.Now().UnixNano() - vp.lastTimestampNs - vp.timeOffsetNs - vp.vr.stats.ReplicationLagSeconds.Store(behind / 1e9) - vp.vr.stats.VReplicationLags.Add(strconv.Itoa(int(vp.vr.id)), time.Duration(behind/1e9)*time.Second) - } + // Empty transactions are saved at most once every idleTimeout. // This covers two situations: // 1. Fetch was idle for idleTimeout. @@ -496,12 +497,21 @@ func (vp *vplayer) applyEvents(ctx context.Context, relay *relayLog) error { return nil } } + + lagSecs = -1 for i, events := range items { for j, event := range events { if event.Timestamp != 0 { - vp.lastTimestampNs = event.Timestamp * 1e9 - vp.timeOffsetNs = time.Now().UnixNano() - event.CurrentTime - sbm = event.CurrentTime/1e9 - event.Timestamp + // If the event is a heartbeat sent while throttled then do not update + // the lag based on it. + // If the batch consists only of throttled heartbeat events then we cannot + // determine the actual lag, as the vstreamer is fully throttled, and we + // will estimate it after processing the batch. 
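+					// (Otherwise a throttled heartbeat, whose timestamp reflects when it was injected
+					// rather than applied transaction progress, would make the reported lag look close
+					// to zero even though the stream is in fact stalled.)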
+ if !(event.Type == binlogdatapb.VEventType_HEARTBEAT && event.Throttled) { + vp.lastTimestampNs = event.Timestamp * 1e9 + vp.timeOffsetNs = time.Now().UnixNano() - event.CurrentTime + lagSecs = event.CurrentTime/1e9 - event.Timestamp + } } mustSave := false switch event.Type { @@ -532,11 +542,12 @@ func (vp *vplayer) applyEvents(ctx context.Context, relay *relayLog) error { } } - if sbm >= 0 { - vp.vr.stats.ReplicationLagSeconds.Store(sbm) - vp.vr.stats.VReplicationLags.Add(strconv.Itoa(int(vp.vr.id)), time.Duration(sbm)*time.Second) + if lagSecs >= 0 { + vp.vr.stats.ReplicationLagSeconds.Store(lagSecs) + vp.vr.stats.VReplicationLags.Add(strconv.Itoa(int(vp.vr.id)), time.Duration(lagSecs)*time.Second) + } else { // We couldn't determine the lag, so we need to estimate it + estimateLag() } - } } diff --git a/go/vt/vttablet/tabletmanager/vreplication/vplayer_flaky_test.go b/go/vt/vttablet/tabletmanager/vreplication/vplayer_flaky_test.go index 04738ee7857..409b2b6d9ee 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/vplayer_flaky_test.go +++ b/go/vt/vttablet/tabletmanager/vreplication/vplayer_flaky_test.go @@ -186,7 +186,6 @@ func TestPlayerInvisibleColumns(t *testing.T) { output := qh.Expect(tcases.output) expectNontxQueries(t, output) time.Sleep(1 * time.Second) - log.Flush() if tcases.table != "" { expectData(t, tcases.table, tcases.data) } @@ -3094,7 +3093,6 @@ func TestPlayerNoBlob(t *testing.T) { output := qh.Expect(tcases.output) expectNontxQueries(t, output) time.Sleep(1 * time.Second) - log.Flush() if tcases.table != "" { expectData(t, tcases.table, tcases.data) } @@ -3333,7 +3331,6 @@ func TestPlayerBatchMode(t *testing.T) { } expectNontxQueries(t, output) time.Sleep(1 * time.Second) - log.Flush() if tcase.table != "" { expectData(t, tcase.table, tcase.data) } diff --git a/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go b/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go index f9f0cc44443..30fd90bb2ed 100644 --- a/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go +++ b/go/vt/vttablet/tabletmanager/vreplication/vreplicator.go @@ -578,10 +578,21 @@ func (vr *vreplicator) throttlerAppName() string { return throttlerapp.Concatenate(names...) } +// updateTimeThrottled updates the time_throttled field in the _vt.vreplication record +// with a rate limit so that it's only saved in the database at most once per +// throttleUpdatesRateLimiter.tickerTime. +// It also increments the throttled count in the stats to keep track of how many +// times a VReplication workflow, and the specific sub-component, is throttled by the +// tablet throttler over time. It also increments the global throttled count to keep +// track of how many times in total vreplication has been throttled across all workflows +// (both ones that currently exist and ones that no longer do). 
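+// For example (names are illustrative): a workflow "wf1" with id 1 that is throttled in the
+// vplayer shows up in VReplicationThrottledCounts under the key "wf1.1.tablet.vplayer", and
+// every such event also increments VReplicationThrottledCountTotal by one.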
func (vr *vreplicator) updateTimeThrottled(appThrottled throttlerapp.Name) error { + appName := appThrottled.String() + vr.stats.ThrottledCounts.Add([]string{"tablet", appName}, 1) + globalStats.ThrottledCount.Add(1) err := vr.throttleUpdatesRateLimiter.Do(func() error { tm := time.Now().Unix() - update, err := binlogplayer.GenerateUpdateTimeThrottled(vr.id, tm, appThrottled.String()) + update, err := binlogplayer.GenerateUpdateTimeThrottled(vr.id, tm, appName) if err != nil { return err } diff --git a/go/vt/vttablet/tabletserver/tabletenv/logstats.go b/go/vt/vttablet/tabletserver/tabletenv/logstats.go index 962b5f0b122..ad7e09de169 100644 --- a/go/vt/vttablet/tabletserver/tabletenv/logstats.go +++ b/go/vt/vttablet/tabletserver/tabletenv/logstats.go @@ -18,7 +18,6 @@ package tabletenv import ( "context" - "fmt" "io" "net/url" "strings" @@ -26,6 +25,7 @@ import ( "github.com/google/safehtml" + "vitess.io/vitess/go/logstats" "vitess.io/vitess/go/sqltypes" "vitess.io/vitess/go/streamlog" "vitess.io/vitess/go/vt/callerid" @@ -181,55 +181,65 @@ func (stats *LogStats) Logf(w io.Writer, params url.Values) error { return nil } - rewrittenSQL := "[REDACTED]" - formattedBindVars := "\"[REDACTED]\"" - - if !streamlog.GetRedactDebugUIQueries() { - rewrittenSQL = stats.RewrittenSQL() - - _, fullBindParams := params["full"] - formattedBindVars = sqltypes.FormatBindVariables( - stats.BindVariables, - fullBindParams, - streamlog.GetQueryLogFormat() == streamlog.QueryLogFormatJSON, - ) - } - + redacted := streamlog.GetRedactDebugUIQueries() + _, fullBindParams := params["full"] // TODO: remove username here we fully enforce immediate caller id callInfo, username := stats.CallInfo() - // Valid options for the QueryLogFormat are text or json - var fmtString string - switch streamlog.GetQueryLogFormat() { - case streamlog.QueryLogFormatText: - fmtString = "%v\t%v\t%v\t'%v'\t'%v'\t%v\t%v\t%.6f\t%v\t%q\t%v\t%v\t%q\t%v\t%.6f\t%.6f\t%v\t%v\t%v\t%q\t\n" - case streamlog.QueryLogFormatJSON: - fmtString = "{\"Method\": %q, \"CallInfo\": %q, \"Username\": %q, \"ImmediateCaller\": %q, \"Effective Caller\": %q, \"Start\": \"%v\", \"End\": \"%v\", \"TotalTime\": %.6f, \"PlanType\": %q, \"OriginalSQL\": %q, \"BindVars\": %v, \"Queries\": %v, \"RewrittenSQL\": %q, \"QuerySources\": %q, \"MysqlTime\": %.6f, \"ConnWaitTime\": %.6f, \"RowsAffected\": %v,\"TransactionID\": %v,\"ResponseSize\": %v, \"Error\": %q}\n" + log := logstats.NewLogger() + log.Init(streamlog.GetQueryLogFormat() == streamlog.QueryLogFormatJSON) + log.Key("Method") + log.StringUnquoted(stats.Method) + log.Key("CallInfo") + log.StringUnquoted(callInfo) + log.Key("Username") + log.StringUnquoted(username) + log.Key("ImmediateCaller") + log.StringSingleQuoted(stats.ImmediateCaller()) + log.Key("Effective Caller") + log.StringSingleQuoted(stats.EffectiveCaller()) + log.Key("Start") + log.Time(stats.StartTime) + log.Key("End") + log.Time(stats.EndTime) + log.Key("TotalTime") + log.Duration(stats.TotalTime()) + log.Key("PlanType") + log.StringUnquoted(stats.PlanType) + log.Key("OriginalSQL") + log.String(stats.OriginalSQL) + log.Key("BindVars") + if redacted { + log.Redacted() + } else { + log.BindVariables(stats.BindVariables, fullBindParams) } - - _, err := fmt.Fprintf( - w, - fmtString, - stats.Method, - callInfo, - username, - stats.ImmediateCaller(), - stats.EffectiveCaller(), - stats.StartTime.Format("2006-01-02 15:04:05.000000"), - stats.EndTime.Format("2006-01-02 15:04:05.000000"), - stats.TotalTime().Seconds(), - stats.PlanType, - stats.OriginalSQL, - 
formattedBindVars, - stats.NumberOfQueries, - rewrittenSQL, - stats.FmtQuerySources(), - stats.MysqlResponseTime.Seconds(), - stats.WaitingForConnection.Seconds(), - stats.RowsAffected, - stats.TransactionID, - stats.SizeOfResponse(), - stats.ErrorStr(), - ) - return err + log.Key("Queries") + log.Int(int64(stats.NumberOfQueries)) + log.Key("RewrittenSQL") + if redacted { + log.Redacted() + } else { + log.String(stats.RewrittenSQL()) + } + log.Key("QuerySources") + log.StringUnquoted(stats.FmtQuerySources()) + log.Key("MysqlTime") + log.Duration(stats.MysqlResponseTime) + log.Key("ConnWaitTime") + log.Duration(stats.WaitingForConnection) + log.Key("RowsAffected") + log.Uint(uint64(stats.RowsAffected)) + log.Key("TransactionID") + log.Int(stats.TransactionID) + log.Key("ResponseSize") + log.Int(int64(stats.SizeOfResponse())) + log.Key("Error") + log.String(stats.ErrorStr()) + + // logstats from the vttablet are always tab-terminated; keep this for backwards + // compatibility for existing parsers + log.TabTerminated() + + return log.Flush(w) } diff --git a/go/vt/vttablet/tabletserver/tabletenv/logstats_test.go b/go/vt/vttablet/tabletserver/tabletenv/logstats_test.go index 51e056687b5..7412a0a436c 100644 --- a/go/vt/vttablet/tabletserver/tabletenv/logstats_test.go +++ b/go/vt/vttablet/tabletserver/tabletenv/logstats_test.go @@ -73,7 +73,7 @@ func TestLogStatsFormat(t *testing.T) { streamlog.SetRedactDebugUIQueries(false) streamlog.SetQueryLogFormat("text") got := testFormat(logStats, url.Values(params)) - want := "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t\t\"sql\"\tmap[intVal:type:INT64 value:\"1\"]\t1\t\"sql with pii\"\tmysql\t0.000000\t0.000000\t0\t12345\t1\t\"\"\t\n" + want := "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t\t\"sql\"\t{\"intVal\": {\"type\": \"INT64\", \"value\": 1}}\t1\t\"sql with pii\"\tmysql\t0.000000\t0.000000\t0\t12345\t1\t\"\"\t\n" if got != want { t.Errorf("logstats format: got:\n%q\nwant:\n%q\n", got, want) } @@ -127,7 +127,7 @@ func TestLogStatsFormat(t *testing.T) { streamlog.SetQueryLogFormat("text") got = testFormat(logStats, url.Values(params)) - want = "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t\t\"sql\"\tmap[strVal:type:VARCHAR value:\"abc\"]\t1\t\"sql with pii\"\tmysql\t0.000000\t0.000000\t0\t12345\t1\t\"\"\t\n" + want = "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t\t\"sql\"\t{\"strVal\": {\"type\": \"VARCHAR\", \"value\": \"abc\"}}\t1\t\"sql with pii\"\tmysql\t0.000000\t0.000000\t0\t12345\t1\t\"\"\t\n" if got != want { t.Errorf("logstats format: got:\n%q\nwant:\n%q\n", got, want) } @@ -164,14 +164,14 @@ func TestLogStatsFilter(t *testing.T) { params := map[string][]string{"full": {}} got := testFormat(logStats, url.Values(params)) - want := "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t\t\"sql /* LOG_THIS_QUERY */\"\tmap[intVal:type:INT64 value:\"1\"]\t1\t\"sql with pii\"\tmysql\t0.000000\t0.000000\t0\t0\t1\t\"\"\t\n" + want := "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t\t\"sql /* LOG_THIS_QUERY */\"\t{\"intVal\": {\"type\": \"INT64\", \"value\": 1}}\t1\t\"sql with pii\"\tmysql\t0.000000\t0.000000\t0\t0\t1\t\"\"\t\n" if got != want { t.Errorf("logstats format: got:\n%q\nwant:\n%q\n", got, want) } streamlog.SetQueryLogFilterTag("LOG_THIS_QUERY") got = testFormat(logStats, url.Values(params)) - want = 
"test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t\t\"sql /* LOG_THIS_QUERY */\"\tmap[intVal:type:INT64 value:\"1\"]\t1\t\"sql with pii\"\tmysql\t0.000000\t0.000000\t0\t0\t1\t\"\"\t\n" + want = "test\t\t\t''\t''\t2017-01-01 01:02:03.000000\t2017-01-01 01:02:04.000001\t1.000001\t\t\"sql /* LOG_THIS_QUERY */\"\t{\"intVal\": {\"type\": \"INT64\", \"value\": 1}}\t1\t\"sql with pii\"\tmysql\t0.000000\t0.000000\t0\t0\t1\t\"\"\t\n" if got != want { t.Errorf("logstats format: got:\n%q\nwant:\n%q\n", got, want) } diff --git a/go/vt/vttablet/tabletserver/throttle/base/throttle_metric.go b/go/vt/vttablet/tabletserver/throttle/base/throttle_metric.go index 3d4c4f95a2e..28c6a8d7283 100644 --- a/go/vt/vttablet/tabletserver/throttle/base/throttle_metric.go +++ b/go/vt/vttablet/tabletserver/throttle/base/throttle_metric.go @@ -43,7 +43,10 @@ package base import ( "errors" - "strings" + "net" + + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" ) // MetricResult is what we expect our probes to return. This can be a numeric result, or @@ -66,11 +69,20 @@ var ErrNoSuchMetric = errors.New("No such metric") var ErrInvalidCheckType = errors.New("Unknown throttler check type") // IsDialTCPError sees if the given error indicates a TCP issue -func IsDialTCPError(e error) bool { - if e == nil { +func IsDialTCPError(err error) bool { + if err == nil { return false } - return strings.HasPrefix(e.Error(), "dial tcp") + + if s, ok := status.FromError(err); ok { + return s.Code() == codes.Unavailable || s.Code() == codes.DeadlineExceeded + } + + switch err := err.(type) { + case *net.OpError: + return err.Op == "dial" && err.Net == "tcp" + } + return false } type noHostsMetricResult struct{} diff --git a/go/vt/vttablet/tabletserver/throttle/throttler_test.go b/go/vt/vttablet/tabletserver/throttle/throttler_test.go index 25de8ca96f5..0452913c540 100644 --- a/go/vt/vttablet/tabletserver/throttle/throttler_test.go +++ b/go/vt/vttablet/tabletserver/throttle/throttler_test.go @@ -28,10 +28,15 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials/insecure" + + "vitess.io/vitess/go/vt/grpcclient" "vitess.io/vitess/go/vt/topo" "vitess.io/vitess/go/vt/vtenv" "vitess.io/vitess/go/vt/vttablet/tabletserver/connpool" "vitess.io/vitess/go/vt/vttablet/tabletserver/tabletenv" + "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/base" "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/config" "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/mysql" "vitess.io/vitess/go/vt/vttablet/tmclient" @@ -157,6 +162,21 @@ func newTestThrottler() *Throttler { return throttler } +func TestIsDialTCPError(t *testing.T) { + // Verify that IsDialTCPError actually recognizes grpc dial errors + cc, err := grpcclient.DialContext(context.Background(), ":0", true, grpc.WithTransportCredentials(insecure.NewCredentials())) + require.NoError(t, err) + defer cc.Close() + + err = cc.Invoke(context.Background(), "/Fail", nil, nil) + + require.True(t, base.IsDialTCPError(err)) + require.True(t, base.IsDialTCPError(fmt.Errorf("wrapped: %w", err))) + + nonDialErr := fmt.Errorf("rpc error: code = NotFound desc = method not found") + require.False(t, base.IsDialTCPError(nonDialErr)) +} + func TestIsAppThrottled(t *testing.T) { throttler := Throttler{ throttledApps: cache.New(cache.NoExpiration, 0), diff --git a/go/vt/vttablet/tabletserver/vstreamer/helper_event_test.go 
b/go/vt/vttablet/tabletserver/vstreamer/helper_event_test.go new file mode 100644 index 00000000000..0b479bd588c --- /dev/null +++ b/go/vt/vttablet/tabletserver/vstreamer/helper_event_test.go @@ -0,0 +1,589 @@ +/* +Copyright 2024 The Vitess Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package vstreamer + +// This file contains the test framework for testing the event generation logic in vstreamer. +// The test framework is designed to be used in the following way: +// 1. Define a TestSpec with the following fields: +// - ddls: a list of create table statements for the tables to be used in the test +// - tests: a list of test cases, each test case is a list of TestQuery +// - options: test-specific options, if any +// 2. Call ts.Init() to initialize the test. +// 3. Call ts.Run() to run the test. This will run the queries and validate the events. +// 4. Call ts.Close() to clean up the tables created in the test. +// The test framework will take care of creating the tables, running the queries, and validating the events for +// simpler cases. For more complex cases, the test framework provides hooks to customize the event generation. + +// Note: To simplify the initial implementation, the test framework is designed to be used in the vstreamer package only. +// It makes several assumptions about how the test cases are written. For example, queries are expected to +// use single quotes for string literals, for example: +// `"insert into t1 values (1, 'blob1', 'aaa')"`. +// The test framework will not work if the queries use double quotes for string literals at the moment. + +import ( + "context" + "fmt" + "slices" + "strconv" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + "vitess.io/vitess/go/mysql/collations" + "vitess.io/vitess/go/vt/log" + "vitess.io/vitess/go/vt/proto/binlogdata" + "vitess.io/vitess/go/vt/proto/query" + "vitess.io/vitess/go/vt/schemadiff" + "vitess.io/vitess/go/vt/sqlparser" + "vitess.io/vitess/go/vt/vttablet/tabletserver/vstreamer/testenv" +) + +const ( + lengthInt = 11 + lengthBlob = 65535 + lengthText = 262140 + lengthSet = 56 +) + +func getDefaultCollationID() int64 { + return 45 // utf8mb4_general_ci +} + +var ( + // noEvents is used to indicate that a query is expected to generate no events. + noEvents = []TestRowEvent{} +) + +// TestColumn has all the attributes of a column required for the test cases. +type TestColumn struct { + name, dataType, colType string + len, collationID int64 + dataTypeLowered string + skip bool + collationName string +} + +// TestFieldEvent has all the attributes of a table required for creating a field event. +type TestFieldEvent struct { + table, db string + cols []*TestColumn +} + +// TestQuery represents a database query and the expected events it generates. 
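+// Leaving events nil asks the framework to generate the expected events from the query
+// itself, while noEvents declares that the query should produce none. For example
+// (values are illustrative):
+//
+//	TestQuery{query: "insert into t1 values (1, 'blob1', 'aaa')"}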
+type TestQuery struct {
+	query  string
+	events []TestRowEvent
+}
+
+// TestRowChange represents the before and after state of a row due to a DML operation.
+type TestRowChange struct {
+	before []string
+	after  []string
+}
+
+// TestRowEventSpec is used for defining a custom row event.
+type TestRowEventSpec struct {
+	table   string
+	changes []TestRowChange
+}
+
+// String generates a string representation for a custom row event.
+func (s *TestRowEventSpec) String() string {
+	ev := &binlogdata.RowEvent{
+		TableName: s.table,
+	}
+	var rowChanges []*binlogdata.RowChange
+	if s.changes != nil && len(s.changes) > 0 {
+		for _, c := range s.changes {
+			rowChange := binlogdata.RowChange{}
+			if c.before != nil && len(c.before) > 0 {
+				rowChange.Before = &query.Row{}
+				for _, val := range c.before {
+					rowChange.Before.Lengths = append(rowChange.Before.Lengths, int64(len(val)))
+					rowChange.Before.Values = append(rowChange.Before.Values, []byte(val)...)
+				}
+			}
+			if c.after != nil && len(c.after) > 0 {
+				rowChange.After = &query.Row{}
+				for _, val := range c.after {
+					rowChange.After.Lengths = append(rowChange.After.Lengths, int64(len(val)))
+					rowChange.After.Values = append(rowChange.After.Values, []byte(val)...)
+				}
+			}
+			rowChanges = append(rowChanges, &rowChange)
+		}
+		ev.RowChanges = rowChanges
+	}
+	vEvent := &binlogdata.VEvent{
+		Type:     binlogdata.VEventType_ROW,
+		RowEvent: ev,
+	}
+	return vEvent.String()
+}
+
+// TestRowEvent is used to define either the actual row event string (the `event` field) or a custom row event
+// (the `spec` field). Only one should be specified. If a test validates `flags` of a RowEvent then it is set.
+type TestRowEvent struct {
+	event string
+	spec  *TestRowEventSpec
+	flags int
+}
+
+// TestSpecOptions has any non-standard test-specific options which can modify the event generation behaviour.
+type TestSpecOptions struct {
+	noblob bool
+	filter *binlogdata.Filter
+}
+
+// TestSpec is defined one per unit test.
+type TestSpec struct {
+	// test-specific parameters
+	t       *testing.T
+	ddls    []string         // create table statements
+	tests   [][]*TestQuery   // list of input queries and expected events for each query
+	options *TestSpecOptions // test-specific options
+
+	// internal state
+	inited          bool                       // whether the test has been initialized
+	tables          []string                   // list of tables in the schema (created in `ddls`)
+	pkColumns       map[string][]string        // map of table name to primary key columns
+	schema          *schemadiff.Schema         // parsed schema from `ddls` using `schemadiff`
+	fieldEvents     map[string]*TestFieldEvent // map of table name to field event for the table
+	fieldEventsSent map[string]bool            // whether the field event has been sent for the table in the test
+	state           map[string]*query.Row      // last row inserted for each table. Useful to generate events only for inserts
+	metadata        map[string][]string        // list of enum/set values for enum/set columns
+}
+
+func (ts *TestSpec) getCurrentState(table string) *query.Row {
+	return ts.state[table]
+}
+
+func (ts *TestSpec) setCurrentState(table string, row *query.Row) {
+	ts.state[table] = row
+}
+
+// Init() initializes the test. It creates the tables and sets up the internal state.
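+// Init only does its work on the first call; subsequent calls return immediately.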
+func (ts *TestSpec) Init() error { + var err error + if ts.inited { + return nil + } + defer func() { ts.inited = true }() + if ts.options == nil { + ts.options = &TestSpecOptions{} + } + ts.schema, err = schemadiff.NewSchemaFromQueries(schemadiff.NewTestEnv(), ts.ddls) + if err != nil { + return err + } + ts.fieldEvents = make(map[string]*TestFieldEvent) + ts.fieldEventsSent = make(map[string]bool) + ts.state = make(map[string]*query.Row) + ts.metadata = make(map[string][]string) + ts.pkColumns = make(map[string][]string) + // create tables + require.Equal(ts.t, len(ts.ddls), len(ts.schema.Tables()), "number of tables in ddls and schema do not match") + for i, t := range ts.schema.Tables() { + execStatement(ts.t, ts.ddls[i]) + fe := ts.getFieldEvent(t) + ts.fieldEvents[t.Name()] = fe + + var pkColumns []string + var hasPK bool + for _, index := range t.TableSpec.Indexes { + require.NotNil(ts.t, index.Info, "index.Info is nil") + if index.Info.Type == sqlparser.IndexTypePrimary { + for _, col := range index.Columns { + pkColumns = append(pkColumns, col.Column.String()) + } + hasPK = true + } + } + if !hasPK { + // add all columns as pk columns + for _, col := range t.TableSpec.Columns { + pkColumns = append(pkColumns, col.Name.String()) + } + } + ts.pkColumns[t.Name()] = pkColumns + } + engine.se.Reload(context.Background()) + return nil +} + +// Close() should be called (via defer) at the end of the test to clean up the tables created in the test. +func (ts *TestSpec) Close() { + dropStatement := fmt.Sprintf("drop tables %s", strings.Join(ts.schema.TableNames(), ", ")) + execStatement(ts.t, dropStatement) +} + +func (ts *TestSpec) getBindVarsForInsert(stmt sqlparser.Statement) (string, map[string]string) { + bv := make(map[string]string) + ins := stmt.(*sqlparser.Insert) + tn, err := ins.Table.TableName() + require.NoError(ts.t, err) + table := tn.Name.String() + fe := ts.fieldEvents[table] + vals, ok := ins.Rows.(sqlparser.Values) + require.True(ts.t, ok, "insert statement does not have values") + for _, val := range vals { + for i, v := range val { + bufV := sqlparser.NewTrackedBuffer(nil) + v.Format(bufV) + s := bufV.String() + switch fe.cols[i].dataTypeLowered { + case "varchar", "char", "binary", "varbinary", "blob", "text": + s = strings.Trim(s, "'") + case "set", "enum": + s = ts.getMetadataMap(table, fe.cols[i], s) + } + bv[fe.cols[i].name] = s + } + } + return table, bv +} + +func (ts *TestSpec) getBindVarsForUpdate(stmt sqlparser.Statement) (string, map[string]string) { + bv := make(map[string]string) + upd := stmt.(*sqlparser.Update) + //buf := sqlparser.NewTrackedBuffer(nil) + table := sqlparser.String(upd.TableExprs[0].(*sqlparser.AliasedTableExpr).Expr) + //upd.TableExprs[0].(*sqlparser.AliasedTableExpr).Expr.Format(buf) + //table := buf.String() + fe, ok := ts.fieldEvents[table] + require.True(ts.t, ok, "field event for table %s not found", table) + index := int64(0) + state := ts.getCurrentState(table) + for i, col := range fe.cols { + bv[col.name] = string(state.Values[index : index+state.Lengths[i]]) + index += state.Lengths[i] + } + for _, expr := range upd.Exprs { + bufV := sqlparser.NewTrackedBuffer(nil) + bufN := sqlparser.NewTrackedBuffer(nil) + expr.Expr.Format(bufV) + expr.Name.Format(bufN) + bv[bufN.String()] = strings.Trim(bufV.String(), "'") + } + return table, bv +} + +// Run() runs the test. It first initializes the test, then runs the queries and validates the events. 
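+// A typical use of the framework, following the steps listed at the top of this file,
+// looks roughly like this (DDL and queries are illustrative):
+//
+//	ts := &TestSpec{t: t, ddls: []string{"create table t1(id int, val varchar(4), primary key(id))"}}
+//	ts.tests = [][]*TestQuery{{{query: "begin"}, {query: "insert into t1 values (1, 'aaa')"}, {query: "commit"}}}
+//	require.NoError(t, ts.Init())
+//	defer ts.Close()
+//	ts.Run()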
+func (ts *TestSpec) Run() { + require.NoError(ts.t, engine.se.Reload(context.Background())) + if !ts.inited { + require.NoError(ts.t, ts.Init()) + } + var testcases []testcase + for _, t := range ts.tests { + var tc testcase + var input []string + var output []string + for _, tq := range t { + var table string + input = append(input, tq.query) + switch { + case tq.events != nil && len(tq.events) == 0: // when an input query is expected to generate no events + continue + case tq.events != nil && // when we define the actual events either as a serialized string or as a TestRowEvent + (len(tq.events) > 0 && + !(len(tq.events) == 1 && tq.events[0].event == "" && tq.events[0].spec == nil)): + for _, e := range tq.events { + if e.event != "" { + output = append(output, e.event) + } else if e.spec != nil { + output = append(output, e.spec.String()) + } else { + panic("invalid event") + } + } + continue + default: + // when we don't define the actual events, we generate them based on the input query + flags := 0 + if len(tq.events) == 1 { + flags = tq.events[0].flags + } + stmt, err := sqlparser.NewTestParser().Parse(tq.query) + require.NoError(ts.t, err) + bv := make(map[string]string) + isRowEvent := false + switch stmt.(type) { + case *sqlparser.Begin: + output = append(output, "begin") + case *sqlparser.Commit: + output = append(output, "gtid", "commit") + case *sqlparser.Insert: + isRowEvent = true + table, bv = ts.getBindVarsForInsert(stmt) + case *sqlparser.Update: + isRowEvent = true + table, bv = ts.getBindVarsForUpdate(stmt) + case *sqlparser.Delete: + isRowEvent = true + del := stmt.(*sqlparser.Delete) + table = del.TableExprs[0].(*sqlparser.AliasedTableExpr).As.String() + default: + require.FailNowf(ts.t, "unsupported statement type", "stmt: %s", stmt) + } + if isRowEvent { + fe := ts.fieldEvents[table] + if fe == nil { + require.FailNowf(ts.t, "field event for table %s not found", table) + } + if !ts.fieldEventsSent[table] { + output = append(output, fe.String()) + ts.fieldEventsSent[table] = true + } + output = append(output, ts.getRowEvent(table, bv, fe, stmt, uint32(flags))) + } + } + + } + tc.input = input + tc.output = append(tc.output, output) + testcases = append(testcases, tc) + } + runCases(ts.t, ts.options.filter, testcases, "current", nil) +} + +func (ts *TestSpec) getFieldEvent(table *schemadiff.CreateTableEntity) *TestFieldEvent { + var tfe TestFieldEvent + tfe.table = table.Name() + tfe.db = testenv.DBName + for _, col := range table.TableSpec.Columns { + tc := TestColumn{} + tc.name = col.Name.String() + sqlType := col.Type.SQLType() + tc.dataType = sqlType.String() + tc.dataTypeLowered = strings.ToLower(tc.dataType) + tc.collationName = col.Type.Options.Collate + switch tc.dataTypeLowered { + case "int32": + tc.len = lengthInt + tc.collationID = collations.CollationBinaryID + tc.colType = "int(11)" + case "varchar", "varbinary", "char", "binary": + l := *col.Type.Length + switch tc.dataTypeLowered { + case "binary", "varbinary": + tc.len = int64(l) + tc.collationID = collations.CollationBinaryID + default: + tc.len = 4 * int64(l) + tc.collationID = getDefaultCollationID() + if tc.dataTypeLowered == "char" && strings.Contains(tc.collationName, "bin") { + tc.dataType = "BINARY" + } + } + tc.colType = fmt.Sprintf("%s(%d)", tc.dataTypeLowered, l) + case "blob": + tc.len = lengthBlob + tc.collationID = collations.CollationBinaryID + tc.colType = "blob" + case "text": + tc.len = lengthText + tc.collationID = getDefaultCollationID() + tc.colType = "text" + case "set": + 
tc.len = lengthSet + tc.collationID = getDefaultCollationID() + tc.colType = fmt.Sprintf("%s(%s)", tc.dataTypeLowered, strings.Join(col.Type.EnumValues, ",")) + ts.metadata[getMetadataKey(table.Name(), tc.name)] = col.Type.EnumValues + case "enum": + tc.len = int64(len(col.Type.EnumValues) + 1) + tc.collationID = getDefaultCollationID() + tc.colType = fmt.Sprintf("%s(%s)", tc.dataTypeLowered, strings.Join(col.Type.EnumValues, ",")) + ts.metadata[getMetadataKey(table.Name(), tc.name)] = col.Type.EnumValues + default: + log.Infof(fmt.Sprintf("unknown sqlTypeString %s", tc.dataTypeLowered)) + } + tfe.cols = append(tfe.cols, &tc) + } + return &tfe +} + +func getMetadataKey(table, col string) string { + return fmt.Sprintf("%s:%s", table, col) +} + +func (ts *TestSpec) setMetadataMap(table, col, value string) { + values := strings.Split(value, ",") + valuesReversed := slices.Clone(values) + slices.Reverse(valuesReversed) + ts.metadata[getMetadataKey(table, col)] = valuesReversed +} + +func (ts *TestSpec) getMetadataMap(table string, col *TestColumn, value string) string { + var bits int64 + value = strings.Trim(value, "'") + meta := ts.metadata[getMetadataKey(table, col.name)] + values := strings.Split(value, ",") + for _, v := range values { + v2 := strings.Trim(v, "'") + for i, m := range meta { + m2 := strings.Trim(m, "'") + if m2 == v2 { + switch col.dataTypeLowered { + case "set": + bits |= 1 << uint(i) + case "enum": + bits = int64(i) + 1 + } + } + } + } + return strconv.FormatInt(bits, 10) +} + +func (ts *TestSpec) getRowEvent(table string, bv map[string]string, fe *TestFieldEvent, stmt sqlparser.Statement, flags uint32) string { + ev := &binlogdata.RowEvent{ + TableName: table, + RowChanges: []*binlogdata.RowChange{ + { + Before: nil, + After: nil, + }, + }, + Flags: flags, + } + var row query.Row + for i, col := range fe.cols { + if fe.cols[i].skip { + continue + } + if col.dataTypeLowered == "binary" { + bv[col.name] = strings.TrimSuffix(bv[col.name], "\\0") + } + val := []byte(bv[col.name]) + l := int64(len(val)) + if col.dataTypeLowered == "binary" { + for l < col.len { + val = append(val, "\x00"...) + l++ + } + } + row.Values = append(row.Values, val...) 
+ row.Lengths = append(row.Lengths, l) + } + ev.RowChanges = ts.getRowChanges(table, stmt, &row) + vEvent := &binlogdata.VEvent{ + Type: binlogdata.VEventType_ROW, + RowEvent: ev, + } + return vEvent.String() +} + +func (ts *TestSpec) getRowChanges(table string, stmt sqlparser.Statement, row *query.Row) []*binlogdata.RowChange { + var rowChanges []*binlogdata.RowChange + var rowChange binlogdata.RowChange + switch stmt.(type) { + case *sqlparser.Insert: + rowChange.After = row + ts.setCurrentState(table, row) + case *sqlparser.Update: + rowChange = *ts.getRowChangeForUpdate(table, row) + ts.setCurrentState(table, row) + case *sqlparser.Delete: + rowChange.Before = row + ts.setCurrentState(table, nil) + } + rowChanges = append(rowChanges, &rowChange) + return rowChanges +} + +func (ts *TestSpec) getRowChangeForUpdate(table string, newState *query.Row) *binlogdata.RowChange { + var rowChange binlogdata.RowChange + var bitmap byte + var before, after query.Row + + currentState := ts.getCurrentState(table) + if currentState == nil { + return nil + } + var currentValueIndex int64 + var hasSkip bool + for i, l := range currentState.Lengths { + skip := false + isPKColumn := false + for _, pkColumn := range ts.pkColumns[table] { + if pkColumn == ts.fieldEvents[table].cols[i].name { + isPKColumn = true + break + } + } + if ts.options.noblob { + switch ts.fieldEvents[table].cols[i].dataTypeLowered { + case "blob", "text": + currentValue := currentState.Values[currentValueIndex : currentValueIndex+l] + newValue := newState.Values[currentValueIndex : currentValueIndex+l] + if string(currentValue) == string(newValue) { + skip = true + hasSkip = true + } + } + } + if skip && !isPKColumn { + before.Lengths = append(before.Lengths, -1) + } else { + before.Values = append(before.Values, currentState.Values[currentValueIndex:currentValueIndex+l]...) + before.Lengths = append(before.Lengths, l) + } + if skip { + after.Lengths = append(after.Lengths, -1) + } else { + after.Values = append(after.Values, newState.Values[currentValueIndex:currentValueIndex+l]...) + after.Lengths = append(after.Lengths, l) + bitmap |= 1 << uint(i) + } + currentValueIndex += l + } + rowChange.Before = &before + rowChange.After = &after + if hasSkip { + rowChange.DataColumns = &binlogdata.RowChange_Bitmap{ + Count: int64(len(currentState.Lengths)), + Cols: []byte{bitmap}, + } + } + return &rowChange +} + +func (ts *TestSpec) getBefore(table string) *query.Row { + currentState := ts.getCurrentState(table) + if currentState == nil { + return nil + } + var row query.Row + var currentValueIndex int64 + for i, l := range currentState.Lengths { + dataTypeIsRedacted := false + switch ts.fieldEvents[table].cols[i].dataTypeLowered { + case "blob", "text": + dataTypeIsRedacted = true + } + if ts.options.noblob && dataTypeIsRedacted { + row.Lengths = append(row.Lengths, -1) + } else { + row.Values = append(row.Values, currentState.Values[currentValueIndex:currentValueIndex+l]...) + row.Lengths = append(row.Lengths, l) + } + currentValueIndex += l + } + return &row +} diff --git a/go/vt/vttablet/tabletserver/vstreamer/testenv/testenv.go b/go/vt/vttablet/tabletserver/vstreamer/testenv/testenv.go index c056ef1d7e1..9c77ca18594 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/testenv/testenv.go +++ b/go/vt/vttablet/tabletserver/vstreamer/testenv/testenv.go @@ -41,6 +41,8 @@ import ( vttestpb "vitess.io/vitess/go/vt/proto/vttest" ) +const DBName = "vttest" + // Env contains all the env vars for a test against a mysql instance. 
type Env struct { cluster *vttest.LocalCluster @@ -65,7 +67,7 @@ type Env struct { // Init initializes an Env. func Init(ctx context.Context) (*Env, error) { te := &Env{ - KeyspaceName: "vttest", + KeyspaceName: DBName, ShardName: "0", Cells: []string{"cell1"}, } @@ -89,7 +91,7 @@ func Init(ctx context.Context) (*Env, error) { Shards: []*vttestpb.Shard{ { Name: "0", - DbNameOverride: "vttest", + DbNameOverride: DBName, }, }, }, diff --git a/go/vt/vttablet/tabletserver/vstreamer/uvstreamer_flaky_test.go b/go/vt/vttablet/tabletserver/vstreamer/uvstreamer_flaky_test.go index 203052e981e..ae1bc375036 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/uvstreamer_flaky_test.go +++ b/go/vt/vttablet/tabletserver/vstreamer/uvstreamer_flaky_test.go @@ -71,11 +71,11 @@ const ( numInitialRows = 10 ) -type state struct { +type TestState struct { tables []string } -var testState = &state{} +var testState = &TestState{} var positions map[string]string var allEvents []*binlogdatapb.VEvent diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go b/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go index 47a1c117719..4674d43015f 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go +++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer.go @@ -30,7 +30,6 @@ import ( "vitess.io/vitess/go/mysql/collations" "vitess.io/vitess/go/mysql/replication" "vitess.io/vitess/go/sqltypes" - "vitess.io/vitess/go/timer" "vitess.io/vitess/go/vt/binlog" "vitess.io/vitess/go/vt/dbconfigs" "vitess.io/vitess/go/vt/log" @@ -283,11 +282,11 @@ func (vs *vstreamer) parseEvents(ctx context.Context, events <-chan mysql.Binlog defer hbTimer.Stop() injectHeartbeat := func(throttled bool) error { - now := time.Now().UnixNano() select { case <-ctx.Done(): return vterrors.Errorf(vtrpcpb.Code_CANCELED, "context has expired") default: + now := time.Now().UnixNano() err := bufferAndTransmit(&binlogdatapb.VEvent{ Type: binlogdatapb.VEventType_HEARTBEAT, Timestamp: now / 1e9, @@ -299,22 +298,16 @@ func (vs *vstreamer) parseEvents(ctx context.Context, events <-chan mysql.Binlog } throttleEvents := func(throttledEvents chan mysql.BinlogEvent) { - throttledHeartbeatsRateLimiter := timer.NewRateLimiter(HeartbeatTime) - defer throttledHeartbeatsRateLimiter.Stop() for { - // check throttler. + // Check throttler. if !vs.vse.throttlerClient.ThrottleCheckOKOrWaitAppName(ctx, vs.throttlerApp) { - // make sure to leave if context is cancelled + // Make sure to leave if context is cancelled. select { case <-ctx.Done(): return default: - // do nothing special + // Do nothing special. 
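+					// Heartbeats while throttled are now emitted from the hbTimer loop below rather than from here.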
} - throttledHeartbeatsRateLimiter.Do(func() error { - return injectHeartbeat(true) - }) - // we won't process events, until we're no longer throttling continue } select { @@ -386,7 +379,8 @@ func (vs *vstreamer) parseEvents(ctx context.Context, events <-chan mysql.Binlog case <-ctx.Done(): return nil case <-hbTimer.C: - if err := injectHeartbeat(false); err != nil { + ok := vs.vse.throttlerClient.ThrottleCheckOK(ctx, vs.throttlerApp) + if err := injectHeartbeat(!ok); err != nil { if err == io.EOF { return nil } diff --git a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_flaky_test.go b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go similarity index 82% rename from go/vt/vttablet/tabletserver/vstreamer/vstreamer_flaky_test.go rename to go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go index 0fb9a841a7c..b2aafab11c1 100644 --- a/go/vt/vttablet/tabletserver/vstreamer/vstreamer_flaky_test.go +++ b/go/vt/vttablet/tabletserver/vstreamer/vstreamer_test.go @@ -26,6 +26,8 @@ import ( "testing" "time" + "github.com/prometheus/common/version" + "vitess.io/vitess/go/mysql/replication" "vitess.io/vitess/go/vt/vttablet/tabletserver/throttle/throttlerapp" "vitess.io/vitess/go/vt/vttablet/tabletserver/vstreamer/testenv" @@ -57,22 +59,15 @@ func checkIfOptionIsSupported(t *testing.T, variable string) bool { return false } -type TestColumn struct { - name, dataType, colType string - len, charset int64 -} - -type TestFieldEvent struct { - table, db string - cols []*TestColumn -} - func (tfe *TestFieldEvent) String() string { s := fmt.Sprintf("type:FIELD field_event:{table_name:\"%s\"", tfe.table) fld := "" for _, col := range tfe.cols { + if col.skip { + continue + } fld += fmt.Sprintf(" fields:{name:\"%s\" type:%s table:\"%s\" org_table:\"%s\" database:\"%s\" org_name:\"%s\" column_length:%d charset:%d", - col.name, col.dataType, tfe.table, tfe.table, tfe.db, col.name, col.len, col.charset) + col.name, col.dataType, tfe.table, tfe.table, tfe.db, col.name, col.len, col.collationID) if col.colType != "" { fld += fmt.Sprintf(" column_type:\"%s\"", col.colType) } @@ -130,166 +125,103 @@ func TestNoBlob(t *testing.T) { env = nil newEngine(t, ctx, "noblob") defer func() { + if engine != nil { + engine.Close() + } + if env != nil { + env.Close() + } engine = oldEngine env = oldEnv }() - execStatements(t, []string{ - "create table t1(id int, blb blob, val varchar(4), primary key(id))", - "create table t2(id int, txt text, val varchar(4), unique key(id, val))", - }) - defer execStatements(t, []string{ - "drop table t1", - "drop table t2", - }) - engine.se.Reload(context.Background()) - queries := []string{ - "begin", - "insert into t1 values (1, 'blob1', 'aaa')", - "update t1 set val = 'bbb'", - "commit", - "begin", - "insert into t2 values (1, 'text1', 'aaa')", - "update t2 set val = 'bbb'", - "commit", - } - fe1 := &TestFieldEvent{ - table: "t1", - db: "vttest", - cols: []*TestColumn{ - {name: "id", dataType: "INT32", colType: "int(11)", len: 11, charset: 63}, - {name: "blb", dataType: "BLOB", colType: "blob", len: 65535, charset: 63}, - {name: "val", dataType: "VARCHAR", colType: "varchar(4)", len: 16, charset: 45}, + ts := &TestSpec{ + t: t, + ddls: []string{ + // t1 has a blob column and a primary key. The blob column will not be in update row events. + "create table t1(id int, blb blob, val varchar(4), primary key(id))", + // t2 has a text column and no primary key. The text column will be in update row events. 
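+			// (with no primary key, the text column is needed to identify the row, so it stays in the row image)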
+ "create table t2(id int, txt text, val varchar(4), unique key(id, val))", + // t3 has a text column and a primary key. The text column will not be in update row events. + "create table t3(id int, txt text, val varchar(4), primary key(id))", }, - } - fe2 := &TestFieldEvent{ - table: "t2", - db: "vttest", - cols: []*TestColumn{ - {name: "id", dataType: "INT32", colType: "int(11)", len: 11, charset: 63}, - {name: "txt", dataType: "TEXT", colType: "text", len: 262140, charset: 45}, - {name: "val", dataType: "VARCHAR", colType: "varchar(4)", len: 16, charset: 45}, + options: &TestSpecOptions{ + noblob: true, }, } - - testcases := []testcase{{ - input: queries, - output: [][]string{{ - "begin", - fe1.String(), - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:5 lengths:3 values:"1blob1aaa"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{before:{lengths:1 lengths:-1 lengths:3 values:"1aaa"} after:{lengths:1 lengths:-1 lengths:3 values:"1bbb"} data_columns:{count:3 cols:"\x05"}}}`, - "gtid", - "commit", - }, { - "begin", - fe2.String(), - `type:ROW row_event:{table_name:"t2" row_changes:{after:{lengths:1 lengths:5 lengths:3 values:"1text1aaa"}}}`, - `type:ROW row_event:{table_name:"t2" row_changes:{before:{lengths:1 lengths:5 lengths:3 values:"1text1aaa"} after:{lengths:1 lengths:-1 lengths:3 values:"1bbb"} data_columns:{count:3 cols:"\x05"}}}`, - "gtid", - "commit", - }}, + defer ts.Close() + require.NoError(t, ts.Init()) + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into t1 values (1, 'blob1', 'aaa')", nil}, + {"update t1 set val = 'bbb'", nil}, + {"commit", nil}, + }, {{"begin", nil}, + {"insert into t2 values (1, 'text1', 'aaa')", nil}, + {"update t2 set val = 'bbb'", nil}, + {"commit", nil}, + }, {{"begin", nil}, + {"insert into t3 values (1, 'text1', 'aaa')", nil}, + {"update t3 set val = 'bbb'", nil}, + {"commit", nil}, }} - runCases(t, nil, testcases, "current", nil) + ts.Run() } +// TestSetAndEnum confirms that the events for set and enum columns are correct. 
func TestSetAndEnum(t *testing.T) { - execStatements(t, []string{ - "create table t1(id int, val binary(4), color set('red','green','blue'), size enum('S','M','L'), primary key(id))", - }) - defer execStatements(t, []string{ - "drop table t1", - }) - engine.se.Reload(context.Background()) - queries := []string{ - "begin", - "insert into t1 values (1, 'aaa', 'red,blue', 'S')", - "insert into t1 values (2, 'bbb', 'green', 'M')", - "insert into t1 values (3, 'ccc', 'red,blue,green', 'L')", - "commit", - } - - fe := &TestFieldEvent{ - table: "t1", - db: "vttest", - cols: []*TestColumn{ - {name: "id", dataType: "INT32", colType: "int(11)", len: 11, charset: 63}, - {name: "val", dataType: "BINARY", colType: "binary(4)", len: 4, charset: 63}, - {name: "color", dataType: "SET", colType: "set('red','green','blue')", len: 56, charset: 45}, - {name: "size", dataType: "ENUM", colType: "enum('S','M','L')", len: 4, charset: 45}, + ts := &TestSpec{ + t: t, + ddls: []string{ + "create table t1(id int, val binary(4), color set('red','green','blue'), size enum('S','M','L'), primary key(id))", }, } - - testcases := []testcase{{ - input: queries, - output: [][]string{{ - `begin`, - fe.String(), - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:4 lengths:1 lengths:1 values:"1aaa\x0051"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:4 lengths:1 lengths:1 values:"2bbb\x0022"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:4 lengths:1 lengths:1 values:"3ccc\x0073"}}}`, - `gtid`, - `commit`, - }}, + defer ts.Close() + require.NoError(t, ts.Init()) + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into t1 values (1, 'aaa', 'red,blue', 'S')", nil}, + {"insert into t1 values (2, 'bbb', 'green', 'M')", nil}, + {"insert into t1 values (3, 'ccc', 'red,blue,green', 'L')", nil}, + {"commit", nil}, }} - runCases(t, nil, testcases, "current", nil) + ts.Run() } +// TestCellValuePadding tests that the events are correctly padded for binary columns. 
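+// binary(n) values are right-padded with zero bytes up to n, while char(n) values are streamed unpadded.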
func TestCellValuePadding(t *testing.T) { - - execStatements(t, []string{ - "create table t1(id int, val binary(4), primary key(val))", - "create table t2(id int, val char(4), primary key(val))", - "create table t3(id int, val char(4) collate utf8mb4_bin, primary key(val))", - }) - defer execStatements(t, []string{ - "drop table t1", - "drop table t2", - "drop table t3", - }) - engine.se.Reload(context.Background()) - queries := []string{ - "begin", - "insert into t1 values (1, 'aaa\000')", - "insert into t1 values (2, 'bbb\000')", - "update t1 set id = 11 where val = 'aaa\000'", - "insert into t2 values (1, 'aaa')", - "insert into t2 values (2, 'bbb')", - "update t2 set id = 11 where val = 'aaa'", - "insert into t3 values (1, 'aaa')", - "insert into t3 values (2, 'bb')", - "update t3 set id = 11 where val = 'aaa'", - "commit", - } - - testcases := []testcase{{ - input: queries, - output: [][]string{{ - `begin`, - `type:FIELD field_event:{table_name:"t1" fields:{name:"id" type:INT32 table:"t1" org_table:"t1" database:"vttest" org_name:"id" column_length:11 charset:63 column_type:"int(11)"} fields:{name:"val" type:BINARY table:"t1" org_table:"t1" database:"vttest" org_name:"val" column_length:4 charset:63 column_type:"binary(4)"}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:4 values:"1aaa\x00"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:4 values:"2bbb\x00"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{before:{lengths:1 lengths:4 values:"1aaa\x00"} after:{lengths:2 lengths:4 values:"11aaa\x00"}}}`, - `type:FIELD field_event:{table_name:"t2" fields:{name:"id" type:INT32 table:"t2" org_table:"t2" database:"vttest" org_name:"id" column_length:11 charset:63 column_type:"int(11)"} fields:{name:"val" type:CHAR table:"t2" org_table:"t2" database:"vttest" org_name:"val" column_length:16 charset:45 column_type:"char(4)"}}`, - `type:ROW row_event:{table_name:"t2" row_changes:{after:{lengths:1 lengths:3 values:"1aaa"}}}`, - `type:ROW row_event:{table_name:"t2" row_changes:{after:{lengths:1 lengths:3 values:"2bbb"}}}`, - `type:ROW row_event:{table_name:"t2" row_changes:{before:{lengths:1 lengths:3 values:"1aaa"} after:{lengths:2 lengths:3 values:"11aaa"}}}`, - `type:FIELD field_event:{table_name:"t3" fields:{name:"id" type:INT32 table:"t3" org_table:"t3" database:"vttest" org_name:"id" column_length:11 charset:63 column_type:"int(11)"} fields:{name:"val" type:BINARY table:"t3" org_table:"t3" database:"vttest" org_name:"val" column_length:16 charset:45 column_type:"char(4)"}}`, - `type:ROW row_event:{table_name:"t3" row_changes:{after:{lengths:1 lengths:3 values:"1aaa"}}}`, - `type:ROW row_event:{table_name:"t3" row_changes:{after:{lengths:1 lengths:2 values:"2bb"}}}`, - `type:ROW row_event:{table_name:"t3" row_changes:{before:{lengths:1 lengths:3 values:"1aaa"} after:{lengths:2 lengths:3 values:"11aaa"}}}`, - `gtid`, - `commit`, + ts := &TestSpec{ + t: t, + ddls: []string{ + "create table t1(id int, val binary(4), primary key(val))", + "create table t2(id int, val char(4), primary key(val))", + "create table t3(id int, val char(4) collate utf8mb4_bin, primary key(val))"}, + } + defer ts.Close() + require.NoError(t, ts.Init()) + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into t1 values (1, 'aaa\000')", nil}, + {"insert into t1 values (2, 'bbb\000')", nil}, + {"update t1 set id = 11 where val = 'aaa\000'", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{before: []string{"1", 
"aaa\x00"}, after: []string{"11", "aaa\x00"}}}}}, + }}, + {"insert into t2 values (1, 'aaa')", nil}, + {"insert into t2 values (2, 'bbb')", nil}, + {"update t2 set id = 11 where val = 'aaa'", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t2", changes: []TestRowChange{{before: []string{"1", "aaa"}, after: []string{"11", "aaa"}}}}}, + }}, + {"insert into t3 values (1, 'aaa')", nil}, + {"insert into t3 values (2, 'bb')", nil}, + {"update t3 set id = 11 where val = 'aaa'", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t3", changes: []TestRowChange{{before: []string{"1", "aaa"}, after: []string{"11", "aaa"}}}}}, }}, + {"commit", nil}, }} - runCases(t, nil, testcases, "current", nil) + ts.Run() } func TestSetStatement(t *testing.T) { - - if testing.Short() { - t.Skip() - } if !checkIfOptionIsSupported(t, "log_builtin_as_identified_by_password") { // the combination of setting this option and support for "set password" only works on a few flavors log.Info("Cannot test SetStatement on this flavor") @@ -332,45 +264,25 @@ func TestSetForeignKeyCheck(t *testing.T) { testRowEventFlags = true defer func() { testRowEventFlags = false }() - execStatements(t, []string{ - "create table t1(id int, val binary(4), primary key(id))", - }) - defer execStatements(t, []string{ - "drop table t1", - }) - engine.se.Reload(context.Background()) - queries := []string{ - "begin", - "insert into t1 values (1, 'aaa')", - "set @@session.foreign_key_checks=1", - "insert into t1 values (2, 'bbb')", - "set @@session.foreign_key_checks=0", - "insert into t1 values (3, 'ccc')", - "commit", - } - - fe := &TestFieldEvent{ - table: "t1", - db: "vttest", - cols: []*TestColumn{ - {name: "id", dataType: "INT32", colType: "int(11)", len: 11, charset: 63}, - {name: "val", dataType: "BINARY", colType: "binary(4)", len: 4, charset: 63}, + ts := &TestSpec{ + t: t, + ddls: []string{ + "create table t1(id int, val binary(4), primary key(id))", }, } - - testcases := []testcase{{ - input: queries, - output: [][]string{{ - `begin`, - fe.String(), - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:4 values:"1aaa\x00"}} flags:1}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:4 values:"2bbb\x00"}} flags:1}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:4 values:"3ccc\x00"}} flags:3}`, - `gtid`, - `commit`, - }}, + defer ts.Close() + require.NoError(t, ts.Init()) + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into t1 values (1, 'aaa')", []TestRowEvent{{flags: 1}}}, + {"set @@session.foreign_key_checks=1", noEvents}, + {"insert into t1 values (2, 'bbb')", []TestRowEvent{{flags: 1}}}, + {"set @@session.foreign_key_checks=0", noEvents}, + {"insert into t1 values (3, 'ccc')", []TestRowEvent{{flags: 3}}}, + {"commit", nil}, }} - runCases(t, nil, testcases, "current", nil) + ts.Run() + } func TestStmtComment(t *testing.T) { @@ -747,225 +659,179 @@ func TestVStreamCopyWithDifferentFilters(t *testing.T) { } } +// TestFilteredVarBinary confirms that adding a filter using a varbinary column results in the correct set of events. 
func TestFilteredVarBinary(t *testing.T) { - if testing.Short() { - t.Skip() + ts := &TestSpec{ + t: t, + ddls: []string{ + "create table t1(id1 int, val varbinary(128), primary key(id1))", + }, + options: &TestSpecOptions{ + filter: &binlogdatapb.Filter{ + Rules: []*binlogdatapb.Rule{{ + Match: "t1", + Filter: "select id1, val from t1 where val = 'newton'", + }}, + }, + }, } - - execStatements(t, []string{ - "create table t1(id1 int, val varbinary(128), primary key(id1))", - }) - defer execStatements(t, []string{ - "drop table t1", - }) - engine.se.Reload(context.Background()) - - filter := &binlogdatapb.Filter{ - Rules: []*binlogdatapb.Rule{{ - Match: "t1", - Filter: "select id1, val from t1 where val = 'newton'", + defer ts.Close() + require.NoError(t, ts.Init()) + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into t1 values (1, 'kepler')", noEvents}, + {"insert into t1 values (2, 'newton')", nil}, + {"insert into t1 values (3, 'newton')", nil}, + {"insert into t1 values (4, 'kepler')", noEvents}, + {"insert into t1 values (5, 'newton')", nil}, + {"update t1 set val = 'newton' where id1 = 1", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{after: []string{"1", "newton"}}}}}, }}, - } - - testcases := []testcase{{ - input: []string{ - "begin", - "insert into t1 values (1, 'kepler')", - "insert into t1 values (2, 'newton')", - "insert into t1 values (3, 'newton')", - "insert into t1 values (4, 'kepler')", - "insert into t1 values (5, 'newton')", - "update t1 set val = 'newton' where id1 = 1", - "update t1 set val = 'kepler' where id1 = 2", - "update t1 set val = 'newton' where id1 = 2", - "update t1 set val = 'kepler' where id1 = 1", - "delete from t1 where id1 in (2,3)", - "commit", - }, - output: [][]string{{ - `begin`, - `type:FIELD field_event:{table_name:"t1" fields:{name:"id1" type:INT32 table:"t1" org_table:"t1" database:"vttest" org_name:"id1" column_length:11 charset:63 column_type:"int(11)"} fields:{name:"val" type:VARBINARY table:"t1" org_table:"t1" database:"vttest" org_name:"val" column_length:128 charset:63 column_type:"varbinary(128)"}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:6 values:"2newton"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:6 values:"3newton"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:6 values:"5newton"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:6 values:"1newton"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{before:{lengths:1 lengths:6 values:"2newton"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:6 values:"2newton"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{before:{lengths:1 lengths:6 values:"1newton"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{before:{lengths:1 lengths:6 values:"2newton"}} row_changes:{before:{lengths:1 lengths:6 values:"3newton"}}}`, - `gtid`, - `commit`, + {"update t1 set val = 'kepler' where id1 = 2", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{before: []string{"2", "newton"}}}}}, + }}, + {"update t1 set val = 'newton' where id1 = 2", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{after: []string{"2", "newton"}}}}}, }}, + {"update t1 set val = 'kepler' where id1 = 1", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{before: []string{"1", "newton"}}}}}, + }}, + 
{"delete from t1 where id1 in (2,3)", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{before: []string{"2", "newton"}}, {before: []string{"3", "newton"}}}}}, + }}, + {"commit", nil}, }} - runCases(t, filter, testcases, "", nil) + ts.Run() } +// TestFilteredInt confirms that adding a filter using an int column results in the correct set of events. func TestFilteredInt(t *testing.T) { - if testing.Short() { - t.Skip() + ts := &TestSpec{ + t: t, + ddls: []string{ + "create table t1(id1 int, id2 int, val varbinary(128), primary key(id1))", + }, + options: &TestSpecOptions{ + filter: &binlogdatapb.Filter{ + Rules: []*binlogdatapb.Rule{{ + Match: "t1", + Filter: "select id1, val from t1 where id2 = 200", + }}, + }, + }, } - engine.se.Reload(context.Background()) - - execStatements(t, []string{ - "create table t1(id1 int, id2 int, val varbinary(128), primary key(id1))", - }) - defer execStatements(t, []string{ - "drop table t1", - }) - engine.se.Reload(context.Background()) - - filter := &binlogdatapb.Filter{ - Rules: []*binlogdatapb.Rule{{ - Match: "t1", - Filter: "select id1, val from t1 where id2 = 200", + defer ts.Close() + require.NoError(t, ts.Init()) + ts.fieldEvents["t1"].cols[1].skip = true + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into t1 values (1, 100, 'aaa')", noEvents}, + {"insert into t1 values (2, 200, 'bbb')", nil}, + {"insert into t1 values (3, 100, 'ccc')", noEvents}, + {"insert into t1 values (4, 200, 'ddd')", nil}, + {"insert into t1 values (5, 200, 'eee')", nil}, + {"update t1 set val = 'newddd' where id1 = 4", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{before: []string{"4", "ddd"}, after: []string{"4", "newddd"}}}}}, }}, - } - - testcases := []testcase{{ - input: []string{ - "begin", - "insert into t1 values (1, 100, 'aaa')", - "insert into t1 values (2, 200, 'bbb')", - "insert into t1 values (3, 100, 'ccc')", - "insert into t1 values (4, 200, 'ddd')", - "insert into t1 values (5, 200, 'eee')", - "update t1 set val = 'newddd' where id1 = 4", - "update t1 set id2 = 200 where id1 = 1", - "update t1 set id2 = 100 where id1 = 2", - "update t1 set id2 = 100 where id1 = 1", - "update t1 set id2 = 200 where id1 = 2", - "commit", - }, - output: [][]string{{ - `begin`, - `type:FIELD field_event:{table_name:"t1" fields:{name:"id1" type:INT32 table:"t1" org_table:"t1" database:"vttest" org_name:"id1" column_length:11 charset:63 column_type:"int(11)"} fields:{name:"val" type:VARBINARY table:"t1" org_table:"t1" database:"vttest" org_name:"val" column_length:128 charset:63 column_type:"varbinary(128)"}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:3 values:"2bbb"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:3 values:"4ddd"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:3 values:"5eee"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{before:{lengths:1 lengths:3 values:"4ddd"} after:{lengths:1 lengths:6 values:"4newddd"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:3 values:"1aaa"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{before:{lengths:1 lengths:3 values:"2bbb"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{before:{lengths:1 lengths:3 values:"1aaa"}}}`, - `type:ROW row_event:{table_name:"t1" row_changes:{after:{lengths:1 lengths:3 values:"2bbb"}}}`, - `gtid`, - `commit`, + {"update t1 set id2 = 200 where id1 = 1", 
[]TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{after: []string{"1", "aaa"}}}}}, }}, + {"update t1 set id2 = 100 where id1 = 2", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{before: []string{"2", "bbb"}}}}}, + }}, + {"update t1 set id2 = 100 where id1 = 1", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{before: []string{"1", "aaa"}}}}}, + }}, + {"update t1 set id2 = 200 where id1 = 2", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "t1", changes: []TestRowChange{{after: []string{"2", "bbb"}}}}}, + }}, + {"commit", nil}, }} - runCases(t, filter, testcases, "", nil) + ts.Run() } +// TestSavepoint confirms that rolling back to a savepoint drops the dmls that were executed during the savepoint. func TestSavepoint(t *testing.T) { - if testing.Short() { - t.Skip() - } - - execStatements(t, []string{ - "create table stream1(id int, val varbinary(128), primary key(id))", - "create table stream2(id int, val varbinary(128), primary key(id))", - }) - defer execStatements(t, []string{ - "drop table stream1", - "drop table stream2", - }) - engine.se.Reload(context.Background()) - testcases := []testcase{{ - input: []string{ - "begin", - "insert into stream1 values (1, 'aaa')", - "savepoint a", - "insert into stream1 values (2, 'aaa')", - "rollback work to savepoint a", - "savepoint b", - "update stream1 set val='bbb' where id = 1", - "release savepoint b", - "commit", + ts := &TestSpec{ + t: t, + ddls: []string{ + "create table stream1(id int, val varbinary(128), primary key(id))", }, - output: [][]string{{ - `begin`, - `type:FIELD field_event:{table_name:"stream1" fields:{name:"id" type:INT32 table:"stream1" org_table:"stream1" database:"vttest" org_name:"id" column_length:11 charset:63 column_type:"int(11)"} fields:{name:"val" type:VARBINARY table:"stream1" org_table:"stream1" database:"vttest" org_name:"val" column_length:128 charset:63 column_type:"varbinary(128)"}}`, - `type:ROW row_event:{table_name:"stream1" row_changes:{after:{lengths:1 lengths:3 values:"1aaa"}}}`, - `type:ROW row_event:{table_name:"stream1" row_changes:{before:{lengths:1 lengths:3 values:"1aaa"} after:{lengths:1 lengths:3 values:"1bbb"}}}`, - `gtid`, - `commit`, + } + defer ts.Close() + require.NoError(t, ts.Init()) + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into stream1 values (1, 'aaa')", nil}, + {"savepoint a", noEvents}, + {"insert into stream1 values (2, 'aaa')", noEvents}, + {"rollback work to savepoint a", noEvents}, + {"savepoint b", noEvents}, + {"update stream1 set val='bbb' where id = 1", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "stream1", changes: []TestRowChange{{before: []string{"1", "aaa"}, after: []string{"1", "bbb"}}}}}, }}, + {"release savepoint b", noEvents}, + {"commit", nil}, }} - runCases(t, nil, testcases, "current", nil) + ts.Run() } +// TestSavepointWithFilter tests that using savepoints with both filtered and unfiltered tables works as expected. 
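+// Transactions that touch only the filtered-out table still produce begin/gtid/commit events, but no field or row events.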
func TestSavepointWithFilter(t *testing.T) { - if testing.Short() { - t.Skip() - } - - execStatements(t, []string{ - "create table stream1(id int, val varbinary(128), primary key(id))", - "create table stream2(id int, val varbinary(128), primary key(id))", - }) - defer execStatements(t, []string{ - "drop table stream1", - "drop table stream2", - }) - engine.se.Reload(context.Background()) - testcases := []testcase{{ - input: []string{ - "begin", - "insert into stream1 values (1, 'aaa')", - "savepoint a", - "insert into stream1 values (2, 'aaa')", - "savepoint b", - "insert into stream1 values (3, 'aaa')", - "savepoint c", - "insert into stream1 values (4, 'aaa')", - "savepoint d", - "commit", - - "begin", - "insert into stream1 values (5, 'aaa')", - "savepoint d", - "insert into stream1 values (6, 'aaa')", - "savepoint c", - "insert into stream1 values (7, 'aaa')", - "savepoint b", - "insert into stream1 values (8, 'aaa')", - "savepoint a", - "commit", - - "begin", - "insert into stream1 values (9, 'aaa')", - "savepoint a", - "insert into stream2 values (1, 'aaa')", - "savepoint b", - "insert into stream1 values (10, 'aaa')", - "savepoint c", - "insert into stream2 values (2, 'aaa')", - "savepoint d", - "commit", + ts := &TestSpec{ + t: t, + ddls: []string{ + "create table stream1(id int, val varbinary(128), primary key(id))", + "create table stream2(id int, val varbinary(128), primary key(id))", }, - output: [][]string{{ - `begin`, - `gtid`, - `commit`, - }, { - `begin`, - `gtid`, - `commit`, - }, { - `begin`, - `type:FIELD field_event:{table_name:"stream2" fields:{name:"id" type:INT32 table:"stream2" org_table:"stream2" database:"vttest" org_name:"id" column_length:11 charset:63 column_type:"int(11)"} fields:{name:"val" type:VARBINARY table:"stream2" org_table:"stream2" database:"vttest" org_name:"val" column_length:128 charset:63 column_type:"varbinary(128)"}}`, - `type:ROW row_event:{table_name:"stream2" row_changes:{after:{lengths:1 lengths:3 values:"1aaa"}}}`, - `type:ROW row_event:{table_name:"stream2" row_changes:{after:{lengths:1 lengths:3 values:"2aaa"}}}`, - `gtid`, - `commit`, + options: &TestSpecOptions{ + filter: &binlogdatapb.Filter{ + Rules: []*binlogdatapb.Rule{{ + Match: "stream2", + Filter: "select * from stream2", + }}, + }, + }, + } + defer ts.Close() + require.NoError(t, ts.Init()) + ts.tests = [][]*TestQuery{{ + {"begin", nil}, + {"insert into stream1 values (1, 'aaa')", noEvents}, + {"savepoint a", noEvents}, + {"insert into stream1 values (2, 'aaa')", noEvents}, + {"savepoint b", noEvents}, + {"insert into stream1 values (3, 'aaa')", noEvents}, + {"savepoint c", noEvents}, + {"insert into stream1 values (4, 'aaa')", noEvents}, + {"savepoint d", noEvents}, + {"commit", nil}, + }, { + {"begin", nil}, + {"insert into stream1 values (5, 'aaa')", noEvents}, + {"savepoint d", noEvents}, + {"insert into stream1 values (6, 'aaa')", noEvents}, + {"savepoint c", noEvents}, + {"insert into stream1 values (7, 'aaa')", noEvents}, + {"savepoint b", noEvents}, + {"insert into stream1 values (8, 'aaa')", noEvents}, + {"savepoint a", noEvents}, + {"commit", nil}, + }, { + {"begin", nil}, + {"insert into stream1 values (9, 'aaa')", noEvents}, + {"savepoint a", noEvents}, + {"insert into stream2 values (1, 'aaa')", nil}, + {"savepoint b", noEvents}, + {"insert into stream1 values (10, 'aaa')", noEvents}, + {"savepoint c", noEvents}, + {"insert into stream2 values (2, 'aaa')", []TestRowEvent{ + {spec: &TestRowEventSpec{table: "stream2", changes: []TestRowChange{{after: []string{"2", 
"aaa"}}}}}, }}, + {"savepoint d", noEvents}, + {"commit", nil}, }} - - filter := &binlogdatapb.Filter{ - Rules: []*binlogdatapb.Rule{{ - Match: "stream2", - Filter: "select * from stream2", - }}, - } - runCases(t, filter, testcases, "current", nil) + ts.Run() } func TestStatements(t *testing.T) { @@ -1721,6 +1587,9 @@ func TestBestEffortNameInFieldEvent(t *testing.T) { // test that vstreamer ignores tables created by OnlineDDL func TestInternalTables(t *testing.T) { + if version.GoOS == "darwin" { + t.Skip("internal online ddl table matching doesn't work on Mac because it is case insensitive") + } if testing.Short() { t.Skip() } @@ -2027,6 +1896,12 @@ func TestMinimalMode(t *testing.T) { env = nil newEngine(t, ctx, "minimal") defer func() { + if engine != nil { + engine.Close() + } + if env != nil { + env.Close() + } engine = oldEngine env = oldEnv }() @@ -2194,11 +2069,11 @@ func TestGeneratedColumns(t *testing.T) { table: "t1", db: "vttest", cols: []*TestColumn{ - {name: "id", dataType: "INT32", colType: "int(11)", len: 11, charset: 63}, - {name: "val", dataType: "VARBINARY", colType: "varbinary(6)", len: 6, charset: 63}, - {name: "val2", dataType: "VARBINARY", colType: "varbinary(6)", len: 6, charset: 63}, - {name: "val3", dataType: "VARBINARY", colType: "varbinary(6)", len: 6, charset: 63}, - {name: "id2", dataType: "INT32", colType: "int(11)", len: 11, charset: 63}, + {name: "id", dataType: "INT32", colType: "int(11)", len: 11, collationID: 63}, + {name: "val", dataType: "VARBINARY", colType: "varbinary(6)", len: 6, collationID: 63}, + {name: "val2", dataType: "VARBINARY", colType: "varbinary(6)", len: 6, collationID: 63}, + {name: "val3", dataType: "VARBINARY", colType: "varbinary(6)", len: 6, collationID: 63}, + {name: "id2", dataType: "INT32", colType: "int(11)", len: 11, collationID: 63}, }, } @@ -2241,8 +2116,8 @@ func TestGeneratedInvisiblePrimaryKey(t *testing.T) { table: "t1", db: "vttest", cols: []*TestColumn{ - {name: "my_row_id", dataType: "UINT64", colType: "bigint unsigned", len: 20, charset: 63}, - {name: "val", dataType: "VARBINARY", colType: "varbinary(6)", len: 6, charset: 63}, + {name: "my_row_id", dataType: "UINT64", colType: "bigint unsigned", len: 20, collationID: 63}, + {name: "val", dataType: "VARBINARY", colType: "varbinary(6)", len: 6, collationID: 63}, }, } @@ -2324,10 +2199,16 @@ func expectLog(ctx context.Context, t *testing.T, input any, ch <-chan []*binlog break } } + + numEventsToMatch := len(evs) if len(wantset) != len(evs) { - t.Fatalf("%v: evs\n%v, want\n%v, >> got length %d, wanted length %d", input, evs, wantset, len(evs), len(wantset)) + log.Warningf("%v: evs\n%v, want\n%v, >> got length %d, wanted length %d", input, evs, wantset, len(evs), len(wantset)) + if len(wantset) < len(evs) { + numEventsToMatch = len(wantset) + } } - for i, want := range wantset { + for i := 0; i < numEventsToMatch; i++ { + want := wantset[i] // CurrentTime is not testable. 
evs[i].CurrentTime = 0 evs[i].Keyspace = "" @@ -2386,6 +2267,9 @@ func expectLog(ctx context.Context, t *testing.T, input any, ch <-chan []*binlog } } } + if len(wantset) != len(evs) { + t.Fatalf("%v: evs\n%v, want\n%v, got length %d, wanted length %d", input, evs, wantset, len(evs), len(wantset)) + } } } @@ -2421,7 +2305,7 @@ func vstream(ctx context.Context, t *testing.T, pos string, tablePKs []*binlogda timer := time.NewTimer(2 * time.Second) defer timer.Stop() - t.Logf("Received events: %v", evs) + log.Infof("Received events: %v", evs) select { case ch <- evs: case <-ctx.Done(): diff --git a/test/templates/cluster_endtoend_test.tpl b/test/templates/cluster_endtoend_test.tpl index c3626b2a260..507433e7ef2 100644 --- a/test/templates/cluster_endtoend_test.tpl +++ b/test/templates/cluster_endtoend_test.tpl @@ -15,7 +15,7 @@ jobs: build: timeout-minutes: 60 name: Run endtoend tests on {{.Name}} - runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1-24.04{{else}}ubuntu-24.04{{end}} + runs-on: {{if .Cores16}}{{`${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }}`}}{{else}}ubuntu-22.04{{end}} steps: - name: Skip CI diff --git a/test/templates/cluster_endtoend_test_docker.tpl b/test/templates/cluster_endtoend_test_docker.tpl index 7838bd39624..91fff7b8cfd 100644 --- a/test/templates/cluster_endtoend_test_docker.tpl +++ b/test/templates/cluster_endtoend_test_docker.tpl @@ -6,7 +6,7 @@ permissions: read-all jobs: build: name: Run endtoend tests on {{.Name}} - runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1-24.04{{else}}ubuntu-24.04{{end}} + runs-on: {{if .Cores16}}{{`${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }}`}}{{else}}ubuntu-22.04{{end}} steps: - name: Skip CI diff --git a/test/templates/cluster_endtoend_test_mysql57.tpl b/test/templates/cluster_endtoend_test_mysql57.tpl index 369688f9097..e570a202447 100644 --- a/test/templates/cluster_endtoend_test_mysql57.tpl +++ b/test/templates/cluster_endtoend_test_mysql57.tpl @@ -19,7 +19,7 @@ env: jobs: build: name: Run endtoend tests on {{.Name}} - runs-on: {{if .Cores16}}gh-hosted-runners-16cores-1{{else}}ubuntu-22.04{{end}} + runs-on: {{if .Cores16}}{{`${{ github.repository_owner == 'vitessio' && 'gh-hosted-runners-16cores-1' || 'ubuntu-22.04' }}`}}{{else}}ubuntu-22.04{{end}} steps: - name: Skip CI diff --git a/test/templates/cluster_vitess_tester.tpl b/test/templates/cluster_vitess_tester.tpl index 2f054ddfcd9..9c838b8c73c 100644 --- a/test/templates/cluster_vitess_tester.tpl +++ b/test/templates/cluster_vitess_tester.tpl @@ -14,7 +14,7 @@ env: jobs: build: name: Run endtoend tests on {{.Name}} - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI @@ -110,7 +110,7 @@ jobs: # install JUnit report formatter go install github.com/vitessio/go-junit-report@HEAD - + # install vitess tester go install github.com/vitessio/vt/go/vt@e43009309f599378504905d4b804460f47822ac5 @@ -139,12 +139,12 @@ jobs: set -exo pipefail i=1 - for dir in {{.Path}}/*/; do + for dir in {{.Path}}/*/; do # We go over all the directories in the given path. # If there is a vschema file there, we use it, otherwise we let vt tester autogenerate it. if [ -f $dir/vschema.json ]; then vt tester --xunit --vschema "$dir"vschema.json $dir/*.test - else + else vt tester --sharded --xunit $dir/*.test fi # Number the reports by changing their file names. 
diff --git a/test/templates/unit_test.tpl b/test/templates/unit_test.tpl index b5483b5d242..b2483d148ce 100644 --- a/test/templates/unit_test.tpl +++ b/test/templates/unit_test.tpl @@ -14,7 +14,7 @@ env: jobs: test: name: {{.Name}} - runs-on: ubuntu-24.04 + runs-on: ubuntu-22.04 steps: - name: Skip CI @@ -145,7 +145,7 @@ jobs: go mod download go install golang.org/x/tools/cmd/goimports@latest - + # install JUnit report formatter go install github.com/vitessio/go-junit-report@HEAD diff --git a/tools/rowlog/rowlog.go b/tools/rowlog/rowlog.go index 475006b2b59..8092159c6b6 100644 --- a/tools/rowlog/rowlog.go +++ b/tools/rowlog/rowlog.go @@ -71,7 +71,6 @@ func usage() { func main() { usage() - defer log.Flush() ctx := context.Background() config := parseCommandLine() if !config.Validate() {