
Commit c70fe4a

feature(scale_test): scale tests without payload
New scale tests have been developed to reproduce and validate the issues identified below. The current implementation of LongevityTest does not support extended execution without a workload. To address this limitation, a new ScaleClusterTest has been introduced. It allows running tests without workloads in several scenarios:

- Initializing a large cluster to a specified target size (e.g., from 10 to 100 nodes).
- Scaling the cluster down to a desired size (e.g., from 100 to 10 nodes).
- Creating a large number of keyspaces and tables with predefined columns, or utilizing the cs-profile-template.
- Running tests with nemesis but without any payload, for a duration set by the new 'idle_duration' parameter.

These tests were developed to avoid the complexity of the LongevityTest object and to stay compatible with future scale testing efforts on Kubernetes (K8s), Docker, and other cloud providers.

Refs: scylladb/scylladb#24790, scylladb/scylla-enterprise#5626, scylladb/scylla-enterprise#5624

1 parent 8ded9b5 commit c70fe4a

9 files changed: +347 -0 lines

data_dir/templated_100_table.yaml

Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,62 @@
### DML ###

# Keyspace Name
keyspace: testing_keyspaces

# The CQL for creating a keyspace (optional if it already exists)
keyspace_definition: |
  CREATE KEYSPACE testing_keyspaces WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3} AND durable_writes = true;

# Table name
table: ${table_name}

# The CQL for creating a table you wish to stress (optional if it already exists)
table_definition: |
  CREATE TABLE testing_keyspaces.${table_name} (
    key1 bigint,
    key2 text,
    clustering1 bigint,
    clustering2 timeuuid,
    column1 text,
    column2 int, PRIMARY KEY ((key1, key2), clustering1, clustering2)
  ) WITH bloom_filter_fp_chance = 0.01
    AND caching = {'keys': 'ALL', 'rows_per_partition': 'ALL'}
    AND comment = ''
    AND compaction = {'class': 'SizeTieredCompactionStrategy'}
    AND compression = {}
    AND crc_check_chance = 1.0
    AND dclocal_read_repair_chance = 0.1
    AND default_time_to_live = 0
    AND gc_grace_seconds = 864000
    AND max_index_interval = 2048
    AND memtable_flush_period_in_ms = 0
    AND min_index_interval = 128
    AND read_repair_chance = 0.0
    AND speculative_retry = '99.0PERCENTILE';

# extra_definitions:
#   - CREATE INDEX IF NOT EXISTS ${table_name}_field4_${table_name} ON feeds.${table_name} (field4);

# ### Column Distribution Specifications ###

# ### Batch Ratio Distribution Specifications ###
# insert:
#   partitions: fixed(1)
#   select: fixed(1)/1000
#   batchtype: UNLOGGED

# #
# # A list of queries you wish to run against the schema
# #
# queries:
#   read1:
#     cql: SELECT * FROM feeds.${table_name} WHERE field1 = ?
#     fields: samerow

# Run stress
# cassandra-stress user profile={} cl=QUORUM 'ops(insert=1, read1=5)' duration={} -rate threads=2 -errors ignore

# customer wish (different than what we are using!)
# "INSERT INTO short (k,time,data) values (?,?,?) USING TTL ?"
# "SELECT * FROM short WHERE name = ? AND time >= ? AND time < ?"

defaults/test_default.yaml

Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 db_type: "scylla"

 test_duration: 60
+idle_duration: 0
 prepare_stress_duration: 300 # 5 hours
 stress_duration: 0

docs/configuration_options.md

Lines changed: 9 additions & 0 deletions
@@ -42,6 +42,15 @@ Test duration (min). Parameter used to keep instances produced by tests<br>and f
 **type:** int


+## **idle_duration** / SCT_IDLE_DURATION
+
+Idle duration (min). Parameter used to run test without any workload
+
+**default:** N/A
+
+**type:** int
+
+
 ## **prepare_stress_duration** / SCT_PREPARE_STRESS_DURATION

 Time in minutes, which is required to run prepare stress commands<br>defined in prepare_*_cmd for dataset generation, and is used in<br>test duration calculation

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
#!groovy

// trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43
def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm)

longevityPipeline(
    backend: 'aws',
    region: '''["eu-west-1","eu-west-2"]''',
    availability_zone: 'a,b,c',
    test_name: 'scale_cluster_test.ScaleClusterTest.test_no_workloads_idle_custom_time',
    test_config: 'test-cases/scale/scale-multi-dc-100-empty-tables-cluster-resize.yaml',
)

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
#!groovy

// trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43
def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm)

longevityPipeline(
    backend: 'aws',
    region: 'eu-west-1',
    test_name: 'scale_cluster_test.ScaleClusterTest.test_grow_shrink_cluster',
    test_config: 'test-cases/scale/scale-20-200-20-cluster-resize.yaml',
)

scale_cluster_test.py

Lines changed: 174 additions & 0 deletions
@@ -0,0 +1,174 @@
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See LICENSE for more details.
#
# Copyright (c) 2016 ScyllaDB

import time

from longevity_test import LongevityTest
from sdcm.utils.adaptive_timeouts import adaptive_timeout, Operations
from sdcm.utils.cluster_tools import group_nodes_by_dc_idx
from sdcm.sct_events.system import InfoEvent
from sdcm.sct_events import Severity
from sdcm.cluster import MAX_TIME_WAIT_FOR_NEW_NODE_UP, BaseScyllaCluster


class ScaleClusterTest(LongevityTest):
    @staticmethod
    def is_target_reached(current: list[int], target: list[int]) -> bool:
        """Check that the cluster size reached the target size in each DC"""
        return all([x >= y for x, y in zip(current, target)])

    @staticmethod
    def init_nodes(db_cluster: BaseScyllaCluster):
        """Overridden to support setting up large clusters."""
        db_cluster.set_seeds(first_only=True)
        db_cluster.wait_for_init(node_list=db_cluster.nodes, timeout=MAX_TIME_WAIT_FOR_NEW_NODE_UP)
        db_cluster.set_seeds()
        db_cluster.update_seed_provider()

    @property
    def cluster_target_size(self) -> list[int]:
        cluster_target_size = self.params.get('cluster_target_size')
        if not cluster_target_size:
            return []
        return list(map(int, cluster_target_size.split())) if isinstance(cluster_target_size, str) else [cluster_target_size]

    def grow_to_cluster_target_size(self, cluster_target_size: list[int]):
        """Bootstrap a node in each DC and rack while the cluster size is less than the target size"""
        nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
        current_cluster_size = [len(nodes_by_dcx[dcx]) for dcx in sorted(nodes_by_dcx)]
        if self.is_target_reached(current_cluster_size, cluster_target_size):
            self.log.debug("Cluster has required size, no need to grow")
            return
        InfoEvent(
            message=f"Starting to grow cluster from {current_cluster_size} to {cluster_target_size}").publish()

        add_node_cnt = self.params.get('add_node_cnt')
        try:
            while not self.is_target_reached(current_cluster_size, cluster_target_size):
                for dcx, target in enumerate(cluster_target_size):
                    if current_cluster_size[dcx] >= target:
                        continue
                    add_nodes_num = add_node_cnt if (
                        target - current_cluster_size[dcx]) >= add_node_cnt else target - current_cluster_size[dcx]

                    for rack in range(self.db_cluster.racks_count):
                        added_nodes = []
                        InfoEvent(
                            message=f"Adding next number of nodes {add_nodes_num} to dc_idx {dcx} and rack {rack}").publish()
                        added_nodes.extend(self.db_cluster.add_nodes(
                            count=add_nodes_num, enable_auto_bootstrap=True, dc_idx=dcx, rack=rack))
                        self.monitors.reconfigure_scylla_monitoring()
                        up_timeout = MAX_TIME_WAIT_FOR_NEW_NODE_UP
                        with adaptive_timeout(Operations.NEW_NODE, node=self.db_cluster.data_nodes[0], timeout=up_timeout):
                            self.db_cluster.wait_for_init(
                                node_list=added_nodes, timeout=up_timeout, check_node_health=False)
                        self.db_cluster.wait_for_nodes_up_and_normal(nodes=added_nodes)
                        InfoEvent(f"New nodes up and normal {[node.name for node in added_nodes]}").publish()
                nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
                current_cluster_size = [len(nodes_by_dcx[dcx]) for dcx in sorted(nodes_by_dcx)]
        finally:
            nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
            current_cluster_size = [len(nodes_by_dcx[dcx]) for dcx in sorted(nodes_by_dcx)]
            InfoEvent(message=f"Grow cluster finished, cluster size is {current_cluster_size}").publish()

    def shrink_to_cluster_target_size(self, cluster_target_size: list[int]):
        """Decommission a node in each DC and rack while the cluster size is more than the target size"""
        nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
        current_cluster_size = [len(nodes_by_dcx[dcx]) for dcx in sorted(nodes_by_dcx)]
        if self.is_target_reached(cluster_target_size, current_cluster_size):
            self.log.debug("Cluster has required size, no need to shrink")
            return
        InfoEvent(
            message=f"Starting to shrink cluster from {current_cluster_size} to {cluster_target_size}").publish()
        try:
            nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
            while not self.is_target_reached(cluster_target_size, current_cluster_size):
                for dcx, _ in enumerate(current_cluster_size):
                    nodes_by_racks = self.db_cluster.get_nodes_per_datacenter_and_rack_idx(nodes_by_dcx[dcx])
                    for nodes in nodes_by_racks.values():
                        decommissioning_node = nodes[-1]
                        decommissioning_node.running_nemesis = "Decommissioning node"
                        self.db_cluster.decommission(node=decommissioning_node, timeout=7200)
                nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
                current_cluster_size = [len(nodes_by_dcx[dcx]) for dcx in sorted(nodes_by_dcx)]
        finally:
            nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
            current_cluster_size = [len(nodes_by_dcx[dcx]) for dcx in sorted(nodes_by_dcx)]
            InfoEvent(
                message=f"Reached cluster size {current_cluster_size}").publish()

    def create_schema(self):
        number_of_table = self.params.get('user_profile_table_count') or 0
        cs_user_profiles = self.params.get('cs_user_profiles')
        keyspace_num = self.params.get('keyspace_num')
        if not number_of_table and not cs_user_profiles:
            self.log.debug("User schema will not be created")
            return
        if not cs_user_profiles:
            region_dc_names = self.db_cluster.get_datacenter_name_per_region(self.db_cluster.nodes)
            replication_factor = self.db_cluster.racks_count
            InfoEvent("Create keyspace and 100 empty tables").publish()
            for i in range(1, keyspace_num + 1):
                self.create_keyspace(keyspace_name=f"testing_keyspace_{i}", replication_factor={
                    dc_name: replication_factor for dc_name in region_dc_names.values()})
                for j in range(1, number_of_table + 1):
                    self.create_table(name=f"table_{j}", keyspace_name=f"testing_keyspace_{i}")
            InfoEvent(f"{keyspace_num} Keyspaces and {number_of_table} tables were created").publish()
        else:
            self._pre_create_templated_user_schema()

    def test_grow_shrink_cluster(self):
        """
        Grow the cluster to the target size, then shrink it back to the initial size.
        1. Create schema if needed
        2. Grow cluster to target size
        3. If bootstrap failed during grow, try to shrink the cluster back to its initial size
        4. If shrink failed during step 3, just log the error and finish the test
        """
        nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
        init_cluster_size = [len(nodes_by_dcx[dcx]) for dcx in sorted(nodes_by_dcx)]
        InfoEvent(message=f"Cluster size is {init_cluster_size}").publish()
        self.create_schema()
        try:
            InfoEvent("Start grow cluster").publish()
            self.grow_to_cluster_target_size(self.cluster_target_size)
        except Exception as ex:  # noqa: BLE001
            self.log.error(f"Failed to grow cluster: {ex}")
            InfoEvent(f"Grow cluster failed with error: {ex}", severity=Severity.ERROR).publish()

        try:
            InfoEvent("Start shrink cluster").publish()
            self.shrink_to_cluster_target_size(init_cluster_size)
        except Exception as ex:  # noqa: BLE001
            self.log.error(f"Failed to shrink cluster: {ex}")
            InfoEvent(f"Shrink cluster failed with error: {ex}", severity=Severity.ERROR).publish()
        nodes_by_dcx = group_nodes_by_dc_idx(self.db_cluster.data_nodes)
        current_cluster_size = [len(nodes_by_dcx[dcx]) for dcx in sorted(nodes_by_dcx)]
        InfoEvent(message=f"Cluster size is {current_cluster_size}").publish()

        assert current_cluster_size == init_cluster_size, f"Cluster size {current_cluster_size} is not equal to initial {init_cluster_size}"

    def test_no_workloads_idle_custom_time(self):
        """
        Run nemesis without any workload, with a configured user schema,
        for idle_duration minutes.
        """
        self.create_schema()
        self.grow_to_cluster_target_size(self.cluster_target_size)
        self.db_cluster.add_nemesis(nemesis=self.get_nemesis_class(), tester_obj=self)
        self.db_cluster.start_nemesis()
        duration = self.params.get('idle_duration')
        InfoEvent(f"Wait {duration} minutes while cluster resizing").publish()
        time.sleep(duration * 60)

        self.shrink_to_cluster_target_size(self.params.total_db_nodes)
        InfoEvent("Test done").publish()

sdcm/sct_config.py

Lines changed: 2 additions & 0 deletions
@@ -257,6 +257,8 @@ class SCTConfiguration(dict):
              Test duration (min). Parameter used to keep instances produced by tests
              and for jenkins pipeline timeout and TimoutThread.
              """),
+        dict(name="idle_duration", env="SCT_IDLE_DURATION", type=int,
+             help="""Idle duration (min). Parameter used to run test without any workload"""),
         dict(name="prepare_stress_duration", env="SCT_PREPARE_STRESS_DURATION", type=int,
              help="""
              Time in minutes, which is required to run prepare stress commands
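
As a hedged usage sketch (not from the commit): the new option can be supplied either in a test-case YAML as idle_duration: 180 or through the SCT_IDLE_DURATION environment variable declared above, and the test reads it back in minutes via self.params.get('idle_duration'). The snippet below only illustrates that minute-to-second conversion outside of SCT; the example value 180 is taken from the multi-DC config later in this commit.

import os

os.environ.setdefault("SCT_IDLE_DURATION", "180")     # 3 hours of idle time, in minutes
idle_minutes = int(os.environ["SCT_IDLE_DURATION"])   # sct_config.py declares the option as type=int
idle_seconds = idle_minutes * 60                      # test_no_workloads_idle_custom_time sleeps this long
print(f"cluster stays idle (nemesis only) for {idle_minutes} min ({idle_seconds} s)")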

test-cases/scale/scale-20-200-20-cluster-resize.yaml

Lines changed: 34 additions & 0 deletions
@@ -0,0 +1,34 @@
test_duration: 6000

keyspace_num: 0
user_profile_table_count: 0
add_cs_user_profiles_extra_tables: true

n_loaders: 0
n_db_nodes: 20
add_node_cnt: 1
cluster_target_size: 200

instance_type_db: 'i4i.2xlarge'
instance_type_monitor: 'm6i.xlarge'
root_disk_size_monitor: 1000

nemesis_class_name: 'NoOpMonkey'

# This is in order to start the basic cluster faster
use_legacy_cluster_init: false
parallel_node_operations: true
seeds_num: 2
# Takes too long on big clusters
cluster_health_check: false

backtrace_decoding: false

append_scylla_yaml:
  enable_repair_based_node_ops: true

run_fullscan: []

simulated_racks: 0
instance_type_runner: 'c7i.16xlarge'
root_disk_size_runner: 1000

test-cases/scale/scale-multi-dc-100-empty-tables-cluster-resize.yaml

Lines changed: 42 additions & 0 deletions
@@ -0,0 +1,42 @@
test_duration: 4000
idle_duration: 180

# cs_user_profiles:
#   - data_dir/templated_100_table.yaml
user_profile_table_count: 100
add_cs_user_profiles_extra_tables: true
keyspace_num: 1

n_loaders: 0
n_db_nodes: "60 60"
add_node_cnt: 1
round_robin: true

instance_type_db: 'i4i.2xlarge'
instance_type_loader: 'c7i.4xlarge'
instance_type_monitor: 'm6i.xlarge'
root_disk_size_monitor: 2000


# decommission 'add_node_cnt' number of nodes and add the same number of nodes
nemesis_class_name: 'DecommissionMonkey'
# as fast as possible including health checks
nemesis_interval: 1

# This is in order to start the basic cluster faster
use_legacy_cluster_init: false
parallel_node_operations: true
seeds_num: 5
# Takes too long on big clusters
cluster_health_check: false

backtrace_decoding: false

append_scylla_yaml:
  enable_repair_based_node_ops: true

run_fullscan: []

simulated_racks: 0
instance_type_runner: 'c7i.8xlarge'
root_disk_size_runner: 3000
