Skip to content

Feat: Added functionalities to remove duplicate parameters and overwrite existing ones. #527

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 24 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
91b3ae1
feat: Adding a de-dupe function
DannyLiCom Jun 18, 2025
bc47601
Refactored GKE cluster creation to use a dictionary for argument mana…
DannyLiCom Jun 20, 2025
85f0982
feat: Add process_gcloud_args to handle custom gcloud arguments.
DannyLiCom Jun 24, 2025
ab3cd89
feat: Added two unit tests.
DannyLiCom Jun 25, 2025
928f693
Update test_gcloud_arg_processor.py
DannyLiCom Jun 25, 2025
e95b91f
feat: Updated the functionality of the process_gcloud_args function. …
DannyLiCom Jun 26, 2025
1b7442c
Resolve lint issue
DannyLiCom Jul 10, 2025
e743dee
Merge branch 'develop' into lidanny/feature/extended_subnet_ranges
DannyLiCom Jul 11, 2025
9466df4
Added --enable-private-nodes.
DannyLiCom Jul 14, 2025
4edc813
Merge branch 'lidanny/feature/extended_subnet_ranges' of https://gith…
DannyLiCom Jul 14, 2025
a827858
Merge branch 'develop' into lidanny/feature/extended_subnet_ranges
DannyLiCom Jul 14, 2025
8fd41ca
Merge branch 'develop' into lidanny/feature/extended_subnet_ranges
pawloch00 Jul 18, 2025
03f1fd6
Modify the command issue.
DannyLiCom Jul 21, 2025
9cac631
Add the enable-ip-alias flag.
DannyLiCom Jul 21, 2025
5370d17
Merge branch 'develop' into lidanny/feature/extended_subnet_ranges
pawloch00 Jul 21, 2025
721a8ac
Merge branch 'develop' into lidanny/feature/extended_subnet_ranges
DannyLiCom Jul 24, 2025
f1ca15b
Merge branch 'develop' into lidanny/feature/extended_subnet_ranges
pawloch00 Jul 25, 2025
3d728e3
Add the merge_conditional_params and construct_gcloud_command_string …
DannyLiCom Jul 28, 2025
91eb0c2
Merge branch 'lidanny/feature/extended_subnet_ranges' of https://gith…
DannyLiCom Jul 28, 2025
b433738
Merge branch 'develop' into lidanny/feature/extended_subnet_ranges
DannyLiCom Jul 29, 2025
2aae966
fix pyink
DannyLiCom Jul 29, 2025
a53bf91
Merge branch 'lidanny/feature/extended_subnet_ranges' of https://gith…
DannyLiCom Jul 29, 2025
497bdef
resolve conflicts
DannyLiCom Aug 4, 2025
5922023
Merge branch 'develop' into lidanny/feature/extended_subnet_ranges
DannyLiCom Aug 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
206 changes: 162 additions & 44 deletions src/xpk/commands/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from ..utils.file import write_tmp_file
from . import cluster_gcluster
from .common import set_cluster_command
import shlex
import shutil
import os

Expand Down Expand Up @@ -1054,6 +1055,128 @@ def run_gke_clusters_list_command(args) -> int:
return 0


def parse_command_args_to_dict(arg_string: str) -> dict:
  """Parses a command-line argument string into a dictionary of parameters.

  The string is tokenized with `shlex.split`, so quoted values containing
  spaces stay in one piece. Only long options (tokens starting with `--`) are
  recorded; three spellings are understood:

    * `--key=value`  -> {'--key': 'value'}
    * `--key value`  -> {'--key': 'value'}
    * `--flag`       -> {'--flag': True}

  For the `--key value` form, the token that follows a long option is taken as
  its value unless that token itself starts with `--`. Any other token
  (positional words, short options such as `-f`) is ignored. When a key occurs
  more than once, the last occurrence wins.

  Args:
    arg_string: A string containing command-line arguments, such as
      "--master-ipv4-cidr=10.0.0.0/28 --enable-ip-alias". May be empty or None.

  Returns:
    A dictionary where keys are parameter names including the leading dashes
    (e.g. "--enable-ip-alias") and values are either the string value or True
    for a bare flag. An empty dictionary is returned for empty input.

  Raises:
    ValueError: Propagated from `shlex.split` when the string contains
      unbalanced quotes.
  """
  parsed_args = {}
  if not arg_string:
    return parsed_args

  # NOTE(review): a hand-rolled scan is used instead of argparse because,
  # per the PR discussion, argparse needs every accepted flag declared up
  # front, whereas custom cluster arguments are arbitrary gcloud flags.
  tokens = shlex.split(arg_string)
  index = 0
  while index < len(tokens):
    token = tokens[index]
    index += 1

    if not token.startswith('--'):
      # Positional words and short options are intentionally skipped.
      continue

    key, has_equals, inline_value = token.partition('=')
    if has_equals:
      # --key=value (only the first '=' separates key from value).
      parsed_args[key] = inline_value
    elif index < len(tokens) and not tokens[index].startswith('--'):
      # --key value: consume the following token as the value.
      parsed_args[key] = tokens[index]
      index += 1
    else:
      # Bare flag: last token, or followed by another long option.
      parsed_args[key] = True

  return parsed_args


def process_gcloud_args(user_parsed_args, final_gcloud_args):
  """Applies user-supplied gcloud arguments on top of the computed ones.

  User arguments take precedence. In addition to plain add/overwrite, flags
  that follow the `--no-`, `--enable-` and `--disable-` naming patterns evict
  their counterpart so that contradictory flags never end up in the command:

    * `--no-X`       removes `--X` and is stored as a bare flag (True).
    * `--enable-X`   removes `--no-enable-X` and `--disable-X`, and is stored
                     with the user's value.
    * `--disable-X`  removes `--enable-X` and is stored as a bare flag (True).
    * anything else  is stored with the user's value.

  Args:
    user_parsed_args: Dictionary of flag name -> value parsed from the user's
      custom cluster arguments.
    final_gcloud_args: Dictionary of flag name -> value being assembled for the
      gcloud command. It is updated in place, because existing callers rely on
      the mutation and ignore the return value.

  Returns:
    The same `final_gcloud_args` object, after the update, so the call can
    also be used as an expression.
  """
  # NOTE(review): the counterpart of a flag is derived purely from the
  # --no-/--enable-/--disable- naming pattern, which the PR author based on
  # https://cloud.google.com/sdk/gcloud/reference/container/clusters/create
  # A pair of opposing flags that does not follow this pattern will not be
  # de-duplicated here.
  for key, value in user_parsed_args.items():
    if key.startswith('--no-'):
      # '--no-' is 5 characters; the remainder is the positive flag's name.
      final_gcloud_args.pop(f'--{key[5:]}', None)
      final_gcloud_args[key] = True
    elif key.startswith('--enable-'):
      # key[2:] keeps 'enable-...' so the negation reads '--no-enable-...'.
      final_gcloud_args.pop(f'--no-{key[2:]}', None)
      # key[9:] is the feature name after '--enable-'.
      final_gcloud_args.pop(f'--disable-{key[9:]}', None)
      final_gcloud_args[key] = value
    elif key.startswith('--disable-'):
      # key[10:] is the feature name after '--disable-'.
      final_gcloud_args.pop(f'--enable-{key[10:]}', None)
      final_gcloud_args[key] = True
    else:
      # For all other arguments, simply add or update their values.
      final_gcloud_args[key] = value
  return final_gcloud_args


def merge_conditional_params(conditional_params, final_gcloud_args):
  """Merges conditional parameters into the final gcloud arguments dictionary.

  A conditional parameter is only added when its key is not already present,
  i.e. values already in `final_gcloud_args` win. The one exception is
  `--addons`: its comma-separated values are combined with the existing ones
  and de-duplicated, keeping first-seen order so the resulting command string
  is deterministic.

  Args:
    conditional_params: Dictionary of flag name -> value to merge in.
    final_gcloud_args: Dictionary of flag name -> value being assembled for the
      gcloud command. It is updated in place, because existing callers rely on
      the mutation and ignore the return value.

  Returns:
    The same `final_gcloud_args` object, after the merge.
  """
  for key, value in conditional_params.items():
    if key not in final_gcloud_args:
      final_gcloud_args[key] = value
    elif key == '--addons':
      combined = final_gcloud_args[key].split(',') + value.split(',')
      # dict.fromkeys drops duplicates while preserving order; a set would
      # make the addon order vary between runs.
      final_gcloud_args[key] = ','.join(dict.fromkeys(combined))
  return final_gcloud_args


def construct_gcloud_command_string(
    cluster_name: str, gcloud_args: dict
) -> str:
  """Constructs the gcloud command string from a dictionary of arguments.

  Each entry of `gcloud_args` is rendered according to its value:

    * True                       -> the bare flag (`--flag`)
    * False or None              -> omitted
    * empty / whitespace-only    -> omitted
    * anything else              -> `--key=value`, with the value shell-quoted

  Values (and the cluster name) are passed through `shlex.quote`, so values
  containing spaces, quotes or other shell metacharacters stay a single,
  literal argument. Values made only of shell-safe characters are emitted
  unchanged.
  NOTE(review): this assumes the returned string is interpreted with POSIX
  shell quoting rules (a shell or `shlex.split`) -- confirm against the
  command runner.

  Args:
    cluster_name: The name of the cluster.
    gcloud_args: A dictionary where keys are gcloud argument names
      and values are their corresponding parsed values.

  Returns:
    A complete gcloud command string.
  """
  command_parts = [
      'gcloud beta container clusters create',
      shlex.quote(cluster_name),
  ]

  for key, value in gcloud_args.items():
    if value is True:
      command_parts.append(key)
      continue
    if value is False or value is None:
      continue
    text = str(value)
    if text.strip():
      command_parts.append(f'{key}={shlex.quote(text)}')

  return ' '.join(command_parts)


def run_gke_cluster_create_command(
args, gke_control_plane_version: str, system: SystemCharacteristics
) -> int:
Expand All @@ -1077,59 +1200,50 @@ def run_gke_cluster_create_command(
)
machine_type = args.cluster_cpu_machine_type

# Create the regional cluster with `num-nodes` CPU nodes in the same zone as
# TPUs. This has been tested with clusters of 300 VMs. Larger clusters will
# benefit from a larger initial `--num-nodes`. After the cluster is created,
# the auto-scaler can reduce/increase the nodes based on the load.
final_gcloud_args = {}
final_gcloud_args['--project'] = args.project
final_gcloud_args['--region'] = zone_to_region(args.zone)
final_gcloud_args['--node-locations'] = args.zone
final_gcloud_args['--cluster-version'] = gke_control_plane_version
final_gcloud_args['--machine-type'] = machine_type
final_gcloud_args['--enable-autoscaling'] = True
final_gcloud_args['--total-min-nodes'] = 1
final_gcloud_args['--total-max-nodes'] = 1000
final_gcloud_args['--num-nodes'] = args.default_pool_cpu_num_nodes
final_gcloud_args['--enable-dns-access'] = True
# This value is from here: https://cloud.google.com/kubernetes-engine/docs/how-to/legacy/network-isolation
final_gcloud_args['--master-ipv4-cidr'] = '172.16.0.32/28'
# This value is from here https://cloud.google.com/vpc/docs/subnets
final_gcloud_args['--cluster-ipv4-cidr'] = '10.224.0.0/12'
final_gcloud_args['--enable-private-nodes'] = True
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

shouldn't this be False?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you want to use --master-ipv4-cidr, you must also pass --enable-private-nodes; otherwise gcloud fails with the following error:

ERROR: (gcloud.beta.container.clusters.create) Cannot specify --master-ipv4-cidr without --enable-private-nodes.

So it seems --enable-private-nodes has to be added in order to run python3 xpk/xpk.py cluster create-pathways.

final_gcloud_args['--enable-ip-alias'] = True
final_gcloud_args['--autoscaling-profile'] = 'optimize-utilization'

# If the user passes in the gke version then we use that directly instead of the rapid release.
# This allows users to directly pass a specified gke version without release channel constraints.
rapid_release_cmd = ''
if args.gke_version is not None:
rapid_release_cmd = ' --release-channel rapid'

command = (
'gcloud beta container clusters create'
f' {args.cluster} --project={args.project}'
f' --region={zone_to_region(args.zone)}'
f' --node-locations={args.zone}'
f' --cluster-version={gke_control_plane_version}'
f' --machine-type={machine_type}'
' --enable-autoscaling'
' --total-min-nodes 1 --total-max-nodes 1000'
f' --num-nodes {args.default_pool_cpu_num_nodes}'
f' {args.custom_cluster_arguments}'
f' {rapid_release_cmd}'
' --enable-dns-access'
' --autoscaling-profile=optimize-utilization'
)

enable_ip_alias = False
final_gcloud_args['--release-channel'] = 'rapid'

conditional_params = {}
if args.private or args.authorized_networks is not None:
enable_ip_alias = True
command += ' --enable-master-authorized-networks --enable-private-nodes'
conditional_params['--enable-master-authorized-networks'] = True
conditional_params['--enable-private-nodes'] = True
conditional_params['--enable-ip-alias'] = True

if system.accelerator_type == AcceleratorType['GPU']:
enable_ip_alias = True
command += (
' --enable-dataplane-v2'
' --enable-multi-networking --no-enable-autoupgrade'
)
conditional_params['--enable-dataplane-v2'] = True
conditional_params['--enable-multi-networking'] = True
conditional_params['--no-enable-autoupgrade'] = True
conditional_params['--enable-ip-alias'] = True
else:
command += ' --location-policy=BALANCED --scopes=storage-full,gke-default'

conditional_params['--location-policy'] = 'BALANCED'
conditional_params['--scopes'] = 'storage-full,gke-default'
if args.enable_pathways:
enable_ip_alias = True

if enable_ip_alias:
command += ' --enable-ip-alias'
conditional_params['--enable-ip-alias'] = True

if args.enable_ray_cluster:
command += ' --addons RayOperator'
conditional_params['--addons'] = 'RayOperator'

if args.enable_workload_identity or args.enable_gcsfuse_csi_driver:
command += f' --workload-pool={args.project}.svc.id.goog'
conditional_params['--workload-pool'] = f'{args.project}.svc.id.goog'

addons = []
if args.enable_gcsfuse_csi_driver:
Expand All @@ -1146,14 +1260,18 @@ def run_gke_cluster_create_command(

if args.enable_lustre_csi_driver:
addons.append('LustreCsiDriver')
command += ' --enable-legacy-lustre-port'
conditional_params['--enable-legacy-lustre-port'] = True

if hasattr(args, 'enable_mtc') and args.enable_mtc:
addons.append('HighScaleCheckpointing')

if len(addons) > 0:
addons_str = ','.join(addons)
command += f' --addons={addons_str}'
conditional_params['--addons'] = ','.join(addons)

merge_conditional_params(conditional_params, final_gcloud_args)
user_parsed_args = parse_command_args_to_dict(args.custom_cluster_arguments)
process_gcloud_args(user_parsed_args, final_gcloud_args)
command = construct_gcloud_command_string(args.cluster, final_gcloud_args)

return_code = run_command_with_updates(command, 'GKE Cluster Create', args)
if return_code != 0:
Expand Down
58 changes: 58 additions & 0 deletions src/xpk/commands/tests/unit/test_arg_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Unit tests for the arg_parser module in xpk.commands."""

import unittest
from src.xpk.commands.cluster import parse_command_args_to_dict


class TestParseCommandArgsToDict(unittest.TestCase):
  """Exercises parse_command_args_to_dict (imported from the cluster module)."""

  def _assert_parsed(self, arg_string, expected):
    """Parses `arg_string` and compares the resulting dict with `expected`."""
    self.assertEqual(parse_command_args_to_dict(arg_string), expected)

  def test_empty_string(self):
    self._assert_parsed('', {})

  def test_simple_key_value_pairs(self):
    expected = {'--key1': 'value1', '--key2': 'value2'}
    self._assert_parsed('--key1=value1 --key2=value2', expected)

  def test_flag_with_space_value(self):
    expected = {'--key1': 'value1', '--key2': 'value2'}
    self._assert_parsed('--key1 value1 --key2 value2', expected)

  def test_boolean_flags(self):
    expected = {'--enable-feature': True, '--no-logs': True}
    self._assert_parsed('--enable-feature --no-logs', expected)

  def test_mixed_formats(self):
    expected = {
        '--project': 'my-project',
        '--zone': 'us-central1',
        '--dry-run': True,
    }
    self._assert_parsed(
        '--project=my-project --zone us-central1 --dry-run', expected
    )

  def test_quoted_values(self):
    expected = {
        '--description': 'My cluster with spaces',
        '--name': 'test-cluster',
    }
    self._assert_parsed(
        '--description "My cluster with spaces" --name=test-cluster', expected
    )

  def test_no_double_hyphen_flags(self):
    # Only --flag should be parsed; the bare word and -f are ignored.
    self._assert_parsed('random-word -f --flag', {'--flag': True})

  def test_duplicate_keys_last_one_wins(self):
    self._assert_parsed('--key=value1 --key=value2', {'--key': 'value2'})

  def test_hyphenated_keys(self):
    self._assert_parsed(
        '--api-endpoint=some-url', {'--api-endpoint': 'some-url'}
    )


if __name__ == '__main__':
  # Lets the tests be run directly as a module. From the xpk folder, run:
  #   python3 -m src.xpk.commands.tests.unit.test_arg_parser
  unittest.main()
Loading
Loading