From 7abfbe9f4797e8f92028e11bf20a6d7a9ab3d602 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Thu, 11 Sep 2025 22:35:36 -0700 Subject: [PATCH] Optimize sql_processor by building string in list instead of concatenating immutable strings --- python/pyspark/pandas/sql_processor.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/python/pyspark/pandas/sql_processor.py b/python/pyspark/pandas/sql_processor.py index e24c369cd43f4..8437ff6e48cff 100644 --- a/python/pyspark/pandas/sql_processor.py +++ b/python/pyspark/pandas/sql_processor.py @@ -293,13 +293,12 @@ def execute(self, index_col: Optional[Union[str, List[str]]]) -> DataFrame: 0 True False """ blocks = _string.formatter_parser(self._statement) - # TODO: use a string builder - res = "" + res = [] try: for pre, inner, _, _ in blocks: var_next = "" if inner is None else self._convert(inner) - res = res + pre + var_next - self._normalized_statement = res + res.append(pre + var_next) + self._normalized_statement = "".join(res) sdf = self._session.sql(self._normalized_statement) finally: