From 22fce11107c02ec36e9811edec655b93a73ac2f3 Mon Sep 17 00:00:00 2001
From: Eric Chen <eicchen02@gmail.com>
Date: Tue, 10 Jun 2025 13:00:25 -0700
Subject: [PATCH 1/5] Add initial test case provided in issue #61583

---
 pandas/tests/io/test_stata.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index e73de78847c8f..cbf245d42b7f9 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -2601,3 +2601,12 @@ def test_strl_missings(temp_file, version):
         ]
     )
     df.to_stata(temp_file, version=version)
+
+@pytest.mark.parametrize("version", [117, 118, 119, None])
+def test_ascii_error(temp_file, version):
+    # GH #61583
+    # Check that 2 byte long unicode characters doesn't cause export error
+    df = pd.DataFrame({'doubleByteCol': ['§'*1500]})
+    df.to_stata(temp_file, write_index=0, version=version)
+    df_input = read_stata(temp_file)
+    tm.assert_frame_equal(df, df_input)
\ No newline at end of file

From bb411801cf387fb5af97a08ee1b242fe8c72ee60 Mon Sep 17 00:00:00 2001
From: Eric Chen <eicchen02@gmail.com>
Date: Tue, 10 Jun 2025 14:55:41 -0700
Subject: [PATCH 2/5] Replaced encoded-length check with unencoded-length check
 to prevent edge cases where the two lengths differ

---
 pandas/io/stata.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index cd290710ddbaa..271a1a96aa8fc 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -2739,7 +2739,7 @@ def _encode_strings(self) -> None:
                 encoded = self.data[col].str.encode(self._encoding)
                 # If larger than _max_string_length do nothing
                 if (
-                    max_len_string_array(ensure_object(encoded._values))
+                    max_len_string_array(ensure_object(self.data[col]._values))
                     <= self._max_string_length
                 ):
                     self.data[col] = encoded
@@ -3263,11 +3263,15 @@ def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes:
             bio.write(gso_type)
 
             # llll
-            utf8_string = bytes(strl, "utf-8")
-            bio.write(struct.pack(len_type, len(utf8_string) + 1))
+            if(type(strl) == str):
+                strl_convert = bytes(strl, "utf-8")
+            else:
+                strl_convert = strl
+                
+            bio.write(struct.pack(len_type, len(strl_convert) + 1))
 
             # xxx...xxx
-            bio.write(utf8_string)
+            bio.write(strl_convert)
             bio.write(null)
 
         return bio.getvalue()

From c3251bc606b2fdb54f63c628e71472f99eeb0860 Mon Sep 17 00:00:00 2001
From: Eric Chen <eicchen02@gmail.com>
Date: Tue, 10 Jun 2025 15:31:01 -0700
Subject: [PATCH 3/5] replaced type check with isinstance()

---
 pandas/io/stata.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 271a1a96aa8fc..77adea6b02c61 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -3263,7 +3263,7 @@ def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes:
             bio.write(gso_type)
 
             # llll
-            if(type(strl) == str):
+            if(isinstance(strl, str)):
                 strl_convert = bytes(strl, "utf-8")
             else:
                 strl_convert = strl

From 4a13c49b618b7aaa06dbfe0539d52c7091cd2c13 Mon Sep 17 00:00:00 2001
From: Eric Chen <eicchen02@gmail.com>
Date: Tue, 10 Jun 2025 15:33:05 -0700
Subject: [PATCH 4/5] Updated patch notes

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 03a386708323d..1e315906e8240 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -775,6 +775,7 @@ I/O
 - Bug in :meth:`DataFrame.to_stata` when exporting a column containing both long strings (Stata strL) and :class:`pd.NA` values (:issue:`23633`)
 - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)
 - Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
+- Bug in :meth:`DataFrame.to_stata` when input encoded length and normal length are mismatched (:issue:`61583`)
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)

From 0ca0c9ece8d5352f7f31df119a360a8c132589f5 Mon Sep 17 00:00:00 2001
From: Eric Chen <eicchen02@gmail.com>
Date: Tue, 10 Jun 2025 15:41:14 -0700
Subject: [PATCH 5/5] pre-commit checks

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 pandas/io/stata.py             | 4 ++--
 pandas/tests/io/test_stata.py  | 5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 1e315906e8240..61951e25bb35f 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -773,9 +773,9 @@ I/O
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
 - Bug in :meth:`DataFrame.to_excel` where the :class:`MultiIndex` index with a period level was not a date (:issue:`60099`)
 - Bug in :meth:`DataFrame.to_stata` when exporting a column containing both long strings (Stata strL) and :class:`pd.NA` values (:issue:`23633`)
+- Bug in :meth:`DataFrame.to_stata` when input encoded length and normal length are mismatched (:issue:`61583`)
 - Bug in :meth:`DataFrame.to_stata` when writing :class:`DataFrame` and ``byteorder=`big```. (:issue:`58969`)
 - Bug in :meth:`DataFrame.to_stata` when writing more than 32,000 value labels. (:issue:`60107`)
-- Bug in :meth:`DataFrame.to_stata` when input encoded length and normal length are mismatched (:issue:`61583`)
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 77adea6b02c61..092c24f0d31c3 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -3263,11 +3263,11 @@ def generate_blob(self, gso_table: dict[str, tuple[int, int]]) -> bytes:
             bio.write(gso_type)
 
             # llll
-            if(isinstance(strl, str)):
+            if isinstance(strl, str):
                 strl_convert = bytes(strl, "utf-8")
             else:
                 strl_convert = strl
-                
+
             bio.write(struct.pack(len_type, len(strl_convert) + 1))
 
             # xxx...xxx
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
index cbf245d42b7f9..b155c0cca4aa6 100644
--- a/pandas/tests/io/test_stata.py
+++ b/pandas/tests/io/test_stata.py
@@ -2602,11 +2602,12 @@ def test_strl_missings(temp_file, version):
     )
     df.to_stata(temp_file, version=version)
 
+
 @pytest.mark.parametrize("version", [117, 118, 119, None])
 def test_ascii_error(temp_file, version):
     # GH #61583
     # Check that 2 byte long unicode characters doesn't cause export error
-    df = pd.DataFrame({'doubleByteCol': ['§'*1500]})
+    df = DataFrame({"doubleByteCol": ["§" * 1500]})
     df.to_stata(temp_file, write_index=0, version=version)
     df_input = read_stata(temp_file)
-    tm.assert_frame_equal(df, df_input)
\ No newline at end of file
+    tm.assert_frame_equal(df, df_input)