|
| 1 | +import pytest |
| 2 | + |
| 3 | +from conftest import DataFramePair |
| 4 | + |
| 5 | + |
@pytest.fixture
def str_columns_df():
    """Build a DataFramePair with three string columns for ``str.cat`` tests.

    The query yields five rows of ``first_name``, ``last_name`` and ``title``.
    One row has a NULL ``first_name`` and another a NULL ``last_name`` so the
    concatenation tests can exercise missing operands.
    """
    # Five literal rows stitched together with UNION ALL; the fourth and
    # fifth rows carry the NULL first_name / last_name respectively.
    names_sql = """
        SELECT
            'First' AS first_name,
            'Last' AS last_name,
            'Title' AS title
        UNION ALL
        SELECT
            'John' AS first_name,
            'Doe' AS last_name,
            'Mr.' AS title
        UNION ALL
        SELECT
            'Jane' AS first_name,
            'Smith' AS last_name,
            'Ms.' AS title
        UNION ALL
        SELECT
            NULL AS first_name,
            'Brown' AS last_name,
            'Dr.' AS title
        UNION ALL
        SELECT
            'Alice' AS first_name,
            NULL AS last_name,
            'Prof.' AS title
        """
    return DataFramePair(query=names_sql)
| 37 | + |
| 38 | + |
def test_str_cat_basic(str_columns_df):
    """Check ``str.cat`` with its default separator on the lazy frame.

    ``first_name`` and ``last_name`` are joined into ``full_name``. Rows with
    a NULL operand are expected to keep only the non-NULL part, which the
    original author attributes to DuckDB's ``concat_ws`` skipping NULLs
    (unlike pandas).
    """
    frame = str_columns_df.lazy_df
    first, last = frame["first_name"], frame["last_name"]

    # No ``sep`` argument: the two columns are joined back to back.
    frame["full_name"] = first.str.cat(last)
    collected = str_columns_df.lazy_df.collect()

    # NULL operands contribute nothing, so "Brown" and "Alice" stand alone.
    actual = collected["full_name"].tolist()
    assert actual == ["FirstLast", "JohnDoe", "JaneSmith", "Brown", "Alice"]
| 51 | + |
| 52 | + |
def test_str_cat_with_separator(str_columns_df):
    """Check ``str.cat`` with an explicit single-space separator.

    ``first_name`` and ``last_name`` are joined into ``full_name`` using
    ``sep=" "``. Rows with a NULL operand are expected to contain only the
    non-NULL part with no stray separator, which the original author
    attributes to DuckDB's ``concat_ws`` skipping NULLs (unlike pandas).
    """
    frame = str_columns_df.lazy_df
    first, last = frame["first_name"], frame["last_name"]

    # Join the two name columns with a space between them.
    frame["full_name"] = first.str.cat(last, sep=" ")
    collected = str_columns_df.lazy_df.collect()

    # NULL operands are skipped, so no leading/trailing space is expected.
    actual = collected["full_name"].tolist()
    assert actual == ["First Last", "John Doe", "Jane Smith", "Brown", "Alice"]
| 65 | + |
| 66 | + |
def test_str_cat_multiple_columns(str_columns_df):
    """Check that ``str.cat`` calls can be chained across three columns.

    ``title``, ``first_name`` and ``last_name`` are joined in that order with
    a single space into ``formatted_name``. Rows with a NULL name part are
    expected to contain only the remaining parts, which the original author
    attributes to DuckDB's ``concat_ws`` skipping NULLs (unlike pandas).
    """
    frame = str_columns_df.lazy_df

    # First step: "<title> <first_name>"; second step appends " <last_name>".
    title_and_first = frame["title"].str.cat(frame["first_name"], sep=" ")
    frame["formatted_name"] = title_and_first.str.cat(frame["last_name"], sep=" ")
    collected = str_columns_df.lazy_df.collect()

    # Rows four and five each lack one name part, leaving two words.
    actual = collected["formatted_name"].tolist()
    assert actual == [
        "Title First Last",
        "Mr. John Doe",
        "Ms. Jane Smith",
        "Dr. Brown",
        "Prof. Alice",
    ]
0 commit comments