From 421024cf9beac599351b2bf6d65e7839d8909c2b Mon Sep 17 00:00:00 2001
From: lkhagvadorj-amp <lkhagvadorj.amp@gmail.com>
Date: Thu, 17 Apr 2025 16:56:20 +0100
Subject: [PATCH] feat: allow loading table from dataframe with extra fields,
 #1812

---
 google/cloud/bigquery/_pandas_helpers.py | 11 ++++++--
 tests/unit/test__pandas_helpers.py       | 34 ++++++++++++++++++++++++
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index 457eb9078..dacbe1515 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -484,6 +484,10 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
         Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]:
             The automatically determined schema. Returns None if the type of
             any column cannot be determined.
+
+    Note:
+        - If `bq_schema` contains fields not found in the DataFrame, they are
+          appended to the resulting schema and a `UserWarning` is issued.
     """
     if pandas_gbq is None:
         warnings.warn(
@@ -537,11 +541,14 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
     # Catch any schema mismatch. The developer explicitly asked to serialize a
     # column, but it was not found.
     if bq_schema_unused:
-        raise ValueError(
+        warnings.warn(
             "bq_schema contains fields not present in dataframe: {}".format(
                 bq_schema_unused
-            )
+            ),
+            category=UserWarning,
         )
+        for unused_field_name in bq_schema_unused:
+            bq_schema_out.append(bq_schema_index.get(unused_field_name))
 
     # If schema detection was not successful for all columns, also try with
     # pyarrow, if available.
diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py
index 48c085c1d..c59b6d7d3 100644
--- a/tests/unit/test__pandas_helpers.py
+++ b/tests/unit/test__pandas_helpers.py
@@ -1385,6 +1385,40 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch):
     assert returned_schema == expected_schema
 
 
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_dataframe_to_bq_schema_allows_extra_fields(module_under_test, monkeypatch):
+    """Schema fields absent from the dataframe are kept and trigger a warning."""
+    monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
+    frame = pandas.DataFrame(
+        collections.OrderedDict(
+            [
+                ("str_column", ["hello", "world"]),
+                ("int_column", [42, 8]),
+                ("bool_column", [True, False]),
+            ]
+        )
+    )
+
+    # `extra_column` is declared in the schema but is not a dataframe column.
+    requested_fields = [
+        {"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
+        {"name": "int_column", "type": "INTEGER", "mode": "NULLABLE"},
+        {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
+        {"name": "extra_column", "type": "STRING", "mode": "NULLABLE"},
+    ]
+
+    with pytest.warns(UserWarning, match="bq_schema contains fields not present"):
+        detected = module_under_test.dataframe_to_bq_schema(frame, requested_fields)
+
+    assert detected == (
+        schema.SchemaField("str_column", "STRING", "NULLABLE"),
+        schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
+        schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
+        schema.SchemaField("extra_column", "STRING", "NULLABLE"),
+    )
+
+
 @pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
 def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(
     module_under_test, monkeypatch