doc/source/whatsnew/v3.0.0.rst (1 addition & 1 deletion)
@@ -208,6 +208,7 @@ Other enhancements
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support Python's new-style format strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to old-style ``%`` format strings and callables. This allows for more flexible and modern formatting of floating point numbers when exporting to CSV. (:issue:`49580`)
- :meth:`DataFrame.to_sql` and :func:`to_sql` now accept a ``nullable`` parameter to specify which columns should allow NULL values. This allows control over the NOT NULL constraint when creating SQL tables, supporting use cases like programmatic table creation from data dictionaries (:issue:`63116`)
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now support positional arguments passed as kwargs (:issue:`58995`)
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
@@ -232,7 +233,6 @@ Other enhancements
- Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`)
- Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`)
- Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`)
-

.. ---------------------------------------------------------------------------
.. _whatsnew_300.notable_bug_fixes:
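As context for the whatsnew entry above, a minimal usage sketch of the new
``nullable`` parameter (assumes the behavior added in this PR and an
in-memory SQLAlchemy engine; the table and column names are illustrative):

>>> import pandas as pd
>>> from sqlalchemy import create_engine
>>> engine = create_engine("sqlite://")
>>> df = pd.DataFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})
>>> df.to_sql("users", con=engine, index=False, nullable={"id": False})
3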
pandas/core/generic.py (41 additions & 0 deletions)
@@ -2798,6 +2798,7 @@ def to_sql(
chunksize: int | None = None,
dtype: DtypeArg | None = None,
method: Literal["multi"] | Callable | None = None,
nullable: dict[str, bool] | None = None,
) -> int | None:
"""
Write records stored in a DataFrame to a SQL database.
@@ -2861,6 +2862,25 @@ def to_sql(

Details and a sample callable implementation can be found in the
section :ref:`insert method <io.sql.method>`.
nullable : dict, optional
    Specifies whether columns should allow NULL values. Keys are column
    names and values are booleans: ``True`` means the column is nullable
    (may contain NULL), ``False`` means the column is created NOT NULL.

    For SQLAlchemy connections: columns not listed in the dictionary fall
    back to SQLAlchemy's default, which is nullable.

    For ADBC connections: the PyArrow table schema is modified to carry
    the nullability constraint. If the data contains NULL values in a
    column marked ``nullable=False``, a ``ValueError`` is raised.

    This parameter only applies when creating a new table or replacing an
    existing one (i.e., when ``if_exists='fail'`` and the table does not
    exist, or when ``if_exists='replace'``). When ``if_exists='append'``,
    it is ignored because the table schema already exists.

.. versionadded:: 3.0.0

Returns
-------
@@ -3013,6 +3033,26 @@ def to_sql(
... conn.execute(text("SELECT * FROM integers")).fetchall()
[(1,), (None,), (2,)]

Specify nullable constraints when creating a table. This is useful for
enforcing NOT NULL constraints based on data dictionaries or schemas.

>>> df = pd.DataFrame({'id': [1, 2, 3], 'name': ['Alice', 'Bob', 'Charlie']})
>>> df.to_sql(name='users_with_constraints', con=engine, if_exists='replace',
... index=False, nullable={'id': False, 'name': False})
3

The table is created with NOT NULL constraints on the ``id`` and ``name`` columns:

>>> with engine.connect() as conn:
... result = conn.execute(text(
... "SELECT sql FROM sqlite_master WHERE name='users_with_constraints'"
...     )).fetchone()  # doctest: +SKIP
...     print(result[0])  # doctest: +SKIP
CREATE TABLE users_with_constraints (
id BIGINT NOT NULL,
name TEXT NOT NULL
)

.. versionadded:: 2.2.0

pandas now supports writing via ADBC drivers
@@ -3042,6 +3082,7 @@ def to_sql(
chunksize=chunksize,
dtype=dtype,
method=method,
nullable=nullable,
)

@final
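The ADBC behavior documented in the docstring above builds on PyArrow's
per-field nullability; a minimal standalone sketch of that mechanism,
independent of pandas (column names are illustrative):

>>> import pyarrow as pa
>>> tbl = pa.table({"id": [1, None], "name": ["a", "b"]})
>>> tbl.column("id").null_count  # data-level check used by the ADBC path
1
>>> field = tbl.schema.field("id")
>>> field.nullable
True
>>> field.with_nullable(False).nullable  # schema-level constraint only
False

Nullability lives in the schema metadata and PyArrow does not validate the
data on ``Table.cast``, which is presumably why the implementation below
checks ``null_count`` itself before casting.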
pandas/io/sql.py (77 additions & 7 deletions)
@@ -751,6 +751,7 @@ def to_sql(
dtype: DtypeArg | None = None,
method: Literal["multi"] | Callable | None = None,
engine: str = "auto",
nullable: dict[str, bool] | None = None,
**engine_kwargs,
) -> int | None:
"""
@@ -808,6 +809,25 @@ def to_sql(
SQL engine library to use. If 'auto', then the option
``io.sql.engine`` is used. The default ``io.sql.engine``
behavior is 'sqlalchemy'
nullable : dict, optional
    Specifies whether columns should allow NULL values. Keys are column
    names and values are booleans: ``True`` means the column is nullable
    (may contain NULL), ``False`` means the column is created NOT NULL.

    For SQLAlchemy connections: columns not listed in the dictionary fall
    back to SQLAlchemy's default, which is nullable.

    For ADBC connections: the PyArrow table schema is modified to carry
    the nullability constraint. If the data contains NULL values in a
    column marked ``nullable=False``, a ``ValueError`` is raised.

    This parameter only applies when creating a new table or replacing an
    existing one (i.e., when ``if_exists='fail'`` and the table does not
    exist, or when ``if_exists='replace'``). When ``if_exists='append'``,
    it is ignored because the table schema already exists.

.. versionadded:: 3.0.0

**engine_kwargs
Any additional kwargs are passed to the engine.
@@ -849,6 +869,7 @@ def to_sql(
dtype=dtype,
method=method,
engine=engine,
nullable=nullable,
**engine_kwargs,
)

@@ -944,6 +965,7 @@ def __init__(
schema=None,
keys=None,
dtype: DtypeArg | None = None,
nullable: dict[str, bool] | None = None,
) -> None:
self.name = name
self.pd_sql = pandas_sql_engine
@@ -954,6 +976,7 @@ def __init__(
self.if_exists = if_exists
self.keys = keys
self.dtype = dtype
self.nullable = nullable

if frame is not None:
# We want to initialize based on a dataframe
@@ -1267,10 +1290,15 @@ def _create_table_setup(self):

column_names_and_types = self._get_column_names_and_types(self._sqlalchemy_type)

columns: list[Any] = [
Column(name, typ, index=is_index)
for name, typ, is_index in column_names_and_types
]
columns: list[Any] = []
for name, typ, is_index in column_names_and_types:
    if self.nullable is not None and name in self.nullable:
        columns.append(
            Column(name, typ, index=is_index, nullable=self.nullable[name])
        )
    else:
        columns.append(Column(name, typ, index=is_index))

if self.keys is not None:
if not is_list_like(self.keys):
@@ -1504,6 +1532,7 @@ def to_sql(
dtype: DtypeArg | None = None,
method: Literal["multi"] | Callable | None = None,
engine: str = "auto",
nullable: dict[str, bool] | None = None,
**engine_kwargs,
) -> int | None:
pass
@@ -1893,6 +1922,7 @@ def prep_table(
index_label=None,
schema=None,
dtype: DtypeArg | None = None,
nullable: dict[str, bool] | None = None,
) -> SQLTable:
"""
Prepares table in the database for data insertion. Creates it if needed, etc.
@@ -1928,6 +1958,7 @@ def prep_table(
index_label=index_label,
schema=schema,
dtype=dtype,
nullable=nullable,
)
table.create()
return table
@@ -1973,6 +2004,7 @@ def to_sql(
dtype: DtypeArg | None = None,
method: Literal["multi"] | Callable | None = None,
engine: str = "auto",
nullable: dict[str, bool] | None = None,
**engine_kwargs,
) -> int | None:
"""
@@ -2032,6 +2064,7 @@ def to_sql(
index_label=index_label,
schema=schema,
dtype=dtype,
nullable=nullable,
)

total_inserted = sql_engine.insert_records(
@@ -2333,6 +2366,7 @@ def to_sql(
dtype: DtypeArg | None = None,
method: Literal["multi"] | Callable | None = None,
engine: str = "auto",
nullable: dict[str, bool] | None = None,
**engine_kwargs,
) -> int | None:
"""
@@ -2362,6 +2396,15 @@ def to_sql(
Raises NotImplementedError
method : {None, 'multi', callable}, default None
Raises NotImplementedError
nullable : dict, optional
    Specifies whether columns should allow NULL values. Keys are column
    names and values are booleans: ``True`` means the column is nullable
    (may contain NULL), ``False`` means the column is created NOT NULL.
    Only applies when creating or replacing a table (``if_exists='fail'``
    or ``if_exists='replace'``); when ``if_exists='append'``, the
    parameter is ignored. If the data contains NULL values in a column
    marked ``nullable=False``, a ``ValueError`` is raised.
engine : {'auto', 'sqlalchemy'}, default 'auto'
Raises NotImplementedError if not set to 'auto'
"""
@@ -2410,6 +2453,27 @@ def to_sql(
except pa.ArrowNotImplementedError as exc:
raise ValueError("datatypes not supported") from exc

if nullable and mode == "create":
    current_schema = tbl.schema
    new_fields = []

    for field in current_schema:
        if field.name in nullable:
            if not nullable[field.name]:
                # Nullability is schema metadata only, so reject NULL
                # data up front; the cast below will not validate it.
                col_data = tbl.column(field.name)
                if col_data.null_count > 0:
                    raise ValueError(
                        f"Column '{field.name}' contains {col_data.null_count} "
                        f"null value(s) but nullable=False was specified"
                    )
            new_fields.append(field.with_nullable(nullable[field.name]))
        else:
            new_fields.append(field)

    new_schema = pa.schema(new_fields, metadata=current_schema.metadata)
    tbl = tbl.cast(new_schema)

with self.con.cursor() as cur:
try:
total_inserted = cur.adbc_ingest(
@@ -2588,9 +2652,13 @@ def _create_table_setup(self):
column_names_and_types = self._get_column_names_and_types(self._sql_type_name)
escape = _get_valid_sqlite_name

create_tbl_stmts = [
escape(cname) + " " + ctype for cname, ctype, _ in column_names_and_types
]
create_tbl_stmts = []
for cname, ctype, _ in column_names_and_types:
    col_def = escape(cname) + " " + ctype
    if self.nullable is not None and not self.nullable.get(cname, True):
        col_def += " NOT NULL"
    create_tbl_stmts.append(col_def)

if self.keys is not None and len(self.keys):
if not is_list_like(self.keys):
@@ -2810,6 +2878,7 @@ def to_sql(
dtype: DtypeArg | None = None,
method: Literal["multi"] | Callable | None = None,
engine: str = "auto",
nullable: dict[str, bool] | None = None,
**engine_kwargs,
) -> int | None:
"""
@@ -2875,6 +2944,7 @@ def to_sql(
if_exists=if_exists,
index_label=index_label,
dtype=dtype,
nullable=nullable,
)
table.create()
return table.insert(chunksize, method)
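To make the SQLite fallback path above concrete, a hedged end-to-end sketch
using the builtin sqlite3 driver (assumes the behavior added in this PR; the
table name is illustrative and the exact DDL whitespace pandas emits may
differ):

>>> import sqlite3
>>> import pandas as pd
>>> con = sqlite3.connect(":memory:")
>>> df = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})
>>> df.to_sql("t", con=con, index=False, nullable={"id": False})
2
>>> con.execute(
...     "SELECT sql FROM sqlite_master WHERE name='t'"
... ).fetchone()[0]  # doctest: +SKIP
'CREATE TABLE "t" (\n"id" INTEGER NOT NULL,\n"name" TEXT\n)'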