Skip to content

Commit d922af1

Browse files
committed
feat(datasets): add back changes unrelated to mode
This reverts commit e4636b7. Signed-off-by: Deepyaman Datta <[email protected]>
1 parent e4636b7 commit d922af1

File tree

4 files changed

+212
-3
lines changed

4 files changed

+212
-3
lines changed

kedro-datasets/RELEASE.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
| `polars.PolarsDatabaseDataset` | A dataset to load and save data to a SQL backend using Polars | `kedro_datasets_experimental.polars` |
1010

1111
- `ibis.TableDataset`: Added configurable save modes via `save_args.mode`, supporting "append", "overwrite", "error"/"errorifexists", and "ignore". Legacy `save_args.overwrite` is mapped to `mode` for backward compatibility; specifying both is now an error.
12+
- `ibis.TableDataset`: Added a `credentials` parameter (string URI or dict, optionally with `con`) that supersedes the `connection` parameter. If both are provided, `credentials` takes precedence and a deprecation warning is issued.
1213

1314
## Bug fixes and other changes
1415

kedro-datasets/kedro_datasets/ibis/table_dataset.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ def __init__( # noqa: PLR0913
8585
*,
8686
table_name: str,
8787
database: str | None = None,
88+
credentials: dict[str, Any] | str | None = None,
8889
connection: dict[str, Any] | None = None,
8990
load_args: dict[str, Any] | None = None,
9091
save_args: dict[str, Any] | None = None,
@@ -116,6 +117,10 @@ def __init__( # noqa: PLR0913
116117
in a multi-level table hierarchy.
117118
connection: Configuration for connecting to an Ibis backend.
118119
If not provided, connect to DuckDB in in-memory mode.
120+
credentials: Connection information (e.g.
121+
user, password, token, account). If provided, these values
122+
overlay the base `connection` configuration. May also be a
123+
connection string.
119124
load_args: Additional arguments passed to the Ibis backend's
120125
`read_{file_format}` method.
121126
save_args: Additional arguments passed to the Ibis backend's
@@ -134,7 +139,32 @@ def __init__( # noqa: PLR0913
134139

135140
self._table_name = table_name
136141
self._database = database
137-
self._connection_config = connection or self.DEFAULT_CONNECTION_CONFIG
142+
self._credentials = deepcopy(credentials) or {}
143+
if self._credentials != {}:
144+
self._backend_name = self._get_backend_name()
145+
146+
# Prefer credentials if provided, else fallback to connection.
147+
self._connection_config = connection or (
148+
self.DEFAULT_CONNECTION_CONFIG if self._credentials == {} else {}
149+
)
150+
if self._credentials is not None:
151+
if isinstance(self._credentials, str):
152+
self._connection_config.update(
153+
{
154+
"backend": self._backend_name,
155+
"con": self._credentials,
156+
}
157+
)
158+
elif (
159+
isinstance(self._credentials, dict)
160+
and "backend" not in self._credentials
161+
and "con" in self._credentials
162+
):
163+
self._connection_config.update(
164+
self._credentials | {"backend": self._backend_name}
165+
)
166+
else:
167+
self._connection_config.update(self._credentials)
138168
self.metadata = metadata
139169

140170
# Set load and save arguments, overwriting defaults if provided.
@@ -171,6 +201,10 @@ def _connect(self) -> BaseBackend:
171201
import ibis # noqa: PLC0415
172202

173203
config = deepcopy(self._connection_config)
204+
# If credentials is a dict with a 'con' key, treat as connection string
205+
if isinstance(config, dict) and "con" in config:
206+
return ibis.connect(config.pop("con"))
207+
# Otherwise, treat as expanded dict (params)
174208
backend = getattr(ibis, config.pop("backend"))
175209
return backend.connect(**config)
176210

@@ -182,7 +216,12 @@ def connection(self) -> BaseBackend:
182216
def load(self) -> ir.Table:
183217
return self.connection.table(self._table_name, **self._load_args)
184218

219+
# Users should wrap their DataFrame-like object in an ibis.memtable for in-memory data.
220+
# https://github.com/ibis-project/ibis/blob/df4f1858e7f9bc36d589dcf2a53f9f16b1acd92a/ibis/backends/duckdb/__init__.py#L180
185221
def save(self, data: ir.Table) -> None:
222+
# treat empty ir.Table as a no-op
223+
if isinstance(data, ir.Table) and data.count().execute() == 0:
224+
return
186225
writer = getattr(self.connection, f"create_{self._materialized}")
187226
if self._mode == "append":
188227
if not self._exists():
@@ -204,6 +243,22 @@ def save(self, data: ir.Table) -> None:
204243
return
205244
writer(self._table_name, data, overwrite=False, **self._save_args)
206245

246+
def _get_backend_name(self) -> str | None:
247+
"""Get the backend name from the connection config or connection string."""
248+
config = self._credentials
249+
# If dict and has 'backend'
250+
if isinstance(config, dict) and "backend" in config:
251+
return config["backend"]
252+
# If string, parse as backend://...
253+
if isinstance(config, str):
254+
if "://" in config:
255+
return config.split("://", 1)[0]
256+
# If dict with 'con' key
257+
if isinstance(config, dict) and "con" in config:
258+
con_str = config["con"]
259+
if "://" in con_str:
260+
return con_str.split("://", 1)[0]
261+
207262
def _describe(self) -> dict[str, Any]:
208263
load_args = deepcopy(self._load_args)
209264
save_args = deepcopy(self._save_args)
@@ -212,7 +267,7 @@ def _describe(self) -> dict[str, Any]:
212267
return {
213268
"table_name": self._table_name,
214269
"database": self._database,
215-
"backend": self._connection_config["backend"],
270+
"backend": self.connection.name,
216271
"load_args": load_args,
217272
"save_args": save_args,
218273
"materialized": self._materialized,
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import importlib
2+
from unittest import mock
3+
4+
# Stub expensive/optional ibis backends that may raise ImportError when
5+
# accessed via attribute lookup during tests (for example, ibis.mssql
6+
# tries to import pyodbc which may not be installed in the test env).
7+
# Creating module attributes here prevents ibis.__getattr__ from being
8+
# invoked by tests that call mocker.patch("ibis.<backend>")...
9+
10+
_IBIS_BACKENDS_TO_STUB = (
11+
"mssql",
12+
"postgres",
13+
)
14+
15+
try:
16+
ibis = importlib.import_module("ibis")
17+
except Exception:
18+
# If ibis itself is unavailable for some reason, nothing to stub.
19+
ibis = None
20+
21+
if ibis is not None:
22+
for _b in _IBIS_BACKENDS_TO_STUB:
23+
# Directly set attribute on the module to avoid triggering
24+
# ibis.__getattr__ lazy-loading logic when tests access it.
25+
try:
26+
setattr(ibis, _b, mock.MagicMock(name=f"ibis.{_b}"))
27+
except Exception:
28+
# Guard against any unexpected issues in CI/test envs.
29+
pass

kedro-datasets/tests/ibis/test_table_dataset.py

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,39 @@ def connection_config(request, database):
3737
)
3838

3939

40+
@pytest.fixture(params=[_SENTINEL])
41+
def credentials_config(request, database):
42+
return (
43+
None
44+
if request.param is _SENTINEL # `None` is a valid value to test
45+
else request.param
46+
)
47+
48+
4049
@pytest.fixture(params=[_SENTINEL])
4150
def table_name(request):
4251
# Default table name when not explicitly parametrized
4352
return "test" if request.param is _SENTINEL else request.param
4453

4554

55+
# @pytest.fixture(params=['test'])
56+
# def table_name(request):
57+
# return request.param
58+
59+
4660
@pytest.fixture
47-
def table_dataset(table_name, database_name, connection_config, load_args, save_args):
61+
def table_dataset(
62+
database_name,
63+
credentials_config,
64+
connection_config,
65+
load_args,
66+
save_args,
67+
table_name,
68+
):
4869
return TableDataset(
4970
table_name=table_name,
5071
database=database_name,
72+
credentials=credentials_config,
5173
connection=connection_config,
5274
load_args=load_args,
5375
save_args=save_args,
@@ -238,6 +260,17 @@ def test_describe_includes_backend_mode_and_materialized(self, table_dataset):
238260
assert "database" not in desc["load_args"]
239261
assert "database" not in desc["save_args"]
240262

263+
@pytest.mark.parametrize(
264+
"save_args",
265+
[{"materialized": "table"}],
266+
indirect=True,
267+
)
268+
def test_save_empty_dataframe_is_noop(self, table_dataset):
269+
"""Saving an empty DataFrame should be a no-op (no table created)."""
270+
empty_table = ibis.memtable(pd.DataFrame({"col1": [], "col2": [], "col3": []}))
271+
table_dataset.save(empty_table)
272+
assert not table_dataset.exists()
273+
241274
@pytest.mark.parametrize("load_args", [{"database": "test"}], indirect=True)
242275
def test_load_extra_params(self, table_dataset, load_args):
243276
"""Test overriding the default load arguments."""
@@ -345,9 +378,100 @@ def test_connection_config(self, mocker, table_dataset, connection_config, key):
345378
table_dataset.load()
346379
assert ("ibis", key) in table_dataset._connections
347380

381+
@pytest.mark.parametrize(
382+
("credentials_config", "key"),
383+
[
384+
(
385+
"postgres://xxxxxx.postgres.database.azure.com:5432/postgres",
386+
(
387+
("backend", "postgres"),
388+
(
389+
"con",
390+
"postgres://xxxxxx.postgres.database.azure.com:5432/postgres",
391+
),
392+
),
393+
),
394+
(
395+
{
396+
"con": "postgres://[email protected]:5432/postgres"
397+
},
398+
(
399+
("backend", "postgres"),
400+
(
401+
"con",
402+
"postgres://[email protected]:5432/postgres",
403+
),
404+
),
405+
),
406+
(
407+
{
408+
"backend": "postgres",
409+
"database": "postgres",
410+
"host": "xxxx.postgres.database.azure.com",
411+
},
412+
(
413+
("backend", "postgres"),
414+
("database", "postgres"),
415+
("host", "xxxx.postgres.database.azure.com"),
416+
),
417+
),
418+
],
419+
)
420+
@pytest.mark.parametrize("connection_config", [None], indirect=True)
421+
def test_connection_config_with_credentials(
422+
self, mocker, table_dataset, credentials_config, key
423+
):
424+
# 1) isolate the cache so parametrized cases don't reuse connections
425+
426+
if isinstance(credentials_config, str) or "backend" not in credentials_config:
427+
backend = "postgres"
428+
else:
429+
backend = credentials_config["backend"]
430+
431+
mocker.patch(f"ibis.{backend}")
432+
conn = table_dataset.connection
433+
assert conn is not None
434+
table_dataset.load()
435+
assert ("ibis", key) in table_dataset._connections
436+
348437
def test_save_data_loaded_using_file_dataset(self, file_dataset, table_dataset):
349438
"""Test interoperability of Ibis datasets sharing a database."""
350439
dummy_table = file_dataset.load()
351440
assert not table_dataset.exists()
352441
table_dataset.save(dummy_table)
353442
assert table_dataset.exists()
443+
444+
# Additional tests for _get_backend_name branch coverage
445+
class TestGetBackendName:
446+
def test_get_backend_name_dict_with_backend(self):
447+
ds = TableDataset(table_name="t")
448+
ds._credentials = {"backend": "postgres", "database": "db"}
449+
assert ds._get_backend_name() == "postgres"
450+
451+
def test_get_backend_name_dict_without_backend_or_con(self):
452+
ds = TableDataset(table_name="t")
453+
ds._credentials = {"user": "u", "password": "p"}
454+
assert ds._get_backend_name() is None
455+
456+
def test_get_backend_name_string_with_scheme(self):
457+
ds = TableDataset(table_name="t")
458+
ds._credentials = "mysql://xxxxxx@host:3306/dbname"
459+
assert ds._get_backend_name() == "mysql"
460+
461+
def test_get_backend_name_string_without_scheme(self):
462+
ds = TableDataset(table_name="t")
463+
ds._credentials = "not_a_url_string"
464+
assert ds._get_backend_name() is None
465+
466+
def test_get_backend_name_dict_with_con_and_scheme(self):
467+
ds = TableDataset(table_name="t")
468+
ds._credentials = {
469+
"con": "postgres://xxxxxx@host:5432/dbname",
470+
"some_other": "value",
471+
}
472+
assert ds._get_backend_name() == "postgres"
473+
474+
def test_get_backend_name_dict_with_con_without_scheme(self):
475+
ds = TableDataset(table_name="t")
476+
ds._credentials = {"con": "sqlite_memory"}
477+
assert ds._get_backend_name() is None

0 commit comments

Comments
 (0)