Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions kedro-datasets/RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,22 @@
- Dropped support for Python 3.9 (EOL Oct 2025). Minimum supported version is now 3.10.
- Removed the deprecated `MatplotlibWriter` dataset. Matplotlib objects can now be handled using `MatplotlibDataset`.
- Group datasets documentation according to the dependencies to clean up the nav bar.
- Added `mode` save argument to `ibis.TableDataset`, supporting "append", "overwrite", "error"/"errorifexists", and "ignore" save modes. The deprecated `overwrite` save argument is mapped to `mode` for backward compatibility and will be removed in a future release. Specifying both `mode` and `overwrite` results in an error.
- Added credentials support to `ibis.TableDataset`.
- Added the following new datasets:

| Type | Description | Location |
|-----------------------|-----------------------------------------------------------------------------------|--------------------------|
| Type | Description | Location |
|-----------------------|-----------------------------------------------------------------------------------------|--------------------------|
| `openxml.PptxDataset` | A dataset for loading and saving .pptx files (Microsoft PowerPoint) using `python-pptx` | `kedro_datasets.openxml` |

- Added the following new **experimental** datasets:

| Type | Description | Location |
|--------------------------------|-------------------------------------------------------------|--------------------------------------|
| `langfuse.LangfuseTraceDataset` | Kedro dataset to provide Langfuse tracing clients and callbacks | `kedro_datasets_experimental.langfuse` |
| `langchain.LangChainPromptDataset` | Kedro dataset for loading LangChain prompts | `kedro_datasets_experimental.langchain` |
| `pypdf.PDFDataset` | Kedro dataset to read PDF files and extract text using pypdf | `kedro_datasets_experimental.pypdf` |
| `langfuse.LangfusePromptDataset` | Kedro dataset for managing Langfuse prompts | `kedro_datasets_experimental.langfuse` |

| Type | Description | Location |
|------------------------------------|-----------------------------------------------------------------|-----------------------------------------|
| `langfuse.LangfuseTraceDataset` | Kedro dataset to provide Langfuse tracing clients and callbacks | `kedro_datasets_experimental.langfuse` |
| `langchain.LangChainPromptDataset` | Kedro dataset for loading LangChain prompts | `kedro_datasets_experimental.langchain` |
| `pypdf.PDFDataset` | Kedro dataset to read PDF files and extract text using pypdf | `kedro_datasets_experimental.pypdf` |
| `langfuse.LangfusePromptDataset` | Kedro dataset for managing Langfuse prompts | `kedro_datasets_experimental.langfuse` |

## Bug fixes and other changes
- Add HTMLPreview type.
Expand All @@ -29,8 +30,10 @@

Many thanks to the following Kedroids for contributing PRs to this release:
- [Guillaume Tauzin](https://github.com/gtauzin)
- [gitgud5000](https://github.com/gitgud5000)

# Release 8.1.0

## Major features and improvements

- Added the following new experimental datasets:
Expand All @@ -39,8 +42,6 @@ Many thanks to the following Kedroids for contributing PRs to this release:
| ------------------------------ | ------------------------------------------------------------- | ------------------------------------ |
| `polars.PolarsDatabaseDataset` | A dataset to load and save data to a SQL backend using Polars | `kedro_datasets_experimental.polars` |

- Added `mode` save argument to `ibis.TableDataset`, supporting "append", "overwrite", "error"/"errorifexists", and "ignore" save modes. The deprecated `overwrite` save argument is mapped to `mode` for backward compatibility and will be removed in a future release. Specifying both `mode` and `overwrite` results in an error.

## Bug fixes and other changes

- Added primary key constraint to BaseTable.
Expand Down Expand Up @@ -82,7 +83,6 @@ Many thanks to the following Kedroids for contributing PRs to this release:
- [Seohyun Park](https://github.com/soyamimi)
- [Daniel Russell-Brain](https://github.com/killerfridge)


# Release 7.0.0

## Major features and improvements
Expand Down Expand Up @@ -116,7 +116,6 @@ Many thanks to the following Kedroids for contributing PRs to this release:
- [Abhishek Bhatia](https://github.com/abhi8893)
- [Guillaume Tauzin](https://github.com/gtauzin)


# Release 6.0.0

## Major features and improvements
Expand Down
18 changes: 16 additions & 2 deletions kedro-datasets/kedro_datasets/ibis/table_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class SaveMode(StrEnum):


class TableDataset(ConnectionMixin, AbstractDataset[ir.Table, ir.Table]):
"""`TableDataset` loads/saves data from/to Ibis table expressions.
"""``TableDataset`` loads/saves data from/to Ibis table expressions.

Examples:
Using the [YAML API](https://docs.kedro.org/en/stable/catalog-data/data_catalog_yaml_examples/):
Expand All @@ -58,7 +58,7 @@ class TableDataset(ConnectionMixin, AbstractDataset[ir.Table, ir.Table]):
save_args:
materialized: view
mode: overwrite
```
```

Using the [Python API](https://docs.kedro.org/en/stable/catalog-data/advanced_data_catalog_usage/):

Expand Down Expand Up @@ -96,6 +96,7 @@ def __init__( # noqa: PLR0913
table_name: str,
database: str | None = None,
connection: dict[str, Any] | None = None,
credentials: dict[str, Any] | str | None = None,
load_args: dict[str, Any] | None = None,
save_args: dict[str, Any] | None = None,
metadata: dict[str, Any] | None = None,
Expand Down Expand Up @@ -126,6 +127,9 @@ def __init__( # noqa: PLR0913
in a multi-level table hierarchy.
connection: Configuration for connecting to an Ibis backend.
If not provided, connect to DuckDB in in-memory mode.
credentials: Connection information (e.g.
user, password, token, account). If provided, these values
overlay the base `connection` configuration.
load_args: Additional arguments passed to the Ibis backend's
`read_{file_format}` method.
save_args: Additional arguments passed to the Ibis backend's
Expand All @@ -145,6 +149,16 @@ def __init__( # noqa: PLR0913
self._table_name = table_name
self._database = database
self._connection_config = connection or self.DEFAULT_CONNECTION_CONFIG
# Credentials overlay connection config
if credentials:
if isinstance(credentials, dict):
self._connection_config.update(credentials)
elif isinstance(credentials, str):
raise ValueError(
"Connection string credentials are not supported for Ibis TableDataset."
)
else:
raise TypeError("Credentials must be a dict or None.")
Comment on lines +152 to +161
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If looking at other datasets, I feel like this may be excessive; most datasets (e.g. pandas.CSVDataset) simply splat a deep copy of the credentials with the other data structure, something like:

Suggested change
# Credentials overlay connection config
if credentials:
if isinstance(credentials, dict):
self._connection_config.update(credentials)
elif isinstance(credentials, str):
raise ValueError(
"Connection string credentials are not supported for Ibis TableDataset."
)
else:
raise TypeError("Credentials must be a dict or None.")
_connection_config = connection or self.DEFAULT_CONNECTION_CONFIG
_credentials = deepcopy(credentials) or {}
self._connection_config = {**_connection_config, **_credentials}

I think what you have with:

        if credentials:
            self._connection_config.update(credentials)

should also work (just need to make sure about deepcopy maybe), but generally the datasets are very similar to each other in implementation, so I think following the same **(deepcopy(credentials) or {}) for that sake may still make sense.

self.metadata = metadata

# Set load and save arguments, overwriting defaults if provided.
Expand Down
99 changes: 98 additions & 1 deletion kedro-datasets/tests/ibis/test_table_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,24 @@ def connection_config(request, database):
)


@pytest.fixture(params=[_SENTINEL])
def credentials_config(request, database):
    """Credentials value forwarded to ``TableDataset``.

    The sentinel default is mapped to ``None`` because ``None`` is itself a
    valid credentials value that must be exercised; any explicitly
    parametrized value is passed through unchanged. The ``database`` fixture
    is requested for its setup side effect, mirroring ``connection_config``.
    """
    param = request.param
    if param is _SENTINEL:
        return None
    return param


@pytest.fixture
def table_dataset(database_name, connection_config, load_args, save_args):
def table_dataset(
database_name, connection_config, credentials_config, load_args, save_args
):
ds = TableDataset(
table_name="test",
database=database_name,
connection=connection_config,
credentials=credentials_config,
load_args=load_args,
save_args=save_args,
)
Expand Down Expand Up @@ -348,6 +360,91 @@ def test_connection_config(self, mocker, table_dataset, connection_config, key):
table_dataset.load()
assert ("ibis", key) in table_dataset._connections

@pytest.mark.parametrize(
    ("connection_config", "credentials_config", "key"),
    [
        (
            {"backend": "duckdb", "database": "file.db", "extensions": ["spatial"]},
            {"user": "admin", "password": "secret"},  # pragma: allowlist secret
            (
                ("backend", "duckdb"),
                ("database", "file.db"),
                ("extensions", ("spatial",)),
                ("password", "secret"),
                ("user", "admin"),
            ),
        ),
        (
            [],
            {
                "host": "xxx.sql.azuresynapse.net",
                "database": "xxx",
                "query": {"driver": "ODBC Driver 17 for SQL Server"},
                "backend": "mssql",
            },
            (
                ("backend", "mssql"),
                ("database", "xxx"),
                ("host", "xxx.sql.azuresynapse.net"),
                ("query", (("driver", "ODBC Driver 17 for SQL Server"),)),
            ),
        ),
        (
            None,
            None,
            (
                ("backend", "duckdb"),
                ("database", ":memory:"),
            ),
        ),
    ],
    indirect=["connection_config", "credentials_config"],
)
def test_connection_config_with_credentials(
    self, mocker, table_dataset, connection_config, credentials_config, key
):
    """Merged connection+credentials config caches the connection under ``key``.

    Patches the resolved Ibis backend so no real connection is opened, loads
    through the dataset, and asserts the connection was cached under the
    expected normalized key.
    """
    # Credentials overlay the base connection config inside TableDataset, so
    # when both dicts carry a "backend" entry the credentials' value is the
    # one actually used. Check credentials first to mirror that precedence —
    # the previous connection-first lookup would patch the wrong backend if
    # the two configs ever named conflicting backends. (All current cases
    # are conflict-free, so observable behavior is unchanged.)
    backend = "duckdb"  # dataset default when neither config names a backend
    for config in (credentials_config, connection_config):
        if isinstance(config, dict) and "backend" in config:
            backend = config["backend"]
            break
    mocker.patch(f"ibis.{backend}")
    table_dataset.load()
    assert ("ibis", key) in table_dataset._connections

@pytest.mark.parametrize(
    ("credentials", "expected_exception", "expected_message"),
    [
        (
            "postgresql://xxx:xxx@localhost/db",  # pragma: allowlist secret
            ValueError,
            "Connection string credentials are not supported",
        ),
        (123, TypeError, "Credentials must be a dict"),
        (["backend", "duckdb"], TypeError, "Credentials must be a dict"),
        (("backend", "duckdb"), TypeError, "Credentials must be a dict"),
        (True, TypeError, "Credentials must be a dict"),
    ],
)
def test_invalid_credentials_types_raise(
    self,
    database_name,
    connection_config,
    credentials,
    expected_exception,
    expected_message,
):
    """Non-dict ``credentials`` values make the constructor fail fast.

    Connection strings raise ``ValueError``; every other non-dict type
    raises ``TypeError``.
    """
    dataset_kwargs = {
        "table_name": "test",
        "database": database_name,
        "connection": connection_config,
        "credentials": credentials,
    }
    with pytest.raises(expected_exception, match=expected_message):
        TableDataset(**dataset_kwargs)

def test_save_data_loaded_using_file_dataset(self, file_dataset, table_dataset):
"""Test interoperability of Ibis datasets sharing a database."""
dummy_table = file_dataset.load()
Expand Down
Loading