Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,12 @@ repos:
- --fix
- --exit-non-zero-on-fix
- id: ruff-format
- repo: https://github.com/adamchainz/blacken-docs
rev: "1.19.1"
hooks:
- id: blacken-docs
args:
- --line-length
- '80'
additional_dependencies:
- black==22.12.0
105 changes: 63 additions & 42 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
# duckdb_engine

[![Supported Python Versions](https://img.shields.io/pypi/pyversions/duckdb-engine)](https://pypi.org/project/duckdb-engine/) [![PyPI version](https://badge.fury.io/py/duckdb-engine.svg)](https://badge.fury.io/py/duckdb-engine) [![PyPI Downloads](https://img.shields.io/pypi/dm/duckdb-engine.svg)](https://pypi.org/project/duckdb-engine/) [![codecov](https://codecov.io/gh/Mause/duckdb_engine/graph/badge.svg)](https://codecov.io/gh/Mause/duckdb_engine)
Expand Down Expand Up @@ -36,7 +36,13 @@
Once you've installed this package, you should be able to just use it, as SQLAlchemy does a Python path search to locate the dialect.

```python
from sqlalchemy import Column, Integer, Sequence, String, create_engine
from sqlalchemy import (
Column,
Integer,
Sequence,
String,
create_engine,
)
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm.session import Session

Expand All @@ -46,7 +52,11 @@
class FakeModel(Base): # type: ignore
__tablename__ = "fake"

id = Column(Integer, Sequence("fakemodel_id_sequence"), primary_key=True)
id = Column(
Integer,
Sequence("fakemodel_id_sequence"),
primary_key=True,
)
name = Column(String)


Expand All @@ -71,14 +81,14 @@

You can configure DuckDB by passing `connect_args` to the `create_engine` function:
```python
from sqlalchemy.engine import create_engine

create_engine(
'duckdb:///:memory:',
"duckdb:///:memory:",
connect_args={
'read_only': False,
'config': {
'memory_limit': '500mb'
}
}
"read_only": False,
"config": {"memory_limit": "500mb"},
},
)
```

Expand All @@ -87,15 +97,25 @@
## How to register a pandas DataFrame

```python
import pandas as pd
from sqlalchemy import text, __version__ as sqla_version
from sqlalchemy.engine import create_engine

conn = create_engine("duckdb:///:memory:").connect()

# with SQLAlchemy 1.3
conn.execute("register", ("dataframe_name", pd.DataFrame(...)))
df = pd.DataFrame([{"id": 0}])

# with SQLAlchemy 1.4+
conn.execute(text("register(:name, :df)"), {"name": "test_df", "df": df})
if sqla_version.startswith("1.3."):
# with SQLAlchemy 1.3
conn.execute("register", ("dataframe_name", df))
else:
# with SQLAlchemy 1.4+
conn.execute(
text("register(:name, :df)"),
{"name": "dataframe_name", "df": df},
)

conn.execute("select * from dataframe_name")
conn.execute(text("select * from dataframe_name"))
```

## Things to keep in mind
Expand All @@ -107,22 +127,23 @@
The following example demonstrates how to create an auto-incrementing ID column for a simple table:

```python
>>> import sqlalchemy
>>> engine = sqlalchemy.create_engine('duckdb:////path/to/duck.db')
>>> metadata = sqlalchemy.MetaData(engine)
>>> user_id_seq = sqlalchemy.Sequence('user_id_seq')
>>> users_table = sqlalchemy.Table(
... 'users',
... metadata,
... sqlalchemy.Column(
... 'id',
... sqlalchemy.Integer,
... user_id_seq,
... server_default=user_id_seq.next_value(),
... primary_key=True,
... ),
... )
>>> metadata.create_all(bind=engine)
import sqlalchemy

engine = sqlalchemy.create_engine("duckdb:///:memory:")
metadata = sqlalchemy.MetaData(engine)
user_id_seq = sqlalchemy.Sequence("user_id_seq")
users_table = sqlalchemy.Table(
"users",
metadata,
sqlalchemy.Column(
"id",
sqlalchemy.Integer,
user_id_seq,
server_default=user_id_seq.next_value(),
primary_key=True,
),
)
metadata.create_all(bind=engine)
```

### Pandas `read_sql()` chunksize
Expand All @@ -131,12 +152,13 @@

The `pandas.read_sql()` method can read tables from `duckdb_engine` into DataFrames, but the `sqlalchemy.engine.result.ResultProxy` fails when `fetchmany()` is called. Therefore, for now, `chunksize=None` (the default) is necessary when reading DuckDB tables into DataFrames. For example:

```python
>>> import pandas as pd
>>> import sqlalchemy
>>> engine = sqlalchemy.create_engine('duckdb:////path/to/duck.db')
>>> df = pd.read_sql('users', engine) ### Works as expected
>>> df = pd.read_sql('users', engine, chunksize=25) ### Throws an exception
```python notest
import pandas as pd
import sqlalchemy

engine = sqlalchemy.create_engine("duckdb:////path/to/duck.db")
df = pd.read_sql("users", engine) ### Works as expected
df = pd.read_sql("users", engine, chunksize=25) ### Throws an exception
```

### Unsigned integer support
Expand All @@ -149,9 +171,10 @@

This support can be enabled by adding an Alembic implementation class for the `duckdb` dialect.

```python
```python notest
from alembic.ddl.impl import DefaultImpl


class AlembicDuckDBImpl(DefaultImpl):
"""Alembic implementation for DuckDB."""

Expand All @@ -170,13 +193,11 @@
from sqlalchemy import create_engine

create_engine(
'duckdb:///:memory:',
"duckdb:///:memory:",
connect_args={
'preload_extensions': ['https'],
'config': {
's3_region': 'ap-southeast-1'
}
}
"preload_extensions": ["https"],
"config": {"s3_region": "ap-southeast-1"},
},
)
```

Expand Down
21 changes: 10 additions & 11 deletions duckdb_engine/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,11 +115,12 @@ class Struct(TypeEngine):

```python
from duckdb_engine.datatypes import Struct
from sqlalchemy import Table, Column, String
from sqlalchemy import Table, Column, String, MetaData

Table(
'hello',
Column('name', Struct({'first': String, 'last': String})
"hello",
MetaData(),
Column("name", Struct({"first": String, "last": String})),
)
```

Expand All @@ -138,12 +139,9 @@ class Map(TypeEngine):

```python
from duckdb_engine.datatypes import Map
from sqlalchemy import Table, Column, String
from sqlalchemy import Table, Column, String, MetaData

Table(
'hello',
Column('name', Map(String, String)
)
Table("hello", MetaData(), Column("name", Map(String, String)))
```
"""

Expand Down Expand Up @@ -179,11 +177,12 @@ class Union(TypeEngine):

```python
from duckdb_engine.datatypes import Union
from sqlalchemy import Table, Column, String
from sqlalchemy import Table, Column, String, MetaData

Table(
'hello',
Column('name', Union({"name": String, "age": String})
"hello",
MetaData(),
Column("name", Union({"name": String, "age": String})),
)
```
"""
Expand Down
9 changes: 7 additions & 2 deletions noxfile.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
from contextlib import contextmanager
from typing import Generator

import github_action_utils as gha
import nox
try:
import github_action_utils as gha
import nox
except ImportError:
import pytest

pytest.skip(allow_module_level=True)

nox.options.default_venv_backend = "uv"
nox.options.error_on_external_run = True
Expand Down
66 changes: 54 additions & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pytest-cov = {extras = ["coverage"], version = "^5.0.0"}
snapshottest = "^0.6.0"
pytest-remotedata = "^0.4.0"
toml = "^0.10.2"
pytest-markdown-docs = "^0.7.1"

[tool.poetry.group.devtools.dependencies]
pdbpp = "^0.10.3"
Expand All @@ -39,7 +40,7 @@ pre-commit = { version = "^4.0.0", markers = "python_version >= '3.9'" }
duckdb = "duckdb_engine:Dialect"

[tool.pytest.ini_options]
addopts = "--hypothesis-show-statistics --strict --strict-markers"
addopts = "--hypothesis-show-statistics --strict --strict-markers --markdown-docs"
xfail_strict = true

[tool.mypy]
Expand Down
Loading