Skip to content

Commit 1d35671

Browse files
committed
Dependencies: Isolate pyodbc package
Per the package metadata, pyodbc is an optional "extra" dependency. Make that a reality by isolating it from the core install, and let CI cover both the full and the minimal installation scenarios.
1 parent 6f4ec67 commit 1d35671

File tree

9 files changed

+130
-105
lines changed

9 files changed

+130
-105
lines changed

.github/workflows/tests.yml

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,16 @@ concurrency:
1313
cancel-in-progress: true
1414

1515
jobs:
16-
tests:
16+
tests-full:
17+
name: Full tests on Python ${{ matrix.python-version }}, ${{ matrix.os }}
1718
strategy:
1819
matrix:
1920
# I tried running stuff on macOS but it was too slow and unreliable.
2021
# I also tried windows runners but couldn't get Docker to work there, so I gave up.
2122
os: [ubuntu-latest]
2223
python-version: ['3.10', '3.11', '3.12']
24+
env:
25+
UV_SYSTEM_PYTHON: true
2326
runs-on: ${{ matrix.os }}
2427
steps:
2528
- uses: actions/checkout@v4
@@ -52,7 +55,7 @@ jobs:
5255
- name: install uv
5356
uses: astral-sh/setup-uv@v3
5457
- name: Install pip dependencies
55-
run: make deps-ci
58+
run: make deps-full
5659
- name: run tests (macOS)
5760
if: matrix.os == 'macos-13'
5861
run: make test-ci
@@ -64,3 +67,26 @@ jobs:
6467
run: make test-ci
6568
- name: check the formatting
6669
run: make lint-ci
70+
71+
tests-minimal:
72+
name: Minimal tests on Python ${{ matrix.python-version }}, ${{ matrix.os }}
73+
strategy:
74+
matrix:
75+
os: [ubuntu-latest]
76+
python-version: ['3.10', '3.11', '3.12']
77+
runs-on: ${{ matrix.os }}
78+
env:
79+
UV_SYSTEM_PYTHON: true
80+
steps:
81+
- name: Acquire sources
82+
uses: actions/checkout@v4
83+
- name: Install Python
84+
uses: actions/setup-python@v5
85+
with:
86+
python-version: ${{ matrix.python-version }}
87+
- name: Install uv
88+
uses: astral-sh/setup-uv@v6
89+
- name: Install project
90+
run: make deps-minimal
91+
- name: Invoke tests
92+
run: make test-ci

Makefile

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,13 @@ lock-deps:
1515
@uv pip compile requirements.in --quiet -o requirements.txt
1616
@uv pip compile requirements.in --quiet -o requirements_arm64.txt --python-platform aarch64-unknown-linux-gnu
1717

18-
deps: lock-deps
18+
deps: deps-full
19+
20+
deps-minimal: lock-deps
1921
uv pip install -r requirements-dev.txt
2022

21-
deps-ci:
22-
uv pip install --system -r requirements-dev.txt
23+
deps-full: lock-deps
24+
uv pip install -r requirements-dev-full.txt
2325

2426
test-ci:
2527
set -a; source test.env; set +a; TESTCONTAINERS_RYUK_DISABLED=true pytest -n auto -x -rP -vv --tb=short --durations=10 --cov=ingestr --no-cov-on-fail

ingestr/main_test.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import base64
22
import csv
33
import gzip
4+
import importlib
45
import io
56
import json
67
import logging
@@ -545,16 +546,16 @@ def stop_fully(self):
545546
mysqlDocker = DockerImage(
546547
"mysql", lambda: MySqlContainer(MYSQL8_IMAGE, username="root").start()
547548
)
549+
msSqlServerDocker = DockerImage(
550+
"sqlserver",
551+
lambda: SqlServerContainer(MSSQL22_IMAGE, dialect="mssql").start(),
552+
"?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=Yes",
553+
)
548554

549555
SOURCES = {
550556
"postgres": pgDocker,
551557
"duckdb": EphemeralDuckDb(),
552558
"mysql8": mysqlDocker,
553-
"sqlserver": DockerImage(
554-
"sqlserver",
555-
lambda: SqlServerContainer(MSSQL22_IMAGE, dialect="mssql").start(),
556-
"?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=Yes",
557-
),
558559
}
559560

560561
DESTINATIONS = {
@@ -563,6 +564,9 @@ def stop_fully(self):
563564
"clickhouse+native": clickHouseDocker,
564565
}
565566

567+
if importlib.util.find_spec("pyodbc") is not None:
568+
SOURCES["sqlserver"] = msSqlServerDocker
569+
566570

567571
@pytest.fixture(scope="session", autouse=True)
568572
def manage_containers(request):

ingestr/src/destinations.py

Lines changed: 4 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,19 @@
11
import abc
22
import base64
33
import csv
4-
import datetime
54
import json
65
import os
76
import shutil
8-
import struct
97
import tempfile
108
from urllib.parse import parse_qs, quote, urlparse
119

1210
import dlt
1311
import dlt.destinations.impl.filesystem.filesystem
1412
from dlt.common.configuration.specs import AwsCredentials
15-
from dlt.common.destination.capabilities import DestinationCapabilitiesContext
16-
from dlt.common.schema import Schema
1713
from dlt.common.storages.configuration import FileSystemCredentials
1814
from dlt.destinations.impl.clickhouse.configuration import (
1915
ClickHouseCredentials,
2016
)
21-
from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration
22-
from dlt.destinations.impl.mssql.mssql import (
23-
HINT_TO_MSSQL_ATTR,
24-
MsSqlJobClient,
25-
)
26-
from dlt.destinations.impl.mssql.sql_client import (
27-
PyOdbcMsSqlClient,
28-
)
2917

3018
from ingestr.src.errors import MissingValueError
3119
from ingestr.src.loader import load_dlt_file
@@ -155,88 +143,12 @@ def dlt_dest(self, uri: str, **kwargs):
155143
return dlt.destinations.duckdb(uri, **kwargs)
156144

157145

158-
def handle_datetimeoffset(dto_value: bytes) -> datetime.datetime:
159-
# ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794
160-
tup = struct.unpack(
161-
"<6hI2h", dto_value
162-
) # e.g., (2017, 3, 16, 10, 35, 18, 500000000, -6, 0)
163-
return datetime.datetime(
164-
tup[0],
165-
tup[1],
166-
tup[2],
167-
tup[3],
168-
tup[4],
169-
tup[5],
170-
tup[6] // 1000,
171-
datetime.timezone(datetime.timedelta(hours=tup[7], minutes=tup[8])),
172-
)
173-
174-
175-
class OdbcMsSqlClient(PyOdbcMsSqlClient):
176-
SQL_COPT_SS_ACCESS_TOKEN = 1256
177-
SKIP_CREDENTIALS = {"PWD", "AUTHENTICATION", "UID"}
178-
179-
def open_connection(self):
180-
cfg = self.credentials._get_odbc_dsn_dict()
181-
if (
182-
cfg.get("AUTHENTICATION", "").strip().lower()
183-
!= "activedirectoryaccesstoken"
184-
):
185-
return super().open_connection()
186-
187-
import pyodbc # type: ignore
188-
189-
dsn = ";".join(
190-
[f"{k}={v}" for k, v in cfg.items() if k not in self.SKIP_CREDENTIALS]
191-
)
192-
193-
self._conn = pyodbc.connect(
194-
dsn,
195-
timeout=self.credentials.connect_timeout,
196-
attrs_before={
197-
self.SQL_COPT_SS_ACCESS_TOKEN: self.serialize_token(cfg["PWD"]),
198-
},
199-
)
200-
201-
# https://github.com/mkleehammer/pyodbc/wiki/Using-an-Output-Converter-function
202-
self._conn.add_output_converter(-155, handle_datetimeoffset)
203-
self._conn.autocommit = True
204-
return self._conn
205-
206-
def serialize_token(self, token):
207-
# https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-494773723
208-
encoded = token.encode("utf_16_le")
209-
return struct.pack("<i", len(encoded)) + encoded
210-
211-
212-
class MsSqlClient(MsSqlJobClient):
213-
def __init__(
214-
self,
215-
schema: Schema,
216-
config: MsSqlClientConfiguration,
217-
capabilities: DestinationCapabilitiesContext,
218-
) -> None:
219-
sql_client = OdbcMsSqlClient(
220-
config.normalize_dataset_name(schema),
221-
config.normalize_staging_dataset_name(schema),
222-
config.credentials,
223-
capabilities,
224-
)
225-
super(MsSqlJobClient, self).__init__(schema, config, sql_client)
226-
self.config: MsSqlClientConfiguration = config
227-
self.sql_client = sql_client
228-
self.active_hints = HINT_TO_MSSQL_ATTR if self.config.create_indexes else {}
229-
self.type_mapper = capabilities.get_type_mapper()
230-
231-
232-
class MsSqlDestImpl(dlt.destinations.mssql):
233-
@property
234-
def client_class(self):
235-
return MsSqlClient
236-
237-
238146
class MsSQLDestination(GenericSqlDestination):
239147
def dlt_dest(self, uri: str, **kwargs):
148+
from ingestr.src.destinations_mssql import ( # type: ignore[import-untyped]
149+
MsSqlDestImpl,
150+
)
151+
240152
return MsSqlDestImpl(credentials=uri, **kwargs)
241153

242154

ingestr/src/destinations_mssql.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import struct
2+
3+
import dlt
4+
from dlt.common.destination.capabilities import DestinationCapabilitiesContext
5+
from dlt.common.schema import Schema
6+
from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration
7+
from dlt.destinations.impl.mssql.mssql import (
8+
HINT_TO_MSSQL_ATTR,
9+
MsSqlJobClient,
10+
)
11+
from dlt.destinations.impl.mssql.sql_client import (
12+
PyOdbcMsSqlClient,
13+
handle_datetimeoffset,
14+
)
15+
16+
17+
class OdbcMsSqlClient(PyOdbcMsSqlClient):
18+
SQL_COPT_SS_ACCESS_TOKEN = 1256
19+
SKIP_CREDENTIALS = {"PWD", "AUTHENTICATION", "UID"}
20+
21+
def open_connection(self):
22+
cfg = self.credentials._get_odbc_dsn_dict()
23+
if (
24+
cfg.get("AUTHENTICATION", "").strip().lower()
25+
!= "activedirectoryaccesstoken"
26+
):
27+
return super().open_connection()
28+
29+
import pyodbc # type: ignore
30+
31+
dsn = ";".join(
32+
[f"{k}={v}" for k, v in cfg.items() if k not in self.SKIP_CREDENTIALS]
33+
)
34+
35+
self._conn = pyodbc.connect(
36+
dsn,
37+
timeout=self.credentials.connect_timeout,
38+
attrs_before={
39+
self.SQL_COPT_SS_ACCESS_TOKEN: self.serialize_token(cfg["PWD"]),
40+
},
41+
)
42+
43+
# https://github.com/mkleehammer/pyodbc/wiki/Using-an-Output-Converter-function
44+
self._conn.add_output_converter(-155, handle_datetimeoffset)
45+
self._conn.autocommit = True
46+
return self._conn
47+
48+
def serialize_token(self, token):
49+
# https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-494773723
50+
encoded = token.encode("utf_16_le")
51+
return struct.pack("<i", len(encoded)) + encoded
52+
53+
54+
class MsSqlClient(MsSqlJobClient):
55+
def __init__(
56+
self,
57+
schema: Schema,
58+
config: MsSqlClientConfiguration,
59+
capabilities: DestinationCapabilitiesContext,
60+
) -> None:
61+
sql_client = OdbcMsSqlClient(
62+
config.normalize_dataset_name(schema),
63+
config.normalize_staging_dataset_name(schema),
64+
config.credentials,
65+
capabilities,
66+
)
67+
super(MsSqlJobClient, self).__init__(schema, config, sql_client)
68+
self.config: MsSqlClientConfiguration = config
69+
self.sql_client = sql_client
70+
self.active_hints = HINT_TO_MSSQL_ATTR if self.config.create_indexes else {}
71+
self.type_mapper = capabilities.get_type_mapper()
72+
73+
74+
class MsSqlDestImpl(dlt.destinations.mssql):
75+
@property
76+
def client_class(self):
77+
return MsSqlClient

ingestr/src/sources.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def table_rows(
267267
import pyodbc # type: ignore
268268
from sqlalchemy import create_engine
269269

270-
from ingestr.src.destinations import (
270+
from ingestr.src.destinations_mssql import ( # type: ignore[import-untyped]
271271
OdbcMsSqlClient,
272272
handle_datetimeoffset,
273273
)

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,11 +148,14 @@ classifiers = [
148148
]
149149

150150
[project.optional-dependencies]
151+
full = [
152+
"ingestr[oracle,odbc]",
153+
]
151154
oracle = [
152155
"cx_Oracle==8.3.0",
153156
]
154157
odbc = [
155-
"pyodbc==5.1.0",
158+
"pyodbc==5.2.0",
156159
]
157160

158161
[project.urls]

requirements-dev-full.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.[full]
2+
-r requirements-dev.txt

requirements-dev.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ pytest==8.3.3
66
ruff==0.11.4
77
hatchling==1.27.0
88
build==1.2.1
9-
pyodbc==5.2.0
109
twine==6.0.1
1110
testcontainers[postgres,mysql]==4.8.2
1211
pytest-xdist[psutil]==3.6.1

0 commit comments

Comments
 (0)