Commit 6533418

First pass at duckdb data interface

1 parent 323a834

3 files changed: +173 -25 lines

commodities.csv (default_new_input example model)
Lines changed: 2 additions & 2 deletions
@@ -1,6 +1,6 @@
-commodity_name,description,type,unit
+name,description,type,unit
 electricity,Electricity,energy,PJ
 gas,Gas,energy,PJ
 heat,Heat,energy,PJ
 wind,Wind,energy,PJ
-C02f,Carbon dioxide,energy,kt
+CO2f,Carbon dioxide,energy,kt
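Two fixes in this file: the header column commodity_name becomes name, matching what the new reader selects, and the C02f typo (a zero in place of the letter O) becomes CO2f. For context, a minimal sketch of why the header matters, using DuckDB's replacement scan to query a Python relation by its variable name (the inline CSV is illustrative, not the real file):

from io import StringIO

import duckdb

con = duckdb.connect(":memory:")

# read_csv returns a DuckDBPyRelation; DuckDB's replacement scan lets SQL
# refer to it by its Python variable name, which is why readers.py below keeps
# the otherwise-unused `rel` bindings (hence the noqa: F841 markers).
rel = con.read_csv(StringIO("name,description,type,unit\nheat,Heat,energy,PJ\n"))
print(con.sql("SELECT name, type, unit FROM rel").fetchnumpy())
# Under the old commodity_name header, the same SELECT would fail to bind `name`.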

src/muse/new_input/readers.py (new file)
Lines changed: 76 additions & 0 deletions

import duckdb
import numpy as np
import xarray as xr


def read_inputs(data_dir):
    data = {}
    con = duckdb.connect(":memory:")

    with open(data_dir / "regions.csv") as f:
        regions = read_regions_csv(f, con)  # noqa: F841

    with open(data_dir / "commodities.csv") as f:
        commodities = read_commodities_csv(f, con)

    with open(data_dir / "demand.csv") as f:
        demand = read_demand_csv(f, con)  # noqa: F841

    data["global_commodities"] = calculate_global_commodities(commodities)
    return data


def read_regions_csv(buffer_, con):
    sql = """CREATE TABLE regions (
        name VARCHAR PRIMARY KEY,
    );
    """
    con.sql(sql)
    rel = con.read_csv(buffer_, header=True, delimiter=",")  # noqa: F841
    con.sql("INSERT INTO regions SELECT name FROM rel;")
    return con.sql("SELECT name from regions").fetchnumpy()


def read_commodities_csv(buffer_, con):
    sql = """CREATE TABLE commodities (
        name VARCHAR PRIMARY KEY,
        type VARCHAR CHECK (type IN ('energy', 'service', 'material', 'environmental')),
        unit VARCHAR,
    );
    """
    con.sql(sql)
    rel = con.read_csv(buffer_, header=True, delimiter=",")  # noqa: F841
    con.sql("INSERT INTO commodities SELECT name, type, unit FROM rel;")

    return con.sql("select name, type, unit from commodities").fetchnumpy()


def calculate_global_commodities(commodities):
    names = commodities["name"].astype(np.dtype("str"))
    types = commodities["type"].astype(np.dtype("str"))
    units = commodities["unit"].astype(np.dtype("str"))

    type_array = xr.DataArray(
        data=types, dims=["commodity"], coords=dict(commodity=names)
    )

    unit_array = xr.DataArray(
        data=units, dims=["commodity"], coords=dict(commodity=names)
    )

    data = xr.Dataset(data_vars=dict(type=type_array, unit=unit_array))
    return data


def read_demand_csv(buffer_, con):
    sql = """CREATE TABLE demand (
        year BIGINT,
        commodity VARCHAR REFERENCES commodities(name),
        region VARCHAR REFERENCES regions(name),
        demand DOUBLE,
    );
    """
    con.sql(sql)
    rel = con.read_csv(buffer_, header=True, delimiter=",")  # noqa: F841
    con.sql("INSERT INTO demand SELECT year, commodity_name, region, demand FROM rel;")
    return con.sql("SELECT * from demand").fetchnumpy()
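For orientation, this is roughly how the new module is meant to be driven (a sketch, assuming a directory laid out like the default_new_input example model, with regions.csv, commodities.csv, and demand.csv):

from pathlib import Path

from muse.new_input.readers import read_inputs

# Hypothetical location; any directory containing the three expected CSVs works.
data = read_inputs(Path("model"))

# "global_commodities" is an xarray Dataset indexed by commodity, carrying
# "type" and "unit" data variables built from the commodities table.
commodities = data["global_commodities"]
print(commodities["unit"].sel(commodity="heat").item())  # "PJ" in the example data

Note that read_demand_csv still selects commodity_name from the raw CSV, so demand.csv keeps its old header even though commodities.csv has been renamed.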

tests/test_readers.py
Lines changed: 95 additions & 23 deletions

@@ -1,7 +1,9 @@
+from io import StringIO
 from itertools import chain, permutations
 from pathlib import Path
 from unittest.mock import patch
 
+import duckdb
 import numpy as np
 import toml
 import xarray as xr

@@ -861,40 +863,110 @@ def default_new_input(tmp_path):
     from muse.examples import copy_model
 
     copy_model("default_new_input", tmp_path)
-    return tmp_path
+    return tmp_path / "model"
 
 
-@mark.xfail
-def test_read_new_global_commodities(default_new_input):
-    from muse.new_input.readers import read_inputs
+@fixture
+def con():
+    return duckdb.connect(":memory:")
 
-    all_data = read_inputs(default_new_input)
-    data = all_data["global_commodities"]
+
+@fixture
+def populate_regions(default_new_input, con):
+    from muse.new_input.readers import read_regions_csv
+
+    with open(default_new_input / "regions.csv") as f:
+        return read_regions_csv(f, con)
+
+
+@fixture
+def populate_commodities(default_new_input, con):
+    from muse.new_input.readers import read_commodities_csv
+
+    with open(default_new_input / "commodities.csv") as f:
+        return read_commodities_csv(f, con)
+
+
+@fixture
+def populate_demand(default_new_input, con, populate_regions, populate_commodities):
+    from muse.new_input.readers import read_demand_csv
+
+    with open(default_new_input / "demand.csv") as f:
+        return read_demand_csv(f, con)
+
+
+def test_read_regions(populate_regions):
+    assert populate_regions["name"] == np.array(["R1"])
+
+
+def test_read_new_global_commodities(populate_commodities):
+    data = populate_commodities
+    assert list(data["name"]) == ["electricity", "gas", "heat", "wind", "CO2f"]
+    assert list(data["type"]) == ["energy"] * 5
+    assert list(data["unit"]) == ["PJ"] * 4 + ["kt"]
+
+
+def test_calculate_global_commodities(populate_commodities):
+    from muse.new_input.readers import calculate_global_commodities
+
+    data = calculate_global_commodities(populate_commodities)
 
     assert isinstance(data, xr.Dataset)
     assert set(data.dims) == {"commodity"}
-    assert dict(data.dtypes) == dict(
-        type=np.dtype("str"),
-        unit=np.dtype("str"),
-    )
+    for dt in data.dtypes.values():
+        assert np.issubdtype(dt, np.dtype("str"))
+
+    assert list(data.coords["commodity"].values) == list(populate_commodities["name"])
+    assert list(data.data_vars["type"].values) == list(populate_commodities["type"])
+    assert list(data.data_vars["unit"].values) == list(populate_commodities["unit"])
+
+
+def test_read_new_global_commodities_type_constraint(default_new_input, con):
+    from muse.new_input.readers import read_commodities_csv
+
+    csv = StringIO("name,type,unit\nfoo,invalid,bar\n")
+    with raises(duckdb.ConstraintException):
+        read_commodities_csv(csv, con)
 
-    assert list(data.coords["commodity"].values) == [
-        "electricity",
-        "gas",
-        "heat",
-        "wind",
-        "CO2f",
-    ]
-    assert list(data.data_vars["type"].values) == ["energy"] * 5
-    assert list(data.data_vars["unit"].values) == ["PJ"] * 4 + ["kt"]
+
+def test_new_read_demand_csv(populate_demand):
+    data = populate_demand
+    assert np.all(data["year"] == np.array([2020, 2050]))
+    assert np.all(data["commodity"] == np.array(["heat", "heat"]))
+    assert np.all(data["region"] == np.array(["R1", "R1"]))
+    assert np.all(data["demand"] == np.array([10, 30]))
+
+
+def test_new_read_demand_csv_commodity_constraint(
+    default_new_input, con, populate_commodities, populate_regions
+):
+    from muse.new_input.readers import read_demand_csv
+
+    csv = StringIO("year,commodity_name,region,demand\n2020,invalid,R1,0\n")
+    with raises(duckdb.ConstraintException, match=".*foreign key.*"):
+        read_demand_csv(csv, con)
+
+
+def test_new_read_demand_csv_region_constraint(
+    default_new_input, con, populate_commodities, populate_regions
+):
+    from muse.new_input.readers import read_demand_csv
+
+    csv = StringIO("year,commodity_name,region,demand\n2020,heat,invalid,0\n")
+    with raises(duckdb.ConstraintException, match=".*foreign key.*"):
+        read_demand_csv(csv, con)
 
 
 @mark.xfail
-def test_read_demand(default_new_input):
-    from muse.new_input.readers import read_inputs
+def test_demand_dataset(default_new_input):
+    import duckdb
+    from muse.new_input.readers import read_commodities, read_demand, read_regions
 
-    all_data = read_inputs(default_new_input)
-    data = all_data["demand"]
+    con = duckdb.connect(":memory:")
+
+    read_regions(default_new_input, con)
+    read_commodities(default_new_input, con)
+    data = read_demand(default_new_input, con)
 
     assert isinstance(data, xr.DataArray)
     assert data.dtype == np.float64
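The fixture chain mirrors the schema's foreign keys: populate_demand depends on populate_regions and populate_commodities so the referenced rows already exist on the shared in-memory connection before demand.csv is loaded. A standalone sketch of the constraint behavior the two demand tests pin down (toy schema, not the project's tables):

import duckdb

con = duckdb.connect(":memory:")
con.sql("CREATE TABLE regions (name VARCHAR PRIMARY KEY);")
con.sql("INSERT INTO regions VALUES ('R1');")
con.sql("CREATE TABLE demand (region VARCHAR REFERENCES regions(name), demand DOUBLE);")

con.sql("INSERT INTO demand VALUES ('R1', 10.0);")  # referenced region exists: accepted
try:
    con.sql("INSERT INTO demand VALUES ('invalid', 0.0);")  # no such region
except duckdb.ConstraintException as err:
    print(err)  # message names the violated foreign key constraint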
