Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions scripts/sedos_structure_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import pandas as pd
import re

# Collects, over all replace_string() calls, the commodity names that occur
# both before and after applying the old -> new name mapping.
total_list = []

# Workbook that contains both sheets read below.
_MODEL_STRUCTURE_XLSX = r"C:\Users\christoph.muschner\CWM\Python\SEDOS\SEDOS_Modellstruktur.xlsx"

# Nomenclature sheet: old commodity names and their suggested new names.
df_com = pd.read_excel(
    _MODEL_STRUCTURE_XLSX,
    sheet_name="Nomenclature_Commodities",
    usecols=["old name", "new name suggestion"],
    engine="openpyxl",
)

# Process sheet: input and output commodities per process.
process_set = pd.read_excel(
    _MODEL_STRUCTURE_XLSX,
    sheet_name="Process_Set",
    usecols=["input", "process", "output"],
    engine="openpyxl",
)


def replace_string(row):
    """
    Replace old commodity names in one delimited cell by their new names.

    The cell is split on "," and "+". Every piece is looked up in the
    old -> new mapping built from the module-level ``df_com`` table; pieces
    without a mapping entry are kept unchanged. Pieces that occur both in the
    original and in the mapped list are appended to the module-level
    ``total_list``.

    Parameters
    ----------
    row
        Cell value. Anything that is not a ``str`` (e.g. NaN from an empty
        cell) is reported on stdout and converted to an empty string.

    Returns
    -------
    str
        The mapped pieces joined by ", ", or "" for non-string input.
    """
    if not isinstance(row, str):
        print("Row is type:", type(row), row)
        return ""

    commodity_mapping = dict(zip(df_com["old name"], df_com["new name suggestion"]))

    # Raw-string character class: splits on "," or "+" without relying on the
    # invalid "\+" escape of a normal string literal.
    row_list = re.split(r"[,+]", row)

    new_list = [commodity_mapping.get(value, value) for value in row_list]

    # Names present before and after the mapping.
    common_list = list(set(row_list).intersection(new_list))
    if common_list:
        total_list.extend(common_list)

    return ", ".join(str(elem) for elem in new_list)

def map_old_to_new_commodity_names():
    """
    Clean the module-level ``process_set`` table and rename its commodities.

    All columns are first stripped of brackets and spaces, "+" is turned into
    "," and "." into "_". Afterwards the "input" and "output" columns are
    passed cell by cell through ``replace_string``. The result is written to a
    semicolon-separated CSV file and returned.

    Returns
    -------
    pd.DataFrame
        The modified ``process_set`` (changed in place).
    """
    # NOTE(review): as displayed, the last key repeats the plain-space entry;
    # it may originally have been another whitespace character - confirm.
    char_replace_dict = {"[": "", "]": "", "+": ",", " ": "", ".": "_", " ": ""}
    for col in process_set.columns:
        for key, value in char_replace_dict.items():
            # The keys are literal characters, not patterns: with regex=True
            # "[" and "+" are invalid regular expressions and "." would match
            # every character.
            process_set[col] = process_set[col].str.replace(key, value, regex=False)

    for col in ["input", "output"]:
        process_set[col] = process_set[col].apply(replace_string)

    process_set.to_csv(r"C:\Users\christoph.muschner\CWM\Python\SEDOS_DB\process_set_new_com.csv", sep=";")

    return process_set


def read_sedos_bwshare_excel(file_path: str) -> dict:
    """
    Load the two relevant sheets of the SEDOS B&W-share excel file.

    Parameters
    ----------
    file_path
        Path to downloaded B&W share file.

    Returns
    -------
    dict of dataframes
        Key "processes" holds the "Processes" sheet, key "input_output" the
        "input_output" sheet, each restricted to the columns used downstream.
    """

    def load_sheet(sheet: str, columns: list) -> pd.DataFrame:
        # Both sheets are read from the same workbook with the same engine.
        return pd.read_excel(io=file_path, engine="openpyxl", sheet_name=sheet, usecols=columns)

    sheets = {}
    sheets["processes"] = load_sheet("Processes", ["Input", "Process", "Output"])
    sheets["input_output"] = load_sheet("input_output", ["parameter", "process", "input", "output"])
    return sheets


def parse_es_structure(sedos_es_dict: dict) -> pd.DataFrame:
    """
    Parse the es_structure in SEDOS project from two different B&W share tables.

    The "processes" table supplies one row per process with the parameter
    name "default"; the "input_output" table supplies the parameter-specific
    rows. Both are stacked, cleaned of unwanted characters and sorted.

    Parameters
    ----------
    sedos_es_dict: dict
        Dict with dataframe of "processes" and "input_output" sheet

    Returns
    -------
    es_structure: pd.DataFrame
        Structure of energy system with default and parameter-specific inputs & outputs per process
    """

    processes = sedos_es_dict["processes"]
    input_output = sedos_es_dict["input_output"]

    inputs_outputs_default = pd.DataFrame(
        data={
            "parameter": "default",
            "process": processes.Process,
            "input": processes.Input,
            "output": processes.Output,
        }
    )

    es_structure = pd.concat([inputs_outputs_default, input_output], axis=0)

    # clean sheet and replace unwanted characters
    char_replace_dict = {"[": "", "]": "", "+": ",", " ": "", ".": "_"}
    for col in es_structure.columns:
        for key, value in char_replace_dict.items():
            # Literal replacement: as regular expressions "[" and "+" are
            # invalid and "." would match every character.
            es_structure[col] = es_structure[col].str.replace(key, value, regex=False)

    # sort values and renumber the rows
    return es_structure.sort_values(by=["process", "parameter"]).reset_index(drop=True)


def write_es_structure_file(es_structure: pd.DataFrame, output_path: str) -> None:
    """
    Save the energy system structure as an excel file.

    Parameters
    ----------
    es_structure
        Table to write; its index is not written to the file.
    output_path
        Destination of the excel file.
    """
    destination = f"{output_path}"
    es_structure.to_excel(destination, index=False)
Binary file not shown.
46 changes: 46 additions & 0 deletions tests/test_scripts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import pandas as pd
from pandas.testing import assert_frame_equal

from scripts.sedos_structure_parser import parse_es_structure, read_sedos_bwshare_excel


def test_parse_es_structure():
    """Compare the parsed test workbook with the expected structure table."""
    # The three parameter-specific cement rows share process, inputs and outputs.
    cement_process = "ind_cement_rk_ccs_1"
    cement_inputs = (
        "coal,coke,coke_oven_gas,heavy_fuel_oil,natgas,hydrogen,SNG,,biomass,waste,sludge,elec,cement_rawmeal_mats"
    )
    cement_outputs = "cement_clinker_mats,CO2p,CO2f,CH4f,N2Of"

    expected_output = pd.DataFrame(
        {
            "parameter": ["ACT_BND", "ACT_COST", "FLO_EFF"] + ["default"] * 4,
            "process": [cement_process] * 3
            + [
                "pow_combustion_gt",
                "pow_combustion_gt_SNG",
                "pow_combustion_gt_biogas",
                "pow_combustion_gt_natgas",
            ],
            "input": [cement_inputs] * 3
            + [
                "biogas,natgas,SNG_ren,SNG_conv,hydrogen_ren,hydrogen_conv,heating_oil",
                "SNG_ren,SNG_conv",
                "biogas",
                "natgas",
            ],
            "output": [cement_outputs] * 3
            + [
                "elec_ren,elec_conv,CO2",
                "elec_ren,elec_conv,CO2",
                "elec_ren",
                "elec_conv,CO2",
            ],
        }
    )

    sheets = read_sedos_bwshare_excel("test_data/test_structures/SEDOS_Prozesse&Parameter.xlsx")
    function_df = parse_es_structure(sedos_es_dict=sheets)

    assert_frame_equal(expected_output, function_df)