From 0bdcf032604d8664637a4e797a6ebac9aa5d8723 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 22 Nov 2024 10:23:11 -0800 Subject: [PATCH 01/70] Tinkering with MC worker Queue --- .../extensions/monte_carlo/MC_solver_options.toml | 6 +++++- temoa/extensions/monte_carlo/mc_run.py | 3 ++- temoa/extensions/monte_carlo/mc_sequencer.py | 15 ++++++++++++++- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/temoa/extensions/monte_carlo/MC_solver_options.toml b/temoa/extensions/monte_carlo/MC_solver_options.toml index 869a5338..524781cf 100644 --- a/temoa/extensions/monte_carlo/MC_solver_options.toml +++ b/temoa/extensions/monte_carlo/MC_solver_options.toml @@ -20,7 +20,11 @@ TimeLimit= 18000 # 5 hrs # 'LPWarmStart': 2, # pass basis [cbc] +primalT = 1e-3 +dualT = 1e-3 # tbd [appsi_highs] -# tbd +threads = 2 +primal_feasibility_tolerance = 1e-3 +dual_feasibility_tolerance = 1e-3 diff --git a/temoa/extensions/monte_carlo/mc_run.py b/temoa/extensions/monte_carlo/mc_run.py index de030a22..ff1bc357 100644 --- a/temoa/extensions/monte_carlo/mc_run.py +++ b/temoa/extensions/monte_carlo/mc_run.py @@ -329,7 +329,8 @@ def run_generator(self) -> Generator[MCRun, None, None]: """ ts_gen = self.tweak_set_generator() for run, tweaks in ts_gen: - logger.info('Making run %d from %d tweaks: %s', run, len(tweaks), tweaks) + logger.info('Making run %d from %d tweaks', run, len(tweaks)) + logger.debug('Run %d tweaks: %s', run, tweaks) # need to make a DEEP copy of the orig, which holds other dictionaries... data_store = {k: v.copy() for k, v in self.data_store.items()} diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 7d52f759..f51d6ab1 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -113,7 +113,7 @@ def start(self): # 4. Set up the workers num_workers = self.num_workers - work_queue = Queue(1) # restrict the queue to hold just 1 models in it max + work_queue = Queue(2) # restrict the queue to hold just 1 models in it max result_queue = Queue( num_workers + 1 ) # must be able to hold a shutdown signal from all workers at once! @@ -149,6 +149,7 @@ def start(self): # capture the "tweaks" self.writer.write_tweaks(iteration=mc_run.run_index, change_records=mc_run.change_records) instance = mc_run.model + iter_counter = 0 while more_runs: try: work_queue.put(instance, block=False) # put a log on the fire, if room @@ -188,6 +189,18 @@ def start(self): break time.sleep(0.1) # prevent hyperactivity... + # check the queues... + if iter_counter % 100 == 0: + try: + logger.info('Work queue size: %d', work_queue.qsize()) + logger.info('Result queue size: %d', result_queue.qsize()) + except NotImplementedError: + pass + # not implemented on OSX + finally: + iter_counter = 0 + iter_counter += 1 + # 7. 
Shut down the workers and then the logging queue if self.verbose: print('shutting it down') From b8e739e881ae57437df9653d00f37431978ca906 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 22 Nov 2024 11:30:46 -0800 Subject: [PATCH 02/70] Tinkering with MC worker Queue --- temoa/extensions/modeling_to_generate_alternatives/worker.py | 1 + 1 file changed, 1 insertion(+) diff --git a/temoa/extensions/modeling_to_generate_alternatives/worker.py b/temoa/extensions/modeling_to_generate_alternatives/worker.py index 6632032b..ce4f76cd 100644 --- a/temoa/extensions/modeling_to_generate_alternatives/worker.py +++ b/temoa/extensions/modeling_to_generate_alternatives/worker.py @@ -106,6 +106,7 @@ def run(self): tic = datetime.now() try: self.solve_count += 1 + logger.info('Worker %d solving', self.worker_number) res: SolverResults | None = self.opt.solve(model) except Exception as e: From 9f6c35996ddd1e47e01b1888715ff40a52b3e4cc Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 29 Nov 2024 10:48:44 -0800 Subject: [PATCH 03/70] Tinkering with MC worker Queue --- temoa/extensions/monte_carlo/mc_sequencer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index f51d6ab1..f4e79786 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -113,7 +113,7 @@ def start(self): # 4. Set up the workers num_workers = self.num_workers - work_queue = Queue(2) # restrict the queue to hold just 1 models in it max + work_queue = Queue(6) # restrict the queue to hold just 1 models in it max result_queue = Queue( num_workers + 1 ) # must be able to hold a shutdown signal from all workers at once! @@ -165,7 +165,7 @@ def start(self): logger.debug('Pulled last run from run generator') more_runs = False except queue.Full: - # print('work queue is full') + print('work queue is full') pass # see if there is a result ready to pick up, if not, pass try: @@ -188,10 +188,13 @@ def start(self): except queue.Empty: break time.sleep(0.1) # prevent hyperactivity... + # print(f'the run generator size: {sys.getsizeof(log_queue)}') + # print(f'the size of the writer is: {sys.getsizeof(self.writer)}') # check the queues... if iter_counter % 100 == 0: try: + logger.info('Work queue size: %d', work_queue.qsize()) logger.info('Result queue size: %d', result_queue.qsize()) except NotImplementedError: @@ -199,7 +202,7 @@ def start(self): # not implemented on OSX finally: iter_counter = 0 - iter_counter += 1 + iter_counter += 1 # 7. 
Shut down the workers and then the logging queue if self.verbose: From 983b8985beb9f2225244246af71f12ee0ceea0f5 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 29 Nov 2024 14:22:20 -0800 Subject: [PATCH 04/70] Tinkering with MC worker Queue - adding timing to post-processing --- temoa/extensions/monte_carlo/mc_sequencer.py | 1 + 1 file changed, 1 insertion(+) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index f4e79786..d4c7ac0b 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -174,6 +174,7 @@ def start(self): next_result = None # print('no result') if next_result is not None: + logger.info('Starting post-processing on %d', self.solve_count) self.process_solve_results(next_result) logger.info('Solve count: %d', self.solve_count) self.solve_count += 1 From 912591d2d73627b5ed7c1195d9dfc214de73e6bb Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 30 Nov 2024 10:32:09 -0800 Subject: [PATCH 05/70] Tinkering with MC worker Queue - adding timing to post-processing --- temoa/extensions/monte_carlo/mc_sequencer.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index d4c7ac0b..04ff8ff8 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -155,39 +155,48 @@ def start(self): work_queue.put(instance, block=False) # put a log on the fire, if room logger.info('Putting an instance in the work queue') try: + logger.info('1. Pulling from gen') mc_run = next(run_gen) + logger.info('2. Pulled from gen') # capture the "tweaks" + logger.info('3. Putting making instance') self.writer.write_tweaks( iteration=mc_run.run_index, change_records=mc_run.change_records ) instance = mc_run.model + logger.info('4. Instance made') except StopIteration: logger.debug('Pulled last run from run generator') more_runs = False except queue.Full: - print('work queue is full') + # print('work queue is full') pass # see if there is a result ready to pick up, if not, pass try: + logger.info('5. looking for result') next_result = result_queue.get_nowait() + logger.info('6a. got result') except queue.Empty: + logger.info('6b. No result') next_result = None # print('no result') if next_result is not None: - logger.info('Starting post-processing on %d', self.solve_count) + logger.info('7. Starting post-processing on %d', self.solve_count) self.process_solve_results(next_result) - logger.info('Solve count: %d', self.solve_count) + logger.info('8. Solve count: %d', self.solve_count) self.solve_count += 1 if self.verbose or not self.config.silent: print(f'MC Solve count: {self.solve_count}') # pull anything from the logging queue and log it... while True: + logger.info('9. polling log queue') try: record = log_queue.get_nowait() process_logger = getLogger(record.name) process_logger.handle(record) except queue.Empty: break + logger.info('10. Finished polling log queue') time.sleep(0.1) # prevent hyperactivity... 
# print(f'the run generator size: {sys.getsizeof(log_queue)}') # print(f'the size of the writer is: {sys.getsizeof(self.writer)}') From 53e13b8dbf4b57300a08c2ed390e6a911dda391a Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 30 Nov 2024 10:49:31 -0800 Subject: [PATCH 06/70] Tinkering with MC worker Queue - adding timing to post-processing --- temoa/extensions/monte_carlo/mc_run.py | 2 ++ temoa/extensions/monte_carlo/mc_sequencer.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/temoa/extensions/monte_carlo/mc_run.py b/temoa/extensions/monte_carlo/mc_run.py index ff1bc357..8cc24930 100644 --- a/temoa/extensions/monte_carlo/mc_run.py +++ b/temoa/extensions/monte_carlo/mc_run.py @@ -194,7 +194,9 @@ def change_records(self) -> list[ChangeRecord]: def model(self) -> TemoaModel: dp = HybridLoader.data_portal_from_data(self.data_store) model = TemoaModel() + logger.info('3.6 Making instance in MCRun') instance = model.create_instance(data=dp) + logger.info('3.7 instance done in MCRun') # update the name to indexed... instance.name = f'{self.scenario_name}-{self.run_index}' logger.info('Created model instance for run %d', self.run_index) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 04ff8ff8..14caaf16 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -159,10 +159,11 @@ def start(self): mc_run = next(run_gen) logger.info('2. Pulled from gen') # capture the "tweaks" - logger.info('3. Putting making instance') + logger.info('3. Writing Tweaks') self.writer.write_tweaks( iteration=mc_run.run_index, change_records=mc_run.change_records ) + logger.info('3.1 Making Instance') instance = mc_run.model logger.info('4. Instance made') except StopIteration: From 4a4b48bcd5c948df1b61fe9166b4aaaa3baedff8 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 30 Nov 2024 15:55:18 -0800 Subject: [PATCH 07/70] Tinkering with MC worker Queue - adding timing to post-processing --- temoa/extensions/monte_carlo/mc_sequencer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 14caaf16..4bf47eeb 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -175,7 +175,8 @@ def start(self): # see if there is a result ready to pick up, if not, pass try: logger.info('5. looking for result') - next_result = result_queue.get_nowait() + # next_result = result_queue.get_nowait() # taking like 16 minutes to pull + next_result = result_queue.get(block=True, timeout=1) # wait for 1 second logger.info('6a. got result') except queue.Empty: logger.info('6b. 
No result')
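Aside (editor's sketch, not part of the patch series): patches 01-07 above keep circling the same manager/worker shape — a bounded work queue fed without blocking, a result queue drained opportunistically, a short sleep to avoid a busy-wait, and sentinel values for shutdown. A minimal, standard-library-only version of that pattern is sketched below; the function names and the 'STOP'/'DONE' sentinels are illustrative and are not Temoa identifiers.

import queue
import time
from multiprocessing import Process, Queue


def worker(work_q, result_q):
    while True:
        payload = work_q.get()           # block until work or a sentinel arrives
        if payload == 'STOP':            # sentinel: acknowledge and exit
            result_q.put('DONE')
            break
        name, data = payload             # small, picklable payload
        result_q.put((name, sum(data)))  # stand-in for "solve it and summarize"


def manager(jobs, num_workers=2):
    work_q = Queue(maxsize=2)                  # small bound keeps memory in check
    result_q = Queue(maxsize=num_workers + 1)  # room for every worker's 'DONE'
    workers = [
        Process(target=worker, args=(work_q, result_q), daemon=True)
        for _ in range(num_workers)
    ]
    for w in workers:
        w.start()

    results = []
    pending = None
    job_iter = iter(jobs)
    more_jobs = True
    while more_jobs:
        item = pending if pending is not None else next(job_iter, None)
        if item is None:
            more_jobs = False
        else:
            try:
                work_q.put(item, block=False)      # put a log on the fire, if room
                pending = None
            except queue.Full:
                pending = item                     # hold it and retry on the next pass
        try:
            results.append(result_q.get_nowait())  # drain results opportunistically
        except queue.Empty:
            pass
        time.sleep(0.1)                            # prevent hyperactivity

    for _ in workers:
        work_q.put('STOP')                         # one sentinel per worker
    done = 0
    while done < num_workers:                      # collect stragglers and 'DONE' acks
        msg = result_q.get()
        if msg == 'DONE':
            done += 1
        else:
            results.append(msg)
    for w in workers:
        w.join()
    return results


if __name__ == '__main__':
    print(manager([(f'run-{i}', list(range(i + 1))) for i in range(5)]))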
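Aside (editor's sketch): patch 15 below is where the experimentation lands — the manager hands each worker a (name, DataPortal) pair rather than a built TemoaModel, and the worker sends back a small 'DataBrick' of plain results, sidestepping the cost of pickling a full Pyomo instance through the queue (consistent with the 'taking like 16 minutes to pull' note in patch 07). A rough sketch of that "ship data, not models" container follows; the class and field names here are illustrative stand-ins, not the actual DataBrick API.

from dataclasses import dataclass, field


@dataclass(frozen=True)
class RunResult:
    """Plain, cheaply picklable results from one solved run (illustrative only)."""

    name: str                                      # e.g. 'scenario-7'
    objective: float
    capacity: dict = field(default_factory=dict)   # {(r, t, v): value, ...}
    flows: dict = field(default_factory=dict)      # {(r, p, s, d, i, t, v, o): value, ...}
    emissions: dict = field(default_factory=dict)  # {(r, p, t, v, e): value, ...}


# A worker builds one of these from the solved instance -- pulling only plain floats and
# tuples out of the Pyomo variables -- and puts it on the result queue; the manager then
# writes it to the database without ever touching the model object.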
From 1d7551ced11c0159e1199b12fc6d1b776f936c7c Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 5 Dec 2024 16:18:26 -0800 Subject: [PATCH 15/70] MC running with new MC Worker - queues are now swapping data vs.
models - table_writer.py broken up into data pulling portion/writing portion --- data_files/my_configs/monte_carlo_utopia.toml | 2 +- .../worker.py | 1 - temoa/extensions/monte_carlo/mc_run.py | 13 +- temoa/extensions/monte_carlo/mc_sequencer.py | 59 ++- temoa/extensions/monte_carlo/mc_worker.py | 154 ++++++ temoa/temoa_model/data_brick.py | 123 +++++ temoa/temoa_model/table_data_puller.py | 470 +++++++++++++++++ temoa/temoa_model/table_writer.py | 473 +++--------------- tests/test_table_writer.py | 6 +- 9 files changed, 859 insertions(+), 442 deletions(-) create mode 100644 temoa/extensions/monte_carlo/mc_worker.py create mode 100644 temoa/temoa_model/data_brick.py create mode 100644 temoa/temoa_model/table_data_puller.py diff --git a/data_files/my_configs/monte_carlo_utopia.toml b/data_files/my_configs/monte_carlo_utopia.toml index 263afb91..575dfc63 100644 --- a/data_files/my_configs/monte_carlo_utopia.toml +++ b/data_files/my_configs/monte_carlo_utopia.toml @@ -110,5 +110,5 @@ activity_labels = [] [monte_carlo] # a path from the PROJECT ROOT to the settings file that contains the run data. -run_settings = 'data_files/monte_carlo/run_settings_1.csv' +run_settings = 'data_files/monte_carlo/run_settings_2.csv' diff --git a/temoa/extensions/modeling_to_generate_alternatives/worker.py b/temoa/extensions/modeling_to_generate_alternatives/worker.py index ce4f76cd..6632032b 100644 --- a/temoa/extensions/modeling_to_generate_alternatives/worker.py +++ b/temoa/extensions/modeling_to_generate_alternatives/worker.py @@ -106,7 +106,6 @@ def run(self): tic = datetime.now() try: self.solve_count += 1 - logger.info('Worker %d solving', self.worker_number) res: SolverResults | None = self.opt.solve(model) except Exception as e: diff --git a/temoa/extensions/monte_carlo/mc_run.py b/temoa/extensions/monte_carlo/mc_run.py index 8cc24930..a6eb23a7 100644 --- a/temoa/extensions/monte_carlo/mc_run.py +++ b/temoa/extensions/monte_carlo/mc_run.py @@ -31,6 +31,8 @@ from logging import getLogger from pathlib import Path +from pyomo.dataportal import DataPortal + from definitions import PROJECT_ROOT from temoa.temoa_model.hybrid_loader import HybridLoader from temoa.temoa_model.temoa_config import TemoaConfig @@ -191,12 +193,17 @@ def change_records(self) -> list[ChangeRecord]: return res @property - def model(self) -> TemoaModel: + def model_dp(self) -> tuple[str, DataPortal]: + """tuple of the indexed name for the scenario, and the DP""" + name = f'{self.scenario_name}-{self.run_index}' dp = HybridLoader.data_portal_from_data(self.data_store) + return name, dp + + @property + def model(self) -> TemoaModel: + dp = self.model_dp model = TemoaModel() - logger.info('3.6 Making instance in MCRun') instance = model.create_instance(data=dp) - logger.info('3.7 instance done in MCRun') # update the name to indexed... 
instance.name = f'{self.scenario_name}-{self.run_index}' logger.info('Created model instance for run %d', self.run_index) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 4bf47eeb..171064e6 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -37,13 +37,15 @@ from multiprocessing import Queue from pathlib import Path +from pyomo.dataportal import DataPortal + from definitions import PROJECT_ROOT, get_OUTPUT_PATH -from temoa.extensions.modeling_to_generate_alternatives.worker import Worker from temoa.extensions.monte_carlo.mc_run import MCRunFactory +from temoa.extensions.monte_carlo.mc_worker import MCWorker +from temoa.temoa_model.data_brick import DataBrick from temoa.temoa_model.hybrid_loader import HybridLoader from temoa.temoa_model.table_writer import TableWriter from temoa.temoa_model.temoa_config import TemoaConfig -from temoa.temoa_model.temoa_model import TemoaModel logger = getLogger(__name__) @@ -113,8 +115,10 @@ def start(self): # 4. Set up the workers num_workers = self.num_workers - work_queue = Queue(6) # restrict the queue to hold just 1 models in it max - result_queue = Queue( + work_queue: Queue[tuple[str, DataPortal] | str] = Queue( + 6 + ) # restrict the queue to hold just 1 models in it max + result_queue: Queue[DataBrick | str] = Queue( num_workers + 1 ) # must be able to hold a shutdown signal from all workers at once! log_queue = Queue(50) @@ -129,8 +133,8 @@ def start(self): if not s_path.exists(): s_path.mkdir() for i in range(num_workers): - w = Worker( - model_queue=work_queue, + w = MCWorker( + dp_queue=work_queue, results_queue=result_queue, log_root_name=__name__, log_queue=log_queue, @@ -148,42 +152,41 @@ def start(self): mc_run = next(run_gen) # capture the "tweaks" self.writer.write_tweaks(iteration=mc_run.run_index, change_records=mc_run.change_records) - instance = mc_run.model + run_name, dp = mc_run.model_dp iter_counter = 0 while more_runs: try: - work_queue.put(instance, block=False) # put a log on the fire, if room + work_queue.put((run_name, dp), block=False) # put a log on the fire, if room logger.info('Putting an instance in the work queue') try: - logger.info('1. Pulling from gen') + tic = datetime.now() mc_run = next(run_gen) - logger.info('2. Pulled from gen') + toc = datetime.now() + logger.info('Made mc_run from generator in %0.2f', (toc - tic).total_seconds()) # capture the "tweaks" - logger.info('3. Writing Tweaks') self.writer.write_tweaks( iteration=mc_run.run_index, change_records=mc_run.change_records ) - logger.info('3.1 Making Instance') - instance = mc_run.model - logger.info('4. Instance made') + # ready the next one + run_name, dp = mc_run.model_dp except StopIteration: - logger.debug('Pulled last run from run generator') + logger.debug('Pulled last DP from run generator') more_runs = False except queue.Full: # print('work queue is full') pass # see if there is a result ready to pick up, if not, pass try: - logger.info('5. looking for result') - # next_result = result_queue.get_nowait() # taking like 16 minutes to pull - next_result = result_queue.get(block=True, timeout=1) # wait for 1 second - logger.info('6a. got result') + tic = datetime.now() + next_result = result_queue.get_nowait() + toc = datetime.now() + logger.info( + 'Pulled DataBrick from result_queue in %0.2f', (toc - tic).total_seconds() + ) except queue.Empty: - logger.info('6b. 
No result') next_result = None # print('no result') if next_result is not None: - logger.info('7. Starting post-processing on %d', self.solve_count) self.process_solve_results(next_result) logger.info('8. Solve count: %d', self.solve_count) self.solve_count += 1 @@ -191,22 +194,17 @@ def start(self): print(f'MC Solve count: {self.solve_count}') # pull anything from the logging queue and log it... while True: - logger.info('9. polling log queue') try: record = log_queue.get_nowait() process_logger = getLogger(record.name) process_logger.handle(record) except queue.Empty: break - logger.info('10. Finished polling log queue') time.sleep(0.1) # prevent hyperactivity... - # print(f'the run generator size: {sys.getsizeof(log_queue)}') - # print(f'the size of the writer is: {sys.getsizeof(self.writer)}') # check the queues... if iter_counter % 100 == 0: try: - logger.info('Work queue size: %d', work_queue.qsize()) logger.info('Result queue size: %d', result_queue.qsize()) except NotImplementedError: @@ -267,15 +265,16 @@ def start(self): if self.verbose: print('result queue joined') - def process_solve_results(self, instance: TemoaModel): + def process_solve_results(self, brick: DataBrick): """write the results as required""" # get the instance number from the model name, if provided - if '-' not in instance.name: + if '-' not in brick.name: raise ValueError( 'Instance name does not appear to contain a -idx value. The manager should be tagging/updating this' ) - idx = int(instance.name.split('-')[-1]) + idx = int(brick.name.split('-')[-1]) if idx in self.seen_instance_indices: raise ValueError(f'Instance index {idx} already seen. Likely coding error') self.seen_instance_indices.add(idx) - self.writer.write_mc_results(M=instance, iteration=idx) + logger.info('Processing results for %s', brick.name) + self.writer.write_mc_results(brick=brick, iteration=idx) diff --git a/temoa/extensions/monte_carlo/mc_worker.py b/temoa/extensions/monte_carlo/mc_worker.py new file mode 100644 index 00000000..1a2a201d --- /dev/null +++ b/temoa/extensions/monte_carlo/mc_worker.py @@ -0,0 +1,154 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 5/5/24 + +Class to contain Workers that execute solves in separate processes + +dev note: +This class is derived from the original Worker class in MGA extension, but is just different enough +that it is a separate class. In future, it may make sense to re-combine these. RN, this worker will +ingest DataPortal objects to make new models. 
The MGA will (in future) likely just take in new obj functions + +""" +import logging.handlers +from datetime import datetime +from logging import getLogger +from multiprocessing import Process, Queue +from pathlib import Path + +from pyomo.dataportal import DataPortal +from pyomo.opt import SolverFactory, SolverResults, check_optimal_termination + +from temoa.temoa_model.data_brick import DataBrick, data_brick_factory +from temoa.temoa_model.temoa_model import TemoaModel + +verbose = False # for T/S or monitoring... + + +class MCWorker(Process): + worker_idx = 1 + + def __init__( + self, + dp_queue: Queue, + results_queue: Queue, + log_root_name, + log_queue, + log_level, + solver_log_path: Path | None = None, + **kwargs, + ): + super(MCWorker, self).__init__(daemon=True) + self.worker_number = MCWorker.worker_idx + MCWorker.worker_idx += 1 + self.dp_queue: Queue[DataPortal | str] = dp_queue + self.results_queue: Queue[DataBrick | str] = results_queue + self.solver_name = kwargs['solver_name'] + self.solver_options = kwargs['solver_options'] + self.opt = SolverFactory(self.solver_name, options=self.solver_options) + self.log_queue = log_queue + self.log_level = log_level + self.root_logger_name = log_root_name + self.solver_log_path = solver_log_path + self.solve_count = 0 + + def run(self): + logger = getLogger('.'.join((self.root_logger_name, 'worker', str(self.worker_number)))) + logger.setLevel(self.log_level) + logger.propagate = ( + False # not propagating up the chain fixes issue on TRACE where we were getting dupes. + ) + handler = logging.handlers.QueueHandler(self.log_queue) + logger.addHandler(handler) + logger.info('Worker %d spun up', self.worker_number) + + # update the solver options to pass in a log location + while True: + if self.solver_log_path: + # add the solver log path to options, if one is provided + log_location = Path( + self.solver_log_path, + f'solver_log_{str(self.worker_number)}_{self.solve_count}.log', + ) + log_location = str(log_location) + match self.solver_name: + case 'gurobi': + self.solver_options.update({'LogFile': log_location}) + # case 'appsi_highs': + # self.solver_options.update({'log_file': log_location}) + case _: + pass + + self.opt.options = self.solver_options + + # wait for a DataPortal object to show up, then get to work + data = self.dp_queue.get() + if data == 'ZEBRA': # shutdown signal + if verbose: + print(f'worker {self.worker_number} got shutdown signal') + logger.info('Worker %d received shutdown signal', self.worker_number) + self.results_queue.put('COYOTE') + break + name, dp = data + abstract_model = TemoaModel() + model: TemoaModel = abstract_model.create_instance(data=dp) + model.name = name # set the name from the input + tic = datetime.now() + try: + self.solve_count += 1 + res: SolverResults | None = self.opt.solve(model) + + except Exception as e: + if verbose: + print('bad solve') + logger.warning( + 'Worker %d failed to solve model: %s... skipping. Exception: %s', + self.worker_number, + model.name, + e, + ) + res = None + toc = datetime.now() + + # guard against a bad "res" object... 
+ try: + good_solve = check_optimal_termination(res) + if good_solve: + data_brick = data_brick_factory(model) + self.results_queue.put(data_brick) + logger.info( + 'Worker %d solved a model in %0.2f minutes', + self.worker_number, + (toc - tic).total_seconds() / 60, + ) + if verbose: + print(f'Worker {self.worker_number} completed a successful solve') + else: + status = res['Solver'].termination_condition + logger.info( + 'Worker %d did not solve. Results status: %s', self.worker_number, status + ) + except AttributeError: + pass diff --git a/temoa/temoa_model/data_brick.py b/temoa/temoa_model/data_brick.py new file mode 100644 index 00000000..01c0a96e --- /dev/null +++ b/temoa/temoa_model/data_brick.py @@ -0,0 +1,123 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 12/5/24 + +Objective of this module is to build a lightweight container to hold a selection of model results from a +Worker process with the intent to send this back via multiprocessing queue in lieu of sending the entire +model back (which is giant and slow). It will probably be a "superset" of data elements required to report +for MC and MGA right now, and maybe others + +""" + +from temoa.temoa_model.table_data_puller import ( + EI, + CapData, + poll_cost_results, + poll_flow_results, + poll_emissions, + poll_objective, + poll_capacity_results, +) +from temoa.temoa_model.temoa_model import TemoaModel + + +class DataBrick: + """ + A utility container for bundling assorted data structures for runs done by Worker objects. 
+ """ + + def __init__( + self, + name, + emission_costs, + emission_flows, + capacity_data, + flow_data, + obj_data, + regular_costs, + exchange_costs, + ): + """ + Build a data brick storage object from a model instance + :param model: + """ + self._name = name + self._emission_costs = emission_costs + self._emission_flows = emission_flows + self._capacity_data = capacity_data + self._flow_data = flow_data + self._obj_data = obj_data + self._regular_costs = regular_costs + self._exchange_costs = exchange_costs + + @property + def name(self) -> str: + return self._name + + @property + def emission_flows(self) -> dict[EI, float]: + return self._emission_flows + + @property + def capacity_data(self) -> CapData: + return self._capacity_data + + @property + def flow_data(self) -> dict: + return self._flow_data + + @property + def obj_data(self) -> list: + return self._obj_data + + @property + def cost_data(self): + return self._regular_costs + + @property + def exchange_cost_data(self): + return self._exchange_costs + + @property + def emission_cost_data(self): + return self._emission_costs + + +def data_brick_factory(model: TemoaModel) -> DataBrick: + _name = model.name + # process costs + _regular_costs, _exchange_costs = poll_cost_results(model, p_0=None) + + # process flows + _flow_data = poll_flow_results(model) + + # process emissions + _emission_costs, _emission_flows = poll_emissions(model) + + # poll capacity + _capacity_data = poll_capacity_results(model) + + # process objectives + _obj_data = poll_objective(model) diff --git a/temoa/temoa_model/table_data_puller.py b/temoa/temoa_model/table_data_puller.py new file mode 100644 index 00000000..bfb0d49e --- /dev/null +++ b/temoa/temoa_model/table_data_puller.py @@ -0,0 +1,470 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 12/5/24 + +A companion module to the table writer to hold some data-pulling functions and small utilities and separate them +from the writing process for organization and to isolate the DB access in the writer such that +these functions can be called on a model instance without any DB interactions. (Intended to support use +by Workers who shouldn't interact with DB). 
Dev Note: In future, if transition away from sqlite, this +could all be refactored to perform tasks within workers, but concurrent access to sqlite is a no-go +""" +import logging +from collections import namedtuple, defaultdict +from enum import unique, Enum + +from pyomo.common.numeric_types import value +from pyomo.core import Objective + +from temoa.temoa_model import temoa_rules +from temoa.temoa_model.exchange_tech_cost_ledger import ExchangeTechCostLedger, CostType +from temoa.temoa_model.temoa_model import TemoaModel + +logger = logging.getLogger(__name__) + + +def _marks(num: int) -> str: + """convenience to make a sequence of question marks for query""" + qs = ','.join('?' for _ in range(num)) + marks = '(' + qs + ')' + return marks + + +EI = namedtuple('EI', ['r', 'p', 't', 'v', 'e']) +"""Emission Index""" + + +@unique +class FlowType(Enum): + """Types of flow tracked""" + + IN = 1 + OUT = 2 + CURTAIL = 3 + FLEX = 4 + LOST = 5 + + +FI = namedtuple('FI', ['r', 'p', 's', 'd', 'i', 't', 'v', 'o']) +"""Flow Index""" + +CapData = namedtuple('CapData', ['built', 'net', 'retired']) +"""Small container to hold named dictionaries of capacity data for processing""" + + +def ritvo(fi: FI) -> tuple: + """convert FI to ritvo index""" + return fi.r, fi.i, fi.t, fi.v, fi.o + + +def rpetv(fi: FI, e: str) -> tuple: + """convert FI and emission to rpetv index""" + return fi.r, fi.p, e, fi.t, fi.v + + +def poll_capacity_results(M: TemoaModel, epsilon=1e-5) -> CapData: + # Built Capacity + built = [] + for r, t, v in M.V_NewCapacity: + if v in M.time_optimize: + val = value(M.V_NewCapacity[r, t, v]) + if abs(val) < epsilon: + continue + new_cap = (r, t, v, val) + built.append(new_cap) + + # NetCapacity + net = [] + for r, p, t, v in M.V_Capacity: + val = value(M.V_Capacity[r, p, t, v]) + if abs(val) < epsilon: + continue + new_net_cap = (r, p, t, v, val) + net.append(new_net_cap) + + # Retired Capacity + ret = [] + for r, p, t, v in M.V_RetiredCapacity: + val = value(M.V_RetiredCapacity[r, p, t, v]) + if abs(val) < epsilon: + continue + new_retired_cap = (r, p, t, v, val) + ret.append(new_retired_cap) + + return CapData(built=built, net=net, retired=ret) + + +def poll_flow_results(M: TemoaModel, epsilon=1e-5) -> dict[FI, dict[FlowType, float]]: + """A static version that can be called directly by a solver worker without making a class instance""" + res: dict[FI, dict[FlowType, float]] = defaultdict(lambda: defaultdict(float)) + + # ---- NON-annual ---- + + # Storage, which has a unique v_flow_in (non-storage techs do not have this variable) + for key in M.V_FlowIn: + fi = FI(*key) + flow = value(M.V_FlowIn[fi]) + if abs(flow) < epsilon: + continue + res[fi][FlowType.IN] = flow + res[fi][FlowType.LOST] = (1 - value(M.Efficiency[ritvo(fi)])) * flow + + # regular flows + for key in M.V_FlowOut: + fi = FI(*key) + flow = value(M.V_FlowOut[fi]) + if abs(flow) < epsilon: + continue + res[fi][FlowType.OUT] = flow + + if fi.t not in M.tech_storage: # we can get the flow in by out/eff... + flow = value(M.V_FlowOut[fi]) / value(M.Efficiency[ritvo(fi)]) + res[fi][FlowType.IN] = flow + res[fi][FlowType.LOST] = (1 - value(M.Efficiency[ritvo(fi)])) * flow + + # curtailment flows + for key in M.V_Curtailment: + fi = FI(*key) + val = value(M.V_Curtailment[fi]) + if abs(val) < epsilon: + continue + res[fi][FlowType.CURTAIL] = val + + # flex techs. 
This will subtract the flex from their output flow IOT make OUT the "net" + for key in M.V_Flex: + fi = FI(*key) + flow = value(M.V_Flex[fi]) + if abs(flow) < epsilon: + continue + res[fi][FlowType.FLEX] = flow + res[fi][FlowType.OUT] -= flow + + # ---- annual ---- + + # basic annual flows + for r, p, i, t, v, o in M.V_FlowOutAnnual: + for s in M.time_season: + for d in M.time_of_day: + fi = FI(r, p, s, d, i, t, v, o) + flow = value(M.V_FlowOutAnnual[r, p, i, t, v, o]) * value(M.SegFrac[s, d]) + if abs(flow) < epsilon: + continue + res[fi][FlowType.OUT] = flow + res[fi][FlowType.IN] = flow / value(M.Efficiency[ritvo(fi)]) + res[fi][FlowType.LOST] = (1 - value(M.Efficiency[ritvo(fi)])) * res[fi][FlowType.IN] + + # flex annual + for r, p, i, t, v, o in M.V_FlexAnnual: + for s in M.time_season: + for d in M.time_of_day: + fi = FI(r, p, s, d, i, t, v, o) + flow = value(M.V_FlexAnnual[r, p, i, t, v, o]) * value(M.SegFrac[s, d]) + if abs(flow) < epsilon: + continue + res[fi][FlowType.FLEX] = flow + res[fi][FlowType.OUT] -= flow + + return res + + +def poll_objective(M: TemoaModel) -> list[tuple[str, float]]: + """gather objective name, value tuples for all active objectives""" + + objs: list[Objective] = list(M.component_data_objects(Objective)) + active_objs = [obj for obj in objs if obj.active] + if len(active_objs) > 1: + logger.warning('Multiple active objectives found. All will be logged in db') + res = [] + for obj in active_objs: + obj_name, obj_value = obj.getname(fully_qualified=True), value(obj) + res.append((obj_name, obj_value)) + return res + + +def poll_cost_results( + M: TemoaModel, p_0: int | None, epsilon=1e-5 +) -> tuple[dict[tuple, dict], ...]: + """gather all cost data from solved model""" + if not p_0: + p_0 = min(M.time_optimize) + + p_e = M.time_future.last() + + # conveniences... + GDR = value(M.GlobalDiscountRate) + MPL = M.ModelProcessLife + LLN = M.LoanLifetimeProcess + + exchange_costs = ExchangeTechCostLedger(M) + entries = defaultdict(dict) + for r, t, v in M.CostInvest.sparse_iterkeys(): # Returns only non-zero values + # gather details... + cap = value(M.V_NewCapacity[r, t, v]) + if abs(cap) < epsilon: + continue + loan_life = value(LLN[r, t, v]) + loan_rate = value(M.LoanRate[r, t, v]) + + model_loan_cost, undiscounted_cost = loan_costs( + loan_rate=loan_rate, + loan_life=loan_life, + capacity=cap, + invest_cost=value(M.CostInvest[r, t, v]), + process_life=value(M.LifetimeProcess[r, t, v]), + p_0=p_0, + p_e=p_e, + global_discount_rate=GDR, + vintage=v, + ) + # screen for linked region... 
+ if '-' in r: + exchange_costs.add_cost_record( + r, + period=v, + tech=t, + vintage=v, + cost=model_loan_cost, + cost_type=CostType.D_INVEST, + ) + exchange_costs.add_cost_record( + r, + period=v, + tech=t, + vintage=v, + cost=undiscounted_cost, + cost_type=CostType.INVEST, + ) + else: + # enter it into the entries table with period of cost = vintage (p=v) + entries[r, v, t, v].update( + {CostType.D_INVEST: model_loan_cost, CostType.INVEST: undiscounted_cost} + ) + + for r, p, t, v in M.CostFixed.sparse_iterkeys(): + cap = value(M.V_Capacity[r, p, t, v]) + if abs(cap) < epsilon: + continue + + fixed_cost = value(M.CostFixed[r, p, t, v]) + undiscounted_fixed_cost = cap * fixed_cost * value(MPL[r, p, t, v]) + + model_fixed_cost = temoa_rules.fixed_or_variable_cost( + cap, fixed_cost, value(MPL[r, p, t, v]), GDR=GDR, P_0=p_0, p=p + ) + if '-' in r: + exchange_costs.add_cost_record( + r, + period=p, + tech=t, + vintage=v, + cost=model_fixed_cost, + cost_type=CostType.D_FIXED, + ) + exchange_costs.add_cost_record( + r, + period=p, + tech=t, + vintage=v, + cost=undiscounted_fixed_cost, + cost_type=CostType.FIXED, + ) + else: + entries[r, p, t, v].update( + {CostType.D_FIXED: model_fixed_cost, CostType.FIXED: undiscounted_fixed_cost} + ) + + for r, p, t, v in M.CostVariable.sparse_iterkeys(): + if t not in M.tech_annual: + activity = sum( + value(M.V_FlowOut[r, p, S_s, S_d, S_i, t, v, S_o]) + for S_i in M.processInputs[r, p, t, v] + for S_o in M.ProcessOutputsByInput[r, p, t, v, S_i] + for S_s in M.time_season + for S_d in M.time_of_day + ) + else: + activity = sum( + value(M.V_FlowOutAnnual[r, p, S_i, t, v, S_o]) + for S_i in M.processInputs[r, p, t, v] + for S_o in M.ProcessOutputsByInput[r, p, t, v, S_i] + ) + if abs(activity) < epsilon: + continue + + var_cost = value(M.CostVariable[r, p, t, v]) + undiscounted_var_cost = activity * var_cost * value(MPL[r, p, t, v]) + + model_var_cost = temoa_rules.fixed_or_variable_cost( + activity, var_cost, value(MPL[r, p, t, v]), GDR=GDR, P_0=p_0, p=p + ) + if '-' in r: + exchange_costs.add_cost_record( + r, + period=p, + tech=t, + vintage=v, + cost=model_var_cost, + cost_type=CostType.D_VARIABLE, + ) + exchange_costs.add_cost_record( + r, + period=p, + tech=t, + vintage=v, + cost=undiscounted_var_cost, + cost_type=CostType.VARIABLE, + ) + else: + entries[r, p, t, v].update( + {CostType.D_VARIABLE: model_var_cost, CostType.VARIABLE: undiscounted_var_cost} + ) + exchange_entries = exchange_costs.get_entries() + return entries, exchange_entries + + +def loan_costs( + loan_rate, # this is referred to as LoanRate in parameters + loan_life, + capacity, + invest_cost, + process_life, + p_0, + p_e, + global_discount_rate, + vintage, + **kwargs, +) -> tuple[float, float]: + """ + Calculate Loan costs by calling the loan annualize and loan cost functions in temoa_rules + :return: tuple of [model-view discounted cost, un-discounted annuity] + """ + # dev note: this is a passthrough function. 
Sole intent is to use the EXACT formula the + # model uses for these costs + loan_ar = temoa_rules.loan_annualization_rate(loan_rate=loan_rate, loan_life=loan_life) + model_ic = temoa_rules.loan_cost( + capacity, + invest_cost, + loan_annualize=loan_ar, + lifetime_loan_process=loan_life, + P_0=p_0, + P_e=p_e, + GDR=global_discount_rate, + vintage=vintage, + ) + # Override the GDR to get the undiscounted value + global_discount_rate = 0 + undiscounted_cost = temoa_rules.loan_cost( + capacity, + invest_cost, + loan_annualize=loan_ar, + lifetime_loan_process=loan_life, + P_0=p_0, + P_e=p_e, + GDR=global_discount_rate, + vintage=vintage, + ) + return model_ic, undiscounted_cost + + +def poll_emissions( + M: 'TemoaModel', p_0=None, epsilon=1e-5 +) -> tuple[dict[tuple, dict], dict[EI, float]]: + """ + Gather all emission flows, cost them and provide a tuple of costs and flows + :param M: the model + :param p_0: the first period, if other than min(time_optimize), as in MYOPIC + :param epsilon: a minimal epsilon for ignored values + :return: + """ + + # UPDATE: older versions brought forward had some accounting errors here for flex/curtailed emissions + # see the note on emissions in the Cost function in temoa_rules + if not p_0: + p_0 = min(M.time_optimize) + + GDR = value(M.GlobalDiscountRate) + MPL = M.ModelProcessLife + + base = [ + (r, p, e, i, t, v, o) + for (r, e, i, t, v, o) in M.EmissionActivity + for p in M.time_optimize + if (r, p, t, v) in M.processInputs + ] + + # The "base set" can be expanded now to cover normal/annual indexing sets + normal = [ + (r, p, e, s, d, i, t, v, o) + for (r, p, e, i, t, v, o) in base + for s in M.time_season + for d in M.time_of_day + if t not in M.tech_annual + ] + annual = [(r, p, e, i, t, v, o) for (r, p, e, i, t, v, o) in base if t in M.tech_annual] + + flows: dict[EI, float] = defaultdict(float) + # iterate through the normal and annual and accumulate flow values + for r, p, e, s, d, i, t, v, o in normal: + flows[EI(r, p, t, v, e)] += ( + value(M.V_FlowOut[r, p, s, d, i, t, v, o]) * M.EmissionActivity[r, e, i, t, v, o] + ) + + for r, p, e, i, t, v, o in annual: + flows[EI(r, p, t, v, e)] += ( + value(M.V_FlowOutAnnual[r, p, i, t, v, o]) * M.EmissionActivity[r, e, i, t, v, o] + ) + + # gather costs + ud_costs = defaultdict(float) + d_costs = defaultdict(float) + for ei in flows: + # screen to see if there is an associated cost + cost_index = (ei.r, ei.p, ei.e) + if cost_index not in M.CostEmission: + continue + # check for epsilon + if abs(flows[ei]) < epsilon: + flows[ei] = 0.0 + continue + undiscounted_emiss_cost = ( + flows[ei] * M.CostEmission[ei.r, ei.p, ei.e] * MPL[ei.r, ei.p, ei.t, ei.v] + ) + discounted_emiss_cost = temoa_rules.fixed_or_variable_cost( + cap_or_flow=flows[ei], + cost_factor=M.CostEmission[ei.r, ei.p, ei.e], + process_lifetime=MPL[ei.r, ei.p, ei.t, ei.v], + GDR=GDR, + P_0=p_0, + p=ei.p, + ) + ud_costs[ei.r, ei.p, ei.t, ei.v] += undiscounted_emiss_cost + d_costs[ei.r, ei.p, ei.t, ei.v] += discounted_emiss_cost + costs = defaultdict(dict) + for k in ud_costs: + costs[k][CostType.EMISS] = ud_costs[k] + for k in d_costs: + costs[k][CostType.D_EMISS] = d_costs[k] + + # wow, that was like pulling teeth + return costs, flows diff --git a/temoa/temoa_model/table_writer.py b/temoa/temoa_model/table_writer.py index 86533183..64e79d4c 100644 --- a/temoa/temoa_model/table_writer.py +++ b/temoa/temoa_model/table_writer.py @@ -3,20 +3,30 @@ """ import sqlite3 import sys -from collections import defaultdict, namedtuple +from collections import 
defaultdict from collections.abc import Iterable -from enum import Enum, unique from logging import getLogger from pathlib import Path from typing import TYPE_CHECKING -from pyomo.core import value, Objective from pyomo.opt import SolverResults from definitions import PROJECT_ROOT from temoa.extensions.monte_carlo.mc_run import ChangeRecord -from temoa.temoa_model import temoa_rules -from temoa.temoa_model.exchange_tech_cost_ledger import CostType, ExchangeTechCostLedger +from temoa.temoa_model.data_brick import DataBrick +from temoa.temoa_model.exchange_tech_cost_ledger import CostType +from temoa.temoa_model.table_data_puller import ( + poll_capacity_results, + poll_flow_results, + FI, + FlowType, + EI, + _marks, + CapData, + poll_objective, + poll_cost_results, + poll_emissions, +) from temoa.temoa_model.temoa_config import TemoaConfig from temoa.temoa_model.temoa_mode import TemoaMode from temoa.temoa_model.temoa_model import TemoaModel @@ -77,42 +87,6 @@ mc_tweaks_file_loc = Path(PROJECT_ROOT, 'temoa/extensions/monte_carlo/make_deltas_table.sql') -def _marks(num: int) -> str: - """convenience to make a sequence of question marks for query""" - qs = ','.join('?' for _ in range(num)) - marks = '(' + qs + ')' - return marks - - -EI = namedtuple('EI', ['r', 'p', 't', 'v', 'e']) -"""Emission Index""" - - -@unique -class FlowType(Enum): - """Types of flow tracked""" - - IN = 1 - OUT = 2 - CURTAIL = 3 - FLEX = 4 - LOST = 5 - - -FI = namedtuple('FI', ['r', 'p', 's', 'd', 'i', 't', 'v', 'o']) -"""Flow Index""" - - -def ritvo(fi: FI) -> tuple: - """convert FI to ritvo index""" - return fi.r, fi.i, fi.t, fi.v, fi.o - - -def rpetv(fi: FI, e: str) -> tuple: - """convert FI and emission to rpetv index""" - return fi.r, fi.p, e, fi.t, fi.v - - class TableWriter: def __init__(self, config: TemoaConfig, epsilon=1e-5): self.config = config @@ -149,7 +123,11 @@ def write_results( self.write_objective(M, iteration=iteration) self.write_capacity_tables(M, iteration=iteration) # analyze the emissions to get the costs and flows - e_costs, e_flows = self._gather_emission_costs_and_flows(M) + if self.config.scenario_mode == TemoaMode.MYOPIC: + p_0 = M.MyopicBaseyear + else: + p_0 = None # min year will be used in poll + e_costs, e_flows = poll_emissions(M=M, p_0=p_0) self.emission_register = e_flows self.write_emissions(iteration=iteration) self.write_costs(M, emission_entries=e_costs, iteration=iteration) @@ -176,13 +154,13 @@ def write_mm_results(self, M: TemoaModel, iteration: int): self._set_tech_sectors() self.write_objective(M, iteration=iteration) # analyze the emissions to get the costs and flows - e_costs, e_flows = self._gather_emission_costs_and_flows(M) + e_costs, e_flows = poll_emissions(M=M) self.emission_register = e_flows self.write_emissions(iteration=iteration) self.con.commit() self.con.execute('VACUUM') - def write_mc_results(self, M: TemoaModel, iteration: int): + def write_mc_results(self, brick: DataBrick, iteration: int): """ tailored write function to capture appropriate monte carlo results :param M: solve model @@ -192,12 +170,20 @@ def write_mc_results(self, M: TemoaModel, iteration: int): if not self.tech_sectors: self._set_tech_sectors() # analyze the emissions to get the costs and flows - e_costs, e_flows = self._gather_emission_costs_and_flows(M) + e_costs, e_flows = brick.emission_cost_data, brick.emission_flows self.emission_register = e_flows self.write_emissions(iteration=iteration) - self.write_capacity_tables(M, iteration=iteration) - self.write_summary_flow(M, 
iteration=iteration) - self.write_objective(M, iteration=iteration) + + # the rest can be directly inserted from the data_brick + self._insert_capacity_results(brick.capacity_data, iteration=iteration) + self._insert_summary_flow_results(flow_data=brick.flow_data, iteration=iteration) + self._insert_cost_results( + regular_entries=brick.cost_data, + exchange_entries=brick.exchange_cost_data, + emission_entries=e_costs, + iteration=iteration, + ) + self._insert_objective_results(brick.obj_data, iteration=iteration) self.con.commit() self.con.execute('VACUUM') @@ -234,20 +220,16 @@ def clear_iterative_runs(self): def write_objective(self, M: TemoaModel, iteration=None) -> None: """Write the value of all ACTIVE objectives to the DB""" - objs: list[Objective] = list(M.component_data_objects(Objective)) - active_objs = [obj for obj in objs if obj.active] - if len(active_objs) > 1: - logger.warning( - 'Multiple active objectives found for scenario: %s. All will be logged in db', - self.config.scenario, - ) + obj_vals = poll_objective(M=M) + self._insert_objective_results(obj_vals, iteration=iteration) + + def _insert_objective_results(self, obj_vals: list, iteration: int) -> None: scenario_name = ( self.config.scenario + f'-{iteration}' if iteration is not None else self.config.scenario ) - for obj in active_objs: - obj_name, obj_value = obj.getname(fully_qualified=True), value(obj) + for obj_name, obj_value in obj_vals: qry = 'INSERT INTO OutputObjective VALUES (?, ?, ?)' data = (scenario_name, obj_name, obj_value) self.con.execute(qry, data) @@ -275,8 +257,7 @@ def write_emissions(self, iteration=None) -> None: self.con.executemany(qry, data) self.con.commit() - def write_capacity_tables(self, M: TemoaModel, iteration: int | None = None) -> None: - """Write the capacity tables to the DB""" + def _insert_capacity_results(self, cap_data: CapData, iteration: int | None) -> None: if not self.tech_sectors: raise RuntimeError('tech sectors not available... 
code error') scenario = self.config.scenario @@ -284,23 +265,16 @@ def write_capacity_tables(self, M: TemoaModel, iteration: int | None = None) -> scenario = scenario + f'-{iteration}' # Built Capacity data = [] - for r, t, v in M.V_NewCapacity: - if v in M.time_optimize: - val = value(M.V_NewCapacity[r, t, v]) - s = self.tech_sectors.get(t) - if abs(val) < self.epsilon: - continue - new_cap = (scenario, r, s, t, v, val) - data.append(new_cap) + for r, t, v, val in cap_data.built: + s = self.tech_sectors.get(t) + new_cap = (scenario, r, s, t, v, val) + data.append(new_cap) qry = 'INSERT INTO OutputBuiltCapacity VALUES (?, ?, ?, ?, ?, ?)' self.con.executemany(qry, data) # NetCapacity data = [] - for r, p, t, v in M.V_Capacity: - val = value(M.V_Capacity[r, p, t, v]) - if abs(val) < self.epsilon: - continue + for r, p, t, v, val in cap_data.net: s = self.tech_sectors.get(t) new_net_cap = (scenario, r, s, p, t, v, val) data.append(new_net_cap) @@ -309,18 +283,19 @@ def write_capacity_tables(self, M: TemoaModel, iteration: int | None = None) -> # Retired Capacity data = [] - for r, p, t, v in M.V_RetiredCapacity: - val = value(M.V_RetiredCapacity[r, p, t, v]) - if abs(val) < self.epsilon: - continue + for r, p, t, v, val in cap_data.retired: s = self.tech_sectors.get(t) new_retired_cap = (scenario, r, s, p, t, v, val) data.append(new_retired_cap) qry = 'INSERT INTO OutputRetiredCapacity VALUES (?, ?, ?, ?, ?, ?, ?)' self.con.executemany(qry, data) - self.con.commit() + def write_capacity_tables(self, M: TemoaModel, iteration: int | None = None) -> None: + """Write the capacity tables to the DB""" + cap_data = poll_capacity_results(M=M) + self._insert_capacity_results(cap_data=cap_data, iteration=iteration) + def write_flow_tables(self, iteration=None) -> None: """Write the flow tables""" if not self.tech_sectors: @@ -365,11 +340,14 @@ def write_summary_flow(self, M: TemoaModel, iteration: int | None = None): :param M: The solved model :return: None """ + flow_data = self.calculate_flows(M=M) + self._insert_summary_flow_results(flow_data=flow_data, iteration=iteration) + + def _insert_summary_flow_results(self, flow_data: dict, iteration: int | None) -> None: if not self.tech_sectors: raise RuntimeError('tech sectors not available... 
code error') - # must recalculate flows from the model - self.flow_register = self.calculate_flows(M) + self.flow_register = flow_data if isinstance(iteration, int): scenario = self.config.scenario + f'-{iteration}' elif iteration is None: @@ -402,6 +380,10 @@ def write_summary_flow(self, M: TemoaModel, iteration: int | None = None): self.con.commit() + # @staticmethod + # def poll_summary_flow_results( M:TemoaModel) -> dict: + # flow_data = self.calculate_flows(M) + def check_flow_balance(self, M: TemoaModel) -> bool: """An easy sanity check to ensure that the flow tables are balanced, except for storage""" flows = self.flow_register @@ -461,122 +443,7 @@ def check_flow_balance(self, M: TemoaModel) -> bool: def calculate_flows(self, M: TemoaModel) -> dict[FI, dict[FlowType, float]]: """Gather all flows by Flow Index and Type""" - - res: dict[FI, dict[FlowType, float]] = defaultdict(lambda: defaultdict(float)) - - # ---- NON-annual ---- - - # Storage, which has a unique v_flow_in (non-storage techs do not have this variable) - for key in M.V_FlowIn: - fi = FI(*key) - flow = value(M.V_FlowIn[fi]) - if abs(flow) < self.epsilon: - continue - res[fi][FlowType.IN] = flow - res[fi][FlowType.LOST] = (1 - value(M.Efficiency[ritvo(fi)])) * flow - - # regular flows - for key in M.V_FlowOut: - fi = FI(*key) - flow = value(M.V_FlowOut[fi]) - if abs(flow) < self.epsilon: - continue - res[fi][FlowType.OUT] = flow - - if fi.t not in M.tech_storage: # we can get the flow in by out/eff... - flow = value(M.V_FlowOut[fi]) / value(M.Efficiency[ritvo(fi)]) - res[fi][FlowType.IN] = flow - res[fi][FlowType.LOST] = (1 - value(M.Efficiency[ritvo(fi)])) * flow - - # curtailment flows - for key in M.V_Curtailment: - fi = FI(*key) - val = value(M.V_Curtailment[fi]) - if abs(val) < self.epsilon: - continue - res[fi][FlowType.CURTAIL] = val - - # flex techs. This will subtract the flex from their output flow IOT make OUT the "net" - for key in M.V_Flex: - fi = FI(*key) - flow = value(M.V_Flex[fi]) - if abs(flow) < self.epsilon: - continue - res[fi][FlowType.FLEX] = flow - res[fi][FlowType.OUT] -= flow - - # ---- annual ---- - - # basic annual flows - for r, p, i, t, v, o in M.V_FlowOutAnnual: - for s in M.time_season: - for d in M.time_of_day: - fi = FI(r, p, s, d, i, t, v, o) - flow = value(M.V_FlowOutAnnual[r, p, i, t, v, o]) * value(M.SegFrac[s, d]) - if abs(flow) < self.epsilon: - continue - res[fi][FlowType.OUT] = flow - res[fi][FlowType.IN] = flow / value(M.Efficiency[ritvo(fi)]) - res[fi][FlowType.LOST] = (1 - value(M.Efficiency[ritvo(fi)])) * res[fi][ - FlowType.IN - ] - - # flex annual - for r, p, i, t, v, o in M.V_FlexAnnual: - for s in M.time_season: - for d in M.time_of_day: - fi = FI(r, p, s, d, i, t, v, o) - flow = value(M.V_FlexAnnual[r, p, i, t, v, o]) * value(M.SegFrac[s, d]) - if abs(flow) < self.epsilon: - continue - res[fi][FlowType.FLEX] = flow - res[fi][FlowType.OUT] -= flow - - return res - - @staticmethod - def loan_costs( - loan_rate, # this is referred to as LoanRate in parameters - loan_life, - capacity, - invest_cost, - process_life, - p_0, - p_e, - global_discount_rate, - vintage, - **kwargs, - ) -> tuple[float, float]: - """ - Calculate Loan costs by calling the loan annualize and loan cost functions in temoa_rules - :return: tuple of [model-view discounted cost, un-discounted annuity] - """ - # dev note: this is a passthrough function. 
Sole intent is to use the EXACT formula the - # model uses for these costs - loan_ar = temoa_rules.loan_annualization_rate(loan_rate=loan_rate, loan_life=loan_life) - model_ic = temoa_rules.loan_cost( - capacity, - invest_cost, - loan_annualize=loan_ar, - lifetime_loan_process=loan_life, - P_0=p_0, - P_e=p_e, - GDR=global_discount_rate, - vintage=vintage, - ) - # Override the GDR to get the undiscounted value - global_discount_rate = 0 - undiscounted_cost = temoa_rules.loan_cost( - capacity, - invest_cost, - loan_annualize=loan_ar, - lifetime_loan_process=loan_life, - P_0=p_0, - P_e=p_e, - GDR=global_discount_rate, - vintage=vintage, - ) - return model_ic, undiscounted_cost + return poll_flow_results(M, self.epsilon) def write_costs(self, M: TemoaModel, emission_entries=None, iteration=None): """ @@ -593,221 +460,19 @@ def write_costs(self, M: TemoaModel, emission_entries=None, iteration=None): p_0 = M.MyopicBaseyear else: p_0 = min(M.time_optimize) - # NOTE: The end period in myopic mode is specific to the window / MyopicIndex - # the time_future set is specific to the window - p_e = M.time_future.last() - - # conveniences... - GDR = value(M.GlobalDiscountRate) - MPL = M.ModelProcessLife - LLN = M.LoanLifetimeProcess - - exchange_costs = ExchangeTechCostLedger(M) - entries = defaultdict(dict) - for r, t, v in M.CostInvest.sparse_iterkeys(): # Returns only non-zero values - # gather details... - cap = value(M.V_NewCapacity[r, t, v]) - if abs(cap) < self.epsilon: - continue - loan_life = value(LLN[r, t, v]) - loan_rate = value(M.LoanRate[r, t, v]) - - model_loan_cost, undiscounted_cost = self.loan_costs( - loan_rate=loan_rate, - loan_life=loan_life, - capacity=cap, - invest_cost=value(M.CostInvest[r, t, v]), - process_life=value(M.LifetimeProcess[r, t, v]), - p_0=p_0, - p_e=p_e, - global_discount_rate=GDR, - vintage=v, - ) - # screen for linked region... 
- if '-' in r: - exchange_costs.add_cost_record( - r, - period=v, - tech=t, - vintage=v, - cost=model_loan_cost, - cost_type=CostType.D_INVEST, - ) - exchange_costs.add_cost_record( - r, - period=v, - tech=t, - vintage=v, - cost=undiscounted_cost, - cost_type=CostType.INVEST, - ) - else: - # enter it into the entries table with period of cost = vintage (p=v) - entries[r, v, t, v].update( - {CostType.D_INVEST: model_loan_cost, CostType.INVEST: undiscounted_cost} - ) - for r, p, t, v in M.CostFixed.sparse_iterkeys(): - cap = value(M.V_Capacity[r, p, t, v]) - if abs(cap) < self.epsilon: - continue - - fixed_cost = value(M.CostFixed[r, p, t, v]) - undiscounted_fixed_cost = cap * fixed_cost * value(MPL[r, p, t, v]) - - model_fixed_cost = temoa_rules.fixed_or_variable_cost( - cap, fixed_cost, value(MPL[r, p, t, v]), GDR=GDR, P_0=p_0, p=p - ) - if '-' in r: - exchange_costs.add_cost_record( - r, - period=p, - tech=t, - vintage=v, - cost=model_fixed_cost, - cost_type=CostType.D_FIXED, - ) - exchange_costs.add_cost_record( - r, - period=p, - tech=t, - vintage=v, - cost=undiscounted_fixed_cost, - cost_type=CostType.FIXED, - ) - else: - entries[r, p, t, v].update( - {CostType.D_FIXED: model_fixed_cost, CostType.FIXED: undiscounted_fixed_cost} - ) - - for r, p, t, v in M.CostVariable.sparse_iterkeys(): - if t not in M.tech_annual: - activity = sum( - value(M.V_FlowOut[r, p, S_s, S_d, S_i, t, v, S_o]) - for S_i in M.processInputs[r, p, t, v] - for S_o in M.ProcessOutputsByInput[r, p, t, v, S_i] - for S_s in M.time_season - for S_d in M.time_of_day - ) - else: - activity = sum( - value(M.V_FlowOutAnnual[r, p, S_i, t, v, S_o]) - for S_i in M.processInputs[r, p, t, v] - for S_o in M.ProcessOutputsByInput[r, p, t, v, S_i] - ) - if abs(activity) < self.epsilon: - continue + entries, exchange_entries = poll_cost_results(M, p_0, self.epsilon) - var_cost = value(M.CostVariable[r, p, t, v]) - undiscounted_var_cost = activity * var_cost * value(MPL[r, p, t, v]) + # write to table + self._insert_cost_results(entries, exchange_entries, emission_entries, iteration) - model_var_cost = temoa_rules.fixed_or_variable_cost( - activity, var_cost, value(MPL[r, p, t, v]), GDR=GDR, P_0=p_0, p=p - ) - if '-' in r: - exchange_costs.add_cost_record( - r, - period=p, - tech=t, - vintage=v, - cost=model_var_cost, - cost_type=CostType.D_VARIABLE, - ) - exchange_costs.add_cost_record( - r, - period=p, - tech=t, - vintage=v, - cost=undiscounted_var_cost, - cost_type=CostType.VARIABLE, - ) - else: - entries[r, p, t, v].update( - {CostType.D_VARIABLE: model_var_cost, CostType.VARIABLE: undiscounted_var_cost} - ) + def _insert_cost_results(self, regular_entries, exchange_entries, emission_entries, iteration): + # add the emission costs to the same row data, if provided if emission_entries: for k in emission_entries.keys(): - entries[k].update(emission_entries[k]) - # write to table - # translate the entries into fodder for the query - self._write_cost_rows(entries, iteration=iteration) - self._write_cost_rows(exchange_costs.get_entries(), iteration=iteration) - - def _gather_emission_costs_and_flows(self, M: 'TemoaModel'): - """Gather all emission flows and price them""" - - # UPDATE: older versions brought forward had some accounting errors here for flex/curtailed emissions - # see the note on emissions in the Cost function in temoa_rules - - GDR = value(M.GlobalDiscountRate) - MPL = M.ModelProcessLife - if self.config.scenario_mode == TemoaMode.MYOPIC: - p_0 = M.MyopicBaseyear - else: - p_0 = min(M.time_optimize) - - base = [ - (r, 
p, e, i, t, v, o) - for (r, e, i, t, v, o) in M.EmissionActivity - for p in M.time_optimize - if (r, p, t, v) in M.processInputs - ] - - # The "base set" can be expanded now to cover normal/annual indexing sets - normal = [ - (r, p, e, s, d, i, t, v, o) - for (r, p, e, i, t, v, o) in base - for s in M.time_season - for d in M.time_of_day - if t not in M.tech_annual - ] - annual = [(r, p, e, i, t, v, o) for (r, p, e, i, t, v, o) in base if t in M.tech_annual] - - flows: dict[EI, float] = defaultdict(float) - # iterate through the normal and annual and accumulate flow values - for r, p, e, s, d, i, t, v, o in normal: - flows[EI(r, p, t, v, e)] += ( - value(M.V_FlowOut[r, p, s, d, i, t, v, o]) * M.EmissionActivity[r, e, i, t, v, o] - ) - - for r, p, e, i, t, v, o in annual: - flows[EI(r, p, t, v, e)] += ( - value(M.V_FlowOutAnnual[r, p, i, t, v, o]) * M.EmissionActivity[r, e, i, t, v, o] - ) - - # gather costs - ud_costs = defaultdict(float) - d_costs = defaultdict(float) - for ei in flows: - # screen to see if there is an associated cost - cost_index = (ei.r, ei.p, ei.e) - if cost_index not in M.CostEmission: - continue - # check for epsilon - if abs(flows[ei]) < self.epsilon: - flows[ei] = 0.0 - continue - undiscounted_emiss_cost = ( - flows[ei] * M.CostEmission[ei.r, ei.p, ei.e] * MPL[ei.r, ei.p, ei.t, ei.v] - ) - discounted_emiss_cost = temoa_rules.fixed_or_variable_cost( - cap_or_flow=flows[ei], - cost_factor=M.CostEmission[ei.r, ei.p, ei.e], - process_lifetime=MPL[ei.r, ei.p, ei.t, ei.v], - GDR=GDR, - P_0=p_0, - p=ei.p, - ) - ud_costs[ei.r, ei.p, ei.t, ei.v] += undiscounted_emiss_cost - d_costs[ei.r, ei.p, ei.t, ei.v] += discounted_emiss_cost - costs = defaultdict(dict) - for k in ud_costs: - costs[k][CostType.EMISS] = ud_costs[k] - for k in d_costs: - costs[k][CostType.D_EMISS] = d_costs[k] - - # wow, that was like pulling teeth - return costs, flows + regular_entries[k].update(emission_entries[k]) + self._write_cost_rows(regular_entries, iteration=iteration) + self._write_cost_rows(exchange_entries, iteration=iteration) def _write_cost_rows(self, entries, iteration=None): """Write the entries to the OutputCost table""" diff --git a/tests/test_table_writer.py b/tests/test_table_writer.py index 67382d18..ac43c5f3 100644 --- a/tests/test_table_writer.py +++ b/tests/test_table_writer.py @@ -28,7 +28,7 @@ import pytest -from temoa.temoa_model import table_writer +from temoa.temoa_model.table_data_puller import loan_costs params = [ { @@ -84,7 +84,7 @@ def test_loan_costs(param): Test the loan cost calculations """ # we will test with a 1% error to accommodate the approximation of GDR=0 - model_cost, undiscounted_cost = table_writer.TableWriter.loan_costs(**param) + model_cost, undiscounted_cost = loan_costs(**param) assert model_cost == pytest.approx(param['model_cost'], rel=0.01) assert undiscounted_cost == pytest.approx(param['undiscounted_cost'], rel=0.01) @@ -97,6 +97,6 @@ def test_loan_costs_with_zero_GDR(param): Test the formula with zero for GDR to make sure it is handled correctly. The formula risks division by zero if this is not correct. 
""" - model_cost, undiscounted_cost = table_writer.TableWriter.loan_costs(**param) + model_cost, undiscounted_cost = loan_costs(**param) assert model_cost == pytest.approx(param['model_cost'], abs=0.01) assert undiscounted_cost == pytest.approx(param['undiscounted_cost'], abs=0.01) From f31b86bf51696853965d3e8c32d9d7023e24622e Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 5 Dec 2024 16:40:07 -0800 Subject: [PATCH 16/70] Fixed bug in data_brick.py that was not actually returning an obj. --- data_files/my_configs/monte_carlo_utopia.toml | 2 +- temoa/extensions/monte_carlo/mc_sequencer.py | 6 +++-- temoa/temoa_model/data_brick.py | 24 ++++++++++++++----- temoa/temoa_model/table_data_puller.py | 4 +++- 4 files changed, 26 insertions(+), 10 deletions(-) diff --git a/data_files/my_configs/monte_carlo_utopia.toml b/data_files/my_configs/monte_carlo_utopia.toml index 575dfc63..875946bf 100644 --- a/data_files/my_configs/monte_carlo_utopia.toml +++ b/data_files/my_configs/monte_carlo_utopia.toml @@ -110,5 +110,5 @@ activity_labels = [] [monte_carlo] # a path from the PROJECT ROOT to the settings file that contains the run data. -run_settings = 'data_files/monte_carlo/run_settings_2.csv' +run_settings = 'data_files/monte_carlo/run_settings_4.csv' diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 171064e6..73b362d0 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -188,7 +188,7 @@ def start(self): # print('no result') if next_result is not None: self.process_solve_results(next_result) - logger.info('8. Solve count: %d', self.solve_count) + logger.info('Solve count: %d', self.solve_count) self.solve_count += 1 if self.verbose or not self.config.silent: print(f'MC Solve count: {self.solve_count}') @@ -276,5 +276,7 @@ def process_solve_results(self, brick: DataBrick): if idx in self.seen_instance_indices: raise ValueError(f'Instance index {idx} already seen. 
Likely coding error') self.seen_instance_indices.add(idx) - logger.info('Processing results for %s', brick.name) + tic = datetime.now() self.writer.write_mc_results(brick=brick, iteration=idx) + toc = datetime.now() + logger.info('Processed results for %s in %0.2f seconds', brick.name, (toc - tic).total_seconds()) diff --git a/temoa/temoa_model/data_brick.py b/temoa/temoa_model/data_brick.py index 01c0a96e..df49eba6 100644 --- a/temoa/temoa_model/data_brick.py +++ b/temoa/temoa_model/data_brick.py @@ -106,18 +106,30 @@ def emission_cost_data(self): def data_brick_factory(model: TemoaModel) -> DataBrick: - _name = model.name + name = model.name # process costs - _regular_costs, _exchange_costs = poll_cost_results(model, p_0=None) + regular_costs, exchange_costs = poll_cost_results(model, p_0=None) # process flows - _flow_data = poll_flow_results(model) + flow_data = poll_flow_results(model) # process emissions - _emission_costs, _emission_flows = poll_emissions(model) + emission_costs, emission_flows = poll_emissions(model) # poll capacity - _capacity_data = poll_capacity_results(model) + capacity_data = poll_capacity_results(model) # process objectives - _obj_data = poll_objective(model) + obj_data = poll_objective(model) + + db = DataBrick( + name=name, + emission_costs=emission_costs, + emission_flows=emission_flows, + capacity_data=capacity_data, + flow_data=flow_data, + obj_data=obj_data, + regular_costs=regular_costs, + exchange_costs=exchange_costs, + ) + return db \ No newline at end of file diff --git a/temoa/temoa_model/table_data_puller.py b/temoa/temoa_model/table_data_puller.py index bfb0d49e..a0728e6b 100644 --- a/temoa/temoa_model/table_data_puller.py +++ b/temoa/temoa_model/table_data_puller.py @@ -30,6 +30,7 @@ by Workers who shouldn't interact with DB). 
Dev Note: In future, if transition away from sqlite, this could all be refactored to perform tasks within workers, but concurrent access to sqlite is a no-go """ +import functools import logging from collections import namedtuple, defaultdict from enum import unique, Enum @@ -117,7 +118,8 @@ def poll_capacity_results(M: TemoaModel, epsilon=1e-5) -> CapData: def poll_flow_results(M: TemoaModel, epsilon=1e-5) -> dict[FI, dict[FlowType, float]]: """A static version that can be called directly by a solver worker without making a class instance""" - res: dict[FI, dict[FlowType, float]] = defaultdict(lambda: defaultdict(float)) + dd = functools.partial(defaultdict, float) + res: dict[FI, dict[FlowType, float]] = defaultdict(dd) # ---- NON-annual ---- From 8ad98b080470e81a3de039f05cb8d6aa7e583976 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 5 Dec 2024 16:43:34 -0800 Subject: [PATCH 17/70] adjust workers for TRACE --- temoa/extensions/monte_carlo/MC_solver_options.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/temoa/extensions/monte_carlo/MC_solver_options.toml b/temoa/extensions/monte_carlo/MC_solver_options.toml index 524781cf..0aef60e2 100644 --- a/temoa/extensions/monte_carlo/MC_solver_options.toml +++ b/temoa/extensions/monte_carlo/MC_solver_options.toml @@ -1,12 +1,12 @@ # A container for solver options # the top level solver name in brackets should align with the solver name in the config.toml -num_workers = 6 +num_workers = 20 [gurobi] Method= 2 # Barrier ONLY -Threads= 20 # per solver instance +Threads= 5 # per solver instance BarConvTol = 0.01 # Relative Barrier Tolerance primal-dual FeasibilityTol= 1e-2 # pretty loose Crossover= 0 # Disabled From 59c1284a294beefdd3dea0a49ccc454dd3d2c878 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 6 Dec 2024 10:17:48 -0800 Subject: [PATCH 18/70] adding some additional logging, reverting worker settings to 6 workers at 20 threads ea. 
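The new log lines all follow the same elapsed-time pattern around queue operations. A minimal sketch of that pattern, using placeholder names (`timed_put`, `some_queue`, the message text) rather than identifiers from this change:

    from datetime import datetime
    import logging

    logger = logging.getLogger(__name__)

    def timed_put(some_queue, item):
        # time a potentially blocking queue operation and log the duration
        tic = datetime.now()
        some_queue.put(item)
        toc = datetime.now()
        logger.info('Put item on queue in %0.2f seconds', (toc - tic).total_seconds())

The periodic queue-size report stays wrapped in try/except because multiprocessing.Queue.qsize() raises NotImplementedError on macOS.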
--- .../extensions/monte_carlo/MC_solver_options.toml | 4 ++-- temoa/extensions/monte_carlo/mc_sequencer.py | 14 ++++++++++---- temoa/extensions/monte_carlo/mc_worker.py | 3 +++ 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/temoa/extensions/monte_carlo/MC_solver_options.toml b/temoa/extensions/monte_carlo/MC_solver_options.toml index 0aef60e2..524781cf 100644 --- a/temoa/extensions/monte_carlo/MC_solver_options.toml +++ b/temoa/extensions/monte_carlo/MC_solver_options.toml @@ -1,12 +1,12 @@ # A container for solver options # the top level solver name in brackets should align with the solver name in the config.toml -num_workers = 20 +num_workers = 6 [gurobi] Method= 2 # Barrier ONLY -Threads= 5 # per solver instance +Threads= 20 # per solver instance BarConvTol = 0.01 # Relative Barrier Tolerance primal-dual FeasibilityTol= 1e-2 # pretty loose Crossover= 0 # Disabled diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 73b362d0..38d0d81f 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -30,6 +30,7 @@ import logging import queue import sqlite3 +import sys import time import tomllib from datetime import datetime @@ -156,13 +157,17 @@ def start(self): iter_counter = 0 while more_runs: try: + tic = datetime.now() work_queue.put((run_name, dp), block=False) # put a log on the fire, if room - logger.info('Putting an instance in the work queue') + toc = datetime.now() + + logger.info('Put a DataPortal in the work queue of size %0.2f MB in work queue in %0.2f seconds', + sys.getsizeof((run_name, dp)), (toc - tic).total_seconds()) try: tic = datetime.now() mc_run = next(run_gen) toc = datetime.now() - logger.info('Made mc_run from generator in %0.2f', (toc - tic).total_seconds()) + logger.info('Made mc_run from generator in %0.2f seconds', (toc - tic).total_seconds()) # capture the "tweaks" self.writer.write_tweaks( iteration=mc_run.run_index, change_records=mc_run.change_records @@ -181,7 +186,7 @@ def start(self): next_result = result_queue.get_nowait() toc = datetime.now() logger.info( - 'Pulled DataBrick from result_queue in %0.2f', (toc - tic).total_seconds() + 'Pulled DataBrick from result_queue in %0.2f seconds', (toc - tic).total_seconds() ) except queue.Empty: next_result = None @@ -203,7 +208,7 @@ def start(self): time.sleep(0.1) # prevent hyperactivity... # check the queues... - if iter_counter % 100 == 0: + if iter_counter % 6000 == 0: # about every 10 minutes...post the queue sizes try: logger.info('Work queue size: %d', work_queue.qsize()) logger.info('Result queue size: %d', result_queue.qsize()) @@ -276,6 +281,7 @@ def process_solve_results(self, brick: DataBrick): if idx in self.seen_instance_indices: raise ValueError(f'Instance index {idx} already seen. 
Likely coding error') self.seen_instance_indices.add(idx) + logger.info('Starting processing of DataBrick of size %0.2f MB', sys.getsizeof(brick)/1e6) tic = datetime.now() self.writer.write_mc_results(brick=brick, iteration=idx) toc = datetime.now() diff --git a/temoa/extensions/monte_carlo/mc_worker.py b/temoa/extensions/monte_carlo/mc_worker.py index 1a2a201d..32d3a3e4 100644 --- a/temoa/extensions/monte_carlo/mc_worker.py +++ b/temoa/extensions/monte_carlo/mc_worker.py @@ -104,7 +104,10 @@ def run(self): self.opt.options = self.solver_options # wait for a DataPortal object to show up, then get to work + tic = datetime.now() data = self.dp_queue.get() + toc = datetime.now() + logger.info('Worker %d pulled a DataPortal from work queue in %0.2f seconds', self.worker_number, (toc - tic).total_seconds()) if data == 'ZEBRA': # shutdown signal if verbose: print(f'worker {self.worker_number} got shutdown signal') From db7eeb24d3c40d4f95b3fdb0562c627e48e48d73 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 6 Dec 2024 10:22:32 -0800 Subject: [PATCH 19/70] adding some additional logging, reverting worker settings to 6 workers at 20 threads ea. --- temoa/extensions/monte_carlo/mc_sequencer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 38d0d81f..dcf5918a 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -161,8 +161,8 @@ def start(self): work_queue.put((run_name, dp), block=False) # put a log on the fire, if room toc = datetime.now() - logger.info('Put a DataPortal in the work queue of size %0.2f MB in work queue in %0.2f seconds', - sys.getsizeof((run_name, dp)), (toc - tic).total_seconds()) + logger.info('Put a DataPortal in the work queue of size %0.2f KB in work queue in %0.2f seconds', + sys.getsizeof((run_name, dp))/1e3, (toc - tic).total_seconds()) try: tic = datetime.now() mc_run = next(run_gen) @@ -281,7 +281,7 @@ def process_solve_results(self, brick: DataBrick): if idx in self.seen_instance_indices: raise ValueError(f'Instance index {idx} already seen. Likely coding error') self.seen_instance_indices.add(idx) - logger.info('Starting processing of DataBrick of size %0.2f MB', sys.getsizeof(brick)/1e6) + logger.info('Starting processing of DataBrick of size %0.2f KB', sys.getsizeof(brick)/1e3) tic = datetime.now() self.writer.write_mc_results(brick=brick, iteration=idx) toc = datetime.now() From 1ca0677e0ce3d5914c967eadd4e32d825fb570a4 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 6 Dec 2024 10:51:06 -0800 Subject: [PATCH 20/70] adding some additional logging, reverting worker settings to 6 workers at 20 threads ea. 
--- temoa/extensions/monte_carlo/mc_sequencer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index dcf5918a..a7237238 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -162,7 +162,7 @@ def start(self): toc = datetime.now() logger.info('Put a DataPortal in the work queue of size %0.2f KB in work queue in %0.2f seconds', - sys.getsizeof((run_name, dp))/1e3, (toc - tic).total_seconds()) + sys.getsizeof(dp)/1e3, (toc - tic).total_seconds()) try: tic = datetime.now() mc_run = next(run_gen) From dd3b60b83f16a6e112fd5c248627348b0c3fbe67 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 7 Dec 2024 09:50:16 -0800 Subject: [PATCH 21/70] Clean Up work on MC - commented some functions - removed some erroneous "sizeof" measures --- .../monte_carlo/MC_solver_options.toml | 6 ++--- temoa/extensions/monte_carlo/mc_run.py | 2 +- temoa/extensions/monte_carlo/mc_sequencer.py | 23 +++++++++++------- temoa/extensions/monte_carlo/mc_worker.py | 6 ++++- temoa/temoa_model/data_brick.py | 14 +++++------ temoa/temoa_model/table_data_puller.py | 24 +++++++++++++++---- 6 files changed, 50 insertions(+), 25 deletions(-) diff --git a/temoa/extensions/monte_carlo/MC_solver_options.toml b/temoa/extensions/monte_carlo/MC_solver_options.toml index 524781cf..da91cd56 100644 --- a/temoa/extensions/monte_carlo/MC_solver_options.toml +++ b/temoa/extensions/monte_carlo/MC_solver_options.toml @@ -1,14 +1,14 @@ # A container for solver options # the top level solver name in brackets should align with the solver name in the config.toml -num_workers = 6 +num_workers = 11 [gurobi] Method= 2 # Barrier ONLY Threads= 20 # per solver instance -BarConvTol = 0.01 # Relative Barrier Tolerance primal-dual -FeasibilityTol= 1e-2 # pretty loose +BarConvTol = 1.0e-2 # Relative Barrier Tolerance primal-dual +FeasibilityTol= 1.0e-2 # pretty loose Crossover= 0 # Disabled TimeLimit= 18000 # 5 hrs diff --git a/temoa/extensions/monte_carlo/mc_run.py b/temoa/extensions/monte_carlo/mc_run.py index a6eb23a7..33b2b6ba 100644 --- a/temoa/extensions/monte_carlo/mc_run.py +++ b/temoa/extensions/monte_carlo/mc_run.py @@ -170,7 +170,7 @@ def row_parser(self, row_number: int, row: str) -> RowData: class MCRun: """ - The data (and more?) to support a model build + run + A Container class to hold the data (and more?) 
to support a model build + run """ def __init__( diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index a7237238..629a4d43 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -30,7 +30,6 @@ import logging import queue import sqlite3 -import sys import time import tomllib from datetime import datetime @@ -161,13 +160,17 @@ def start(self): work_queue.put((run_name, dp), block=False) # put a log on the fire, if room toc = datetime.now() - logger.info('Put a DataPortal in the work queue of size %0.2f KB in work queue in %0.2f seconds', - sys.getsizeof(dp)/1e3, (toc - tic).total_seconds()) + logger.info( + 'Put a DataPortal in the work queue in work queue in %0.2f seconds', + (toc - tic).total_seconds(), + ) try: tic = datetime.now() mc_run = next(run_gen) toc = datetime.now() - logger.info('Made mc_run from generator in %0.2f seconds', (toc - tic).total_seconds()) + logger.info( + 'Made mc_run from generator in %0.2f seconds', (toc - tic).total_seconds() + ) # capture the "tweaks" self.writer.write_tweaks( iteration=mc_run.run_index, change_records=mc_run.change_records @@ -186,15 +189,16 @@ def start(self): next_result = result_queue.get_nowait() toc = datetime.now() logger.info( - 'Pulled DataBrick from result_queue in %0.2f seconds', (toc - tic).total_seconds() + 'Pulled DataBrick from result_queue in %0.2f seconds', + (toc - tic).total_seconds(), ) except queue.Empty: next_result = None # print('no result') if next_result is not None: self.process_solve_results(next_result) - logger.info('Solve count: %d', self.solve_count) self.solve_count += 1 + logger.info('Solve count: %d', self.solve_count) if self.verbose or not self.config.silent: print(f'MC Solve count: {self.solve_count}') # pull anything from the logging queue and log it... @@ -239,8 +243,8 @@ def start(self): if next_result is not None and next_result != 'COYOTE': logger.debug('bagged a result post-shutdown') self.process_solve_results(next_result) - logger.info('Solve count: %d', self.solve_count) self.solve_count += 1 + logger.info('Solve count: %d', self.solve_count) if self.verbose or not self.config.silent: print(f'MC Solve count: {self.solve_count}') while True: @@ -281,8 +285,9 @@ def process_solve_results(self, brick: DataBrick): if idx in self.seen_instance_indices: raise ValueError(f'Instance index {idx} already seen. 
Likely coding error') self.seen_instance_indices.add(idx) - logger.info('Starting processing of DataBrick of size %0.2f KB', sys.getsizeof(brick)/1e3) tic = datetime.now() self.writer.write_mc_results(brick=brick, iteration=idx) toc = datetime.now() - logger.info('Processed results for %s in %0.2f seconds', brick.name, (toc - tic).total_seconds()) + logger.info( + 'Processed results for %s in %0.2f seconds', brick.name, (toc - tic).total_seconds() + ) diff --git a/temoa/extensions/monte_carlo/mc_worker.py b/temoa/extensions/monte_carlo/mc_worker.py index 32d3a3e4..7226ce6f 100644 --- a/temoa/extensions/monte_carlo/mc_worker.py +++ b/temoa/extensions/monte_carlo/mc_worker.py @@ -107,7 +107,11 @@ def run(self): tic = datetime.now() data = self.dp_queue.get() toc = datetime.now() - logger.info('Worker %d pulled a DataPortal from work queue in %0.2f seconds', self.worker_number, (toc - tic).total_seconds()) + logger.debug( + 'Worker %d waited for and pulled a DataPortal from work queue in %0.2f seconds', + self.worker_number, + (toc - tic).total_seconds(), + ) if data == 'ZEBRA': # shutdown signal if verbose: print(f'worker {self.worker_number} got shutdown signal') diff --git a/temoa/temoa_model/data_brick.py b/temoa/temoa_model/data_brick.py index df49eba6..0b026834 100644 --- a/temoa/temoa_model/data_brick.py +++ b/temoa/temoa_model/data_brick.py @@ -45,7 +45,7 @@ class DataBrick: """ - A utility container for bundling assorted data structures for runs done by Worker objects. + A utility container for bundling assorted data structures for solved models done by Worker objects. """ def __init__( @@ -59,10 +59,6 @@ def __init__( regular_costs, exchange_costs, ): - """ - Build a data brick storage object from a model instance - :param model: - """ self._name = name self._emission_costs = emission_costs self._emission_flows = emission_flows @@ -106,6 +102,10 @@ def emission_cost_data(self): def data_brick_factory(model: TemoaModel) -> DataBrick: + """ + Build a data brick storage object from a model instance + :param model: A solved model to pull data from. + """ name = model.name # process costs regular_costs, exchange_costs = poll_cost_results(model, p_0=None) @@ -114,7 +114,7 @@ def data_brick_factory(model: TemoaModel) -> DataBrick: flow_data = poll_flow_results(model) # process emissions - emission_costs, emission_flows = poll_emissions(model) + emission_costs, emission_flows = poll_emissions(model, p_0=None) # poll capacity capacity_data = poll_capacity_results(model) @@ -132,4 +132,4 @@ def data_brick_factory(model: TemoaModel) -> DataBrick: regular_costs=regular_costs, exchange_costs=exchange_costs, ) - return db \ No newline at end of file + return db diff --git a/temoa/temoa_model/table_data_puller.py b/temoa/temoa_model/table_data_puller.py index a0728e6b..e67bd852 100644 --- a/temoa/temoa_model/table_data_puller.py +++ b/temoa/temoa_model/table_data_puller.py @@ -85,6 +85,12 @@ def rpetv(fi: FI, e: str) -> tuple: def poll_capacity_results(M: TemoaModel, epsilon=1e-5) -> CapData: + """ + Poll a solved model for capacity results. 
+ :param M: Solved Model + :param epsilon: epsilon (default 1e-5) + :return: a CapData object + """ # Built Capacity built = [] for r, t, v in M.V_NewCapacity: @@ -117,7 +123,12 @@ def poll_capacity_results(M: TemoaModel, epsilon=1e-5) -> CapData: def poll_flow_results(M: TemoaModel, epsilon=1e-5) -> dict[FI, dict[FlowType, float]]: - """A static version that can be called directly by a solver worker without making a class instance""" + f""" + Poll a solved model for flow results. + :param M: A solved Model + :param epsilon: epsilon (default 1e-5) + :return: nested dictionary of FlowIndex, FlowType : value + """ dd = functools.partial(defaultdict, float) res: dict[FI, dict[FlowType, float]] = defaultdict(dd) @@ -192,7 +203,6 @@ def poll_flow_results(M: TemoaModel, epsilon=1e-5) -> dict[FI, dict[FlowType, fl def poll_objective(M: TemoaModel) -> list[tuple[str, float]]: """gather objective name, value tuples for all active objectives""" - objs: list[Objective] = list(M.component_data_objects(Objective)) active_objs = [obj for obj in objs if obj.active] if len(active_objs) > 1: @@ -207,7 +217,13 @@ def poll_objective(M: TemoaModel) -> list[tuple[str, float]]: def poll_cost_results( M: TemoaModel, p_0: int | None, epsilon=1e-5 ) -> tuple[dict[tuple, dict], ...]: - """gather all cost data from solved model""" + """ + Poll a solved model for all cost results + :param M: Solved Model + :param p_0: a base year for discounting of loans, typically only used in MYOPIC. If none, first optimization year used + :param epsilon: epsilon (default 1e-5) + :return: tuple of cost_dict, exchange_cost_dict (for exchange techs) + """ if not p_0: p_0 = min(M.time_optimize) @@ -397,7 +413,7 @@ def poll_emissions( :param M: the model :param p_0: the first period, if other than min(time_optimize), as in MYOPIC :param epsilon: a minimal epsilon for ignored values - :return: + :return: cost_dict, flow_dict """ # UPDATE: older versions brought forward had some accounting errors here for flex/curtailed emissions From 45d1321a8c75a6c5f7ca823c0ea26a3b4b76a972 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sun, 8 Dec 2024 11:11:08 -0800 Subject: [PATCH 22/70] Adding T/S log entries --- temoa/extensions/monte_carlo/mc_sequencer.py | 11 +++++++++-- temoa/extensions/monte_carlo/mc_worker.py | 3 +++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 629a4d43..f1d25bb5 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -48,6 +48,7 @@ from temoa.temoa_model.temoa_config import TemoaConfig logger = getLogger(__name__) +logger.setLevel(logging.DEBUG) solver_options_path = Path(PROJECT_ROOT, 'temoa/extensions/monte_carlo/MC_solver_options.toml') @@ -82,7 +83,7 @@ def __init__(self, config: TemoaConfig): self.orig_label = self.config.scenario self.writer = TableWriter(self.config) - self.verbose = False # for troubleshooting + self.verbose = True # for troubleshooting def start(self): """Run the sequencer""" @@ -138,7 +139,7 @@ def start(self): results_queue=result_queue, log_root_name=__name__, log_queue=log_queue, - log_level=logging.INFO, + log_level=logging.DEBUG, solver_log_path=s_path, **kwargs, ) @@ -230,13 +231,18 @@ def start(self): if self.verbose: print('shutdown sent') work_queue.put('ZEBRA') # shutdown signal + logger.debug('Put "ZEBRA" on work queue (shutdown signal)') # 7b. 
Keep pulling results from the queue to empty it out empty = 0 + logger.debug('*** Starting the waiting process to wrap up... ***') while True: + # print(f'{empty}-', end='') + # logger.debug('Polling result queue...') try: next_result = result_queue.get_nowait() if next_result == 'COYOTE': # shutdown signal + logger.debug('Got COYOTE (shutdown received)') empty += 1 except queue.Empty: next_result = None @@ -263,6 +269,7 @@ def start(self): log_queue.close() log_queue.join_thread() + logger.debug('All queues closed') if self.verbose: print('log queue closed') work_queue.close() diff --git a/temoa/extensions/monte_carlo/mc_worker.py b/temoa/extensions/monte_carlo/mc_worker.py index 7226ce6f..d1f4797d 100644 --- a/temoa/extensions/monte_carlo/mc_worker.py +++ b/temoa/extensions/monte_carlo/mc_worker.py @@ -112,6 +112,8 @@ def run(self): self.worker_number, (toc - tic).total_seconds(), ) + if isinstance(data, str): + logger.info('Worker %d received string: %s', self.worker_number, data) if data == 'ZEBRA': # shutdown signal if verbose: print(f'worker {self.worker_number} got shutdown signal') @@ -159,3 +161,4 @@ def run(self): ) except AttributeError: pass + logger.info('Worker %d finished', self.worker_number) From 7a80fb38dc781205106479b510b113f9cc11eba6 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sun, 8 Dec 2024 11:20:17 -0800 Subject: [PATCH 23/70] Increase size of work queue - was hanging while putting in shutdown signals --- temoa/extensions/monte_carlo/mc_sequencer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index f1d25bb5..2b3eed8a 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -117,8 +117,8 @@ def start(self): # 4. Set up the workers num_workers = self.num_workers work_queue: Queue[tuple[str, DataPortal] | str] = Queue( - 6 - ) # restrict the queue to hold just 1 models in it max + num_workers + 1 + ) # must be able to hold all shutdowns at once (could be changed later to not lock on insertion...) result_queue: Queue[DataBrick | str] = Queue( num_workers + 1 ) # must be able to hold a shutdown signal from all workers at once! 
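Background on the queue-sizing fix above: multiprocessing.Queue.put() blocks once a bounded queue is at capacity, so a work queue smaller than the worker count can stall the sequencer while it enqueues one 'ZEBRA' shutdown token per worker. A minimal standalone sketch of the sizing rule, with an illustrative worker count rather than the project's configured value:

    from multiprocessing import Queue

    num_workers = 6  # illustrative only

    # one slot per worker's shutdown token, with one spare, so the
    # shutdown loop's put() calls cannot block on a full queue
    work_queue = Queue(num_workers + 1)

    for _ in range(num_workers):
        work_queue.put('ZEBRA')  # one shutdown signal per worker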
From f59e01116ec760ca455e6971d93aa35044ac010b Mon Sep 17 00:00:00 2001 From: Jeff H Date: Mon, 9 Dec 2024 14:29:53 -0800 Subject: [PATCH 24/70] Rev 2.0 of MC complete - works with Utopia - logging cleaned up --- temoa/extensions/monte_carlo/mc_sequencer.py | 11 +++++------ temoa/extensions/monte_carlo/mc_worker.py | 4 +--- temoa/temoa_model/table_data_puller.py | 2 +- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/temoa/extensions/monte_carlo/mc_sequencer.py b/temoa/extensions/monte_carlo/mc_sequencer.py index 2b3eed8a..80cffaef 100644 --- a/temoa/extensions/monte_carlo/mc_sequencer.py +++ b/temoa/extensions/monte_carlo/mc_sequencer.py @@ -48,7 +48,6 @@ from temoa.temoa_model.temoa_config import TemoaConfig logger = getLogger(__name__) -logger.setLevel(logging.DEBUG) solver_options_path = Path(PROJECT_ROOT, 'temoa/extensions/monte_carlo/MC_solver_options.toml') @@ -83,7 +82,7 @@ def __init__(self, config: TemoaConfig): self.orig_label = self.config.scenario self.writer = TableWriter(self.config) - self.verbose = True # for troubleshooting + self.verbose = False # for troubleshooting def start(self): """Run the sequencer""" @@ -122,7 +121,7 @@ def start(self): result_queue: Queue[DataBrick | str] = Queue( num_workers + 1 ) # must be able to hold a shutdown signal from all workers at once! - log_queue = Queue(50) + log_queue = Queue() # make workers workers = [] kwargs = { @@ -139,7 +138,7 @@ def start(self): results_queue=result_queue, log_root_name=__name__, log_queue=log_queue, - log_level=logging.DEBUG, + log_level=logging.INFO, solver_log_path=s_path, **kwargs, ) @@ -179,7 +178,7 @@ def start(self): # ready the next one run_name, dp = mc_run.model_dp except StopIteration: - logger.debug('Pulled last DP from run generator') + logger.info('Pulled last DP from run generator') more_runs = False except queue.Full: # print('work queue is full') @@ -235,7 +234,7 @@ def start(self): # 7b. Keep pulling results from the queue to empty it out empty = 0 - logger.debug('*** Starting the waiting process to wrap up... ***') + logger.debug('Starting the waiting process to wrap up...') while True: # print(f'{empty}-', end='') # logger.debug('Polling result queue...') diff --git a/temoa/extensions/monte_carlo/mc_worker.py b/temoa/extensions/monte_carlo/mc_worker.py index d1f4797d..73cbf249 100644 --- a/temoa/extensions/monte_carlo/mc_worker.py +++ b/temoa/extensions/monte_carlo/mc_worker.py @@ -112,12 +112,10 @@ def run(self): self.worker_number, (toc - tic).total_seconds(), ) - if isinstance(data, str): - logger.info('Worker %d received string: %s', self.worker_number, data) if data == 'ZEBRA': # shutdown signal if verbose: print(f'worker {self.worker_number} got shutdown signal') - logger.info('Worker %d received shutdown signal', self.worker_number) + logger.debug('Worker %d received shutdown signal', self.worker_number) self.results_queue.put('COYOTE') break name, dp = data diff --git a/temoa/temoa_model/table_data_puller.py b/temoa/temoa_model/table_data_puller.py index e67bd852..4301662c 100644 --- a/temoa/temoa_model/table_data_puller.py +++ b/temoa/temoa_model/table_data_puller.py @@ -123,7 +123,7 @@ def poll_capacity_results(M: TemoaModel, epsilon=1e-5) -> CapData: def poll_flow_results(M: TemoaModel, epsilon=1e-5) -> dict[FI, dict[FlowType, float]]: - f""" + """ Poll a solved model for flow results. 
:param M: A solved Model :param epsilon: epsilon (default 1e-5) From 06dd6883e29e6a7f779d8f21267ea9208fdd2c7f Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 17 Dec 2024 07:18:08 -0800 Subject: [PATCH 25/70] Rev 2.0 of MC complete - works with Utopia - logging cleaned up --- temoa/extensions/{monte_carlo => stochastics}/README.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename temoa/extensions/{monte_carlo => stochastics}/README.txt (100%) diff --git a/temoa/extensions/monte_carlo/README.txt b/temoa/extensions/stochastics/README.txt similarity index 100% rename from temoa/extensions/monte_carlo/README.txt rename to temoa/extensions/stochastics/README.txt From eefe33a28a8e46551f543a9d5b0a954db122bfd0 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Wed, 9 Apr 2025 15:28:01 -0700 Subject: [PATCH 26/70] Added ability to output Dual Variables to Myopic Mode --- temoa/extensions/myopic/myopic_sequencer.py | 8 +++++++- temoa/temoa_model/table_writer.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/temoa/extensions/myopic/myopic_sequencer.py b/temoa/extensions/myopic/myopic_sequencer.py index 332f2442..ebb026bd 100644 --- a/temoa/extensions/myopic/myopic_sequencer.py +++ b/temoa/extensions/myopic/myopic_sequencer.py @@ -271,6 +271,9 @@ def start(self): self.progress_mapper.report(idx, 'report') # write results by appending. We have already cleared necessary items self.table_writer.write_results(M=model, append=True) + # handle side-case for writing duals + if self.config.save_duals: + self.table_writer.write_dual_variables(results=results, iteration=idx.base_year) # prep next loop last_base_year = idx.base_year # update @@ -501,7 +504,10 @@ def clear_old_results(self): logger.debug('Deleting old results for scenario name %s', scenario_name) for table in self.tables_with_scenario_reference: try: - self.cursor.execute(f'DELETE FROM {table} WHERE scenario = ?', (scenario_name,)) + self.cursor.execute( + f'DELETE FROM {table} WHERE scenario = ? OR scenario like ?', + (scenario_name, f'{scenario_name}-%'), + ) except sqlite3.OperationalError: SE.write(f'no scenario ref in table {table}\n') raise sqlite3.OperationalError diff --git a/temoa/temoa_model/table_writer.py b/temoa/temoa_model/table_writer.py index 64e79d4c..7e97b6bf 100644 --- a/temoa/temoa_model/table_writer.py +++ b/temoa/temoa_model/table_writer.py @@ -507,7 +507,7 @@ def _write_cost_rows(self, entries, iteration=None): self.con.commit() def write_dual_variables(self, results: SolverResults, iteration=None): - """Write the dual variables to the OutputCost table""" + """Write the dual variables to the OutputDualVariable table""" scenario_name = ( self.config.scenario + f'-{iteration}' if iteration is not None From 1543cf892744d264baabcca32df99743b31150c4 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 14 Jun 2025 14:21:15 -0700 Subject: [PATCH 27/70] Tuned up migration utility Added additional QA steps during data transfer. 
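The added QA steps are shape tests run before each bulk transfer into the v3 schema. A minimal sketch of the column-count guard, packaged here as a hypothetical helper for illustration (the real script does this inline and prints a more detailed warning before skipping the table):

    def transfer_5_field_table(con_old, con_new, old_name, new_name):
        rows = con_old.execute(f'SELECT * FROM {old_name}').fetchall()
        if rows and len(rows[0]) != 5:
            # unexpected shape, e.g. an old table not indexed by region:
            # warn and skip rather than mis-map columns into the new schema
            print(f'skipping {old_name}: expected 5 fields, found {len(rows[0])}')
            return
        qry = f'INSERT OR REPLACE INTO {new_name} VALUES (?, ?, ?, ?, "", ?)'
        con_new.executemany(qry, rows)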
--- temoa/utilities/db_migration_to_v3.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/temoa/utilities/db_migration_to_v3.py b/temoa/utilities/db_migration_to_v3.py index cac299ab..29645fe1 100644 --- a/temoa/utilities/db_migration_to_v3.py +++ b/temoa/utilities/db_migration_to_v3.py @@ -154,6 +154,7 @@ data = con_old.execute(f'SELECT * FROM {old_name}').fetchall() except sqlite3.OperationalError: print('TABLE NOT FOUND: ' + old_name) + data = [] continue if not data: @@ -177,11 +178,18 @@ data = con_old.execute(f'SELECT * FROM {old_name}').fetchall() except sqlite3.OperationalError: print('table not found: ' + old_name) + data = [] continue if not data: print('no data for: ' + old_name) continue - + # quick check for expected number of fields... + if len(data[0]) != 5: + print(f'\nWARNING: unexpected number of fields in table: {old_name}. Was expecting 5: region, period, group, value, notes') + print('\nIt is possible that the older table you have was not indexed by REGION, which might be common' + 'for old datasets. If so, that cannot be moved automatically. You will need to do it manually.') + print(f'\n *** IGNORING TABLE: {old_name} in transfer!! ***\n') + continue query = f'INSERT OR REPLACE INTO {new_name} VALUES (?, ?, ?, ?, "", ?)' con_new.executemany(query, data) print(f'inserted {len(data)} rows into {new_name}') @@ -289,7 +297,11 @@ ) cur.execute('INSERT OR REPLACE INTO TechGroupMember VALUES (?, ?)', (new_name, tech)) except sqlite3.OperationalError: - print('souce does not appear to employ tech_groups...skipping.') + print('source does not appear to employ tech_groups...skipping.') + skip_tech_groups = True +except ValueError as e: + print('\nWARNING: unusual schema variation discovered in tech_groups. Error: ' + str(e)) + print('\n *** SKIPPING TRANSITION OF tech_groups and associated weightings, etc. ***\n') skip_tech_groups = True if not skip_tech_groups: # ------- FIX TABLES THAT USED TO USE tech_groups ----------- @@ -315,6 +327,7 @@ pairs = cur.execute(f'SELECT DISTINCT region, group_name FROM {table}').fetchall() except sqlite3.OperationalError: print(f'table not found: {table}') + pairs = [] continue if len(pairs) == 0: print(f'No groups found for: {table}') @@ -435,11 +448,11 @@ data = con_new.execute('PRAGMA FOREIGN_KEY_CHECK;').fetchall() print('FK check fails (MUST BE FIXED):') if not data: - print('No Foreign Key Failures. (Good news!)') + print('\tNo Foreign Key Failures. (Good news!)') else: - print('(Table, Row ID, Reference Table, (fkid) )') + print('\t(Table, Row ID, Reference Table, (fkid) )') for row in data: - print(row) + print(f'\t{row}') except sqlite3.OperationalError as e: print('Foreign Key Check FAILED on new DB. Something may be wrong with schema.') print(e) From 60b404c70d4e537116dc22bd5c8ba2a27f5ec179 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 14 Jun 2025 14:22:11 -0700 Subject: [PATCH 28/70] Formatting --- temoa/utilities/db_migration_to_v3.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/temoa/utilities/db_migration_to_v3.py b/temoa/utilities/db_migration_to_v3.py index 29645fe1..9cbcb092 100644 --- a/temoa/utilities/db_migration_to_v3.py +++ b/temoa/utilities/db_migration_to_v3.py @@ -185,9 +185,13 @@ continue # quick check for expected number of fields... if len(data[0]) != 5: - print(f'\nWARNING: unexpected number of fields in table: {old_name}. 
Was expecting 5: region, period, group, value, notes') - print('\nIt is possible that the older table you have was not indexed by REGION, which might be common' - 'for old datasets. If so, that cannot be moved automatically. You will need to do it manually.') + print( + f'\nWARNING: unexpected number of fields in table: {old_name}. Was expecting 5: region, period, group, value, notes' + ) + print( + '\nIt is possible that the older table you have was not indexed by REGION, which might be common' + 'for old datasets. If so, that cannot be moved automatically. You will need to do it manually.' + ) print(f'\n *** IGNORING TABLE: {old_name} in transfer!! ***\n') continue query = f'INSERT OR REPLACE INTO {new_name} VALUES (?, ?, ?, ?, "", ?)' From f226ff8e240bc22d1630a117f28113a89bbbf1a8 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Wed, 10 Sep 2025 15:45:05 -0700 Subject: [PATCH 29/70] Make v3.1 schema and minimal schema file. Add quick utility to do schema and db comparisons. --- data_files/temoa_schema_minimal_v3_1.sql | 730 ++++++++++++++++++ data_files/temoa_schema_v3_1.sql | 937 +++++++++++++++++++++++ temoa/utilities/db_schema_compare.py | 158 ++++ 3 files changed, 1825 insertions(+) create mode 100644 data_files/temoa_schema_minimal_v3_1.sql create mode 100644 data_files/temoa_schema_v3_1.sql create mode 100644 temoa/utilities/db_schema_compare.py diff --git a/data_files/temoa_schema_minimal_v3_1.sql b/data_files/temoa_schema_minimal_v3_1.sql new file mode 100644 index 00000000..567c8033 --- /dev/null +++ b/data_files/temoa_schema_minimal_v3_1.sql @@ -0,0 +1,730 @@ +PRAGMA foreign_keys= OFF; +BEGIN TRANSACTION; + +CREATE TABLE IF NOT EXISTS MetaData +( + element TEXT, + value INT, + notes TEXT, + PRIMARY KEY (element) +); +REPLACE INTO MetaData +VALUES ('myopic_base_year', 2000, 'Base Year for Myopic Analysis'); +REPLACE INTO MetaData +VALUES ('DB_MAJOR', 3, 'DB major version number'); +REPLACE INTO MetaData +VALUES ('DB_MINOR', 1, 'DB minor version number'); + +CREATE TABLE IF NOT EXISTS MetaDataReal +( + element TEXT, + value REAL, + notes TEXT, + + PRIMARY KEY (element) +); +REPLACE INTO MetaDataReal +VALUES ('global_discount_rate', 0.05, 'Discount Rate for future costs'); +REPLACE INTO MetaDataReal +VALUES ('default_loan_rate', 0.05, 'Default Loan Rate if not specified in LoanRate table'); + +CREATE TABLE IF NOT EXISTS OutputDualVariable +( + scenario TEXT, + constraint_name TEXT, + dual REAL, + PRIMARY KEY (constraint_name, scenario) +); +CREATE TABLE IF NOT EXISTS OutputObjective +( + scenario TEXT, + objective_name TEXT, + total_system_cost REAL, + units TEXT, + PRIMARY KEY (scenario, objective_name) +); +CREATE TABLE IF NOT EXISTS SectorLabel +( + sector TEXT, + PRIMARY KEY (sector) +); + + +CREATE TABLE IF NOT EXISTS CapacityFactorProcess +( + region TEXT, + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER, + factor REAL, + notes TEXT, + PRIMARY KEY (region, season, tod, tech, vintage), + CHECK (factor >= 0 AND factor <= 1) +); +CREATE TABLE IF NOT EXISTS CapacityFactorTech +( + region TEXT, + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + tech TEXT + REFERENCES Technology (tech), + factor REAL, + notes TEXT, + PRIMARY KEY (region, season, tod, tech), + CHECK (factor >= 0 AND factor <= 1) +); +CREATE TABLE IF NOT EXISTS CapacityToActivity +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + c2a REAL, + units TEXT, + + notes TEXT, 
+ PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS Commodity +( + name TEXT + PRIMARY KEY, + flag TEXT + REFERENCES CommodityType (label), + units TEXT, + + description TEXT +); +CREATE TABLE IF NOT EXISTS CommodityType +( + label TEXT + PRIMARY KEY, + description TEXT +); +REPLACE INTO CommodityType +VALUES ('p', 'physical commodity'); +REPLACE INTO CommodityType +VALUES ('e', 'emissions commodity'); +REPLACE INTO CommodityType +VALUES ('d', 'demand commodity'); +REPLACE INTO CommodityType +VALUES ('s', 'source commodity'); + +CREATE TABLE IF NOT EXISTS CostEmission +( + region TEXT + REFERENCES Region (region), + period INTEGER + REFERENCES TimePeriod (period), + emis_comm TEXT NOT NULL + REFERENCES Commodity (name), + cost REAL NOT NULL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, emis_comm) +); +CREATE TABLE IF NOT EXISTS CostFixed +( + region TEXT NOT NULL, + period INTEGER NOT NULL + REFERENCES TimePeriod (period), + tech TEXT NOT NULL + REFERENCES Technology (tech), + vintage INTEGER NOT NULL + REFERENCES TimePeriod (period), + cost REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech, vintage) +); +CREATE TABLE IF NOT EXISTS CostInvest +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + cost REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech, vintage) +); +CREATE TABLE IF NOT EXISTS CostVariable +( + region TEXT NOT NULL, + period INTEGER NOT NULL + REFERENCES TimePeriod (period), + tech TEXT NOT NULL + REFERENCES Technology (tech), + vintage INTEGER NOT NULL + REFERENCES TimePeriod (period), + cost REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech, vintage) +); +CREATE TABLE IF NOT EXISTS Demand +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + commodity TEXT + REFERENCES Commodity (name), + demand REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, commodity) +); +CREATE TABLE IF NOT EXISTS DemandSpecificDistribution +( + region TEXT, + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + demand_name TEXT + REFERENCES Commodity (name), + dds REAL, + dds_notes TEXT, + PRIMARY KEY (region, season, tod, demand_name), + CHECK (dds >= 0 AND dds <= 1) +); +CREATE TABLE IF NOT EXISTS LoanRate +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + rate REAL, + notes TEXT, + PRIMARY KEY (region, tech, vintage) +); +CREATE TABLE IF NOT EXISTS Efficiency +( + region TEXT, + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + efficiency REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, input_comm, tech, vintage, output_comm), + CHECK (efficiency > 0) +); +CREATE TABLE IF NOT EXISTS EmissionActivity +( + region TEXT, + emis_comm TEXT + REFERENCES Commodity (name), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + activity REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, emis_comm, input_comm, tech, vintage, output_comm) +); +CREATE TABLE IF NOT EXISTS ExistingCapacity +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + capacity REAL, + units TEXT, + notes TEXT, + 
PRIMARY KEY (region, tech, vintage) +); + +CREATE TABLE IF NOT EXISTS LoanLifetimeTech +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + lifetime REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS LifetimeProcess +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + lifetime REAL, + units TEXT, + + notes TEXT, + PRIMARY KEY (region, tech, vintage) +); +CREATE TABLE IF NOT EXISTS LifetimeTech +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + lifetime REAL, + units TEXT, + + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS LinkedTech +( + primary_region TEXT, + primary_tech TEXT + REFERENCES Technology (tech), + emis_comm TEXT + REFERENCES Commodity (name), + driven_tech TEXT + REFERENCES Technology (tech), + notes TEXT, + PRIMARY KEY (primary_region, primary_tech, emis_comm) +); +CREATE TABLE IF NOT EXISTS MaxActivity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + max_act REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); +CREATE TABLE IF NOT EXISTS MaxCapacity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + max_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); + +CREATE TABLE IF NOT EXISTS MinActivity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + min_act REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); + +CREATE TABLE IF NOT EXISTS MinCapacity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + min_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); + +CREATE TABLE IF NOT EXISTS OutputCurtailment +( + scenario TEXT, + region TEXT, + sector TEXT, + period INTEGER + REFERENCES TimePeriod (period), + season TEXT + REFERENCES TimePeriod (period), + tod TEXT + REFERENCES TimeOfDay (tod), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + curtailment REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, season, tod, input_comm, tech, vintage, output_comm) +); +CREATE TABLE IF NOT EXISTS OutputNetCapacity +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + capacity REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, tech, vintage) +); +CREATE TABLE IF NOT EXISTS OutputBuiltCapacity +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + capacity REAL, + units TEXT, + PRIMARY KEY (region, scenario, tech, vintage) +); +CREATE TABLE IF NOT EXISTS OutputRetiredCapacity +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + capacity REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, tech, vintage) +); +CREATE TABLE IF NOT EXISTS 
OutputFlowIn +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + flow REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, season, tod, input_comm, tech, vintage, output_comm) +); +CREATE TABLE IF NOT EXISTS OutputFlowOut +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + flow REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, season, tod, input_comm, tech, vintage, output_comm) +); +CREATE TABLE IF NOT EXISTS PlanningReserveMargin +( + region TEXT + PRIMARY KEY + REFERENCES Region (region), + margin REAL +); +CREATE TABLE IF NOT EXISTS RampDown +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + rate REAL, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS RampUp +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + rate REAL, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS Region +( + region TEXT + PRIMARY KEY, + notes TEXT +); +CREATE TABLE IF NOT EXISTS TimeSegmentFraction +( + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + segfrac REAL, + notes TEXT, + PRIMARY KEY (season, tod), + CHECK (segfrac >= 0 AND segfrac <= 1) +); +CREATE TABLE IF NOT EXISTS StorageDuration +( + region TEXT, + tech TEXT, + duration REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech) +); + +CREATE TABLE IF NOT EXISTS TechnologyType +( + label TEXT + PRIMARY KEY, + description TEXT +); +REPLACE INTO TechnologyType +VALUES ('r', 'resource technology'); +REPLACE INTO TechnologyType +VALUES ('p', 'production technology'); +REPLACE INTO TechnologyType +VALUES ('pb', 'baseload production technology'); +REPLACE INTO TechnologyType +VALUES ('ps', 'storage production technology'); + +CREATE TABLE IF NOT EXISTS TechInputSplit +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + min_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, input_comm, tech) +); +CREATE TABLE IF NOT EXISTS TechInputSplitAverage +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + min_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, input_comm, tech) +); +CREATE TABLE IF NOT EXISTS TechOutputSplit +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + output_comm TEXT + REFERENCES Commodity (name), + min_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, tech, output_comm) +); +CREATE TABLE IF NOT EXISTS TimeOfDay +( + sequence INTEGER UNIQUE, + tod TEXT + PRIMARY KEY +); +CREATE TABLE IF NOT EXISTS TimePeriod +( + sequence INTEGER UNIQUE, + period INTEGER + PRIMARY KEY, + flag TEXT + REFERENCES 
TimePeriodType (label) +); +CREATE TABLE IF NOT EXISTS TimeSeason +( + sequence INTEGER UNIQUE, + season TEXT + PRIMARY KEY +); +CREATE TABLE IF NOT EXISTS TimePeriodType +( + label TEXT + PRIMARY KEY, + description TEXT +); + +CREATE TABLE IF NOT EXISTS MaxAnnualCapacityFactor +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + output_comm TEXT + REFERENCES Commodity (name), + factor REAL, + source TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech), + CHECK (factor >= 0 AND factor <= 1) +); +CREATE TABLE IF NOT EXISTS MaxNewCapacity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + max_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); + + +CREATE TABLE IF NOT EXISTS MinAnnualCapacityFactor +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + output_comm TEXT + REFERENCES Commodity (name), + factor REAL, + source TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech), + CHECK (factor >= 0 AND factor <= 1) +); + +CREATE TABLE IF NOT EXISTS MinNewCapacity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + min_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); + +CREATE TABLE IF NOT EXISTS OutputEmission +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + emis_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + emission REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, emis_comm, tech, vintage) +); + +CREATE TABLE IF NOT EXISTS EmissionLimit +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + emis_comm TEXT + REFERENCES Commodity (name), + value REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, emis_comm) +); + +CREATE TABLE IF NOT EXISTS Technology +( + tech TEXT NOT NULL PRIMARY KEY, + flag TEXT NOT NULL, + sector TEXT, + category TEXT, + sub_category TEXT, + unlim_cap INTEGER NOT NULL DEFAULT 0, + annual INTEGER NOT NULL DEFAULT 0, + reserve INTEGER NOT NULL DEFAULT 0, + curtail INTEGER NOT NULL DEFAULT 0, + retire INTEGER NOT NULL DEFAULT 0, + flex INTEGER NOT NULL DEFAULT 0, + variable INTEGER NOT NULL DEFAULT 0, + exchange INTEGER NOT NULL DEFAULT 0, + description TEXT, + FOREIGN KEY (flag) REFERENCES TechnologyType (label) +); +CREATE TABLE IF NOT EXISTS OutputCost +( + scenario TEXT, + region TEXT, + period INTEGER, + tech TEXT, + vintage INTEGER, + d_invest REAL, + d_fixed REAL, + d_var REAL, + d_emiss REAL, + invest REAL, + fixed REAL, + var REAL, + emiss REAL, + units TEXT, + PRIMARY KEY (scenario, region, period, tech, vintage), + FOREIGN KEY (vintage) REFERENCES TimePeriod (period), + FOREIGN KEY (tech) REFERENCES Technology (tech) +); +COMMIT; +PRAGMA FOREIGN_KEYS = 1; diff --git a/data_files/temoa_schema_v3_1.sql b/data_files/temoa_schema_v3_1.sql new file mode 100644 index 00000000..b5373e68 --- /dev/null +++ b/data_files/temoa_schema_v3_1.sql @@ -0,0 +1,937 @@ +PRAGMA foreign_keys= OFF; +BEGIN TRANSACTION; + +CREATE TABLE IF NOT EXISTS MetaData +( + element TEXT, + value INT, + notes TEXT, + PRIMARY KEY (element) +); +REPLACE INTO MetaData +VALUES ('myopic_base_year', 2000, 'Base Year for Myopic Analysis'); +REPLACE INTO MetaData +VALUES ('DB_MAJOR', 
3, 'DB major version number'); +REPLACE INTO MetaData +VALUES ('DB_MINOR', 1, 'DB minor version number'); + +CREATE TABLE IF NOT EXISTS MetaDataReal +( + element TEXT, + value REAL, + notes TEXT, + + PRIMARY KEY (element) +); +REPLACE INTO MetaDataReal +VALUES ('global_discount_rate', 0.05, 'Discount Rate for future costs'); +REPLACE INTO MetaDataReal +VALUES ('default_loan_rate', 0.05, 'Default Loan Rate if not specified in LoanRate table'); + +CREATE TABLE IF NOT EXISTS OutputDualVariable +( + scenario TEXT, + constraint_name TEXT, + dual REAL, + PRIMARY KEY (constraint_name, scenario) +); +CREATE TABLE IF NOT EXISTS OutputObjective +( + scenario TEXT, + objective_name TEXT, + total_system_cost REAL, + units TEXT +); +CREATE TABLE IF NOT EXISTS SectorLabel +( + sector TEXT, + PRIMARY KEY (sector) +); + +CREATE TABLE IF NOT EXISTS CapacityCredit +( + region TEXT, + period INTEGER, + tech TEXT, + vintage INTEGER, + credit REAL, + notes TEXT, + PRIMARY KEY (region, period, tech, vintage), + CHECK (credit >= 0 AND credit <= 1) +); +CREATE TABLE IF NOT EXISTS CapacityFactorProcess +( + region TEXT, + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER, + factor REAL, + notes TEXT, + PRIMARY KEY (region, season, tod, tech, vintage), + CHECK (factor >= 0 AND factor <= 1) +); +CREATE TABLE IF NOT EXISTS CapacityFactorTech +( + region TEXT, + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + tech TEXT + REFERENCES Technology (tech), + factor REAL, + notes TEXT, + PRIMARY KEY (region, season, tod, tech), + CHECK (factor >= 0 AND factor <= 1) +); +CREATE TABLE IF NOT EXISTS CapacityToActivity +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + c2a REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS Commodity +( + name TEXT + PRIMARY KEY, + flag TEXT + REFERENCES CommodityType (label), + units TEXT, + description TEXT +); +CREATE TABLE IF NOT EXISTS CommodityType +( + label TEXT + PRIMARY KEY, + description TEXT +); +REPLACE INTO CommodityType +VALUES ('p', 'physical commodity'); +REPLACE INTO CommodityType +VALUES ('e', 'emissions commodity'); +REPLACE INTO CommodityType +VALUES ('d', 'demand commodity'); +REPLACE INTO CommodityType +VALUES ('s', 'source commodity'); + +CREATE TABLE IF NOT EXISTS CostEmission +( + region TEXT + REFERENCES Region (region), + period INTEGER + REFERENCES TimePeriod (period), + emis_comm TEXT NOT NULL + REFERENCES Commodity (name), + cost REAL NOT NULL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, emis_comm) +); +CREATE TABLE IF NOT EXISTS CostFixed +( + region TEXT NOT NULL, + period INTEGER NOT NULL + REFERENCES TimePeriod (period), + tech TEXT NOT NULL + REFERENCES Technology (tech), + vintage INTEGER NOT NULL + REFERENCES TimePeriod (period), + cost REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech, vintage) +); +CREATE TABLE IF NOT EXISTS CostInvest +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + cost REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech, vintage) +); +CREATE TABLE IF NOT EXISTS CostVariable +( + region TEXT NOT NULL, + period INTEGER NOT NULL + REFERENCES TimePeriod (period), + tech TEXT NOT NULL + REFERENCES Technology (tech), + vintage INTEGER NOT NULL + REFERENCES TimePeriod (period), + cost REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, 
period, tech, vintage) +); +CREATE TABLE IF NOT EXISTS Demand +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + commodity TEXT + REFERENCES Commodity (name), + demand REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, commodity) +); +CREATE TABLE IF NOT EXISTS DemandSpecificDistribution +( + region TEXT, + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + demand_name TEXT + REFERENCES Commodity (name), + dds REAL, + dds_notes TEXT, + PRIMARY KEY (region, season, tod, demand_name), + CHECK (dds >= 0 AND dds <= 1) +); +CREATE TABLE IF NOT EXISTS LoanRate +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + rate REAL, + notes TEXT, + PRIMARY KEY (region, tech, vintage) +); +CREATE TABLE IF NOT EXISTS Efficiency +( + region TEXT, + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + efficiency REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, input_comm, tech, vintage, output_comm), + CHECK (efficiency > 0) +); +CREATE TABLE IF NOT EXISTS EmissionActivity +( + region TEXT, + emis_comm TEXT + REFERENCES Commodity (name), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + activity REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, emis_comm, input_comm, tech, vintage, output_comm) +); +CREATE TABLE IF NOT EXISTS ExistingCapacity +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + capacity REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech, vintage) +); +CREATE TABLE IF NOT EXISTS TechGroup +( + group_name TEXT + PRIMARY KEY, + notes TEXT +); +CREATE TABLE IF NOT EXISTS GrowthRateMax +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + rate REAL, + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS GrowthRateSeed +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + seed REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS LoanLifetimeTech +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + lifetime REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS LifetimeProcess +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + lifetime REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech, vintage) +); +CREATE TABLE IF NOT EXISTS LifetimeTech +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + lifetime REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS LinkedTech +( + primary_region TEXT, + primary_tech TEXT + REFERENCES Technology (tech), + emis_comm TEXT + REFERENCES Commodity (name), + driven_tech TEXT + REFERENCES Technology (tech), + notes TEXT, + PRIMARY KEY (primary_region, primary_tech, emis_comm) +); +CREATE TABLE IF NOT EXISTS MaxActivity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + max_act REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); +CREATE TABLE IF NOT EXISTS MaxCapacity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod 
(period), + tech TEXT + REFERENCES Technology (tech), + max_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); +CREATE TABLE IF NOT EXISTS MaxResource +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + max_res REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS MinActivity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + min_act REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); +CREATE TABLE IF NOT EXISTS MaxCapacityGroup +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + group_name TEXT + REFERENCES TechGroup (group_name), + max_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, group_name) +); +CREATE TABLE IF NOT EXISTS MinCapacity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + min_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); +CREATE TABLE IF NOT EXISTS MinCapacityGroup +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + group_name TEXT + REFERENCES TechGroup (group_name), + min_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, group_name) +); +CREATE TABLE IF NOT EXISTS OutputCurtailment +( + scenario TEXT, + region TEXT, + sector TEXT, + period INTEGER + REFERENCES TimePeriod (period), + season TEXT + REFERENCES TimePeriod (period), + tod TEXT + REFERENCES TimeOfDay (tod), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + curtailment REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, season, tod, input_comm, tech, vintage, output_comm) +); +CREATE TABLE IF NOT EXISTS OutputNetCapacity +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + capacity REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, tech, vintage) +); +CREATE TABLE IF NOT EXISTS OutputBuiltCapacity +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + capacity REAL, + units TEXT, + PRIMARY KEY (region, scenario, tech, vintage) +); +CREATE TABLE IF NOT EXISTS OutputRetiredCapacity +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + capacity REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, tech, vintage) +); +CREATE TABLE IF NOT EXISTS OutputFlowIn +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + flow REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, season, tod, input_comm, tech, vintage, output_comm) +); +CREATE TABLE IF NOT EXISTS OutputFlowOut 
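-- As with OutputFlowIn above, the only v3.1 change to this output table is the added free-text
-- 'units' column; the composite primary key below is carried over from v3.0.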
+( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES TimePeriod (period), + output_comm TEXT + REFERENCES Commodity (name), + flow REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, season, tod, input_comm, tech, vintage, output_comm) +); +CREATE TABLE IF NOT EXISTS PlanningReserveMargin +( + region TEXT + PRIMARY KEY + REFERENCES Region (region), + margin REAL +); +CREATE TABLE IF NOT EXISTS RampDown +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + rate REAL, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS RampUp +( + region TEXT, + tech TEXT + REFERENCES Technology (tech), + rate REAL, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS Region +( + region TEXT + PRIMARY KEY, + notes TEXT +); +CREATE TABLE IF NOT EXISTS TimeSegmentFraction +( + season TEXT + REFERENCES TimeSeason (season), + tod TEXT + REFERENCES TimeOfDay (tod), + segfrac REAL, + notes TEXT, + PRIMARY KEY (season, tod), + CHECK (segfrac >= 0 AND segfrac <= 1) +); +CREATE TABLE IF NOT EXISTS StorageDuration +( + region TEXT, + tech TEXT, + duration REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, tech) +); +CREATE TABLE IF NOT EXISTS StorageInit +( + tech TEXT + PRIMARY KEY, + value REAL, + notes TEXT +); +CREATE TABLE IF NOT EXISTS TechnologyType +( + label TEXT + PRIMARY KEY, + description TEXT +); +REPLACE INTO TechnologyType +VALUES ('r', 'resource technology'); +REPLACE INTO TechnologyType +VALUES ('p', 'production technology'); +REPLACE INTO TechnologyType +VALUES ('pb', 'baseload production technology'); +REPLACE INTO TechnologyType +VALUES ('ps', 'storage production technology'); + +CREATE TABLE IF NOT EXISTS TechInputSplit +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + min_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, input_comm, tech) +); +CREATE TABLE IF NOT EXISTS TechInputSplitAverage +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + input_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + min_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, input_comm, tech) +); +CREATE TABLE IF NOT EXISTS TechOutputSplit +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + output_comm TEXT + REFERENCES Commodity (name), + min_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, tech, output_comm) +); +CREATE TABLE IF NOT EXISTS TimeOfDay +( + sequence INTEGER UNIQUE, + tod TEXT + PRIMARY KEY +); +CREATE TABLE IF NOT EXISTS TimePeriod +( + sequence INTEGER UNIQUE, + period INTEGER + PRIMARY KEY, + flag TEXT + REFERENCES TimePeriodType (label) +); +CREATE TABLE IF NOT EXISTS TimeSeason +( + sequence INTEGER UNIQUE, + season TEXT + PRIMARY KEY +); +CREATE TABLE IF NOT EXISTS TimePeriodType +( + label TEXT + PRIMARY KEY, + description TEXT +); +CREATE TABLE IF NOT EXISTS MaxActivityShare +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + group_name TEXT + REFERENCES TechGroup (group_name), + max_proportion REAL, + notes TEXT, + PRIMARY KEY (region, 
period, tech, group_name) +); +CREATE TABLE IF NOT EXISTS MaxCapacityShare +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + group_name TEXT + REFERENCES TechGroup (group_name), + max_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, tech, group_name) +); +CREATE TABLE IF NOT EXISTS MaxAnnualCapacityFactor +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + output_comm TEXT + REFERENCES Commodity (name), + factor REAL, + source TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech), + CHECK (factor >= 0 AND factor <= 1) +); +CREATE TABLE IF NOT EXISTS MaxNewCapacity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + max_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); +CREATE TABLE IF NOT EXISTS MaxNewCapacityGroup +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + group_name TEXT + REFERENCES TechGroup (group_name), + max_new_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, group_name) +); +CREATE TABLE IF NOT EXISTS MaxNewCapacityShare +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + group_name TEXT + REFERENCES TechGroup (group_name), + max_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, tech, group_name) +); +CREATE TABLE IF NOT EXISTS MinActivityShare +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + group_name TEXT + REFERENCES TechGroup (group_name), + min_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, tech, group_name) +); +CREATE TABLE IF NOT EXISTS MinAnnualCapacityFactor +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + output_comm TEXT + REFERENCES Commodity (name), + factor REAL, + source TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech), + CHECK (factor >= 0 AND factor <= 1) +); +CREATE TABLE IF NOT EXISTS MinCapacityShare +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + group_name TEXT + REFERENCES TechGroup (group_name), + min_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, tech, group_name) +); +CREATE TABLE IF NOT EXISTS MinNewCapacity +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + min_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, tech) +); +CREATE TABLE IF NOT EXISTS MinNewCapacityGroup +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + group_name TEXT + REFERENCES TechGroup (group_name), + min_new_cap REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, group_name) +); +CREATE TABLE IF NOT EXISTS MinNewCapacityShare +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + tech TEXT + REFERENCES Technology (tech), + group_name TEXT + REFERENCES TechGroup (group_name), + max_proportion REAL, + notes TEXT, + PRIMARY KEY (region, period, tech, group_name) +); +CREATE TABLE IF NOT EXISTS OutputEmission +( + scenario TEXT, + region TEXT, + sector TEXT + REFERENCES SectorLabel (sector), + period INTEGER + REFERENCES TimePeriod (period), + emis_comm TEXT + REFERENCES Commodity (name), + tech TEXT + REFERENCES Technology (tech), + vintage INTEGER + REFERENCES 
TimePeriod (period), + emission REAL, + units TEXT, + PRIMARY KEY (region, scenario, period, emis_comm, tech, vintage) +); +CREATE TABLE IF NOT EXISTS MinActivityGroup +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + group_name TEXT + REFERENCES TechGroup (group_name), + min_act REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, group_name) +); +CREATE TABLE IF NOT EXISTS EmissionLimit +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + emis_comm TEXT + REFERENCES Commodity (name), + value REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, emis_comm) +); +CREATE TABLE IF NOT EXISTS MaxActivityGroup +( + region TEXT, + period INTEGER + REFERENCES TimePeriod (period), + group_name TEXT + REFERENCES TechGroup (group_name), + max_act REAL, + units TEXT, + notes TEXT, + PRIMARY KEY (region, period, group_name) +); + +CREATE TABLE RPSRequirement +( + region TEXT NOT NULL + REFERENCES Region (region), + period INTEGER NOT NULL + REFERENCES TimePeriod (period), + tech_group TEXT NOT NULL + REFERENCES TechGroup (group_name), + requirement REAL NOT NULL, + notes TEXT +); +CREATE TABLE TechGroupMember +( + group_name TEXT + REFERENCES TechGroup (group_name), + tech TEXT + REFERENCES Technology (tech), + PRIMARY KEY (group_name, tech) +); +CREATE TABLE IF NOT EXISTS Technology +( + tech TEXT NOT NULL PRIMARY KEY, + flag TEXT NOT NULL, + sector TEXT, + category TEXT, + sub_category TEXT, + unlim_cap INTEGER NOT NULL DEFAULT 0, + annual INTEGER NOT NULL DEFAULT 0, + reserve INTEGER NOT NULL DEFAULT 0, + curtail INTEGER NOT NULL DEFAULT 0, + retire INTEGER NOT NULL DEFAULT 0, + flex INTEGER NOT NULL DEFAULT 0, + variable INTEGER NOT NULL DEFAULT 0, + exchange INTEGER NOT NULL DEFAULT 0, + description TEXT, + FOREIGN KEY (flag) REFERENCES TechnologyType (label) +); +CREATE TABLE IF NOT EXISTS OutputCost +( + scenario TEXT, + region TEXT, + period INTEGER, + tech TEXT, + vintage INTEGER, + d_invest REAL, + d_fixed REAL, + d_var REAL, + d_emiss REAL, + invest REAL, + fixed REAL, + var REAL, + emiss REAL, + units TEXT, + PRIMARY KEY (scenario, region, period, tech, vintage), + FOREIGN KEY (vintage) REFERENCES TimePeriod (period), + FOREIGN KEY (tech) REFERENCES Technology (tech) +); +COMMIT; +PRAGMA FOREIGN_KEYS = 1; + + diff --git a/temoa/utilities/db_schema_compare.py b/temoa/utilities/db_schema_compare.py new file mode 100644 index 00000000..9268b683 --- /dev/null +++ b/temoa/utilities/db_schema_compare.py @@ -0,0 +1,158 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. 
Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 9/9/25 + +This quick utility compares the schema of a built sqlite database to a reference schema in the form of a SQL file + +""" +import sqlite3 +import tempfile +from pathlib import Path + +from definitions import PROJECT_ROOT + + +def table_fields(db_file: Path) -> dict[str, list[str]]: + """ + Extract the field names from the table definitions in the sqlite database + :param db_file: target database + :return: dictionary of table name : list of field names + """ + with sqlite3.connect(db_file) as con: + cur = con.cursor() + # Get all table definitions from sqlite_master + tables = cur.execute("SELECT name, sql FROM sqlite_master WHERE type='table'").fetchall() + + result = {} + for table_name, create_sql in tables: + # Extract column definitions between parentheses + fields_def = create_sql.split('(', 1)[1].rsplit(')', 1)[0] + + # this is a little janky, but the internals here aren't consistent across versions, so we eliminate some + # characters to make it easier to compare + fields_def = fields_def.replace('`', '').replace(')', '') + + # Parse field names from definitions + fields = [] + for field in fields_def.split(','): + field = field.strip() + if field: + # Take first word as field name + field_name = field.split()[0].strip('"[]') + fields.append(field_name) + + result[table_name] = fields + + return result + + +def has_units_field(table_data: dict[str, list[str]]) -> dict[str, bool]: + """ + determine if the table has a field for units + :param table_data: + :return: dictionary of table name : has units field + """ + res = {} + for key in table_data: + res[key] = any('unit' in val for val in table_data[key]) + return res + + +def compare_tables(orig_fields: dict, other_fields: dict) -> dict[str, tuple[list[str], list[str]]]: + """ + compare the table data from two databases and return a table-based comparison + :param orig_fields: + :param other_fields: + :return: dictionary of table name : (list of fields missing in other, list of new fields in other) + """ + res = {} + for key in orig_fields: + if key in other_fields: + res[key] = ( + list(set(orig_fields[key]) - set(other_fields[key])), + list(set(other_fields[key]) - set(orig_fields[key])), + ) + else: + res[key] = (orig_fields[key], []) + return res + + +def compare_db_to_schema( + db_file: Path, schema_file: Path +) -> dict[str, tuple[list[str], list[str]]]: + """ + compare the db provided to a baseline schema and return a table-based comparison + :param db_file: + :param schema_file: + :return: dictionary of table name : (list of fields missing in other, list of new fields in other) + """ + td = tempfile.TemporaryDirectory() + temp_db = Path(td.name) / 'temp.db' + with sqlite3.connect(temp_db) as con: + # bring in the new schema and execute + with open(schema_file, 'r') as src: + sql_script = src.read() + con.executescript(sql_script) + original_fields = table_fields(temp_db) + other_fields = table_fields(db_file) + + return compare_tables(orig_fields=original_fields, other_fields=other_fields) + + +def write_comparison_md(output_file: Path, orig_schema: Path, new_db: Path): + """write a shell of a markdown file with the comparison results""" + comp = compare_db_to_schema(new_db, orig_schema) + units_present = has_units_field(table_fields(new_db)) + with open(output_file, 'w') as f: + f.write(f'## Comparison of `{new_db.name}` to schema file `{orig_schema.name}`\n') + f.write('## Table Comparison\n') + f.write('| Table | Missing in Other | New in Other | 
Units in Other DB |\n') + f.write('|--------|-----------------|--------------|----------------|\n') + for key in comp: + f.write( + f"| {key} | {comp[key][0] if comp[key][0] else ''} | {comp[key][1] if comp[key][1] else ''} | {'yes' if units_present[key] else ''} |\n" + ) + + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + '--db_file', help='database file to compare', action='store', dest='db_file' + ) + parser.add_argument( + '--schema_file', help='schema file to compare against', action='store', dest='schema_file' + ) + args = parser.parse_args() + res = compare_db_to_schema(Path(args.db_file), Path(args.schema_file)) + for key in res: + if len(res[key][0]) > 0 or len(res[key][1]) > 0: + print(f'Table {key}:') + print(f' Fields/descriptors missing in other: {res[key][0]}') + print(f' Fields/descriptors new in other: {res[key][1]}') + + output_file = Path(PROJECT_ROOT, 'output_files', 'db_schema_comparison.md') + write_comparison_md(output_file, Path(args.schema_file), Path(args.db_file)) From c9b4622fe145c9131671e7446362d4487c2016a9 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 19 Sep 2025 14:02:05 -0700 Subject: [PATCH 30/70] Comment cleanup --- temoa/utilities/db_migration_to_v3.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/temoa/utilities/db_migration_to_v3.py b/temoa/utilities/db_migration_to_v3.py index 9cbcb092..939c43b7 100644 --- a/temoa/utilities/db_migration_to_v3.py +++ b/temoa/utilities/db_migration_to_v3.py @@ -48,10 +48,10 @@ ) parser.add_argument( '--schema', - help='Path to schema file (default=../../data_files/temoa_schema_v3.sql)', + help='Path to schema file (default=data_files/temoa_schema_v3.sql)', required=False, dest='schema', - default='../../data_files/temoa_schema_v3.sql', + default='data_files/temoa_schema_v3.sql', ) options = parser.parse_args() legacy_db: Path = Path(options.source_db) From cde9d75cd5e70d1440001ac300c5bce5bb7ffe60 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 19 Sep 2025 14:49:13 -0700 Subject: [PATCH 31/70] Modify table_writer.py to specify target fields to enable flexible writing to v3.0 or v3.1 databases --- temoa/temoa_model/table_writer.py | 48 ++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/temoa/temoa_model/table_writer.py b/temoa/temoa_model/table_writer.py index 7e97b6bf..310534bc 100644 --- a/temoa/temoa_model/table_writer.py +++ b/temoa/temoa_model/table_writer.py @@ -62,6 +62,8 @@ Note: This file borrows heavily from the legacy pformat_results.py, and is somewhat of a restructure of that code to accommodate the run modes more cleanly + +update 9/19/2025: Modifications to allow functionality with V3.0 or V3.1 database schema """ @@ -230,7 +232,7 @@ def _insert_objective_results(self, obj_vals: list, iteration: int) -> None: else self.config.scenario ) for obj_name, obj_value in obj_vals: - qry = 'INSERT INTO OutputObjective VALUES (?, ?, ?)' + qry = 'INSERT INTO OutputObjective (scenario, objective_name, total_system_cost) VALUES (?, ?, ?)' data = (scenario_name, obj_name, obj_value) self.con.execute(qry, data) self.con.commit() @@ -253,7 +255,7 @@ def write_emissions(self, iteration=None) -> None: continue entry = (scenario, ei.r, sector, ei.p, ei.e, ei.t, ei.v, val) data.append(entry) - qry = f'INSERT INTO OutputEmission VALUES {_marks(8)}' + qry = f'INSERT INTO OutputEmission (scenario, region, sector, period, emis_comm, tech, vintage, emission) VALUES {_marks(8)}' 
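        # Naming the target columns explicitly (rather than relying on a positional VALUES list)
        # is what lets the same writer target either a v3.0 or a v3.1 database: any column not
        # listed here, such as the new v3.1 'units' field, simply keeps its default value instead
        # of causing a column-count mismatch.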
self.con.executemany(qry, data) self.con.commit() @@ -269,7 +271,7 @@ def _insert_capacity_results(self, cap_data: CapData, iteration: int | None) -> s = self.tech_sectors.get(t) new_cap = (scenario, r, s, t, v, val) data.append(new_cap) - qry = 'INSERT INTO OutputBuiltCapacity VALUES (?, ?, ?, ?, ?, ?)' + qry = 'INSERT INTO OutputBuiltCapacity (scenario, region, sector, tech, vintage, capacity) VALUES (?, ?, ?, ?, ?, ?)' self.con.executemany(qry, data) # NetCapacity @@ -278,7 +280,7 @@ def _insert_capacity_results(self, cap_data: CapData, iteration: int | None) -> s = self.tech_sectors.get(t) new_net_cap = (scenario, r, s, p, t, v, val) data.append(new_net_cap) - qry = 'INSERT INTO OutputNetCapacity VALUES (?, ?, ?, ?, ?, ?, ?)' + qry = 'INSERT INTO OutputNetCapacity (scenario, region, sector, period, tech, vintage, capacity) VALUES (?, ?, ?, ?, ?, ?, ?)' self.con.executemany(qry, data) # Retired Capacity @@ -287,7 +289,7 @@ def _insert_capacity_results(self, cap_data: CapData, iteration: int | None) -> s = self.tech_sectors.get(t) new_retired_cap = (scenario, r, s, p, t, v, val) data.append(new_retired_cap) - qry = 'INSERT INTO OutputRetiredCapacity VALUES (?, ?, ?, ?, ?, ?, ?)' + qry = 'INSERT INTO OutputRetiredCapacity (scenario, region, sector, period, tech, vintage, capacity) VALUES (?, ?, ?, ?, ?, ?, ?)' self.con.executemany(qry, data) self.con.commit() @@ -319,14 +321,28 @@ def write_flow_tables(self, iteration=None) -> None: flows_by_type[flow_type].append(entry) table_associations = { - FlowType.OUT: 'OutputFlowOut', - FlowType.IN: 'OutputFlowIn', - FlowType.CURTAIL: 'OutputCurtailment', - FlowType.FLEX: 'OutputCurtailment', + FlowType.OUT: ('OutputFlowOut', 'flow'), + FlowType.IN: ('OutputFlowIn', 'flow'), + FlowType.CURTAIL: ('OutputCurtailment', 'curtailment'), + FlowType.FLEX: ('OutputCurtailment', 'curtailment'), } - - for flow_type, table_name in table_associations.items(): - qry = f'INSERT INTO {table_name} VALUES {_marks(11)}' + for flow_type, (table_name, value_field_name) in table_associations.items(): + fields = ', '.join( + ( + 'scenario', + 'region', + 'sector', + 'period', + 'season', + 'tod', + 'input_comm', + 'tech', + 'vintage', + 'output_comm', + value_field_name, + ) + ) + qry = f'INSERT INTO {table_name} ({fields}) VALUES {_marks(11)}' self.con.executemany(qry, flows_by_type[flow_type]) self.con.commit() @@ -375,7 +391,7 @@ def _insert_summary_flow_results(self, flow_data: dict, iteration: int | None) - entry = (*idx, flow) entries.append(entry) - qry = f'INSERT INTO OutputFlowOutSummary VALUES {_marks(9)}' + qry = f'INSERT INTO OutputFlowOutSummary (scenario, region, sector, period, input_comm, tech, vintage, output_comm, flow) VALUES {_marks(9)}' self.con.executemany(qry, entries) self.con.commit() @@ -502,7 +518,7 @@ def _write_cost_rows(self, entries, iteration=None): # let's be kind and sort by something reasonable (r, v, t, p) rows.sort(key=lambda r: (r[1], r[4], r[3], r[2])) cur = self.con.cursor() - qry = 'INSERT INTO OutputCost VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)' + qry = 'INSERT INTO OutputCost (scenario, region, period, tech, vintage, d_invest, d_fixed, d_var, d_emiss, invest, fixed, var, emiss) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)' cur.executemany(qry, rows) self.con.commit() @@ -515,7 +531,7 @@ def write_dual_variables(self, results: SolverResults, iteration=None): ) # collect the values constraint_data = results['Solution'].Constraint.items() dual_data = [(scenario_name, t[0], t[1]['Dual']) for t in constraint_data] - qry = 
'INSERT INTO OutputDualVariable VALUES (?, ?, ?)' + qry = 'INSERT INTO OutputDualVariable (scenario, constraint_name, dual) VALUES (?, ?, ?)' self.con.executemany(qry, dual_data) self.con.commit() @@ -534,7 +550,7 @@ def write_tweaks(self, iteration: int, change_records: Iterable[ChangeRecord]): change_record.new_value, ) records.append(element) - qry = 'INSERT INTO OutputMCDelta VALUES (?, ?, ?, ?, ?, ?)' + qry = 'INSERT INTO OutputMCDelta (scenario, run, param, param_index, old_val, new_val) VALUES (?, ?, ?, ?, ?, ?)' self.con.executemany(qry, records) self.con.commit() From 7833774c08fe26de53456939cbd18744ef7eb2b7 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Fri, 19 Sep 2025 15:24:03 -0700 Subject: [PATCH 32/70] Add to and Upgrade requirements --- requirements.in | 3 + requirements.txt | 315 ++++++++++++++++++++++++----------------------- 2 files changed, 167 insertions(+), 151 deletions(-) diff --git a/requirements.in b/requirements.in index b5e948a6..b1bca79d 100644 --- a/requirements.in +++ b/requirements.in @@ -42,3 +42,6 @@ gravis highspy scipy gurobipy + +# for database v3.1 +pint diff --git a/requirements.txt b/requirements.txt index bba2cd77..5a16a86c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,122 +1,120 @@ # -# This file is autogenerated by pip-compile with Python 3.12 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile +# pip-compile requirements.in # -alabaster==0.7.16 +alabaster==1.0.0 # via sphinx -alembic==1.13.2 +alembic==1.16.5 # via ixmp4 annotated-types==0.7.0 # via pydantic -anyio==4.4.0 +anyio==4.10.0 # via # httpx # jupyter-server # starlette -appdirs==1.4.4 - # via pint appnope==0.1.4 # via ipykernel -argon2-cffi==23.1.0 +argon2-cffi==25.1.0 # via jupyter-server -argon2-cffi-bindings==21.2.0 +argon2-cffi-bindings==25.1.0 # via argon2-cffi arrow==1.3.0 # via isoduration -asttokens==2.4.1 +asttokens==3.0.0 # via stack-data -async-lru==2.0.4 +async-lru==2.0.5 # via jupyterlab -attrs==24.2.0 +attrs==25.3.0 # via # jsonschema # referencing -babel==2.16.0 +babel==2.17.0 # via # jupyterlab-server # sphinx -beautifulsoup4==4.12.3 +beautifulsoup4==4.13.5 # via nbconvert -bleach==6.1.0 +bleach[css]==6.2.0 # via nbconvert -certifi==2024.8.30 +certifi==2025.8.3 # via # httpcore # httpx # requests -cffi==1.17.0 +cffi==2.0.0 # via argon2-cffi-bindings -charset-normalizer==3.3.2 +charset-normalizer==3.4.3 # via requests -click==8.1.7 +click==8.3.0 # via typer -comm==0.2.2 +comm==0.2.3 # via # ipykernel # ipywidgets -contourpy==1.3.0 +contourpy==1.3.3 # via matplotlib cycler==0.12.1 # via matplotlib -debugpy==1.8.5 +debugpy==1.8.17 # via ipykernel -decorator==5.1.1 +decorator==5.2.1 # via ipython defusedxml==0.7.1 # via nbconvert -deprecated==1.2.14 +deprecated==1.2.18 # via -r requirements.in -dill==0.3.8 +dill==0.4.0 # via multiprocess -docutils==0.20.1 +docutils==0.21.2 # via # pybtex-docutils # sphinx # sphinx-rtd-theme # sphinxcontrib-bibtex -et-xmlfile==1.1.0 +et-xmlfile==2.0.0 # via openpyxl -executing==2.0.1 +executing==2.2.1 # via stack-data -fastapi==0.112.2 +fastapi==0.116.2 # via ixmp4 -fastjsonschema==2.20.0 +fastjsonschema==2.21.2 # via nbformat flexcache==0.3 # via pint -flexparser==0.3.1 +flexparser==0.4 # via pint -fonttools==4.53.1 +fonttools==4.60.0 # via matplotlib fqdn==1.5.1 # via jsonschema -graphviz==0.20.3 +graphviz==0.21 # via -r requirements.in gravis==0.1.0 # via -r requirements.in -gurobipy==11.0.3 +gurobipy==12.0.3 # via -r requirements.in -h11==0.14.0 +h11==0.16.0 # via httpcore 
-h2==4.1.0 +h2==4.3.0 # via httpx -highspy==1.7.2 +highspy==1.11.0 # via -r requirements.in -hpack==4.0.0 +hpack==4.1.0 # via h2 -httpcore==1.0.5 +httpcore==1.0.9 # via httpx -httpx[http2]==0.27.2 +httpx[http2]==0.28.1 # via # ixmp4 # jupyterlab -hyperframe==6.0.1 +hyperframe==6.1.0 # via h2 -iam-units==2023.9.12 +iam-units==2025.9.12 # via pyam-iamc -idna==3.8 +idna==3.10 # via # anyio # httpx @@ -124,15 +122,15 @@ idna==3.8 # requests imagesize==1.4.1 # via sphinx -iniconfig==2.0.0 +iniconfig==2.1.0 # via pytest -ipykernel==6.29.5 +ipykernel==6.30.1 # via # -r requirements.in # jupyter # jupyter-console # jupyterlab -ipython==8.26.0 +ipython==9.5.0 # via # -r requirements.in # ipykernel @@ -140,37 +138,39 @@ ipython==8.26.0 # jupyter-console ipython-genutils==0.2.0 # via jupyter-contrib-nbextensions -ipywidgets==8.1.5 +ipython-pygments-lexers==1.1.1 + # via ipython +ipywidgets==8.1.7 # via jupyter isoduration==20.11.0 # via jsonschema -ixmp4==0.9.2 +ixmp4==0.12.0 # via pyam-iamc -jedi==0.19.1 +jedi==0.19.2 # via ipython -jinja2==3.1.4 +jinja2==3.1.6 # via # jupyter-server # jupyterlab # jupyterlab-server # nbconvert # sphinx -joblib==1.4.2 +joblib==1.5.2 # via -r requirements.in -json5==0.9.25 +json5==0.12.1 # via jupyterlab-server jsonpointer==3.0.0 # via jsonschema -jsonschema[format-nongpl]==4.23.0 +jsonschema[format-nongpl]==4.25.1 # via # jupyter-events # jupyterlab-server # nbformat -jsonschema-specifications==2023.12.1 +jsonschema-specifications==2025.9.1 # via jsonschema -jupyter==1.1.0 +jupyter==1.1.1 # via -r requirements.in -jupyter-client==8.6.2 +jupyter-client==8.6.3 # via # ipykernel # jupyter-console @@ -184,7 +184,7 @@ jupyter-contrib-core==0.4.2 # jupyter-nbextensions-configurator jupyter-contrib-nbextensions==0.7.0 # via -r requirements.in -jupyter-core==5.7.2 +jupyter-core==5.8.1 # via # ipykernel # jupyter-client @@ -197,15 +197,15 @@ jupyter-core==5.7.2 # nbclient # nbconvert # nbformat -jupyter-events==0.10.0 +jupyter-events==0.12.0 # via jupyter-server jupyter-highlight-selected-word==0.2.0 # via jupyter-contrib-nbextensions -jupyter-lsp==2.2.5 +jupyter-lsp==2.3.0 # via jupyterlab jupyter-nbextensions-configurator==0.6.4 # via jupyter-contrib-nbextensions -jupyter-server==2.14.2 +jupyter-server==2.17.0 # via # jupyter-lsp # jupyter-nbextensions-configurator @@ -215,7 +215,7 @@ jupyter-server==2.14.2 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server -jupyterlab==4.2.5 +jupyterlab==4.4.7 # via # jupyter # notebook @@ -225,24 +225,26 @@ jupyterlab-server==2.27.3 # via # jupyterlab # notebook -jupyterlab-widgets==3.0.13 +jupyterlab-widgets==3.0.15 # via ipywidgets -kiwisolver==1.4.5 +kiwisolver==1.4.9 # via matplotlib -latexcodec==3.0.0 +lark==1.2.2 + # via rfc3987-syntax +latexcodec==3.0.1 # via pybtex -lxml==5.3.0 +lxml==6.0.1 # via jupyter-contrib-nbextensions -mako==1.3.5 +mako==1.3.10 # via alembic -markdown-it-py==3.0.0 +markdown-it-py==4.0.0 # via rich -markupsafe==2.1.5 +markupsafe==3.0.2 # via # jinja2 # mako # nbconvert -matplotlib==3.9.2 +matplotlib==3.10.6 # via # -r requirements.in # pyam-iamc @@ -254,21 +256,21 @@ matplotlib-inline==0.1.7 # ipython mdurl==0.1.2 # via markdown-it-py -mistune==3.0.2 +mistune==3.1.4 # via nbconvert -multimethod==1.10 - # via pandera -multiprocess==0.70.16 +multiprocess==0.70.18 # via salib -mypy==1.11.2 +mypy==1.18.2 # via sqlalchemy -mypy-extensions==1.0.0 +mypy-extensions==1.1.0 # via # mypy # typing-inspect -nbclient==0.10.0 +narwhals==2.5.0 + # via plotly +nbclient==0.10.2 # via nbconvert 
-nbconvert==7.16.4 +nbconvert==7.16.6 # via # jupyter # jupyter-contrib-nbextensions @@ -280,11 +282,11 @@ nbformat==5.10.4 # nbconvert nest-asyncio==1.6.0 # via ipykernel -networkx==3.3 +networkx==3.5 # via -r requirements.in nose==1.3.7 # via pyutilib -notebook==7.2.2 +notebook==7.4.5 # via # jupyter # jupyter-contrib-core @@ -294,14 +296,13 @@ notebook-shim==0.2.4 # via # jupyterlab # notebook -numpy==2.1.0 +numpy==2.3.3 # via # -r requirements.in # contourpy # highspy # matplotlib # pandas - # pandera # pyam-iamc # pydoe # salib @@ -315,9 +316,10 @@ openpyxl==3.1.5 # pyam-iamc overrides==7.7.0 # via jupyter-server -packaging==24.1 +packaging==25.0 # via # ipykernel + # jupyter-events # jupyter-server # jupyterlab # jupyterlab-server @@ -327,47 +329,51 @@ packaging==24.1 # plotly # pytest # sphinx -pandas==2.2.2 +pandas==2.3.2 # via # -r requirements.in # ixmp4 - # pandera # pyam-iamc # salib # seaborn -pandera==0.20.3 +pandera==0.26.1 # via ixmp4 pandocfilters==1.5.1 # via nbconvert -parso==0.8.4 +parso==0.8.5 # via jedi +pathspec==0.12.1 + # via mypy pexpect==4.9.0 # via ipython -pillow==10.4.0 +pillow==11.3.0 # via matplotlib -pint==0.24.3 +pint==0.25 # via + # -r requirements.in # iam-units # pyam-iamc -platformdirs==4.2.2 - # via jupyter-core -plotly==5.24.0 +platformdirs==4.4.0 + # via + # jupyter-core + # pint +plotly==6.3.0 # via -r requirements.in -pluggy==1.5.0 +pluggy==1.6.0 # via pytest ply==3.11 # via pyomo -prometheus-client==0.20.0 +prometheus-client==0.23.1 # via jupyter-server -prompt-toolkit==3.0.47 +prompt-toolkit==3.0.52 # via # ipython # jupyter-console -psutil==6.0.0 +psutil==7.1.0 # via ipykernel -psycopg[binary]==3.2.1 +psycopg[binary]==3.2.10 # via ixmp4 -psycopg-binary==3.2.1 +psycopg-binary==3.2.10 # via psycopg ptyprocess==0.7.0 # via @@ -375,42 +381,44 @@ ptyprocess==0.7.0 # terminado pure-eval==0.2.3 # via stack-data -pyam-iamc==2.2.4 +pyam-iamc==3.0.0 # via -r requirements.in -pybtex==0.24.0 +pybtex==0.25.1 # via # pybtex-docutils # sphinxcontrib-bibtex pybtex-docutils==1.0.3 # via sphinxcontrib-bibtex -pycparser==2.22 +pycparser==2.23 # via cffi -pydantic==2.8.2 +pydantic==2.11.9 # via # fastapi # ixmp4 # pandera # pydantic-settings -pydantic-core==2.20.1 +pydantic-core==2.33.2 # via pydantic -pydantic-settings==2.4.0 +pydantic-settings==2.10.1 # via ixmp4 pydoe==0.3.8 # via -r requirements.in -pygments==2.18.0 +pygments==2.19.2 # via # ipython + # ipython-pygments-lexers # jupyter-console # nbconvert + # pytest # rich # sphinx -pyjwt==2.9.0 +pyjwt==2.10.1 # via ixmp4 -pyomo==6.8.0 +pyomo==6.9.4 # via -r requirements.in -pyparsing==3.1.4 +pyparsing==3.2.4 # via matplotlib -pytest==8.3.2 +pytest==8.4.2 # via -r requirements.in python-dateutil==2.9.0.post0 # via @@ -418,13 +426,13 @@ python-dateutil==2.9.0.post0 # jupyter-client # matplotlib # pandas -python-dotenv==1.0.1 +python-dotenv==1.1.1 # via # ixmp4 # pydantic-settings -python-json-logger==2.0.7 +python-json-logger==3.3.0 # via jupyter-events -pytz==2024.1 +pytz==2025.2 # via pandas pyutilib==6.0.0 # via -r requirements.in @@ -434,18 +442,18 @@ pyyaml==6.0.2 # jupyter-nbextensions-configurator # pyam-iamc # pybtex -pyzmq==26.2.0 +pyzmq==27.1.0 # via # ipykernel # jupyter-client # jupyter-console # jupyter-server -referencing==0.35.1 +referencing==0.36.2 # via # jsonschema # jsonschema-specifications # jupyter-events -requests==2.32.3 +requests==2.32.5 # via # jupyterlab-server # pyam-iamc @@ -458,17 +466,21 @@ rfc3986-validator==0.1.1 # via # jsonschema # jupyter-events -rich==13.8.0 
+rfc3987-syntax==1.1.0 + # via jsonschema +rich==14.1.0 # via # ixmp4 # typer -rpds-py==0.20.0 +roman-numerals-py==3.1.0 + # via sphinx +rpds-py==0.27.1 # via # jsonschema # referencing salib==1.5.1 # via -r requirements.in -scipy==1.14.1 +scipy==1.16.2 # via # -r requirements.in # pyam-iamc @@ -482,33 +494,28 @@ send2trash==1.8.3 # via jupyter-server shellingham==1.5.4 # via typer -six==1.16.0 +six==1.17.0 # via - # asttokens - # bleach - # pybtex # python-dateutil # pyutilib # rfc3339-validator sniffio==1.3.1 - # via - # anyio - # httpx -snowballstemmer==2.2.0 + # via anyio +snowballstemmer==3.0.1 # via sphinx -soupsieve==2.6 +soupsieve==2.8 # via beautifulsoup4 -sphinx==7.4.7 +sphinx==8.2.3 # via # -r requirements.in # sphinx-rtd-theme # sphinxcontrib-bibtex # sphinxcontrib-jquery -sphinx-rtd-theme==2.0.0 +sphinx-rtd-theme==3.0.2 # via -r requirements.in sphinxcontrib-applehelp==2.0.0 # via sphinx -sphinxcontrib-bibtex==2.6.2 +sphinxcontrib-bibtex==2.6.5 # via -r requirements.in sphinxcontrib-devhelp==2.0.0 # via sphinx @@ -526,31 +533,29 @@ sphinxcontrib-serializinghtml==2.0.0 # via # -r requirements.in # sphinx -sqlalchemy[mypy]==2.0.32 +sqlalchemy[mypy]==2.0.43 # via # alembic # ixmp4 # sqlalchemy # sqlalchemy-utils -sqlalchemy-utils==0.41.2 +sqlalchemy-utils==0.42.0 # via ixmp4 stack-data==0.6.3 # via ipython -starlette==0.38.2 +starlette==0.48.0 # via fastapi tabulate==0.9.0 # via -r requirements.in -tenacity==9.0.0 - # via plotly terminado==0.18.1 # via # jupyter-server # jupyter-server-terminals -tinycss2==1.3.0 - # via nbconvert +tinycss2==1.4.0 + # via bleach toml==0.10.2 # via ixmp4 -tornado==6.4.1 +tornado==6.5.2 # via # ipykernel # jupyter-client @@ -563,7 +568,6 @@ tornado==6.4.1 # terminado traitlets==5.14.3 # via - # comm # ipykernel # ipython # ipywidgets @@ -580,38 +584,49 @@ traitlets==5.14.3 # nbclient # nbconvert # nbformat -typeguard==4.3.0 +typeguard==4.4.4 # via pandera -typer==0.12.5 +typer==0.18.0 # via ixmp4 -types-python-dateutil==2.9.0.20240821 +types-python-dateutil==2.9.0.20250822 # via arrow -typing-extensions==4.12.2 +typing-extensions==4.15.0 # via # alembic + # anyio + # beautifulsoup4 # fastapi # flexcache # flexparser + # ipython # mypy + # pandera # pint # psycopg # pydantic # pydantic-core + # referencing # sqlalchemy + # starlette # typeguard # typer # typing-inspect + # typing-inspection typing-inspect==0.9.0 # via pandera -tzdata==2024.1 +typing-inspection==0.4.1 + # via + # pydantic + # pydantic-settings +tzdata==2025.2 # via pandas uri-template==1.3.0 # via jsonschema -urllib3==2.2.2 +urllib3==2.5.0 # via requests wcwidth==0.2.13 # via prompt-toolkit -webcolors==24.8.0 +webcolors==24.11.1 # via jsonschema webencodings==0.5.1 # via @@ -619,15 +634,13 @@ webencodings==0.5.1 # tinycss2 websocket-client==1.8.0 # via jupyter-server -widgetsnbextension==4.0.13 +widgetsnbextension==4.0.14 # via ipywidgets wquantiles==0.6 # via pyam-iamc -wrapt==1.16.0 - # via - # deprecated - # pandera -xlsxwriter==3.2.0 +wrapt==1.17.3 + # via deprecated +xlsxwriter==3.2.9 # via # -r requirements.in # pyam-iamc From ad035bb33495bec44c33e717b59faf34aafd50fc Mon Sep 17 00:00:00 2001 From: Jeff H Date: Mon, 22 Sep 2025 18:43:24 -0700 Subject: [PATCH 33/70] Add migration tool for database to move from v3.0 to v3.1 [database schema] --- temoa/utilities/db_migration_v3_to_v3_1.py | 310 +++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 temoa/utilities/db_migration_v3_to_v3_1.py diff --git a/temoa/utilities/db_migration_v3_to_v3_1.py 
b/temoa/utilities/db_migration_v3_to_v3_1.py new file mode 100644 index 00000000..114b371d --- /dev/null +++ b/temoa/utilities/db_migration_v3_to_v3_1.py @@ -0,0 +1,310 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 3/21/24 + +Utility to transition a Version 3.0 Database to 3.1 + +Dev Note: By copying data into the new schema (instead of adding columns) we can (a) control the sequence of the +columns (a nice touch, but the program should not be relying on this) and (b) capture any upgrades to FK's or +such in the new schema. + +Dev Note: This will also work if the "units" column is already added to the legacy DB, perhaps in non-standard location + +Version 3.1 *only* adds the "units" to various tables. No new tables are introduced +""" + +import argparse +import sqlite3 +import sys +from pathlib import Path + + +direct_transfer_tables = [ + 'MetaDataReal', + 'OutputDualVariable', + 'SectorLabel', + 'CapacityCredit', + 'CapacityFactorProcess', + 'CapacityFactorTech', + 'CommodityType', + 'CostEmission', + 'CostFixed', + 'CostInvest', + 'CostVariable', + 'Demand', + 'DemandSpecificDistribution', + 'LoanRate', + 'EmissionActivity', + 'ExistingCapacity', + 'TechGroup', + 'GrowthRateMax', + 'GrowthRateSeed', + 'LinkedTech', + 'MaxActivity', + 'MaxCapacity', + 'MaxResource', + 'MinActivity', + 'MaxCapacityGroup', + 'MinCapacity', + 'MinCapacityGroup', + 'PlanningReserveMargin', + 'RampDown', + 'RampUp', + 'Region', + 'TimeSegmentFraction', + 'StorageInit', + 'TechnologyType', + 'TechInputSplit', + 'TechInputSplitAverage', + 'TechOutputSplit', + 'TimeOfDay', + 'TimePeriod', + 'TimeSeason', + 'TimePeriodType', + 'MaxActivityShare', + 'MaxCapacityShare', + 'MaxAnnualCapacityFactor', + 'MaxNewCapacity', + 'MaxNewCapacityGroup', + 'MaxNewCapacityShare', + 'MinActivityShare', + 'MinAnnualCapacityFactor', + 'MinCapacityShare', + 'MinNewCapacity', + 'MinNewCapacityGroup', + 'MinNewCapacityShare', + 'MinActivityGroup', + 'EmissionLimit', + 'MaxActivityGroup', + 'RPSRequirement', + 'TechGroupMember', + 'Technology', +] +# transfer with omission (allow new schema values to persist) or modification (Not Implemented) +# omits are field, value tuples in the given table +transfer_with_mod = {'MetaData': {'omits': [('element', 'DB_MAJOR'), ('element', 'DB_MINOR')]}} + +add_units_tables = [ + 'OutputObjective', + 'CapacityToActivity', + 'Commodity', + 'Efficiency', + 'LoanLifetimeTech', + 'LifetimeProcess', + 'LifetimeTech', + 'OutputCurtailment', + 'OutputNetCapacity', + 'OutputBuiltCapacity', + 'OutputRetiredCapacity', + 'OutputFlowIn', + 'OutputFlowOut', + 'StorageDuration', + 'OutputEmission', + 'OutputCost', +] + +parser = 
argparse.ArgumentParser() +parser.add_argument( + '--source', + help='Path to original database', + required=True, + action='store', + dest='source_db', +) +parser.add_argument( + '--schema', + help='Path to schema file (default=data_files/temoa_schema_v3_1.sql)', + required=False, + dest='schema', + default='data_files/temoa_schema_v3_1.sql', +) +options = parser.parse_args() +legacy_db: Path = Path(options.source_db) +schema_file = Path(options.schema) + +new_db_name = legacy_db.stem + '_v3_1.sqlite' +new_db_path = Path(legacy_db.parent, new_db_name) + +con_old = sqlite3.connect(legacy_db) +con_new = sqlite3.connect(new_db_path) +cur = con_new.cursor() + +# bring in the new schema and execute to build new db +with open(schema_file, 'r') as src: + sql_script = src.read() +con_new.executescript(sql_script) + +# turn off FK verification while process executes +con_new.execute('PRAGMA foreign_keys = 0;') + +# belt & suspenders check that we have all tables in the schema covered +v31_tables = { + t[0] for t in (con_new.execute('SELECT name FROM sqlite_master WHERE type="table"').fetchall()) +} +covered = set(direct_transfer_tables + add_units_tables + list(transfer_with_mod.keys())) +deltas = v31_tables ^ covered +if deltas: + missing = v31_tables - covered + extra = covered - v31_tables + print(f'ERROR: missing tables (from transfer list): {missing}') + print(f"ERROR: extra tables (that don't exist in schema): {extra}") + con_new.close() + con_old.close() + sys.exit(-1) + + +# execute the direct transfers +print('\n --- Executing direct transfers ---') +for table_name in direct_transfer_tables: + try: + # Get column names from both databases + old_cols = [ + row[1] for row in con_old.execute(f'PRAGMA table_info({table_name})').fetchall() + ] + new_cols = [ + row[1] for row in con_new.execute(f'PRAGMA table_info({table_name})').fetchall() + ] + + if set(old_cols) != set(new_cols): + print(f'WARNING: Column mismatch in {table_name}') + print(f'Old columns: {old_cols}') + print(f'New columns: {new_cols}') + con_new.close() + con_old.close() + sys.exit(-1) + + # Get data from old database with explicit column order + cols_str = ', '.join(new_cols) + data = con_old.execute(f'SELECT {cols_str} FROM {table_name}').fetchall() + except sqlite3.OperationalError: + print(f'TABLE NOT FOUND: {table_name} (creating blank table)') + data = [] + continue + + if not data: + print(f'No data for {table_name} (transferring blank table)') + continue + + # Construct insert query with explicit columns + placeholders = ','.join(['?' 
for _ in range(len(new_cols))]) + cols_str = ', '.join(new_cols) + query = f'INSERT OR REPLACE INTO {table_name} ({cols_str}) VALUES ({placeholders})' + con_new.executemany(query, data) + print(f'inserted {len(data)} rows into {table_name}') + +# execute transfer with modifications +print('\n --- Executing transfers with modifications ---') +for table_name, mod_dict in transfer_with_mod.items(): + try: + # Get column names from both databases + old_cols = [ + row[1] for row in con_old.execute(f'PRAGMA table_info({table_name})').fetchall() + ] + new_cols = [ + row[1] for row in con_new.execute(f'PRAGMA table_info({table_name})').fetchall() + ] + if set(old_cols) != set(new_cols): + print(f'WARNING: Column mismatch in {table_name}') + print(f'Old columns: {old_cols}') + print(f'New columns: {new_cols}') + con_new.close() + con_old.close() + sys.exit(-1) + # Get data from old database with explicit column order + cols_str = ', '.join(new_cols) + + # make exclusion statement + exclusions = ' AND '.join(f'{field} != {value}' for field, value in mod_dict['omits']) + + data = con_old.execute(f'SELECT {cols_str} FROM {table_name} WHERE {exclusions}').fetchall() + + except sqlite3.OperationalError: + print(f'TABLE NOT FOUND: {table_name} (using default from schema)') + continue + + # Construct insert query with explicit columns + placeholders = ','.join(['?' for _ in range(len(new_cols))]) + query = f'INSERT OR REPLACE INTO {table_name} ({cols_str}) VALUES ({placeholders})' + con_new.executemany(query, data) + print(f'inserted {len(data)} rows into {table_name}') + + +# do the tables with units added +print('\n --- Adding "units" to tables with units added ---') +for table_name in add_units_tables: + try: + # Get column names from both databases + old_cols = [ + row[1] for row in con_old.execute(f'PRAGMA table_info({table_name})').fetchall() + ] + new_cols = [ + row[1] for row in con_new.execute(f'PRAGMA table_info({table_name})').fetchall() + ] + + if set(old_cols + ['units']) != set(new_cols): + print(f'WARNING: Column mismatch in {table_name}') + print(f'Old columns: {old_cols}') + print(f'New columns: {new_cols}') + continue + + # Get data from old database with explicit column order + cols_str = ', '.join(old_cols) + data = con_old.execute(f'SELECT {cols_str} FROM {table_name}').fetchall() + except sqlite3.OperationalError: + print(f'TABLE NOT FOUND: {table_name} (creating blank table)') + data = [] + continue + + if not data: + print(f'No data for {table_name} (transferring blank table)') + continue + + # Construct insert query with explicit columns + placeholders = ','.join(['?' for _ in range(len(old_cols))]) + query = f'INSERT OR REPLACE INTO {table_name} ({cols_str}) VALUES ({placeholders})' + con_new.executemany(query, data) + print(f'inserted {len(data)} rows into {table_name}') + + +con_new.commit() +con_new.execute('VACUUM;') +con_new.execute('PRAGMA FOREIGN_KEYS=1;') +try: + data = con_new.execute('PRAGMA FOREIGN_KEY_CHECK;').fetchall() + print('FK check fails (MUST BE FIXED):') + if not data: + print('\tNo Foreign Key Failures. (Good news!)') + else: + print('\t(Table, Row ID, Reference Table, (fkid) )') + for row in data: + print(f'\t{row}') +except sqlite3.OperationalError as e: + print('Foreign Key Check FAILED on new DB. 
Something may be wrong with schema.') + print(e) + +# move the GlobalDiscountRate +# move the myopic base year +con_new.close() +con_old.close() From 6b6ba263beb10be52e859d650d1b8f507e34c4c0 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 23 Sep 2025 08:02:23 -0700 Subject: [PATCH 34/70] Correct bogus FK reference for season in several schemas --- data_files/temoa_schema_minimal_v3.sql | 2 +- data_files/temoa_schema_minimal_v3_1.sql | 2 +- data_files/temoa_schema_v3.sql | 2 +- data_files/temoa_schema_v3_1.sql | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data_files/temoa_schema_minimal_v3.sql b/data_files/temoa_schema_minimal_v3.sql index 92438444..363df135 100644 --- a/data_files/temoa_schema_minimal_v3.sql +++ b/data_files/temoa_schema_minimal_v3.sql @@ -346,7 +346,7 @@ CREATE TABLE IF NOT EXISTS OutputCurtailment period INTEGER REFERENCES TimePeriod (period), season TEXT - REFERENCES TimePeriod (period), + REFERENCES TimeSeason (season), tod TEXT REFERENCES TimeOfDay (tod), input_comm TEXT diff --git a/data_files/temoa_schema_minimal_v3_1.sql b/data_files/temoa_schema_minimal_v3_1.sql index 567c8033..cf5b91ec 100644 --- a/data_files/temoa_schema_minimal_v3_1.sql +++ b/data_files/temoa_schema_minimal_v3_1.sql @@ -358,7 +358,7 @@ CREATE TABLE IF NOT EXISTS OutputCurtailment period INTEGER REFERENCES TimePeriod (period), season TEXT - REFERENCES TimePeriod (period), + REFERENCES TimeSeason (season), tod TEXT REFERENCES TimeOfDay (tod), input_comm TEXT diff --git a/data_files/temoa_schema_v3.sql b/data_files/temoa_schema_v3.sql index 3c03e7b8..c8e6f91e 100644 --- a/data_files/temoa_schema_v3.sql +++ b/data_files/temoa_schema_v3.sql @@ -411,7 +411,7 @@ CREATE TABLE IF NOT EXISTS OutputCurtailment period INTEGER REFERENCES TimePeriod (period), season TEXT - REFERENCES TimePeriod (period), + REFERENCES TimeSeason (season), tod TEXT REFERENCES TimeOfDay (tod), input_comm TEXT diff --git a/data_files/temoa_schema_v3_1.sql b/data_files/temoa_schema_v3_1.sql index b5373e68..2155a284 100644 --- a/data_files/temoa_schema_v3_1.sql +++ b/data_files/temoa_schema_v3_1.sql @@ -418,7 +418,7 @@ CREATE TABLE IF NOT EXISTS OutputCurtailment period INTEGER REFERENCES TimePeriod (period), season TEXT - REFERENCES TimePeriod (period), + REFERENCES TimeSeason (season), tod TEXT REFERENCES TimeOfDay (tod), input_comm TEXT From e62c78a6dfb6f880136ccc4cd48ceeb19da8db9e Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 23 Sep 2025 08:19:46 -0700 Subject: [PATCH 35/70] Fix mis-named field in MinNewCapacityShare table from "max" to "min..." 
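Note for databases that were already built from the earlier v3 schema: the rename below only touches the SQL schema files and the loader query, so an existing database may still carry the old `max_proportion` column in MinNewCapacityShare. A minimal sketch of a one-off, in-place fix follows; it is illustrative only (the database path is a placeholder, not part of this patch) and relies on SQLite's ALTER TABLE ... RENAME COLUMN support (SQLite 3.25+).

    import sqlite3

    def rename_min_new_capacity_share_column(db_path: str) -> None:
        # sketch only: rename the mis-named column if it is still present
        con = sqlite3.connect(db_path)
        cols = [row[1] for row in con.execute('PRAGMA table_info(MinNewCapacityShare)')]
        if 'max_proportion' in cols and 'min_proportion' not in cols:
            con.execute(
                'ALTER TABLE MinNewCapacityShare RENAME COLUMN max_proportion TO min_proportion'
            )
            con.commit()
        con.close()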
--- data_files/temoa_schema_v3.sql | 2 +- data_files/temoa_schema_v3_1.sql | 2 +- temoa/temoa_model/hybrid_loader.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/data_files/temoa_schema_v3.sql b/data_files/temoa_schema_v3.sql index c8e6f91e..020fd574 100644 --- a/data_files/temoa_schema_v3.sql +++ b/data_files/temoa_schema_v3.sql @@ -801,7 +801,7 @@ CREATE TABLE IF NOT EXISTS MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/data_files/temoa_schema_v3_1.sql b/data_files/temoa_schema_v3_1.sql index 2155a284..fae8aaa9 100644 --- a/data_files/temoa_schema_v3_1.sql +++ b/data_files/temoa_schema_v3_1.sql @@ -815,7 +815,7 @@ CREATE TABLE IF NOT EXISTS MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/temoa/temoa_model/hybrid_loader.py b/temoa/temoa_model/hybrid_loader.py index 5963433a..f50d5377 100644 --- a/temoa/temoa_model/hybrid_loader.py +++ b/temoa/temoa_model/hybrid_loader.py @@ -936,7 +936,7 @@ def load_indexed_set(indexed_set: Set, index_value, element, element_validator): # MinNewCapacityShare if self.table_exists('MinNewCapacityShare'): raw = cur.execute( - 'SELECT region, period, tech, group_name, max_proportion FROM main.MinNewCapacityShare' + 'SELECT region, period, tech, group_name, min_proportion FROM main.MinNewCapacityShare' ).fetchall() load_element(M.MinCapacityShare, raw, self.viable_rt, (0, 2)) From cdb0e8aa08212d52c9dc5070d05f86685d3193a6 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 23 Sep 2025 08:20:31 -0700 Subject: [PATCH 36/70] Re-run of pip-compile using matching python version. 
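The interpreter mismatch corrected here is easy to reintroduce, so a small pre-flight check can help. The sketch below is illustrative only: it assumes it is run from the repository root and that the standard pip-compile header comment ("autogenerated by pip-compile with Python X.Y") is present in requirements.txt, as shown in the diff that follows.

    import re
    import sys
    from pathlib import Path

    text = Path('requirements.txt').read_text()
    match = re.search(r'autogenerated by pip-compile with Python (\d+)\.(\d+)', text)
    if match:
        pinned = (int(match.group(1)), int(match.group(2)))
        if pinned != sys.version_info[:2]:
            # warn before regenerating the lock file with the wrong interpreter
            print(
                f'requirements.txt was compiled with Python {pinned[0]}.{pinned[1]}, '
                f'but this interpreter is {sys.version_info.major}.{sys.version_info.minor}'
            )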
--- requirements.txt | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5a16a86c..10c200ff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.11 +# This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile requirements.in +# pip-compile --cert=None --client-cert=None --index-url=None --pip-args=None requirements.in # alabaster==1.0.0 # via sphinx @@ -314,8 +314,6 @@ openpyxl==3.1.5 # -r requirements.in # ixmp4 # pyam-iamc -overrides==7.7.0 - # via jupyter-server packaging==25.0 # via # ipykernel @@ -598,7 +596,6 @@ typing-extensions==4.15.0 # fastapi # flexcache # flexparser - # ipython # mypy # pandera # pint From 8b64c9144144cc395c1f01eb4f14a765b772b051 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 23 Sep 2025 08:35:17 -0700 Subject: [PATCH 37/70] Chase out erroneous field name in sql files to match corrected schema --- data_files/example_dbs/morris_utopia.sql | 2 +- data_files/example_dbs/stepped_demand.sql | 2 +- data_files/example_dbs/utopia.sql | 2 +- tests/testing_data/emissions.sql | 2 +- tests/testing_data/mediumville.sql | 2 +- tests/testing_data/simple_linked_tech.sql | 2 +- tests/testing_data/storageville.sql | 2 +- tests/testing_data/test_system.sql | 2 +- tests/testing_data/utopia.sql | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/data_files/example_dbs/morris_utopia.sql b/data_files/example_dbs/morris_utopia.sql index 9eb1b62b..3fe7dd70 100644 --- a/data_files/example_dbs/morris_utopia.sql +++ b/data_files/example_dbs/morris_utopia.sql @@ -1020,7 +1020,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/data_files/example_dbs/stepped_demand.sql b/data_files/example_dbs/stepped_demand.sql index 6b6fce42..53a2f058 100644 --- a/data_files/example_dbs/stepped_demand.sql +++ b/data_files/example_dbs/stepped_demand.sql @@ -1230,7 +1230,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/data_files/example_dbs/utopia.sql b/data_files/example_dbs/utopia.sql index 704867bb..b77da082 100644 --- a/data_files/example_dbs/utopia.sql +++ b/data_files/example_dbs/utopia.sql @@ -1208,7 +1208,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/tests/testing_data/emissions.sql b/tests/testing_data/emissions.sql index 23203e96..e6036668 100644 --- a/tests/testing_data/emissions.sql +++ b/tests/testing_data/emissions.sql @@ -835,7 +835,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/tests/testing_data/mediumville.sql b/tests/testing_data/mediumville.sql index 6d3315fb..7b6a15de 100644 --- a/tests/testing_data/mediumville.sql +++ b/tests/testing_data/mediumville.sql @@ -928,7 +928,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology 
(tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/tests/testing_data/simple_linked_tech.sql b/tests/testing_data/simple_linked_tech.sql index ce9a7fc7..edf0f1f7 100644 --- a/tests/testing_data/simple_linked_tech.sql +++ b/tests/testing_data/simple_linked_tech.sql @@ -822,7 +822,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/tests/testing_data/storageville.sql b/tests/testing_data/storageville.sql index 0cf789f6..2112b12b 100644 --- a/tests/testing_data/storageville.sql +++ b/tests/testing_data/storageville.sql @@ -846,7 +846,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/tests/testing_data/test_system.sql b/tests/testing_data/test_system.sql index d2a9b293..2c12de30 100644 --- a/tests/testing_data/test_system.sql +++ b/tests/testing_data/test_system.sql @@ -1234,7 +1234,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); diff --git a/tests/testing_data/utopia.sql b/tests/testing_data/utopia.sql index 2d49c629..e8e38766 100644 --- a/tests/testing_data/utopia.sql +++ b/tests/testing_data/utopia.sql @@ -1208,7 +1208,7 @@ CREATE TABLE MinNewCapacityShare REFERENCES Technology (tech), group_name TEXT REFERENCES TechGroup (group_name), - max_proportion REAL, + min_proportion REAL, notes TEXT, PRIMARY KEY (region, period, tech, group_name) ); From 15d73fd385702b74dd952960049274327409cbaf Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 23 Sep 2025 09:13:45 -0700 Subject: [PATCH 38/70] Update table_writer.py for OutputCurtailment to pre-merge curtailment and flex to avoid collisions if both write independently --- temoa/temoa_model/table_writer.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/temoa/temoa_model/table_writer.py b/temoa/temoa_model/table_writer.py index 310534bc..94cabce0 100644 --- a/temoa/temoa_model/table_writer.py +++ b/temoa/temoa_model/table_writer.py @@ -320,11 +320,21 @@ def write_flow_tables(self, iteration=None) -> None: entry = (scenario, fi.r, sector, fi.p, fi.s, fi.d, fi.i, fi.t, fi.v, fi.o, val) flows_by_type[flow_type].append(entry) + # merge FLEX into CURTAIL to avoid duplicate PK rows + if flows_by_type[FlowType.FLEX]: + from collections import defaultdict as _dd + + _sum = _dd(float) + # walk through all entries in both collections, summing the values (last column) + for entry in (*flows_by_type[FlowType.CURTAIL], *flows_by_type[FlowType.FLEX]): + _sum[entry[:-1]] += entry[-1] + flows_by_type[FlowType.CURTAIL] = [(*k, v) for k, v in _sum.items()] + flows_by_type[FlowType.FLEX] = [] + table_associations = { FlowType.OUT: ('OutputFlowOut', 'flow'), FlowType.IN: ('OutputFlowIn', 'flow'), FlowType.CURTAIL: ('OutputCurtailment', 'curtailment'), - FlowType.FLEX: ('OutputCurtailment', 'curtailment'), } for flow_type, (table_name, value_field_name) in table_associations.items(): fields = ', '.join( From 08586e0ef8eaeaee571776ee12054c2aafb8db7a Mon Sep 17 00:00:00 
2001 From: Jeff H Date: Tue, 23 Sep 2025 09:40:14 -0700 Subject: [PATCH 39/70] Update table_writer.py for OutputCurtailment to pre-merge curtailment and flex to avoid collisions if both write independently --- temoa/utilities/db_migration_v3_to_v3_1.py | 19 ++++++++++++------- temoa/utilities/db_schema_compare.py | 7 ++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/temoa/utilities/db_migration_v3_to_v3_1.py b/temoa/utilities/db_migration_v3_to_v3_1.py index 114b371d..f62b5eaa 100644 --- a/temoa/utilities/db_migration_v3_to_v3_1.py +++ b/temoa/utilities/db_migration_v3_to_v3_1.py @@ -160,9 +160,10 @@ con_new.execute('PRAGMA foreign_keys = 0;') # belt & suspenders check that we have all tables in the schema covered -v31_tables = { - t[0] for t in (con_new.execute('SELECT name FROM sqlite_master WHERE type="table"').fetchall()) -} + +table_query_result = con_new.execute('SELECT name FROM sqlite_master WHERE type="table"').fetchall() +v31_tables = {t[0] for t in table_query_result} + covered = set(direct_transfer_tables + add_units_tables + list(transfer_with_mod.keys())) deltas = v31_tables ^ covered if deltas: @@ -236,9 +237,12 @@ cols_str = ', '.join(new_cols) # make exclusion statement - exclusions = ' AND '.join(f'{field} != {value}' for field, value in mod_dict['omits']) - - data = con_old.execute(f'SELECT {cols_str} FROM {table_name} WHERE {exclusions}').fetchall() + where = ' AND '.join(f'{field} != ?' for field, _ in mod_dict['omits']) + params = tuple(v for _, v in mod_dict['omits']) + data = con_old.execute( + f'SELECT {cols_str} FROM {table_name} WHERE {where}', + params, + ).fetchall() except sqlite3.OperationalError: print(f'TABLE NOT FOUND: {table_name} (using default from schema)') @@ -264,7 +268,8 @@ ] if set(old_cols + ['units']) != set(new_cols): - print(f'WARNING: Column mismatch in {table_name}') + print(f'WARNING: Column mismatch in {table_name}. NO DATA TRANSFERRED FOR THIS TABLE. 
' + 'MUST DO MANUALLY or ALIGN AND RE-RUN AGENT.') print(f'Old columns: {old_cols}') print(f'New columns: {new_cols}') continue diff --git a/temoa/utilities/db_schema_compare.py b/temoa/utilities/db_schema_compare.py index 9268b683..14d9f1fd 100644 --- a/temoa/utilities/db_schema_compare.py +++ b/temoa/utilities/db_schema_compare.py @@ -104,8 +104,8 @@ def compare_db_to_schema( ) -> dict[str, tuple[list[str], list[str]]]: """ compare the db provided to a baseline schema and return a table-based comparison - :param db_file: - :param schema_file: + :param db_file: the other database + :param schema_file: the basis of comparison :return: dictionary of table name : (list of fields missing in other, list of new fields in other) """ td = tempfile.TemporaryDirectory() @@ -132,7 +132,8 @@ def write_comparison_md(output_file: Path, orig_schema: Path, new_db: Path): f.write('|--------|-----------------|--------------|----------------|\n') for key in comp: f.write( - f"| {key} | {comp[key][0] if comp[key][0] else ''} | {comp[key][1] if comp[key][1] else ''} | {'yes' if units_present[key] else ''} |\n" + f"| {key} | {comp[key][0] if comp[key][0] else ''} | {comp[key][1] if comp[key][1] else ''} | " + f"{'yes' if units_present.get(key, False) else ''} |\n" ) From e4a1e2019eb21604eb679a8a8fbfbf0fc44dd353 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 23 Sep 2025 10:40:38 -0700 Subject: [PATCH 40/70] Fix bugs in loader for shared capacity parameters --- temoa/temoa_model/hybrid_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/temoa/temoa_model/hybrid_loader.py b/temoa/temoa_model/hybrid_loader.py index f50d5377..09860df5 100644 --- a/temoa/temoa_model/hybrid_loader.py +++ b/temoa/temoa_model/hybrid_loader.py @@ -938,14 +938,14 @@ def load_indexed_set(indexed_set: Set, index_value, element, element_validator): raw = cur.execute( 'SELECT region, period, tech, group_name, min_proportion FROM main.MinNewCapacityShare' ).fetchall() - load_element(M.MinCapacityShare, raw, self.viable_rt, (0, 2)) + load_element(M.MinNewCapacityShare, raw, self.viable_rt, (0, 2)) # MaxNewCapacityShare if self.table_exists('MaxNewCapacityShare'): raw = cur.execute( 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxNewCapacityShare' ).fetchall() - load_element(M.MaxCapacityShare, raw, self.viable_rt, (0, 2)) + load_element(M.MaxNewCapacityShare, raw, self.viable_rt, (0, 2)) # MinActivityGroup if self.table_exists('MinActivityGroup'): From 32628e30921e9819548d1aa715cb17ac2808893e Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 23 Sep 2025 11:29:31 -0700 Subject: [PATCH 41/70] Picked up a few more (seldom used?) 
tables in hybrid_loader.py that should have the read "gated" by myopic index if running in myopic mode --- temoa/temoa_model/hybrid_loader.py | 104 ++++++++++++++++----- temoa/utilities/db_migration_v3_to_v3_1.py | 6 +- 2 files changed, 84 insertions(+), 26 deletions(-) diff --git a/temoa/temoa_model/hybrid_loader.py b/temoa/temoa_model/hybrid_loader.py index 09860df5..4caf18a7 100644 --- a/temoa/temoa_model/hybrid_loader.py +++ b/temoa/temoa_model/hybrid_loader.py @@ -921,30 +921,58 @@ def load_indexed_set(indexed_set: Set, index_value, element, element_validator): # MinCapacityShare if self.table_exists('MinCapacityShare'): - raw = cur.execute( - 'SELECT region, period, tech, group_name, min_proportion FROM main.MinCapacityShare' - ).fetchall() + if mi: + raw = cur.execute( + 'SELECT region, period, tech, group_name, min_proportion FROM main.MinCapacityShare' + ' WHERE period >= ? AND period <= ?', + (mi.base_year, mi.last_demand_year), + ).fetchall() + else: + raw = cur.execute( + 'SELECT region, period, tech, group_name, min_proportion FROM main.MinCapacityShare' + ).fetchall() load_element(M.MinCapacityShare, raw, self.viable_rt, (0, 2)) # MaxCapacityShare if self.table_exists('MaxCapacityShare'): - raw = cur.execute( - 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxCapacityShare' - ).fetchall() + if mi: + raw = cur.execute( + 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxCapacityShare' + ' WHERE period >= ? AND period <= ?', + (mi.base_year, mi.last_demand_year), + ).fetchall() + else: + raw = cur.execute( + 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxCapacityShare' + ).fetchall() load_element(M.MaxCapacityShare, raw, self.viable_rt, (0, 2)) # MinNewCapacityShare if self.table_exists('MinNewCapacityShare'): - raw = cur.execute( - 'SELECT region, period, tech, group_name, min_proportion FROM main.MinNewCapacityShare' - ).fetchall() + if mi: + raw = cur.execute( + 'SELECT region, period, tech, group_name, min_proportion FROM main.MinNewCapacityShare' + ' WHERE period >= ? AND period <= ?', + (mi.base_year, mi.last_demand_year), + ).fetchall() + else: + raw = cur.execute( + 'SELECT region, period, tech, group_name, min_proportion FROM main.MinNewCapacityShare' + ).fetchall() load_element(M.MinNewCapacityShare, raw, self.viable_rt, (0, 2)) # MaxNewCapacityShare if self.table_exists('MaxNewCapacityShare'): - raw = cur.execute( - 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxNewCapacityShare' - ).fetchall() + if mi: + raw = cur.execute( + 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxNewCapacityShare' + ' WHERE period >= ? AND period <= ?', + (mi.base_year, mi.last_demand_year), + ).fetchall() + else: + raw = cur.execute( + 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxNewCapacityShare' + ).fetchall() load_element(M.MaxNewCapacityShare, raw, self.viable_rt, (0, 2)) # MinActivityGroup @@ -977,16 +1005,30 @@ def load_indexed_set(indexed_set: Set, index_value, element, element_validator): # MinActivityShare if self.table_exists('MinActivityShare'): - raw = cur.execute( - 'SELECT region, period, tech, group_name, min_proportion FROM main.MinActivityShare' - ).fetchall() + if mi: + raw = cur.execute( + 'SELECT region, period, tech, group_name, min_proportion FROM main.MinActivityShare' + ' WHERE period >= ? 
AND period <= ?', + (mi.base_year, mi.last_demand_year), + ).fetchall() + else: + raw = cur.execute( + 'SELECT region, period, tech, group_name, min_proportion FROM main.MinActivityShare' + ).fetchall() load_element(M.MinActivityShare, raw, self.viable_rt, (0, 2)) # MaxActivityShare if self.table_exists('MaxActivityShare'): - raw = cur.execute( - 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxActivityShare' - ).fetchall() + if mi: + raw = cur.execute( + 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxActivityShare' + ' WHERE period >= ? AND period <= ?', + (mi.base_year, mi.last_demand_year), + ).fetchall() + else: + raw = cur.execute( + 'SELECT region, period, tech, group_name, max_proportion FROM main.MaxActivityShare' + ).fetchall() load_element(M.MaxActivityShare, raw, self.viable_rt, (0, 2)) # MaxResource @@ -1024,16 +1066,30 @@ def load_indexed_set(indexed_set: Set, index_value, element, element_validator): # MinAnnualCapacityFactor if self.table_exists('MinAnnualCapacityFactor'): - raw = cur.execute( - 'SELECT region, period, tech, output_comm, factor FROM main.MinAnnualCapacityFactor' - ).fetchall() + if mi: + raw = cur.execute( + 'SELECT region, period, tech, output_comm, factor FROM main.MinAnnualCapacityFactor' + ' WHERE period >= ? AND period <= ?', + (mi.base_year, mi.last_demand_year), + ).fetchall() + else: + raw = cur.execute( + 'SELECT region, period, tech, output_comm, factor FROM main.MinAnnualCapacityFactor' + ).fetchall() load_element(M.MinAnnualCapacityFactor, raw, self.viable_rt, (0, 2)) # MaxAnnualCapacityFactor if self.table_exists('MaxAnnualCapacityFactor'): - raw = cur.execute( - 'SELECT region, period, tech, output_comm, factor FROM main.MaxAnnualCapacityFactor' - ).fetchall() + if mi: + raw = cur.execute( + 'SELECT region, period, tech, output_comm, factor FROM main.MaxAnnualCapacityFactor' + ' WHERE period >= ? AND period <= ?', + (mi.base_year, mi.last_demand_year), + ).fetchall() + else: + raw = cur.execute( + 'SELECT region, period, tech, output_comm, factor FROM main.MaxAnnualCapacityFactor' + ).fetchall() load_element(M.MaxAnnualCapacityFactor, raw, self.viable_rt, (0, 2)) # GrowthRateMax diff --git a/temoa/utilities/db_migration_v3_to_v3_1.py b/temoa/utilities/db_migration_v3_to_v3_1.py index f62b5eaa..eebc56f6 100644 --- a/temoa/utilities/db_migration_v3_to_v3_1.py +++ b/temoa/utilities/db_migration_v3_to_v3_1.py @@ -268,8 +268,10 @@ ] if set(old_cols + ['units']) != set(new_cols): - print(f'WARNING: Column mismatch in {table_name}. NO DATA TRANSFERRED FOR THIS TABLE. ' - 'MUST DO MANUALLY or ALIGN AND RE-RUN AGENT.') + print( + f'WARNING: Column mismatch in {table_name}. NO DATA TRANSFERRED FOR THIS TABLE. ' + 'MUST DO MANUALLY or ALIGN AND RE-RUN AGENT.' 
+ ) print(f'Old columns: {old_cols}') print(f'New columns: {new_cols}') continue From d9e0f97f7788b55f7ac42875ec62dc6f84f2e1f2 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 25 Sep 2025 18:33:31 -0700 Subject: [PATCH 42/70] Fundamentals in place for unit checking w/ some scaffolding --- temoa/temoa_model/unit_checking/__init__.py | 31 ++++ temoa/temoa_model/unit_checking/common.py | 141 +++++++++++++++++ .../unit_checking/entry_checker.py | 149 ++++++++++++++++++ .../unit_checking/relation_checker.py | 110 +++++++++++++ temoa/temoa_model/unit_checking/screener.py | 91 +++++++++++ .../unit_checking/table_checker.py | 122 ++++++++++++++ tests/test_unit_checker.py | 81 ++++++++++ 7 files changed, 725 insertions(+) create mode 100644 temoa/temoa_model/unit_checking/__init__.py create mode 100644 temoa/temoa_model/unit_checking/common.py create mode 100644 temoa/temoa_model/unit_checking/entry_checker.py create mode 100644 temoa/temoa_model/unit_checking/relation_checker.py create mode 100644 temoa/temoa_model/unit_checking/screener.py create mode 100644 temoa/temoa_model/unit_checking/table_checker.py create mode 100644 tests/test_unit_checker.py diff --git a/temoa/temoa_model/unit_checking/__init__.py b/temoa/temoa_model/unit_checking/__init__.py new file mode 100644 index 00000000..3be10df4 --- /dev/null +++ b/temoa/temoa_model/unit_checking/__init__.py @@ -0,0 +1,31 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 9/19/25 + +""" + +from pint import UnitRegistry + +ureg = UnitRegistry() diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py new file mode 100644 index 00000000..77dea236 --- /dev/null +++ b/temoa/temoa_model/unit_checking/common.py @@ -0,0 +1,141 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. 
Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 9/19/25 + +common elements used within Unit Checking + +""" +from dataclasses import dataclass + +tables_with_units = [ + 'CapacityToActivity', + 'Commodity', + 'CostEmission', + 'CostFixed', + 'CostInvest', + 'CostVariable', + 'Demand', + 'Efficiency', + 'EmissionActivity', + 'EmissionLimit', + 'ExistingCapacity', + 'GrowthRateSeed', + 'LifetimeProcess', + 'LifetimeTech', + 'LoanLifetimeTech', + 'MaxActivity', + 'MaxActivityGroup', + 'MaxCapacity', + 'MaxCapacityGroup', + 'MaxNewCapacity', + 'MaxNewCapacityGroup', + 'MaxResource', + 'MinActivity', + 'MinActivityGroup', + 'MinCapacity', + 'MinCapacityGroup', + 'MinNewCapacity', + 'MinNewCapacityGroup', + 'OutputBuiltCapacity', + 'OutputCost', + 'OutputCurtailment', + 'OutputEmission', + 'OutputFlowIn', + 'OutputFlowOut', + 'OutputNetCapacity', + 'OutputObjective', + 'OutputRetiredCapacity', + 'StorageDuration', +] +"""Tables that have units""" + +ratio_units_tables = { + 'Efficiency', + 'EmissionActivity', + 'CostEmission', + 'CostFixed', + 'CostInvest', + 'CostVariable', +} +"""Tables that express a ratio of units in form "units / (other units)" """ + +activity_based_tables = [ + 'Demand', + 'MaxActivity', + 'MaxActivityGroup', + 'MaxResource', + 'MinActivity', + 'MinActivityGroup', +] +"""Tables that should have units equivalent to the commodity's native units""" + +capacity_based_tables = [ + 'MaxCapacity', + 'MaxCapacityGroup', + 'MaxNewCapacity', + 'MaxNewCapacityGroup', + 'MinCapacity', + 'MinCapacityGroup', + 'MinNewCapacity', + 'MinNewCapacityGroup', +] +"""Tables that require conversion via CapacityToActivity to reach the native units""" + +period_based_tables = [ + 'LifetimeProcess', + 'LifetimeTech', + 'LoanLifetimeTech', +] +"""Tables that align to the time period, presumably 'years'""" + +cost_based_tables = [ + 'CostEmission', + 'CostFixed', + 'CostInvest', + 'CostVariable', +] + + +# TODO: Unclear tables: MaxResource, GrowthRateSeed + +MIXED_UNITS = 42 +"""Marker for mixed units entries""" + + +@dataclass(frozen=True) +class UnitsFormat: + format: str + groups: int + + +# any gathering of letters and allowed symbols which are "*" and "_" with end lead/trail spaces trimmed +SINGLE_ELEMENT = UnitsFormat(format=r'^\s*([A-Za-z\*\_\s]+?)\s*$', groups=1) + +# any fractional expression using the same pattern above with the denominator IN PARENTHESES +RATIO_ELEMENT = UnitsFormat( + format=r'^\s*([A-Za-z\*\_\s]+?)\s*\/\s*\(\s*([A-Za-z\*\_\s]+?)\s*\)\s*$', groups=2 +) + +ACCEPTABLE_CHARACTERS = r'^\s*([A-Za-z\*\_\s\/\(\)]+?)\s*$' diff --git a/temoa/temoa_model/unit_checking/entry_checker.py b/temoa/temoa_model/unit_checking/entry_checker.py new file mode 100644 index 00000000..0a468db5 --- /dev/null +++ b/temoa/temoa_model/unit_checking/entry_checker.py @@ -0,0 +1,149 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 9/19/25 + +module to check all units entries in database for... + (1) existence :) + (2) general format (e.g. as a singleton or a ratio expression like Lumens / (Watt)) + (3) membership in units registry + +""" +import logging +import re +import sqlite3 +from collections import defaultdict +from pathlib import Path + +from pint import UndefinedUnitError + +from definitions import PROJECT_ROOT +from temoa.temoa_model.unit_checking import ureg +from temoa.temoa_model.unit_checking.common import ( + UnitsFormat, + RATIO_ELEMENT, + SINGLE_ELEMENT, + MIXED_UNITS, +) + +logger = logging.getLogger(__name__) + + +def validate_units_expression(expr: str) -> tuple[bool, ureg.Unit]: + """ + validate an entry against the units registry + :param expr: the expression to validate + :return: tuple of the validity and the converted expression + """ + try: + units = ureg.parse_units(expr) + return True, units + except UndefinedUnitError as e: + if expr == 'mixed': + return True, MIXED_UNITS + return False, None + + +def validate_units_format( + expr: str, unit_format: UnitsFormat +) -> tuple[bool, tuple[str, ...] | None]: + """validate against the format""" + if not expr: + return False, None + elements = re.search(unit_format.format, expr) + if elements: + return True, tuple(elements.groups()) + return False, None + + +def gather_from_table(conn: sqlite3.Connection, table: str) -> dict[str, list[int]]: + """gather all "units" entries from a table as a list of line numbers""" + + res = defaultdict(list) + with conn: + cur = conn.cursor() + cur.execute(f'SELECT units FROM {table}') + for idx, result in enumerate(cur.fetchall(), start=1): + res[result[0]].append(idx) + + return res + + +if __name__ == '__main__': + """for development/experimentation""" + exprs = [ + 'watt', + 'meter', + 'm', + 'petajoule', + 'PJ', + 'PJ/s', + 'PeTAJouLE', + 'PetaJoule', + 'kilowatt*hour', + 'killowathour', + 'KWh', + 'KWH', + ] + for expr in exprs: + success, converted = validate_units_expression(expr) + compatible = converted.is_compatible_with(ureg('joule')) if converted else '--' + if success: + print(f'{expr} converts to: {converted}. 
Compatible with joules: {compatible} ') + else: + print(f'{expr} failed to convert') + + tables = ['Efficiency', 'ExistingCapacity'] + formats = [RATIO_ELEMENT, SINGLE_ELEMENT] + + conn = sqlite3.connect( + Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' + ) + + def validate_entries(table_name, units_format: UnitsFormat): + """validate all entries in a table""" + conn = sqlite3.connect( + Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' + ) + res = gather_from_table(conn, table_name, line_nums=False) + conn.close() + for expr in res: + valid, elements = validate_units_format(expr, units_format) + if not valid: + print(f'Format Violation: {expr} in {table_name}') + else: + for group in elements: + if group: + success, converted = validate_units_expression(group) + if not success: + print(f'Unit Validation Fail: {expr} in {table_name}') + + for table in tables: + print(gather_from_table(conn, table)) + + conn.close() + print('\n\n') + + for t in zip(tables, formats): + validate_entries(t[0], t[1]) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py new file mode 100644 index 00000000..e73754a6 --- /dev/null +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -0,0 +1,110 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. 
Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 9/22/25 + +A systematic check of expected relationships between tables to ensure units are consistent + +""" +import logging +import sqlite3 +from pathlib import Path + +from fontTools.misc.bezierTools import namedtuple + +from definitions import PROJECT_ROOT +from temoa.temoa_model.unit_checking.common import RATIO_ELEMENT, SINGLE_ELEMENT, MIXED_UNITS +from temoa.temoa_model.unit_checking.entry_checker import validate_units_format + +logger = logging.getLogger(__name__) + + +def commodity_units(conn: sqlite3.Connection) -> dict[str, str]: + """Get a dictionary of the units for each commodity entry""" + res = {} + cursor = conn.cursor() + query = 'SELECT name, units FROM commodity' + cursor.execute(query) + rows = cursor.fetchall() + for comm, units in rows: + valid, group = validate_units_format(units, SINGLE_ELEMENT) + if not valid: + continue + # raise RuntimeError(f"Invalid units for commodity: {comm} {units}") + res[comm] = group[0] + return res + + +IOUnits = namedtuple('IOUnits', ['input_units', 'output_units']) + + +def check_efficiency_table( + conn: sqlite3.Connection, comm_units: dict[str, str] +) -> dict[str, IOUnits]: + """Check the technology units for Efficiency table entries""" + cursor = conn.cursor() + query = 'SELECT tech, input_comm, output_comm, units FROM efficiency' + cursor.execute(query) + rows = cursor.fetchall() + res = {} + for tech, ic, oc, units in rows: + valid, located_units = validate_units_format(units, RATIO_ELEMENT) + if not valid: + continue + # we should be 'clean' on the basic unist before getting here + # raise RuntimeError(f"Invalid units for efficiency table: {ic} {oc} {units}") + output_units, input_units = located_units + invalid_input = input_units != comm_units[ic] and input_units != MIXED_UNITS + invalid_output = output_units != comm_units[oc] + if invalid_input or invalid_output: + logger.error('Units conflict for Techology %s', tech) + msg = f"\n Expected: {f'{ic} [{input_units}]' :^25} ----> {tech :^20} ----> {f'{oc} [{output_units}]': ^25}" + if invalid_input: + msg += f'\n Invalid input units: {comm_units[ic]}' + if invalid_output: + msg += f'\n Invalid output units: {comm_units[oc]}' + logger.error(msg) + else: + res[tech] = IOUnits(input_units, output_units) + return res + + +def main(db_path: Path): + """Run unit relationship checks on database""" + logging.basicConfig(level=logging.INFO) + + try: + conn = sqlite3.connect(db_path) + comm_units = commodity_units(conn) + check_efficiency_table(conn, comm_units) + conn.close() + except sqlite3.Error as e: + logger.error(f'Database error: {e}') + except Exception as e: + logger.error(f'Error during check: {repr(e)}') + raise + + +if __name__ == '__main__': + main(Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite') diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py new file mode 100644 index 00000000..5584f1e5 --- /dev/null +++ b/temoa/temoa_model/unit_checking/screener.py @@ -0,0 +1,91 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 9/25/25 + +The main executable to screen for units in a v3.1 database + +""" +import logging +import sqlite3 +from pathlib import Path + +from definitions import PROJECT_ROOT +from temoa.temoa_model.unit_checking.common import tables_with_units +from temoa.temoa_model.unit_checking.table_checker import check_table + +logger = logging.getLogger(__name__) +verbose = True # for dev/test work + + +def screen(dp_path: Path, report_path: Path | None = None): + """The sequencer to run a series of checks on units in the database""" + report_entries = [] + with sqlite3.connect(dp_path) as conn: + # test 1: DB version + data = conn.execute('SELECT element, value FROM MetaData').fetchall() + meta_data = dict(data) + major = meta_data.get('DB_MAJOR', 0) + minor = meta_data.get('DB_MINOR', 0) + if major == 3 and minor >= 1: + msg = 'Units Check 1 (DB Version): Passed' + report_entries.extend((msg, '\n')) + logger.info(msg) + if verbose: + print(f'Units Check 1 (DB Version): Passed') + else: + msg = 'Units Check 1 (DB Version): Failed. DB must be v3.1 or greater for units checking' + report_entries.extend((msg, '\n')) + logger.warning(msg) + return + report_entries.append('\n') + + # test 2: Units in tables + msg = 'Units Check 2 (Units Entries in Tables): Started' + logger.info(msg) + report_entries.extend((msg, '\n')) + errors = False + for table in tables_with_units: + table_errors = check_table(conn, table) + if table_errors: + errors = True + for error in table_errors: + logger.warning('%s: %s', table, error) + report_entries.extend((f'{table}: {error}', '\n')) + if verbose: + print(f'{table}: {error}') + if not errors: + msg = 'Units Check 2 (Units Entries in Tables): Passed' + logger.info(msg) + report_entries.extend((msg, '\n')) + report_entries.append('\n') + + if report_path: + with open(report_path, 'w') as report_file: + report_file.writelines(report_entries) + + +if __name__ == '__main__': + db_path = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' + screen(db_path, report_path=Path(PROJECT_ROOT) / 'output_files/units.txt') diff --git a/temoa/temoa_model/unit_checking/table_checker.py b/temoa/temoa_model/unit_checking/table_checker.py new file mode 100644 index 00000000..b4be96a6 --- /dev/null +++ b/temoa/temoa_model/unit_checking/table_checker.py @@ -0,0 +1,122 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 9/22/25 + +functions to check tables within a version 3.1 database for units compliance + +""" +import logging +import re +import sqlite3 +from pathlib import Path + +from temoa.temoa_model.unit_checking.common import ( + tables_with_units, + ratio_units_tables, + RATIO_ELEMENT, + SINGLE_ELEMENT, + ACCEPTABLE_CHARACTERS, +) +from temoa.temoa_model.unit_checking.entry_checker import ( + validate_units_expression, + validate_units_format, + gather_from_table, +) + +logger = logging.getLogger(__name__) + + +def check_table(conn: sqlite3.Connection, table_name: str) -> list[str]: + """Check all entries in a table for format and registry compliance""" + errors = [] + format_type = RATIO_ELEMENT if table_name in ratio_units_tables else SINGLE_ELEMENT + + entries = gather_from_table(conn, table_name) + for expr, line_nums in entries.items(): + # check characters + valid_chars = re.search(ACCEPTABLE_CHARACTERS, expr) + if not valid_chars: + listed_lines = ( + line_nums + if len(line_nums) < 5 + else f'{", ".join(str(t) for t in line_nums[:5])}", ... more"' + ) + errors.append( + f'Invalid character(s) in {listed_lines} [only letters, underscore and "*, /" operators allowed]: {expr}' + ) + continue + + # Check format + valid, elements = validate_units_format(expr, format_type) + if not valid: + listed_lines = ( + line_nums + if len(line_nums) < 5 + else f'{", ".join(str(t) for t in line_nums[:5])}", ... more"' + ) + errors.append(f'Format violation at lines {listed_lines}: {expr}') + continue + + # Check registry compliance + for element in elements: + if element: + success, _ = validate_units_expression(element) + if not success: + listed_lines = ( + line_nums + if len(line_nums) < 5 + else f'{", ".join(str(t) for t in line_nums[:5])}", ... more"' + ) + errors.append( + f'Registry violation (UNK units) at lines {listed_lines}: {element}' + ) + return errors + + +def check_database(db_path: Path) -> list[str]: + """Check all tables in database for units compliance""" + errors = [] + conn = sqlite3.connect(db_path) + + for table in tables_with_units: + table_errors = check_table(conn, table) + errors.extend(table_errors) + + conn.close() + return errors + + +if __name__ == '__main__': + from definitions import PROJECT_ROOT + + test_db = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' + results = check_database(test_db) + + if results: + print('\nErrors found:') + for error in results: + print(error) + else: + print('\nNo errors found') diff --git a/tests/test_unit_checker.py b/tests/test_unit_checker.py new file mode 100644 index 00000000..0a433b4a --- /dev/null +++ b/tests/test_unit_checker.py @@ -0,0 +1,81 @@ +""" +Tools for Energy Model Optimization and Analysis (Temoa): +An open source framework for energy systems optimization modeling + +Copyright (C) 2015, NC State University + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +A complete copy of the GNU General Public License v2 (GPLv2) is available +in LICENSE.txt. Users uncompressing this from an archive may not have +received this license file. If not, see . + + +Written by: J. F. Hyink +jeff@westernspark.us +https://westernspark.us +Created on: 9/22/25 + +Set of tests for related to the unit checker + +""" +import pytest + +from temoa.temoa_model.unit_checking import ureg +from temoa.temoa_model.unit_checking.common import SINGLE_ELEMENT, RATIO_ELEMENT, MIXED_UNITS +from temoa.temoa_model.unit_checking.entry_checker import ( + validate_units_format, + validate_units_expression, +) + +cases = [ + ('PJ', SINGLE_ELEMENT, True), + (' kWh', SINGLE_ELEMENT, True), + ('dog_food ', SINGLE_ELEMENT, True), + (' G * tonne', SINGLE_ELEMENT, True), + ('Mt.steel ', SINGLE_ELEMENT, False), # period not allowed + ('PJ / day', SINGLE_ELEMENT, False), # no slash char + ('PJ / (kT)', RATIO_ELEMENT, True), + ('PJ / kT', RATIO_ELEMENT, False), # no parens on denom + ('kWh/day/(cycle)', RATIO_ELEMENT, False), # no slash char +] + + +@pytest.mark.parametrize( + 'entry, units_format, expected', + cases, + ids=[f"{t[0]} -> {'valid' if t[2] else 'invalid'}" for t in cases], +) +def test_format_validation(entry, units_format, expected): + """Test the regex matching for unit format + Note: The unit values here are NOT tested within the Units Registry""" + assert validate_units_format(expr=entry, unit_format=units_format) + + +cases = [ + ('kg', (True, ureg.kg)), + ('m/s', (True, ureg('m/s'))), + ('dog_food', (False, None)), + ('mixed', (True, MIXED_UNITS)), +] + + +@pytest.mark.parametrize( + 'expr, expected_result', + cases, + ids=[f"{t[0]} -> {'valid' if t[0] in ('kg', 'm/s', 'mixed') else 'invalid'}" for t in cases], +) +def test_validate_units_expression(expr, expected_result): + """ + Test the validate_units_expression function against various unit expressions. + """ + result = validate_units_expression(expr) + assert result == expected_result From 8ad2a9136eab62bfda253fe3f7c5d0ee878ac730 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 27 Sep 2025 13:45:35 -0700 Subject: [PATCH 43/70] First cut at documentation addition for unit checking --- docs/source/Documentation.rst | 68 +++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/docs/source/Documentation.rst b/docs/source/Documentation.rst index 1c144a34..1facd9d3 100644 --- a/docs/source/Documentation.rst +++ b/docs/source/Documentation.rst @@ -408,6 +408,74 @@ a particular region could easily be overlooked. Price checks performed/reported 6. **Uncapacitated Tech Costs**: Any technology flagged as `uncapacitated` will trigger warnings here if it has any fixed/invest costs. +Units Checking +-------------- +An upgrade to the database schema to Version 3.1 allows fairly complete units checking throughout the model. +Unit checking helps for consistency and accuracty in the model and also supports more complete documentation of +inputs and outputs. The Version 3.0 of the Temoa model will work with database versions of both 3.0 and 3.1. +The latter addition to the schema adds `units` to 16 tables, several of which are output tables. + +The Python package :code:`pint` is used to perform reference checking for units. 
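As a quick illustration of what that reference checking provides (a sketch using only standard pint units, separate from the checker code itself), two unit expressions can be parsed and tested for dimensional compatibility, which is the kind of comparison the checks rely on when reconciling a commodity's declared units with an Efficiency entry:

    import pint

    ureg = pint.UnitRegistry()
    commodity_units = ureg.parse_units('PJ')  # units declared on a commodity

    # dimensional compatibility holds regardless of prefix or spelling
    print(commodity_units.is_compatible_with(ureg('kWh')))       # True: both are energy
    print(commodity_units.is_compatible_with(ureg('kilogram')))  # False: energy vs. mass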
Using Pint allows us to leverage +Pint's built in unit registry to enable validating and equating units with varying prefixes and allows for possible +future extensions in processing. It is important to note that the units expressed and checked via :code:`pint` do not +"follow the values" through the mathematics of the model. The unit checking is merely a layer of pre-processing +used to support validation and documentation of units. The units are not used in the model itself. + +The basis for most unit comparisons and validations in the model comes from the `Commodity` table and the `Efficiency` +table. Commodities have native units of measure defined in their table. As the nodes in the energy network, this +establishes a convenient benchmark to validate nodal connections entering and leaving a commodity node. +The units of a technology are inferred from the `Efficiency` table which expresses the process modeled +by a ratio of units as output / input. + +* Expressing Units + Units in the Efficiency table should be expressed in the form: + + .. math:: + OutputUnits / ( InputUnits ) + + A Regular Expression is used to parse these units and expects the denominator to be parenthesized. Other tables + should just have a plain entry such as `PJ` or `peta joules`. Unique entries into the registry from Temoa + include: `dollar` (or `USD`), `euro` (or `EUR`), `passenger`, `seat` (to support passenger miles and seat miles), + and `ethos` to support the dimensionless starting point commonly used in Temoa as a source. + +* Mixed I/O + Technologies summarized in the `Efficiency` table must match the commodity (nodal) values they connect + as input/output. While it is ok (but perhaps unusual) to have differing input units, the output units must be + standardized, even if the output commodities differ. This is inferred from the many constraints on tech + activity which span regions and output commodities. For example a `MaxActivityGroup` constraint across the + `global` region set needs to be expressed in 1 set of units. An example might be a mixed power plant that + takes in barrels of oil or cubic meters of natural gas but outputs peta joules of electricity. + +* Testing values + It is possible to test the validity of units expressed separately from the model or when troubleshooting a + reported error. Temoa augments the default registry slightly by adding dollars and a few other miscellaneous + units for completeness. It is possible to load the registry and check values as such: + + .. code-block:: + + >>> import pint + >>> ureg = pint.UnitRegistry() + >>> _ = ureg.load_definitions('temoa/temoa_model/unit_checking/temoa_units.txt') # adds some additionals + >>> 'PJ' in ureg + True + >>> 'MJ' in ureg + True + >>> 'petajoule' in ureg + True + >>> 'peta joule' in ureg + False + >>> 'kWh' in ureg + True + >>> 'catfood' in ureg + False + >>> + +* Test sequencing + Tables with units are sequentially checked for illegal characters in the :code:`units` field, proper formatting, + validation of the units themselves. Data retrieved from the `Commodity` and `Efficiency` tables is then used + to QA entries in other fields for consistency. If selected in the configuration file, this process takes place
+ Source Tracing -------------- From 95a1ed5df184ee3c5037230f3b7899e1c0589f1d Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 27 Sep 2025 13:54:44 -0700 Subject: [PATCH 44/70] add custom units into default registry, remove MIXED, ETHOS from common.py --- temoa/temoa_model/unit_checking/__init__.py | 4 ++ temoa/temoa_model/unit_checking/common.py | 19 +++++-- .../unit_checking/entry_checker.py | 17 +++--- .../unit_checking/relation_checker.py | 52 ++++++++++++++----- temoa/temoa_model/unit_checking/screener.py | 32 ++++++++++-- .../unit_checking/table_checker.py | 50 ++++++++++-------- .../temoa_model/unit_checking/temoa_units.txt | 12 +++++ tests/test_unit_checker.py | 9 +++- 8 files changed, 141 insertions(+), 54 deletions(-) create mode 100644 temoa/temoa_model/unit_checking/temoa_units.txt diff --git a/temoa/temoa_model/unit_checking/__init__.py b/temoa/temoa_model/unit_checking/__init__.py index 3be10df4..6cb0fc7d 100644 --- a/temoa/temoa_model/unit_checking/__init__.py +++ b/temoa/temoa_model/unit_checking/__init__.py @@ -25,7 +25,11 @@ Created on: 9/19/25 """ +from pathlib import Path from pint import UnitRegistry +from definitions import PROJECT_ROOT + ureg = UnitRegistry() +ureg.load_definitions(Path(PROJECT_ROOT) / 'temoa/temoa_model/unit_checking/temoa_units.txt') diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index 77dea236..3a2618b5 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -81,11 +81,13 @@ } """Tables that express a ratio of units in form "units / (other units)" """ -activity_based_tables = [ +commodity_based_tables = [ 'Demand', + 'MaxResource', # haven't we done away with this table/constraint? +] +activity_based_tables = [ 'MaxActivity', 'MaxActivityGroup', - 'MaxResource', 'MinActivity', 'MinActivityGroup', ] @@ -120,9 +122,6 @@ # TODO: Unclear tables: MaxResource, GrowthRateSeed -MIXED_UNITS = 42 -"""Marker for mixed units entries""" - @dataclass(frozen=True) class UnitsFormat: @@ -137,5 +136,15 @@ class UnitsFormat: RATIO_ELEMENT = UnitsFormat( format=r'^\s*([A-Za-z\*\_\s]+?)\s*\/\s*\(\s*([A-Za-z\*\_\s]+?)\s*\)\s*$', groups=2 ) +"""Format for a units ratio. re will return the first group as the numerator and the second as the denominator""" ACCEPTABLE_CHARACTERS = r'^\s*([A-Za-z\*\_\s\/\(\)]+?)\s*$' + + +def consolidate_lines(line_nums: list[str | int]) -> list[str]: + listed_lines = ( + line_nums + if len(line_nums) < 5 + else f'{", ".join(str(t) for t in line_nums[:5])}", ... 
+{len(line_nums)-5} more"' + ) + return listed_lines diff --git a/temoa/temoa_model/unit_checking/entry_checker.py b/temoa/temoa_model/unit_checking/entry_checker.py index 0a468db5..8bc3922d 100644 --- a/temoa/temoa_model/unit_checking/entry_checker.py +++ b/temoa/temoa_model/unit_checking/entry_checker.py @@ -36,7 +36,7 @@ from collections import defaultdict from pathlib import Path -from pint import UndefinedUnitError +from pint import UndefinedUnitError, Unit from definitions import PROJECT_ROOT from temoa.temoa_model.unit_checking import ureg @@ -44,13 +44,12 @@ UnitsFormat, RATIO_ELEMENT, SINGLE_ELEMENT, - MIXED_UNITS, ) logger = logging.getLogger(__name__) -def validate_units_expression(expr: str) -> tuple[bool, ureg.Unit]: +def validate_units_expression(expr: str) -> tuple[bool, Unit]: """ validate an entry against the units registry :param expr: the expression to validate @@ -60,15 +59,16 @@ def validate_units_expression(expr: str) -> tuple[bool, ureg.Unit]: units = ureg.parse_units(expr) return True, units except UndefinedUnitError as e: - if expr == 'mixed': - return True, MIXED_UNITS return False, None def validate_units_format( expr: str, unit_format: UnitsFormat ) -> tuple[bool, tuple[str, ...] | None]: - """validate against the format""" + """ + validate against the format + return boolean for validity and tuple of elements if valid + """ if not expr: return False, None elements = re.search(unit_format.format, expr) @@ -105,6 +105,9 @@ def gather_from_table(conn: sqlite3.Connection, table: str) -> dict[str, list[in 'killowathour', 'KWh', 'KWH', + 'USD', + 'dollar', + 'passenger', ] for expr in exprs: success, converted = validate_units_expression(expr) @@ -126,7 +129,7 @@ def validate_entries(table_name, units_format: UnitsFormat): conn = sqlite3.connect( Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' ) - res = gather_from_table(conn, table_name, line_nums=False) + res = gather_from_table(conn, table_name) conn.close() for expr in res: valid, elements = validate_units_format(expr, units_format) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index e73754a6..f4c718ee 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -31,10 +31,16 @@ import sqlite3 from pathlib import Path -from fontTools.misc.bezierTools import namedtuple +from mypy.message_registry import NamedTuple +from pint.registry import Unit from definitions import PROJECT_ROOT -from temoa.temoa_model.unit_checking.common import RATIO_ELEMENT, SINGLE_ELEMENT, MIXED_UNITS +from temoa.temoa_model.unit_checking.common import ( + RATIO_ELEMENT, + SINGLE_ELEMENT, + MIXED_UNITS, + consolidate_lines, +) from temoa.temoa_model.unit_checking.entry_checker import validate_units_format logger = logging.getLogger(__name__) @@ -56,38 +62,56 @@ def commodity_units(conn: sqlite3.Connection) -> dict[str, str]: return res -IOUnits = namedtuple('IOUnits', ['input_units', 'output_units']) +class IOUnits(NamedTuple): + input_units: str + output_units: str def check_efficiency_table( conn: sqlite3.Connection, comm_units: dict[str, str] -) -> dict[str, IOUnits]: - """Check the technology units for Efficiency table entries""" - cursor = conn.cursor() +) -> tuple[dict[str, IOUnits], list[str]]: + """ + Check the technology units for Efficiency table entries + + Returns a dictionary of technology to IOUnits and a list of error messages + + """ + query = 'SELECT tech, 
input_comm, output_comm, units FROM efficiency' - cursor.execute(query) - rows = cursor.fetchall() + rows = conn.execute(query).fetchall() res = {} - for tech, ic, oc, units in rows: + error_msgs = [] + invalid_rows = [] + for idx, (tech, ic, oc, units) in enumerate(rows, start=1): valid, located_units = validate_units_format(units, RATIO_ELEMENT) if not valid: + invalid_rows.append(idx) continue - # we should be 'clean' on the basic unist before getting here - # raise RuntimeError(f"Invalid units for efficiency table: {ic} {oc} {units}") output_units, input_units = located_units invalid_input = input_units != comm_units[ic] and input_units != MIXED_UNITS invalid_output = output_units != comm_units[oc] if invalid_input or invalid_output: - logger.error('Units conflict for Techology %s', tech) + logger.warning('Units conflict for Technology %s near row %d', tech, idx) msg = f"\n Expected: {f'{ic} [{input_units}]' :^25} ----> {tech :^20} ----> {f'{oc} [{output_units}]': ^25}" if invalid_input: msg += f'\n Invalid input units: {comm_units[ic]}' if invalid_output: msg += f'\n Invalid output units: {comm_units[oc]}' - logger.error(msg) + error_msgs.append(msg) else: res[tech] = IOUnits(input_units, output_units) - return res + if invalid_rows: + listed_lines = consolidate_lines(invalid_rows) + line_error_msg = f'Non-processed rows (see earlier tests): {listed_lines}' + error_msgs.append(line_error_msg) + + return res, error_msgs + + +def check_inter_table_relations( + source_relations: dict[str, Unit], table_relations: dict[str, IOUnits] +) -> tuple[dict[str, str], list[str]]: + pass def main(db_path: Path): diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 5584f1e5..06c59cb0 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -33,6 +33,7 @@ from definitions import PROJECT_ROOT from temoa.temoa_model.unit_checking.common import tables_with_units +from temoa.temoa_model.unit_checking.relation_checker import check_efficiency_table, commodity_units from temoa.temoa_model.unit_checking.table_checker import check_table logger = logging.getLogger(__name__) @@ -42,6 +43,8 @@ def screen(dp_path: Path, report_path: Path | None = None): """The sequencer to run a series of checks on units in the database""" report_entries = [] + table_units = {} + """Table name : {tech | commodity: units}""" with sqlite3.connect(dp_path) as conn: # test 1: DB version data = conn.execute('SELECT element, value FROM MetaData').fetchall() @@ -58,16 +61,23 @@ def screen(dp_path: Path, report_path: Path | None = None): msg = 'Units Check 1 (DB Version): Failed. DB must be v3.1 or greater for units checking' report_entries.extend((msg, '\n')) logger.warning(msg) + # we are non-viable, write the (very short) report and return + _write_report(report_path, report_entries) + if verbose: + print( + f'Units Check 1 (DB Version): Failed. 
DB must be v3.1 or greater for units checking' + ) return - report_entries.append('\n') # test 2: Units in tables + report_entries.append('\n') msg = 'Units Check 2 (Units Entries in Tables): Started' logger.info(msg) report_entries.extend((msg, '\n')) errors = False for table in tables_with_units: - table_errors = check_table(conn, table) + relations, table_errors = check_table(conn, table) + table_units[table] = relations if table_errors: errors = True for error in table_errors: @@ -81,9 +91,21 @@ def screen(dp_path: Path, report_path: Path | None = None): report_entries.extend((msg, '\n')) report_entries.append('\n') - if report_path: - with open(report_path, 'w') as report_file: - report_file.writelines(report_entries) + # test 3: Efficiency Table + msg = 'Units Check 3 (Tech I/O via Efficiency Table): Started' + logger.info(msg) + report_entries.extend((msg, '\n')) + tech_io, errors = check_efficiency_table(conn, comm_units=commodity_units(conn)) + if errors: + report_entries.extend((msg, '\n')) + + +def _write_report(report_path: Path, report_entries: list[str]): + """Write the report to file""" + if not report_path: + return + with open(report_path, 'w') as report_file: + report_file.writelines(report_entries) if __name__ == '__main__': diff --git a/temoa/temoa_model/unit_checking/table_checker.py b/temoa/temoa_model/unit_checking/table_checker.py index b4be96a6..2ac750ab 100644 --- a/temoa/temoa_model/unit_checking/table_checker.py +++ b/temoa/temoa_model/unit_checking/table_checker.py @@ -32,12 +32,15 @@ import sqlite3 from pathlib import Path +from pint.registry import Unit + from temoa.temoa_model.unit_checking.common import ( tables_with_units, ratio_units_tables, RATIO_ELEMENT, SINGLE_ELEMENT, ACCEPTABLE_CHARACTERS, + consolidate_lines, ) from temoa.temoa_model.unit_checking.entry_checker import ( validate_units_expression, @@ -48,9 +51,10 @@ logger = logging.getLogger(__name__) -def check_table(conn: sqlite3.Connection, table_name: str) -> list[str]: +def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Unit], list[str]]: """Check all entries in a table for format and registry compliance""" errors = [] + res = {} format_type = RATIO_ELEMENT if table_name in ratio_units_tables else SINGLE_ELEMENT entries = gather_from_table(conn, table_name) @@ -58,41 +62,45 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> list[str]: # check characters valid_chars = re.search(ACCEPTABLE_CHARACTERS, expr) if not valid_chars: - listed_lines = ( - line_nums - if len(line_nums) < 5 - else f'{", ".join(str(t) for t in line_nums[:5])}", ... more"' - ) + listed_lines = consolidate_lines(line_nums) + errors.append( - f'Invalid character(s) in {listed_lines} [only letters, underscore and "*, /" operators allowed]: {expr}' + f'Invalid character(s) at rows {listed_lines} [only letters, underscore and "*, /" operators allowed]: {expr}' ) continue # Check format valid, elements = validate_units_format(expr, format_type) if not valid: - listed_lines = ( - line_nums - if len(line_nums) < 5 - else f'{", ".join(str(t) for t in line_nums[:5])}", ... 
more"' - ) - errors.append(f'Format violation at lines {listed_lines}: {expr}') + listed_lines = consolidate_lines(line_nums) + + errors.append(f'Format violation at rows {listed_lines}: {expr}') continue # Check registry compliance + converted_units = [] for element in elements: if element: - success, _ = validate_units_expression(element) + success, units = validate_units_expression(element) if not success: - listed_lines = ( - line_nums - if len(line_nums) < 5 - else f'{", ".join(str(t) for t in line_nums[:5])}", ... more"' - ) + listed_lines = consolidate_lines(line_nums) errors.append( - f'Registry violation (UNK units) at lines {listed_lines}: {element}' + f'Registry violation (UNK units) at rows {listed_lines}: {element}' ) - return errors + else: + converted_units.append(units) + # assemble a reference of item: units-relationship if we have a valid entry + if len(converted_units) == format_type.groups: # we have the right number + match format_type: + case SINGLE_ELEMENT(): + ref = {expr: converted_units[0]} + res.update(ref) + case RATIO_ELEMENT(): + ref = {expr: converted_units[0] / converted_units[1]} + res.update(ref) + case _: + logger.error('Unknown units format: %s', format_type) + return res, errors def check_database(db_path: Path) -> list[str]: diff --git a/temoa/temoa_model/unit_checking/temoa_units.txt b/temoa/temoa_model/unit_checking/temoa_units.txt new file mode 100644 index 00000000..2b98f73b --- /dev/null +++ b/temoa/temoa_model/unit_checking/temoa_units.txt @@ -0,0 +1,12 @@ +# a few additions needed to for completeness in Temoa model +# the units here AUGMENT the default units in pint. See pint's documentation for more info. + +passenger = [person] = passenger +seat = [object] = seat + +# see pint's notes about currency and conversion before getting ideas about currency conversions. ;) +# it would be OK to add other currencies here in addition to USD. 
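+# e.g. a further currency could follow the same pattern used below:  pound = [currency] = GBP = pounds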
+ +dollar = [currency] = USD = dollars +euro = [currency] = EUR = euros +ethos = [empty] \ No newline at end of file diff --git a/tests/test_unit_checker.py b/tests/test_unit_checker.py index 0a433b4a..811c227e 100644 --- a/tests/test_unit_checker.py +++ b/tests/test_unit_checker.py @@ -30,7 +30,7 @@ import pytest from temoa.temoa_model.unit_checking import ureg -from temoa.temoa_model.unit_checking.common import SINGLE_ELEMENT, RATIO_ELEMENT, MIXED_UNITS +from temoa.temoa_model.unit_checking.common import SINGLE_ELEMENT, RATIO_ELEMENT from temoa.temoa_model.unit_checking.entry_checker import ( validate_units_format, validate_units_expression, @@ -64,7 +64,12 @@ def test_format_validation(entry, units_format, expected): ('kg', (True, ureg.kg)), ('m/s', (True, ureg('m/s'))), ('dog_food', (False, None)), - ('mixed', (True, MIXED_UNITS)), + ('ethos', (True, ureg.ethos)), + ('passenger', (True, ureg.passenger)), + ('seat', (True, ureg.seat)), + ('dollar', (True, ureg.dollar)), + ('USD', (True, ureg.dollar)), + ('EUR', (True, ureg.euro)), ] From e21fbb38e4f55ecd5436daf08eef22426005cb36 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 27 Sep 2025 22:16:41 -0700 Subject: [PATCH 45/70] working through most inter-table relations --- temoa/temoa_model/unit_checking/common.py | 14 +-- .../unit_checking/entry_checker.py | 3 +- .../unit_checking/relation_checker.py | 112 +++++++++++++++--- temoa/temoa_model/unit_checking/screener.py | 53 ++++++++- .../unit_checking/table_checker.py | 17 ++- .../temoa_model/unit_checking/temoa_units.txt | 1 + 6 files changed, 165 insertions(+), 35 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index 3a2618b5..478a51c1 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -87,21 +87,21 @@ ] activity_based_tables = [ 'MaxActivity', - 'MaxActivityGroup', + # 'MaxActivityGroup', 'MinActivity', - 'MinActivityGroup', + # 'MinActivityGroup', ] """Tables that should have units equivalent to the commodity's native units""" capacity_based_tables = [ 'MaxCapacity', - 'MaxCapacityGroup', + # 'MaxCapacityGroup', 'MaxNewCapacity', - 'MaxNewCapacityGroup', + # 'MaxNewCapacityGroup', 'MinCapacity', - 'MinCapacityGroup', + # 'MinCapacityGroup', 'MinNewCapacity', - 'MinNewCapacityGroup', + # 'MinNewCapacityGroup', ] """Tables that require conversion via CapacityToActivity to reach the native units""" @@ -130,7 +130,7 @@ class UnitsFormat: # any gathering of letters and allowed symbols which are "*" and "_" with end lead/trail spaces trimmed -SINGLE_ELEMENT = UnitsFormat(format=r'^\s*([A-Za-z\*\_\s]+?)\s*$', groups=1) +SINGLE_ELEMENT = UnitsFormat(format=r'^\s*([A-Za-z\*\_\s\/\(\)]+?)\s*$', groups=1) # any fractional expression using the same pattern above with the denominator IN PARENTHESES RATIO_ELEMENT = UnitsFormat( diff --git a/temoa/temoa_model/unit_checking/entry_checker.py b/temoa/temoa_model/unit_checking/entry_checker.py index 8bc3922d..756bc7cf 100644 --- a/temoa/temoa_model/unit_checking/entry_checker.py +++ b/temoa/temoa_model/unit_checking/entry_checker.py @@ -35,6 +35,7 @@ import sqlite3 from collections import defaultdict from pathlib import Path +from typing import Union from pint import UndefinedUnitError, Unit @@ -49,7 +50,7 @@ logger = logging.getLogger(__name__) -def validate_units_expression(expr: str) -> tuple[bool, Unit]: +def validate_units_expression(expr: str) -> tuple[bool, Union[Unit, None]]: """ validate an entry against the units registry 
:param expr: the expression to validate diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index f4c718ee..6cd9d934 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -35,18 +35,21 @@ from pint.registry import Unit from definitions import PROJECT_ROOT +from temoa.temoa_model.unit_checking import ureg from temoa.temoa_model.unit_checking.common import ( RATIO_ELEMENT, SINGLE_ELEMENT, - MIXED_UNITS, consolidate_lines, ) -from temoa.temoa_model.unit_checking.entry_checker import validate_units_format +from temoa.temoa_model.unit_checking.entry_checker import ( + validate_units_format, + validate_units_expression, +) logger = logging.getLogger(__name__) -def commodity_units(conn: sqlite3.Connection) -> dict[str, str]: +def commodity_units(conn: sqlite3.Connection) -> dict[str, Unit]: """Get a dictionary of the units for each commodity entry""" res = {} cursor = conn.cursor() @@ -55,16 +58,17 @@ def commodity_units(conn: sqlite3.Connection) -> dict[str, str]: rows = cursor.fetchall() for comm, units in rows: valid, group = validate_units_format(units, SINGLE_ELEMENT) + if valid: + valid, units = validate_units_expression(group[0]) if not valid: continue - # raise RuntimeError(f"Invalid units for commodity: {comm} {units}") - res[comm] = group[0] + res[comm] = units return res class IOUnits(NamedTuple): - input_units: str - output_units: str + input_units: Unit + output_units: Unit def check_efficiency_table( @@ -73,7 +77,7 @@ def check_efficiency_table( """ Check the technology units for Efficiency table entries - Returns a dictionary of technology to IOUnits and a list of error messages + Returns a dictionary of technology : IOUnits and a list of error messages """ @@ -83,23 +87,44 @@ def check_efficiency_table( error_msgs = [] invalid_rows = [] for idx, (tech, ic, oc, units) in enumerate(rows, start=1): + input_units, output_units = None, None valid, located_units = validate_units_format(units, RATIO_ELEMENT) + if valid: + valid, output_units = validate_units_expression(located_units[0]) + if valid: + valid, input_units = validate_units_expression(located_units[1]) if not valid: invalid_rows.append(idx) continue - output_units, input_units = located_units - invalid_input = input_units != comm_units[ic] and input_units != MIXED_UNITS + + # check that our tech matches the units of the connected commodities + invalid_input = input_units != comm_units[ic] invalid_output = output_units != comm_units[oc] if invalid_input or invalid_output: - logger.warning('Units conflict for Technology %s near row %d', tech, idx) + logger.warning( + 'Units conflict with linked commodity for Technology %s near row %d', tech, idx + ) msg = f"\n Expected: {f'{ic} [{input_units}]' :^25} ----> {tech :^20} ----> {f'{oc} [{output_units}]': ^25}" if invalid_input: msg += f'\n Invalid input units: {comm_units[ic]}' if invalid_output: msg += f'\n Invalid output units: {comm_units[oc]}' error_msgs.append(msg) + + # check that the output of this technology is consistent in units with other instances of same tech + if tech in res: + if res[tech].output_units != output_units: + logger.warning( + 'Units conflict with same-name tech for Technology %s near row %d', tech, idx + ) + msg = f"\n Found: {f'{ic} [{input_units}]' :^25} ----> {tech :^20} ----> {f'{oc} [{output_units}]': ^25}" + msg += f'\n Conflicting output units: {res[tech].output_units} vs {output_units}' + 
error_msgs.append(msg) + else: res[tech] = IOUnits(input_units, output_units) + + # we gather all non-processed rows in one statement here due to size of table vs. individual reporting if invalid_rows: listed_lines = consolidate_lines(invalid_rows) line_error_msg = f'Non-processed rows (see earlier tests): {listed_lines}' @@ -109,9 +134,68 @@ def check_efficiency_table( def check_inter_table_relations( - source_relations: dict[str, Unit], table_relations: dict[str, IOUnits] -) -> tuple[dict[str, str], list[str]]: - pass + conn: sqlite3.Connection, table_name, tech_units: dict[str, IOUnits], capacity_based: bool +) -> list[str]: + """check the tech and units in the given table vs. baseline values for the tech""" + error_msgs = [] + if capacity_based: + query = f'SELECT {table_name}.tech, {table_name}.units, ca.units FROM {table_name} JOIN CapacityToActivity ca ON {table_name}.tech = ca.tech' + else: + query = f'SELECT tech, units, NULL FROM {table_name}' + + try: + rows = conn.execute(query).fetchall() + except sqlite3.OperationalError as e: + logger.error('failed to process query: %s when processing table %s', query, table_name) + return error_msgs + for idx, (tech, table_units, c2a_units) in enumerate(rows, start=1): + if tech not in tech_units: + error_msgs.append( + f' Unprocessed row (missing reference for tech "{tech}" --see earlier tests): {idx}' + ) + continue + # validate the units in the table... + table_valid, units_data = validate_units_format(table_units, SINGLE_ELEMENT) + if table_valid: + _, valid_table_units = validate_units_expression(units_data[0]) + else: + valid_table_units = None + + # validate the c2a units, if needed + if c2a_units: + c2a_valid, units_data = validate_units_format(c2a_units, SINGLE_ELEMENT) + if c2a_valid: + # further ensure the conversion is valid and retain the validity + c2a_valid, valid_c2a_units = validate_units_expression(units_data[0]) + else: + valid_c2a_units = None + else: # we are in a valid state, but no units to use for c2a + c2a_valid = True + valid_c2a_units = None + + if not valid_table_units: + error_msgs.append(f' Unprocessed row (invalid units--see earlier tests): {idx}') + if not c2a_valid: + error_msgs.append(f' Unprocessed row (invalid c2a units--see earlier tests): {idx}') + if not valid_table_units or not c2a_valid: + continue + + # if we have valid c2a units, combine them to get the units of activity + if valid_c2a_units: + res_units = valid_table_units * (valid_c2a_units * ureg.year) + + else: + res_units = valid_table_units + + # check that the res_units match the expectation from the tech + if tech_units[tech].output_units != res_units: + error_msgs.append( + f' Units mismatch near row {idx}: Table Entry: {valid_table_units}, ' + f'C2A Entry: {valid_c2a_units if valid_c2a_units else 'N/A'}, ' + f'expected: { tech_units[tech].output_units / (valid_c2a_units * ureg.year) if valid_c2a_units else {tech_units[tech].output_units}}' + ) + + return error_msgs def main(db_path: Path): diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 06c59cb0..5bbb84c4 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -32,8 +32,15 @@ from pathlib import Path from definitions import PROJECT_ROOT -from temoa.temoa_model.unit_checking.common import tables_with_units -from temoa.temoa_model.unit_checking.relation_checker import check_efficiency_table, commodity_units +from temoa.temoa_model.unit_checking.common import ( + 
tables_with_units, + capacity_based_tables, +) +from temoa.temoa_model.unit_checking.relation_checker import ( + check_efficiency_table, + commodity_units, + check_inter_table_relations, +) from temoa.temoa_model.unit_checking.table_checker import check_table logger = logging.getLogger(__name__) @@ -95,9 +102,47 @@ def screen(dp_path: Path, report_path: Path | None = None): msg = 'Units Check 3 (Tech I/O via Efficiency Table): Started' logger.info(msg) report_entries.extend((msg, '\n')) - tech_io, errors = check_efficiency_table(conn, comm_units=commodity_units(conn)) + commodity_ref = commodity_units(conn) + tech_io, errors = check_efficiency_table(conn, comm_units=commodity_ref) if errors: - report_entries.extend((msg, '\n')) + for error in errors: + logger.warning('%s: %s', 'Efficiency', error) + report_entries.extend((f'Efficiency: {error}', '\n')) + if verbose: + print(f'Efficiency: {error}') + else: + report_entries.extend((f'Units Check 3: (Efficiency Table and Tech I/O: Passed', '\n')) + + report_entries.append('\n') + + # test 4: Relationships in other tables + # this utilizes tech_io gathered above + msg = 'Units Check 4 (Related Tables): Started' + logger.info(msg) + report_entries.extend((msg, '\n')) + # for table in activity_based_tables: + # errors_1 = check_inter_table_relations(conn=conn, table_name=table, tech_units=tech_io, capacity_based=False) + # if errors_1: + # for error in errors_1: + # logger.warning('%s: %s', table, error) + # report_entries.extend((f'{table}: {error}', '\n')) + # if verbose: + # print(f'{table}: {error}') + for table in capacity_based_tables: + errors_2 = check_inter_table_relations( + conn=conn, table_name=table, tech_units=tech_io, capacity_based=True + ) + if errors_2: + for error in errors_2: + logger.warning('%s: %s', table, error) + report_entries.extend((f'{table}: {error}', '\n')) + if verbose: + print(f'{table}: {error}') + # if not errors_1 and not errors_2: + # report_entries.extend((f'Units Check 4: (Related Tables): Passed', '\n')) + + # wrap it up + _write_report(report_path, report_entries) def _write_report(report_path: Path, report_entries: list[str]): diff --git a/temoa/temoa_model/unit_checking/table_checker.py b/temoa/temoa_model/unit_checking/table_checker.py index 2ac750ab..152ed271 100644 --- a/temoa/temoa_model/unit_checking/table_checker.py +++ b/temoa/temoa_model/unit_checking/table_checker.py @@ -91,15 +91,14 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un converted_units.append(units) # assemble a reference of item: units-relationship if we have a valid entry if len(converted_units) == format_type.groups: # we have the right number - match format_type: - case SINGLE_ELEMENT(): - ref = {expr: converted_units[0]} - res.update(ref) - case RATIO_ELEMENT(): - ref = {expr: converted_units[0] / converted_units[1]} - res.update(ref) - case _: - logger.error('Unknown units format: %s', format_type) + if format_type == SINGLE_ELEMENT: + ref = {expr: converted_units[0]} + res.update(ref) + elif format_type == RATIO_ELEMENT: + ref = {expr: converted_units[0] / converted_units[1]} + res.update(ref) + else: + logger.error('Unknown units format: %s', format_type) return res, errors diff --git a/temoa/temoa_model/unit_checking/temoa_units.txt b/temoa/temoa_model/unit_checking/temoa_units.txt index 2b98f73b..bf10c84d 100644 --- a/temoa/temoa_model/unit_checking/temoa_units.txt +++ b/temoa/temoa_model/unit_checking/temoa_units.txt @@ -3,6 +3,7 @@ passenger = [person] = passenger seat = [object] = seat 
+vehicle = [object] = vehicle # see pint's notes about currency and conversion before getting ideas about currency conversions. ;) # it would be OK to add other currencies here in addition to USD. From 6d20169de5720490742ff605b101da2bbfd14b8f Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sat, 27 Sep 2025 23:38:24 -0700 Subject: [PATCH 46/70] Lots of DB cleanup to standardize. Augments to the re to improve capture. --- temoa/temoa_model/unit_checking/common.py | 7 +++--- temoa/temoa_model/unit_checking/screener.py | 23 +++++++++++-------- .../unit_checking/table_checker.py | 2 +- 3 files changed, 18 insertions(+), 14 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index 478a51c1..b7c29070 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -130,15 +130,16 @@ class UnitsFormat: # any gathering of letters and allowed symbols which are "*" and "_" with end lead/trail spaces trimmed -SINGLE_ELEMENT = UnitsFormat(format=r'^\s*([A-Za-z\*\_\s\/\(\)]+?)\s*$', groups=1) +SINGLE_ELEMENT = UnitsFormat(format=r'^\s*([A-Za-z0-9\*\^\_\s\/\(\)]+?)\s*$', groups=1) # any fractional expression using the same pattern above with the denominator IN PARENTHESES RATIO_ELEMENT = UnitsFormat( - format=r'^\s*([A-Za-z\*\_\s]+?)\s*\/\s*\(\s*([A-Za-z\*\_\s]+?)\s*\)\s*$', groups=2 + format=r'^\s*([A-Za-z0-9\*\/\^\_\s]+?)\s*\/\s*\(\s*([A-Za-z0-9\*\^\/\(\)\_\s]+?)\s*\)\s*$', + groups=2, ) """Format for a units ratio. re will return the first group as the numerator and the second as the denominator""" -ACCEPTABLE_CHARACTERS = r'^\s*([A-Za-z\*\_\s\/\(\)]+?)\s*$' +ACCEPTABLE_CHARACTERS = r'^\s*([A-Za-z0-9\*\^\_\s\/\(\)]+?)\s*$' def consolidate_lines(line_nums: list[str | int]) -> list[str]: diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 5bbb84c4..476622f7 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -35,6 +35,7 @@ from temoa.temoa_model.unit_checking.common import ( tables_with_units, capacity_based_tables, + activity_based_tables, ) from temoa.temoa_model.unit_checking.relation_checker import ( check_efficiency_table, @@ -120,14 +121,16 @@ def screen(dp_path: Path, report_path: Path | None = None): msg = 'Units Check 4 (Related Tables): Started' logger.info(msg) report_entries.extend((msg, '\n')) - # for table in activity_based_tables: - # errors_1 = check_inter_table_relations(conn=conn, table_name=table, tech_units=tech_io, capacity_based=False) - # if errors_1: - # for error in errors_1: - # logger.warning('%s: %s', table, error) - # report_entries.extend((f'{table}: {error}', '\n')) - # if verbose: - # print(f'{table}: {error}') + for table in activity_based_tables: + errors_1 = check_inter_table_relations( + conn=conn, table_name=table, tech_units=tech_io, capacity_based=False + ) + if errors_1: + for error in errors_1: + logger.warning('%s: %s', table, error) + report_entries.extend((f'{table}: {error}', '\n')) + if verbose: + print(f'{table}: {error}') for table in capacity_based_tables: errors_2 = check_inter_table_relations( conn=conn, table_name=table, tech_units=tech_io, capacity_based=True @@ -138,8 +141,8 @@ def screen(dp_path: Path, report_path: Path | None = None): report_entries.extend((f'{table}: {error}', '\n')) if verbose: print(f'{table}: {error}') - # if not errors_1 and not errors_2: - # report_entries.extend((f'Units Check 4: (Related Tables): Passed', '\n')) + 
if not errors_1 and not errors_2: + report_entries.extend((f'Units Check 4: (Related Tables): Passed', '\n')) # wrap it up _write_report(report_path, report_entries) diff --git a/temoa/temoa_model/unit_checking/table_checker.py b/temoa/temoa_model/unit_checking/table_checker.py index 152ed271..6372de6a 100644 --- a/temoa/temoa_model/unit_checking/table_checker.py +++ b/temoa/temoa_model/unit_checking/table_checker.py @@ -65,7 +65,7 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un listed_lines = consolidate_lines(line_nums) errors.append( - f'Invalid character(s) at rows {listed_lines} [only letters, underscore and "*, /" operators allowed]: {expr}' + f'Invalid character(s) at rows {listed_lines} [only letters, underscore and "*, /, ^" operators allowed]: {expr}' ) continue From b70971b37b13da3d638e87d2103f6d39aa72213f Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sun, 28 Sep 2025 14:33:27 -0700 Subject: [PATCH 47/70] working through capacity-related tables --- temoa/temoa_model/unit_checking/common.py | 32 +++-- .../unit_checking/entry_checker.py | 2 +- .../unit_checking/relation_checker.py | 129 ++++++++++++++++-- temoa/temoa_model/unit_checking/screener.py | 60 ++++++-- .../unit_checking/table_checker.py | 39 +++++- 5 files changed, 222 insertions(+), 40 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index b7c29070..41a211b5 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -71,15 +71,15 @@ ] """Tables that have units""" -ratio_units_tables = { +ratio_capture_tables = { 'Efficiency', - 'EmissionActivity', - 'CostEmission', - 'CostFixed', - 'CostInvest', - 'CostVariable', + # 'EmissionActivity', + # 'CostEmission', + # 'CostFixed', + # 'CostInvest', + # 'CostVariable', } -"""Tables that express a ratio of units in form "units / (other units)" """ +"""Tables that require ratio capture in form "units / (other units)" """ commodity_based_tables = [ 'Demand', @@ -94,6 +94,7 @@ """Tables that should have units equivalent to the commodity's native units""" capacity_based_tables = [ + 'ExistingCapacity', 'MaxCapacity', # 'MaxCapacityGroup', 'MaxNewCapacity', @@ -105,6 +106,12 @@ ] """Tables that require conversion via CapacityToActivity to reach the native units""" +per_capacity_based_tables = [ + 'CostFixed', + 'CostInvest', +] +"""tables with capacity in the denominator""" + period_based_tables = [ 'LifetimeProcess', 'LifetimeTech', @@ -112,12 +119,15 @@ ] """Tables that align to the time period, presumably 'years'""" +# we need to delineate whether the units are commodity-referenced or tech-referenced and if they are "capacity based" so... 
+# format: (table_name, commodity field name (None if 'tech' based), capacity based ) cost_based_tables = [ - 'CostEmission', - 'CostFixed', - 'CostInvest', - 'CostVariable', + ('CostEmission', 'emis_comm', False), + ('CostFixed', None, True), + ('CostInvest', None, True), + ('CostVariable', None, False), ] +"""Tables that have cost units""" # TODO: Unclear tables: MaxResource, GrowthRateSeed diff --git a/temoa/temoa_model/unit_checking/entry_checker.py b/temoa/temoa_model/unit_checking/entry_checker.py index 756bc7cf..ae718f31 100644 --- a/temoa/temoa_model/unit_checking/entry_checker.py +++ b/temoa/temoa_model/unit_checking/entry_checker.py @@ -59,7 +59,7 @@ def validate_units_expression(expr: str) -> tuple[bool, Union[Unit, None]]: try: units = ureg.parse_units(expr) return True, units - except UndefinedUnitError as e: + except UndefinedUnitError: return False, None diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index 6cd9d934..cdb06601 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -29,6 +29,7 @@ """ import logging import sqlite3 +from collections.abc import Iterable from pathlib import Path from mypy.message_registry import NamedTuple @@ -49,7 +50,7 @@ logger = logging.getLogger(__name__) -def commodity_units(conn: sqlite3.Connection) -> dict[str, Unit]: +def make_commodity_lut(conn: sqlite3.Connection) -> dict[str, Unit]: """Get a dictionary of the units for each commodity entry""" res = {} cursor = conn.cursor() @@ -66,6 +67,23 @@ def commodity_units(conn: sqlite3.Connection) -> dict[str, Unit]: return res +def make_c2a_lut(conn: sqlite3.Connection) -> dict[str, Unit]: + """Get a dictionary of the units for each capacity to activity entry""" + res = {} + cursor = conn.cursor() + query = 'SELECT tech, units FROM CapacityToActivity' + cursor.execute(query) + rows = cursor.fetchall() + for comm, units in rows: + valid, group = validate_units_format(units, SINGLE_ELEMENT) + if valid: + valid, units = validate_units_expression(group[0]) + if not valid: + continue + res[comm] = units + return res + + class IOUnits(NamedTuple): input_units: Unit output_units: Unit @@ -102,7 +120,7 @@ def check_efficiency_table( invalid_output = output_units != comm_units[oc] if invalid_input or invalid_output: logger.warning( - 'Units conflict with linked commodity for Technology %s near row %d', tech, idx + ' Units conflict with linked commodity for Technology %s near row %d', tech, idx ) msg = f"\n Expected: {f'{ic} [{input_units}]' :^25} ----> {tech :^20} ----> {f'{oc} [{output_units}]': ^25}" if invalid_input: @@ -115,7 +133,7 @@ def check_efficiency_table( if tech in res: if res[tech].output_units != output_units: logger.warning( - 'Units conflict with same-name tech for Technology %s near row %d', tech, idx + ' Units conflict with same-name tech for Technology %s near row %d', tech, idx ) msg = f"\n Found: {f'{ic} [{input_units}]' :^25} ----> {tech :^20} ----> {f'{oc} [{output_units}]': ^25}" msg += f'\n Conflicting output units: {res[tech].output_units} vs {output_units}' @@ -134,7 +152,11 @@ def check_efficiency_table( def check_inter_table_relations( - conn: sqlite3.Connection, table_name, tech_units: dict[str, IOUnits], capacity_based: bool + conn: sqlite3.Connection, + table_name, + tech_lut: dict[str, IOUnits], + c2a_lut: dict[str, Unit], + capacity_based: bool, ) -> list[str]: """check the tech and units in the given table vs. 
baseline values for the tech""" error_msgs = [] @@ -145,11 +167,13 @@ def check_inter_table_relations( try: rows = conn.execute(query).fetchall() - except sqlite3.OperationalError as e: + except sqlite3.OperationalError: logger.error('failed to process query: %s when processing table %s', query, table_name) + msg = f' Failed to process table {table_name}. See log for failed query.' + error_msgs.append(msg) return error_msgs for idx, (tech, table_units, c2a_units) in enumerate(rows, start=1): - if tech not in tech_units: + if tech not in tech_lut: error_msgs.append( f' Unprocessed row (missing reference for tech "{tech}" --see earlier tests): {idx}' ) @@ -188,23 +212,110 @@ def check_inter_table_relations( res_units = valid_table_units # check that the res_units match the expectation from the tech - if tech_units[tech].output_units != res_units: + if tech_lut[tech].output_units != res_units: error_msgs.append( f' Units mismatch near row {idx}: Table Entry: {valid_table_units}, ' f'C2A Entry: {valid_c2a_units if valid_c2a_units else 'N/A'}, ' - f'expected: { tech_units[tech].output_units / (valid_c2a_units * ureg.year) if valid_c2a_units else {tech_units[tech].output_units}}' + f'expected: {tech_lut[tech].output_units / (valid_c2a_units * ureg.year) if valid_c2a_units else {tech_lut[tech].output_units}}' ) return error_msgs +def check_cost_tables( + conn: sqlite3.Connection, + cost_tables: Iterable[str], + tech_lut: dict[str, IOUnits], + c2a_lut: dict[str, Unit], + commodity_lut: dict[str, Unit], +) -> list[str]: + """ + Check all cost tables for (a) alignment of units to tech output (the denominator) and (b) 100% commonality + in the cost units (numerator) + Note: we'll *assume* the first passing entry in the first table establishes the common cost units and + check for consistency + """ + common_cost_unit = None + error_msgs = [] + for table_name, commodity_reference, capacity_based in cost_tables: + if commodity_reference and capacity_based: + raise ValueError( + 'Cannot have both a commodity reference and be capacity based. Check input for cost tables' + ) + query = f'SELECT {commodity_reference if commodity_reference else 'tech'}, units FROM {table_name}' + try: + rows = conn.execute(query).fetchall() + except sqlite3.OperationalError: + logger.error('failed to process query: %s when processing table %s', query, table_name) + msg = f' Failed to process table {table_name}. See log for failed query.' 
+ error_msgs.append(msg) + continue + for idx, (tech, units) in enumerate(rows, start=1): + # convert + valid, output_units = validate_units_expression(units) + if not valid: + error_msgs.append( + f' {table_name}: Unprocessed row (invalid units--see earlier tests): {idx}' + ) + continue + + # determine the units for the commodity + c2a_units = None + if commodity_reference: + commodity_units = commodity_lut[commodity_reference] + else: + tech_io = tech_lut.get(tech) + if tech_io: + commodity_units = tech_io.output_units + else: + error_msgs.append( + f' {table_name}: Unprocessed row (unknown tech {tech}): {idx}' + ) + continue + if capacity_based: + c2a_units = c2a_lut.get(tech, ureg.dimensionless / ureg.year) + + # we need to transform the activity-based commodity units to capacity units to match the cost table + commodity_units /= c2a_units * ureg.year + + # separate and check + if common_cost_unit is None: + # establish it + + # + # MUSD + # ---- * kWh = MUSD + # kWh + + # if it's "clean" use it + + cost_unit = output_units * commodity_units + # check that what we have captured is in the currency dimension + if (1 * cost_unit).check('[currency]'): + common_cost_unit = cost_unit + else: # something is wrong, hopefully it was just this entry? + error_msgs.append( + f' {table_name}: Unprocessed row (unreducible cost units or mismatched tech output units): {idx}' + ) + continue + else: + # use the common cost unit to check + cost_unit = output_units * commodity_units + if cost_unit != common_cost_unit: + error_msgs.append( + f' {table_name}: Unprocessed row (mismatched cost units or tech output mismatch): {idx}' + f'\n Table entry: {units}, Commodity units: {commodity_units}, Remainder: {cost_unit}, c2a units: {c2a_units if c2a_units else "N/A"}:' + ) + return error_msgs + + def main(db_path: Path): """Run unit relationship checks on database""" logging.basicConfig(level=logging.INFO) try: conn = sqlite3.connect(db_path) - comm_units = commodity_units(conn) + comm_units = make_commodity_lut(conn) check_efficiency_table(conn, comm_units) conn.close() except sqlite3.Error as e: diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 476622f7..a4464148 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -36,11 +36,14 @@ tables_with_units, capacity_based_tables, activity_based_tables, + cost_based_tables, ) from temoa.temoa_model.unit_checking.relation_checker import ( check_efficiency_table, - commodity_units, + make_commodity_lut, check_inter_table_relations, + check_cost_tables, + make_c2a_lut, ) from temoa.temoa_model.unit_checking.table_checker import check_table @@ -55,6 +58,11 @@ def screen(dp_path: Path, report_path: Path | None = None): """Table name : {tech | commodity: units}""" with sqlite3.connect(dp_path) as conn: # test 1: DB version + msg = '======== Units Check 1 (DB Version): Started ========' + report_entries.extend((msg, '\n')) + logger.info(msg) + if verbose: + print(msg) data = conn.execute('SELECT element, value FROM MetaData').fetchall() meta_data = dict(data) major = meta_data.get('DB_MAJOR', 0) @@ -64,7 +72,7 @@ def screen(dp_path: Path, report_path: Path | None = None): report_entries.extend((msg, '\n')) logger.info(msg) if verbose: - print(f'Units Check 1 (DB Version): Passed') + print(msg) else: msg = 'Units Check 1 (DB Version): Failed. 
DB must be v3.1 or greater for units checking' report_entries.extend((msg, '\n')) @@ -73,13 +81,13 @@ def screen(dp_path: Path, report_path: Path | None = None): _write_report(report_path, report_entries) if verbose: print( - f'Units Check 1 (DB Version): Failed. DB must be v3.1 or greater for units checking' + 'Units Check 1 (DB Version): Failed. DB must be v3.1 or greater for units checking' ) return # test 2: Units in tables report_entries.append('\n') - msg = 'Units Check 2 (Units Entries in Tables): Started' + msg = '======== Units Check 2 (Units Entries in Tables): Started ========' logger.info(msg) report_entries.extend((msg, '\n')) errors = False @@ -90,7 +98,7 @@ def screen(dp_path: Path, report_path: Path | None = None): errors = True for error in table_errors: logger.warning('%s: %s', table, error) - report_entries.extend((f'{table}: {error}', '\n')) + report_entries.extend((f' {table}: {error}', '\n')) if verbose: print(f'{table}: {error}') if not errors: @@ -100,11 +108,13 @@ def screen(dp_path: Path, report_path: Path | None = None): report_entries.append('\n') # test 3: Efficiency Table - msg = 'Units Check 3 (Tech I/O via Efficiency Table): Started' + msg = '======== Units Check 3 (Tech I/O via Efficiency Table): Started ========' logger.info(msg) report_entries.extend((msg, '\n')) - commodity_ref = commodity_units(conn) - tech_io, errors = check_efficiency_table(conn, comm_units=commodity_ref) + # make Look Up Tables for use in follow-on checks + commodity_lut = make_commodity_lut(conn) + c2a_lut = make_c2a_lut(conn) + tech_io, errors = check_efficiency_table(conn, comm_units=commodity_lut) if errors: for error in errors: logger.warning('%s: %s', 'Efficiency', error) @@ -112,18 +122,18 @@ def screen(dp_path: Path, report_path: Path | None = None): if verbose: print(f'Efficiency: {error}') else: - report_entries.extend((f'Units Check 3: (Efficiency Table and Tech I/O: Passed', '\n')) + report_entries.extend(('Units Check 3: (Efficiency Table and Tech I/O: Passed', '\n')) report_entries.append('\n') # test 4: Relationships in other tables # this utilizes tech_io gathered above - msg = 'Units Check 4 (Related Tables): Started' + msg = '======== Units Check 4 (Related Tables): Started ========' logger.info(msg) report_entries.extend((msg, '\n')) for table in activity_based_tables: errors_1 = check_inter_table_relations( - conn=conn, table_name=table, tech_units=tech_io, capacity_based=False + conn=conn, table_name=table, tech_lut=tech_io, c2a_lut=c2a_lut, capacity_based=False ) if errors_1: for error in errors_1: @@ -133,7 +143,7 @@ def screen(dp_path: Path, report_path: Path | None = None): print(f'{table}: {error}') for table in capacity_based_tables: errors_2 = check_inter_table_relations( - conn=conn, table_name=table, tech_units=tech_io, capacity_based=True + conn=conn, table_name=table, tech_lut=tech_io, c2a_lut=c2a_lut, capacity_based=True ) if errors_2: for error in errors_2: @@ -142,7 +152,31 @@ def screen(dp_path: Path, report_path: Path | None = None): if verbose: print(f'{table}: {error}') if not errors_1 and not errors_2: - report_entries.extend((f'Units Check 4: (Related Tables): Passed', '\n')) + print(errors_1, errors_2) + report_entries.extend(('Units Check 4: (Related Tables): Passed', '\n')) + + report_entries.append('\n') + + # test 5: Cost-Based Tables + # checks to assure that the output units are compatible with the related tech and that the currency is + # standardized when the units are simplified + msg = '======== Units Check 5 (Cost Tables): Started 
========' + logger.info(msg) + report_entries.extend((msg, '\n')) + errors = check_cost_tables( + conn, + cost_tables=cost_based_tables, + tech_lut=tech_io, + c2a_lut=c2a_lut, + commodity_lut=commodity_lut, + ) + if errors: + for error in errors: + logger.warning('%s', error) + report_entries.extend((error, '\n')) + + else: + report_entries.extend(('Units Check 5 (Cost Tables): Passed', '\n')) # wrap it up _write_report(report_path, report_entries) diff --git a/temoa/temoa_model/unit_checking/table_checker.py b/temoa/temoa_model/unit_checking/table_checker.py index 6372de6a..cc9409dc 100644 --- a/temoa/temoa_model/unit_checking/table_checker.py +++ b/temoa/temoa_model/unit_checking/table_checker.py @@ -36,11 +36,13 @@ from temoa.temoa_model.unit_checking.common import ( tables_with_units, - ratio_units_tables, + ratio_capture_tables, RATIO_ELEMENT, SINGLE_ELEMENT, ACCEPTABLE_CHARACTERS, consolidate_lines, + capacity_based_tables, + per_capacity_based_tables, ) from temoa.temoa_model.unit_checking.entry_checker import ( validate_units_expression, @@ -52,10 +54,17 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Unit], list[str]]: - """Check all entries in a table for format and registry compliance""" + """ + Check all entries in a table for format and registry compliance + This "first pass" gathers common entriesfor efficiency""" errors = [] res = {} - format_type = RATIO_ELEMENT if table_name in ratio_units_tables else SINGLE_ELEMENT + format_type = RATIO_ELEMENT if table_name in ratio_capture_tables else SINGLE_ELEMENT + + # check for incompatible screens... + if table_name in capacity_based_tables or table_name in capacity_based_tables: + if format_type == RATIO_ELEMENT: + logger.warning('Checking of RATIO_ELEMENTs for capacity-type units is NOT implemented') entries = gather_from_table(conn, table_name) for expr, line_nums in entries.items(): @@ -65,7 +74,7 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un listed_lines = consolidate_lines(line_nums) errors.append( - f'Invalid character(s) at rows {listed_lines} [only letters, underscore and "*, /, ^" operators allowed]: {expr}' + f' Invalid character(s) at rows {listed_lines} [only letters, underscore and "*, /, ^, ()" operators allowed]: {expr}' ) continue @@ -74,7 +83,7 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un if not valid: listed_lines = consolidate_lines(line_nums) - errors.append(f'Format violation at rows {listed_lines}: {expr}') + errors.append(f' Format violation at rows {listed_lines}: {expr}') continue # Check registry compliance @@ -85,10 +94,28 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un if not success: listed_lines = consolidate_lines(line_nums) errors.append( - f'Registry violation (UNK units) at rows {listed_lines}: {element}' + f' Registry violation (UNK units) at rows {listed_lines}: {element}' ) else: converted_units.append(units) + + # if we have a relationship with "capacity" check that we have some time units + if table_name in capacity_based_tables and format_type == SINGLE_ELEMENT: + test_value = converted_units[0] + if test_value.dimensionality.get('[time]') != -1: + # no time in numerator + listed_lines = consolidate_lines(line_nums) + errors.append( + f' No time dimension in denominator of capacity entry at rows {listed_lines}: {expr}' + ) + if table_name in per_capacity_based_tables and format_type == SINGLE_ELEMENT: + test_value = converted_units[0] + if 
test_value.dimensionality.get('[time]') != 1: + listed_lines = consolidate_lines(line_nums) + errors.append( + f' No time dimension in numerator of capacity entry at rows {listed_lines}: {expr}' + ) + # assemble a reference of item: units-relationship if we have a valid entry if len(converted_units) == format_type.groups: # we have the right number if format_type == SINGLE_ELEMENT: From ead704983880860b8664946ff9dd4c76b5c9b8e2 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Wed, 8 Oct 2025 18:55:57 -0700 Subject: [PATCH 48/70] consolidation of error listing by row group in more tables --- temoa/temoa_model/unit_checking/common.py | 16 ++-- .../unit_checking/relation_checker.py | 33 +++---- temoa/temoa_model/unit_checking/screener.py | 88 +++++++++++++------ .../unit_checking/table_checker.py | 4 +- .../temoa_model/unit_checking/temoa_units.txt | 6 +- 5 files changed, 93 insertions(+), 54 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index 41a211b5..fc929834 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -120,12 +120,12 @@ """Tables that align to the time period, presumably 'years'""" # we need to delineate whether the units are commodity-referenced or tech-referenced and if they are "capacity based" so... -# format: (table_name, commodity field name (None if 'tech' based), capacity based ) +# format: (table_name, commodity field name (None if 'tech' based), capacity-based, period-based ) cost_based_tables = [ - ('CostEmission', 'emis_comm', False), - ('CostFixed', None, True), - ('CostInvest', None, True), - ('CostVariable', None, False), + ('CostEmission', 'emis_comm', False, True), + ('CostFixed', None, True, True), + ('CostInvest', None, True, False), + ('CostVariable', None, False, True), ] """Tables that have cost units""" @@ -152,10 +152,10 @@ class UnitsFormat: ACCEPTABLE_CHARACTERS = r'^\s*([A-Za-z0-9\*\^\_\s\/\(\)]+?)\s*$' -def consolidate_lines(line_nums: list[str | int]) -> list[str]: +def consolidate_lines(line_nums: list[str | int]) -> str: listed_lines = ( - line_nums + ", ".join(str(t) for t in line_nums) if len(line_nums) < 5 - else f'{", ".join(str(t) for t in line_nums[:5])}", ... +{len(line_nums)-5} more"' + else f'{", ".join(str(t) for t in line_nums[:5])}, ... 
+{len(line_nums)-5} more' ) return listed_lines diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index cdb06601..b3eab8c3 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -32,6 +32,7 @@ from collections.abc import Iterable from pathlib import Path +from mypy.checkexpr import defaultdict from mypy.message_registry import NamedTuple from pint.registry import Unit @@ -61,9 +62,7 @@ def make_commodity_lut(conn: sqlite3.Connection) -> dict[str, Unit]: valid, group = validate_units_format(units, SINGLE_ELEMENT) if valid: valid, units = validate_units_expression(group[0]) - if not valid: - continue - res[comm] = units + res[comm] = units return res @@ -78,9 +77,7 @@ def make_c2a_lut(conn: sqlite3.Connection) -> dict[str, Unit]: valid, group = validate_units_format(units, SINGLE_ELEMENT) if valid: valid, units = validate_units_expression(group[0]) - if not valid: - continue - res[comm] = units + res[comm] = units return res @@ -237,10 +234,11 @@ def check_cost_tables( """ common_cost_unit = None error_msgs = [] - for table_name, commodity_reference, capacity_based in cost_tables: + for table_name, commodity_reference, capacity_based, period_based in cost_tables: + table_grouped_errors = defaultdict(list) if commodity_reference and capacity_based: raise ValueError( - 'Cannot have both a commodity reference and be capacity based. Check input for cost tables' + f'Table that is "capacity based" {table_name} flagged as having commodity field. Check input for cost tables' ) query = f'SELECT {commodity_reference if commodity_reference else 'tech'}, units FROM {table_name}' try: @@ -254,9 +252,10 @@ def check_cost_tables( # convert valid, output_units = validate_units_expression(units) if not valid: - error_msgs.append( - f' {table_name}: Unprocessed row (invalid units--see earlier tests): {idx}' + label = ( + f' {table_name}: Unprocessed row (invalid units--see earlier tests): {units}' ) + table_grouped_errors[label].append(idx) continue # determine the units for the commodity @@ -268,9 +267,8 @@ def check_cost_tables( if tech_io: commodity_units = tech_io.output_units else: - error_msgs.append( - f' {table_name}: Unprocessed row (unknown tech {tech}): {idx}' - ) + label = f' {table_name}: Unprocessed row (unknown tech: {tech}) ' + table_grouped_errors[label].append(idx) continue if capacity_based: c2a_units = c2a_lut.get(tech, ureg.dimensionless / ureg.year) @@ -290,7 +288,7 @@ def check_cost_tables( # if it's "clean" use it cost_unit = output_units * commodity_units - # check that what we have captured is in the currency dimension + # check that what we have captured is in the currency dimension == "clean" if (1 * cost_unit).check('[currency]'): common_cost_unit = cost_unit else: # something is wrong, hopefully it was just this entry? 
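The `[currency]` reduction used above can be exercised on its own. The sketch below is an illustration only, not
code from the model; it builds a throwaway registry with a currency dimension instead of loading temoa_units.txt:

    import pint

    ureg = pint.UnitRegistry()
    ureg.define('dollar = [currency] = USD')      # stand-in for the definitions in temoa_units.txt

    cost_entry = ureg('dollar / kWh')             # e.g. a CostVariable units entry
    tech_output = ureg('kWh')                     # output units inferred from the Efficiency table
    print((cost_entry * tech_output).check('[currency]'))   # True: reduces cleanly to pure currency
    print((cost_entry * ureg('kg')).check('[currency]'))    # False: a mismatched tech output leaves mass behind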
@@ -302,10 +300,13 @@ def check_cost_tables( # use the common cost unit to check cost_unit = output_units * commodity_units if cost_unit != common_cost_unit: - error_msgs.append( - f' {table_name}: Unprocessed row (mismatched cost units or tech output mismatch): {idx}' + label = ( + f' {table_name}: Unprocessed row (mismatched cost units or tech output mismatch)' f'\n Table entry: {units}, Commodity units: {commodity_units}, Remainder: {cost_unit}, c2a units: {c2a_units if c2a_units else "N/A"}:' ) + table_grouped_errors[label].append(idx) + for label, listed_lines in table_grouped_errors.items(): + error_msgs.append(f'{label} {consolidate_lines(listed_lines)}') return error_msgs diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index a4464148..90147064 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -57,11 +57,12 @@ def screen(dp_path: Path, report_path: Path | None = None): table_units = {} """Table name : {tech | commodity: units}""" with sqlite3.connect(dp_path) as conn: - # test 1: DB version + # test 1: DB version msg = '======== Units Check 1 (DB Version): Started ========' report_entries.extend((msg, '\n')) logger.info(msg) if verbose: + print() print(msg) data = conn.execute('SELECT element, value FROM MetaData').fetchall() meta_data = dict(data) @@ -80,14 +81,15 @@ def screen(dp_path: Path, report_path: Path | None = None): # we are non-viable, write the (very short) report and return _write_report(report_path, report_entries) if verbose: - print( - 'Units Check 1 (DB Version): Failed. DB must be v3.1 or greater for units checking' - ) + print(msg) return - # test 2: Units in tables + # test 2: Units in tables report_entries.append('\n') msg = '======== Units Check 2 (Units Entries in Tables): Started ========' + if verbose: + print() + print(msg) logger.info(msg) report_entries.extend((msg, '\n')) errors = False @@ -105,16 +107,21 @@ def screen(dp_path: Path, report_path: Path | None = None): msg = 'Units Check 2 (Units Entries in Tables): Passed' logger.info(msg) report_entries.extend((msg, '\n')) + if verbose: + print(msg) report_entries.append('\n') - # test 3: Efficiency Table + # test 3: Efficiency Table msg = '======== Units Check 3 (Tech I/O via Efficiency Table): Started ========' logger.info(msg) report_entries.extend((msg, '\n')) + if verbose: + print() + print(msg) # make Look Up Tables for use in follow-on checks commodity_lut = make_commodity_lut(conn) c2a_lut = make_c2a_lut(conn) - tech_io, errors = check_efficiency_table(conn, comm_units=commodity_lut) + tech_io_lut, errors = check_efficiency_table(conn, comm_units=commodity_lut) if errors: for error in errors: logger.warning('%s: %s', 'Efficiency', error) @@ -122,51 +129,76 @@ def screen(dp_path: Path, report_path: Path | None = None): if verbose: print(f'Efficiency: {error}') else: - report_entries.extend(('Units Check 3: (Efficiency Table and Tech I/O: Passed', '\n')) + msg = 'Units Check 3: (Efficiency Table and Tech I/O: Passed' + report_entries.extend((msg, '\n')) + logger.info(msg) + if verbose: + print(msg) report_entries.append('\n') - # test 4: Relationships in other tables - # this utilizes tech_io gathered above + # test 4: Relationships in other tables + # this utilizes tech_io_lut gathered above to QA the units in other tables msg = '======== Units Check 4 (Related Tables): Started ========' logger.info(msg) report_entries.extend((msg, '\n')) + if verbose: + print() + print(msg) + 
error_free = True for table in activity_based_tables: - errors_1 = check_inter_table_relations( - conn=conn, table_name=table, tech_lut=tech_io, c2a_lut=c2a_lut, capacity_based=False + errors = check_inter_table_relations( + conn=conn, + table_name=table, + tech_lut=tech_io_lut, + c2a_lut=c2a_lut, + capacity_based=False, ) - if errors_1: - for error in errors_1: + if errors: + error_free = False + for error in errors: logger.warning('%s: %s', table, error) report_entries.extend((f'{table}: {error}', '\n')) if verbose: print(f'{table}: {error}') for table in capacity_based_tables: - errors_2 = check_inter_table_relations( - conn=conn, table_name=table, tech_lut=tech_io, c2a_lut=c2a_lut, capacity_based=True + errors = check_inter_table_relations( + conn=conn, + table_name=table, + tech_lut=tech_io_lut, + c2a_lut=c2a_lut, + capacity_based=True, ) - if errors_2: - for error in errors_2: + if errors: + error_free = False + for error in errors: logger.warning('%s: %s', table, error) report_entries.extend((f'{table}: {error}', '\n')) if verbose: print(f'{table}: {error}') - if not errors_1 and not errors_2: - print(errors_1, errors_2) - report_entries.extend(('Units Check 4: (Related Tables): Passed', '\n')) + if error_free: + msg = 'Units Check 4: (Related Tables): Passed' + logger.info(msg) + report_entries.extend((msg, '\n')) + if verbose: + print(msg) report_entries.append('\n') - # test 5: Cost-Based Tables + # test 5: Cost-Based Tables # checks to assure that the output units are compatible with the related tech and that the currency is # standardized when the units are simplified + # We expect units like Mdollars/PJ or such and the denominator should align with the commodity via the tech msg = '======== Units Check 5 (Cost Tables): Started ========' logger.info(msg) report_entries.extend((msg, '\n')) + if verbose: + print() + print(msg) errors = check_cost_tables( conn, cost_tables=cost_based_tables, - tech_lut=tech_io, + tech_lut=tech_io_lut, c2a_lut=c2a_lut, commodity_lut=commodity_lut, ) @@ -174,9 +206,12 @@ def screen(dp_path: Path, report_path: Path | None = None): for error in errors: logger.warning('%s', error) report_entries.extend((error, '\n')) - else: - report_entries.extend(('Units Check 5 (Cost Tables): Passed', '\n')) + msg = 'Units Check 5: (Cost Tables): Passed' + logger.info(msg) + report_entries.extend((msg, '\n')) + if verbose: + print(msg) # wrap it up _write_report(report_path, report_entries) @@ -186,10 +221,11 @@ def _write_report(report_path: Path, report_entries: list[str]): """Write the report to file""" if not report_path: return - with open(report_path, 'w') as report_file: + with open(report_path, 'w', encoding='utf-8') as report_file: report_file.writelines(report_entries) if __name__ == '__main__': + # db_path = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_orig_v3_1.sqlite' db_path = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' screen(db_path, report_path=Path(PROJECT_ROOT) / 'output_files/units.txt') diff --git a/temoa/temoa_model/unit_checking/table_checker.py b/temoa/temoa_model/unit_checking/table_checker.py index cc9409dc..1c139f54 100644 --- a/temoa/temoa_model/unit_checking/table_checker.py +++ b/temoa/temoa_model/unit_checking/table_checker.py @@ -62,7 +62,7 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un format_type = RATIO_ELEMENT if table_name in ratio_capture_tables else SINGLE_ELEMENT # check for incompatible screens... 
- if table_name in capacity_based_tables or table_name in capacity_based_tables: + if table_name in capacity_based_tables: if format_type == RATIO_ELEMENT: logger.warning('Checking of RATIO_ELEMENTs for capacity-type units is NOT implemented') @@ -74,7 +74,7 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un listed_lines = consolidate_lines(line_nums) errors.append( - f' Invalid character(s) at rows {listed_lines} [only letters, underscore and "*, /, ^, ()" operators allowed]: {expr}' + f' Invalid character(s) at rows {listed_lines} [only letters, underscore and "*, /, ^, ()" operators allowed]: {expr if expr else ""}' ) continue diff --git a/temoa/temoa_model/unit_checking/temoa_units.txt b/temoa/temoa_model/unit_checking/temoa_units.txt index bf10c84d..2989f445 100644 --- a/temoa/temoa_model/unit_checking/temoa_units.txt +++ b/temoa/temoa_model/unit_checking/temoa_units.txt @@ -8,6 +8,8 @@ vehicle = [object] = vehicle # see pint's notes about currency and conversion before getting ideas about currency conversions. ;) # it would be OK to add other currencies here in addition to USD. -dollar = [currency] = USD = dollars -euro = [currency] = EUR = euros +dollar = [currency] = USD = dollar +euro = [currency] = EUR = euro + +# Temoa uses ethos as an original source, so we add it here as an "empty" base class unit ethos = [empty] \ No newline at end of file From b8a3d2572a5647668edb5c4b74da71f03df7f11c Mon Sep 17 00:00:00 2001 From: Jeff H Date: Tue, 14 Oct 2025 16:14:55 -0700 Subject: [PATCH 49/70] additions, cleanup, and comments for tests and common elements. --- temoa/temoa_model/unit_checking/common.py | 7 ++++++- tests/test_unit_checker.py | 10 +++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index fc929834..87572951 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -122,9 +122,9 @@ # we need to delineate whether the units are commodity-referenced or tech-referenced and if they are "capacity based" so... # format: (table_name, commodity field name (None if 'tech' based), capacity-based, period-based ) cost_based_tables = [ + ('CostInvest', None, True, False), ('CostEmission', 'emis_comm', False, True), ('CostFixed', None, True, True), - ('CostInvest', None, True, False), ('CostVariable', None, False, True), ] """Tables that have cost units""" @@ -140,9 +140,13 @@ class UnitsFormat: # any gathering of letters and allowed symbols which are "*" and "_" with end lead/trail spaces trimmed +# We include numbers here for cases where there is an exponent in the units like "meter^2" +# the units *may* be parenthesized arbitrarily. See the unit tests for examples. SINGLE_ELEMENT = UnitsFormat(format=r'^\s*([A-Za-z0-9\*\^\_\s\/\(\)]+?)\s*$', groups=1) # any fractional expression using the same pattern above with the denominator IN PARENTHESES +# this modification of above REQUIRES a parenthetical expression after the slash to isolate the denominator. +# see the unit tests for examples. 
RATIO_ELEMENT = UnitsFormat( format=r'^\s*([A-Za-z0-9\*\/\^\_\s]+?)\s*\/\s*\(\s*([A-Za-z0-9\*\^\/\(\)\_\s]+?)\s*\)\s*$', groups=2, @@ -153,6 +157,7 @@ class UnitsFormat: def consolidate_lines(line_nums: list[str | int]) -> str: + """A little sand wedge of a function to prevent lists of many, many line numbers and maxing at 5 or 5 + 'more'""" listed_lines = ( ", ".join(str(t) for t in line_nums) if len(line_nums) < 5 diff --git a/tests/test_unit_checker.py b/tests/test_unit_checker.py index 811c227e..e9254a32 100644 --- a/tests/test_unit_checker.py +++ b/tests/test_unit_checker.py @@ -44,6 +44,7 @@ ('Mt.steel ', SINGLE_ELEMENT, False), # period not allowed ('PJ / day', SINGLE_ELEMENT, False), # no slash char ('PJ / (kT)', RATIO_ELEMENT, True), + ('(PJ) / (kT)', RATIO_ELEMENT, True), # numerator optionally parenthesized ('PJ / kT', RATIO_ELEMENT, False), # no parens on denom ('kWh/day/(cycle)', RATIO_ELEMENT, False), # no slash char ] @@ -56,27 +57,30 @@ ) def test_format_validation(entry, units_format, expected): """Test the regex matching for unit format - Note: The unit values here are NOT tested within the Units Registry""" + Note: The unit values here are NOT tested within the Units Registry + This test is solely to test the regex to grab the units, esp the ratio units""" assert validate_units_format(expr=entry, unit_format=units_format) cases = [ ('kg', (True, ureg.kg)), + ('kg/m^3', (True, ureg('kg/(meter*meter*meter)'))), ('m/s', (True, ureg('m/s'))), ('dog_food', (False, None)), ('ethos', (True, ureg.ethos)), ('passenger', (True, ureg.passenger)), ('seat', (True, ureg.seat)), ('dollar', (True, ureg.dollar)), + ('dollars', (True, ureg.dollar)), ('USD', (True, ureg.dollar)), ('EUR', (True, ureg.euro)), + ('kWh', (True, ureg.kWh)), ] - @pytest.mark.parametrize( 'expr, expected_result', cases, - ids=[f"{t[0]} -> {'valid' if t[0] in ('kg', 'm/s', 'mixed') else 'invalid'}" for t in cases], + ids=[f"{t[0]} -> {'valid' if t[1][0]else 'invalid'}" for t in cases], ) def test_validate_units_expression(expr, expected_result): """ From 708fe59f52b914b40b2984f8a2aaec5ea5e403cc Mon Sep 17 00:00:00 2001 From: Jeff H Date: Wed, 15 Oct 2025 15:18:47 -0700 Subject: [PATCH 50/70] Working OK. Passing initial QA. 
Before splitting cost table entries by numerator/denominator --- temoa/temoa_model/unit_checking/common.py | 31 ++-- .../unit_checking/relation_checker.py | 148 ++++++++++++------ temoa/temoa_model/unit_checking/screener.py | 22 +-- .../unit_checking/table_checker.py | 82 ++++------ tests/test_unit_checker.py | 13 ++ 5 files changed, 172 insertions(+), 124 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index 87572951..f6c2be51 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -28,6 +28,7 @@ """ from dataclasses import dataclass +from typing import NamedTuple tables_with_units = [ 'CapacityToActivity', @@ -74,10 +75,10 @@ ratio_capture_tables = { 'Efficiency', # 'EmissionActivity', - # 'CostEmission', - # 'CostFixed', - # 'CostInvest', - # 'CostVariable', + 'CostEmission', + 'CostFixed', + 'CostInvest', + 'CostVariable', } """Tables that require ratio capture in form "units / (other units)" """ @@ -106,11 +107,6 @@ ] """Tables that require conversion via CapacityToActivity to reach the native units""" -per_capacity_based_tables = [ - 'CostFixed', - 'CostInvest', -] -"""tables with capacity in the denominator""" period_based_tables = [ 'LifetimeProcess', @@ -121,13 +117,18 @@ # we need to delineate whether the units are commodity-referenced or tech-referenced and if they are "capacity based" so... # format: (table_name, commodity field name (None if 'tech' based), capacity-based, period-based ) +CostTableData = NamedTuple( + 'CostTableData', table_name=str, commodity_reference=str, capacity_based=bool, period_based=bool +) +"""A named tuple for the cost tables + important properties""" + cost_based_tables = [ - ('CostInvest', None, True, False), - ('CostEmission', 'emis_comm', False, True), - ('CostFixed', None, True, True), - ('CostVariable', None, False, True), + CostTableData('CostInvest', None, True, False), + CostTableData('CostEmission', 'emis_comm', False, False), + CostTableData('CostFixed', None, True, True), + CostTableData('CostVariable', None, False, False), ] -"""Tables that have cost units""" +"""Tables that have cost units and their properties""" # TODO: Unclear tables: MaxResource, GrowthRateSeed @@ -159,7 +160,7 @@ class UnitsFormat: def consolidate_lines(line_nums: list[str | int]) -> str: """A little sand wedge of a function to prevent lists of many, many line numbers and maxing at 5 or 5 + 'more'""" listed_lines = ( - ", ".join(str(t) for t in line_nums) + ', '.join(str(t) for t in line_nums) if len(line_nums) < 5 else f'{", ".join(str(t) for t in line_nums[:5])}, ... 
+{len(line_nums)-5} more' ) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index b3eab8c3..7b5f046b 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -42,6 +42,7 @@ RATIO_ELEMENT, SINGLE_ELEMENT, consolidate_lines, + CostTableData, ) from temoa.temoa_model.unit_checking.entry_checker import ( validate_units_format, @@ -113,16 +114,18 @@ def check_efficiency_table( continue # check that our tech matches the units of the connected commodities - invalid_input = input_units != comm_units[ic] - invalid_output = output_units != comm_units[oc] - if invalid_input or invalid_output: + invalid_input_flag = input_units != comm_units[ic] + invalid_output_flag = output_units != comm_units[oc] + if invalid_input_flag or invalid_output_flag: logger.warning( - ' Units conflict with linked commodity for Technology %s near row %d', tech, idx + 'Efficiency units conflict with associated commodity for Technology %s near row %d', + tech, + idx, ) msg = f"\n Expected: {f'{ic} [{input_units}]' :^25} ----> {tech :^20} ----> {f'{oc} [{output_units}]': ^25}" - if invalid_input: + if invalid_input_flag: msg += f'\n Invalid input units: {comm_units[ic]}' - if invalid_output: + if invalid_output_flag: msg += f'\n Invalid output units: {comm_units[oc]}' error_msgs.append(msg) @@ -130,7 +133,9 @@ def check_efficiency_table( if tech in res: if res[tech].output_units != output_units: logger.warning( - ' Units conflict with same-name tech for Technology %s near row %d', tech, idx + 'Efficiency units conflict with same-name tech for Technology %s near row %d', + tech, + idx, ) msg = f"\n Found: {f'{ic} [{input_units}]' :^25} ----> {tech :^20} ----> {f'{oc} [{output_units}]': ^25}" msg += f'\n Conflicting output units: {res[tech].output_units} vs {output_units}' @@ -158,7 +163,11 @@ def check_inter_table_relations( """check the tech and units in the given table vs. baseline values for the tech""" error_msgs = [] if capacity_based: - query = f'SELECT {table_name}.tech, {table_name}.units, ca.units FROM {table_name} JOIN CapacityToActivity ca ON {table_name}.tech = ca.tech' + query = ( + f'SELECT {table_name}.tech, {table_name}.units, ca.units ' + f'FROM {table_name} JOIN CapacityToActivity ca ' + f'ON {table_name}.tech = ca.tech AND {table_name}.region = ca.region' + ) else: query = f'SELECT tech, units, NULL FROM {table_name}' @@ -166,13 +175,13 @@ def check_inter_table_relations( rows = conn.execute(query).fetchall() except sqlite3.OperationalError: logger.error('failed to process query: %s when processing table %s', query, table_name) - msg = f' Failed to process table {table_name}. See log for failed query.' + msg = f'Failed to process table {table_name}. See log for failed query.' error_msgs.append(msg) return error_msgs for idx, (tech, table_units, c2a_units) in enumerate(rows, start=1): if tech not in tech_lut: error_msgs.append( - f' Unprocessed row (missing reference for tech "{tech}" --see earlier tests): {idx}' + f'Unprocessed row (missing reference for tech "{tech}" --see earlier tests): {idx}' ) continue # validate the units in the table... 
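The capacity-to-activity conversion that the CapacityToActivity join above enables is easiest to see with concrete units; a small sketch in which the PJ/(GW*year) C2A value is an assumption for illustration only, not a value taken from the patches:

import pint

ureg = pint.UnitRegistry()

output_units = ureg.parse_units('PJ')              # tech output units, per the Efficiency table
c2a_units = ureg.parse_units('PJ / (GW * year)')   # assumed CapacityToActivity units for the tech

# the same expression the checker uses to form the expected units of a capacity-based row
expected = output_units / (c2a_units * ureg.year)
print(expected)                                    # gigawatt -- what a MaxCapacity-style entry should carry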
@@ -195,9 +204,9 @@ def check_inter_table_relations( valid_c2a_units = None if not valid_table_units: - error_msgs.append(f' Unprocessed row (invalid units--see earlier tests): {idx}') + error_msgs.append(f'Unprocessed row (invalid units--see earlier tests): {idx}') if not c2a_valid: - error_msgs.append(f' Unprocessed row (invalid c2a units--see earlier tests): {idx}') + error_msgs.append(f'Unprocessed row (invalid c2a units--see earlier tests): {idx}') if not valid_table_units or not c2a_valid: continue @@ -211,9 +220,10 @@ def check_inter_table_relations( # check that the res_units match the expectation from the tech if tech_lut[tech].output_units != res_units: error_msgs.append( - f' Units mismatch near row {idx}: Table Entry: {valid_table_units}, ' - f'C2A Entry: {valid_c2a_units if valid_c2a_units else 'N/A'}, ' - f'expected: {tech_lut[tech].output_units / (valid_c2a_units * ureg.year) if valid_c2a_units else {tech_lut[tech].output_units}}' + f'Units mismatch at row {idx}. Table Entry: {valid_table_units}, ' + f'{f" C2A Entry: {valid_c2a_units}, " if valid_c2a_units else ""}' + f'expected: {tech_lut[tech].output_units / (valid_c2a_units * ureg.year) if valid_c2a_units else tech_lut[tech].output_units}' + f' for output of tech {tech}.' ) return error_msgs @@ -221,7 +231,7 @@ def check_inter_table_relations( def check_cost_tables( conn: sqlite3.Connection, - cost_tables: Iterable[str], + cost_tables: Iterable[CostTableData], tech_lut: dict[str, IOUnits], c2a_lut: dict[str, Unit], commodity_lut: dict[str, Unit], @@ -232,81 +242,123 @@ def check_cost_tables( Note: we'll *assume* the first passing entry in the first table establishes the common cost units and check for consistency """ - common_cost_unit = None + common_cost_unit = None # Expectation: MUSD. Something with a prefix and currency dimension error_msgs = [] - for table_name, commodity_reference, capacity_based, period_based in cost_tables: + for ct in cost_tables: table_grouped_errors = defaultdict(list) - if commodity_reference and capacity_based: + if ct.commodity_reference and ct.capacity_based: raise ValueError( - f'Table that is "capacity based" {table_name} flagged as having commodity field. Check input for cost tables' + f'Table that is "capacity based" {ct.table_name} flagged as having commodity field. Check input for cost tables' ) - query = f'SELECT {commodity_reference if commodity_reference else 'tech'}, units FROM {table_name}' + query = f'SELECT {ct.commodity_reference if ct.commodity_reference else 'tech'}, units FROM {ct.table_name}' try: rows = conn.execute(query).fetchall() except sqlite3.OperationalError: - logger.error('failed to process query: %s when processing table %s', query, table_name) - msg = f' Failed to process table {table_name}. See log for failed query.' + logger.error( + 'failed to process query: %s when processing table %s', query, ct.table_name + ) + msg = f'Failed to process table {ct.table_name}. See log for failed query.' 
error_msgs.append(msg) continue - for idx, (tech, units) in enumerate(rows, start=1): - # convert - valid, output_units = validate_units_expression(units) + for idx, (tech, raw_units_expression) in enumerate(rows, start=1): + # convert to pint expression + valid, table_units = validate_units_expression(raw_units_expression) if not valid: - label = ( - f' {table_name}: Unprocessed row (invalid units--see earlier tests): {units}' - ) + label = f' {ct.table_name}: Unprocessed row (invalid units--see earlier tests): {raw_units_expression}' table_grouped_errors[label].append(idx) continue - # determine the units for the commodity - c2a_units = None - if commodity_reference: - commodity_units = commodity_lut[commodity_reference] + # for those costs that are capacity-based, we will adjust the commodity's units (which are activity-based) + # by dividing them by the C2A factor, which should make them comparable. + # + # Example: + # $/MW (Capacity based cost from a table) + # MWh (The commodity's base units as an Activity-based (energy)) + # + # h (the C2A factor to get from MW to MWh + # + # so we take MWh / h => MW is the expected comparison point after removing the $ reference + + # find the referenced commodity units from the tech or commodity depending on table structure... + if ct.commodity_reference: + commodity_units = commodity_lut.get(ct.commodity_reference) + if not commodity_units: + label = f'{ct.table_name}: Unprocessed row (unknown commodity: {ct.commodity_reference}) ' + table_grouped_errors[label].append(idx) + continue else: tech_io = tech_lut.get(tech) if tech_io: commodity_units = tech_io.output_units else: - label = f' {table_name}: Unprocessed row (unknown tech: {tech}) ' + label = f'{ct.table_name}: Unprocessed row (unknown tech: {tech}) ' table_grouped_errors[label].append(idx) continue - if capacity_based: - c2a_units = c2a_lut.get(tech, ureg.dimensionless / ureg.year) - # we need to transform the activity-based commodity units to capacity units to match the cost table - commodity_units /= c2a_units * ureg.year + # pull the C2A factor if this table is capacity-based and determine the "match units" which should + # match the commodity units in the table, after removing the "per period" time factor, if it exists + c2a_units = None + if ct.capacity_based: + c2a_units = c2a_lut.get(tech, ureg.dimensionless) # default is dimensionless + # we need to transform the activity-based commodity units to capacity units to match the cost table + match_units = commodity_units / (c2a_units * ureg.year) + else: + match_units = commodity_units + + # now we "sit on this" so we can remove the common cost below for checking, after it is established - # separate and check if common_cost_unit is None: # establish it + # + # Typical "cost math" is like: # # MUSD # ---- * kWh = MUSD # kWh - # if it's "clean" use it + # determine if the units are a "clean cost" as shown above - cost_unit = output_units * commodity_units + cost_unit = ( + table_units * match_units + ) # should simplify to pure currency as shown above + if ct.period_based: + # multiply by the standard period to remove it from the denominator + cost_unit *= ureg.year # check that what we have captured is in the currency dimension == "clean" + # dev note: multiplying by 1 allows us to use the check_units_expression() function if (1 * cost_unit).check('[currency]'): common_cost_unit = cost_unit - else: # something is wrong, hopefully it was just this entry? + else: + # something is wrong, hopefully it was just this entry? 
+ # mark it, dump it, and try again... error_msgs.append( - f' {table_name}: Unprocessed row (unreducible cost units or mismatched tech output units): {idx}' + f'{ct.table_name}: Unprocessed row (unreducible cost units or mismatched tech output units): {idx}' ) continue else: - # use the common cost unit to check - cost_unit = output_units * commodity_units + # use the match_units from the associated tech/commodity to remove the non-cost units + cost_unit = table_units * match_units + if ct.period_based: + cost_unit *= ureg.year + + # check 1: ensure the cost units are equal to the common cost units if cost_unit != common_cost_unit: label = ( - f' {table_name}: Unprocessed row (mismatched cost units or tech output mismatch)' - f'\n Table entry: {units}, Commodity units: {commodity_units}, Remainder: {cost_unit}, c2a units: {c2a_units if c2a_units else "N/A"}:' + f'{ct.table_name}: Non-standard cost found (does not simplify to expected common cost unit): {raw_units_expression}' + f'\n Commodity units: {commodity_units}, Residual (expecting {common_cost_unit}): {cost_unit}, c2a units: {c2a_units if c2a_units else "N/A"}.' ) table_grouped_errors[label].append(idx) + else: + # proceed to the follow-on check + # check 2: ensure that the commodity units match the commodity, + # now that we can remove the common cost and check that the plain units matches the denominator + plain_units = common_cost_unit / table_units + if match_units != plain_units: + label = f'{ct.table_name}: Commodity units of cost element incorrect after applying C2A factor: {raw_units_expression}.' + table_grouped_errors[label].append(idx) for label, listed_lines in table_grouped_errors.items(): - error_msgs.append(f'{label} {consolidate_lines(listed_lines)}') + error_msgs.append(f'{label} at rows: {consolidate_lines(listed_lines)}') return error_msgs diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 90147064..6f46e3d1 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -99,10 +99,10 @@ def screen(dp_path: Path, report_path: Path | None = None): if table_errors: errors = True for error in table_errors: - logger.warning('%s: %s', table, error) + logger.info('%s: %s', table, error) report_entries.extend((f' {table}: {error}', '\n')) if verbose: - print(f'{table}: {error}') + print(f'{table}: {error}') if not errors: msg = 'Units Check 2 (Units Entries in Tables): Passed' logger.info(msg) @@ -124,7 +124,7 @@ def screen(dp_path: Path, report_path: Path | None = None): tech_io_lut, errors = check_efficiency_table(conn, comm_units=commodity_lut) if errors: for error in errors: - logger.warning('%s: %s', 'Efficiency', error) + logger.info('%s: %s', 'Efficiency', error) report_entries.extend((f'Efficiency: {error}', '\n')) if verbose: print(f'Efficiency: {error}') @@ -157,10 +157,10 @@ def screen(dp_path: Path, report_path: Path | None = None): if errors: error_free = False for error in errors: - logger.warning('%s: %s', table, error) - report_entries.extend((f'{table}: {error}', '\n')) + logger.info('%s: %s', table, error) + report_entries.extend((f'{table}: {error}', '\n')) if verbose: - print(f'{table}: {error}') + print(f'{table}: {error}') for table in capacity_based_tables: errors = check_inter_table_relations( conn=conn, @@ -172,10 +172,10 @@ def screen(dp_path: Path, report_path: Path | None = None): if errors: error_free = False for error in errors: - logger.warning('%s: %s', table, error) - 
report_entries.extend((f'{table}: {error}', '\n')) + logger.info('%s: %s', table, error) + report_entries.extend((f'{table}: {error}', '\n')) if verbose: - print(f'{table}: {error}') + print(f'{table}: {error}') if error_free: msg = 'Units Check 4: (Related Tables): Passed' logger.info(msg) @@ -204,8 +204,10 @@ def screen(dp_path: Path, report_path: Path | None = None): ) if errors: for error in errors: - logger.warning('%s', error) + logger.info('%s', error) report_entries.extend((error, '\n')) + if verbose: + print(error) else: msg = 'Units Check 5: (Cost Tables): Passed' logger.info(msg) diff --git a/temoa/temoa_model/unit_checking/table_checker.py b/temoa/temoa_model/unit_checking/table_checker.py index 1c139f54..55a3f41d 100644 --- a/temoa/temoa_model/unit_checking/table_checker.py +++ b/temoa/temoa_model/unit_checking/table_checker.py @@ -30,19 +30,17 @@ import logging import re import sqlite3 -from pathlib import Path from pint.registry import Unit +from temoa.temoa_model.unit_checking import ureg from temoa.temoa_model.unit_checking.common import ( - tables_with_units, ratio_capture_tables, RATIO_ELEMENT, SINGLE_ELEMENT, ACCEPTABLE_CHARACTERS, consolidate_lines, capacity_based_tables, - per_capacity_based_tables, ) from temoa.temoa_model.unit_checking.entry_checker import ( validate_units_expression, @@ -56,25 +54,21 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Unit], list[str]]: """ Check all entries in a table for format and registry compliance - This "first pass" gathers common entriesfor efficiency""" + This "first pass" gathers common entries for efficiency""" errors = [] res = {} format_type = RATIO_ELEMENT if table_name in ratio_capture_tables else SINGLE_ELEMENT - # check for incompatible screens... - if table_name in capacity_based_tables: - if format_type == RATIO_ELEMENT: - logger.warning('Checking of RATIO_ELEMENTs for capacity-type units is NOT implemented') - + # this function gathers all unique entries by row number for efficiency in larger tables entries = gather_from_table(conn, table_name) for expr, line_nums in entries.items(): # check characters valid_chars = re.search(ACCEPTABLE_CHARACTERS, expr) if not valid_chars: listed_lines = consolidate_lines(line_nums) - errors.append( - f' Invalid character(s) at rows {listed_lines} [only letters, underscore and "*, /, ^, ()" operators allowed]: {expr if expr else ""}' + f'Invalid character(s): {expr if expr else ""} [only letters, underscore ' + f'and "*, /, ^, ()" operators allowed] at rows: {listed_lines} ' ) continue @@ -82,8 +76,18 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un valid, elements = validate_units_format(expr, format_type) if not valid: listed_lines = consolidate_lines(line_nums) + if format_type == RATIO_ELEMENT: + msg = ( + f'Format violation at rows. {listed_lines}: {expr}. ' + f'Check illegal chars/operators and that denominator is isolated in parentheses.' + ) + else: + msg = ( + f'Format violation at rows. {listed_lines}: {expr}. ' + f'Check for illegal characters or operators.' 
+ ) - errors.append(f' Format violation at rows {listed_lines}: {expr}') + errors.append(msg) continue # Check registry compliance @@ -94,26 +98,29 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un if not success: listed_lines = consolidate_lines(line_nums) errors.append( - f' Registry violation (UNK units) at rows {listed_lines}: {element}' + f'Registry violation (UNK units): {element} at rows: {listed_lines}' ) else: converted_units.append(units) + if len(converted_units) != format_type.groups: + # we came up short of something, skip this entry + continue # if we have a relationship with "capacity" check that we have some time units + # this test is disabled for RATIO_ELEMENT based tables due to ambiguities if table_name in capacity_based_tables and format_type == SINGLE_ELEMENT: test_value = converted_units[0] - if test_value.dimensionality.get('[time]') != -1: - # no time in numerator - listed_lines = consolidate_lines(line_nums) - errors.append( - f' No time dimension in denominator of capacity entry at rows {listed_lines}: {expr}' - ) - if table_name in per_capacity_based_tables and format_type == SINGLE_ELEMENT: - test_value = converted_units[0] - if test_value.dimensionality.get('[time]') != 1: + # test if compatible with standard "power" units (magnitude doesn't matter) + capacity_like = ureg.watt in test_value.compatible_units() + if not capacity_like: + # test for time units in denominator as backup test + capacity_like = test_value.dimensionality.get('[time]') != -1 + + if not capacity_like: listed_lines = consolidate_lines(line_nums) errors.append( - f' No time dimension in numerator of capacity entry at rows {listed_lines}: {expr}' + f'Time dimension of capacity entry: {expr} *might* be missing in denominator ' + f'or this may not be a standard "power" expression: {listed_lines}' ) # assemble a reference of item: units-relationship if we have a valid entry @@ -125,32 +132,5 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un ref = {expr: converted_units[0] / converted_units[1]} res.update(ref) else: - logger.error('Unknown units format: %s', format_type) + raise ValueError('Unknown units format: %s', format_type) return res, errors - - -def check_database(db_path: Path) -> list[str]: - """Check all tables in database for units compliance""" - errors = [] - conn = sqlite3.connect(db_path) - - for table in tables_with_units: - table_errors = check_table(conn, table) - errors.extend(table_errors) - - conn.close() - return errors - - -if __name__ == '__main__': - from definitions import PROJECT_ROOT - - test_db = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' - results = check_database(test_db) - - if results: - print('\nErrors found:') - for error in results: - print(error) - else: - print('\nNo errors found') diff --git a/tests/test_unit_checker.py b/tests/test_unit_checker.py index e9254a32..66df8e5f 100644 --- a/tests/test_unit_checker.py +++ b/tests/test_unit_checker.py @@ -77,6 +77,7 @@ def test_format_validation(entry, units_format, expected): ('kWh', (True, ureg.kWh)), ] + @pytest.mark.parametrize( 'expr, expected_result', cases, @@ -88,3 +89,15 @@ def test_validate_units_expression(expr, expected_result): """ result = validate_units_expression(expr) assert result == expected_result + + +cases = [('kW', -3), ('kWh', -2), ('PJ', -2), ('PJ/h', -3)] + + +@pytest.mark.parametrize('expr, location', cases, ids=[t[0] for t in cases]) +def test_time_dimenstion_locator(expr, location: 
int): + test_value = validate_units_expression(expr)[1] + found = test_value.dimensionality.get('[time]') + assert ( + found == location + ), f'time dimension not found at expected location for units: {test_value}' From 70d5deb9e369714e3671d89e054bfdffca9e04dc Mon Sep 17 00:00:00 2001 From: Jeff H Date: Wed, 15 Oct 2025 16:06:33 -0700 Subject: [PATCH 51/70] Working with RATIO based expression expectation in Cost Tables --- .../unit_checking/relation_checker.py | 116 +++++++----------- 1 file changed, 47 insertions(+), 69 deletions(-) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index 7b5f046b..a22b2a3c 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -248,7 +248,8 @@ def check_cost_tables( table_grouped_errors = defaultdict(list) if ct.commodity_reference and ct.capacity_based: raise ValueError( - f'Table that is "capacity based" {ct.table_name} flagged as having commodity field. Check input for cost tables' + f'Table that is "capacity based" {ct.table_name} flagged as ' + 'having commodity field--expecting tech field. Check data.' ) query = f'SELECT {ct.commodity_reference if ct.commodity_reference else 'tech'}, units FROM {ct.table_name}' try: @@ -262,23 +263,40 @@ def check_cost_tables( continue for idx, (tech, raw_units_expression) in enumerate(rows, start=1): # convert to pint expression - valid, table_units = validate_units_expression(raw_units_expression) - if not valid: - label = f' {ct.table_name}: Unprocessed row (invalid units--see earlier tests): {raw_units_expression}' + cost_units, measure_units = None, None + valid, (raw_cost, raw_units) = validate_units_format( + raw_units_expression, RATIO_ELEMENT + ) + if valid: + cost_valid, cost_units = validate_units_expression(raw_cost) + units_valid, measure_units = validate_units_expression(raw_units) + else: + cost_valid, units_valid = False, False + if not (cost_valid and units_valid): + label = f'{ct.table_name}: Unprocessed row (invalid units--see earlier tests): {raw_units_expression}' table_grouped_errors[label].append(idx) continue - # for those costs that are capacity-based, we will adjust the commodity's units (which are activity-based) - # by dividing them by the C2A factor, which should make them comparable. - # - # Example: - # $/MW (Capacity based cost from a table) - # MWh (The commodity's base units as an Activity-based (energy)) - # - # h (the C2A factor to get from MW to MWh - # - # so we take MWh / h => MW is the expected comparison point after removing the $ reference + # Test 1: Look for cost commonality + if common_cost_unit is None: + # try to establish it + # check that what we have captured is in the currency dimension == "clean" + # dev note: multiplying by 1 allows us to use the check_units_expression() function + if (1 * cost_units).check('[currency]'): + common_cost_unit = cost_units + else: + # something is wrong, hopefully it was just this entry? + # mark it, dump it, and try again... 
+ error_msgs.append( + f'{ct.table_name}: Unprocessed row (unreducible cost units): {cost_units} at row: {idx}' + ) + continue + else: # use the common units to test + if cost_units != common_cost_unit: + label = f'{ct.table_name}: Non-standard cost found (expected common cost units of {common_cost_unit}) got: {cost_units}' + table_grouped_errors[label].append(idx) + # Test 2: Check the units of measure to ensure alignment with the tech's output units # find the referenced commodity units from the tech or commodity depending on table structure... if ct.commodity_reference: commodity_units = commodity_lut.get(ct.commodity_reference) @@ -300,63 +318,23 @@ def check_cost_tables( c2a_units = None if ct.capacity_based: c2a_units = c2a_lut.get(tech, ureg.dimensionless) # default is dimensionless - # we need to transform the activity-based commodity units to capacity units to match the cost table - match_units = commodity_units / (c2a_units * ureg.year) - else: - match_units = commodity_units + # apply to convert + measure_units *= c2a_units * ureg.year - # now we "sit on this" so we can remove the common cost below for checking, after it is established + if ct.period_based: + measure_units /= ureg.year # remove the "per year" from this denominator + + matched = measure_units == commodity_units + + if not matched: + tech_reference = ct.commodity_reference if ct.commodity_reference else tech + label = ( + f'{ct.table_name}: Non-matching measure unit found in cost denominator for tech/commodity {tech_reference}: {raw_units_expression}' + f'\n Commodity units: {commodity_units}, Discovered (after conversions): {measure_units}' + f'\n Conversions: c2a units: {c2a_units*ureg.year if c2a_units else "N/A"}{", `per period` removed" if ct.period_based else ""}\n ' + ) + table_grouped_errors[label].append(idx) - if common_cost_unit is None: - # establish it - - # - # Typical "cost math" is like: - # - # MUSD - # ---- * kWh = MUSD - # kWh - - # determine if the units are a "clean cost" as shown above - - cost_unit = ( - table_units * match_units - ) # should simplify to pure currency as shown above - if ct.period_based: - # multiply by the standard period to remove it from the denominator - cost_unit *= ureg.year - # check that what we have captured is in the currency dimension == "clean" - # dev note: multiplying by 1 allows us to use the check_units_expression() function - if (1 * cost_unit).check('[currency]'): - common_cost_unit = cost_unit - else: - # something is wrong, hopefully it was just this entry? - # mark it, dump it, and try again... - error_msgs.append( - f'{ct.table_name}: Unprocessed row (unreducible cost units or mismatched tech output units): {idx}' - ) - continue - else: - # use the match_units from the associated tech/commodity to remove the non-cost units - cost_unit = table_units * match_units - if ct.period_based: - cost_unit *= ureg.year - - # check 1: ensure the cost units are equal to the common cost units - if cost_unit != common_cost_unit: - label = ( - f'{ct.table_name}: Non-standard cost found (does not simplify to expected common cost unit): {raw_units_expression}' - f'\n Commodity units: {commodity_units}, Residual (expecting {common_cost_unit}): {cost_unit}, c2a units: {c2a_units if c2a_units else "N/A"}.' 
- ) - table_grouped_errors[label].append(idx) - else: - # proceed to the follow-on check - # check 2: ensure that the commodity units match the commodity, - # now that we can remove the common cost and check that the plain units matches the denominator - plain_units = common_cost_unit / table_units - if match_units != plain_units: - label = f'{ct.table_name}: Commodity units of cost element incorrect after applying C2A factor: {raw_units_expression}.' - table_grouped_errors[label].append(idx) for label, listed_lines in table_grouped_errors.items(): error_msgs.append(f'{label} at rows: {consolidate_lines(listed_lines)}') return error_msgs From 39623ec48edf613fa6ac475c9cd09d108dddf9bb Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 16 Oct 2025 16:00:03 -0700 Subject: [PATCH 52/70] minor comment fixes and formatting --- temoa/temoa_model/unit_checking/common.py | 16 +++- .../unit_checking/entry_checker.py | 94 +++---------------- .../unit_checking/relation_checker.py | 32 +------ temoa/temoa_model/unit_checking/screener.py | 13 +-- 4 files changed, 31 insertions(+), 124 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index f6c2be51..d822e680 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -86,10 +86,12 @@ 'Demand', 'MaxResource', # haven't we done away with this table/constraint? ] + +# Group tables Not Yet Implemented... would need to gather by group name and tech, etc. activity_based_tables = [ 'MaxActivity', - # 'MaxActivityGroup', 'MinActivity', + # 'Max ActivityGroup', # 'MinActivityGroup', ] """Tables that should have units equivalent to the commodity's native units""" @@ -97,12 +99,12 @@ capacity_based_tables = [ 'ExistingCapacity', 'MaxCapacity', - # 'MaxCapacityGroup', 'MaxNewCapacity', - # 'MaxNewCapacityGroup', 'MinCapacity', - # 'MinCapacityGroup', 'MinNewCapacity', + # 'MinCapacityGroup', + # 'MaxNewCapacityGroup', + # 'MaxCapacityGroup', # 'MinNewCapacityGroup', ] """Tables that require conversion via CapacityToActivity to reach the native units""" @@ -118,7 +120,11 @@ # we need to delineate whether the units are commodity-referenced or tech-referenced and if they are "capacity based" so... 
# format: (table_name, commodity field name (None if 'tech' based), capacity-based, period-based ) CostTableData = NamedTuple( - 'CostTableData', table_name=str, commodity_reference=str, capacity_based=bool, period_based=bool + 'CostTableData', + table_name=str, + commodity_reference=str | None, + capacity_based=bool, + period_based=bool, ) """A named tuple for the cost tables + important properties""" diff --git a/temoa/temoa_model/unit_checking/entry_checker.py b/temoa/temoa_model/unit_checking/entry_checker.py index ae718f31..885cfc7f 100644 --- a/temoa/temoa_model/unit_checking/entry_checker.py +++ b/temoa/temoa_model/unit_checking/entry_checker.py @@ -34,35 +34,18 @@ import re import sqlite3 from collections import defaultdict -from pathlib import Path from typing import Union from pint import UndefinedUnitError, Unit -from definitions import PROJECT_ROOT from temoa.temoa_model.unit_checking import ureg from temoa.temoa_model.unit_checking.common import ( UnitsFormat, - RATIO_ELEMENT, - SINGLE_ELEMENT, ) logger = logging.getLogger(__name__) -def validate_units_expression(expr: str) -> tuple[bool, Union[Unit, None]]: - """ - validate an entry against the units registry - :param expr: the expression to validate - :return: tuple of the validity and the converted expression - """ - try: - units = ureg.parse_units(expr) - return True, units - except UndefinedUnitError: - return False, None - - def validate_units_format( expr: str, unit_format: UnitsFormat ) -> tuple[bool, tuple[str, ...] | None]: @@ -78,8 +61,21 @@ def validate_units_format( return False, None +def validate_units_expression(expr: str) -> tuple[bool, Union[Unit, None]]: + """ + validate an entry against the units registry + :param expr: the expression to validate + :return: tuple of the validity and the converted expression + """ + try: + units = ureg.parse_units(expr) + return True, units + except UndefinedUnitError: + return False, None + + def gather_from_table(conn: sqlite3.Connection, table: str) -> dict[str, list[int]]: - """gather all "units" entries from a table as a list of line numbers""" + """gather all unique "units" entries from a table and collect the row indices""" res = defaultdict(list) with conn: @@ -89,65 +85,3 @@ def gather_from_table(conn: sqlite3.Connection, table: str) -> dict[str, list[in res[result[0]].append(idx) return res - - -if __name__ == '__main__': - """for development/experimentation""" - exprs = [ - 'watt', - 'meter', - 'm', - 'petajoule', - 'PJ', - 'PJ/s', - 'PeTAJouLE', - 'PetaJoule', - 'kilowatt*hour', - 'killowathour', - 'KWh', - 'KWH', - 'USD', - 'dollar', - 'passenger', - ] - for expr in exprs: - success, converted = validate_units_expression(expr) - compatible = converted.is_compatible_with(ureg('joule')) if converted else '--' - if success: - print(f'{expr} converts to: {converted}. 
Compatible with joules: {compatible} ') - else: - print(f'{expr} failed to convert') - - tables = ['Efficiency', 'ExistingCapacity'] - formats = [RATIO_ELEMENT, SINGLE_ELEMENT] - - conn = sqlite3.connect( - Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' - ) - - def validate_entries(table_name, units_format: UnitsFormat): - """validate all entries in a table""" - conn = sqlite3.connect( - Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' - ) - res = gather_from_table(conn, table_name) - conn.close() - for expr in res: - valid, elements = validate_units_format(expr, units_format) - if not valid: - print(f'Format Violation: {expr} in {table_name}') - else: - for group in elements: - if group: - success, converted = validate_units_expression(group) - if not success: - print(f'Unit Validation Fail: {expr} in {table_name}') - - for table in tables: - print(gather_from_table(conn, table)) - - conn.close() - print('\n\n') - - for t in zip(tables, formats): - validate_entries(t[0], t[1]) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index a22b2a3c..8c511140 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -30,13 +30,11 @@ import logging import sqlite3 from collections.abc import Iterable -from pathlib import Path from mypy.checkexpr import defaultdict from mypy.message_registry import NamedTuple from pint.registry import Unit -from definitions import PROJECT_ROOT from temoa.temoa_model.unit_checking import ureg from temoa.temoa_model.unit_checking.common import ( RATIO_ELEMENT, @@ -111,6 +109,7 @@ def check_efficiency_table( valid, input_units = validate_units_expression(located_units[1]) if not valid: invalid_rows.append(idx) + # we give up early. The specifics of why this failed should be evident in earlier tests continue # check that our tech matches the units of the connected commodities @@ -154,15 +153,12 @@ def check_efficiency_table( def check_inter_table_relations( - conn: sqlite3.Connection, - table_name, - tech_lut: dict[str, IOUnits], - c2a_lut: dict[str, Unit], - capacity_based: bool, + conn: sqlite3.Connection, table_name, tech_lut: dict[str, IOUnits], capacity_based: bool ) -> list[str]: - """check the tech and units in the given table vs. baseline values for the tech""" + """check the tech and units in the given table vs. 
baseline (expected) values for the tech""" error_msgs = [] if capacity_based: + # we make a query to join on the C2A units to pick those up query = ( f'SELECT {table_name}.tech, {table_name}.units, ca.units ' f'FROM {table_name} JOIN CapacityToActivity ca ' @@ -338,23 +334,3 @@ def check_cost_tables( for label, listed_lines in table_grouped_errors.items(): error_msgs.append(f'{label} at rows: {consolidate_lines(listed_lines)}') return error_msgs - - -def main(db_path: Path): - """Run unit relationship checks on database""" - logging.basicConfig(level=logging.INFO) - - try: - conn = sqlite3.connect(db_path) - comm_units = make_commodity_lut(conn) - check_efficiency_table(conn, comm_units) - conn.close() - except sqlite3.Error as e: - logger.error(f'Database error: {e}') - except Exception as e: - logger.error(f'Error during check: {repr(e)}') - raise - - -if __name__ == '__main__': - main(Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite') diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 6f46e3d1..efcc9fa0 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -148,11 +148,7 @@ def screen(dp_path: Path, report_path: Path | None = None): error_free = True for table in activity_based_tables: errors = check_inter_table_relations( - conn=conn, - table_name=table, - tech_lut=tech_io_lut, - c2a_lut=c2a_lut, - capacity_based=False, + conn=conn, table_name=table, tech_lut=tech_io_lut, capacity_based=False ) if errors: error_free = False @@ -163,11 +159,7 @@ def screen(dp_path: Path, report_path: Path | None = None): print(f'{table}: {error}') for table in capacity_based_tables: errors = check_inter_table_relations( - conn=conn, - table_name=table, - tech_lut=tech_io_lut, - c2a_lut=c2a_lut, - capacity_based=True, + conn=conn, table_name=table, tech_lut=tech_io_lut, capacity_based=True ) if errors: error_free = False @@ -228,6 +220,5 @@ def _write_report(report_path: Path, report_entries: list[str]): if __name__ == '__main__': - # db_path = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_orig_v3_1.sqlite' db_path = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' screen(db_path, report_path=Path(PROJECT_ROOT) / 'output_files/units.txt') From eb37aec63ab8905ad739c771ae7dbcc4a136c434 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 16 Oct 2025 16:03:36 -0700 Subject: [PATCH 53/70] augment test --- tests/test_unit_checker.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_unit_checker.py b/tests/test_unit_checker.py index 66df8e5f..ae695ff3 100644 --- a/tests/test_unit_checker.py +++ b/tests/test_unit_checker.py @@ -46,7 +46,8 @@ ('PJ / (kT)', RATIO_ELEMENT, True), ('(PJ) / (kT)', RATIO_ELEMENT, True), # numerator optionally parenthesized ('PJ / kT', RATIO_ELEMENT, False), # no parens on denom - ('kWh/day/(cycle)', RATIO_ELEMENT, False), # no slash char + ('kWh/day/(cycle)', RATIO_ELEMENT, False), # ambiguous slash char + ('(kWh/day)/(cycle)', RATIO_ELEMENT, True), ] From 6dd757782a01589dec8ee3a4a6525a8e2f58b88a Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 16 Oct 2025 17:40:02 -0700 Subject: [PATCH 54/70] correct the units pattern in the custom file --- temoa/temoa_model/unit_checking/temoa_units.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/temoa/temoa_model/unit_checking/temoa_units.txt b/temoa/temoa_model/unit_checking/temoa_units.txt index 
2989f445..194e1a4b 100644 --- a/temoa/temoa_model/unit_checking/temoa_units.txt +++ b/temoa/temoa_model/unit_checking/temoa_units.txt @@ -1,15 +1,15 @@ # a few additions needed to for completeness in Temoa model # the units here AUGMENT the default units in pint. See pint's documentation for more info. -passenger = [person] = passenger -seat = [object] = seat -vehicle = [object] = vehicle +passenger = [person] +seat = [object] +vehicle = [object] # see pint's notes about currency and conversion before getting ideas about currency conversions. ;) # it would be OK to add other currencies here in addition to USD. -dollar = [currency] = USD = dollar -euro = [currency] = EUR = euro +dollar = [currency] = USD +euro = [currency] = EUR # Temoa uses ethos as an original source, so we add it here as an "empty" base class unit ethos = [empty] \ No newline at end of file From 03d6b225f1c7dc03d0e4b1f4aca7f9233d85785b Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 16 Oct 2025 17:41:49 -0700 Subject: [PATCH 55/70] updates to screener to handle empty entries and multiple db files --- .../unit_checking/entry_checker.py | 4 +- .../unit_checking/relation_checker.py | 5 + temoa/temoa_model/unit_checking/screener.py | 281 ++++++++++-------- .../unit_checking/table_checker.py | 6 + 4 files changed, 166 insertions(+), 130 deletions(-) diff --git a/temoa/temoa_model/unit_checking/entry_checker.py b/temoa/temoa_model/unit_checking/entry_checker.py index 885cfc7f..bc669bf6 100644 --- a/temoa/temoa_model/unit_checking/entry_checker.py +++ b/temoa/temoa_model/unit_checking/entry_checker.py @@ -82,6 +82,8 @@ def gather_from_table(conn: sqlite3.Connection, table: str) -> dict[str, list[in cur = conn.cursor() cur.execute(f'SELECT units FROM {table}') for idx, result in enumerate(cur.fetchall(), start=1): - res[result[0]].append(idx) + # note: this will put in "blank" entries which is OK, we want to mark blank rows too + entry = result[0] + res[entry].append(idx) return res diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index 8c511140..a36cf71f 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -260,6 +260,11 @@ def check_cost_tables( for idx, (tech, raw_units_expression) in enumerate(rows, start=1): # convert to pint expression cost_units, measure_units = None, None + # screen for empty/missing raw inputs + if not raw_units_expression: + label = f'{ct.table_name}: Unprocessed row (missing units): {raw_units_expression}' + table_grouped_errors[label].append(idx) + continue valid, (raw_cost, raw_units) = validate_units_format( raw_units_expression, RATIO_ELEMENT ) diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index efcc9fa0..61dd9b61 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -51,174 +51,197 @@ verbose = True # for dev/test work -def screen(dp_path: Path, report_path: Path | None = None): - """The sequencer to run a series of checks on units in the database""" +def screen(*db_paths: Path, report_path: Path | None = None) -> bool: + """ + Run series of units screens on the database + :param db_paths: the abs path(S) to the database(s) + :param report_path: abs path to write the report to. 
If None, no report is written + :return: indicator of whether all checks passed "cleanly" or not + """ + all_clear = True report_entries = [] table_units = {} """Table name : {tech | commodity: units}""" - with sqlite3.connect(dp_path) as conn: - # test 1: DB version - msg = '======== Units Check 1 (DB Version): Started ========' - report_entries.extend((msg, '\n')) - logger.info(msg) - if verbose: - print() - print(msg) - data = conn.execute('SELECT element, value FROM MetaData').fetchall() - meta_data = dict(data) - major = meta_data.get('DB_MAJOR', 0) - minor = meta_data.get('DB_MINOR', 0) - if major == 3 and minor >= 1: - msg = 'Units Check 1 (DB Version): Passed' + for db_path in db_paths: + if not db_path.is_file(): + raise FileNotFoundError(f'Database file not found: {db_path}') + initialization_msg = f'======== Units Check on DB: {db_path}: Started ========\n' + report_entries.append(initialization_msg) + logger.info('Starting Units Check on DB: %s', db_paths) + with sqlite3.connect(db_path) as conn: + # test 1: DB version + msg = '======== Units Check 1 (DB Version): Started ========' report_entries.extend((msg, '\n')) logger.info(msg) if verbose: + print() print(msg) - else: - msg = 'Units Check 1 (DB Version): Failed. DB must be v3.1 or greater for units checking' - report_entries.extend((msg, '\n')) - logger.warning(msg) - # we are non-viable, write the (very short) report and return - _write_report(report_path, report_entries) + data = conn.execute('SELECT element, value FROM MetaData').fetchall() + meta_data = dict(data) + major = meta_data.get('DB_MAJOR', 0) + minor = meta_data.get('DB_MINOR', 0) + if major == 3 and minor >= 1: + msg = 'Units Check 1 (DB Version): Passed' + report_entries.extend((msg, '\n')) + logger.info(msg) + if verbose: + print(msg) + else: + msg = 'Units Check 1 (DB Version): Failed. 
DB must be v3.1 or greater for units checking' + report_entries.extend((msg, '\n')) + logger.warning(msg) + # we are non-viable, write the (very short) report and return + _write_report(report_path, report_entries) + if verbose: + print(msg) + return False + + # test 2: Units in tables + report_entries.append('\n') + msg = '======== Units Check 2 (Units Entries in Tables): Started ========' if verbose: + print() print(msg) - return - - # test 2: Units in tables - report_entries.append('\n') - msg = '======== Units Check 2 (Units Entries in Tables): Started ========' - if verbose: - print() - print(msg) - logger.info(msg) - report_entries.extend((msg, '\n')) - errors = False - for table in tables_with_units: - relations, table_errors = check_table(conn, table) - table_units[table] = relations - if table_errors: - errors = True - for error in table_errors: - logger.info('%s: %s', table, error) - report_entries.extend((f' {table}: {error}', '\n')) - if verbose: - print(f'{table}: {error}') - if not errors: - msg = 'Units Check 2 (Units Entries in Tables): Passed' logger.info(msg) report_entries.extend((msg, '\n')) - if verbose: - print(msg) - report_entries.append('\n') - - # test 3: Efficiency Table - msg = '======== Units Check 3 (Tech I/O via Efficiency Table): Started ========' - logger.info(msg) - report_entries.extend((msg, '\n')) - if verbose: - print() - print(msg) - # make Look Up Tables for use in follow-on checks - commodity_lut = make_commodity_lut(conn) - c2a_lut = make_c2a_lut(conn) - tech_io_lut, errors = check_efficiency_table(conn, comm_units=commodity_lut) - if errors: - for error in errors: - logger.info('%s: %s', 'Efficiency', error) - report_entries.extend((f'Efficiency: {error}', '\n')) + errors = False + for table in tables_with_units: + relations, table_errors = check_table(conn, table) + table_units[table] = relations + if table_errors: + errors = True + for error in table_errors: + logger.info('%s: %s', table, error) + report_entries.extend((f' {table}: {error}', '\n')) + if verbose: + print(f'{table}: {error}') + if not errors: + msg = 'Units Check 2 (Units Entries in Tables): Passed' + logger.info(msg) + report_entries.extend((msg, '\n')) if verbose: - print(f'Efficiency: {error}') - else: - msg = 'Units Check 3: (Efficiency Table and Tech I/O: Passed' - report_entries.extend((msg, '\n')) + print(msg) + else: + all_clear = False + report_entries.append('\n') + + # test 3: Efficiency Table + msg = '======== Units Check 3 (Tech I/O via Efficiency Table): Started ========' logger.info(msg) + report_entries.extend((msg, '\n')) if verbose: + print() print(msg) - - report_entries.append('\n') - - # test 4: Relationships in other tables - # this utilizes tech_io_lut gathered above to QA the units in other tables - msg = '======== Units Check 4 (Related Tables): Started ========' - logger.info(msg) - report_entries.extend((msg, '\n')) - if verbose: - print() - print(msg) - error_free = True - for table in activity_based_tables: - errors = check_inter_table_relations( - conn=conn, table_name=table, tech_lut=tech_io_lut, capacity_based=False - ) + # make Look Up Tables for use in follow-on checks + commodity_lut = make_commodity_lut(conn) + c2a_lut = make_c2a_lut(conn) + tech_io_lut, errors = check_efficiency_table(conn, comm_units=commodity_lut) if errors: - error_free = False + all_clear = False for error in errors: - logger.info('%s: %s', table, error) - report_entries.extend((f'{table}: {error}', '\n')) + logger.info('%s: %s', 'Efficiency', error) + 
report_entries.extend((f'Efficiency: {error}', '\n')) if verbose: - print(f'{table}: {error}') - for table in capacity_based_tables: - errors = check_inter_table_relations( - conn=conn, table_name=table, tech_lut=tech_io_lut, capacity_based=True - ) - if errors: - error_free = False - for error in errors: - logger.info('%s: %s', table, error) - report_entries.extend((f'{table}: {error}', '\n')) - if verbose: - print(f'{table}: {error}') - if error_free: - msg = 'Units Check 4: (Related Tables): Passed' + print(f'Efficiency: {error}') + else: + msg = 'Units Check 3: (Efficiency Table and Tech I/O: Passed' + report_entries.extend((msg, '\n')) + logger.info(msg) + if verbose: + print(msg) + + report_entries.append('\n') + + # test 4: Relationships in other tables + # this utilizes tech_io_lut gathered above to QA the units in other tables + msg = '======== Units Check 4 (Related Tables): Started ========' logger.info(msg) report_entries.extend((msg, '\n')) if verbose: + print() print(msg) - - report_entries.append('\n') - - # test 5: Cost-Based Tables - # checks to assure that the output units are compatible with the related tech and that the currency is - # standardized when the units are simplified - # We expect units like Mdollars/PJ or such and the denominator should align with the commodity via the tech - msg = '======== Units Check 5 (Cost Tables): Started ========' - logger.info(msg) - report_entries.extend((msg, '\n')) - if verbose: - print() - print(msg) - errors = check_cost_tables( - conn, - cost_tables=cost_based_tables, - tech_lut=tech_io_lut, - c2a_lut=c2a_lut, - commodity_lut=commodity_lut, - ) - if errors: - for error in errors: - logger.info('%s', error) - report_entries.extend((error, '\n')) + error_free = True + for table in activity_based_tables: + errors = check_inter_table_relations( + conn=conn, table_name=table, tech_lut=tech_io_lut, capacity_based=False + ) + if errors: + error_free = False + for error in errors: + logger.info('%s: %s', table, error) + report_entries.extend((f'{table}: {error}', '\n')) + if verbose: + print(f'{table}: {error}') + for table in capacity_based_tables: + errors = check_inter_table_relations( + conn=conn, table_name=table, tech_lut=tech_io_lut, capacity_based=True + ) + if errors: + error_free = False + for error in errors: + logger.info('%s: %s', table, error) + report_entries.extend((f'{table}: {error}', '\n')) + if verbose: + print(f'{table}: {error}') + if error_free: + msg = 'Units Check 4: (Related Tables): Passed' + logger.info(msg) + report_entries.extend((msg, '\n')) if verbose: - print(error) - else: - msg = 'Units Check 5: (Cost Tables): Passed' + print(msg) + else: + all_clear = False + + report_entries.append('\n') + + # test 5: Cost-Based Tables + # checks to assure that the output units are compatible with the related tech and that the currency is + # standardized when the units are simplified + # We expect units like Mdollars/PJ or such and the denominator should align with the commodity via the tech + msg = '======== Units Check 5 (Cost Tables): Started ========' logger.info(msg) report_entries.extend((msg, '\n')) if verbose: + print() print(msg) + errors = check_cost_tables( + conn, + cost_tables=cost_based_tables, + tech_lut=tech_io_lut, + c2a_lut=c2a_lut, + commodity_lut=commodity_lut, + ) + if errors: + all_clear = False + for error in errors: + logger.info('%s', error) + report_entries.extend((error, '\n')) + if verbose: + print(error) + else: + msg = 'Units Check 5: (Cost Tables): Passed' + logger.info(msg) + 
report_entries.extend((msg, '\n')) + if verbose: + print(msg) # wrap it up _write_report(report_path, report_entries) + logger.info('Finished Units Check') + return all_clear def _write_report(report_path: Path, report_entries: list[str]): """Write the report to file""" if not report_path: return + if report_path.is_dir(): + # augment with a default filename + report_path /= 'units_check.txt' with open(report_path, 'w', encoding='utf-8') as report_file: report_file.writelines(report_entries) if __name__ == '__main__': db_path = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' - screen(db_path, report_path=Path(PROJECT_ROOT) / 'output_files/units.txt') + screen(db_path, report_path=Path(PROJECT_ROOT) / 'output_files/') diff --git a/temoa/temoa_model/unit_checking/table_checker.py b/temoa/temoa_model/unit_checking/table_checker.py index 55a3f41d..03a597f3 100644 --- a/temoa/temoa_model/unit_checking/table_checker.py +++ b/temoa/temoa_model/unit_checking/table_checker.py @@ -62,6 +62,12 @@ def check_table(conn: sqlite3.Connection, table_name: str) -> tuple[dict[str, Un # this function gathers all unique entries by row number for efficiency in larger tables entries = gather_from_table(conn, table_name) for expr, line_nums in entries.items(): + # mark the blanks + if not expr: + listed_lines = consolidate_lines(line_nums) + errors.append(f'Blank units entry found at rows: {listed_lines}') + continue + # check characters valid_chars = re.search(ACCEPTABLE_CHARACTERS, expr) if not valid_chars: From a1b237826a4ce1bed382b531350c3b919f6057e2 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 16 Oct 2025 17:42:02 -0700 Subject: [PATCH 56/70] incorporation into main body --- .../mga_sequencer.py | 7 +++++- temoa/extensions/myopic/myopic_sequencer.py | 10 ++++++++ temoa/temoa_model/temoa_config.py | 3 +++ temoa/temoa_model/temoa_sequencer.py | 25 +++++++++++++++++++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/temoa/extensions/modeling_to_generate_alternatives/mga_sequencer.py b/temoa/extensions/modeling_to_generate_alternatives/mga_sequencer.py index d7f1d4c9..74c0b8f0 100644 --- a/temoa/extensions/modeling_to_generate_alternatives/mga_sequencer.py +++ b/temoa/extensions/modeling_to_generate_alternatives/mga_sequencer.py @@ -38,7 +38,7 @@ from queue import Empty import pyomo.environ as pyo -from pyomo.contrib.solver.results import Results +from pyomo.contrib.solver.common.results import Results from pyomo.dataportal import DataPortal from pyomo.opt import check_optimal_termination @@ -54,6 +54,7 @@ from temoa.temoa_model.temoa_config import TemoaConfig from temoa.temoa_model.temoa_model import TemoaModel from temoa.temoa_model.temoa_rules import TotalCost_rule +from temoa.temoa_model.unit_checking.screener import screen logger = getLogger(__name__) @@ -163,6 +164,10 @@ def start(self): good_prices = price_checker(instance) if not good_prices and not self.config.silent: print('Warning: Cost anomalies discovered. Check log file for details.') + if self.config.units_check: + clear_units = screen(self.config.input_database, report_path=self.config.output_path) + if not clear_units and not self.config.silent: + print('Warning: Units anomalies discovered. Check log file for details.') # tag the instance by name, so we can sort out the multiple results... 
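The sequencer patches above gate the new units screen on a configuration flag and treat a failed screen as a warning rather than a fatal error. A minimal sketch of that pre-solve pattern follows; the wrapper function and its parameters are hypothetical, while the `screen(..., report_path=...)` call and the `price_check`/`units_check`/`silent` flags mirror the diffs above.

from pathlib import Path


def run_pre_solve_checks(config, instance, price_checker, screen) -> None:
    """Run optional pre-solve sanity checks; both checks only warn, they never abort."""
    if config.price_check:
        good_prices = price_checker(instance)
        if not good_prices and not config.silent:
            print('Warning: Cost anomalies discovered. Check log file for details.')
    if config.units_check:
        # screen() returns True only when every units check passed cleanly
        clear_units = screen(Path(config.input_database), report_path=Path(config.output_path))
        if not clear_units and not config.silent:
            print('Warning: Units anomalies discovered. Check log file for details.')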
instance.name = '-'.join((self.config.scenario, '0')) diff --git a/temoa/extensions/myopic/myopic_sequencer.py b/temoa/extensions/myopic/myopic_sequencer.py index ebb026bd..3bf213db 100644 --- a/temoa/extensions/myopic/myopic_sequencer.py +++ b/temoa/extensions/myopic/myopic_sequencer.py @@ -44,6 +44,7 @@ from temoa.temoa_model.table_writer import TableWriter from temoa.temoa_model.temoa_config import TemoaConfig from temoa.temoa_model.temoa_model import TemoaModel +from temoa.temoa_model.unit_checking.screener import screen logger = logging.getLogger(__name__) @@ -159,6 +160,15 @@ def get_connection(self) -> Connection: return con def start(self): + # run units check, if requested + if self.config.units_check: + # myopic requires input_db=output_db, so we can just use the input_db + clear_screen = screen(self.config.input_database, report_path=self.config.output_path) + if not clear_screen and not self.config.silent: + print( + '\nWarning: units screen found discrepancies. Check log file/report for details.' + ) + # load up the instance queue self.characterize_run() diff --git a/temoa/temoa_model/temoa_config.py b/temoa/temoa_model/temoa_config.py index 9a3583c0..c87dfd5e 100644 --- a/temoa/temoa_model/temoa_config.py +++ b/temoa/temoa_model/temoa_config.py @@ -55,6 +55,7 @@ def __init__( silent: bool = False, stream_output: bool = False, price_check: bool = True, + units_check: bool = True, source_trace: bool = False, plot_commodity_network: bool = False, ): @@ -127,6 +128,7 @@ def __init__( self.silent = silent self.stream_output = stream_output self.price_check = price_check + self.units_check = units_check self.source_trace = source_trace if plot_commodity_network and not self.source_trace: logger.warning( @@ -179,6 +181,7 @@ def __repr__(self): msg += spacer msg += '{:>{}s}: {}\n'.format('Price check', width, self.price_check) + msg += '{:>{}s}: {}\n'.format('Units check', width, self.units_check) msg += '{:>{}s}: {}\n'.format('Source trace', width, self.source_trace) msg += '{:>{}s}: {}\n'.format('Commodity network plots', width, self.plot_commodity_network) diff --git a/temoa/temoa_model/temoa_sequencer.py b/temoa/temoa_model/temoa_sequencer.py index 0a85197b..79505ad1 100644 --- a/temoa/temoa_model/temoa_sequencer.py +++ b/temoa/temoa_model/temoa_sequencer.py @@ -54,6 +54,7 @@ from temoa.temoa_model.temoa_config import TemoaConfig from temoa.temoa_model.temoa_mode import TemoaMode from temoa.temoa_model.temoa_model import TemoaModel +from temoa.temoa_model.unit_checking.screener import screen from temoa.version_information import ( DB_MAJOR_VERSION, MIN_DB_MINOR_VERSION, @@ -170,6 +171,8 @@ def start(self) -> TemoaModel | None: logger.warning('Plot commodity network disabled for BUILD_ONLY') if self.config.price_check: logger.warning('Price check disabled for BUILD_ONLY') + if self.config.units_check: + logger.warning('Units check disabled for BUILD_ONLY') con = sqlite3.connect(self.config.input_database) hybrid_loader = HybridLoader(db_connection=con, config=self.config) data_portal = hybrid_loader.load_data_portal(myopic_index=None) @@ -196,6 +199,17 @@ def start(self) -> TemoaModel | None: good_prices = price_checker(instance) if not good_prices and not self.config.silent: print('\nWarning: Cost anomalies discovered. 
Check log file for details.') + if self.config.units_check is False: + logger.warning('Units check of model is automatic with CHECK') + clear_screen = screen( + *{self.config.input_database, self.config.output_database}, + report_path=self.config.output_path, + ) + if not clear_screen: + print( + '\nWarning: Units are not clear. Check log file for details and see the units report in the output path.' + ) + con.close() case TemoaMode.PERFECT_FORESIGHT: @@ -212,6 +226,17 @@ def start(self) -> TemoaModel | None: good_prices = price_checker(instance) if not good_prices and not self.config.silent: print('\nWarning: Cost anomalies discovered. Check log file for details.') + + if self.config.units_check: + clear_screen = screen( + *{self.config.input_database, self.config.output_database}, + report_path=self.config.output_path, + ) + if not clear_screen and not self.config.silent: + print( + '\nWarning: Units are not clear. Check log file for details and see the units report in the output path.' + ) + suffixes = ( [ 'dual', From 74c12a8cedf39ea199271cf7836231c945c6c7db Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 16 Oct 2025 17:45:17 -0700 Subject: [PATCH 57/70] kill verbose output --- temoa/temoa_model/unit_checking/screener.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 61dd9b61..3f2b579e 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -48,7 +48,7 @@ from temoa.temoa_model.unit_checking.table_checker import check_table logger = logging.getLogger(__name__) -verbose = True # for dev/test work +verbose = False # for dev/test work def screen(*db_paths: Path, report_path: Path | None = None) -> bool: From 1d85dbaf90e11166eb4edd1bfaf670c9795928a3 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 16 Oct 2025 18:03:01 -0700 Subject: [PATCH 58/70] fix incorrect import packages --- temoa/temoa_model/unit_checking/relation_checker.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index a36cf71f..567c84f2 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -27,12 +27,12 @@ A systematic check of expected relationships between tables to ensure units are consistent """ +import dataclasses import logging import sqlite3 +from collections import defaultdict from collections.abc import Iterable -from mypy.checkexpr import defaultdict -from mypy.message_registry import NamedTuple from pint.registry import Unit from temoa.temoa_model.unit_checking import ureg @@ -79,8 +79,8 @@ def make_c2a_lut(conn: sqlite3.Connection) -> dict[str, Unit]: res[comm] = units return res - -class IOUnits(NamedTuple): +@dataclasses.dataclass(frozen=True) +class IOUnits(): input_units: Unit output_units: Unit From 9fbe6d78ab1baf9275b236e8166694ef008a0a6f Mon Sep 17 00:00:00 2001 From: Jeff <143133373+jeff-ws@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:57:53 -0700 Subject: [PATCH 59/70] Update temoa/utilities/db_migration_v3_to_v3_1.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- temoa/utilities/db_migration_v3_to_v3_1.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/temoa/utilities/db_migration_v3_to_v3_1.py b/temoa/utilities/db_migration_v3_to_v3_1.py index eebc56f6..21adef39 
100644 --- a/temoa/utilities/db_migration_v3_to_v3_1.py +++ b/temoa/utilities/db_migration_v3_to_v3_1.py @@ -161,7 +161,9 @@ # belt & suspenders check that we have all tables in the schema covered -table_query_result = con_new.execute('SELECT name FROM sqlite_master WHERE type="table"').fetchall() +table_query_result = con_new.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%'" +).fetchall() v31_tables = {t[0] for t in table_query_result} covered = set(direct_transfer_tables + add_units_tables + list(transfer_with_mod.keys())) From ec89d260eb8d96b27eb18f9145802937ae37b1a0 Mon Sep 17 00:00:00 2001 From: Jeff <143133373+jeff-ws@users.noreply.github.com> Date: Thu, 16 Oct 2025 18:58:56 -0700 Subject: [PATCH 60/70] Update temoa/utilities/db_migration_v3_to_v3_1.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- temoa/utilities/db_migration_v3_to_v3_1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/temoa/utilities/db_migration_v3_to_v3_1.py b/temoa/utilities/db_migration_v3_to_v3_1.py index 21adef39..54fe8619 100644 --- a/temoa/utilities/db_migration_v3_to_v3_1.py +++ b/temoa/utilities/db_migration_v3_to_v3_1.py @@ -229,7 +229,7 @@ row[1] for row in con_new.execute(f'PRAGMA table_info({table_name})').fetchall() ] if set(old_cols) != set(new_cols): - print(f'WARNING: Column mismatch in {table_name}') + print(f'ERROR: Column mismatch in {table_name}') print(f'Old columns: {old_cols}') print(f'New columns: {new_cols}') con_new.close() From b49410449b7bcc06ddfaaa7e93b4534985dbe357 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 16 Oct 2025 19:05:13 -0700 Subject: [PATCH 61/70] fix loop --- temoa/temoa_model/unit_checking/screener.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 3f2b579e..eb8170bc 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -225,10 +225,10 @@ def screen(*db_paths: Path, report_path: Path | None = None) -> bool: if verbose: print(msg) - # wrap it up - _write_report(report_path, report_entries) - logger.info('Finished Units Check') - return all_clear + # wrap it up + _write_report(report_path, report_entries) + logger.info('Finished Units Check') + return all_clear def _write_report(report_path: Path, report_entries: list[str]): From e3edd955baa73cf769b1f32f0f7032cf45cd0db9 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sun, 19 Oct 2025 12:12:45 -0700 Subject: [PATCH 62/70] format tweaks to screener output --- temoa/temoa_model/unit_checking/screener.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index eb8170bc..9c359824 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -112,7 +112,7 @@ def screen(*db_paths: Path, report_path: Path | None = None) -> bool: errors = True for error in table_errors: logger.info('%s: %s', table, error) - report_entries.extend((f' {table}: {error}', '\n')) + report_entries.extend((f'{table}: {error}', '\n')) if verbose: print(f'{table}: {error}') if not errors: @@ -140,11 +140,11 @@ def screen(*db_paths: Path, report_path: Path | None = None) -> bool: all_clear = False for error in errors: logger.info('%s: %s', 'Efficiency', error) - report_entries.extend((f'Efficiency: {error}', '\n')) + 
report_entries.extend((f'Efficiency: {error}', '\n')) if verbose: print(f'Efficiency: {error}') else: - msg = 'Units Check 3: (Efficiency Table and Tech I/O: Passed' + msg = 'Units Check 3 (Efficiency Table and Tech I/O): Passed' report_entries.extend((msg, '\n')) logger.info(msg) if verbose: From 7f5f79b8fffd0fef2d33aace29a587e5354c8b92 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sun, 19 Oct 2025 13:37:26 -0700 Subject: [PATCH 63/70] upgrade to pyam-iamc >= 3.1. Version 3.0 of pyam importing mangled the log setup when importing IamDataFrame, causing failure of Temoa logging. --- requirements.in | 2 +- requirements.txt | 102 +++++++++++++++++++++++------------------------ 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/requirements.in b/requirements.in index b1bca79d..ecd8e8d8 100644 --- a/requirements.in +++ b/requirements.in @@ -22,7 +22,7 @@ seaborn tabulate xlsxwriter plotly -pyam-iamc +pyam-iamc >= 3.1 # Below required to update documentation sphinx diff --git a/requirements.txt b/requirements.txt index 10c200ff..a25caee5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,15 +2,15 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --cert=None --client-cert=None --index-url=None --pip-args=None requirements.in +# pip-compile --cert=None --client-cert=None --index-url=None --pip-args=None # alabaster==1.0.0 # via sphinx -alembic==1.16.5 +alembic==1.17.0 # via ixmp4 annotated-types==0.7.0 # via pydantic -anyio==4.10.0 +anyio==4.11.0 # via # httpx # jupyter-server @@ -21,13 +21,13 @@ argon2-cffi==25.1.0 # via jupyter-server argon2-cffi-bindings==25.1.0 # via argon2-cffi -arrow==1.3.0 +arrow==1.4.0 # via isoduration asttokens==3.0.0 # via stack-data async-lru==2.0.5 # via jupyterlab -attrs==25.3.0 +attrs==25.4.0 # via # jsonschema # referencing @@ -35,18 +35,18 @@ babel==2.17.0 # via # jupyterlab-server # sphinx -beautifulsoup4==4.13.5 +beautifulsoup4==4.14.2 # via nbconvert bleach[css]==6.2.0 # via nbconvert -certifi==2025.8.3 +certifi==2025.10.5 # via # httpcore # httpx # requests cffi==2.0.0 # via argon2-cffi-bindings -charset-normalizer==3.4.3 +charset-normalizer==3.4.4 # via requests click==8.3.0 # via typer @@ -78,7 +78,7 @@ et-xmlfile==2.0.0 # via openpyxl executing==2.2.1 # via stack-data -fastapi==0.116.2 +fastapi==0.119.0 # via ixmp4 fastjsonschema==2.21.2 # via nbformat @@ -86,7 +86,7 @@ flexcache==0.3 # via pint flexparser==0.4 # via pint -fonttools==4.60.0 +fonttools==4.60.1 # via matplotlib fqdn==1.5.1 # via jsonschema @@ -112,9 +112,9 @@ httpx[http2]==0.28.1 # jupyterlab hyperframe==6.1.0 # via h2 -iam-units==2025.9.12 +iam-units==2025.10.13 # via pyam-iamc -idna==3.10 +idna==3.11 # via # anyio # httpx @@ -122,15 +122,15 @@ idna==3.10 # requests imagesize==1.4.1 # via sphinx -iniconfig==2.1.0 +iniconfig==2.3.0 # via pytest -ipykernel==6.30.1 +ipykernel==7.0.1 # via # -r requirements.in # jupyter # jupyter-console # jupyterlab -ipython==9.5.0 +ipython==9.6.0 # via # -r requirements.in # ipykernel @@ -144,7 +144,7 @@ ipywidgets==8.1.7 # via jupyter isoduration==20.11.0 # via jsonschema -ixmp4==0.12.0 +ixmp4==0.13.1 # via pyam-iamc jedi==0.19.2 # via ipython @@ -184,7 +184,7 @@ jupyter-contrib-core==0.4.2 # jupyter-nbextensions-configurator jupyter-contrib-nbextensions==0.7.0 # via -r requirements.in -jupyter-core==5.8.1 +jupyter-core==5.9.1 # via # ipykernel # jupyter-client @@ -215,7 +215,7 @@ jupyter-server==2.17.0 # notebook-shim jupyter-server-terminals==0.5.3 # via jupyter-server 
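The requirement bump to pyam-iamc >= 3.1 is motivated by pyam 3.0 reconfiguring logging when IamDataFrame is imported; pinning the dependency is the actual fix taken here. Purely as an illustration of a defensive alternative, a snapshot-and-restore guard around a third-party import could look like the sketch below (the helper name and usage are hypothetical, not part of Temoa).

import importlib
import logging


def import_preserving_logging(module_name: str):
    """Import a module while shielding the root logger from import-time reconfiguration."""
    root = logging.getLogger()
    saved_handlers = list(root.handlers)
    saved_level = root.level
    try:
        return importlib.import_module(module_name)
    finally:
        # restore whatever handlers/level the application had already configured
        root.handlers[:] = saved_handlers
        root.setLevel(saved_level)


# hypothetical usage: pyam = import_preserving_logging('pyam')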
-jupyterlab==4.4.7 +jupyterlab==4.4.9 # via # jupyter # notebook @@ -229,22 +229,22 @@ jupyterlab-widgets==3.0.15 # via ipywidgets kiwisolver==1.4.9 # via matplotlib -lark==1.2.2 +lark==1.3.0 # via rfc3987-syntax latexcodec==3.0.1 # via pybtex -lxml==6.0.1 +lxml==6.0.2 # via jupyter-contrib-nbextensions mako==1.3.10 # via alembic markdown-it-py==4.0.0 # via rich -markupsafe==3.0.2 +markupsafe==3.0.3 # via # jinja2 # mako # nbconvert -matplotlib==3.10.6 +matplotlib==3.10.7 # via # -r requirements.in # pyam-iamc @@ -266,7 +266,7 @@ mypy-extensions==1.1.0 # via # mypy # typing-inspect -narwhals==2.5.0 +narwhals==2.8.0 # via plotly nbclient==0.10.2 # via nbconvert @@ -286,7 +286,7 @@ networkx==3.5 # via -r requirements.in nose==1.3.7 # via pyutilib -notebook==7.4.5 +notebook==7.4.7 # via # jupyter # jupyter-contrib-core @@ -296,7 +296,7 @@ notebook-shim==0.2.4 # via # jupyterlab # notebook -numpy==2.3.3 +numpy==2.3.4 # via # -r requirements.in # contourpy @@ -327,7 +327,7 @@ packaging==25.0 # plotly # pytest # sphinx -pandas==2.3.2 +pandas==2.3.3 # via # -r requirements.in # ixmp4 @@ -344,18 +344,18 @@ pathspec==0.12.1 # via mypy pexpect==4.9.0 # via ipython -pillow==11.3.0 +pillow==12.0.0 # via matplotlib pint==0.25 # via # -r requirements.in # iam-units # pyam-iamc -platformdirs==4.4.0 +platformdirs==4.5.0 # via # jupyter-core # pint -plotly==6.3.0 +plotly==6.3.1 # via -r requirements.in pluggy==1.6.0 # via pytest @@ -367,11 +367,11 @@ prompt-toolkit==3.0.52 # via # ipython # jupyter-console -psutil==7.1.0 +psutil==7.1.1 # via ipykernel -psycopg[binary]==3.2.10 +psycopg[binary]==3.2.11 # via ixmp4 -psycopg-binary==3.2.10 +psycopg-binary==3.2.11 # via psycopg ptyprocess==0.7.0 # via @@ -379,7 +379,7 @@ ptyprocess==0.7.0 # terminado pure-eval==0.2.3 # via stack-data -pyam-iamc==3.0.0 +pyam-iamc==3.1.0 # via -r requirements.in pybtex==0.25.1 # via @@ -389,15 +389,15 @@ pybtex-docutils==1.0.3 # via sphinxcontrib-bibtex pycparser==2.23 # via cffi -pydantic==2.11.9 +pydantic==2.12.3 # via # fastapi # ixmp4 # pandera # pydantic-settings -pydantic-core==2.33.2 +pydantic-core==2.41.4 # via pydantic -pydantic-settings==2.10.1 +pydantic-settings==2.11.0 # via ixmp4 pydoe==0.3.8 # via -r requirements.in @@ -412,9 +412,9 @@ pygments==2.19.2 # sphinx pyjwt==2.10.1 # via ixmp4 -pyomo==6.9.4 +pyomo==6.9.5 # via -r requirements.in -pyparsing==3.2.4 +pyparsing==3.2.5 # via matplotlib pytest==8.4.2 # via -r requirements.in @@ -428,13 +428,13 @@ python-dotenv==1.1.1 # via # ixmp4 # pydantic-settings -python-json-logger==3.3.0 +python-json-logger==4.0.0 # via jupyter-events pytz==2025.2 # via pandas pyutilib==6.0.0 # via -r requirements.in -pyyaml==6.0.2 +pyyaml==6.0.3 # via # jupyter-events # jupyter-nbextensions-configurator @@ -446,7 +446,7 @@ pyzmq==27.1.0 # jupyter-client # jupyter-console # jupyter-server -referencing==0.36.2 +referencing==0.37.0 # via # jsonschema # jsonschema-specifications @@ -466,7 +466,7 @@ rfc3986-validator==0.1.1 # jupyter-events rfc3987-syntax==1.1.0 # via jsonschema -rich==14.1.0 +rich==14.2.0 # via # ixmp4 # typer @@ -476,7 +476,7 @@ rpds-py==0.27.1 # via # jsonschema # referencing -salib==1.5.1 +salib==1.5.2 # via -r requirements.in scipy==1.16.2 # via @@ -531,7 +531,7 @@ sphinxcontrib-serializinghtml==2.0.0 # via # -r requirements.in # sphinx -sqlalchemy[mypy]==2.0.43 +sqlalchemy[mypy]==2.0.44 # via # alembic # ixmp4 @@ -584,10 +584,8 @@ traitlets==5.14.3 # nbformat typeguard==4.4.4 # via pandera -typer==0.18.0 +typer==0.19.2 # via ixmp4 -types-python-dateutil==2.9.0.20250822 - 
# via arrow typing-extensions==4.15.0 # via # alembic @@ -611,17 +609,19 @@ typing-extensions==4.15.0 # typing-inspection typing-inspect==0.9.0 # via pandera -typing-inspection==0.4.1 +typing-inspection==0.4.2 # via # pydantic # pydantic-settings tzdata==2025.2 - # via pandas + # via + # arrow + # pandas uri-template==1.3.0 # via jsonschema urllib3==2.5.0 # via requests -wcwidth==0.2.13 +wcwidth==0.2.14 # via prompt-toolkit webcolors==24.11.1 # via jsonschema @@ -629,7 +629,7 @@ webencodings==0.5.1 # via # bleach # tinycss2 -websocket-client==1.8.0 +websocket-client==1.9.0 # via jupyter-server widgetsnbextension==4.0.14 # via ipywidgets From bc047f31d4c9b72cdabb1af58c9ebf7eb008d498 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sun, 19 Oct 2025 14:19:48 -0700 Subject: [PATCH 64/70] Consolidate tests by row in Test 4--as in others --- .../unit_checking/relation_checker.py | 25 +++++++++++-------- temoa/temoa_model/unit_checking/screener.py | 4 +-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index 567c84f2..b29b92a8 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -156,7 +156,7 @@ def check_inter_table_relations( conn: sqlite3.Connection, table_name, tech_lut: dict[str, IOUnits], capacity_based: bool ) -> list[str]: """check the tech and units in the given table vs. baseline (expected) values for the tech""" - error_msgs = [] + grouped_errors = defaultdict(list) if capacity_based: # we make a query to join on the C2A units to pick those up query = ( @@ -172,12 +172,11 @@ def check_inter_table_relations( except sqlite3.OperationalError: logger.error('failed to process query: %s when processing table %s', query, table_name) msg = f'Failed to process table {table_name}. See log for failed query.' - error_msgs.append(msg) - return error_msgs + return [msg] for idx, (tech, table_units, c2a_units) in enumerate(rows, start=1): if tech not in tech_lut: - error_msgs.append( - f'Unprocessed row (missing reference for tech "{tech}" --see earlier tests): {idx}' + grouped_errors[ + f'Unprocessed row (missing reference for tech "{tech}" --see earlier tests)'].append(idx ) continue # validate the units in the table... @@ -200,9 +199,9 @@ def check_inter_table_relations( valid_c2a_units = None if not valid_table_units: - error_msgs.append(f'Unprocessed row (invalid units--see earlier tests): {idx}') + grouped_errors[f'Unprocessed row (invalid units--see earlier tests)'].append(idx) if not c2a_valid: - error_msgs.append(f'Unprocessed row (invalid c2a units--see earlier tests): {idx}') + grouped_errors[f'Unprocessed row (invalid c2a units--see earlier tests)'].append(idx) if not valid_table_units or not c2a_valid: continue @@ -215,14 +214,20 @@ def check_inter_table_relations( # check that the res_units match the expectation from the tech if tech_lut[tech].output_units != res_units: - error_msgs.append( - f'Units mismatch at row {idx}. Table Entry: {valid_table_units}, ' + msg=( + f'Units mismatch from expected reference. Table Entry: {valid_table_units}, ' f'{f" C2A Entry: {valid_c2a_units}, " if valid_c2a_units else ""}' f'expected: {tech_lut[tech].output_units / (valid_c2a_units * ureg.year) if valid_c2a_units else tech_lut[tech].output_units}' f' for output of tech {tech}.' 
) + grouped_errors[msg].append(idx) - return error_msgs + # gather into list format + res = [] + for msg, line_nums in grouped_errors.items(): + res.append(f'{msg} at rows: {consolidate_lines(line_nums)}') + + return res def check_cost_tables( diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 9c359824..02e2cb7e 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -65,9 +65,9 @@ def screen(*db_paths: Path, report_path: Path | None = None) -> bool: for db_path in db_paths: if not db_path.is_file(): raise FileNotFoundError(f'Database file not found: {db_path}') - initialization_msg = f'======== Units Check on DB: {db_path}: Started ========\n' + initialization_msg = f'\n======== Units Check on DB: {db_path}: Started ========\n\n' report_entries.append(initialization_msg) - logger.info('Starting Units Check on DB: %s', db_paths) + logger.info('Starting Units Check on DB: %s', db_path) with sqlite3.connect(db_path) as conn: # test 1: DB version msg = '======== Units Check 1 (DB Version): Started ========' From 3853c00c9f53bd5d9af6287c3196f27812f54868 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sun, 19 Oct 2025 16:05:00 -0700 Subject: [PATCH 65/70] better fail handling in utility conversion. Added null handling tweak in relation checker --- .../unit_checking/relation_checker.py | 17 ++++---- temoa/utilities/db_migration_v3_to_v3_1.py | 40 ++++++++++++++----- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index b29b92a8..26cb3c32 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -79,8 +79,9 @@ def make_c2a_lut(conn: sqlite3.Connection) -> dict[str, Unit]: res[comm] = units return res + @dataclasses.dataclass(frozen=True) -class IOUnits(): +class IOUnits: input_units: Unit output_units: Unit @@ -176,8 +177,8 @@ def check_inter_table_relations( for idx, (tech, table_units, c2a_units) in enumerate(rows, start=1): if tech not in tech_lut: grouped_errors[ - f'Unprocessed row (missing reference for tech "{tech}" --see earlier tests)'].append(idx - ) + f'Unprocessed row (missing reference for tech "{tech}" --see earlier tests)' + ].append(idx) continue # validate the units in the table... table_valid, units_data = validate_units_format(table_units, SINGLE_ELEMENT) @@ -214,7 +215,7 @@ def check_inter_table_relations( # check that the res_units match the expectation from the tech if tech_lut[tech].output_units != res_units: - msg=( + msg = ( f'Units mismatch from expected reference. 
Table Entry: {valid_table_units}, ' f'{f" C2A Entry: {valid_c2a_units}, " if valid_c2a_units else ""}' f'expected: {tech_lut[tech].output_units / (valid_c2a_units * ureg.year) if valid_c2a_units else tech_lut[tech].output_units}' @@ -270,12 +271,10 @@ def check_cost_tables( label = f'{ct.table_name}: Unprocessed row (missing units): {raw_units_expression}' table_grouped_errors[label].append(idx) continue - valid, (raw_cost, raw_units) = validate_units_format( - raw_units_expression, RATIO_ELEMENT - ) + valid, (elements) = validate_units_format(raw_units_expression, RATIO_ELEMENT) if valid: - cost_valid, cost_units = validate_units_expression(raw_cost) - units_valid, measure_units = validate_units_expression(raw_units) + cost_valid, cost_units = validate_units_expression(elements[0]) + units_valid, measure_units = validate_units_expression(elements[1]) else: cost_valid, units_valid = False, False if not (cost_valid and units_valid): diff --git a/temoa/utilities/db_migration_v3_to_v3_1.py b/temoa/utilities/db_migration_v3_to_v3_1.py index 54fe8619..aed31df3 100644 --- a/temoa/utilities/db_migration_v3_to_v3_1.py +++ b/temoa/utilities/db_migration_v3_to_v3_1.py @@ -146,11 +146,28 @@ new_db_name = legacy_db.stem + '_v3_1.sqlite' new_db_path = Path(legacy_db.parent, new_db_name) +# check that destination doesn't exist already +if new_db_path.exists(): + print(f'ERROR: destination database already exists: {new_db_path}. Exiting.') + sys.exit(-1) + + +def exit_on_failure(msg): + print( + 'Transition failed for reason below. This issue is fatal and must be remediated. Exiting.' + ) + print(msg) + con_old.close() + con_new.close() + new_db_path.unlink() + sys.exit(-1) + con_old = sqlite3.connect(legacy_db) con_new = sqlite3.connect(new_db_path) cur = con_new.cursor() + # bring in the new schema and execute to build new db with open(schema_file, 'r') as src: sql_script = src.read() @@ -173,9 +190,7 @@ extra = covered - v31_tables print(f'ERROR: missing tables (from transfer list): {missing}') print(f"ERROR: extra tables (that don't exist in schema): {extra}") - con_new.close() - con_old.close() - sys.exit(-1) + exit_on_failure('Missing transfer tables list does not match schema.') # execute the direct transfers @@ -194,9 +209,7 @@ print(f'WARNING: Column mismatch in {table_name}') print(f'Old columns: {old_cols}') print(f'New columns: {new_cols}') - con_new.close() - con_old.close() - sys.exit(-1) + exit_on_failure(f'Column mismatch in {table_name}') # Get data from old database with explicit column order cols_str = ', '.join(new_cols) @@ -215,6 +228,7 @@ cols_str = ', '.join(new_cols) query = f'INSERT OR REPLACE INTO {table_name} ({cols_str}) VALUES ({placeholders})' con_new.executemany(query, data) + con_new.commit() print(f'inserted {len(data)} rows into {table_name}') # execute transfer with modifications @@ -232,9 +246,7 @@ print(f'ERROR: Column mismatch in {table_name}') print(f'Old columns: {old_cols}') print(f'New columns: {new_cols}') - con_new.close() - con_old.close() - sys.exit(-1) + exit_on_failure(f'Column mismatch in {table_name}') # Get data from old database with explicit column order cols_str = ', '.join(new_cols) @@ -315,5 +327,15 @@ # move the GlobalDiscountRate # move the myopic base year +# sanity check... 
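The migration-utility changes above add a pre-flight check that the destination file does not already exist and funnel every failure through a single cleanup path that closes both connections and deletes the partially built database. A compact sketch of that pattern, with hypothetical paths and without the table-transfer logic:

import sqlite3
import sys
from pathlib import Path


def migrate(old_path: Path, new_path: Path, schema_sql: str) -> None:
    """Build a new DB from schema_sql next to the legacy DB, failing safely."""
    if new_path.exists():
        sys.exit(f'ERROR: destination database already exists: {new_path}. Exiting.')
    con_old = sqlite3.connect(old_path)
    con_new = sqlite3.connect(new_path)

    def exit_on_failure(msg: str) -> None:
        # single cleanup path: close both connections and remove the partial DB
        print('Transition failed for reason below. Exiting.')
        print(msg)
        con_old.close()
        con_new.close()
        new_path.unlink(missing_ok=True)
        sys.exit(-1)

    try:
        con_new.executescript(schema_sql)
        con_new.commit()
    except sqlite3.Error as e:
        exit_on_failure(f'could not build new schema: {e}')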
+ +qry = 'SELECT * FROM TimePeriod' +res = con_new.execute(qry).fetchall() +if res: + print(f'TimePeriod table has {len(res)} rows') + for t in res[:5]: + print(t) +else: + print('TimePeriod table is empty') con_new.close() con_old.close() From 88295b69a17f81d5d11d483e4c7c88c4c0328b15 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Sun, 19 Oct 2025 16:11:49 -0700 Subject: [PATCH 66/70] remove a little scaffolding --- temoa/utilities/db_migration_v3_to_v3_1.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/temoa/utilities/db_migration_v3_to_v3_1.py b/temoa/utilities/db_migration_v3_to_v3_1.py index aed31df3..c379d582 100644 --- a/temoa/utilities/db_migration_v3_to_v3_1.py +++ b/temoa/utilities/db_migration_v3_to_v3_1.py @@ -325,17 +325,5 @@ def exit_on_failure(msg): print('Foreign Key Check FAILED on new DB. Something may be wrong with schema.') print(e) -# move the GlobalDiscountRate -# move the myopic base year -# sanity check... - -qry = 'SELECT * FROM TimePeriod' -res = con_new.execute(qry).fetchall() -if res: - print(f'TimePeriod table has {len(res)} rows') - for t in res[:5]: - print(t) -else: - print('TimePeriod table is empty') con_new.close() con_old.close() From 06d83faa8ccee5f8ef79b3414b06472091cc6c7f Mon Sep 17 00:00:00 2001 From: Jeff H Date: Mon, 20 Oct 2025 16:56:20 -0700 Subject: [PATCH 67/70] Added check of Demand table and a little cleanup of formatting --- temoa/temoa_model/unit_checking/common.py | 10 ++- .../unit_checking/relation_checker.py | 80 +++++++++++++------ temoa/temoa_model/unit_checking/screener.py | 31 ++++++- 3 files changed, 93 insertions(+), 28 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index d822e680..db7db5e5 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -28,6 +28,7 @@ """ from dataclasses import dataclass +from enum import Enum from typing import NamedTuple tables_with_units = [ @@ -84,7 +85,7 @@ commodity_based_tables = [ 'Demand', - 'MaxResource', # haven't we done away with this table/constraint? + # 'MaxResource', # haven't we done away with this table/constraint? ] # Group tables Not Yet Implemented... would need to gather by group name and tech, etc. @@ -96,6 +97,8 @@ ] """Tables that should have units equivalent to the commodity's native units""" +# dev note: The "grouped" functions below are not yet implemented / future work. They are (to date) +# seldom used. Implementing would require grouping by group name, ensuring all techs in group are same... 
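The commodity-based check added for the Demand table compares each row's units against the commodity's native units. The following self-contained sketch shows that comparison with a stock pint registry; the join assumes v3.1 column names (`Demand.commodity`, `Demand.units`, `Commodity.name`, `Commodity.units`), and Temoa's own registry extras and format validators are omitted.

import sqlite3
from collections import defaultdict

import pint

ureg = pint.UnitRegistry()  # Temoa's registry adds extras such as 'dollar' and 'ethos'; not needed here


def check_demand_units(conn: sqlite3.Connection) -> list[str]:
    """Flag Demand rows whose units differ from the commodity's declared units."""
    findings: dict[str, list[int]] = defaultdict(list)
    rows = conn.execute(
        'SELECT d.commodity, d.units, c.units '
        'FROM Demand d JOIN Commodity c ON d.commodity = c.name'
    ).fetchall()
    for idx, (comm, demand_units, comm_units) in enumerate(rows, start=1):
        if not demand_units or not comm_units:
            findings[f'{comm}: blank units entry'].append(idx)
            continue
        try:
            mismatch = ureg.Unit(demand_units) != ureg.Unit(comm_units)
        except (pint.errors.UndefinedUnitError, pint.errors.DefinitionSyntaxError):
            findings[f'{comm}: unrecognized units expression'].append(idx)
            continue
        if mismatch:
            findings[f'{comm}: units {demand_units!r} do not match commodity units {comm_units!r}'].append(idx)
    return [f'{msg} at rows: {sorted(nums)}' for msg, nums in findings.items()]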
capacity_based_tables = [ 'ExistingCapacity', 'MaxCapacity', @@ -137,7 +140,10 @@ """Tables that have cost units and their properties""" -# TODO: Unclear tables: MaxResource, GrowthRateSeed +class RelationType(Enum): + ACTIVITY = 1 + CAPACITY = 2 + COMMODITY = 3 @dataclass(frozen=True) diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index 26cb3c32..e60df8ef 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -41,6 +41,7 @@ SINGLE_ELEMENT, consolidate_lines, CostTableData, + RelationType, ) from temoa.temoa_model.unit_checking.entry_checker import ( validate_units_format, @@ -154,35 +155,68 @@ def check_efficiency_table( def check_inter_table_relations( - conn: sqlite3.Connection, table_name, tech_lut: dict[str, IOUnits], capacity_based: bool + conn: sqlite3.Connection, + table_name, + tech_lut: dict[str, IOUnits], + comm_lut: dict[str, Unit], + relation_type: RelationType, ) -> list[str]: """check the tech and units in the given table vs. baseline (expected) values for the tech""" grouped_errors = defaultdict(list) - if capacity_based: - # we make a query to join on the C2A units to pick those up - query = ( - f'SELECT {table_name}.tech, {table_name}.units, ca.units ' - f'FROM {table_name} JOIN CapacityToActivity ca ' - f'ON {table_name}.tech = ca.tech AND {table_name}.region = ca.region' - ) - else: - query = f'SELECT tech, units, NULL FROM {table_name}' - + match relation_type: + case RelationType.CAPACITY: + # we make a query to join on the C2A units to pick those up + query = ( + f'SELECT {table_name}.tech, {table_name}.units, ca.units ' + f'FROM {table_name} JOIN CapacityToActivity ca ' + f'ON {table_name}.tech = ca.tech AND {table_name}.region = ca.region' + ) + case RelationType.ACTIVITY: + query = f'SELECT tech, units, NULL FROM {table_name}' + case RelationType.COMMODITY: + query = f'SELECT commodity, units, NULL FROM {table_name}' + case _: + raise ValueError(f'Unexpected relation type: {relation_type}') try: rows = conn.execute(query).fetchall() except sqlite3.OperationalError: logger.error('failed to process query: %s when processing table %s', query, table_name) msg = f'Failed to process table {table_name}. See log for failed query.' return [msg] - for idx, (tech, table_units, c2a_units) in enumerate(rows, start=1): - if tech not in tech_lut: + + # process the rows + for idx, (tech_or_comm, table_units, c2a_units) in enumerate(rows, start=1): + expected_units = None + match relation_type: + case RelationType.CAPACITY: + io_units = tech_lut.get(tech_or_comm) + if not io_units: + grouped_errors[ + f'Unprocessed row (missing reference for tech "{tech_or_comm}" --see earlier tests)' + ].append(idx) + continue + expected_units = io_units.output_units + case RelationType.ACTIVITY: + io_units = tech_lut[tech_or_comm] + if not io_units: + grouped_errors[ + f'Unprocessed row (missing reference for tech "{tech_or_comm}" --see earlier tests)' + ].append(idx) + continue + expected_units = io_units.output_units + case RelationType.COMMODITY: + expected_units = comm_lut.get(tech_or_comm) + case _: + raise ValueError(f'Unexpected relation type: {relation_type}') + if not expected_units: grouped_errors[ - f'Unprocessed row (missing reference for tech "{tech}" --see earlier tests)' + f'Unprocessed row (missing reference for tech "{tech_or_comm}" --see earlier tests)' ].append(idx) continue + # validate the units in the table... 
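Several of the checkers in these diffs collect findings per message in a defaultdict and then collapse the affected row numbers into ranges before reporting. The sketch below illustrates that idiom; the `consolidate_lines` shown is a plausible stand-in rather than the project's implementation in `common.py`.

from collections import defaultdict


def consolidate_lines(nums: list[int]) -> str:
    """Collapse row numbers into a compact '3-5, 9' style string."""
    parts: list[str] = []
    start = prev = None
    for n in sorted(nums):
        if start is None:
            start = prev = n
        elif n == prev + 1:
            prev = n
        else:
            parts.append(f'{start}-{prev}' if start != prev else str(start))
            start = prev = n
    if start is not None:
        parts.append(f'{start}-{prev}' if start != prev else str(start))
    return ', '.join(parts)


grouped_errors: dict[str, list[int]] = defaultdict(list)
grouped_errors['Blank units entry found'].extend([3, 4, 5, 9])
report = [f'{msg} at rows: {consolidate_lines(rows)}' for msg, rows in grouped_errors.items()]
print(report)  # ['Blank units entry found at rows: 3-5, 9']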
- table_valid, units_data = validate_units_format(table_units, SINGLE_ELEMENT) - if table_valid: + entry_format_valid, units_data = validate_units_format(table_units, SINGLE_ELEMENT) + if entry_format_valid: _, valid_table_units = validate_units_expression(units_data[0]) else: valid_table_units = None @@ -195,14 +229,14 @@ def check_inter_table_relations( c2a_valid, valid_c2a_units = validate_units_expression(units_data[0]) else: valid_c2a_units = None - else: # we are in a valid state, but no units to use for c2a + else: # we are in a valid state: no C2A units provided/needed c2a_valid = True valid_c2a_units = None if not valid_table_units: - grouped_errors[f'Unprocessed row (invalid units--see earlier tests)'].append(idx) + grouped_errors['Unprocessed row (invalid units--see earlier tests)'].append(idx) if not c2a_valid: - grouped_errors[f'Unprocessed row (invalid c2a units--see earlier tests)'].append(idx) + grouped_errors['Unprocessed row (invalid c2a units--see earlier tests)'].append(idx) if not valid_table_units or not c2a_valid: continue @@ -214,12 +248,12 @@ def check_inter_table_relations( res_units = valid_table_units # check that the res_units match the expectation from the tech - if tech_lut[tech].output_units != res_units: + if expected_units != res_units: msg = ( f'Units mismatch from expected reference. Table Entry: {valid_table_units}, ' f'{f" C2A Entry: {valid_c2a_units}, " if valid_c2a_units else ""}' - f'expected: {tech_lut[tech].output_units / (valid_c2a_units * ureg.year) if valid_c2a_units else tech_lut[tech].output_units}' - f' for output of tech {tech}.' + f'expected: {expected_units}' + f' for output of tech {tech_or_comm}' ) grouped_errors[msg].append(idx) @@ -335,7 +369,7 @@ def check_cost_tables( tech_reference = ct.commodity_reference if ct.commodity_reference else tech label = ( f'{ct.table_name}: Non-matching measure unit found in cost denominator for tech/commodity {tech_reference}: {raw_units_expression}' - f'\n Commodity units: {commodity_units}, Discovered (after conversions): {measure_units}' + f'\n Expecting commodity units: {commodity_units}. 
Discovered (after conversions applied): {measure_units}' f'\n Conversions: c2a units: {c2a_units*ureg.year if c2a_units else "N/A"}{", `per period` removed" if ct.period_based else ""}\n ' ) table_grouped_errors[label].append(idx) diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index 02e2cb7e..b7f6d7fa 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -37,6 +37,8 @@ capacity_based_tables, activity_based_tables, cost_based_tables, + commodity_based_tables, + RelationType, ) from temoa.temoa_model.unit_checking.relation_checker import ( check_efficiency_table, @@ -163,7 +165,11 @@ def screen(*db_paths: Path, report_path: Path | None = None) -> bool: error_free = True for table in activity_based_tables: errors = check_inter_table_relations( - conn=conn, table_name=table, tech_lut=tech_io_lut, capacity_based=False + conn=conn, + table_name=table, + tech_lut=tech_io_lut, + comm_lut=commodity_lut, + relation_type=RelationType.ACTIVITY, ) if errors: error_free = False @@ -174,7 +180,26 @@ def screen(*db_paths: Path, report_path: Path | None = None) -> bool: print(f'{table}: {error}') for table in capacity_based_tables: errors = check_inter_table_relations( - conn=conn, table_name=table, tech_lut=tech_io_lut, capacity_based=True + conn=conn, + table_name=table, + tech_lut=tech_io_lut, + comm_lut=commodity_lut, + relation_type=RelationType.CAPACITY, + ) + if errors: + error_free = False + for error in errors: + logger.info('%s: %s', table, error) + report_entries.extend((f'{table}: {error}', '\n')) + if verbose: + print(f'{table}: {error}') + for table in commodity_based_tables: + errors = check_inter_table_relations( + conn=conn, + table_name=table, + tech_lut=tech_io_lut, + comm_lut=commodity_lut, + relation_type=RelationType.COMMODITY, ) if errors: error_free = False @@ -244,4 +269,4 @@ def _write_report(report_path: Path, report_entries: list[str]): if __name__ == '__main__': db_path = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' - screen(db_path, report_path=Path(PROJECT_ROOT) / 'output_files/') + screen(db_path, report_path=Path(PROJECT_ROOT) / 'temp/') From 6a7f34798b344ddc8ad256fef273a5ced67e534a Mon Sep 17 00:00:00 2001 From: Jeff H Date: Mon, 20 Oct 2025 17:30:55 -0700 Subject: [PATCH 68/70] Documentation updates to main dox and README.md describing what the unit checker is and how to execute with it. Update to 2 config files to include unit checking --- README.md | 9 ++- data_files/my_configs/config_sample.toml | 4 ++ data_files/my_configs/morris_utopia.toml | 4 ++ docs/source/Documentation.rst | 79 ++++++++++++++++++++---- temoa/temoa_model/temoa_sequencer.py | 6 +- 5 files changed, 87 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 0713d6de..bfafa9d1 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,9 @@ is (for Utopia as an example): ``` (venv) $ sqlite3 utopia.sqlite < utopia.sql ``` -- Converting legacy db's to Version 3 can be done with the included database migration tool. Users who use this +- Converting legacy db's to Version 3.0 can be done with the included database migration tool. Users who use this tool are advised to carefully review the console outputs during conversion to ensure accuracy and check the -converted database carefully. The migration tool will build an empty new Version 3 database and move data from +converted database carefully. 
The migration tool will build an empty new Version 3.0 database and move data from the old database, preserving the legacy database in place. The command can be run from the top level of the project and needs pointers to the target database and the Version 3 schema file. A typical execution from top level should look like: @@ -78,6 +78,10 @@ should look like: directory as described above using the `sqlite3` command. The "minimal" version excludes some of the group parameters and is recommended as a starting point for entry-level models. It can be upgraded to the full set of tables by executing the full schema SQL command on the resulting database later, which will add the missing tables. +- Users wishing to use the optional "Unit Checking" functionality described in the documentation need to build from or +transition to a version 3.1 database. A second utility is provided to assist with this process and is run +similarly to the migration utility described above. The version 3.1 utility only accepts a source database +that is already version 3.0 format. ## Config Files @@ -91,6 +95,7 @@ and has all parameters in it. It can be copied/renamed, etc. | Temoa Mode | The execution mode. See note below on currently supported modes | | Input/Output DB | The source (and optionally diffent) output database. Note for myopic, MGA input must be same as output | | Price Checking | Run the "price checker" on the built model to look for costing deficiencies and log them | +| Unit Checking | Run the "unit checker" on the source (and destination) databases and document anomalies with units | | Source Tracing | Check the integrity of the commodity flow network in every region-period combination. Required for Myopic | | Plot Commodity Network | Produce HTML (viewable in any browser) displays of the networks built (see note at bottom) | | Solver | The exact name of the solver executable to call | diff --git a/data_files/my_configs/config_sample.toml b/data_files/my_configs/config_sample.toml index 1763880a..a077bf29 100644 --- a/data_files/my_configs/config_sample.toml +++ b/data_files/my_configs/config_sample.toml @@ -38,6 +38,10 @@ output_database = "data_files/example_dbs/utopia.sqlite" # Strongly recommended price_check = true +# check the units in the source/destination databases for units consistency. Errors +# are reported in log file and units_check.txt in output folder +unit_check = true + # Check the network connectivity for processes in the model. Strongly # recommended to ensure proper performance. Results are reported in log file # This requires that source commodities be marked with 's' in Commodity table diff --git a/data_files/my_configs/morris_utopia.toml b/data_files/my_configs/morris_utopia.toml index d9d64fa6..1d9ab548 100644 --- a/data_files/my_configs/morris_utopia.toml +++ b/data_files/my_configs/morris_utopia.toml @@ -35,6 +35,10 @@ output_database = "data_files/example_dbs/morris_utopia.sqlite" # Strongly recommended price_check = true +# check the units in the source/destination databases for units consistency. Errors +# are reported in log file and units_check.txt in output folder +unit_check = true + # Check the network connectivity for processes in the model. Strongly # recommended to ensure proper performance. 
Results are reported in log file # This requires that source commodities be marked with 's' in Commodity table diff --git a/docs/source/Documentation.rst b/docs/source/Documentation.rst index 1facd9d3..417b7e3e 100644 --- a/docs/source/Documentation.rst +++ b/docs/source/Documentation.rst @@ -411,7 +411,7 @@ a particular region could easily be overlooked. Price checks performed/reported Units Checking -------------- An upgrade to the database schema to Version 3.1 allows fairly complete units checking throughout the model. -Unit checking helps for consistency and accuracty in the model and also supports more complete documentation of +Unit checking helps for consistency and accuracy in the model and also supports more complete documentation of inputs and outputs. The Version 3.0 of the Temoa model will work with database versions of both 3.0 and 3.1. The latter addition to the schema adds `units` to 16 tables, several of which are output tables. @@ -419,7 +419,7 @@ The Python package :code:`pint` is used to perform reference checking for units. Pint's built in unit registry to enable validating and equating units with varying prefixes and allows for possible future extensions in processing. It is important to note that the units expressed and checked via :code:`pint` do not "follow the values" through the mathematics of the model. The unit checking is merely a layer of pre-processing -used to to support validation and documentation of units. The units are not used in the model itself. +used to to support validation and documentation of units. **The units are not used in the model itself.** The basis for most unit comparisons and validations in the model come from the `Commodity` table and the `Efficiency` table. Commodities have native units of measure defined in their table. As the nodes in the energy network, this @@ -433,18 +433,21 @@ by a ratio of units as output / input. .. math:: OutputUnits / ( InputUnits ) - A Regular Expression is used to parse these units and expects the denominator to be parenthesized. Other tables - should just have a plain entry such as `PJ` or `peta joules`. Unique entries into the registry from Temoa + A Regular Expression is used to parse these units and expects the **denominator to be parenthesized.** Other tables + should just have a plain entry such as `PJ` or `petajoules`, or fractional units as necessary, including + parentheticals. Unique entries into the registry from Temoa include: `dollar` (or `USD`), `euro` (or `EUR`), `passenger`, `seat` (to support passenger miles and seat miles), - and `ethos` to support dimensionless starting point commondly used in Temoa as a source. + and `ethos` to support dimensionless starting point commonly used in Temoa as a source. * Mixed I/O - Technologies summarized in the `Efficiency` table must match the commodity (nodal) values they connect + Technologies summarized in the ``Efficiency`` table must match the commodity (nodal) values they connect as input/output. While it is ok (but perhaps unusual) to have differing input units, the output units must be standardized, even if the output commodities differ. This is inferred from the many constraints on tech - activity which span regions and output commodities. For example a `MaxActivityGroup` constraint across the - `global` region set needs to be expressed in 1 set of units. An example might be a mixed power plant that - takes in barrels of oil or cubic meters of natural gas but outputs peta joules of electricity. 
+ activity which span regions and output commodities. For example a ``MaxActivityGroup`` constraint across the + ``global`` region set needs to be expressed in 1 set of units for the referenced :code:`tech` This implies that all + :code:`tech` entries for that named :code:`tech` produce output (of arbitrary type) in the same units. An example might + be a mixed power plant that takes in barrels of oil or cubic meters of natural gas but outputs peta joules + of electricity. * Testing values It is possible to test the validity of units expressed separately from the model or when troubleshooting a @@ -472,9 +475,63 @@ by a ratio of units as output / input. * Test sequencing Tables with units are sequentially checked for illegal characters in the :code:`units` field, proper formatting, - validation of the units themselves. Data retrieved from the `Commodity` and `Efficiency` tables is then used + validation of the units themselves. Data retrieved from the ``Commodity`` and ``Efficiency`` tables is then used to QA entries in other fields for consistency. If selected in the configuration file, this process takes place - before the model is run and results in log entries and an optional secondary text report. + before the model is run and results in log entries and a secondary text report. The sequencing of the test is: + + 1. Check the Database to ensure it is marked as version 3.1 in the ``MetaData`` table. + 2. Check all input tables units values for illegal characters, formatting, and membership in the registry + 3. Check the units in the ``Efficiency`` table for alignment with the I/O commodities and associate the units + with technologies for use in steps 4 & 5. + 4. Check related tables that refer to the technologies for alignment with the :code:`tech`. + 5. Check the cost tables for consistency in cost units and alignment to the :code:`tech`. + +The Tables inspected are listed in the table below + +.. 
csv-table:: + :header: "Table", "Units Since Version", "C2: Stan", "C3: Tech I/O", "C4: Related Claims", "C5: Cost" + :widths: 20, 8, 10, 10, 10, 10 + + "CapacityToActivity", "V3.1", "X", "", "(used)", "(used)" + "Commodity", "V3.1", "X", "tech alignment", "", "" + "CostEmission", "V3.0", "X", "", "", "X" + "CostFixed", "V3.0", "X", "", "", "X" + "CostInvest", "V3.0", "X", "", "", "X" + "CostVariable", "V3.0", "X", "", "", "X" + "Demand", "V3.0", "X", "", "X", "" + "Efficiency", "V3.1", "X", "X (ratio)", "", "" + "EmissionActivity", "V3.0", "X", "", "", "" + "EmissionLimit", "V3.0", "X", "", "", "" + "ExistingCapacity", "V3.0", "X", "", "X (C2A)", "" + "GrowthRateSeed", "V3.0", "X", "", "NYI", "" + "LifetimeProcess", "V3.1", "X", "", "", "" + "LifetimeTech", "V3.1", "X", "", "", "" + "LoanLifetimeTech", "V3.1", "X", "", "", "" + "MaxActivity", "V3.0", "X", "", "X", "" + "MaxActivityGroup", "V3.0", "X", "", "NYI", "" + "MaxCapacity", "V3.0", "X", "", "X (C2A)", "" + "MaxCapacityGroup", "V3.0", "X", "", "NYI", "" + "MaxNewCapacity", "V3.0", "X", "", "X (C2A)", "" + "MaxNewCapacityGroup", "V3.0", "X", "", "NYI", "" + "MaxResource", "V3.0", "X", "", "", "" + "MinActivity", "V3.0", "X", "", "X", "" + "MinActivityGroup", "V3.0", "X", "", "NYI", "" + "MinCapacity", "V3.0", "X", "", "X (C2A)", "" + "MinCapacityGroup", "V3.0", "X", "", "NYI", "" + "MinNewCapacity", "V3.0", "X", "", "X (C2A)", "" + "MinNewCapacityGroup", "V3.0", "X", "", "NYI", "" + "OutputBuiltCapacity", "V3.1", "", "", "", "" + "OutputCost", "V3.1", "", "", "", "" + "OutputCurtailment", "V3.1", "", "", "", "" + "OutputEmission", "V3.1", "", "", "", "" + "OutputFlowIn", "V3.1", "", "", "", "" + "OutputFlowOut", "V3.1", "", "", "", "" + "OutputNetCapacity", "V3.1", "", "", "", "" + "OutputObjective", "V3.1", "", "", "", "" + "OutputRetiredCapacity", "V3.1", "", "", "", "" + "StorageDuration", "V3.1", "X", "", "", "" + +*notes: NYI = Not Yet Implemented, C2A = CapacityToActivity factor used in comparison* Source Tracing -------------- diff --git a/temoa/temoa_model/temoa_sequencer.py b/temoa/temoa_model/temoa_sequencer.py index 79505ad1..fd328590 100644 --- a/temoa/temoa_model/temoa_sequencer.py +++ b/temoa/temoa_model/temoa_sequencer.py @@ -207,7 +207,8 @@ def start(self) -> TemoaModel | None: ) if not clear_screen: print( - '\nWarning: Units are not clear. Check log file for details and see the units report in the output path.' + '\nWarning: Units check discovered issues. ' + 'Check log file for details and see the units report in the output path.' ) con.close() @@ -234,7 +235,8 @@ def start(self) -> TemoaModel | None: ) if not clear_screen and not self.config.silent: print( - '\nWarning: Units are not clear. Check log file for details and see the units report in the output path.' + '\nWarning: Units check discovered issues. ' + 'Check log file for details and see the units report in the output path.' ) suffixes = ( From 0ad6378d4fe07060028d2d693b6a010b9bc96a40 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Mon, 20 Oct 2025 17:44:04 -0700 Subject: [PATCH 69/70] fix typo 'unit' -> 'units' in config files --- README.md | 2 +- data_files/my_configs/config_sample.toml | 2 +- data_files/my_configs/morris_utopia.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bfafa9d1..dcd8c90f 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,7 @@ and has all parameters in it. It can be copied/renamed, etc. | Temoa Mode | The execution mode. 
See note below on currently supported modes |
| Input/Output DB | The source (and optionally different) output database. Note for myopic, MGA input must be same as output |
| Price Checking | Run the "price checker" on the built model to look for costing deficiencies and log them |
-| Unit Checking | Run the "unit checker" on the source (and destination) databases and document anomalies with units |
+| Units Checking | Run the "units checker" on the source (and destination) databases and document anomalies with units |
| Source Tracing | Check the integrity of the commodity flow network in every region-period combination. Required for Myopic |
| Plot Commodity Network | Produce HTML (viewable in any browser) displays of the networks built (see note at bottom) |
| Solver | The exact name of the solver executable to call |
diff --git a/data_files/my_configs/config_sample.toml b/data_files/my_configs/config_sample.toml
index a077bf29..4b1e3045 100644
--- a/data_files/my_configs/config_sample.toml
+++ b/data_files/my_configs/config_sample.toml
@@ -40,7 +40,7 @@ price_check = true

# check the units in the source/destination databases for units consistency. Errors
# are reported in log file and units_check.txt in output folder
-unit_check = true
+units_check = true

# Check the network connectivity for processes in the model. Strongly
# recommended to ensure proper performance. Results are reported in log file
diff --git a/data_files/my_configs/morris_utopia.toml b/data_files/my_configs/morris_utopia.toml
index 1d9ab548..fdff764a 100644
--- a/data_files/my_configs/morris_utopia.toml
+++ b/data_files/my_configs/morris_utopia.toml
@@ -37,7 +37,7 @@ price_check = true

# check the units in the source/destination databases for units consistency. Errors
# are reported in log file and units_check.txt in output folder
-unit_check = true
+units_check = true

# Check the network connectivity for processes in the model. Strongly
# recommended to ensure proper performance. 
Results are reported in log file From 558dbef79e71fa7dc7d3290f6738f41b5141cfb7 Mon Sep 17 00:00:00 2001 From: Jeff H Date: Thu, 23 Oct 2025 09:36:50 -0700 Subject: [PATCH 70/70] Flip the per year expectation for FixedCost and a few other minor adjustments --- temoa/temoa_model/unit_checking/common.py | 2 +- .../temoa_model/unit_checking/relation_checker.py | 4 ++-- temoa/temoa_model/unit_checking/screener.py | 14 ++++++++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/temoa/temoa_model/unit_checking/common.py b/temoa/temoa_model/unit_checking/common.py index db7db5e5..a5661fce 100644 --- a/temoa/temoa_model/unit_checking/common.py +++ b/temoa/temoa_model/unit_checking/common.py @@ -134,7 +134,7 @@ cost_based_tables = [ CostTableData('CostInvest', None, True, False), CostTableData('CostEmission', 'emis_comm', False, False), - CostTableData('CostFixed', None, True, True), + CostTableData('CostFixed', None, True, False), CostTableData('CostVariable', None, False, False), ] """Tables that have cost units and their properties""" diff --git a/temoa/temoa_model/unit_checking/relation_checker.py b/temoa/temoa_model/unit_checking/relation_checker.py index e60df8ef..cf0e8f7c 100644 --- a/temoa/temoa_model/unit_checking/relation_checker.py +++ b/temoa/temoa_model/unit_checking/relation_checker.py @@ -361,7 +361,7 @@ def check_cost_tables( measure_units *= c2a_units * ureg.year if ct.period_based: - measure_units /= ureg.year # remove the "per year" from this denominator + measure_units /= ureg.year # remove the "per year" from this element matched = measure_units == commodity_units @@ -370,7 +370,7 @@ def check_cost_tables( label = ( f'{ct.table_name}: Non-matching measure unit found in cost denominator for tech/commodity {tech_reference}: {raw_units_expression}' f'\n Expecting commodity units: {commodity_units}. Discovered (after conversions applied): {measure_units}' - f'\n Conversions: c2a units: {c2a_units*ureg.year if c2a_units else "N/A"}{", `per period` removed" if ct.period_based else ""}\n ' + f'\n Conversions: c2a units: {c2a_units if c2a_units else "N/A"}{", `per period` removed" if ct.period_based else ""}\n ' ) table_grouped_errors[label].append(idx) diff --git a/temoa/temoa_model/unit_checking/screener.py b/temoa/temoa_model/unit_checking/screener.py index b7f6d7fa..74a68cea 100644 --- a/temoa/temoa_model/unit_checking/screener.py +++ b/temoa/temoa_model/unit_checking/screener.py @@ -260,13 +260,19 @@ def _write_report(report_path: Path, report_entries: list[str]): """Write the report to file""" if not report_path: return + if not report_path.exists(): + report_path.mkdir(parents=True) if report_path.is_dir(): - # augment with a default filename - report_path /= 'units_check.txt' + # augment with a default filename including timestamp + from datetime import datetime + + timestamp = datetime.now().strftime('%Y-%m-%d_%H%M%S') + report_path /= f'units_check_{timestamp}.txt' with open(report_path, 'w', encoding='utf-8') as report_file: report_file.writelines(report_entries) +"""Preserve a way to run this quickly/independently as long as that is useful...""" if __name__ == '__main__': - db_path = Path(PROJECT_ROOT) / 'data_files/mike_US/US_9R_8D_v3_stability_v3_1.sqlite' - screen(db_path, report_path=Path(PROJECT_ROOT) / 'temp/') + db_path = Path(PROJECT_ROOT) / '' + screen(db_path, report_path=Path(PROJECT_ROOT) / 'temp')
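
Editor's note: the `common.py` and `relation_checker.py` hunks above adjust how the "per year" factor enters the
expected units for cost tables, keyed off the flag flipped for `CostFixed`. The sketch below is a rough, standalone
illustration of that style of check using pint directly; it is not Temoa's actual relation checker, and the `USD`
definition, the helper names, and the particular reading of `period_based` are assumptions made for the example.

```python
import pint

ureg = pint.UnitRegistry()
ureg.define('USD = [currency]')  # pint ships no currency unit, so define one for the sketch


def expected_fixed_cost_units(capacity_units: str, period_based: bool) -> str:
    """Units a fixed-cost entry is expected to declare under this toy interpretation."""
    base = f'USD / ({capacity_units})'
    # Toy rule: when period_based is True the per-year is implied by the model period and is
    # therefore NOT expected in the declared units; when False, the /year must be declared.
    return base if period_based else base + ' / year'


def units_match(declared: str, expected: str) -> bool:
    """Compare dimensionality only, so prefix differences (GW vs MW) still count as a match."""
    return ureg.parse_units(declared).dimensionality == ureg.parse_units(expected).dimensionality


declared = 'USD / GW / year'  # e.g. dollars per gigawatt of capacity per year
print(units_match(declared, expected_fixed_cost_units('GW', period_based=False)))  # True
print(units_match(declared, expected_fixed_cost_units('GW', period_based=True)))   # False
```

Under that framing, flipping a table's flag in `common.py`, as this patch does for `CostFixed`, amounts to switching
which of the two expectations the checker applies to that table without touching any data.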
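
For context on what the screener itself validates (step 2 of the test sequencing described in the documentation patch
earlier in this series), the core of that kind of screen is just parsing each declared units string against a
registry. Below is a minimal, hypothetical sketch with pint; the helper name, the illegal-character set, and the
sample rows are invented for illustration and are not the `screener.py` API.

```python
import pint

ureg = pint.UnitRegistry()
ILLEGAL_CHARS = set(';$#')  # example screen only; the real set of illegal characters may differ


def screen_units_strings(units_by_row: dict[int, str]) -> list[str]:
    """Return findings for unit strings that fail a basic screen (illustrative only)."""
    findings: list[str] = []
    for row_id, expression in units_by_row.items():
        bad_chars = ILLEGAL_CHARS & set(expression)
        if bad_chars:
            findings.append(f'row {row_id}: illegal characters {sorted(bad_chars)} in {expression!r}')
            continue
        try:
            ureg.parse_units(expression)  # formatting + registry membership in one step
        except (pint.errors.UndefinedUnitError, pint.errors.DefinitionSyntaxError, ValueError):
            findings.append(f'row {row_id}: {expression!r} is not recognized by the unit registry')
    return findings


print(screen_units_strings({1: 'PJ', 2: 'kilowidgets', 3: 'GW;yr'}))
```

Anything that survives this kind of screen is then handed to the relational checks against the `Efficiency` and cost
tables described in the sequencing above.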