22
33import re
44from importlib .metadata import version
5- from typing import List
5+ from typing import List , Optional
66
77import numpy as np
88import pandas as pd
@@ -315,10 +315,37 @@ def test_prepare_reporter(test_context):
315315 assert 14299 <= len (rep .graph ) - N
316316
317317
# Filters for comparison
#
# IAMC 'variable' codes use "|" as the separator between parts. "|" is also the regex
# alternation operator, so every literal "|" in the patterns below must be escaped as
# r"\|". An unescaped "|" splits the pattern into alternatives and, combined with
# Pattern.match(), ignores far more messages than intended.

# Primary energy variables expected in both data sets
PE0 = r"Primary Energy\|(Coal|Gas|Hydro|Nuclear|Solar|Wind)"
# Subset of PE0 (NB: not Hydro or Nuclear) for which units and most values differ
PE1 = r"Primary Energy\|(Coal|Gas|Solar|Wind)"
# A single transport emissions variable
E = (
    r"Emissions\|CO2\|Energy\|Demand\|Transportation\|Road Rail and Domestic "
    "Shipping"
)

# Patterns for messages from compare_iamc() that are known/expected and thus ignored.
# Each is applied with Pattern.match(), i.e. anchored at the start of the message.
IGNORE = [
    # Other 'variable' codes are missing from `obs`
    re.compile(f"variable='(?!{PE0}).*': no right data"),
    # 'variable' codes with further parts are missing from `obs`
    re.compile(f"variable='{PE0}.*': no right data"),
    # For `PE1` (NB: not Hydro or Nuclear) units and most values differ.
    # The message text is "units mismatch: …", so the colon is required to match.
    # NOTE(review): the trailing "'', nan" expects a NaN unit on the right-hand side;
    # confirm this still occurs, since compare_iamc() drops rows with missing right
    # entries before collecting units.
    re.compile(f"variable='{PE1}.*': units mismatch: .*EJ/yr.*'', nan"),
    re.compile(r"variable='Primary Energy\|Coal': 220 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Gas': 234 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Solar': 191 of 240 values with \|diff"),
    re.compile(r"variable='Primary Energy\|Wind': 179 of 240 values with \|diff"),
    # For `E` units and most values differ
    re.compile(f"variable='{E}': units mismatch: .*Mt CO2/yr.*Mt / a"),
    re.compile(rf"variable='{E}': 20 missing right entries"),
    re.compile(rf"variable='{E}': 220 of 240 values with \|diff"),
]
342+
343+
318344@to_simulate .minimum_version
319345def test_compare (test_context ):
320346 """Compare the output of genno-based and legacy reporting."""
321- key = "pe test"
347+ key = "all::iamc"
348+ # key = "pe test"
322349
323350 # Obtain the output from reporting `key` on `snapshot_id`
324351 snapshot_id : int = 1
@@ -340,24 +367,8 @@ def test_compare(test_context):
340367 engine = "pyarrow" ,
341368 )
342369
343- # Filters for comparison
344- pe0 = r"Primary Energy\|(Coal|Gas|Hydro|Nuclear|Solar|Wind)"
345- pe1 = r"Primary Energy\|(Coal|Gas|Solar|Wind)"
346- ignore = [
347- # Other 'variable' codes are missing from `obs`
348- re .compile (f"variable='(?!{ pe0 } ).*': no right data" ),
349- # 'variable' codes with further parts are missing from `obs`
350- re .compile (f"variable='{ pe0 } .*': no right data" ),
351- # For `pe1` (NB: not Hydro or Solar) units and most values differ
352- re .compile (f"variable='{ pe1 } .*': units mismatch .*EJ/yr.*'', nan" ),
353- re .compile (r"variable='Primary Energy|Coal': 220 of 240 values with \|diff" ),
354- re .compile (r"variable='Primary Energy|Gas': 234 of 240 values with \|diff" ),
355- re .compile (r"variable='Primary Energy|Solar': 191 of 240 values with \|diff" ),
356- re .compile (r"variable='Primary Energy|Wind': 179 of 240 values with \|diff" ),
357- ]
358-
359370 # Perform the comparison, ignoring some messages
360- if messages := compare_iamc (exp , obs , ignore = ignore ):
371+ if messages := compare_iamc (exp , obs , ignore = IGNORE ):
361372 # Other messages that were not explicitly ignored → some error
362373 print ("\n " .join (messages ))
363374 assert False
@@ -369,8 +380,8 @@ def compare_iamc(
369380 """Compare IAMC-structured data in `left` and `right`; return a list of messages."""
370381 result = []
371382
372- def record (message : str ) -> None :
373- if any (p .match (message ) for p in ignore ):
383+ def record (message : str , condition : Optional [ bool ] = True ) -> None :
384+ if not condition or any (p .match (message ) for p in ignore ):
374385 return
375386 result .append (message )
376387
@@ -388,16 +399,29 @@ def checks(df: pd.DataFrame):
388399 "value_rel = value_diff / value_left"
389400 )
390401
402+ na_left = tmp .isna ()[["unit_left" , "value_left" ]]
403+ if na_left .any (axis = None ):
404+ record (f"{ prefix } { na_left .sum (axis = 0 ).max ()} missing left entries" )
405+ tmp = tmp [~ na_left .any (axis = 1 )]
406+ na_right = tmp .isna ()[["unit_right" , "value_right" ]]
407+ if na_right .any (axis = None ):
408+ record (f"{ prefix } { na_right .sum (axis = 0 ).max ()} missing right entries" )
409+ tmp = tmp [~ na_right .any (axis = 1 )]
410+
391411 units_left = set (tmp .unit_left .unique ())
392412 units_right = set (tmp .unit_right .unique ())
393- if units_left != units_right :
394- record (f"{ prefix } units mismatch: { units_left } != { units_right } " )
413+ record (
414+ condition = units_left != units_right ,
415+ message = f"{ prefix } units mismatch: { units_left } != { units_right } " ,
416+ )
395417
396418 N0 = len (df )
397419
398420 mask1 = tmp .query ("abs(value_diff) > @atol" )
399- if len (mask1 ):
400- record (f"{ prefix } { len (mask1 )} of { N0 } values with |diff| > { atol } " )
421+ record (
422+ condition = len (mask1 ),
423+ message = f"{ prefix } { len (mask1 )} of { N0 } values with |diff| > { atol } " ,
424+ )
401425
402426 for (model , scenario ), group_0 in left .merge (
403427 right ,
0 commit comments