
Commit 97ba99e

fix error-bar bug for normalized consistency plots (#222)
* fix: matplotlib errorbar no longer throws an exception in normalized consistency tests
* tests: added unit test for comparative plots
* fix: removed numpy.warnings in favor of the warnings module
* increased the number of comparison plot tests, and set the minimum number of Mock models to 2
* forced errorbar limit arguments of the consistency plot to be positive; precision rounding had been producing slightly negative values
* small detail: defaulted t-test plot axis labels to x: None, y: Information Gain
* returned Manifest
* fixed some style issues
1 parent cb54bd0 commit 97ba99e
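The headline fix: matplotlib's errorbar rejects negative error extents, and floating-point rounding in the normalized consistency plots could push a computed extent a hair below zero. Below is a minimal sketch of that clamping idea; the function and argument names are illustrative, not the actual pycsep plotting API.

import numpy
import matplotlib.pyplot as plt

def plot_consistency_point(ax, row, observed, sim_low, sim_high):
    # errorbar takes positive distances from the plotted point; rounding can
    # make observed - sim_low come out as, say, -1e-16, which matplotlib
    # rejects with a ValueError
    lower = numpy.clip(observed - sim_low, 0.0, None)
    upper = numpy.clip(sim_high - observed, 0.0, None)
    ax.errorbar(observed, row, xerr=[[lower], [upper]], fmt='ko', capsize=4)

fig, ax = plt.subplots()
# observed sits on the lower simulated bound, up to rounding
plot_consistency_point(ax, 0, observed=0.9999999999999999, sim_low=1.0, sim_high=2.0)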

File tree

3 files changed: +907 -599 lines changed


csep/core/poisson_evaluations.py

Lines changed: 105 additions & 57 deletions
@@ -4,14 +4,15 @@
 import numpy
 import scipy.stats
 import scipy.spatial
+import warnings
 
 from csep.models import EvaluationResult
 from csep.utils.stats import poisson_joint_log_likelihood_ndarray
 from csep.core.exceptions import CSEPCatalogException
-from csep.core.regions import QuadtreeGrid2D
 
 
-def paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, scale=False):
+def paired_t_test(forecast, benchmark_forecast, observed_catalog,
+                  alpha=0.05, scale=False):
     """ Computes the t-test for gridded earthquake forecasts.
 
     This score is positively oriented, meaning that positive values of the information gain indicate that the
@@ -30,11 +31,15 @@ def paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, sc
 
     # needs some pre-processing to put the forecasts in the context that is required for the t-test. this is different
     # for cumulative forecasts (eg, multiple time-horizons) and static file-based forecasts.
-    target_event_rate_forecast1, n_fore1 = forecast.target_event_rates(observed_catalog, scale=scale)
-    target_event_rate_forecast2, n_fore2 = benchmark_forecast.target_event_rates(observed_catalog, scale=scale)
+    target_event_rate_forecast1, n_fore1 = forecast.target_event_rates(
+        observed_catalog, scale=scale)
+    target_event_rate_forecast2, n_fore2 = benchmark_forecast.target_event_rates(
+        observed_catalog, scale=scale)
 
     # call the primative version operating on ndarray
-    out = _t_test_ndarray(target_event_rate_forecast1, target_event_rate_forecast2, observed_catalog.event_count,
+    out = _t_test_ndarray(target_event_rate_forecast1,
+                          target_event_rate_forecast2,
+                          observed_catalog.event_count,
                           n_fore1, n_fore2, alpha=alpha)
 
     # storing this for later
@@ -49,7 +54,9 @@ def paired_t_test(forecast, benchmark_forecast, observed_catalog, alpha=0.05, sc
     result.min_mw = numpy.min(forecast.magnitudes)
     return result
 
-def w_test(gridded_forecast1, gridded_forecast2, observed_catalog, scale=False):
+
+def w_test(gridded_forecast1, gridded_forecast2, observed_catalog,
+           scale=False):
     """ Calculate the Single Sample Wilcoxon signed-rank test between two gridded forecasts.
 
     This test allows to test the null hypothesis that the median of Sample (X1(i)-X2(i)) is equal to a (N1-N2) / N_obs.
@@ -79,14 +86,18 @@ def w_test(gridded_forecast1, gridded_forecast2, observed_catalog, scale=False):
 
     # needs some pre-processing to put the forecasts in the context that is required for the t-test. this is different
     # for cumulative forecasts (eg, multiple time-horizons) and static file-based forecasts.
-    target_event_rate_forecast1, _ = gridded_forecast1.target_event_rates(observed_catalog, scale=scale)
-    target_event_rate_forecast2, _ = gridded_forecast2.target_event_rates(observed_catalog, scale=scale)
+    target_event_rate_forecast1, _ = gridded_forecast1.target_event_rates(
+        observed_catalog, scale=scale)
+    target_event_rate_forecast2, _ = gridded_forecast2.target_event_rates(
+        observed_catalog, scale=scale)
 
     N = observed_catalog.event_count  # Sum of all the observed earthquakes
     N1 = gridded_forecast1.event_count  # Total number of Forecasted earthquakes by Model 1
     N2 = gridded_forecast2.event_count  # Total number of Forecasted earthquakes by Model 2
-    X1 = numpy.log(target_event_rate_forecast1)  # Log of every element of Forecast 1
-    X2 = numpy.log(target_event_rate_forecast2)  # Log of every element of Forecast 2
+    X1 = numpy.log(
+        target_event_rate_forecast1)  # Log of every element of Forecast 1
+    X2 = numpy.log(
+        target_event_rate_forecast2)  # Log of every element of Forecast 2
 
     # this ratio is the same as long as we scale all the forecasts and catalog rates by the same value
     median_value = (N1 - N2) / N
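For orientation, the W-test above boils down to a one-sample Wilcoxon signed-rank test on the shifted log-rate differences. A compact sketch of that reduction using scipy's built-in test instead of the module's own _w_test_ndarray; names are illustrative.

import numpy
import scipy.stats

def w_test_sketch(rates1, rates2, n_obs, n_fore1, n_fore2):
    # paired differences of log target-event rates
    d = numpy.log(rates1) - numpy.log(rates2)
    # null hypothesis: the median of (X1 - X2) equals (N1 - N2) / N
    median_value = (n_fore1 - n_fore2) / n_obs
    return scipy.stats.wilcoxon(d - median_value)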
@@ -110,6 +121,7 @@ def w_test(gridded_forecast1, gridded_forecast2, observed_catalog, scale=False):
     result.min_mw = numpy.min(gridded_forecast1.magnitudes)
     return result
 
+
 def number_test(gridded_forecast, observed_catalog):
     """Computes "N-Test" on a gridded forecast.
     author: @asim
@@ -155,7 +167,9 @@ def number_test(gridded_forecast, observed_catalog):
 
     return result
 
-def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None,
+
+def conditional_likelihood_test(gridded_forecast, observed_catalog,
+                                num_simulations=1000, seed=None,
                                 random_numbers=None, verbose=False):
     """Performs the conditional likelihood test on Gridded Forecast using an Observed Catalog.
 
@@ -186,7 +200,8 @@ def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulati
     gridded_catalog_data = observed_catalog.spatial_magnitude_counts()
 
     # simply call likelihood test on catalog data and forecast
-    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data, gridded_catalog_data,
+    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data,
+                                                        gridded_catalog_data,
                                                         num_simulations=num_simulations,
                                                         seed=seed,
                                                         random_numbers=random_numbers,
@@ -207,6 +222,7 @@ def conditional_likelihood_test(gridded_forecast, observed_catalog, num_simulati
 
     return result
 
+
 def poisson_spatial_likelihood(forecast, catalog):
     """
     This function computes the observed log-likehood score obtained by a gridded forecast in each cell, given a
@@ -227,15 +243,17 @@ def poisson_spatial_likelihood(forecast, catalog):
     """
 
     scale = catalog.event_count / forecast.event_count
-
+
     first_term = -forecast.spatial_counts() * scale
-    second_term = catalog.spatial_counts() * numpy.log(forecast.spatial_counts() * scale)
+    second_term = catalog.spatial_counts() * numpy.log(
+        forecast.spatial_counts() * scale)
     third_term = -scipy.special.loggamma(catalog.spatial_counts() + 1)
-
+
     poll = first_term + second_term + third_term
-
+
     return poll
 
+
 def binary_spatial_likelihood(forecast, catalog):
     """
     This function computes log-likelihood scores (bills), using a binary likelihood distribution of earthquakes.
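The three terms above are the Poisson log-pmf, log P(n | lam) = -lam + n*log(lam) - log(n!), evaluated cell-wise with lam equal to the scaled forecast rate. A quick check of that identity against scipy:

import numpy
import scipy.stats
import scipy.special

lam, n = 2.5, 3  # scaled forecast rate and observed count in one cell
by_hand = -lam + n * numpy.log(lam) - scipy.special.loggamma(n + 1)
assert numpy.isclose(by_hand, scipy.stats.poisson.logpmf(n, lam))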
@@ -253,24 +271,27 @@ def binary_spatial_likelihood(forecast, catalog):
     Returns:
         bill: Binary-based log-likelihood scores obtained by the forecast in each spatial cell.
     """
-
+
     scale = catalog.event_count / forecast.event_count
     target_idx = numpy.nonzero(catalog.spatial_counts())
     X = numpy.zeros(forecast.spatial_counts().shape)
     X[target_idx[0]] = 1
-
+
     # First, we estimate the log-likelihood in cells where no events are observed:
-    first_term = (1-X) * (-forecast.spatial_counts() * scale)
-
+    first_term = (1 - X) * (-forecast.spatial_counts() * scale)
+
     # Then, we compute the log-likelihood of observing one or more events given a Poisson distribution, i.e., 1 - Pr(0):
-    second_term = X * (numpy.log(1.0 - numpy.exp(-forecast.spatial_counts() * scale)))
-
+    second_term = X * (
+        numpy.log(1.0 - numpy.exp(-forecast.spatial_counts() * scale)))
+
     # Finally, we sum both terms to compute log-likelihood score in each spatial cell:
     bill = first_term + second_term
-
+
     return bill
 
-def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
+
+def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000,
+                   seed=None, random_numbers=None,
                    verbose=False):
     """
     Performs the Magnitude Test on a Gridded Forecast using an observed catalog.
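The second term follows from Pr(0 events) = exp(-lam) for a Poisson rate lam, so the log-probability of at least one event is log(1 - exp(-lam)). For nearly empty cells, numpy.expm1 computes the same quantity without the cancellation in 1 - exp(-lam); a small check:

import numpy

lam = 1e-8  # a nearly empty cell
direct = numpy.log(1.0 - numpy.exp(-lam))
stable = numpy.log(-numpy.expm1(-lam))
assert numpy.isclose(direct, stable)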
@@ -291,16 +312,18 @@ def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, see
     """
 
     # grid catalog onto spatial grid
-    gridded_catalog_data = observed_catalog.magnitude_counts(mag_bins=gridded_forecast.magnitudes)
+    gridded_catalog_data = observed_catalog.magnitude_counts(
+        mag_bins=gridded_forecast.magnitudes)
 
     # simply call likelihood test on catalog data and forecast
-    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.magnitude_counts(), gridded_catalog_data,
-                                                        num_simulations=num_simulations,
-                                                        seed=seed,
-                                                        random_numbers=random_numbers,
-                                                        use_observed_counts=True,
-                                                        verbose=verbose,
-                                                        normalize_likelihood=True)
+    qs, obs_ll, simulated_ll = _poisson_likelihood_test(
+        gridded_forecast.magnitude_counts(), gridded_catalog_data,
+        num_simulations=num_simulations,
+        seed=seed,
+        random_numbers=random_numbers,
+        use_observed_counts=True,
+        verbose=verbose,
+        normalize_likelihood=True)
 
     # populate result data structure
     result = EvaluationResult()
@@ -315,7 +338,9 @@ def magnitude_test(gridded_forecast, observed_catalog, num_simulations=1000, see
 
     return result
 
-def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
+
+def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000,
+                 seed=None, random_numbers=None,
                  verbose=False):
     """
     Performs the Spatial Test on the Forecast using the Observed Catalogs.
@@ -338,13 +363,14 @@ def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=
     gridded_catalog_data = observed_catalog.spatial_counts()
 
     # simply call likelihood test on catalog data and forecast
-    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.spatial_counts(), gridded_catalog_data,
-                                                        num_simulations=num_simulations,
-                                                        seed=seed,
-                                                        random_numbers=random_numbers,
-                                                        use_observed_counts=True,
-                                                        verbose=verbose,
-                                                        normalize_likelihood=True)
+    qs, obs_ll, simulated_ll = _poisson_likelihood_test(
+        gridded_forecast.spatial_counts(), gridded_catalog_data,
+        num_simulations=num_simulations,
+        seed=seed,
+        random_numbers=random_numbers,
+        use_observed_counts=True,
+        verbose=verbose,
+        normalize_likelihood=True)
 
     # populate result data structure
     result = EvaluationResult()
@@ -361,7 +387,9 @@ def spatial_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=
     result.min_mw = -1
     return result
 
-def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, seed=None, random_numbers=None,
+
+def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000,
+                    seed=None, random_numbers=None,
                     verbose=False):
     """
     Performs the likelihood test on Gridded Forecast using an Observed Catalog.
@@ -392,7 +420,8 @@ def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, se
     gridded_catalog_data = observed_catalog.spatial_magnitude_counts()
 
     # simply call likelihood test on catalog and forecast
-    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data, gridded_catalog_data,
+    qs, obs_ll, simulated_ll = _poisson_likelihood_test(gridded_forecast.data,
+                                                        gridded_catalog_data,
                                                         num_simulations=num_simulations,
                                                         seed=seed,
                                                         random_numbers=random_numbers,
@@ -413,6 +442,7 @@ def likelihood_test(gridded_forecast, observed_catalog, num_simulations=1000, se
 
     return result
 
+
 def _number_test_ndarray(fore_cnt, obs_cnt, epsilon=1e-6):
     """ Computes delta1 and delta2 values from the csep1 number test.
 
@@ -428,7 +458,9 @@ def _number_test_ndarray(fore_cnt, obs_cnt, epsilon=1e-6):
     delta2 = scipy.stats.poisson.cdf(obs_cnt + epsilon, fore_cnt)
     return delta1, delta2
 
-def _t_test_ndarray(target_event_rates1, target_event_rates2, n_obs, n_f1, n_f2, alpha=0.05):
+
+def _t_test_ndarray(target_event_rates1, target_event_rates2, n_obs, n_f1,
+                    n_f2, alpha=0.05):
     """ Computes T test statistic by comparing two target event rate distributions.
 
     We compare Forecast from Model 1 and with Forecast of Model 2. Information Gain is computed, which is then employed
@@ -466,7 +498,8 @@ def _t_test_ndarray(target_event_rates1, target_event_rates2, n_obs, n_f1, n_f2,
 
     # Obtaining the Critical Value of T from T distribution.
     df = N - 1
-    t_critical = scipy.stats.t.ppf(1 - (alpha / 2), df)  # Assuming 2-Tail Distribution for 2 tail, divide 0.05/2.
+    t_critical = scipy.stats.t.ppf(1 - (alpha / 2),
+                                   df)  # Assuming 2-Tail Distribution for 2 tail, divide 0.05/2.
 
     # Computing Information Gain Interval.
     ig_lower = information_gain - (t_critical * forecast_std / numpy.sqrt(N))
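Together these lines form the standard two-sided Student-t confidence interval, IG ± t(1 - alpha/2, N - 1) * s / sqrt(N), around the information gain. A self-contained sketch of just that step, with illustrative names:

import numpy
import scipy.stats

def information_gain_interval(information_gain, forecast_std, N, alpha=0.05):
    # two-sided: put alpha/2 in each tail of the t distribution, N - 1 dof
    t_critical = scipy.stats.t.ppf(1 - alpha / 2, N - 1)
    half_width = t_critical * forecast_std / numpy.sqrt(N)
    return information_gain - half_width, information_gain + half_width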
@@ -480,6 +513,7 @@ def _t_test_ndarray(target_event_rates1, target_event_rates2, n_obs, n_f1, n_f2,
             'ig_lower': ig_lower,
             'ig_upper': ig_upper}
 
+
 def _w_test_ndarray(x, m=0):
     """ Calculate the Single Sample Wilcoxon signed-rank test for an ndarray.
 
@@ -507,7 +541,7 @@ def _w_test_ndarray(x, m=0):
 
     count = len(d)
     if count < 10:
-        numpy.warnings.warn("Sample size too small for normal approximation.")
+        warnings.warn("Sample size too small for normal approximation.")
 
     # compute ranks
     r = scipy.stats.rankdata(abs(d))
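The one-line change above is needed because numpy.warnings was never a public API, just an incidental re-export of the standard library module, and recent NumPy releases have dropped the alias. The supported pattern is the stdlib module directly:

import warnings

def check_sample_size(count, minimum=10):
    # stdlib warnings works on every NumPy version; numpy.warnings raises
    # AttributeError where the alias has been removed
    if count < minimum:
        warnings.warn("Sample size too small for normal approximation.")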
@@ -542,7 +576,9 @@ def _w_test_ndarray(x, m=0):
 
     return w_test_eval
 
-def _simulate_catalog(num_events, sampling_weights, sim_fore, random_numbers=None):
+
+def _simulate_catalog(num_events, sampling_weights, sim_fore,
+                      random_numbers=None):
     # generate uniformly distributed random numbers in [0,1), this
     if random_numbers is None:
         random_numbers = numpy.random.rand(num_events)
@@ -562,8 +598,11 @@ def _simulate_catalog(num_events, sampling_weights, sim_fore, random_numbers=Non
 
     return sim_fore
 
-def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000, random_numbers=None,
-                             seed=None, use_observed_counts=True, verbose=True, normalize_likelihood=False):
+
+def _poisson_likelihood_test(forecast_data, observed_data,
+                             num_simulations=1000, random_numbers=None,
+                             seed=None, use_observed_counts=True, verbose=True,
+                             normalize_likelihood=False):
    """
     Computes the likelihood-test from CSEP using an efficient simulation based approach.
     Args:
@@ -582,7 +621,8 @@ def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000,
         numpy.random.seed(seed)
 
     # used to determine where simulated earthquake should be placed, by definition of cumsum these are sorted
-    sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(forecast_data)
+    sampling_weights = numpy.cumsum(forecast_data.ravel()) / numpy.sum(
+        forecast_data)
 
     # data structures to store results
     sim_fore = numpy.zeros(sampling_weights.shape)
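The comment above describes inverse-CDF sampling: the normalized cumulative sum turns the forecast rates into a discrete CDF over cells, so each uniform draw maps to a cell via binary search. A minimal sketch of the idea; it mirrors the approach, not the exact _simulate_catalog implementation.

import numpy

def simulate_counts(rates, num_events, rng=None):
    if rng is None:
        rng = numpy.random.default_rng()
    # cumsum of nonnegative rates is sorted, so it acts as a CDF
    sampling_weights = numpy.cumsum(rates.ravel()) / numpy.sum(rates)
    cells = numpy.searchsorted(sampling_weights, rng.random(num_events))
    counts = numpy.zeros(sampling_weights.shape)
    numpy.add.at(counts, cells, 1)  # accumulate simulated events per cell
    return counts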
@@ -605,29 +645,35 @@ def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000,
 
     # note for performance: these operations perform copies
     observed_data_nonzero = observed_data.ravel()[target_idx]
-    target_event_forecast = log_bin_expectations[target_idx] * observed_data_nonzero
+    target_event_forecast = log_bin_expectations[
+        target_idx] * observed_data_nonzero
 
     # main simulation step in this loop
     for idx in range(num_simulations):
         if use_observed_counts:
             num_events_to_simulate = int(n_obs)
         else:
-            num_events_to_simulate = int(numpy.random.poisson(expected_forecast_count))
+            num_events_to_simulate = int(
+                numpy.random.poisson(expected_forecast_count))
 
         if random_numbers is None:
-            sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore)
+            sim_fore = _simulate_catalog(num_events_to_simulate,
+                                         sampling_weights, sim_fore)
         else:
-            sim_fore = _simulate_catalog(num_events_to_simulate, sampling_weights, sim_fore,
+            sim_fore = _simulate_catalog(num_events_to_simulate,
+                                         sampling_weights, sim_fore,
                                          random_numbers=random_numbers[idx, :])
 
         # compute joint log-likelihood from simulation by leveraging that only cells with target events contribute to likelihood
         sim_target_idx = numpy.nonzero(sim_fore)
         sim_obs_nonzero = sim_fore[sim_target_idx]
-        sim_target_event_forecast = log_bin_expectations[sim_target_idx] * sim_obs_nonzero
+        sim_target_event_forecast = log_bin_expectations[
+            sim_target_idx] * sim_obs_nonzero
 
         # compute joint log-likelihood
-        current_ll = poisson_joint_log_likelihood_ndarray(sim_target_event_forecast, sim_obs_nonzero,
-                                                          expected_forecast_count)
+        current_ll = poisson_joint_log_likelihood_ndarray(
+            sim_target_event_forecast, sim_obs_nonzero,
+            expected_forecast_count)
 
         # append to list of simulated log-likelihoods
         simulated_ll.append(current_ll)
@@ -638,7 +684,9 @@ def _poisson_likelihood_test(forecast_data, observed_data, num_simulations=1000,
             print(f'... {idx + 1} catalogs simulated.')
 
     # observed joint log-likelihood
-    obs_ll = poisson_joint_log_likelihood_ndarray(target_event_forecast, observed_data_nonzero, expected_forecast_count)
+    obs_ll = poisson_joint_log_likelihood_ndarray(target_event_forecast,
+                                                  observed_data_nonzero,
+                                                  expected_forecast_count)
 
     # quantile score
     qs = numpy.sum(simulated_ll <= obs_ll) / num_simulations
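The last line is the quantile score: the fraction of simulated joint log-likelihoods that fall at or below the observed one, i.e. an empirical p-value for the observation under the forecast. As a standalone sketch:

import numpy

def quantile_score(simulated_ll, obs_ll):
    simulated_ll = numpy.asarray(simulated_ll)
    # fraction of simulated catalogs no more likely than the observation
    return numpy.sum(simulated_ll <= obs_ll) / simulated_ll.size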
