diff --git a/ml_metrics/_src/aggregates/rolling_stats.py b/ml_metrics/_src/aggregates/rolling_stats.py index 72bac484..773aaedb 100644 --- a/ml_metrics/_src/aggregates/rolling_stats.py +++ b/ml_metrics/_src/aggregates/rolling_stats.py @@ -356,7 +356,7 @@ class SymmetricPredictionDifference(base.MergeableMetric): num_samples: int = 0 sum_half_pointwise_rel_diff: float = 0 - # TODO: b/356933410 - Add k_epsilon. + k_epsilon: float = 1e-7 # Minimum value to divide by. def add( self, x: types.NumbersT, y: types.NumbersT @@ -373,9 +373,8 @@ def add( self.num_samples += x.size - # TODO: b/356933410 - Add logic for k_epsilon. self.sum_half_pointwise_rel_diff += np.sum( - math_utils.safe_divide(np.abs(x - y), np.abs(x + y)) + np.abs(math_utils.safe_divide(x - y, x + y, k_epsilon=self.k_epsilon)) ) return self diff --git a/ml_metrics/_src/aggregates/rolling_stats_test.py b/ml_metrics/_src/aggregates/rolling_stats_test.py index b0938228..08e49f1c 100644 --- a/ml_metrics/_src/aggregates/rolling_stats_test.py +++ b/ml_metrics/_src/aggregates/rolling_stats_test.py @@ -607,7 +607,7 @@ def test_r_regression_valid_and_0_input(self): np.testing.assert_almost_equal(actual_result, expected_result) -class SymmetricPredictionDifferenceTest(absltest.TestCase): +class SymmetricPredictionDifferenceTest(parameterized.TestCase): def test_symmetric_prediction_difference_merge(self): x_1 = (0, 1) @@ -725,6 +725,65 @@ def test_symmetric_prediction_difference_absolute_returns_nan(self): ) ) + @parameterized.named_parameters( + dict( + testcase_name='k_epsilon_1e-7', + k_epsilon=1e-7, + # 2e-8 + 1e-8 = 3e-8 < k_epsilon = 1e-7. + # 2 * (1 - 0.3) / (1 + 0.3) / 3 = 0.358974358974 + expected_result=0.358974358974, + ), + dict( + testcase_name='k_epsilon_3e-8', + k_epsilon=3e-8, + # 1e-8 + 2e-8 = 3e-8 = k_epsilon = 3e-8; the float64 sum rounds just above k_epsilon, so this pair is included.
+ # 2 * ((1 - 0.3) / (1 + 0.3) + (2e-8 - 1e-8) / (2e-8 + 1e-8)) / 3 + # 2 * (0.7 / 1.3 + 1 / 3) / 3 = 0.581196581197 + expected_result=0.581196581197, + ), + dict( + testcase_name='k_epsilon_1e-9', + k_epsilon=1e-9, + # 2e-8 + 1e-8 = 3e-8 > k_epsilon = 1e-9. + expected_result=0.581196581197, + ), + ) + def test_symmetric_prediction_difference_k_epsilon_single_small_batch( + self, k_epsilon, expected_result + ): + x = (0, 1, 1e-8) + y = (0, 0.3, 2e-8) + + actual_result = ( + rolling_stats.SymmetricPredictionDifference(k_epsilon=k_epsilon) + .add(x, y) + .result() + ) + + self.assertAlmostEqual(actual_result, expected_result, places=12) + + @parameterized.named_parameters( + dict(testcase_name='within_k_epsilon', data_max=1e-8, expected_result=0), + dict( + testcase_name='partially_outside_k_epsilon', + data_max=1e-6, + expected_result=3.190964660292838, + ), + ) + def test_symmetric_prediction_difference_k_epsilon_many_large_batches( + self, data_max, expected_result + ): + np.random.seed(seed=0) + + x = np.random.uniform(low=-data_max, high=data_max, size=(1000, 1000)) + y = np.random.uniform(low=-data_max, high=data_max, size=(1000, 1000)) + + state = rolling_stats.SymmetricPredictionDifference() + for x_i, y_i in zip(x, y): + state.add(x_i, y_i) + + self.assertAlmostEqual(state.result(), expected_result, places=11) + def test_symmetric_prediction_difference_asserts_with_invalid_input(self): # x.shape != y.shape x = (1, 2, 3) diff --git a/ml_metrics/_src/utils/math_utils.py b/ml_metrics/_src/utils/math_utils.py index 46d1b512..92d355bc 100644 --- a/ml_metrics/_src/utils/math_utils.py +++ b/ml_metrics/_src/utils/math_utils.py @@ -26,10 +26,25 @@ def pos_sqrt(value): return np.sqrt(value) -def safe_divide(a, b): - """Divide arguments element-wise (a / b), but returns zero(s) if b is 0.""" +def safe_divide(x1, x2, k_epsilon=0): + """Divide arguments element-wise (x1 / x2). + + Returns zero(s) if abs(x2) <= k_epsilon. + + Args: + x1: Dividend array.
+ x2: Divisor array. + k_epsilon: The minimum value of abs(x2) to divide by. + + Returns: + The quotient x1 / x2, element-wise. This is a scalar if both x1 and x2 are + scalars. + """ result = np.divide( - a, b, out=np.zeros_like(a, dtype=types.DefaultDType), where=(b != 0) + x1, + x2, + out=np.zeros_like(x1, dtype=types.DefaultDType), + where=np.abs(x2) > k_epsilon, ) return result.item() if result.ndim == 0 else result diff --git a/ml_metrics/_src/utils/math_utils_test.py b/ml_metrics/_src/utils/math_utils_test.py index 2b2c82bb..6b00ed74 100644 --- a/ml_metrics/_src/utils/math_utils_test.py +++ b/ml_metrics/_src/utils/math_utils_test.py @@ -49,6 +49,18 @@ def test_safe_divide(self, a, b, expected_result): else: self.assertAlmostEqual(result, expected_result) + @parameterized.named_parameters( + ('denominator_greater_than_k_epsilon', 1e-6, 1 / 1e-6), + ('denominator_less_than_neg_k_epsilon', -1e-6, -1 / 1e-6), + ('denominator_less_than_k_epsilon', 1e-8, 0), + ('denominator_greater_than_neg_k_epsilon', -1e-8, 0), + ('denominator_equal_to_k_epsilon', 1e-7, 0), + ) + def test_safe_divide_k_epsilon(self, denominator, expected_result): + self.assertEqual( + math_utils.safe_divide(1, denominator, k_epsilon=1e-7), expected_result + ) + # Original Tests safe_to_scalar tests from: # tensorflow_model_analysis/metrics/metric_util_test.py @parameterized.named_parameters(