Skip to content

Commit e3bb2b1

Browse files
rantolin and cayetanobv authored
Bug/sc 458619/allow negative values on most common approx (#159)
Co-authored-by: cayetanobv <[email protected]>
1 parent 4db2d59 commit e3bb2b1

File tree

2 files changed

+37
-5
lines changed

2 files changed

+37
-5
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
<!-- insertion marker -->
99

10+
## [0.10.2] 2025-01-14
11+
12+
<small>[Compare with latest](https://github.com/CartoDB/raster-loader/compare/v0.10.1...HEAD)</small>
13+
14+
### Fixed
15+
16+
- Fix: OverflowError when casting approx sum to integer ([46cab53](https://github.com/CartoDB/raster-loader/commit/46cab53bbf71a86a7df784922956eb03f9dbb327) by Roberto Antolín).
17+
- Fix: Compute approximate most common negative values ([f9f5ff5](https://github.com/CartoDB/raster-loader/commit/f9f5ff5010b1aea0d13afbea6d1869d4094fa7d7) by Roberto Antolín).
18+
1019
## [0.10.1] 2025-01-13
1120

1221
<small>[Compare with latest](https://github.com/CartoDB/raster-loader/compare/57d55999704fb003da2947db65d5617e27c5c104...HEAD)</small>

raster_loader/io/common.py

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,12 @@ def not_enough_samples():
447447
)
448448
if not raster_is_masked:
449449
for band in bands:
450-
not_masked_samples[band].append(sample[band - 1])
450+
band_sample = sample[band - 1]
451+
is_valid_sample = not (
452+
np.isinf(band_sample) or np.isnan(band_sample)
453+
)
454+
if is_valid_sample:
455+
not_masked_samples[band].append(band_sample)
451456

452457
iterations += 1
453458

@@ -474,10 +479,22 @@ def not_enough_samples():
474479

475480
def most_common_approx(samples: List[Union[int, float]]) -> Dict[int, int]:
    """Compute the most common values in a list of numeric samples.

    Samples are counted in unit-width bins whose first edge is
    ``floor(min(samples))``. This makes negative values countable and means
    a non-integer sample is counted under the integer just below it.

    Args:
        samples: Sampled values. They are expected to be finite, because the
            minimum and maximum are cast to ``int`` to build the bin edges.

    Returns:
        A mapping from integer value to its number of occurrences, in no
        particular order. It holds at most ``DEFAULT_MAX_MOST_COMMON``
        entries, all with a non-zero count, and is empty when ``samples``
        is empty.
    """
    print("Computing most common values...")

    samples_array = np.asarray(samples)
    if samples_array.size == 0:
        # min()/max() raise on an empty array; there is nothing to count.
        return {}

    min_val = int(np.floor(samples_array.min()))
    max_val = int(np.ceil(samples_array.max()))

    # One bin per integer in [min_val, max_val]. The upper bound is
    # max_val + 2 because np.arange excludes its stop value and the edge
    # max_val + 1 is needed to close the bin that starts at max_val.
    # NOTE: memory grows with (max_val - min_val), not with len(samples).
    bins = np.arange(min_val, max_val + 2)

    counts, bin_edges = np.histogram(samples_array, bins=bins)

    # The histogram counts must be used as-is from here on. Recomputing them
    # with np.bincount would fail on negative or non-integer samples and
    # would no longer be aligned with bin_edges, which start at min_val.
    nth = min(DEFAULT_MAX_MOST_COMMON, len(counts))
    idx = np.argpartition(counts, -nth)[-nth:]

    # bin_edges[i] is the left edge of bin i, i.e. the integer being counted.
    return {int(bin_edges[i]): int(counts[i]) for i in idx if counts[i] > 0}
481498

482499

483500
def compute_quantiles(data: List[Union[int, float]], cast_function: Callable) -> dict:
@@ -518,8 +535,14 @@ def raster_band_approx_stats(
518535
_sum = 0
519536
sum_squares = 0
520537
if count > 0:
521-
_sum = int(np.sum(samples_band))
522-
sum_squares = int(np.sum(np.array(samples_band) ** 2))
538+
try:
539+
_sum = int(np.sum(samples_band))
540+
except (OverflowError, ValueError):
541+
_sum = 0
542+
try:
543+
sum_squares = int(np.sum(np.array(samples_band) ** 2))
544+
except (OverflowError, ValueError):
545+
sum_squares = 0
523546

524547
if basic_stats:
525548
quantiles = None

0 commit comments

Comments
 (0)