Skip to content

Commit c783e57

Browse files
committed
Merge branch 'main' into api-timedelta-constructor
2 parents 078ea5c + 945385d commit c783e57

File tree

74 files changed

+509
-345
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

74 files changed

+509
-345
lines changed

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ ci:
1919
skip: [pyright, mypy]
2020
repos:
2121
- repo: https://github.com/astral-sh/ruff-pre-commit
22-
rev: v0.14.3
22+
rev: v0.14.7
2323
hooks:
2424
- id: ruff
2525
args: [--exit-non-zero-on-fix]
@@ -71,7 +71,7 @@ repos:
7171
hooks:
7272
- id: isort
7373
- repo: https://github.com/asottile/pyupgrade
74-
rev: v3.21.0
74+
rev: v3.21.2
7575
hooks:
7676
- id: pyupgrade
7777
args: [--py311-plus]
@@ -87,12 +87,12 @@ repos:
8787
types: [text] # overwrite types: [rst]
8888
types_or: [python, rst]
8989
- repo: https://github.com/sphinx-contrib/sphinx-lint
90-
rev: v1.0.1
90+
rev: v1.0.2
9191
hooks:
9292
- id: sphinx-lint
9393
args: ["--enable", "all", "--disable", "line-too-long"]
9494
- repo: https://github.com/pre-commit/mirrors-clang-format
95-
rev: v21.1.2
95+
rev: v21.1.6
9696
hooks:
9797
- id: clang-format
9898
files: ^pandas/_libs/src|^pandas/_libs/include

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,8 @@ In cases with mixed-resolution inputs, the highest resolution is used:
384384
385385
.. warning:: Many users will now get "M8[us]" dtype data in cases when they used to get "M8[ns]". For most use cases they should not notice a difference. One big exception is converting to integers, which will give integers 1000x smaller.
386386

387+
Similarly, the :class:`Timedelta` constructor and :func:`to_timedelta` with a string input now default to a microsecond unit, using a nanosecond unit only in cases that actually have nanosecond precision.
388+
387389
.. _whatsnew_300.api_breaking.concat_datetime_sorting:
388390

389391
:func:`concat` no longer ignores ``sort`` when all objects have a :class:`DatetimeIndex`
@@ -1277,7 +1279,8 @@ Groupby/resample/rolling
12771279
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
12781280
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
12791281
- Bug in :meth:`Rolling.sem` computing incorrect results because it divided by ``sqrt((n - 1) * (n - ddof))`` instead of ``sqrt(n * (n - ddof))``. (:issue:`63180`)
1280-
- Bug in :meth:`Rolling.skew` incorrectly computing skewness for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`)
1282+
- Bug in :meth:`Rolling.skew` and in :meth:`Rolling.kurt` incorrectly computing skewness and kurtosis, respectively, for windows following outliers due to numerical instability. The calculation now properly handles catastrophic cancellation by recomputing affected windows (:issue:`47461`, :issue:`61416`)
1283+
- Bug in :meth:`Rolling.skew` and in :meth:`Rolling.kurt` where results varied with input length despite identical data and window contents (:issue:`54380`)
12811284
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
12821285
- Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
12831286
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 41 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import collections
2+
import re
23
import warnings
34

45
from pandas.util._decorators import set_module
@@ -448,11 +449,19 @@ def array_to_timedelta64(
448449
ival = parse_iso_format_string(item)
449450
else:
450451
ival = parse_timedelta_string(item)
452+
if (
453+
(infer_reso or creso == NPY_DATETIMEUNIT.NPY_FR_us)
454+
and not needs_nano_unit(ival, item)
455+
):
456+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
457+
ival = ival // 1000
458+
else:
459+
item_reso = NPY_FR_ns
451460

452-
item_reso = NPY_FR_ns
453-
state.update_creso(item_reso)
454-
if infer_reso:
455-
creso = state.creso
461+
if ival != NPY_NAT:
462+
state.update_creso(item_reso)
463+
if infer_reso:
464+
creso = state.creso
456465

457466
elif is_tick_object(item):
458467
item_reso = get_supported_reso(item._creso)
@@ -722,6 +731,24 @@ cdef timedelta_from_spec(object number, object frac, object unit):
722731
return cast_from_unit(float(n), unit)
723732

724733

734+
cdef bint needs_nano_unit(int64_t ival, str item):
735+
"""
736+
Check if a passed string `item` needs to be stored with nano unit or can
737+
use microsecond instead. Needs nanoseconds if:
738+
739+
- if the parsed value in nanoseconds has sub-microseconds content -> certainly
740+
needs nano
741+
- if the seconds part in the string contains more than 6 decimals, i.e. has
742+
trailing zeros beyond the microsecond part (e.g. "0.123456000 s") -> treat
743+
as nano for consistency
744+
- if the string explicitly contains an entry for nanoseconds (e.g. "1000 ns")
745+
"""
746+
# TODO: more performant way of doing this check?
# `ival` is the parsed value in nanoseconds (per the docstring), so a non-zero
# remainder modulo 1000 means there is sub-microsecond content.
747+
if ival % 1000 != 0:
748+
return True
749+
# Textual checks on the original string, evaluated left to right:
#   1. the regex looks for a literal "." followed by seven digits, i.e. a
#      decimal part longer than microsecond precision;
#   2. a case-sensitive substring test for "ns";
#   3. a case-insensitive substring test for "nano".
# The expression yields either a match object/None or a bool and relies on
# coercion to the declared `bint` return type.
# NOTE(review): checks 2 and 3 are plain substring matches with different
# case handling -- confirm that no other accepted unit spelling contains
# "ns", and whether an upper-case "NS" should also count.
return re.search(r"\.\d{7}", item) or "ns" in item or "nano" in item.lower()
750+
751+
725752
cpdef inline str parse_timedelta_unit(str unit):
726753
"""
727754
Parameters
@@ -2128,10 +2155,17 @@ class Timedelta(_Timedelta):
21282155
if (len(value) > 0 and value[0] == "P") or (
21292156
len(value) > 1 and value[:2] == "-P"
21302157
):
2131-
value = parse_iso_format_string(value)
2158+
ival = parse_iso_format_string(value)
2159+
else:
2160+
ival = parse_timedelta_string(value)
2161+
2162+
if not needs_nano_unit(ival, value):
2163+
# If we don't specifically need nanosecond resolution, default
2164+
# to microsecond like we do for datetimes
2165+
value = np.timedelta64(ival // 1000, "us")
2166+
return cls(value)
21322167
else:
2133-
value = parse_timedelta_string(value)
2134-
value = np.timedelta64(value)
2168+
value = np.timedelta64(ival, "ns")
21352169
elif PyDelta_Check(value):
21362170
# pytimedelta object -> microsecond resolution
21372171
new_value = delta_to_nanoseconds(

0 commit comments

Comments
 (0)