Skip to content

Commit 093cbb8

Browse files
authored
Fixed problematic date parsing (#888)
We had some trouble with datetime strings coming back from the server not parsing correctly, so I added more flexible parsing.
1 parent 0727ca8 commit 093cbb8

File tree

3 files changed

+34
-2
lines changed

3 files changed

+34
-2
lines changed

paperqa/types.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
encode_id,
3131
format_bibtex,
3232
get_citenames,
33+
maybe_get_date,
3334
)
3435
from paperqa.version import __version__ as pqa_version
3536

@@ -585,8 +586,8 @@ def populate_bibtex_key_citation( # noqa: PLR0912
585586
"pages": data.get("pages"),
586587
"month": (
587588
None
588-
if not data.get("publication_date")
589-
else data["publication_date"].strftime("%b")
589+
if not (maybe_date := maybe_get_date(data.get("publication_date")))
590+
else maybe_date.strftime("%b")
590591
),
591592
"doi": data.get("doi"),
592593
"url": data.get("doi_url"),

paperqa/utils.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,3 +557,25 @@ def citation_to_docname(citation: str) -> str:
557557
if match is not None:
558558
year = match.group(1)
559559
return f"{author}{year}"
560+
561+
562+
def maybe_get_date(date: str | datetime | None) -> datetime | None:
    """Best-effort conversion of a date-like value into a ``datetime``.

    Args:
        date: A date string, an already-parsed ``datetime``, or None.

    Returns:
        None when ``date`` is falsy (None or an empty string) or is a string
        that matches none of the supported formats. A parsed ``datetime`` when
        ``date`` is a recognized string. Any other truthy non-string value is
        returned unchanged.
    """
    if not date:
        return None
    if not isinstance(date, str):
        return date

    # Surrounding whitespace would otherwise make every format below fail.
    text = date.strip()

    # fromisoformat covers ISO 8601 variants the explicit patterns miss, such
    # as "2023-01-31T14:30:00" (no offset) and fractional seconds.
    try:
        return datetime.fromisoformat(text)
    except ValueError:
        pass

    # Fallbacks for non-ISO layouts, and for ISO forms that fromisoformat
    # rejects on older Python versions (e.g. "+0000" or "Z" offsets).
    formats = (
        "%Y-%m-%dT%H:%M:%S%z",  # ISO with timezone: 2023-01-31T14:30:00+0000
        "%Y-%m-%dT%H:%M:%S.%f%z",  # as above with fractional seconds
        "%Y-%m-%d %H:%M:%S",  # ISO with time: 2023-01-31 14:30:00
        "%B %d, %Y",  # Full month day, year: January 31, 2023
        "%b %d, %Y",  # Month day, year: Jan 31, 2023
        "%Y-%m-%d",  # ISO format: 2023-01-31
    )
    for fmt in formats:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            continue
    return None

tests/test_paperqa.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
from paperqa.utils import (
5151
extract_score,
5252
get_citenames,
53+
maybe_get_date,
5354
maybe_is_html,
5455
maybe_is_text,
5556
name_in_text,
@@ -1644,3 +1645,11 @@ def test_fallback_non_json(self, input_text: str) -> None:
16441645
)
16451646
def test_llm_parse_json_with_escaped_characters(self, input_text, expected_output):
16461647
assert llm_parse_json(input_text) == expected_output
1648+
1649+
1650+
def test_maybe_get_date():
    """Check maybe_get_date across each supported string format and the pass-through/None paths."""
    # ISO date and date-time strings
    assert maybe_get_date("2023-01-01") == datetime(2023, 1, 1)
    assert maybe_get_date("2023-01-31 14:30:00") == datetime(2023, 1, 31, 14, 30)

    # Timezone-aware ISO string: checked via its offset and naive value so the
    # test needs nothing beyond the datetime class.
    aware = maybe_get_date("2023-01-31T14:30:00+0000")
    assert aware is not None
    assert aware.utcoffset().total_seconds() == 0
    assert aware.replace(tzinfo=None) == datetime(2023, 1, 31, 14, 30)

    # Month-name formats (full and abbreviated)
    assert maybe_get_date("January 31, 2023") == datetime(2023, 1, 31)
    assert maybe_get_date("Jan 31, 2023") == datetime(2023, 1, 31)

    # An existing datetime is passed through
    assert maybe_get_date(datetime(2023, 1, 1)) == datetime(2023, 1, 1)

    # Unparseable or missing input yields None
    assert maybe_get_date("foo") is None
    assert maybe_get_date("") is None
    assert maybe_get_date(None) is None

0 commit comments

Comments
 (0)