From ffc1993bbf5c47a871c32df419f0018b853ed373 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 20 Feb 2025 16:23:53 -0500 Subject: [PATCH 01/12] Add validation and type conversion to interval init --- src/undate/undate.py | 4 ++++ tests/test_converters/test_edtf.py | 8 ++++---- tests/test_undate.py | 12 ++++++++---- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/undate/undate.py b/src/undate/undate.py index 2008914..f2d5300 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -72,6 +72,10 @@ def __init__( label: Optional[str] = None, calendar: Optional[Union[str, Calendar]] = None, ): + # everything is optional but something is required + if all([val is None for val in [year, month, day]]): + raise ValueError("At least one of year, month, or day must be specified") + # keep track of initial values and which values are known # TODO: add validation: if str, must be expected length self.initial_values: Dict[str, Optional[Union[int, str]]] = { diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index 5210e98..5c98446 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -64,8 +64,8 @@ def test_to_string(self): # if converter can't generate a string for the date, # it should return a value error - empty_undate = Undate() - empty_undate.precision = DatePrecision.DECADE - with pytest.raises(ValueError): - EDTFDateConverter().to_string(empty_undate) + # empty_undate = Undate() # undate with no date information no longer supported + # empty_undate.precision = DatePrecision.DECADE + # with pytest.raises(ValueError): + # EDTFDateConverter().to_string(empty_undate) # TODO: override missing digit and confirm replacement diff --git a/tests/test_undate.py b/tests/test_undate.py index 8f8a5c8..46fe973 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -132,7 +132,10 @@ def test_calendar(self): def test_init_invalid(self): with pytest.raises(ValueError): - 
Undate("19xx") + Undate("19??") + + with pytest.raises(ValueError, match="At least one of year, month, or day"): + Undate() def test_invalid_date(self): # invalid month should raise an error @@ -156,10 +159,11 @@ def test_year_property(self): # unset year assert Undate(month=12, day=31).year == "XXXX" + # NOTE: no longer supported to initialize undate with no date information # force method to hit conditional for date precision - some_century = Undate() - some_century.precision = DatePrecision.CENTURY - assert some_century.year is None + # some_century = Undate() + # some_century.precision = DatePrecision.CENTURY + # assert some_century.year is None def test_month_property(self): # one, two digit month From 710c66a9a50c16d015bb412eceea29162b837865 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 20 Feb 2025 17:03:31 -0500 Subject: [PATCH 02/12] Implement & test an intersection method for UndateInterval --- src/undate/interval.py | 24 ++++++++++++++++++++++++ tests/test_interval.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/src/undate/interval.py b/src/undate/interval.py index 33ec200..eb91297 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -122,3 +122,27 @@ def duration(self) -> Timedelta: # is there any meaningful way to calculate duration # if one year is known and the other is not? raise NotImplementedError + + def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: + """Determine the intersection or overlap between two :class:`UndateInterval` + objects and return a new interval, or None if no overlap. 
+ """ + try: + # when both values are defined, return the inner bounds; + # if not, return whichever is not None, or None + earliest = ( + max(self.earliest, other.earliest) + if self.earliest and other.earliest + else self.earliest or other.earliest + ) + latest = ( + min(self.latest, other.latest) + if self.latest and other.latest + else self.latest or other.latest + ) + + # if this results in an invalid interval, initialization + # will throw an exception + return UndateInterval(earliest, latest) + except ValueError: + return None diff --git a/tests/test_interval.py b/tests/test_interval.py index dea8710..3d49179 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -143,3 +143,35 @@ def test_duration(self): # one year set and the other not currently raises not implemented error with pytest.raises(NotImplementedError): UndateInterval(Undate(2000), Undate(month=10)).duration() + + def test_intersection(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + # no intersection + assert century11th.intersection(century20th) is None + # should work in either direction + assert century20th.intersection(century11th) is None + + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # intersection of an interval completely contained in another + # returns an interval equivalent to the smaller one + assert century20th.intersection(decade1990s) == decade1990s + assert decade1990s.intersection(century20th) == decade1990s + + # partial overlap + nineties_oughts = UndateInterval(Undate(1990), Undate(2009)) + assert century20th.intersection(nineties_oughts) == UndateInterval( + Undate(1990), Undate(2000) + ) + + # intersections between half open intervals + after_c11th = UndateInterval(Undate(1001), None) + assert after_c11th.intersection(century20th) == century20th + assert after_c11th.intersection(decade1990s) == decade1990s + + before_20th = UndateInterval(None, Undate(1901)) + assert 
before_20th.intersection(decade1990s) is None + assert before_20th.intersection(century11th) == century11th + assert before_20th.intersection(after_c11th) == UndateInterval( + Undate(1001), Undate(1901) + ) From 298bb19ba8911dd98a1f4b804750fa30f56196db Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 21 Feb 2025 11:38:48 -0500 Subject: [PATCH 03/12] Make conversion to undate more reusable and extensible --- src/undate/interval.py | 23 +++++++++------------ src/undate/undate.py | 45 +++++++++++++++++++++++++++--------------- tests/test_undate.py | 11 ++++++++--- 3 files changed, 46 insertions(+), 33 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index eb91297..33c3046 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -1,5 +1,3 @@ -import datetime - # Pre 3.10 requires Union for multiple types, e.g. Union[int, None] instead of int | None from typing import Optional, Union @@ -34,21 +32,18 @@ def __init__( latest: Optional[Undate] = None, label: Optional[str] = None, ): - # for now, assume takes two undate objects; - # support conversion from datetime - if earliest and not isinstance(earliest, Undate): - # NOTE: some overlap with Undate._comparison_type method - # maybe support conversion from other formats later - if isinstance(earliest, datetime.date): - earliest = Undate.from_datetime_date(earliest) - else: + # takes two undate objects; allows conversion from supported types + if earliest: + try: + earliest = Undate.to_undate(earliest) + except TypeError: raise ValueError( f"earliest date {earliest} cannot be converted to Undate" ) - if latest and not isinstance(latest, Undate): - if isinstance(latest, datetime.date): - latest = Undate.from_datetime_date(latest) - else: + if latest: + try: + latest = Undate.to_undate(latest) + except TypeError: raise ValueError(f"latest date {latest} cannot be converted to Undate") # check that the interval is valid diff --git a/src/undate/undate.py b/src/undate/undate.py index 
f2d5300..1b9671e 100644 --- a/src/undate/undate.py +++ b/src/undate/undate.py @@ -2,11 +2,13 @@ import datetime from enum import auto + import re from typing import TYPE_CHECKING if TYPE_CHECKING: from undate.interval import UndateInterval + try: # StrEnum was only added in python 3.11 from enum import StrEnum @@ -246,23 +248,19 @@ def format(self, format) -> str: raise ValueError(f"Unsupported format '{format}'") - def _comparison_type(self, other: object) -> "Undate": + @classmethod + def _comparison_type(cls, other: object) -> "Undate": """Common logic for type handling in comparison methods. Converts to Undate object if possible, otherwise raises - NotImplemented error. Currently only supports conversion - from :class:`datetime.date` + NotImplementedError exception. Uses :meth:`to_undate` for conversion. """ - - # support datetime.date by converting to undate - if isinstance(other, datetime.date): - other = Undate.from_datetime_date(other) - - # recommended to support comparison with arbitrary objects - if not isinstance(other, Undate): + # convert if possible; return NotImplemented if not + try: + return cls.to_undate(other) + except TypeError: + # recommended to support comparison with arbitrary objects return NotImplemented - return other - def __eq__(self, other: object) -> bool: # Note: assumes label differences don't matter for comparing dates @@ -272,6 +270,8 @@ def __eq__(self, other: object) -> bool: other = self._comparison_type(other) if other is NotImplemented: + # return NotImplemented to indicate comparison is not supported + # with this type return NotImplemented # if both dates are fully known, then earliest/latest check @@ -363,10 +363,23 @@ def __contains__(self, other: object) -> bool: ] ) - @staticmethod - def from_datetime_date(dt_date: datetime.date): - """Initialize an :class:`Undate` object from a :class:`datetime.date`""" - return Undate(dt_date.year, dt_date.month, dt_date.day) + @classmethod + def to_undate(cls, other: object) -> 
"Undate": + """Convert arbitrary object to Undate, if possible. Raises TypeError + if conversion is not possible. + + Currently supports: + - :class:`datetime.date` or :class:`datetime.datetime` + + """ + match other: + case Undate(): + return other + case datetime.date() | datetime.datetime(): + return Undate(other.year, other.month, other.day) + + case _: + raise TypeError(f"Conversion from {type(other)} is not supported") @property def known_year(self) -> bool: diff --git a/tests/test_undate.py b/tests/test_undate.py index 46fe973..b3ba4fe 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -1,4 +1,4 @@ -from datetime import date +from datetime import date, datetime import pytest @@ -142,11 +142,16 @@ def test_invalid_date(self): with pytest.raises(ValueError): Undate(1990, 22) - def test_from_datetime_date(self): - undate_from_date = Undate.from_datetime_date(date(2001, 3, 5)) + def test_to_undate(self): + undate_from_date = Undate.to_undate(date(2001, 3, 5)) assert isinstance(undate_from_date, Undate) assert undate_from_date == Undate(2001, 3, 5) + now = datetime.now() + undate_from_dt = Undate.to_undate(now) + assert isinstance(undate_from_dt, Undate) + assert undate_from_dt == Undate(now.year, now.month, now.day) + # test properties for accessing parts of date def test_year_property(self): # two, three, four five digit years; numeric and string From fc4f7a92e693e5e068503bbd7ced47f989560a1e Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 21 Feb 2025 11:51:16 -0500 Subject: [PATCH 04/12] Drop support for python 3.9 so we can use match/case --- .github/workflows/unit_tests.yml | 2 +- pyproject.toml | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 17a1c7a..381b231 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: ["3.9", "3.10", "3.11", 
"3.12", "3.13"] + python: ["3.10", "3.11", "3.12", "3.13"] defaults: run: working-directory: . diff --git a/pyproject.toml b/pyproject.toml index f1ad9a7..8bcf839 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "undate" description = "library for working with uncertain, fuzzy, or partially unknown dates and date intervals" readme = "README.md" license = { text = "Apache-2" } -requires-python = ">= 3.9" +requires-python = ">= 3.10" dynamic = ["version"] dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] authors = [ @@ -31,7 +31,6 @@ keywords = [ classifiers = [ "Development Status :: 2 - Pre-Alpha", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", From a4f2e7bd322d74b2db6ef4684b8ef25f9a0f7a86 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Fri, 21 Feb 2025 12:01:53 -0500 Subject: [PATCH 05/12] Add more type checks and tests --- src/undate/interval.py | 3 +++ tests/test_interval.py | 6 ++++++ tests/test_undate.py | 4 ++++ 3 files changed, 13 insertions(+) diff --git a/src/undate/interval.py b/src/undate/interval.py index 33c3046..4472d67 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -73,6 +73,9 @@ def __repr__(self) -> str: return "" % self def __eq__(self, other) -> bool: + # currently doesn't support comparison with any other types + if not isinstance(other, UndateInterval): + return NotImplemented # consider interval equal if both dates are equal return self.earliest == other.earliest and self.latest == other.latest diff --git a/tests/test_interval.py b/tests/test_interval.py index 3d49179..254f3c7 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -82,6 +82,12 @@ def test_eq(self): ) assert UndateInterval(Undate(2022, 5)) == UndateInterval(Undate(2022, 5)) + def test_eq_type_check(self): + # doesn't 
currently support comparison with anything else + interval = UndateInterval(Undate(900)) + # returns NotIplemented if comparison with this type is not supported + assert interval.__eq__("foo") == NotImplemented + def test_not_eq(self): assert UndateInterval(Undate(2022), Undate(2023)) != UndateInterval( Undate(2022), Undate(2024) diff --git a/tests/test_undate.py b/tests/test_undate.py index b3ba4fe..a9087c2 100644 --- a/tests/test_undate.py +++ b/tests/test_undate.py @@ -152,6 +152,10 @@ def test_to_undate(self): assert isinstance(undate_from_dt, Undate) assert undate_from_dt == Undate(now.year, now.month, now.day) + # unsupported type + with pytest.raises(TypeError): + Undate.to_undate("foo") + # test properties for accessing parts of date def test_year_property(self): # two, three, four five digit years; numeric and string From b09c9fc5a354ccb9f4e172b76818a0630607ebb8 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Wed, 5 Mar 2025 19:12:42 -0500 Subject: [PATCH 06/12] Remove unused import --- tests/test_converters/test_edtf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_converters/test_edtf.py b/tests/test_converters/test_edtf.py index 5c98446..3262e46 100644 --- a/tests/test_converters/test_edtf.py +++ b/tests/test_converters/test_edtf.py @@ -1,6 +1,5 @@ import pytest from undate.converters.edtf import EDTFDateConverter -from undate.date import DatePrecision from undate import Undate, UndateInterval From f06960ae7273d5f4257680a2675f91db7ae92f73 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 16:11:06 -0400 Subject: [PATCH 07/12] Use raise from err on type error in interval init Based on @coderabbitai feedback --- src/undate/interval.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index 4472d67..262bd5b 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -36,15 +36,17 @@ def __init__( if earliest: try: earliest = 
Undate.to_undate(earliest) - except TypeError: + except TypeError as err: raise ValueError( f"earliest date {earliest} cannot be converted to Undate" - ) + ) from err if latest: try: latest = Undate.to_undate(latest) - except TypeError: - raise ValueError(f"latest date {latest} cannot be converted to Undate") + except TypeError as err: + raise ValueError( + f"latest date {latest} cannot be converted to Undate" + ) from err # check that the interval is valid if latest and earliest and latest <= earliest: @@ -123,7 +125,7 @@ def duration(self) -> Timedelta: def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: """Determine the intersection or overlap between two :class:`UndateInterval` - objects and return a new interval, or None if no overlap. + objects and return a new interval. Returns None if there is no overlap. """ try: # when both values are defined, return the inner bounds; From 9ee14ef2ed46fc213d5cdf14987da450a8df2381 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 16:48:03 -0400 Subject: [PATCH 08/12] Add and test contains/in method for interval --- src/undate/interval.py | 39 ++++++++++++++++++++++++++++++++-- tests/test_interval.py | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 2 deletions(-) diff --git a/src/undate/interval.py b/src/undate/interval.py index 262bd5b..8e6cd2f 100644 --- a/src/undate/interval.py +++ b/src/undate/interval.py @@ -23,8 +23,8 @@ class UndateInterval: latest: Union[Undate, None] label: Union[str, None] - # TODO: let's think about adding an optional precision / length /size field - # using DatePrecision + # TODO: think about adding an optional precision / length /size field + # using DatePrecision for intervals of any standard duration (decade, century) def __init__( self, @@ -123,6 +123,41 @@ def duration(self) -> Timedelta: # if one year is known and the other is not? 
raise NotImplementedError + def __contains__(self, other: object) -> bool: + """Determine if another interval or date falls within this + interval.""" + # support comparison with another interval + if isinstance(other, UndateInterval): + # if two intervals are strictly equal, don't consider + # either one as containing the other + if self == other: + return False + # otherwise compare based on earliest/latest bounds + other_earliest = other.earliest + other_latest = other.latest + else: + # otherwise, try to convert to an Undate + try: + other = Undate.to_undate(other) + other_latest = other_earliest = other + except TypeError: + # if conversion fails, then we don't support comparison + raise + + # if either bound of the current interval is None, + # then it is an open interval and we don't need to check the other value. + # if the other value is set, then check that it falls within the + # bounds of this interval + return ( + self.earliest is None + or other_earliest is not None + and other_earliest >= self.earliest + ) and ( + self.latest is None + or other_latest is not None + and other_latest <= self.latest + ) + def intersection(self, other: "UndateInterval") -> Optional["UndateInterval"]: """Determine the intersection or overlap between two :class:`UndateInterval` objects and return a new interval. Returns None if there is no overlap. 
diff --git a/tests/test_interval.py b/tests/test_interval.py index 254f3c7..4552c05 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -181,3 +181,51 @@ def test_intersection(self): assert before_20th.intersection(after_c11th) == UndateInterval( Undate(1001), Undate(1901) ) + + def test_contains(self): + century11th = UndateInterval(Undate(1001), Undate(1100)) + century20th = UndateInterval(Undate(1901), Undate(2000)) + decade1990s = UndateInterval(Undate(1990), Undate(1999)) + # an interval doesn't contain itself + for interval in [century11th, century20th, decade1990s]: + assert interval not in interval + + # checking if an interval is within another interval + assert decade1990s in century20th + assert decade1990s not in century11th + assert century11th not in decade1990s + assert century20th not in decade1990s + # a specific date can be contained by an interval + y2k = Undate(2000) + assert y2k in century20th + assert y2k not in century11th + # partially known date should work too + april_someyear = Undate("198X", 4) + assert april_someyear in century20th + assert april_someyear not in century11th + # conversion from datetime.date also works + assert datetime.date(1922, 5, 1) in century20th + # unsupported types result in a type error + with pytest.raises(TypeError): + "nineteen-eighty-four" in century20th + + # contains check with half-open intervals + after_c11th = UndateInterval(Undate(1001), None) + before_20th = UndateInterval(None, Undate(1901)) + # neither of them contains the other + assert after_c11th not in before_20th + assert before_20th not in after_c11th + # nor are they contained by a smaller range + assert after_c11th not in decade1990s + assert before_20th not in decade1990s + + # all of our previous test dates are in the 1900s, + # so they are after the 11th century and not before the 20th + for period in [decade1990s, y2k, april_someyear]: + assert period in after_c11th + assert period not in before_20th + + # fully open 
interval - is this even meaningful? + whenever = UndateInterval(None, None) + assert decade1990s in whenever + assert whenever not in whenever From 700c8348ef9f6e377ce1dab510fdfb2a82105421 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Thu, 13 Mar 2025 18:06:20 -0400 Subject: [PATCH 09/12] Address nitpicks flagged by @coderabbitai --- tests/test_interval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_interval.py b/tests/test_interval.py index 4552c05..40713b1 100644 --- a/tests/test_interval.py +++ b/tests/test_interval.py @@ -85,7 +85,7 @@ def test_eq(self): def test_eq_type_check(self): # doesn't currently support comparison with anything else interval = UndateInterval(Undate(900)) - # returns NotIplemented if comparison with this type is not supported + # returns NotImplemented if comparison with this type is not supported assert interval.__eq__("foo") == NotImplemented def test_not_eq(self): @@ -207,7 +207,7 @@ def test_contains(self): assert datetime.date(1922, 5, 1) in century20th # unsupported types result in a type error with pytest.raises(TypeError): - "nineteen-eighty-four" in century20th + assert "nineteen-eighty-four" in century20th # contains check with half-open intervals after_c11th = UndateInterval(Undate(1001), None) From e547b46c737f0f305375ca46343d0e1d76d34a2b Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Sat, 15 Mar 2025 18:31:31 -0400 Subject: [PATCH 10/12] Preliminary rdflib resource for ISMI CIDOC-CRM dates --- pyproject.toml | 2 +- src/undate/converters/cidoc_crm.py | 68 +++++++++++++++++++++++++ tests/test_converters/test_cidoc_crm.py | 62 ++++++++++++++++++++++ 3 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 src/undate/converters/cidoc_crm.py create mode 100644 tests/test_converters/test_cidoc_crm.py diff --git a/pyproject.toml b/pyproject.toml index 8bcf839..58fb0f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ readme = "README.md" license = { text = "Apache-2" 
} requires-python = ">= 3.10" dynamic = ["version"] -dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'"] +dependencies = ["lark[interegular]", "numpy", "convertdate", "strenum; python_version < '3.11'", "rdflib"] authors = [ { name = "Rebecca Sutton Koeser" }, { name = "Cole Crawford" }, diff --git a/src/undate/converters/cidoc_crm.py b/src/undate/converters/cidoc_crm.py new file mode 100644 index 0000000..7be2cb9 --- /dev/null +++ b/src/undate/converters/cidoc_crm.py @@ -0,0 +1,68 @@ +import rdflib + + +#: CIDOC-CRM namespace +CIDOC_CRM = rdflib.Namespace("http://www.cidoc-crm.org/cidoc-crm/") +ISMI_DATE_TYPE = rdflib.Namespace( + "http://content.mpiwg-berlin.mpg.de/ns/ismi/type/date/" +) +ISMI_CALENDAR_TYPE = rdflib.Namespace( + "http://content.mpiwg-berlin.mpg.de/ns/ismi/type/calendar/" +) + + +class TimeSpan(rdflib.resource.Resource): + @property + def identified_by(self): + return self.value(CIDOC_CRM.P1_is_identified_by) + + @property + def label(self): + return self.identified_by.value(rdflib.RDFS.label) + + @property + def calendar(self): + return self.identified_by.value(CIDOC_CRM.P2_has_type).identifier + + @property + def type(self): + return self.value(CIDOC_CRM.P2_has_type).identifier + + @property + def at_some_time_within(self): + return self.value(CIDOC_CRM.P82_at_some_time_within) + + @property + def begin_of_the_begin(self): + return self.value(CIDOC_CRM.P82a_begin_of_the_begin) + + @property + def end_of_the_end(self): + return self.value(CIDOC_CRM.P82b_end_of_the_end) + + @property + def note(self): + return self.value(CIDOC_CRM.P3_has_note) + + def to_undate(self): + # day precision + if self.type == ISMI_DATE_TYPE.day: + return self.at_some_time_within.toPython() + + @classmethod + def time_spans_from_graph(cls, graph): + """Class method to find and return all CIDOC-CRM timespans in an rdflib graph + and yield them as :class:`TimeSpan` resource objects.""" + for timespan_uri in graph.subjects( + 
predicate=rdflib.RDF.type, object=CIDOC_CRM["E52_Time-Span"] + ): + yield cls(graph, timespan_uri) + + +# # crm:P2_has_type datetype:day ; +# crm:P82_at_some_time_within "1495-12-11"^^xsd:date ; +# crm:P3_has_note "day-precision date in islamic calendar" ; +# crm:P1_is_identified_by :date1-label . +# :date1-label a crm:E41_Appellation ; +# crm:P2_has_type calendartype:islamic ; +# rdfs:label "901 Rabīʿ I 14 (islamic)" . diff --git a/tests/test_converters/test_cidoc_crm.py b/tests/test_converters/test_cidoc_crm.py new file mode 100644 index 0000000..0de86cc --- /dev/null +++ b/tests/test_converters/test_cidoc_crm.py @@ -0,0 +1,62 @@ +import types + +import rdflib + +from undate.converters import cidoc_crm + + +# TODO: maybe copy full example ismi data as fixture +# so we have examples of all types to test against + +sample_data = """ +@prefix rdfs: . +@prefix crm: . +@prefix xsd: . +# prefix for date and calendar type URIs +@prefix datetype: . +@prefix calendartype: . +# prefix for sample data +@prefix : . + +# day-precision date in islamic calendar +:date1 a crm:E52_Time-Span ; + crm:P2_has_type datetype:day ; + crm:P82_at_some_time_within "1495-12-11"^^xsd:date ; + crm:P3_has_note "day-precision date in islamic calendar" ; + crm:P1_is_identified_by :date1-label . +:date1-label a crm:E41_Appellation ; + crm:P2_has_type calendartype:islamic ; + rdfs:label "901 Rabīʿ I 14 (islamic)" . 
+""" + +ISMI_NS = rdflib.Namespace("http://content.mpiwg-berlin.mpg.de/ns/ismi/") +DATE1_URI = rdflib.URIRef("http://content.mpiwg-berlin.mpg.de/ns/ismi/date1") + + +class TestTimeSpan: + def test_properties(self): + # initialize a time span rdflib.resource for date1 in the sample data + g = rdflib.Graph() + g.parse(data=sample_data) + + time_span = cidoc_crm.TimeSpan(g, DATE1_URI) + assert time_span.type == cidoc_crm.ISMI_DATE_TYPE.day + assert time_span.label == rdflib.term.Literal("901 Rabīʿ I 14 (islamic)") + assert time_span.calendar == cidoc_crm.ISMI_CALENDAR_TYPE.islamic + assert time_span.at_some_time_within == rdflib.term.Literal( + "1495-12-11", datatype=rdflib.XSD.date + ) + assert time_span.note == rdflib.term.Literal( + "day-precision date in islamic calendar" + ) + + def test_time_spans_from_graph(self): + g = rdflib.Graph() + g.parse(data=sample_data) + + time_spans = cidoc_crm.TimeSpan.time_spans_from_graph(g) + assert isinstance(time_spans, types.GeneratorType) + time_spans = list(time_spans) + assert len(time_spans) == 1 + assert isinstance(time_spans[0], cidoc_crm.TimeSpan) + assert time_spans[0].identifier == DATE1_URI From 6f10c67ccc6946eb0aa3be155fd2cbbcc4dfec00 Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Sat, 15 Mar 2025 18:49:26 -0400 Subject: [PATCH 11/12] Preliminary method to convert ISMI dates to Undates --- src/undate/converters/cidoc_crm.py | 33 ++++++++++++++----------- tests/test_converters/test_cidoc_crm.py | 18 ++++++++++++++ 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/undate/converters/cidoc_crm.py b/src/undate/converters/cidoc_crm.py index 7be2cb9..ab08c9f 100644 --- a/src/undate/converters/cidoc_crm.py +++ b/src/undate/converters/cidoc_crm.py @@ -1,5 +1,6 @@ import rdflib +from undate import Undate #: CIDOC-CRM namespace CIDOC_CRM = rdflib.Namespace("http://www.cidoc-crm.org/cidoc-crm/") @@ -14,18 +15,23 @@ class TimeSpan(rdflib.resource.Resource): @property def identified_by(self): + # by default, 
rdflib resource value method will return another Resource return self.value(CIDOC_CRM.P1_is_identified_by) @property def label(self): + # for ISMI records, label is under the crm identifier/appellation + # other examples have it directly under the time span as RDFS.label return self.identified_by.value(rdflib.RDFS.label) @property def calendar(self): + # for ISMI records, calendar type is associated with identifier return self.identified_by.value(CIDOC_CRM.P2_has_type).identifier @property def type(self): + # CIDOC-CRM type return self.value(CIDOC_CRM.P2_has_type).identifier @property @@ -45,24 +51,23 @@ def note(self): return self.value(CIDOC_CRM.P3_has_note) def to_undate(self): - # day precision - if self.type == ISMI_DATE_TYPE.day: - return self.at_some_time_within.toPython() + # convert to an undate object, if possible + match self.type: + # day precision + case ISMI_DATE_TYPE.day: + # at_some_time_within is xsd:date; use toPython method + # to convert to datetime.date and then convert to undate + return Undate.to_undate(self.at_some_time_within.toPython()) + # TODO: should we set label before returning? + + # for ISMI dates, could we parse the label and preserve calendar information? @classmethod def time_spans_from_graph(cls, graph): - """Class method to find and return all CIDOC-CRM timespans in an rdflib graph - and yield them as :class:`TimeSpan` resource objects.""" + """Find and return all entities with CIDOC-CRM type E52 Time-Span + within the rdflib graph and yield them as :class:`TimeSpan` + resources.""" for timespan_uri in graph.subjects( predicate=rdflib.RDF.type, object=CIDOC_CRM["E52_Time-Span"] ): yield cls(graph, timespan_uri) - - -# # crm:P2_has_type datetype:day ; -# crm:P82_at_some_time_within "1495-12-11"^^xsd:date ; -# crm:P3_has_note "day-precision date in islamic calendar" ; -# crm:P1_is_identified_by :date1-label . 
-# :date1-label a crm:E41_Appellation ; -# crm:P2_has_type calendartype:islamic ; -# rdfs:label "901 Rabīʿ I 14 (islamic)" . diff --git a/tests/test_converters/test_cidoc_crm.py b/tests/test_converters/test_cidoc_crm.py index 0de86cc..785aa51 100644 --- a/tests/test_converters/test_cidoc_crm.py +++ b/tests/test_converters/test_cidoc_crm.py @@ -2,6 +2,7 @@ import rdflib +from undate import Undate, DatePrecision from undate.converters import cidoc_crm @@ -36,6 +37,7 @@ class TestTimeSpan: def test_properties(self): # initialize a time span rdflib.resource for date1 in the sample data + # TODO: convert to a fixture g = rdflib.Graph() g.parse(data=sample_data) @@ -60,3 +62,19 @@ def test_time_spans_from_graph(self): assert len(time_spans) == 1 assert isinstance(time_spans[0], cidoc_crm.TimeSpan) assert time_spans[0].identifier == DATE1_URI + + def test_to_undate(self): + g = rdflib.Graph() + g.parse(data=sample_data) + + time_span = cidoc_crm.TimeSpan(g, DATE1_URI) + ts_undate = time_span.to_undate() + assert isinstance(ts_undate, Undate) + # 1495-12-11"^^xsd:date ; + assert ts_undate.year == "1495" + assert ts_undate.month == "12" + assert ts_undate.day == "11" + assert ts_undate.precision == DatePrecision.DAY + + # if we round trip the date it comes out the same + assert ts_undate.format("ISO8601") == str(time_span.at_some_time_within) From aa38603222eef942d5f8d7fd96c30c74258272cf Mon Sep 17 00:00:00 2001 From: rlskoeser Date: Tue, 18 Mar 2025 10:25:19 -0400 Subject: [PATCH 12/12] Use full ismi sample data as test fixture --- tests/test_converters/test_cidoc_crm.py | 69 +++++++++++-------------- 1 file changed, 31 insertions(+), 38 deletions(-) diff --git a/tests/test_converters/test_cidoc_crm.py b/tests/test_converters/test_cidoc_crm.py index 785aa51..cbcb4d0 100644 --- a/tests/test_converters/test_cidoc_crm.py +++ b/tests/test_converters/test_cidoc_crm.py @@ -1,47 +1,45 @@ +import pathlib import types +import pytest import rdflib from undate import Undate, 
DatePrecision from undate.converters import cidoc_crm -# TODO: maybe copy full example ismi data as fixture -# so we have examples of all types to test against +# TODO: move or copy example ismi data to test for use as a fixture +ISMI_DATA_PATH = ( + pathlib.Path(__file__) + / ".." + / ".." + / ".." + / "examples" + / "use-cases" + / "ismi" + / "data" + / "ismi-crm-date-samples.ttl" +) -sample_data = """ -@prefix rdfs: . -@prefix crm: . -@prefix xsd: . -# prefix for date and calendar type URIs -@prefix datetype: . -@prefix calendartype: . -# prefix for sample data -@prefix : . +DATE1_URI = rdflib.URIRef("http://content.mpiwg-berlin.mpg.de/ns/ismi/date1") -# day-precision date in islamic calendar -:date1 a crm:E52_Time-Span ; - crm:P2_has_type datetype:day ; - crm:P82_at_some_time_within "1495-12-11"^^xsd:date ; - crm:P3_has_note "day-precision date in islamic calendar" ; - crm:P1_is_identified_by :date1-label . -:date1-label a crm:E41_Appellation ; - crm:P2_has_type calendartype:islamic ; - rdfs:label "901 Rabīʿ I 14 (islamic)" . 
-""" -ISMI_NS = rdflib.Namespace("http://content.mpiwg-berlin.mpg.de/ns/ismi/") -DATE1_URI = rdflib.URIRef("http://content.mpiwg-berlin.mpg.de/ns/ismi/date1") +@pytest.fixture +def ismi_data(): + g = rdflib.Graph() + g.parse(ISMI_DATA_PATH) + return g class TestTimeSpan: - def test_properties(self): + def test_properties(self, ismi_data): # initialize a time span rdflib.resource for date1 in the sample data # TODO: convert to a fixture - g = rdflib.Graph() - g.parse(data=sample_data) + # g = rdflib.Graph() + # g.parse(ISMI_DATA_PATH) + # g.parse(data=sample_data) - time_span = cidoc_crm.TimeSpan(g, DATE1_URI) + time_span = cidoc_crm.TimeSpan(ismi_data, DATE1_URI) assert time_span.type == cidoc_crm.ISMI_DATE_TYPE.day assert time_span.label == rdflib.term.Literal("901 Rabīʿ I 14 (islamic)") assert time_span.calendar == cidoc_crm.ISMI_CALENDAR_TYPE.islamic @@ -52,22 +50,17 @@ def test_properties(self): "day-precision date in islamic calendar" ) - def test_time_spans_from_graph(self): - g = rdflib.Graph() - g.parse(data=sample_data) - - time_spans = cidoc_crm.TimeSpan.time_spans_from_graph(g) + def test_time_spans_from_graph(self, ismi_data): + time_spans = cidoc_crm.TimeSpan.time_spans_from_graph(ismi_data) assert isinstance(time_spans, types.GeneratorType) time_spans = list(time_spans) - assert len(time_spans) == 1 + # fixture has 9 time spans + assert len(time_spans) == 9 assert isinstance(time_spans[0], cidoc_crm.TimeSpan) assert time_spans[0].identifier == DATE1_URI - def test_to_undate(self): - g = rdflib.Graph() - g.parse(data=sample_data) - - time_span = cidoc_crm.TimeSpan(g, DATE1_URI) + def test_to_undate(self, ismi_data): + time_span = cidoc_crm.TimeSpan(ismi_data, DATE1_URI) ts_undate = time_span.to_undate() assert isinstance(ts_undate, Undate) # 1495-12-11"^^xsd:date ;