diff --git a/.gitignore b/.gitignore
index 2b3ab2c0b5d..7d6d944acf2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,3 +55,4 @@ e2e/test-results
 .aider*
 /tools/server/.lwjgl/
 /tools/server/.lwjgl/
+.m2_repo/
diff --git a/AGENTS.md b/AGENTS.md
index af461b6082d..d8489a780ab 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -129,7 +129,7 @@ After each grouped action, post an **Evidence block**, then continue working:
 **Evidence template**
 ```
 Evidence:
-Command: mvn -o -pl <module> -Dtest=Class#method verify
+Command: mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Dtest=Class#method verify
 Report: <module>/target/surefire-reports/<Class>.txt
 Snippet: \
@@ -153,7 +153,7 @@ To avoid losing the first test evidence when later runs overwrite `target/*-repo
 - Capture and store the last 200 lines of the Maven verify output.
 - Example (module‑scoped):
-  - `mvn -o -pl <module> verify | tee .initial-verify.log`
+  - `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify | tee .initial-verify.log`
   - `tail -200 .initial-verify.log > initial-evidence.txt`
 • On any failing verify run (unit or IT failures):
@@ -195,6 +195,7 @@ Plan
 * **JDK:** 11 (minimum). The project builds and runs on Java 11+.
 * **Maven default:** run **offline** using `-o` whenever possible.
+* **Maven local repo (required):** always pass `-Dmaven.repo.local=.m2_repo` on all Maven commands (install, verify, plugins, formatting). All examples in this document implicitly assume this flag, even if omitted.
 * **Network:** only to fetch missing deps/plugins; then rerun once without `-o`, and return offline.
 * **Large project:** some module test suites can take **5–10 minutes**. Prefer **targeted** runs.
@@ -203,14 +204,14 @@
 `-am` is helpful for **compiles**, hazardous for **tests**.
 * ✅ Use `-am` **only** for compile/verify with tests skipped (e.g. `-Pquick`):
-  * `mvn -o -pl <module> -am -Pquick install`
+  * `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -am -Pquick install`
 * ❌ Do **not** use `-am` with `verify` when tests are enabled.
 **Two-step pattern (fast + safe)**
 1. **Compile deps fast (skip tests):**
-   `mvn -o -pl <module> -am -Pquick install`
+   `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -am -Pquick install`
 2. **Run tests:**
-   `mvn -o -pl <module> verify | tail -500`
+   `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify | tail -500`
 It is illegal to `-am` when running tests! It is illegal to `-q` when running tests!
@@ -219,22 +220,22 @@
 ## Always Install Before Tests (Required)
-The Maven reactor resolves inter-module dependencies from the local Maven repository (`~/.m2/repository`).
+The Maven reactor resolves inter-module dependencies from the configured local Maven repository (here: `.m2_repo`).
 Running `install` publishes your changed modules there so downstream modules and tests pick up the correct versions.
-* Always run `mvn -o -Pquick install | tail -200` before you start working. This command typically takes up to 30 seconds. Never use a timeout smaller than 30,000 ms.
-* Always run `mvn -o -Pquick install | tail -200` before any `verify` or test runs.
+* Always run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` before you start working. This command typically takes up to 30 seconds. Never use a timeout smaller than 30,000 ms.
+* Always run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200` before any `verify` or test runs (see the sketch below).
 * If offline resolution fails due to a missing dependency or plugin, rerun the exact `install` command once without `-o`, then return offline.
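A minimal sketch of this install-then-test loop, using a module and test class that this PR actually touches:

```bash
# 1) Publish every module to the workspace-local repo; tests resolve cross-module jars from here
mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200

# 2) Run the targeted tests without -am; the reactor picks up fresh artifacts from .m2_repo
mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/base \
    -Dtest=SailDatasetImplSizeTest verify | tail -500
```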
 * Skipping this step can lead to stale or missing artifacts during tests, producing confusing compilation or linkage errors.
-* Never ever change the repo location. Never use `-Dmaven.repo.local=.m2_repo`.
+* Always use a workspace-local Maven repository: append `-Dmaven.repo.local=.m2_repo` to all Maven commands (install, verify, formatter, etc.).
 * Always try these commands first to check whether they run without needing any sandboxing approvals from the user.
 Why this is mandatory
-- Tests must not use `-am`. Without `-am`, Maven will not build upstream modules when you run tests; it will resolve cross‑module dependencies from the local `~/.m2/repository` instead.
-- Therefore, tests only see whatever versions were last published to `~/.m2`. If you change code in one module and then run tests in another, those tests will not see your changes unless the updated module has been installed to `~/.m2` first.
-- The reliable way to ensure all tests always use the latest code across the entire multi‑module build is to install all modules to `~/.m2` before running any tests: run `mvn -o -Pquick install` at the repository root.
-- In tight loops you may also install a specific module and its deps (`-pl <module> -am -Pquick install`) to iterate quickly, but before executing tests anywhere that depend on your changes, run a root‑level `mvn -o -Pquick install` so the latest jars are available to the reactor from `~/.m2`.
+- Tests must not use `-am`. Without `-am`, Maven will not build upstream modules when you run tests; it will resolve cross‑module dependencies from the configured local repository (here: `.m2_repo`).
+- Therefore, tests only see whatever versions were last published to the configured local repo (`.m2_repo`). If you change code in one module and then run tests in another, those tests will not see your changes unless the updated module has been installed to `.m2_repo` first.
+- The reliable way to ensure all tests always use the latest code across the entire multi‑module build is to install all modules to the configured local repo (`.m2_repo`) before running any tests: run `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` at the repository root.
+- In tight loops you may also install a specific module and its deps (`-pl <module> -am -Pquick install`) to iterate quickly, but before executing tests anywhere that depend on your changes, run a root‑level `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install` so the latest jars are available to the reactor from `.m2_repo`.
 ---
 ## Quick Start (First 10 Minutes)
@@ -243,13 +244,13 @@ Why this is mandatory
   * Inspect root `pom.xml` and module tree (see “Maven Module Overview”).
   * Search fast with ripgrep: `rg -n "<pattern>"`
 2. **Build sanity (fast, skip tests)**
-  * `mvn -o -Pquick install | tail -200`
+  * `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200`
 3. **Format (Java, imports, XML)**
-  * `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format`
+  * `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format`
 4. **Targeted tests (tight loops)**
-  * Module: `mvn -o -pl <module> verify | tail -500`
-  * Class: `mvn -o -pl <module> -Dtest=ClassName verify | tail -500`
-  * Method: `mvn -o -pl <module> -Dtest=ClassName#method verify | tail -500`
+  * Module: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify | tail -500`
+  * Class: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Dtest=ClassName verify | tail -500`
+  * Method: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Dtest=ClassName#method verify | tail -500`
 5. **Inspect failures**
   * **Unit (Surefire):** `<module>/target/surefire-reports/`
   * **IT (Failsafe):** `<module>/target/failsafe-reports/`
@@ -343,8 +344,8 @@ It is illegal to `-q` when running tests!
 * **Plan:** small, verifiable steps; keep one `in_progress`.
 * **Change:** minimal, surgical edits; keep style/structure consistent.
-* **Format:** `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format`
-* **Compile (fast):** `mvn -o -pl <module> -am -Pquick install | tail -500`
+* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format`
+* **Compile (fast):** `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -am -Pquick install | tail -500`
 * **Test:** start smallest (class/method → module). For integration, run module `verify`.
 * **Triage:** read reports; fix root cause; expand scope only when needed.
 * **Iterate:** keep momentum; escalate only when blocked or irreversible.
@@ -371,7 +372,7 @@ It is illegal to `-q` when running tests!
 ### Optional: Redirect test stdout/stderr to files
 ```bash
-mvn -o -pl <module> -Dtest=ClassName[#method] -Dmaven.test.redirectTestOutputToFile=true verify | tail -500
+mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Dtest=ClassName[#method] -Dmaven.test.redirectTestOutputToFile=true verify | tail -500
 ```
 Logs under:
@@ -415,14 +416,16 @@ Assertions are executable claims about what must be true. Use **temporary tripwi
 * Always run before finalizing:
-  * `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format`
+  * `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format`
 * Style: no wildcard imports; 120‑char width; curly braces always; LF endings.
 ---
 ## Source File Headers
-Use this exact header for **new Java files only** (replace `${year}` with current year):
+Strict requirement — copy/paste exactly. All new Java source files MUST begin with the exact header below, line for line. The text, spacing, punctuation, URL, and SPDX line must be identical. Replace `${year}` with the correct current year at the time the file is created.
+
+Hint: get the current year with `date +%Y`.
 ```
 /*******************************************************************************
  * Copyright (c) ${year} Eclipse RDF4J contributors.
  *
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Distribution License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/org/documents/edl-v10.php.
  *
  * SPDX-License-Identifier: BSD-3-Clause
  ******************************************************************************/
 ```
@@ -443,9 +446,9 @@ Do **not** modify existing headers’ years.
 ## Pre‑Commit Checklist
-* **Format:** `mvn -o -q -T 2C formatter:format impsort:sort xml-format:xml-format`
-* **Compile (fast path):** `mvn -o -Pquick install | tail -200`
-* **Tests (targeted):** `mvn -o -pl <module> verify | tail -500` (broaden as needed)
+* **Format:** `mvn -o -Dmaven.repo.local=.m2_repo -q -T 2C formatter:format impsort:sort xml-format:xml-format`
+* **Compile (fast path):** `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install | tail -200`
+* **Tests (targeted):** `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify | tail -500` (broaden as needed)
 * **Reports:** zero new failures in Surefire/Failsafe, or explain precisely.
 * **Evidence:** Routine A — failing pre‑fix + passing post‑fix. Routine B — **pre/post green** from same selection + **Hit Proof**.
@@ -515,19 +518,19 @@ Do **not** modify existing headers’ years.
 ## Running Tests
-* By module: `mvn -o -pl core/sail/shacl verify | tail -500`
-* Entire repo: `mvn -o verify` (long; only when appropriate)
+* By module: `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl verify | tail -500`
+* Entire repo: `mvn -o -Dmaven.repo.local=.m2_repo verify` (long; only when appropriate)
 * Slow tests (entire repo):
-  `mvn -o verify -PslowTestsOnly,-skipSlowTests | tail -500`
+  `mvn -o -Dmaven.repo.local=.m2_repo verify -PslowTestsOnly,-skipSlowTests | tail -500`
 * Slow tests (by module):
-  `mvn -o -pl <module> verify -PslowTestsOnly,-skipSlowTests | tail -500`
+  `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify -PslowTestsOnly,-skipSlowTests | tail -500`
 * Slow tests (specific test):
-  * `mvn -o -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500`
+  * `mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/shacl -PslowTestsOnly,-skipSlowTests -Dtest=ClassName#method verify | tail -500`
 * Integration tests (entire repo):
-  `mvn -o verify -PskipUnitTests | tail -500`
+  `mvn -o -Dmaven.repo.local=.m2_repo verify -PskipUnitTests | tail -500`
 * Integration tests (by module):
-  `mvn -o -pl <module> verify -PskipUnitTests | tail -500`
+  `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify -PskipUnitTests | tail -500`
 * Useful flags:
   * `-Dtest=ClassName`
@@ -540,10 +543,10 @@
 ## Build
 * **Build without tests (fast path):**
-  `mvn -o -Pquick install`
+  `mvn -o -Dmaven.repo.local=.m2_repo -Pquick install`
 * **Verify with tests:**
-  Targeted module(s): `mvn -o -pl <module> verify`
-  Entire repo: `mvn -o verify` (use judiciously)
+  Targeted module(s): `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> verify`
+  Entire repo: `mvn -o -Dmaven.repo.local=.m2_repo verify` (use judiciously)
 * **When offline fails due to missing deps:**
   Re‑run the **exact** command **without** `-o` once to fetch, then return to `-o`.
@@ -554,9 +557,9 @@
 JaCoCo is configured via the `jacoco` Maven profile in the root POM. Surefire/Failsafe honor the prepared agent `argLine`, so no extra flags are required beyond `-Pjacoco`. A short end-to-end sketch follows, ahead of the targeted command list.
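A minimal coverage sketch under stated assumptions: the module path `core/sail/lmdb` is one touched by this PR, while `jacoco:report` and the `target/site/jacoco` output location are standard jacoco-maven-plugin defaults, not verified against this repository's POM:

```bash
# Run one module's tests with the coverage agent attached (flags per the conventions above)
mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/lmdb -Pjacoco verify | tail -500

# Raw execution data produced by the agent
ls core/sail/lmdb/target/jacoco.exec

# Render the HTML report if the profile does not already bind the report goal (assumption)
mvn -o -Dmaven.repo.local=.m2_repo -pl core/sail/lmdb -Pjacoco jacoco:report
xdg-open core/sail/lmdb/target/site/jacoco/index.html
```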
 - Run with coverage
-  - Module: `mvn -o -pl <module> -Pjacoco verify | tail -500`
-  - Class: `mvn -o -pl <module> -Pjacoco -Dtest=ClassName verify | tail -500`
-  - Method: `mvn -o -pl <module> -Pjacoco -Dtest=ClassName#method verify | tail -500`
+  - Module: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Pjacoco verify | tail -500`
+  - Class: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Pjacoco -Dtest=ClassName verify | tail -500`
+  - Method: `mvn -o -Dmaven.repo.local=.m2_repo -pl <module> -Pjacoco -Dtest=ClassName#method verify | tail -500`
 - Where to find reports (per module)
   - Exec data: `<module>/target/jacoco.exec`
diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java
index a426eb395a4..51003a8264a 100644
--- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java
+++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/DelegatingSailDataset.java
@@ -13,6 +13,7 @@ import java.util.Comparator;
 import java.util.Set;
 
+import org.eclipse.rdf4j.common.annotation.Experimental;
 import org.eclipse.rdf4j.common.iteration.CloseableIteration;
 import org.eclipse.rdf4j.common.order.StatementOrder;
 import org.eclipse.rdf4j.model.IRI;
@@ -93,4 +94,10 @@ public Set<StatementOrder> getSupportedOrders(Resource subj, IRI pred, Value obj
 	public Comparator<Value> getComparator() {
 		return delegate.getComparator();
 	}
+
+	@Experimental
+	@Override
+	public long size(Resource subj, IRI pred, Value obj, Resource... contexts) {
+		return delegate.size(subj, pred, obj, contexts);
+	}
 }
diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java
index b47b9410ae9..38c19d6e3de 100644
--- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java
+++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDataset.java
@@ -130,4 +130,10 @@ default Comparator<Value> getComparator() {
 		return null;
 	}
 
+	@Experimental
+	default long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts) {
+		try (CloseableIteration<? extends Statement> statements = getStatements(subj, pred, obj, contexts)) {
+			return statements.stream().count();
+		}
+	}
 }
diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java
index 5f6fd74407d..976cb5b94db 100644
--- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java
+++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailDatasetImpl.java
@@ -21,6 +21,7 @@ import java.util.Set;
 import java.util.function.Function;
 
+import org.eclipse.rdf4j.common.annotation.Experimental;
 import org.eclipse.rdf4j.common.iteration.AbstractCloseableIteration;
 import org.eclipse.rdf4j.common.iteration.CloseableIteration;
 import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration;
@@ -114,7 +115,7 @@ public CloseableIteration<? extends Namespace> getNamespaces() throws SailExcept
 		if (added == null && removed == null) {
 			return namespaces;
 		}
-		final Iterator<Map.Entry<String, String>> addedIter = added;
+		final Iterator<Map.Entry<String, String>> addedIter = added;
 		final Set<String> removedSet = removed;
 		return new AbstractCloseableIteration<>() {
@@ -382,4 +383,18 @@ private boolean isDeprecated(Triple triple, List<Statement> deprecatedStatements
 		}
 		return true;
 	}
+
+	@Experimental
+	@Override
+	public long size(final Resource subj, final IRI pred, final Value obj, final Resource...
contexts) { + // Fast path: no approved or deprecated and not cleared + if (!changes.hasApproved() && !changes.hasDeprecated() && !changes.isStatementCleared()) { + return derivedFrom.size(subj, pred, obj, contexts); + } + + // Fallback path: iterate over all matching statements + try (CloseableIteration statements = getStatements(subj, pred, obj, contexts)) { + return statements.stream().count(); + } + } } diff --git a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java index a32f6ba1cb9..6d2041831a5 100644 --- a/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java +++ b/core/sail/base/src/main/java/org/eclipse/rdf4j/sail/base/SailSourceConnection.java @@ -17,6 +17,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Stream; +import org.eclipse.rdf4j.common.annotation.Experimental; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.order.StatementOrder; import org.eclipse.rdf4j.common.transaction.IsolationLevel; @@ -1034,4 +1035,23 @@ private boolean hasStatement(SailDataset dataset, Resource subj, IRI pred, Value } } + /** + * Returns the number of statements in the snapshot, optionally including inferred statements, for the given + * contexts. This method reads the size directly from the dataset within the current isolation level. + * + * @param includeInferred whether to include inferred statements in the count + * @param contexts the RDF contexts (named graphs) to restrict the count to; if none are provided, counts all + * contexts + * @return the number of statements in the dataset + * @throws SailException if an error occurs while accessing the Sail store + */ + @Experimental + protected long getSizeFromSnapshot(final boolean includeInferred, final Resource... contexts) throws SailException { + try (SailSource branch = branch(IncludeInferred.fromBoolean(includeInferred))) { + try (SailDataset dataset = branch.dataset(getIsolationLevel())) { + return dataset.size(null, null, null, contexts); + } + } + } + } diff --git a/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java b/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java new file mode 100644 index 00000000000..bc82bf96403 --- /dev/null +++ b/core/sail/base/src/test/java/org/eclipse/rdf4j/sail/base/SailDatasetImplSizeTest.java @@ -0,0 +1,599 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.base; + +import static org.junit.jupiter.api.Assertions.assertAll; +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Set; +import java.util.stream.Stream; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.iteration.CloseableIteratorIteration; +import org.eclipse.rdf4j.common.iteration.EmptyIteration; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Model; +import org.eclipse.rdf4j.model.Namespace; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.LinkedHashModel; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.sail.SailException; +import org.junit.jupiter.api.Test; + +/** + * Verifies that SailDatasetImpl.size respects a pending clear() operation (statementCleared), and does not delegate to + * the backing dataset when cleared with no contexts. + */ +public class SailDatasetImplSizeTest { + + /** + * Minimal backing dataset that reports a fixed size regardless of arguments. + */ + private static final class FixedSizeDataset implements SailDataset { + private final long size; + + private FixedSizeDataset(long size) { + this.size = size; + } + + @Override + public void close() throws SailException { + // no-op + } + + @Override + public CloseableIteration getNamespaces() throws SailException { + return new EmptyIteration<>(); + } + + @Override + public String getNamespace(String prefix) throws SailException { + return null; + } + + @Override + public CloseableIteration getContextIDs() throws SailException { + return new EmptyIteration<>(); + } + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... contexts) throws SailException { + return new EmptyIteration<>(); + } + + @Override + public long size(Resource subj, IRI pred, Value obj, Resource... contexts) { + return size; + } + } + + @Test + public void size_respects_statementCleared() { + // backing dataset contains data (non-zero size) + SailDataset backing = new FixedSizeDataset(5); + + // create a changeset and simulate clear() without contexts + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + // not used in this test + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // clear() with zero contexts should mark statementCleared=true while leaving + // hasApproved()/hasDeprecated() false + changes.clear(); + + // snapshot over backing with pending clear should report size 0 + SailDataset snapshot = new SailDatasetImpl(backing, changes); + long snapshotSize = snapshot.size(null, null, null); + + assertEquals(0L, snapshotSize, + "size() should respect statementCleared and return 0 when cleared without contexts"); + } + + /** + * Backing dataset that returns a concrete set of statements and supports filtering. 
+ */ + private static final class ListBackedDataset implements SailDataset { + private final List data; + + private ListBackedDataset(List data) { + this.data = List.copyOf(data); + } + + @Override + public void close() throws SailException { + } + + @Override + public CloseableIteration getNamespaces() throws SailException { + return new EmptyIteration<>(); + } + + @Override + public String getNamespace(String prefix) throws SailException { + return null; + } + + @Override + public CloseableIteration getContextIDs() throws SailException { + return new EmptyIteration<>(); + } + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, Value obj, + Resource... contexts) throws SailException { + Stream stream = data.stream(); + if (subj != null) { + stream = stream.filter(st -> subj.equals(st.getSubject())); + } + if (pred != null) { + stream = stream.filter(st -> pred.equals(st.getPredicate())); + } + if (obj != null) { + stream = stream.filter(st -> obj.equals(st.getObject())); + } + if (contexts != null && contexts.length > 0) { + Set ctxs = new HashSet<>(Arrays.asList(contexts)); + stream = stream.filter(st -> ctxs.contains(st.getContext())); + } + Iterator it = stream.iterator(); + return new CloseableIteratorIteration<>(it); + } + } + + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + private static final Resource CTX_A = VF.createIRI("urn:ctx:A"); + private static final Resource CTX_B = VF.createIRI("urn:ctx:B"); + private static final Resource CTX_C = VF.createIRI("urn:ctx:C"); + private static final IRI P = VF.createIRI("urn:p"); + private static final IRI Q = VF.createIRI("urn:q"); + + private static Statement st(String s, String o, Resource ctx) { + return VF.createStatement(VF.createIRI("urn:s:" + s), P, VF.createIRI("urn:o:" + o), ctx); + } + + @Test + public void size_afterMultiContextClear_andMixedContextQueries() { + SailDataset backing = new ListBackedDataset(List.of( + st("1", "1", CTX_A), + st("2", "2", CTX_B), + st("3", "3", CTX_C) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // clear two contexts at once (A and B) + changes.clear(CTX_A, CTX_B); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Global should only include C; mixed queries should reflect filters properly + assertAll( + () -> assertEquals(1L, snapshot.size(null, null, null), "only C remains globally"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_A), "A cleared => empty"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_B), "B cleared => empty"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_C), "C remains"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B, CTX_C), "B cleared, C remains => 1") + ); + } + + @Test + public void size_withSubjPredObjFilters_afterContextClear() { + var s1 = VF.createIRI("urn:s:1"); + var o1 = VF.createIRI("urn:o:1"); + var o2 = VF.createIRI("urn:o:2"); + + SailDataset backing = new ListBackedDataset(List.of( + VF.createStatement(s1, P, o1, CTX_A), + VF.createStatement(s1, P, o1, CTX_B), + VF.createStatement(VF.createIRI("urn:s:2"), P, o2, CTX_B) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + changes.clear(CTX_A); + SailDataset snapshot = new 
SailDatasetImpl(backing, changes); + + assertAll( + () -> assertEquals(1L, snapshot.size(s1, P, null), "s1@A removed, only s1@B remains"), + () -> assertEquals(1L, snapshot.size(null, P, o2), "o2 only in B remains"), + () -> assertEquals(0L, snapshot.size(s1, P, o1, CTX_A), "A cleared => empty for filter"), + () -> assertEquals(1L, snapshot.size(s1, P, o1, CTX_B), "B unaffected => 1 for filter") + ); + } + + @Test + public void size_withFilters_afterGlobalClear_withApprovals() { + var s1 = VF.createIRI("urn:s:1"); + var o1 = VF.createIRI("urn:o:1"); + var o2 = VF.createIRI("urn:o:2"); + + SailDataset backing = new ListBackedDataset(List.of( + VF.createStatement(VF.createIRI("urn:s:x"), P, VF.createIRI("urn:o:x"), CTX_A) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + changes.clear(); + changes.approve(VF.createStatement(s1, P, o1, CTX_A)); + changes.approve(VF.createStatement(s1, P, o2, CTX_B)); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + () -> assertEquals(2L, snapshot.size(s1, P, null), "two approved for s1 after global clear"), + () -> assertEquals(1L, snapshot.size(null, P, o1), "only approved o1 remains"), + () -> assertEquals(1L, snapshot.size(null, P, o2), "only approved o2 remains") + ); + } + + @Test + public void size_tripleTerms_afterClear_andApprovals() { + // create a triple value, then use it as subject and object in statements + var ts = VF.createTriple(VF.createIRI("urn:ts:s"), P, VF.createIRI("urn:ts:o")); + var to = VF.createTriple(VF.createIRI("urn:to:s"), P, VF.createIRI("urn:to:o")); + + Statement subjTripleInA = VF.createStatement((Resource) ts, P, VF.createIRI("urn:o:X"), CTX_A); + Statement objTripleInB = VF.createStatement(VF.createIRI("urn:s:Y"), P, to, CTX_B); + + SailDataset backing = new ListBackedDataset(List.of(subjTripleInA, objTripleInB)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // clear A, then approve a triple-subject statement in C + changes.clear(CTX_A); + Statement approvedInC = VF.createStatement((Resource) ts, P, VF.createIRI("urn:o:Z"), CTX_C); + changes.approve(approvedInC); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Expected: removed subjTripleInA (cleared); kept objTripleInB; added approvedInC + assertAll( + () -> assertEquals(2L, snapshot.size(null, null, null), "B + approved C"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_A), "A cleared"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B), "B remains"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_C), "approved in C") + ); + } + + @Test + public void size_deprecatedThenApprovedDuplicate_acrossContexts() { + // Same SPO in two contexts in backing + Statement a = st("x", "y", CTX_A); + Statement b = st("x", "y", CTX_B); + SailDataset backing = new ListBackedDataset(List.of(a, b)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // Deprecate B from backing, then approve B again + changes.deprecate(b); + changes.approve(b); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Expect both contexts visible, and 
no double count + assertAll( + () -> assertEquals(2L, snapshot.size(null, null, null), "A and B should both be visible"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_A), "A visible"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B), "B re-approved and visible") + ); + } + + @Test + public void size_contextArrayWithDuplicatesAndNulls() { + // Backing has one in default (null) context and one in A + Statement def = VF.createStatement(VF.createIRI("urn:s:def"), P, VF.createIRI("urn:o:def")); + Statement inA = st("a", "a", CTX_A); + SailDataset backing = new ListBackedDataset(List.of(def, inA)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // Clear default graph only + changes.clear((Resource) null); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Ask size with duplicate and null contexts in the query + assertAll( + () -> assertEquals(1L, snapshot.size(null, null, null), "Only A remains globally"), + () -> assertEquals(0L, snapshot.size(null, null, null, (Resource) null), "default graph cleared"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_A, (Resource) null, CTX_A), + "duplicates ignored; default graph cleared; A remains") + ); + } + + @Test + public void size_additionalFilterCombinations_predOnly_objOnly_mixed() { + var s1 = VF.createIRI("urn:s:1"); + var s2 = VF.createIRI("urn:s:2"); + var o1 = VF.createIRI("urn:o:1"); + var o2 = VF.createIRI("urn:o:2"); + + SailDataset backing = new ListBackedDataset(List.of( + VF.createStatement(s1, P, o1, CTX_A), + VF.createStatement(s2, P, o2, CTX_B), + VF.createStatement(s2, Q, o2, CTX_A) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // Clear B, then approve a new P/o2 in A for s2 + changes.clear(CTX_B); + changes.approve(VF.createStatement(s2, P, o2, CTX_A)); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + // pred-only across all contexts: s1@A (P/o1), s2@A (P/o2 approved) => 2 + () -> assertEquals(2L, snapshot.size(null, P, null), "two P statements after clear+approve"), + // obj-only: o2 now appears only in A (approved); B was cleared + () -> assertEquals(2L, snapshot.size(null, null, o2), "Q@A + approved P@A have o2"), + // mixed filter: s2,P,null => only approved one in A + () -> assertEquals(1L, snapshot.size(s2, P, null), "s2@P only approved in A remains"), + // context filter combos + () -> assertEquals(2L, snapshot.size(null, null, o2, CTX_A), "both P/Q@A with o2 => 2"), + () -> assertEquals(0L, snapshot.size(null, null, o2, CTX_B), "B cleared") + ); + } + + @Test + public void size_afterGlobalClear_countsApprovedOnly() { + SailDataset backing = new ListBackedDataset(List.of( + st("1", "1", CTX_A), + st("2", "2", CTX_B) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // global clear: remove all backing statements from view + changes.clear(); + // approve two new statements (one per context) + changes.approve(st("a", "a", CTX_A)); + changes.approve(st("b", "b", CTX_B)); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + () -> assertEquals(2L, 
snapshot.size(null, null, null), + "after global clear, only approved statements are visible"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_A), + "context filter A should see 1 approved statement in A"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B), + "context filter B should see 1 approved statement in B") + ); + } + + @Test + public void size_afterContextClear_excludesClearedContextData() { + // backing has 2 in A and 3 in B + SailDataset backing = new ListBackedDataset(List.of( + st("1", "1", CTX_A), st("2", "2", CTX_A), + st("3", "3", CTX_B), st("4", "4", CTX_B), st("5", "5", CTX_B) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // clear only context A + changes.clear(CTX_A); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + assertAll( + () -> assertEquals(3L, snapshot.size(null, null, null), + "global view should exclude cleared context A (only B remains)"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_A), + "cleared context A should be empty"), + () -> assertEquals(3L, snapshot.size(null, null, null, CTX_B), + "uncleared context B remains visible") + ); + } + + @Test + public void size_afterContextClear_withApprovedInClearedContext() { + // backing has 1 in A and 1 in B + SailDataset backing = new ListBackedDataset(List.of( + st("1", "1", CTX_A), + st("2", "2", CTX_B) + )); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + + // clear A, then approve a new statement in A and another in B + changes.clear(CTX_A); + changes.approve(st("a", "a", CTX_A)); + changes.approve(st("b", "b", CTX_B)); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + + // Global: backing B (1) + approved A (1) + approved B (1) = 3 + // Context A: only approved in A (1) + // Context B: backing B (1) + approved B (1) = 2 + assertAll( + () -> assertEquals(3L, snapshot.size(null, null, null), "global view reflects clear+approvals"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_A), "A has only approved statements"), + () -> assertEquals(2L, snapshot.size(null, null, null, CTX_B), "B has both backing and approved") + ); + } + + @Test + public void size_noChanges_delegatesToDerivedFrom() { + // With no approved/deprecated and not cleared, must delegate to backing.size + SailDataset backing = new FixedSizeDataset(7); + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + SailDataset snapshot = new SailDatasetImpl(backing, changes); + assertEquals(7L, snapshot.size(null, null, null)); + assertEquals(7L, snapshot.size(null, null, null, CTX_A)); + } + + @Test + public void size_withDeprecatedStatements_excludesDeprecatedOnes() { + Statement a1 = st("1", "1", CTX_A); + Statement b1 = st("2", "2", CTX_B); + SailDataset backing = new ListBackedDataset(List.of(a1, b1)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + // deprecate one existing statement + changes.deprecate(a1); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + 
assertAll( + () -> assertEquals(1L, snapshot.size(null, null, null), "one deprecated removed from global view"), + () -> assertEquals(0L, snapshot.size(null, null, null, CTX_A), "deprecated in A excluded"), + () -> assertEquals(1L, snapshot.size(null, null, null, CTX_B), "B remains visible") + ); + } + + @Test + public void size_withApprovedDuplicates_doesNotDoubleCount() { + Statement b1 = st("2", "2", CTX_B); + SailDataset backing = new ListBackedDataset(List.of(b1)); + + Changeset changes = new Changeset() { + @Override + public void flush() throws SailException { + } + + @Override + public Model createEmptyModel() { + return new LinkedHashModel(); + } + }; + // approve same statement as in backing + changes.approve(b1); + + SailDataset snapshot = new SailDatasetImpl(backing, changes); + assertEquals(1L, snapshot.size(null, null, null), "approved duplicate must not be double-counted"); + } +} diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java index 6d87d4bc33e..e218d84aa82 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java @@ -71,7 +71,7 @@ class LmdbSailStore implements SailStore { private boolean multiThreadingActive; private volatile boolean asyncTransactionFinished; private volatile boolean nextTransactionAsync; - private volatile boolean mayHaveInferred; + private final AtomicBoolean mayHaveInferred = new AtomicBoolean(); boolean enableMultiThreading = true; @@ -144,7 +144,7 @@ class AddQuadOperation implements Operation { @Override public void execute() throws IOException { if (!explicit) { - mayHaveInferred = true; + mayHaveInferred.setRelease(true); } if (!unusedIds.isEmpty()) { // these ids are used again @@ -196,8 +196,8 @@ public LmdbSailStore(File dataDir, LmdbStoreConfig config) throws IOException, S namespaceStore = new NamespaceStore(dataDir); var valueStore = new ValueStore(new File(dataDir, "values"), config); this.valueStore = valueStore; - tripleStore = new TripleStore(new File(dataDir, "triples"), config, valueStore); - mayHaveInferred = tripleStore.hasTriples(false); + tripleStore = new TripleStore(new File(dataDir, "triples"), config, valueStore, mayHaveInferred); + mayHaveInferred.setRelease(tripleStore.hasTriples(false)); initialized = true; } finally { if (!initialized) { @@ -353,7 +353,7 @@ protected void handleClose() throws SailException { */ CloseableIteration createStatementIterator( Txn txn, Resource subj, IRI pred, Value obj, boolean explicit, Resource... contexts) throws IOException { - if (!explicit && !mayHaveInferred) { + if (!explicit && !mayHaveInferred.getAcquire()) { // there are no inferred statements and the iterator should only return inferred statements return CloseableIteration.EMPTY_STATEMENT_ITERATION; } @@ -413,6 +413,79 @@ CloseableIteration createStatementIterator( } } + /** + * Returns the number of statements that match the specified pattern. + * + * @param subj The subject of the pattern, or null to indicate a wildcard. + * @param pred The predicate of the pattern, or null to indicate a wildcard. + * @param obj The object of the pattern, or null to indicate a wildcard. + * @param includeImplicit Whether to include inferred statements in addition to explicit. + * @param contexts The context(s) of the pattern. Note that this parameter is a vararg and as such is + * optional. 
If no contexts are supplied the method operates on the entire repository. + * @return The number of statements that match the specified pattern. + * @throws SailException If an error occurred while determining the size. + */ + private long size(final TxnManager.Txn txn, final Resource subj, final IRI pred, final Value obj, + final boolean includeImplicit, final Resource... contexts) + throws SailException { + try { + long totalSize = 0; + + long subjID = LmdbValue.UNKNOWN_ID; + if (subj != null) { + subjID = valueStore.getId(subj); + if (subjID == LmdbValue.UNKNOWN_ID) { + return 0; + } + } + + long predID = LmdbValue.UNKNOWN_ID; + if (pred != null) { + predID = valueStore.getId(pred); + if (predID == LmdbValue.UNKNOWN_ID) { + return 0; + } + } + + long objID = LmdbValue.UNKNOWN_ID; + if (obj != null) { + objID = valueStore.getId(obj); + if (objID == LmdbValue.UNKNOWN_ID) { + return 0; + } + } + + // Handle context selection mirroring getStatements semantics + if (contexts.length == 0) { + // wildcard across all contexts + totalSize = tripleStore.cardinalityExact(txn, subjID, predID, objID, LmdbValue.UNKNOWN_ID, + includeImplicit); + } else { + for (Resource context : contexts) { + Long contextIDToCount = null; + if (context == null) { + // default graph + contextIDToCount = 0L; + } else if (!context.isTriple()) { + long contextID = valueStore.getId(context); + // skip unknown (non-existent) contexts; do not early-return + if (contextID != LmdbValue.UNKNOWN_ID) { + contextIDToCount = contextID; + } + } + + if (contextIDToCount != null) { + totalSize += tripleStore.cardinalityExact(txn, subjID, predID, objID, contextIDToCount, + includeImplicit); + } + } + } + return totalSize; + } catch (final IOException e) { + throw new SailException(e); + } + } + private final class LmdbSailSource extends BackingSailSource { private final boolean explicit; @@ -965,5 +1038,24 @@ public Set getSupportedOrders(Resource subj, IRI pred, Value obj public Comparator getComparator() { return null; } + + @Override + public long size(final Resource subj, final IRI pred, final Value obj, final Resource... contexts) + throws SailException { + try { + if (explicit) { + // explicit dataset: count explicit statements only + return LmdbSailStore.this.size(txn, subj, pred, obj, false, contexts); + } else { + // inferred dataset: count inferred-only = (explicit+inferred) - explicit + long total = LmdbSailStore.this.size(txn, subj, pred, obj, true, contexts); + long explicitOnly = LmdbSailStore.this.size(txn, subj, pred, obj, false, contexts); + long inferredOnly = total - explicitOnly; + return inferredOnly >= 0 ? inferredOnly : 0; + } + } catch (final Exception e) { + throw new SailException(e); + } + } } } diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java index 9c0577e655a..acf6bbcc332 100644 --- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java +++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnection.java @@ -206,4 +206,9 @@ protected void closeInternal() throws SailException { // release thread-local pool Pool.release(); } + + @Override + protected long sizeInternal(Resource... 
contexts) throws SailException {
+		return super.getSizeFromSnapshot(false, contexts);
+	}
 }
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java
index 059bb51e666..a0c69b59e03 100644
--- a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java
+++ b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/TripleStore.java
@@ -75,6 +75,7 @@ import java.util.Set;
 import java.util.StringTokenizer;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.LongAdder;
 import java.util.function.Consumer;
@@ -85,6 +86,7 @@ import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.Record;
 import org.eclipse.rdf4j.sail.lmdb.TxnRecordCache.RecordCacheIterator;
 import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
+import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue;
 import org.eclipse.rdf4j.sail.lmdb.util.GroupMatcher;
 import org.eclipse.rdf4j.sail.lmdb.util.IndexKeyWriters;
 import org.lwjgl.PointerBuffer;
@@ -161,6 +163,7 @@ class TripleStore implements Closeable {
 	 */
 	private final List<TripleIndex> indexes = new ArrayList<>();
 	private final ValueStore valueStore;
+	private final AtomicBoolean mayHaveInferred;
 	private long env;
 	private int contextsDbi;
@@ -194,11 +197,13 @@ public int compareRegion(ByteBuffer array1, int startIdx1, ByteBuffer array2, in
 		}
 	};
-	TripleStore(File dir, LmdbStoreConfig config, ValueStore valueStore) throws IOException, SailException {
+	TripleStore(File dir, LmdbStoreConfig config, ValueStore valueStore, AtomicBoolean mayHaveInferred)
+			throws IOException, SailException {
 		this.dir = dir;
 		this.forceSync = config.getForceSync();
 		this.autoGrow = config.getAutoGrow();
 		this.valueStore = valueStore;
+		this.mayHaveInferred = mayHaveInferred;
 		// create the directory if it does not exist
 		this.dir.mkdirs();
@@ -673,6 +678,70 @@ protected void filterUsedIds(Collection<Long> ids) throws IOException {
 		});
 	}
+
+	/**
+	 * Returns the exact total size of the triple pattern with the given subject, predicate, object and context. If
+	 * subject, predicate and object are all unspecified (i.e., set to {@link LmdbValue#UNKNOWN_ID}), the count is
+	 * read from mdb_stat, or from the precomputed context sizes when a context is given. Otherwise, it iterates over
+	 * all matching triples and counts them.
+ * + * @param subj Subject ID or {@link LmdbValue#UNKNOWN_ID} if not specified + * @param pred Predicate ID or {@link LmdbValue#UNKNOWN_ID} if not specified + * @param obj Object ID or {@link LmdbValue#UNKNOWN_ID} if not specified + * @param context Context ID or {@link LmdbValue#UNKNOWN_ID} if not specified + * @param includeImplicit Whether to include implicit triples in the count + * @return The exact size of the triple pattern + */ + protected long cardinalityExact(final TxnManager.Txn txn, final long subj, final long pred, final long obj, + final long context, final boolean includeImplicit) + throws IOException { + + if (subj == LmdbValue.UNKNOWN_ID && pred == LmdbValue.UNKNOWN_ID && obj == LmdbValue.UNKNOWN_ID) { + try (final MemoryStack stack = MemoryStack.stackPush()) { + // Fast path: if all values are unknown, return the total size of the database + if (context == LmdbValue.UNKNOWN_ID) { + long cardinality = 0; + final TripleIndex index = getBestIndex(subj, pred, obj, context); + + int dbi = index.getDB(true); + MDBStat stat = MDBStat.mallocStack(stack); + mdb_stat(txn.get(), dbi, stat); + cardinality += stat.ms_entries(); + + if (includeImplicit) { + dbi = index.getDB(false); + mdb_stat(txn.get(), dbi, stat); + cardinality += stat.ms_entries(); + } + return cardinality; + } else { + // Fast path: if only context is specified. Only use the precomputed + // context size when including implicit statements; otherwise fall through + // and count explicit-only via iteration below. + if (includeImplicit || !mayHaveInferred.getAcquire()) { + return getContextSize(txn, stack, context); + } + } + } + } + + long size = 0; + + try (RecordIterator explicitIter = getTriples(txn, subj, pred, obj, context, true); + RecordIterator implicitIter = includeImplicit + ? 
getTriples(txn, subj, pred, obj, context, false) + : null) { + for (long[] quad = explicitIter.next(); quad != null; quad = explicitIter.next()) { + size++; + } + if (includeImplicit && implicitIter != null) { + for (long[] quad = implicitIter.next(); quad != null; quad = implicitIter.next()) { + size++; + } + } + } + return size; + } + protected double cardinality(long subj, long pred, long obj, long context) throws IOException { TripleIndex index = getBestIndex(subj, pred, obj, context); @@ -690,7 +759,6 @@ protected double cardinality(long subj, long pred, long obj, long context) throw return cardinality; }); } - return txnManager.doWith((stack, txn) -> { Pool pool = Pool.get(); final Statistics s = pool.getStatistics(); @@ -928,6 +996,33 @@ public boolean storeTriple(long subj, long pred, long obj, long context, boolean return stAdded; } + private long getContextSize(final Txn txn, final MemoryStack stack, final long context) throws IOException { + try { + stack.push(); + + // Prepare key + MDBVal idVal = MDBVal.calloc(stack); + ByteBuffer keyBuffer = stack.malloc(1 + Long.BYTES); + Varint.writeUnsigned(keyBuffer, context); + keyBuffer.flip(); + idVal.mv_data(keyBuffer); + + // Prepare value holder + MDBVal dataVal = MDBVal.calloc(stack); + int rc = mdb_get(txn.get(), contextsDbi, idVal, dataVal); + if (rc == MDB_SUCCESS && dataVal.mv_data() != null) { + return Varint.readUnsigned(dataVal.mv_data()); + } else if (rc == MDB_NOTFOUND) { + // Context not present in DB + return 0; + } else { + throw new IOException("Failed to read context size: " + mdb_strerror(rc)); + } + } finally { + stack.pop(); + } + } + private void incrementContext(MemoryStack stack, long context) throws IOException { try { stack.push(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java new file mode 100644 index 00000000000..f2654b0fa21 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityExactTest.java @@ -0,0 +1,150 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; +import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; +import org.eclipse.rdf4j.sail.lmdb.model.LmdbValue; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +public class CardinalityExactTest { + private static final int NUM_RESOURCES = 1000; + private static final int MIN_TRIPLES_PER_RESOURCE = 20; + private static final int MAX_TRIPLES_PER_RESOURCE = 100; + private final int[] contextIds = new int[] { 1, 2, 3 }; + private final int[] objectIds = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; + @TempDir + File tempFolder; + + protected TripleStore tripleStore; + + @BeforeEach + public void before() throws Exception { + File dataDir = new File(tempFolder, "triplestore"); + dataDir.mkdir(); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); + } + + private long countTriples(RecordIterator iterator) { + long count = 0; + while (iterator.next() != null) { + count++; + } + return count; + } + + private long randomObjectId(Random random) { + return objectIds[random.nextInt(objectIds.length)]; + } + + private long randomContextId(Random random) { + return contextIds[random.nextInt(contextIds.length)]; + } + + @Test + public void testCardinalityExact() throws Exception { + Random random = new Random(); + + tripleStore.startTransaction(); + + for (int resourceId = 1; resourceId <= NUM_RESOURCES; resourceId++) { + int tripleCount = MIN_TRIPLES_PER_RESOURCE + random.nextInt(MAX_TRIPLES_PER_RESOURCE); + for (int i = 0; i < tripleCount; i++) { + long objectId = randomObjectId(random); + long randomContextId = randomContextId(random); + tripleStore.storeTriple(resourceId, 2, objectId, randomContextId, true); + + int predicateId = 2 + random.nextInt(1000) + 1; + tripleStore.storeTriple(resourceId, predicateId, randomObjectId(random), randomContextId, true); + } + } + + tripleStore.commit(); + + try (TxnManager.Txn txn = tripleStore.getTxnManager().createReadTxn()) { + for (final long contextId : contextIds) { + long actual = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, + contextId, true); + long expected = countTriples( + tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, contextId, false)) + + countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, 2, LmdbValue.UNKNOWN_ID, + contextId, true)); + assertEquals(expected, actual, "Exact size does not match counted triples."); + + } + + for (final long objectId : objectIds) { + long explicitActual = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + objectId, + LmdbValue.UNKNOWN_ID, false); + long totalActual = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + objectId, + LmdbValue.UNKNOWN_ID, true); + long implicitExpected = countTriples( + tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId, + LmdbValue.UNKNOWN_ID, false)); + long explicitExpected = countTriples( + tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, objectId, + LmdbValue.UNKNOWN_ID, true)); + + 
assertEquals(explicitExpected, explicitActual); + assertEquals(totalActual, implicitExpected + explicitExpected, + "Exact size does not match counted triples."); + } + + for (int resourceId = 1; resourceId <= NUM_RESOURCES; resourceId++) { + long totalExactSize = tripleStore.cardinalityExact(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, true); + long expectedCount = countTriples( + tripleStore.getTriples(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, false)) + + countTriples(tripleStore.getTriples(txn, resourceId, 2, LmdbValue.UNKNOWN_ID, 1, true)); + assertEquals(expectedCount, totalExactSize, "Exact size does not match counted triples."); + } + + for (int resourceId = 1; resourceId <= 50; resourceId++) { + long targetObjectId = randomObjectId(random); + long targetContextId = randomContextId(random); + long generalSize = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + targetObjectId, + targetContextId, false); + long generalExplicitCount = countTriples( + tripleStore.getTriples( + txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, targetObjectId, targetContextId, + true)); + assertEquals( + generalExplicitCount, generalSize, + "Exact size does not match counted triples." + ); + } + + long totalSize = tripleStore.cardinalityExact(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + LmdbValue.UNKNOWN_ID, + LmdbValue.UNKNOWN_ID, true); + long totalCount = countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, false)) + + countTriples(tripleStore.getTriples(txn, LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, + LmdbValue.UNKNOWN_ID, LmdbValue.UNKNOWN_ID, true)); + assertEquals(totalCount, totalSize, "Total size does not match counted triples."); + } + } + + @AfterEach + public void after() throws Exception { + tripleStore.close(); + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java index 685db90c83a..5a53fcf6fb5 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/CardinalityTest.java @@ -12,6 +12,7 @@ import java.io.File; import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.TxnManager.Txn; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; @@ -38,7 +39,7 @@ public class CardinalityTest { public void before() throws Exception { File dataDir = new File(tempFolder, "triplestore"); dataDir.mkdir(); - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); } int count(RecordIterator it) { diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java index b1e8b23df7c..8cb6c36e1a4 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/DefaultIndexTest.java @@ -16,6 +16,7 @@ import java.io.FileInputStream; import java.io.InputStream; import java.util.Properties; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.common.io.FileUtil; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; @@ -26,7 +27,7 @@ public class DefaultIndexTest { @Test public 
void testDefaultIndex(@TempDir File dir) throws Exception { - TripleStore store = new TripleStore(dir, new LmdbStoreConfig(), null); + TripleStore store = new TripleStore(dir, new LmdbStoreConfig(), null, new AtomicBoolean(false)); store.close(); // check that the triple store used the default index assertEquals("spoc,posc", findIndex(dir)); @@ -36,11 +37,11 @@ public void testDefaultIndex(@TempDir File dir) throws Exception { @Test public void testExistingIndex(@TempDir File dir) throws Exception { // set a non-default index - TripleStore store = new TripleStore(dir, new LmdbStoreConfig("spoc,opsc"), null); + TripleStore store = new TripleStore(dir, new LmdbStoreConfig("spoc,opsc"), null, new AtomicBoolean(false)); store.close(); String before = findIndex(dir); // check that the index is preserved with a null value - store = new TripleStore(dir, new LmdbStoreConfig(null), null); + store = new TripleStore(dir, new LmdbStoreConfig(null), null, new AtomicBoolean(false)); store.close(); assertEquals(before, findIndex(dir)); FileUtil.deleteDir(dir); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java new file mode 100644 index 00000000000..ca115d889b8 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbContextSizeExplicitOnlyTest.java @@ -0,0 +1,135 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.SailConnection; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Reproduces regression: context-size must be explicit-only. The LMDB fast path currently counts inferred statements + * for context-only size queries. 
+ */ +public class LmdbContextSizeExplicitOnlyTest { + + private Path tmpDir; + private LmdbStore store; + + @BeforeEach + public void setUp() throws IOException { + tmpDir = Files.createTempDirectory("rdf4j-lmdb-test-"); + store = new LmdbStore(tmpDir.toFile()); + store.init(); + } + + @AfterEach + public void tearDown() throws IOException { + if (store != null) { + store.shutDown(); + } + if (tmpDir != null) { + // best-effort cleanup; close the walk stream so no directory handles leak + try (var paths = Files.walk(tmpDir)) { + paths.sorted((a, b) -> b.compareTo(a)) + .forEach(p -> { + try { + Files.deleteIfExists(p); + } catch (IOException ignore) { + } + }); + } + } + } + + @Test + public void sizeContext_excludesInferred() { + try (SailConnection raw = store.getConnection()) { + LmdbStoreConnection conn = (LmdbStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + // one explicit in ctx + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + // one inferred in the same ctx (simulate inference via addInferredStatement) + conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx); + conn.commit(); + + // size must exclude inferred statements + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + + } + } + + @Test + public void sizeContext_excludesInferred2() { + try (SailConnection raw = store.getConnection()) { + LmdbStoreConnection conn = (LmdbStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + // one explicit in ctx + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + // one inferred in the same ctx (simulate inference via addInferredStatement) + conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx); + + // sizes are checked while the transaction is still open + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + + conn.commit(); + } + } + + @Test + public void sizeContext() { + try (SailConnection raw = store.getConnection()) { + LmdbStoreConnection conn = (LmdbStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + // one explicit in ctx + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + conn.commit(); + + // only explicit data present; sizes equal the explicit count + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbInferredDatasetSizeTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbInferredDatasetSizeTest.java new file mode 100644 index 00000000000..421001a7d69 --- /dev/null +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbInferredDatasetSizeTest.java @@ -0,0 +1,78 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors.
+ * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.lmdb; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import java.io.File; + +import org.eclipse.rdf4j.common.transaction.IsolationLevels; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.sail.base.SailDataset; +import org.eclipse.rdf4j.sail.base.SailSink; +import org.eclipse.rdf4j.sail.base.SailSource; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +/** + * Reproduces regression: inferred dataset size() must count inferred statements. The current implementation ignores the + * dataset's explicit/inferred flag and always counts explicit-only, causing inferred datasets to report 0. + */ +public class LmdbInferredDatasetSizeTest { + + private final ValueFactory vf = SimpleValueFactory.getInstance(); + + private LmdbStore store; + + @BeforeEach + public void setUp(@TempDir File dataDir) { + store = new LmdbStore(dataDir); + store.init(); + } + + @AfterEach + public void tearDown() { + if (store != null) { + store.shutDown(); + } + } + + @Test + public void inferredDatasetSize_countsInferredStatements() throws Exception { + // Arrange: add one inferred statement via the inferred sink + LmdbSailStore backing = store.getBackingStore(); + SailSource inferred = backing.getInferredSailSource(); + SailSource explicit = backing.getExplicitSailSource(); + + IRI s = vf.createIRI("urn:s"); + IRI p = vf.createIRI("urn:p"); + IRI o = vf.createIRI("urn:o"); + + try (SailSink sink = inferred.sink(IsolationLevels.NONE)) { + sink.approve(s, p, o, null); + sink.flush(); + } + + // Act/Assert: inferred dataset sees the inferred statement, explicit does not + try (SailDataset ds = inferred.dataset(IsolationLevels.NONE)) { + assertEquals(1L, ds.size(null, null, null), "inferred dataset should count inferred statements"); + } + + try (SailDataset ds = explicit.dataset(IsolationLevels.NONE)) { + assertEquals(0L, ds.size(null, null, null), "explicit dataset should not include inferred statements"); + } + } +} diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java index b735074c00c..bfb9fdda9fd 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStoreTest.java @@ -19,10 +19,7 @@ import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.iteration.EmptyIteration; import org.eclipse.rdf4j.common.transaction.IsolationLevels; -import org.eclipse.rdf4j.model.IRI; -import org.eclipse.rdf4j.model.Resource; -import org.eclipse.rdf4j.model.Statement; -import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.*; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDFS; import org.eclipse.rdf4j.query.TupleQuery; @@ -34,9 +31,12 @@ 
import org.eclipse.rdf4j.sail.base.SailDataset; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; /** * Extended test for {@link LmdbStore}. @@ -106,6 +106,34 @@ public void testRemoveInvalidContext() { } } + @Test + public void testSizeNullContextCountsDefaultGraphOnly() { + try (RepositoryConnection conn = repo.getConnection()) { + assertEquals(1, conn.size((Resource) null), "size(null) must count default graph only"); + } + } + + @Test + public void testSizeUnknownContextIsZero() { + try (RepositoryConnection conn = repo.getConnection()) { + assertEquals(0, conn.size(CTX_INV), "size(unknownCtx) must be zero"); + } + } + + @Test + public void testSizeMixedValidAndUnknownSkipsUnknown() { + try (RepositoryConnection conn = repo.getConnection()) { + assertEquals(1, conn.size(CTX_1, CTX_INV), "size(valid,unknown) must equal size(valid)"); + } + } + + @Test + public void testSizeNullAndValidCountsUnion() { + try (RepositoryConnection conn = repo.getConnection()) { + assertEquals(2, conn.size((Resource) null, CTX_1), "size(null,valid) must count default + valid"); + } + } + @Test public void testRemoveMultipleValidContext() { try (RepositoryConnection conn = repo.getConnection()) { @@ -197,6 +225,96 @@ public void testPassConnectionBetweenThreadsWithTx() throws InterruptedException { } } + @ParameterizedTest + @EnumSource(IsolationLevels.class) + public void testSizeIsolationLevels(final IsolationLevels isolationLevel) { + try (final RepositoryConnection conn1 = repo.getConnection(); + final RepositoryConnection conn2 = repo.getConnection()) { + final int baseSize = 3; // S0, S1, S2 + Assertions.assertEquals(baseSize, conn1.size(), "Size should be " + baseSize); + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + final int count = 100; + conn1.begin(isolationLevel); + conn2.begin(isolationLevel); + for (int i = 0; i < count; i++) { + conn1.add(F.createStatement(F.createIRI("http://example.org/" + i), RDFS.LABEL, + F.createLiteral("label" + i))); + } + // conn1 should see its own changes + Assertions.assertEquals(baseSize + count, conn1.size(), "Size should be " + (baseSize + count)); + + // LMDBStore supports: NONE, READ_COMMITTED, SNAPSHOT_READ, SNAPSHOT, and SERIALIZABLE. + // If an unsupported level (e.g., READ_UNCOMMITTED) is requested, + // a stronger supported level (e.g., READ_COMMITTED) is used instead.
+ if (isolationLevel.equals(IsolationLevels.NONE)) { + // conn2 should see the changes of conn1 + Assertions.assertEquals(baseSize + count, conn2.size(), "Size should be " + (baseSize + count)); + } else if (isolationLevel.equals(IsolationLevels.READ_UNCOMMITTED)) { + // READ_UNCOMMITTED is upgraded to READ_COMMITTED, so conn2 must not see uncommitted changes + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else if (isolationLevel.equals(IsolationLevels.READ_COMMITTED)) { + // conn2 should not see the changes of conn1 + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else if (isolationLevel.equals(IsolationLevels.SNAPSHOT_READ)) { + // conn2 should not see the changes of conn1 + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else if (isolationLevel.equals(IsolationLevels.SNAPSHOT)) { + // conn2 should not see the changes of conn1 + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else if (isolationLevel.equals(IsolationLevels.SERIALIZABLE)) { + // conn2 should not see the changes of conn1 + Assertions.assertEquals(baseSize, conn2.size(), "Size should be " + baseSize); + } else { + Assertions.fail("Unsupported isolation level: " + isolationLevel); + } + conn1.commit(); + // conn2 should see the changes of conn1 after commit + if (isolationLevel.equals(IsolationLevels.READ_COMMITTED) + || isolationLevel.equals(IsolationLevels.READ_UNCOMMITTED) + || isolationLevel.equals(IsolationLevels.SNAPSHOT_READ)) { + Assertions.assertEquals(baseSize + count, conn2.size(), "Size should be " + (baseSize + count)); + } + conn2.commit(); + Assertions.assertEquals(baseSize + count, conn2.size(), "Size should be " + (baseSize + count)); + } + } + + @ParameterizedTest + @EnumSource(value = IsolationLevels.class, names = "NONE", mode = EnumSource.Mode.EXCLUDE) + public void testSizeWhenRollbackTxn(final IsolationLevels isolationLevel) { + try (RepositoryConnection conn1 = repo.getConnection(); + RepositoryConnection conn2 = repo.getConnection()) { + + final int baseSize = 3; // S0, S1, S2 + Assertions.assertEquals(baseSize, conn1.size(), "Initial size in conn1 should be " + baseSize); + Assertions.assertEquals(baseSize, conn2.size(), "Initial size in conn2 should be " + baseSize); + + final int count = 50; + + conn1.begin(isolationLevel); + conn2.begin(isolationLevel); + + for (int i = 0; i < count; i++) { + conn1.add(F.createStatement(F.createIRI("http://example.org/rollback/" + i), RDFS.LABEL, + F.createLiteral("rollback" + i))); + } + + // conn1 sees its uncommitted changes + Assertions.assertEquals(baseSize + count, conn1.size(), "conn1 should see uncommitted additions"); + + // conn2 should NOT see uncommitted changes + Assertions.assertEquals(baseSize, conn2.size(), "conn2 should not see uncommitted changes"); + + conn1.rollback(); + + // After rollback, both connections should see base size + Assertions.assertEquals(baseSize, conn1.size(), "conn1 should not see rolled-back additions"); + Assertions.assertEquals(baseSize, conn2.size(), "conn2 should not see rolled-back additions"); + + conn2.commit(); + } + } + @Test public void testInferredSourceHasEmptyIterationWithoutInferredStatements() throws SailException { LmdbStore sail = (LmdbStore) ((SailRepository) repo).getSail(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java index c6785e1ba60..ea0ffd760b6 100644 ---
a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/LmdbStoreConnectionTest.java @@ -13,6 +13,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.File; +import java.util.Random; import org.eclipse.rdf4j.common.iteration.Iterations; import org.eclipse.rdf4j.common.transaction.IsolationLevel; @@ -22,6 +23,7 @@ import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.eclipse.rdf4j.testsuite.repository.RepositoryConnectionTest; +import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; @@ -64,4 +66,37 @@ public void testSES715(IsolationLevel level) { testCon2.close(); } + @ParameterizedTest + @MethodSource("parameters") + public void testSize(final IsolationLevel level) { + setupTest(level); + + ValueFactory vf = testCon.getValueFactory(); + IRI context1 = vf.createIRI("http://my.context.1"); + IRI context2 = vf.createIRI("http://my.context.2"); + IRI predicate = vf.createIRI("http://my.predicate"); + IRI object = vf.createIRI("http://my.object"); + Random random = new Random(); + int context1Size = random.nextInt(5000); + int context2Size = random.nextInt(5000); + for (int j = 0; j < context1Size; j++) { + testCon.add(vf.createIRI("http://my.subject" + j), predicate, object, context1); + } + for (int j = 0; j < context2Size; j++) { + testCon.add(vf.createIRI("http://my.subject" + j), predicate, object, context2); + } + assertEquals(context1Size, testCon.size(context1)); + assertEquals(context2Size, testCon.size(context2)); + assertEquals(context1Size + context2Size, testCon.size()); + + testCon.clear(context1); + assertEquals(0, testCon.size(context1)); + assertEquals(context2Size, testCon.size(context2)); + testCon.commit(); + + assertEquals(0, testCon2.size(context1)); + assertEquals(context2Size, testCon2.size(context2)); + + testCon2.close(); + } } diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RecordIteratorBenchmark.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RecordIteratorBenchmark.java index db6d2da0486..491e1b54b0d 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RecordIteratorBenchmark.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/RecordIteratorBenchmark.java @@ -14,6 +14,7 @@ import java.io.File; import java.io.IOException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.io.FileUtils; import org.assertj.core.util.Files; @@ -56,7 +57,7 @@ public class RecordIteratorBenchmark { @Setup(Level.Trial) public void setup() throws IOException { dataDir = Files.newTemporaryFolder(); - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); final int statements = 1_000_000; tripleStore.startTransaction(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java index 0e82f5246fe..47c69844b07 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleIndexToKeyCacheTest.java @@ -14,6 +14,7 @@ 
import java.io.File; import java.nio.ByteBuffer; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.AfterEach; @@ -32,7 +33,7 @@ class TripleIndexToKeyCacheTest { @BeforeEach void setup(@TempDir File dataDir) throws Exception { // Create a small store; index set is irrelevant for constructing standalone TripleIndex instances - tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); } @AfterEach diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreAutoGrowTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreAutoGrowTest.java index afcfc5e64b0..bfb46a1469c 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreAutoGrowTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreAutoGrowTest.java @@ -12,6 +12,7 @@ import java.io.File; import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.TxnManager.Txn; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; @@ -34,7 +35,7 @@ public class TripleStoreAutoGrowTest { public void before(@TempDir File dataDir) throws Exception { var config = new LmdbStoreConfig("spoc,posc"); config.setTripleDBSize(4096 * 10); - tripleStore = new TripleStore(dataDir, config, null); + tripleStore = new TripleStore(dataDir, config, null, new AtomicBoolean(false)); ((Logger) LoggerFactory .getLogger(TripleStore.class.getName())) .setLevel(ch.qos.logback.classic.Level.DEBUG); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreManyIndexesTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreManyIndexesTest.java index f6e7ca850a9..53765c7f088 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreManyIndexesTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreManyIndexesTest.java @@ -13,6 +13,7 @@ import static org.junit.Assert.assertNotNull; import java.io.File; +import java.util.concurrent.atomic.AtomicBoolean; import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig; import org.junit.jupiter.api.BeforeEach; @@ -33,7 +34,7 @@ public void before(@TempDir File dataDir) throws Exception { @Test public void testSixIndexes() throws Exception { TripleStore tripleStore = new TripleStore(dataDir, - new LmdbStoreConfig("spoc,posc,ospc,cspo,cpos,cosp"), null); + new LmdbStoreConfig("spoc,posc,ospc,cspo,cpos,cosp"), null, new AtomicBoolean(false)); tripleStore.startTransaction(); tripleStore.storeTriple(1, 2, 3, 1, true); tripleStore.commit(); diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreTest.java index 336c22b9378..80febc408f0 100644 --- a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreTest.java +++ b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/TripleStoreTest.java @@ -16,6 +16,7 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import org.eclipse.rdf4j.sail.lmdb.TxnManager.Txn; @@ -33,7 +34,7 @@ public class TripleStoreTest { @BeforeEach public void before(@TempDir File dataDir) throws Exception { - tripleStore = new 
TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null); + tripleStore = new TripleStore(dataDir, new LmdbStoreConfig("spoc,posc"), null, new AtomicBoolean(false)); } int count(RecordIterator it) { diff --git a/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryContextSizeExplicitOnlyTest.java b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryContextSizeExplicitOnlyTest.java new file mode 100644 index 00000000000..dc71cca7cc8 --- /dev/null +++ b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryContextSizeExplicitOnlyTest.java @@ -0,0 +1,109 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.memory; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.SailConnection; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Mirrors LMDB context-size tests using MemoryStoreConnection, ensuring size(context) and size() count explicit-only + * statements even when inferred statements exist. + */ +public class MemoryContextSizeExplicitOnlyTest { + + private MemoryStore store; + + @BeforeEach + public void setUp() { + store = new MemoryStore(); + store.init(); + } + + @AfterEach + public void tearDown() { + if (store != null) { + store.shutDown(); + } + } + + @Test + public void sizeContext_excludesInferred_afterCommit() { + try (SailConnection raw = store.getConnection()) { + MemoryStoreConnection conn = (MemoryStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx); + conn.commit(); + + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + } + } + + @Test + public void sizeContext_excludesInferred_duringTxn() { + try (SailConnection raw = store.getConnection()) { + MemoryStoreConnection conn = (MemoryStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx); + + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + + conn.commit(); + } + } + + @Test + public void sizeContext_onlyExplicit() { + try (SailConnection raw = store.getConnection()) { + MemoryStoreConnection conn = 
(MemoryStoreConnection) raw; + conn.begin(); + ValueFactory vf = store.getValueFactory(); + + IRI ctx = vf.createIRI("urn:ctx"); + IRI p = vf.createIRI("urn:p"); + + conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx); + conn.commit(); + + long contextSize = conn.size(ctx); + assertEquals(1L, contextSize, "size(context) must exclude inferred statements"); + + long totalSize = conn.size(); + assertEquals(1L, totalSize, "total size must exclude inferred statements"); + } + } +} diff --git a/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryDatasetSizeTest.java b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryDatasetSizeTest.java new file mode 100644 index 00000000000..fbb9bb37f1d --- /dev/null +++ b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/MemoryDatasetSizeTest.java @@ -0,0 +1,125 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + ******************************************************************************/ + +package org.eclipse.rdf4j.sail.memory; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import org.eclipse.rdf4j.common.transaction.IsolationLevels; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.sail.base.SailDataset; +import org.eclipse.rdf4j.sail.base.SailSink; +import org.eclipse.rdf4j.sail.base.SailSource; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +/** + * Verifies dataset.size() semantics for MemoryStore across explicit, inferred and mixed data. 
+ */ +public class MemoryDatasetSizeTest { + + private MemoryStore store; + + @BeforeEach + public void setUp() { + store = new MemoryStore(); + store.init(); + } + + @AfterEach + public void tearDown() { + if (store != null) { + store.shutDown(); + } + } + + @Test + public void explicitDatasetSize_countsOnlyExplicit() throws Exception { + ValueFactory vf = store.getValueFactory(); + IRI s = vf.createIRI("urn:explicit:s"); + IRI p = vf.createIRI("urn:p"); + IRI o = vf.createIRI("urn:o"); + + SailSource explicit = store.getSailStore().getExplicitSailSource(); + SailSource inferred = store.getSailStore().getInferredSailSource(); + + try (SailSink sink = explicit.sink(IsolationLevels.NONE)) { + sink.approve(s, p, o, null); + sink.flush(); + } + + try (SailDataset ds = explicit.dataset(IsolationLevels.NONE)) { + assertEquals(1L, ds.size(null, null, null), "explicit dataset should count explicit statements"); + } + try (SailDataset ds = inferred.dataset(IsolationLevels.NONE)) { + assertEquals(0L, ds.size(null, null, null), "inferred dataset should not include explicit statements"); + } + } + + @Test + public void inferredDatasetSize_countsOnlyInferred() throws Exception { + ValueFactory vf = store.getValueFactory(); + IRI s = vf.createIRI("urn:inferred:s"); + IRI p = vf.createIRI("urn:p"); + IRI o = vf.createIRI("urn:o"); + + SailSource explicit = store.getSailStore().getExplicitSailSource(); + SailSource inferred = store.getSailStore().getInferredSailSource(); + + try (SailSink sink = inferred.sink(IsolationLevels.NONE)) { + sink.approve(s, p, o, null); + sink.flush(); + } + + try (SailDataset ds = inferred.dataset(IsolationLevels.NONE)) { + assertEquals(1L, ds.size(null, null, null), "inferred dataset should count inferred statements"); + } + try (SailDataset ds = explicit.dataset(IsolationLevels.NONE)) { + assertEquals(0L, ds.size(null, null, null), "explicit dataset should not include inferred statements"); + } + } + + @Test + public void mixedDatasets_eachCountsOwn_andCombinedSums() throws Exception { + ValueFactory vf = store.getValueFactory(); + IRI s1 = vf.createIRI("urn:explicit:s"); + IRI s2 = vf.createIRI("urn:inferred:s"); + IRI p = vf.createIRI("urn:p"); + IRI o = vf.createIRI("urn:o"); + + SailSource explicit = store.getSailStore().getExplicitSailSource(); + SailSource inferred = store.getSailStore().getInferredSailSource(); + + try (SailSink sink = explicit.sink(IsolationLevels.NONE)) { + sink.approve(s1, p, o, null); + sink.flush(); + } + try (SailSink sink = inferred.sink(IsolationLevels.NONE)) { + sink.approve(s2, p, o, null); + sink.flush(); + } + + long explicitSize; + long inferredSize; + try (SailDataset ds = explicit.dataset(IsolationLevels.NONE)) { + explicitSize = ds.size(null, null, null); + } + try (SailDataset ds = inferred.dataset(IsolationLevels.NONE)) { + inferredSize = ds.size(null, null, null); + } + + assertEquals(1L, explicitSize, "explicit dataset should count explicit statements only"); + assertEquals(1L, inferredSize, "inferred dataset should count inferred statements only"); + assertEquals(2L, explicitSize + inferredSize, "combined explicit+inferred should sum to total statements"); + } +}
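
Taken together, the new tests pin down one size contract: `SailConnection.size(...)` counts explicit statements only, while each `SailSource` dataset (explicit or inferred) counts only statements of its own kind, so the two dataset counts sum to the full statement total. A minimal standalone sketch of that contract, mirroring the exact calls used in the MemoryStore tests above (the class name `SizeContractSketch` is illustrative only, not part of the change set):

```java
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.sail.memory.MemoryStore;
import org.eclipse.rdf4j.sail.memory.MemoryStoreConnection;

public class SizeContractSketch {
	public static void main(String[] args) {
		MemoryStore store = new MemoryStore();
		store.init();
		try (MemoryStoreConnection conn = (MemoryStoreConnection) store.getConnection()) {
			ValueFactory vf = store.getValueFactory();
			IRI ctx = vf.createIRI("urn:ctx");
			IRI p = vf.createIRI("urn:p");

			conn.begin();
			// one explicit and one inferred statement in the same context
			conn.addStatement(vf.createIRI("urn:s1"), p, vf.createLiteral("x"), ctx);
			conn.addInferredStatement(vf.createIRI("urn:s2"), p, vf.createLiteral("y"), ctx);
			conn.commit();

			// both counts are 1: size() and size(ctx) are explicit-only
			System.out.println(conn.size());    // 1
			System.out.println(conn.size(ctx)); // 1
		} finally {
			store.shutDown();
		}
	}
}
```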