Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
ec5c3ed
Simple Oracle Embedding Store Example
psilberk Sep 2, 2024
e47970a
Simple Example #2
psilberk Sep 2, 2024
947ec3d
Merge branch 'main' into main
psilberk Sep 2, 2024
549f534
Update oracle-example/pom.xml
dliubarskyi Sep 5, 2024
ef1a971
Update oracle-example/pom.xml
dliubarskyi Sep 5, 2024
1f3dbf8
Update oracle-example/pom.xml
dliubarskyi Sep 5, 2024
3350d3f
Add examples
hackerdave Apr 10, 2025
9892958
Update version
hackerdave Apr 10, 2025
a1abcd4
Add loadOnnxModel example
hackerdave Apr 16, 2025
2b66a3a
Add comments
hackerdave Apr 25, 2025
e20467c
Add ingest example
hackerdave Apr 25, 2025
5171b60
Formatting and comments
hackerdave Apr 30, 2025
0b54bdb
Group preferences together
hackerdave Apr 30, 2025
26230c4
Display chunks, prompt user
hackerdave May 5, 2025
b58dbd9
Add manual example
hackerdave May 8, 2025
645281a
Add directory and table examples
hackerdave May 8, 2025
4102e15
Add comment about environment variables
hackerdave May 8, 2025
2184fa6
Add example files
hackerdave May 8, 2025
eaf9764
Add third-party examples
hackerdave May 9, 2025
47e9d0f
Update comments
hackerdave May 9, 2025
6c0b359
Rename to segment
hackerdave May 20, 2025
4aff691
Change to directory, add summary
hackerdave May 20, 2025
6c55c7c
Create biography-of-john-doe.txt
hackerdave May 20, 2025
ac7aad8
Comments
hackerdave May 20, 2025
95224bd
Move example files to resources
hackerdave May 21, 2025
9674e79
Split ingest example
hackerdave May 21, 2025
79795e3
Change back to Charlie example
hackerdave May 27, 2025
a63661a
Revert "Change back to Charlie example"
hackerdave May 27, 2025
41078ec
Display metadata before text
hackerdave May 27, 2025
18c3389
Merge pull request #1 from psilberk/oracle-doc-loader
psilberk Oct 8, 2025
c79c06c
Merge branch 'main' into main
hackerdave Oct 8, 2025
8b63297
Merge branch 'main' into main
psilberk Oct 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions oracle-example/src/main/java/OracleDocumentLoaderExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.loader.oracle.DirectoryPreference;
import dev.langchain4j.data.document.loader.oracle.FilePreference;
import dev.langchain4j.data.document.loader.oracle.OracleDocumentLoader;
import dev.langchain4j.data.document.loader.oracle.TablePreference;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.List;
import oracle.ucp.jdbc.PoolDataSource;
import oracle.ucp.jdbc.PoolDataSourceFactory;

/**
 * Demonstrates loading documents from the file system or from a table.
 * The documents can be in any format supported by the Oracle Text filter,
 * including Word, PDF, HTML, and text files. If it is a rich text document
 * like Word or PDF, it will be converted into plain text and contain any
 * metadata associated with it.
 *
 * <p>This example requires the following environment variables:
 * <ul>
 *   <li>ORACLE_JDBC_URL</li>
 *   <li>ORACLE_JDBC_USER</li>
 *   <li>ORACLE_JDBC_PASSWORD</li>
 *   <li>DEMO_FILE</li>
 *   <li>DEMO_DIRECTORY</li>
 *   <li>DEMO_OWNER</li>
 *   <li>DEMO_TABLE</li>
 *   <li>DEMO_COLUMN</li>
 * </ul>
 */
public class OracleDocumentLoaderExample {

    /** Shared JSON mapper used to serialize the loader preference objects. */
    private static final ObjectMapper MAPPER = new ObjectMapper();

    /**
     * Runs the three loader demonstrations in order: single file, directory, table.
     *
     * @param args unused
     * @throws SQLException if the data source cannot be configured or a connection fails
     * @throws IOException if a preference cannot be serialized or loading reports an I/O error
     */
    public static void main(String[] args) throws SQLException, IOException {
        loadFromFile();
        loadFromDirectory();
        loadFromTable();
    }

    /** Loads the single file named by DEMO_FILE and prints its documents. */
    private static void loadFromFile() throws IOException, SQLException {
        // Can build pref as a string
        // String pref = "{\"file\": \"...\"}";
        // Alternatively, can use FilePreference
        FilePreference loaderPref = new FilePreference();
        loaderPref.setFilename(requireEnv("DEMO_FILE"));

        loadAndPrint(MAPPER.writeValueAsString(loaderPref));
    }

    /** Loads the documents found in the directory named by DEMO_DIRECTORY and prints them. */
    private static void loadFromDirectory() throws IOException, SQLException {
        // Can build pref as a string
        // String pref = "{\"dir\": \"...\"}";
        // Alternatively, can use DirectoryPreference
        DirectoryPreference loaderPref = new DirectoryPreference();
        loaderPref.setDirectory(requireEnv("DEMO_DIRECTORY"));

        loadAndPrint(MAPPER.writeValueAsString(loaderPref));
    }

    /** Loads documents from the DEMO_OWNER.DEMO_TABLE table, column DEMO_COLUMN, and prints them. */
    private static void loadFromTable() throws IOException, SQLException {
        // Can build pref as a string
        // String pref = "{\"owner\": \"...\", \"tablename\": \"...\", \"colname\": \"...\"}";
        // Alternatively, can use TablePreference
        TablePreference loaderPref = new TablePreference();
        loaderPref.setOwner(requireEnv("DEMO_OWNER"));
        loaderPref.setTableName(requireEnv("DEMO_TABLE"));
        loaderPref.setColumnName(requireEnv("DEMO_COLUMN"));

        loadAndPrint(MAPPER.writeValueAsString(loaderPref));
    }

    /**
     * Opens a connection, loads the documents described by {@code pref}, and prints
     * the metadata and text of each one.
     *
     * @param pref loader preference as a JSON string
     */
    private static void loadAndPrint(String pref) throws IOException, SQLException {
        // try-with-resources closes the connection even if loading fails
        try (Connection conn = createDataSource().getConnection()) {
            OracleDocumentLoader loader = new OracleDocumentLoader(conn);

            List<Document> docs = loader.loadDocuments(pref);
            for (Document doc : docs) {
                System.out.println("metadata=" + doc.metadata());
                System.out.println("text=" + doc.text());
            }
        }
    }

    /** Builds a pool data source configured from the ORACLE_JDBC_* environment variables. */
    private static PoolDataSource createDataSource() throws SQLException {
        PoolDataSource pds = PoolDataSourceFactory.getPoolDataSource();
        pds.setConnectionFactoryClassName("oracle.jdbc.pool.OracleDataSource");
        pds.setURL(requireEnv("ORACLE_JDBC_URL"));
        pds.setUser(requireEnv("ORACLE_JDBC_USER"));
        pds.setPassword(requireEnv("ORACLE_JDBC_PASSWORD"));
        return pds;
    }

    /**
     * Returns the value of an environment variable, failing fast when it is not set
     * so that a null is never passed on to the data source or a preference.
     *
     * @param name environment variable name
     * @return the variable's value, never null
     * @throws IllegalStateException if the variable is not set
     */
    private static String requireEnv(String name) {
        String value = System.getenv(name);
        if (value == null) {
            throw new IllegalStateException("Missing required environment variable: " + name);
        }
        return value;
    }
}
46 changes: 46 additions & 0 deletions oracle-example/src/main/java/OracleDocumentSplitterExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.loader.oracle.OracleDocumentLoader;
import dev.langchain4j.data.document.splitter.oracle.OracleDocumentSplitter;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.List;
import oracle.ucp.jdbc.PoolDataSource;
import oracle.ucp.jdbc.PoolDataSourceFactory;

/**
 * Demonstrates chunking or splitting text in a document. You can customize how
 * to split the content such as by words, characters, or vocabulary (for tokens)
 * to match a tokenizer in the preference.
 *
 * <p>This example requires the following environment variables:
 * <ul>
 *   <li>ORACLE_JDBC_URL</li>
 *   <li>ORACLE_JDBC_USER</li>
 *   <li>ORACLE_JDBC_PASSWORD</li>
 *   <li>DEMO_FILE</li>
 * </ul>
 */
public class OracleDocumentSplitterExample {

    /**
     * Loads the file named by DEMO_FILE, splits each loaded document's text by
     * words (max 100), and prints every resulting segment.
     *
     * @param args unused
     * @throws SQLException if the data source cannot be configured or a connection fails
     * @throws IOException if loading reports an I/O error
     */
    public static void main(String[] args) throws SQLException, IOException {
        PoolDataSource pds = PoolDataSourceFactory.getPoolDataSource();
        pds.setConnectionFactoryClassName("oracle.jdbc.pool.OracleDataSource");
        pds.setURL(requireEnv("ORACLE_JDBC_URL"));
        pds.setUser(requireEnv("ORACLE_JDBC_USER"));
        pds.setPassword(requireEnv("ORACLE_JDBC_PASSWORD"));

        // The file name is inserted into the JSON verbatim, so it must not contain
        // characters that need JSON escaping (double quotes, backslashes).
        String loadPref = "{\"file\": \"" + requireEnv("DEMO_FILE") + "\"}";
        String splitPref = "{\"by\": \"words\", \"max\": 100}";

        // try-with-resources closes the connection even if loading or splitting fails
        try (Connection conn = pds.getConnection()) {
            OracleDocumentLoader loader = new OracleDocumentLoader(conn);
            OracleDocumentSplitter splitter = new OracleDocumentSplitter(conn, splitPref);

            List<Document> docs = loader.loadDocuments(loadPref);
            for (Document doc : docs) {
                String[] segments = splitter.split(doc.text());
                for (String segment : segments) {
                    System.out.println("segment=" + segment);
                }
            }
        }
    }

    /**
     * Returns the value of an environment variable, failing fast when it is not set
     * so that the text "null" is never concatenated into a preference.
     *
     * @param name environment variable name
     * @return the variable's value, never null
     * @throws IllegalStateException if the variable is not set
     */
    private static String requireEnv(String name) {
        String value = System.getenv(name);
        if (value == null) {
            throw new IllegalStateException("Missing required environment variable: " + name);
        }
        return value;
    }
}
99 changes: 99 additions & 0 deletions oracle-example/src/main/java/OracleEmbeddingModelExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.oracle.OracleEmbeddingModel;
import dev.langchain4j.model.output.Response;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import oracle.ucp.jdbc.PoolDataSource;
import oracle.ucp.jdbc.PoolDataSourceFactory;

/**
 * Demonstrates getting the vector embeddings. You can customize which provider
 * to use such as database for an ONNX model or with a third-party provider.
 *
 * <p>This example requires the following environment variables:
 * <ul>
 *   <li>ORACLE_JDBC_URL</li>
 *   <li>ORACLE_JDBC_USER</li>
 *   <li>ORACLE_JDBC_PASSWORD</li>
 *   <li>DEMO_ONNX_DIR</li>
 *   <li>DEMO_ONNX_FILE</li>
 *   <li>DEMO_ONNX_MODEL</li>
 *   <li>DEMO_CREDENTIAL</li>
 * </ul>
 */
public class OracleEmbeddingModelExample {

    /**
     * Runs the database-provider demonstration followed by the third-party one.
     *
     * @param args unused
     * @throws SQLException if the data source cannot be configured or a database call fails
     */
    public static void main(String[] args) throws SQLException {
        databaseEmbeddings();
        thirdPartyEmbeddings();
    }

    /**
     * Loads an ONNX model into the database and uses it, through the "database"
     * provider, to embed sample text.
     *
     * @throws SQLException if the data source cannot be configured or a database call fails
     */
    public static void databaseEmbeddings() throws SQLException {
        String onnxModel = requireEnv("DEMO_ONNX_MODEL");

        // try-with-resources closes the connection even if a step fails
        try (Connection conn = openConnection()) {
            // load an ONNX model into the database
            // remember to create a directory alias with
            // create or replace directory MODEL_DIR as '/path/to/model';
            OracleEmbeddingModel.loadOnnxModel(
                    conn,
                    requireEnv("DEMO_ONNX_DIR"),
                    requireEnv("DEMO_ONNX_FILE"),
                    onnxModel);

            // The model name is inserted into the JSON verbatim.
            String pref = "{\"provider\": \"database\", \"model\": \"" + onnxModel + "\"}";

            printSampleEmbeddings(new OracleEmbeddingModel(conn, pref));
        }
    }

    /**
     * Uses the "ocigenai" provider, referring to the credential named by
     * DEMO_CREDENTIAL, to embed sample text.
     *
     * @throws SQLException if the data source cannot be configured or a database call fails
     */
    public static void thirdPartyEmbeddings() throws SQLException {
        // For a third-party provider, remember to create a credential
        // with dbms_vector.create_credential() and then refer to it here
        String pref = "{\n" +
                "  \"provider\": \"ocigenai\",\n" +
                "  \"credential_name\": \"" + requireEnv("DEMO_CREDENTIAL") + "\",\n" +
                "  \"url\": \"https://inference.generativeai.us-chicago-1.oci.oraclecloud.com/20231130/actions/embedText\",\n" +
                "  \"model\": \"cohere.embed-english-light-v3.0\"\n" +
                "}";

        // try-with-resources closes the connection even if a step fails
        try (Connection conn = openConnection()) {
            printSampleEmbeddings(new OracleEmbeddingModel(conn, pref));
        }
    }

    /**
     * Embeds one string and then a list of three text segments with the given
     * model, printing each result.
     *
     * @param model embedding model to exercise
     */
    private static void printSampleEmbeddings(OracleEmbeddingModel model) {
        // embed a single string
        Response<Embedding> response = model.embed("I love Java");
        Embedding embedding = response.content();
        System.out.println(embedding);

        // embed a list of text
        List<TextSegment> textSegments = new ArrayList<>();
        textSegments.add(TextSegment.from("I like soccer."));
        textSegments.add(TextSegment.from("I love Stephen King."));
        textSegments.add(TextSegment.from("The weather is good today."));
        Response<List<Embedding>> resp = model.embedAll(textSegments);
        System.out.println(resp.content());
    }

    /** Opens a connection from a pool data source configured from the ORACLE_JDBC_* variables. */
    private static Connection openConnection() throws SQLException {
        PoolDataSource pds = PoolDataSourceFactory.getPoolDataSource();
        pds.setConnectionFactoryClassName("oracle.jdbc.pool.OracleDataSource");
        pds.setURL(requireEnv("ORACLE_JDBC_URL"));
        pds.setUser(requireEnv("ORACLE_JDBC_USER"));
        pds.setPassword(requireEnv("ORACLE_JDBC_PASSWORD"));
        return pds.getConnection();
    }

    /**
     * Returns the value of an environment variable, failing fast when it is not set
     * so that the text "null" is never concatenated into a preference.
     *
     * @param name environment variable name
     * @return the variable's value, never null
     * @throws IllegalStateException if the variable is not set
     */
    private static String requireEnv(String name) {
        String value = System.getenv(name);
        if (value == null) {
            throw new IllegalStateException("Missing required environment variable: " + name);
        }
        return value;
    }
}
120 changes: 120 additions & 0 deletions oracle-example/src/main/java/OracleIngestExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.loader.oracle.OracleDocumentLoader;
import dev.langchain4j.data.document.splitter.oracle.OracleDocumentSplitter;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.oracle.OracleEmbeddingModel;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingSearchResult;
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
import static dev.langchain4j.store.embedding.oracle.CreateOption.CREATE_OR_REPLACE;
import dev.langchain4j.store.embedding.oracle.EmbeddingTable;
import dev.langchain4j.store.embedding.oracle.OracleEmbeddingStore;
import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.List;
import oracle.ucp.jdbc.PoolDataSource;
import oracle.ucp.jdbc.PoolDataSourceFactory;

/**
 * Demonstrates how to ingest documents using an OracleEmbeddingStore to hide
 * the manual steps of ingesting into an embedding store for search/retrieval.
 *
 * <p>This example requires the following environment variables:
 * <ul>
 *   <li>ORACLE_JDBC_URL</li>
 *   <li>ORACLE_JDBC_USER</li>
 *   <li>ORACLE_JDBC_PASSWORD</li>
 *   <li>DEMO_ONNX_DIR</li>
 *   <li>DEMO_ONNX_FILE</li>
 *   <li>DEMO_ONNX_MODEL</li>
 *   <li>DEMO_DIRECTORY</li>
 * </ul>
 */
public class OracleIngestExample {

    /**
     * Loads an ONNX model, ingests the documents found in DEMO_DIRECTORY into an
     * embedding store, then runs a sample vector search and prints the matches.
     *
     * @param args unused
     * @throws SQLException if the data source cannot be configured or a database call fails
     * @throws IOException if loading reports an I/O error
     */
    public static void main(String[] args) throws SQLException, IOException {
        PoolDataSource pds = PoolDataSourceFactory.getPoolDataSource();
        pds.setConnectionFactoryClassName("oracle.jdbc.pool.OracleDataSource");
        pds.setURL(requireEnv("ORACLE_JDBC_URL"));
        pds.setUser(requireEnv("ORACLE_JDBC_USER"));
        pds.setPassword(requireEnv("ORACLE_JDBC_PASSWORD"));

        String onnxModel = requireEnv("DEMO_ONNX_MODEL");

        // set the loader, splitter, and embedding preferences
        // (environment values are inserted into the JSON verbatim)
        String loaderPref = "{\"dir\": \"" + requireEnv("DEMO_DIRECTORY") + "\"}";
        String splitterPref = "{\"by\": \"words\", \"max\": 100}";
        String embedderPref = "{\"provider\": \"database\", \"model\": \"" + onnxModel + "\"}";

        // The loader, splitter, and embedding model share this connection;
        // try-with-resources closes it even if a step fails.
        try (Connection conn = pds.getConnection()) {
            // load the ONNX model for embedding
            OracleEmbeddingModel.loadOnnxModel(
                    conn,
                    requireEnv("DEMO_ONNX_DIR"),
                    requireEnv("DEMO_ONNX_FILE"),
                    onnxModel);

            OracleDocumentLoader loader = new OracleDocumentLoader(conn);
            OracleDocumentSplitter splitter = new OracleDocumentSplitter(conn, splitterPref);
            OracleEmbeddingModel embeddingModel = new OracleEmbeddingModel(conn, embedderPref);

            // setup the embedding store

            // set column names for the output table
            String tableName = "TEST";
            String idColumn = "ID";
            String embeddingColumn = "EMBEDDING";
            String textColumn = "TEXT";
            String metadataColumn = "METADATA";

            // build() should create a table with the configured names
            // (the store is given the data source rather than the connection above)
            OracleEmbeddingStore embeddingStore = OracleEmbeddingStore.builder()
                    .dataSource(pds)
                    .embeddingTable(EmbeddingTable.builder()
                            .createOption(CREATE_OR_REPLACE)
                            .name(tableName)
                            .idColumn(idColumn)
                            .embeddingColumn(embeddingColumn)
                            .textColumn(textColumn)
                            .metadataColumn(metadataColumn)
                            .build())
                    .build();

            // build an ingestor with the following components
            EmbeddingStoreIngestor ingestor = EmbeddingStoreIngestor.builder()
                    .documentSplitter(splitter)
                    .embeddingModel(embeddingModel)
                    .embeddingStore(embeddingStore)
                    .build();

            // load and ingest the documents
            // this will call the splitter to split into segments,
            // embedding model to get the embeddings, and then store the
            // embeddings into the embedding store for further search / retrieval
            List<Document> docs = loader.loadDocuments(loaderPref);
            ingestor.ingest(docs);

            // get the question
            String question = "Who is John Doe?";

            // get the vector representation
            Embedding questionAsVector = embeddingModel.embed(question).content();

            // perform the vector search
            EmbeddingSearchResult<TextSegment> result = embeddingStore.search(
                    EmbeddingSearchRequest.builder()
                            .queryEmbedding(questionAsVector)
                            .maxResults(3)
                            .minScore(0.6)
                            .build()
            );

            // display the results
            System.out.println(question);
            List<EmbeddingMatch<TextSegment>> results = result.matches();
            for (EmbeddingMatch<TextSegment> match : results) {
                System.out.println("\nScore: " + match.score());
                System.out.println("Metadata: " + match.embedded().metadata());
                System.out.println("Text: " + match.embedded().text());
            }
        }
    }

    /**
     * Returns the value of an environment variable, failing fast when it is not set
     * so that the text "null" is never concatenated into a preference.
     *
     * @param name environment variable name
     * @return the variable's value, never null
     * @throws IllegalStateException if the variable is not set
     */
    private static String requireEnv(String name) {
        String value = System.getenv(name);
        if (value == null) {
            throw new IllegalStateException("Missing required environment variable: " + name);
        }
        return value;
    }
}
Loading