diff --git a/src/main/java/org/apache/commons/text/similarity/SorensenDiceSimilarity.java b/src/main/java/org/apache/commons/text/similarity/SorensenDiceSimilarity.java
new file mode 100644
index 0000000000..e15cfe0958
--- /dev/null
+++ b/src/main/java/org/apache/commons/text/similarity/SorensenDiceSimilarity.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.function.Function;
+
+import org.apache.commons.lang3.StringUtils;
+
+/**
+ * A similarity algorithm indicating the proportion of adjacent character pairs
+ * (bigrams) shared by two character sequences.
+ *
+ * <p>The Sørensen-Dice coefficient is a statistic used for comparing the
+ * similarity of two samples. It was independently developed by the
+ * botanists Thorvald Sørensen and Lee Raymond Dice, who published in 1948
+ * and 1945 respectively. The index is known by several other names,
+ * especially Sørensen-Dice index, Sørensen index and Dice's coefficient.
+ * Other variations include the "similarity coefficient" or "index", such
+ * as Dice similarity coefficient (DSC).
+ *
+ * <p>This implementation scores {@code 2 * shared / (bigramsLeft + bigramsRight)}
+ * and is based on the Sørensen-Dice similarity algorithm from
+ * <a href="https://en.wikipedia.org/wiki/Dice-S%C3%B8rensen_coefficient">
+ * https://en.wikipedia.org/wiki/Dice-S%C3%B8rensen_coefficient</a>.
+ *
+ * @since 1.13
+ */
+public class SorensenDiceSimilarity implements SimilarityScore<Double> {
+
+    /**
+     * Number of bits the first char of a bigram is shifted left so that both chars fit in one int.
+     */
+    private static final int SHIFT_NUMBER = 16;
+
+    /**
+     * Converter function for conversion of a character sequence to its bigrams.
+     */
+    private final Function<CharSequence, Collection<Integer>> converter = new SorensenDiceConverter();
+
+    /**
+     * Measures the overlap of the two bigram collections created from a pair of character sequences.
+     * See {@link IntersectionSimilarity}.
+     */
+    private final IntersectionSimilarity<Integer> similarity = new IntersectionSimilarity<>(this.converter);
+
+    /**
+     * Calculates the Sorensen-Dice similarity of two character sequences. Equal inputs score
+     * 1.0; otherwise an input shorter than two characters (no bigram) scores 0.0.
+     *
+     * <pre>
+     * similarity.apply(null, null) = IllegalArgumentException
+     * similarity.apply("foo", null) = IllegalArgumentException
+     * similarity.apply(null, "foo") = IllegalArgumentException
+     * similarity.apply("night", "nacht") = 0.25
+     * similarity.apply("", "") = 1.0
+     * similarity.apply("foo", "foo") = 1.0
+     * similarity.apply("foo", "foo ") = 0.8
+     * similarity.apply("foo", " foo") = 0.8
+     * similarity.apply("foo", " foo ") ≈ 0.66…
+     * similarity.apply("", "a") = 0.0
+     * similarity.apply("aaapppp", "") = 0.0
+     * similarity.apply("frog", "fog") = 0.4
+     * similarity.apply("fly", "ant") = 0.0
+     * similarity.apply("elephant", "hippo") = 0.0
+     * similarity.apply("hippo", "elephant") = 0.0
+     * similarity.apply("hippo", "zzzzzzzz") = 0.0
+     * similarity.apply("hello", "hallo") = 0.5
+     * similarity.apply("ABC Corporation", "ABC Corp") ≈ 0.66…
+     * similarity.apply("D N H Enterprises Inc", "D &amp; H Enterprises, Inc.") ≈ 0.80…
+     * similarity.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness") ≈ 0.76…
+     * similarity.apply("PENNSYLVANIA", "PENNCISYLVNIA") ≈ 0.69…
+     * </pre>
+     *
+     * @param left the first CharSequence, must not be null
+     * @param right the second CharSequence, must not be null
+     * @return the Sorensen-Dice similarity score of the two inputs
+     * @throws IllegalArgumentException if either CharSequence input is {@code null}
+     */
+    @Override
+    public Double apply(final CharSequence left, final CharSequence right) {
+
+        if (left == null || right == null) {
+            throw new IllegalArgumentException("CharSequences must not be null");
+        }
+
+        if (StringUtils.equals(left, right)) {
+            return 1d;
+        }
+
+        // A sequence shorter than two characters yields no bigram, so nothing can overlap.
+        if (left.length() < 2 || right.length() < 2) {
+            return 0d;
+        }
+
+        final IntersectionResult overlap = similarity.apply(left, right);
+
+        final int total = overlap.getSizeA() + overlap.getSizeB();
+        final long intersection = overlap.getIntersection();
+
+        return (2.0d * intersection) / total;
+    }
+
+    /**
+     * Converter that packs every pair of adjacent chars of a sequence into one int, in order of occurrence.
+     */
+    private static final class SorensenDiceConverter implements Function<CharSequence, Collection<Integer>> {
+        @Override
+        public Collection<Integer> apply(final CharSequence cs) {
+            final int length = cs.length();
+            final List<Integer> list = new ArrayList<>(length);
+            if (length > 1) {
+                char ch2 = cs.charAt(0);
+                for (int i = 1; i < length; i++) {
+                    final char ch1 = ch2;
+                    ch2 = cs.charAt(i);
+                    list.add((ch1 << SHIFT_NUMBER) | ch2);
+                }
+            }
+            return list;
+        }
+    }
+}
diff --git a/src/test/java/org/apache/commons/text/similarity/SorensenDiceSimilarityTest.java b/src/test/java/org/apache/commons/text/similarity/SorensenDiceSimilarityTest.java
new file mode 100644
index 0000000000..09c852d0d4
--- /dev/null
+++ b/src/test/java/org/apache/commons/text/similarity/SorensenDiceSimilarityTest.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.text.similarity;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
+import java.math.RoundingMode;
+import java.text.DecimalFormat;
+
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Unit tests for {@link SorensenDiceSimilarity}.
+ */
+public class SorensenDiceSimilarityTest {
+
+    private static SorensenDiceSimilarity similarity;
+
+    @BeforeAll
+    public static void setUp() {
+        similarity = new SorensenDiceSimilarity();
+    }
+
+    @Test
+    public void test() {
+        assertEquals(0.25d, similarity.apply("night", "nacht"));
+    }
+
+    @Test
+    public void testGetSorensenDiceSimilarity_StringString() {
+        assertEquals(1d, similarity.apply("", ""));
+        assertEquals(0d, similarity.apply("", "a"));
+        assertEquals(0d, similarity.apply("a", ""));
+        assertEquals(1d, similarity.apply("a", "a"));
+        assertEquals(0d, similarity.apply("a", "b"));
+        assertEquals(1.0d, similarity.apply("foo", "foo"));
+        assertEquals(0.8d, similarity.apply("foo", "foo "));
+        assertEquals(0.8d, similarity.apply("foo", " foo"));
+        assertEquals(0.66d, printTwoDecimals(similarity.apply("foo", " foo ")));
+        assertEquals(0.4d, similarity.apply("frog", "fog"));
+        assertEquals(0.0d, similarity.apply("fly", "ant"));
+        assertEquals(0.0d, similarity.apply("elephant", "hippo"));
+        assertEquals(0.0d, similarity.apply("hippo", "elephant"));
+        assertEquals(0.0d, similarity.apply("hippo", "zzzzzzzz"));
+        assertEquals(0.25d, similarity.apply("night", "nacht"));
+        assertEquals(0.5d, similarity.apply("hello", "hallo"));
+        assertEquals(0.0d, similarity.apply("aaapppp", ""));
+        assertEquals(0.66d, printTwoDecimals(similarity.apply("ABC Corporation", "ABC Corp")));
+        assertEquals(0.80d, printTwoDecimals(similarity.apply("D N H Enterprises Inc", "D & H Enterprises, Inc.")));
+        assertEquals(0.76d, printTwoDecimals(similarity.apply("My Gym Children's Fitness Center", "My Gym. Childrens Fitness")));
+        assertEquals(0.69d, printTwoDecimals(similarity.apply("PENNSYLVANIA", "PENNCISYLVNIA")));
+        assertEquals(0.92d, printTwoDecimals(similarity.apply("/opt/software1", "/opt/software2")));
+        assertEquals(0.60d, printTwoDecimals(similarity.apply("aaabcd", "aaacdb")));
+        assertEquals(0.63d, printTwoDecimals(similarity.apply("John Horn", "John Hopkins")));
+    }
+
+    @Test
+    public void testGetSorensenDiceSimilarity_NullNull() {
+        assertThrows(IllegalArgumentException.class, () -> similarity.apply(null, null));
+    }
+
+    @Test
+    public void testGetSorensenDiceSimilarity_StringNull() {
+        assertThrows(IllegalArgumentException.class, () -> similarity.apply(" ", null));
+    }
+
+    @Test
+    public void testGetSorensenDiceSimilarity_NullString() {
+        assertThrows(IllegalArgumentException.class, () -> similarity.apply(null, "clear"));
+    }
+
+    /**
+     * Truncates the value to two decimal places, independently of the JVM's default locale.
+     *
+     * @param value the double value
+     * @return the value rounded down to two places; ROOT symbols keep the text parseable as a double
+     */
+    public static double printTwoDecimals(double value) {
+        final DecimalFormat df = new DecimalFormat("#.##", java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));
+        df.setRoundingMode(RoundingMode.DOWN);
+        return Double.parseDouble(df.format(value));
+    }
+}