diff --git a/src/main/java/org/apache/commons/text/CaseUtils.java b/src/main/java/org/apache/commons/text/CaseUtils.java index c3a7591a38..63fd560106 100644 --- a/src/main/java/org/apache/commons/text/CaseUtils.java +++ b/src/main/java/org/apache/commons/text/CaseUtils.java @@ -72,7 +72,7 @@ public static String toCamelCase(String str, final boolean capitalizeFirstLetter } str = str.toLowerCase(); final int strLen = str.length(); - final int[] newCodePoints = new int[strLen]; + final int[] newCodePoints = new int[str.codePointCount(0, strLen)]; int outOffset = 0; final Set delimiterSet = toDelimiterSet(delimiters); boolean capitalizeNext = capitalizeFirstLetter; @@ -96,11 +96,100 @@ public static String toCamelCase(String str, final boolean capitalizeFirstLetter return new String(newCodePoints, 0, outOffset); } + private static String toDelimiterCase(String str, final char newDelimiter, final char... delimiters) { + if (StringUtils.isEmpty(str)) { + return str; + } + str = str.toLowerCase(); + final int strLen = str.length(); + final int[] newCodePoints = new int[str.codePointCount(0, strLen)]; + int outOffset = 0; + final Set delimiterSet = toDelimiterSet(delimiters); + boolean toAddDelimiter = false; + for (int index = 0; index < strLen;) { + final int codePoint = str.codePointAt(index); + + if (delimiterSet.contains(codePoint)) { + toAddDelimiter = outOffset != 0; + index += Character.charCount(codePoint); + } else { + if (toAddDelimiter) { + newCodePoints[outOffset++] = newDelimiter; + toAddDelimiter = false; + } + newCodePoints[outOffset++] = codePoint; + index += Character.charCount(codePoint); + } + } + + return new String(newCodePoints, 0, outOffset); + } + + /** + * Converts all the delimiter separated words in a String into snake_case, + * that is each word is separated by an underscore (_) character and the String + * is converted to lower case. + * + *

The delimiters represent a set of characters understood to separate words.

+ * + *

A {@code null} input String returns {@code null}.

+ * + *

An input string with only delimiter characters returns {@code ""}.

+ * + *
+     * CaseUtils.toSnakeCase(null)                                 = null
+     * CaseUtils.toSnakeCase("", *)                                = ""
+     * CaseUtils.toSnakeCase("a b c @def", null)                   = "a_b_c_@def"
+     * CaseUtils.toSnakeCase("a b c @def", new char[0])            = "a_b_c_@def"
+     * CaseUtils.toSnakeCase("To.Snake.Case", new char[]{'.'})     = "to_snake_case"
+     * CaseUtils.toSnakeCase(" to @ Snake case", new char[]{'@'})  = "to_snake_case"
+     * CaseUtils.toSnakeCase(" @to @ Snake case", new char[]{'@'}) = "to_snake_case"
+     * CaseUtils.toSnakeCase(" @", new char[]{'@'})                = ""
+     * 
+ * + * @param str the String to be converted to snake_case, may be null + * @param delimiters set of characters to determine a new word, null and/or empty array means whitespace + * @return snake_case of String, {@code null} if null String input + */ + public static String toSnakeCase(String str, final char... delimiters) { + return toDelimiterCase(str, '_', delimiters); + } + + /** + * Converts all the delimiter separated words in a String into kebab-case, + * that is each word is separated by a dash (-) character and the String + * is converted to lower case. + * + *

The delimiters represent a set of characters understood to separate words.

+ * + *

A {@code null} input String returns {@code null}.

+ * + *

An input string with only delimiter characters returns {@code ""}.

+ * + *
+     * CaseUtils.toKebabCase(null)                                 = null
+     * CaseUtils.toKebabCase("", *)                                = ""
+     * CaseUtils.toKebabCase("a b c @def", null)                   = "a-b-c-@def"
+     * CaseUtils.toKebabCase("a b c @def", new char[0])            = "a-b-c-@def"
+     * CaseUtils.toKebabCase("To.Kebab.Case", new char[]{'.'})     = "to-kebab-case"
+     * CaseUtils.toKebabCase(" to @ Kebab case", new char[]{'@'})  = "to-kebab-case"
+     * CaseUtils.toKebabCase(" @to @ Kebab case", new char[]{'@'}) = "to-kebab-case"
+     * CaseUtils.toKebabCase(" @", new char[]{'@'})                = ""
+     * 
+ * + * @param str the String to be converted to kebab-case, may be null + * @param delimiters set of characters to determine a new word, null and/or empty array means whitespace + * @return kebab-case of String, {@code null} if null String input + */ + public static String toKebabCase(String str, final char... delimiters) { + return toDelimiterCase(str, '-', delimiters); + } + /** * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added * as the default value. The generated hash set provides O(1) lookup time. * - * @param delimiters set of characters to determine capitalization, null means whitespace + * @param delimiters set of characters to determine words, null means whitespace * @return Set */ private static Set toDelimiterSet(final char[] delimiters) { diff --git a/src/test/java/org/apache/commons/text/CaseUtilsTest.java b/src/test/java/org/apache/commons/text/CaseUtilsTest.java index f7195e708c..ba5a00c4d6 100644 --- a/src/test/java/org/apache/commons/text/CaseUtilsTest.java +++ b/src/test/java/org/apache/commons/text/CaseUtilsTest.java @@ -73,4 +73,62 @@ public void testToCamelCase() { assertThat(CaseUtils.toCamelCase("\uD800\uDF00\uD800\uDF01\uD800\uDF14\uD800\uDF02\uD800\uDF03", true, '\uD800', '\uDF14')).isEqualTo("\uD800\uDF00\uD800\uDF01\uD800\uDF02\uD800\uDF03"); } + + @Test + public void testToSnakeCase() { + assertThat(CaseUtils.toSnakeCase(null, null)).isNull(); + assertThat(CaseUtils.toSnakeCase("", null)).isEqualTo(""); + assertThat(CaseUtils.toSnakeCase(" ", null)).isEqualTo(""); + assertThat(CaseUtils.toSnakeCase("a b c @def", null)).isEqualTo("a_b_c_@def"); + assertThat(CaseUtils.toSnakeCase("a b c @def")).isEqualTo("a_b_c_@def"); + assertThat(CaseUtils.toSnakeCase("a b c @def", '_')).isEqualTo("a_b_c_@def"); + assertThat(CaseUtils.toSnakeCase("a_b_c_@def", '_')).isEqualTo("a_b_c_@def"); + assertThat(CaseUtils.toSnakeCase("_a___b__c_@def", '_')).isEqualTo("a_b_c_@def"); + + final char[] chars = {'-', 
'+', ' ', '@'}; + assertThat(CaseUtils.toSnakeCase("-+@ ", chars)).isEqualTo(""); + assertThat(CaseUtils.toSnakeCase(" to-SNAKE-cASE", chars)).isEqualTo("to_snake_case"); + assertThat(CaseUtils.toSnakeCase("@@@@ to+SNAKE@cASE ", chars)).isEqualTo("to_snake_case"); + assertThat(CaseUtils.toSnakeCase("To+SN+AK E@cASE", chars)).isEqualTo("to_sn_ak_e_case"); + + assertThat(CaseUtils.toSnakeCase("To.Snake.Case", '.')).isEqualTo("to_snake_case"); + assertThat(CaseUtils.toSnakeCase("To.Snake-Case", '-', '.')).isEqualTo("to_snake_case"); + assertThat(CaseUtils.toSnakeCase(" to @ Snake case", '-', '@')).isEqualTo("to_snake_case"); + assertThat(CaseUtils.toSnakeCase(" @to @ Snake case", '-', '@')).isEqualTo("to_snake_case"); + + assertThat(CaseUtils.toSnakeCase("tosnakecase")).isEqualTo("tosnakecase"); + + assertThat(CaseUtils.toSnakeCase("\uD800\uDF00 \uD800\uDF02")).isEqualTo("\uD800\uDF00_\uD800\uDF02"); + assertThat(CaseUtils.toSnakeCase("\uD800\uDF00\uD800\uDF01\uD800\uDF14\uD800\uDF02\uD800\uDF03", '\uD800', + '\uDF14')).isEqualTo("\uD800\uDF00\uD800\uDF01_\uD800\uDF02\uD800\uDF03"); + } + + @Test + public void testToKebabCase() { + assertThat(CaseUtils.toKebabCase(null, null)).isNull(); + assertThat(CaseUtils.toKebabCase("", null)).isEqualTo(""); + assertThat(CaseUtils.toKebabCase(" ", null)).isEqualTo(""); + assertThat(CaseUtils.toKebabCase("a b c @def", null)).isEqualTo("a-b-c-@def"); + assertThat(CaseUtils.toKebabCase("a b c @def")).isEqualTo("a-b-c-@def"); + assertThat(CaseUtils.toKebabCase("a b c @def", '-')).isEqualTo("a-b-c-@def"); + assertThat(CaseUtils.toKebabCase("a-b-c-@def", '-')).isEqualTo("a-b-c-@def"); + assertThat(CaseUtils.toKebabCase("-a---b--c-@def", '-')).isEqualTo("a-b-c-@def"); + + final char[] chars = {'-', '+', ' ', '@'}; + assertThat(CaseUtils.toKebabCase("-+@ ", chars)).isEqualTo(""); + assertThat(CaseUtils.toKebabCase(" to-KEBAB-cASE", chars)).isEqualTo("to-kebab-case"); + assertThat(CaseUtils.toKebabCase("@@@@ to+KEBAB@cASE ", 
chars)).isEqualTo("to-kebab-case"); + assertThat(CaseUtils.toKebabCase("To+KE+BA B@cASE", chars)).isEqualTo("to-ke-ba-b-case"); + + assertThat(CaseUtils.toKebabCase("To.Kebab.Case", '.')).isEqualTo("to-kebab-case"); + assertThat(CaseUtils.toKebabCase("To.Kebab-Case", '-', '.')).isEqualTo("to-kebab-case"); + assertThat(CaseUtils.toKebabCase(" to @ Kebab case", '-', '@')).isEqualTo("to-kebab-case"); + assertThat(CaseUtils.toKebabCase(" @to @ Kebab case", '-', '@')).isEqualTo("to-kebab-case"); + + assertThat(CaseUtils.toKebabCase("tokebabcase")).isEqualTo("tokebabcase"); + + assertThat(CaseUtils.toKebabCase("\uD800\uDF00 \uD800\uDF02")).isEqualTo("\uD800\uDF00-\uD800\uDF02"); + assertThat(CaseUtils.toKebabCase("\uD800\uDF00\uD800\uDF01\uD800\uDF14\uD800\uDF02\uD800\uDF03", '\uD800', + '\uDF14')).isEqualTo("\uD800\uDF00\uD800\uDF01-\uD800\uDF02\uD800\uDF03"); + } }