Skip to content

New and useful methods for commons.text.CaseUtils #528

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
274 changes: 233 additions & 41 deletions src/main/java/org/apache/commons/text/CaseUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
*/
package org.apache.commons.text;

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;

/**
Expand All @@ -28,13 +28,51 @@
* <p>This class tries to handle {@code null} input gracefully.
* An exception will not be thrown for a {@code null} input.
* Each method documents its behavior in more detail.</p>
* Examples:
* <pre>
* "Two words" "foo bar" "Piñata Café"
* camelCase toCamelCase(str) "twoWords" "fooBar" "pinataCafe"
* camelCase toCamelCase(str, false, " ") "twoWords" "fooBar" "piñataCafé"
* camel_Snake toDelimitedCase(str, false, '_') "two_Words" "foo_Bar" "pinata_Cafe"
* flatcase toPascalCase(str).toLowerCase() "twowords" "foobar" "pinatacafe"
* kebab-case toKebabCase(str) "two-words" "foo-bar" "pinata-cafe"
* PascalCase toPascalCase(str) "TwoWords" "FooBar" "PinataCafe"
* PascalCase toCamelCase(str, true, " ") "TwoWords" "FooBar" "PiñataCafé"
* SCREAMINGCASE toPascalCase(str).toUpperCase() "TWOWORDS" "FOOBAR" "PINATACAFE"
* SCREAMING-KEBAB toDelimitedCase(str, '-').toUpperCase() "TWO-WORDS" "FOO-BAR" "PINATA-CAFE"
* SCREAMING_SNAKE toDelimitedCase(str, '_').toUpperCase() "TWO_WORDS" "FOO_BAR" "PINATA_CAFE"
* snake_case toSnakeCase(str) "two_words" "foo_bar" "pinata_cafe"
* Title_Case toDelimitedCase(str, '_') "Two_Words" "Foo_Bar" "Pinata_Cafe"
* Train-Case toDelimitedCase(str, '-') "Two-Words" "Foo-Bar" "Pinata-Cafe"
* </pre>
*
* Note: Examples with {@code toUpperCase()} and {@code toLowerCase()} may be replaced with
* {@code StringUtils.upperCase(str)} or {@code StringUtils.lowerCase(str)} to be null-safe.
*
* @since 1.2
*/
public class CaseUtils {

/**
* Converts all the delimiter separated words in a String into camelCase,
* All lower ASCII alphanumeric characters.
*/
private static final Pattern ALPHANUMERIC = Pattern.compile("[0-9A-Za-z]");

/**
* Matches a single character that is an ASCII letter or digit, a straight single quote ('),
* or a right single "curly" quote (\u2019).
*/
private static final Pattern ALPHANUMERIC_WITH_APOSTROPHE = Pattern.compile("[0-9A-Za-z'\u2019]");

/**
* Matches an entire string (possibly empty) that contains no ASCII letter or digit,
* i.e. no character accepted by ALPHANUMERIC. The pattern is anchored at both ends,
* so it describes whole strings rather than individual characters.
*/
private static final Pattern NON_ALPHANUMERIC = Pattern.compile("^[^0-9A-Za-z]*$");

/**
* Matches a capital "O" immediately followed by a straight single quote (') or a right single
* "curly" quote (\u2019), as in the name prefix of "O'Brien".
* toDelimitedCase(String, Boolean, Character) replaces every match with "O " (O plus a space).
* The pattern has no anchors or word boundaries, so it matches that pair anywhere in the input.
*/
private static final Pattern O_IRISH = Pattern.compile("(O')|(O\u2019)");


/**
* Converts all the delimiter-separated words in a String into camelCase,
* that is each word is made up of a title case character and then a series of
* lowercase characters.
*
Expand All @@ -46,7 +84,7 @@ public class CaseUtils {
* <p>A {@code null} input String returns {@code null}.</p>
*
* <p>A input string with only delimiter characters returns {@code ""}.</p>
*
* <p>
* Capitalization uses the Unicode title case, normally equivalent to
* upper case and cannot perform locale-sensitive mappings.
*
Expand All @@ -61,59 +99,214 @@ public class CaseUtils {
* CaseUtils.toCamelCase(" @", false, new char[]{'@'}) = ""
* </pre>
*
* @param str the String to be converted to camelCase, may be null
* @param capitalizeFirstLetter boolean that determines if the first character of first word should be title case.
* @param delimiters set of characters to determine capitalization, null and/or empty array means whitespace
* @param str the String to be converted to camelCase, may be null
* @param capitalizeFirstLetter boolean. If true, set the first character of the first word to title case.
* @param delimiters set of characters to determine capitalization, null and/or empty array means whitespace
* @return camelCase of String, {@code null} if null String input
*/
// NOTE(review): two declarations appear back to back below (primitive boolean, then boxed
// Boolean with a non-final delimiters array). Only one can be the live signature, and the
// statements that follow mix two implementations (a code-point loop writing into
// newCodePoints, and a StringBuilder/WordUtils path) - confirm which lines are current.
public static String toCamelCase(String str, final boolean capitalizeFirstLetter, final char... delimiters) {
public static String toCamelCase(String str, final Boolean capitalizeFirstLetter, char... delimiters) {
// Null and empty input are returned unchanged.
if (StringUtils.isEmpty(str)) {
return str;
}
str = str.toLowerCase();
final int strLen = str.length();
final int[] newCodePoints = new int[strLen];
int outOffset = 0;
final Set<Integer> delimiterSet = toDelimiterSet(delimiters);
boolean capitalizeNext = capitalizeFirstLetter;
// This loop advances by whole code points (Character.charCount), not by chars.
for (int index = 0; index < strLen;) {
final int codePoint = str.codePointAt(index);

if (delimiterSet.contains(codePoint)) {
capitalizeNext = outOffset != 0;
index += Character.charCount(codePoint);
} else if (capitalizeNext || outOffset == 0 && capitalizeFirstLetter) {
final int titleCaseCodePoint = Character.toTitleCase(codePoint);
newCodePoints[outOffset++] = titleCaseCodePoint;
index += Character.charCount(titleCaseCodePoint);
capitalizeNext = false;
// BooleanUtils.isTrue yields false for a null Boolean, so a null flag means "do not capitalize".
boolean capitalizeFirst = BooleanUtils.isTrue(capitalizeFirstLetter);
// A null or empty delimiter array falls back to a single space.
if (ArrayUtils.isEmpty(delimiters)) {
delimiters = new char[]{' '};
}
// The delimiter array in text.WordUtils.capitalize(String, char[]) is not working properly
// in the current (1.12) build.
// The following loop is a temporary fix.
// It rewrites every non-space delimiter to a space and lower-cases everything else, so that
// the WordUtils calls below only ever have to split on ' '.
// NOTE(review): the loop steps one char at a time (i++) but reads a whole code point with
// codePointAt(i) and appends all of its chars. For a supplementary character the next
// iteration lands on the low surrogate and appends it a second time - confirm and advance
// by Character.charCount(codepoint) if supplementary input must be supported.
StringBuilder sb = new StringBuilder();
for (int i = 0; i < str.length(); i++) {
if (str.charAt(i) != ' ' && ArrayUtils.contains(delimiters, str.charAt(i))) {
sb.append(' ');
} else {
newCodePoints[outOffset++] = codePoint;
index += Character.charCount(codePoint);
int codepoint = str.codePointAt(i);
sb.append(Character.toChars(Character.toLowerCase(codepoint)));
}
}
str = sb.toString();
delimiters = new char[]{' '};
// End temporary fix.
// Capitalize each space-separated word, then remove all whitespace; when the first letter
// must stay lower case, the joined result is additionally passed through uncapitalize.
if (capitalizeFirst) {
return StringUtils.deleteWhitespace(WordUtils.capitalize(str, delimiters));
} else {
return WordUtils.uncapitalize(StringUtils.deleteWhitespace(WordUtils.capitalize(str, delimiters)));
}
}

return new String(newCodePoints, 0, outOffset);
/**
 * Converts a string to camelCase by delegating to {@code toDelimitedCase(str, false, ' ')}
 * and then deleting all whitespace from the result. <br>
 * Unlike {@link #toCamelCase(String, Boolean, char[])}, this method normalizes accented
 * characters (removes the accents): {@code toCamelCase("Piñata Café")} returns
 * {@code "pinataCafe"}, whereas {@code toCamelCase("Piñata Café", false, ' ')} returns
 * {@code "piñataCafé"}. <br>
 * The first alphanumeric character of the string is converted to lower case, the first
 * character of every other alphanumeric sequence is capitalized, and all remaining
 * characters of each sequence are converted to lower case. <br>
 * Non-alphanumeric characters (or runs of them) at the beginning and end of the string are
 * stripped; all other non-alphanumeric characters (or runs of them) are deleted. <br>
 *
 * @param str The text to convert.
 * @return The convertedText.
 * @see #toCamelCase(String, Boolean, char[])
 * @see #toDelimitedCase(String, Boolean, Character)
 */
public static String toCamelCase(String str) {
    // Build a space-delimited form first, then squeeze the spaces out.
    final String spaceDelimited = toDelimitedCase(str, Boolean.FALSE, ' ');
    return StringUtils.deleteWhitespace(spaceDelimited);
}

/**
* Converts an array of delimiters to a hash set of code points. Code point of space(32) is added
* as the default value. The generated hash set provides O(1) lookup time.
* Converts a string to Delimited Case. <br>
* This is identical to using {@code toDelimitedCase(str, true, separator);} <br>
* Normalizes accented characters (removes accents). <br>
* Capitalizes the first character of any alphanumeric sequence.
* Converts the rest of the characters in the sequence to lower case<br>
* Strips all non-alphanumeric characters or sequences of non-alphanumeric characters
* from the beginning and end of the string. <br>
* Converts all other non-alphanumeric characters or sequences of non-alphanumeric characters
* to the separator delimiter. <br>
*
* @param delimiters set of characters to determine capitalization, null means whitespace
* @return Set<Integer>
* @param str String: the text to convert.
* @param separator char: The separator to use as a delimiter.
* @return The Converted_Text.
*/
private static Set<Integer> toDelimiterSet(final char[] delimiters) {
final Set<Integer> delimiterHashSet = new HashSet<>();
delimiterHashSet.add(Character.codePointAt(new char[]{' '}, 0));
if (ArrayUtils.isEmpty(delimiters)) {
return delimiterHashSet;
public static String toDelimitedCase(String str, Character separator) {
    // The two-argument form always capitalizes the first letter.
    final Boolean capitalizeFirstLetter = Boolean.TRUE;
    return toDelimitedCase(str, capitalizeFirstLetter, separator);
}

/**
* Converts a string to Delimited Case. <br>
* Normalizes accented characters (removes accents). <br>
* If {@code capitalizeFirstLetter} is {@code true}, capitalizes the first character of the string.
* Otherwise, converts the first character of the string to lower case. <br>
* Capitalizes the first character of any other alphanumeric sequence.
* Converts the rest of the characters in the sequence to lower case<br>
* Strips all non-alphanumeric characters or sequences of non-alphanumeric characters
* from the beginning and end of the string. <br>
* Converts all other non-alphanumeric characters or sequences of non-alphanumeric characters
* to the separator delimiter. <br>
* A {@code null} or empty {@code str} is returned unchanged. A string that contains no ASCII
* letter or digit after normalization yields {@code ""}. <br>
* A {@code null} separator delegates to {@code toPascalCase(str)} when the first letter is to be
* capitalized, and to {@code toCamelCase(str)} otherwise. <br>
*
* @param str String: the text to convert.
* @param capitalizeFirstLetter boolean: If false, converts the first character of the string to lower case.
* The flag is read with {@code BooleanUtils.isNotFalse}, so {@code null} behaves like {@code true}.
* @param separator char: The separator to use as a delimiter.
* @return The Converted_Text.
*/
public static String toDelimitedCase(String str, final Boolean capitalizeFirstLetter, Character separator) {
// This method sanitizes the input to run through toDelimitedEngine().
if (StringUtils.isEmpty(str)) {
return str;
}
boolean capitalizeFirst = BooleanUtils.isNotFalse(capitalizeFirstLetter);
// Without a separator there is nothing to join words with, so fall back to the
// separator-free variants.
if (separator == null) {
if (capitalizeFirst) {
return toPascalCase(str);
} else {
return toCamelCase(str);
}
}
// Strip accents, trim, then turn each "O'" / "O" + right curly quote pair into "O " so the
// letter after the apostrophe starts a new word instead of being fused onto the "O".
String normalized = O_IRISH.matcher(StringUtils.stripAccents(str).trim()).replaceAll("O ");
// Nothing to convert when no ASCII letter or digit remains.
if (NON_ALPHANUMERIC.matcher(normalized).matches()) {
return "";
}
// Locate the first ASCII letter or digit; everything before it is discarded by the engine.
int startIndex = 0;
for (int i = 0; i < normalized.length(); i++) {
if (ALPHANUMERIC.matcher(Character.toString(normalized.charAt(i))).matches()) {
startIndex = i;
break;
}
}

// NOTE(review): `delimiters` and `delimiterHashSet` are not declared anywhere in this method;
// the next two lines look like leftovers of the removed toDelimiterSet helper - confirm and drop.
for (int index = 0; index < delimiters.length; index++) {
delimiterHashSet.add(Character.codePointAt(delimiters, index));
return toDelimitedEngine(normalized, capitalizeFirst, separator, startIndex);
}

/**
* This is the engine that generates the return value of {@link #toDelimitedCase(String, Boolean, Character)}
* <p>
* Walks {@code normalized} from {@code startIndex} to its end. Each run of characters that are
* neither ASCII alphanumeric nor an apostrophe produces a single {@code separator}; straight and
* right curly apostrophes are dropped without producing a separator; every other character is
* upper-cased when it is the first one (and {@code capitalizeFirst} is set) or directly follows a
* separator, and lower-cased otherwise. A trailing separator is removed before returning.
* <p>
* NOTE(review): the code reads {@code delimited.charAt(delimited.length() - 1)} without checking
* for an empty builder, so it appears to rely on the character at {@code startIndex} being an
* ASCII letter or digit (which the visible caller arranges) - confirm no other caller exists.
*
* @param normalized String: the sanitized and normalized text to convert.
* @param capitalizeFirst boolean: If false, converts the first character of the string to lower case.
* @param separator char: The separator to use as a delimiter.
* @param startIndex int: The index of the first alphanumeric character.
* @return The Converted_Text.
*/
private static String toDelimitedEngine(String normalized, boolean capitalizeFirst, char separator, int startIndex) {
StringBuilder delimited = new StringBuilder();
for (int i = startIndex; i < normalized.length(); i++) {
// The character at startIndex is never treated as a separator (i > startIndex guard).
// Consecutive non-word characters collapse into one separator because a separator is only
// appended when the previous output character is not already one.
if (i > startIndex &&
!ALPHANUMERIC_WITH_APOSTROPHE.matcher(Character.toString(normalized.charAt(i))).matches()) {
if (delimited.charAt(delimited.length() - 1) != separator) {
delimited.append(separator);
}
// Apostrophes fall through both branches and are therefore omitted from the output.
} else if (normalized.charAt(i) != '\'' && normalized.charAt(i) != '\u2019') {
if (i == startIndex && capitalizeFirst) {
delimited.append(Character.toUpperCase(normalized.charAt(i)));
} else if (i != startIndex && delimited.charAt(delimited.length() - 1) == separator) {
// First character of a new word: the previous output character is the separator.
delimited.append(Character.toUpperCase(normalized.charAt(i)));
} else {
delimited.append(Character.toLowerCase(normalized.charAt(i)));
}
}
}
// Input that ends in non-word characters leaves a dangling separator; remove it.
if (delimited.charAt(delimited.length() - 1) == separator) {
delimited.deleteCharAt(delimited.length() - 1);
}
// NOTE(review): `delimiterHashSet` is not declared in this method and a second return follows;
// this line looks like a leftover of the removed toDelimiterSet helper - confirm and drop.
return delimiterHashSet;

return delimited.toString();
}

/**
 * Uses {@code toDelimitedCase()} to convert a string to kebab-case. <br>
 * Normalizes accented characters (removes accents). <br>
 * Converts all alphanumeric characters to lower case, independently of the JVM's default
 * locale. <br>
 * Strips all non-alphanumeric characters or sequences of non-alphanumeric characters
 * from the beginning and end of the string. <br>
 * Converts all other non-alphanumeric characters or sequences of non-alphanumeric characters
 * to a single hyphen ('-'). <br>
 *
 * @param str The text to convert, may be {@code null}.
 * @return The converted-text, or {@code null} if {@code toDelimitedCase} returns {@code null}.
 * @see #toDelimitedCase(String, Character)
 * @see StringUtils#lowerCase(String, java.util.Locale)
 */
public static String toKebabCase(String str) {
    // Lower-case with Locale.ROOT: the single-argument StringUtils.lowerCase uses the default
    // locale, under which (e.g. Turkish) "I" maps to dotless "ı" and would corrupt ASCII output.
    return StringUtils.lowerCase(toDelimitedCase(str, '-'), java.util.Locale.ROOT);
}

/**
 * Converts a string to UpperCamelCase (PascalCase) by delegating to
 * {@code toDelimitedCase(str, ' ')} and then deleting all whitespace from the result. <br>
 * Accented characters are normalized (accents removed). <br>
 * The first character of every alphanumeric sequence is capitalized and the remaining
 * characters of the sequence are converted to lower case. <br>
 * Non-alphanumeric characters (or runs of them) at the beginning and end of the string are
 * stripped; all other non-alphanumeric characters (or runs of them) are deleted. <br>
 *
 * @param str The text to convert.
 * @return The ConvertedText.
 * @see #toDelimitedCase(String, Character)
 * @see StringUtils#deleteWhitespace(String)
 */
public static String toPascalCase(String str) {
    // Build a space-delimited, capitalized form first, then squeeze the spaces out.
    final String spaceDelimited = toDelimitedCase(str, ' ');
    return StringUtils.deleteWhitespace(spaceDelimited);
}

/**
 * Uses {@code toDelimitedCase()} to convert a string to snake_case. <br>
 * Normalizes accented characters (removes accents). <br>
 * Converts all alphanumeric characters to lower case, independently of the JVM's default
 * locale. <br>
 * Strips all non-alphanumeric characters or sequences of non-alphanumeric characters
 * from the beginning and end of the string. <br>
 * Converts all other non-alphanumeric characters or sequences of non-alphanumeric characters
 * to a single underscore ('_'). <br>
 *
 * @param str The text to convert, may be {@code null}.
 * @return The converted_text, or {@code null} if {@code toDelimitedCase} returns {@code null}.
 * @see #toDelimitedCase(String, Character)
 * @see StringUtils#lowerCase(String, java.util.Locale)
 */
public static String toSnakeCase(String str) {
    // Lower-case with Locale.ROOT: the single-argument StringUtils.lowerCase uses the default
    // locale, under which (e.g. Turkish) "I" maps to dotless "ı" and would corrupt ASCII output.
    return StringUtils.lowerCase(toDelimitedCase(str, '_'), java.util.Locale.ROOT);
}

/**
Expand All @@ -127,4 +320,3 @@ private static Set<Integer> toDelimiterSet(final char[] delimiters) {
public CaseUtils() {
// Intentionally empty: no fields are initialized here, and every method visible in this
// class is static.
}
}

Loading