Skip to content

TEXT-231: WordUtils.wrap react to pre-existing "newline string" #458

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 66 additions & 77 deletions src/main/java/org/apache/commons/text/WordUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -561,13 +561,13 @@ public static String uncapitalize(final String str, final char... delimiters) {
}

/**
* Wraps a single line of text, identifying words by {@code ' '}.
* Wraps a single line of text, identifying word boundaries by {@code ' '}.
*
* <p>New lines will be separated by the system property line separator.
* Very long words, such as URLs will <i>not</i> be wrapped.</p>
*
* <p>Leading spaces on a new line are stripped.
* Trailing spaces are not stripped.</p>
* <p>Leading spaces on a new line are trimmed.
* Trailing spaces on a line are not trimmed.</p>
*
* <table border="1">
* <caption>Examples</caption>
Expand Down Expand Up @@ -614,10 +614,10 @@ public static String wrap(final String str, final int wrapLength) {
}

/**
* Wraps a single line of text, identifying words by {@code ' '}.
* Wraps a single line of text, identifying word boundaries by {@code ' '}.
*
* <p>Leading spaces on a new line are stripped.
* Trailing spaces are not stripped.</p>
* <p>Leading spaces on a new line are trimmed.
* Trailing spaces on a line are not trimmed.</p>
*
* <table border="1">
* <caption>Examples</caption>
Expand Down Expand Up @@ -696,10 +696,11 @@ public static String wrap(final String str,
}

/**
* Wraps a single line of text, identifying words by {@code wrapOn}.
* Wraps a single line of text, identifying word boundaries by {@code wrapOn},
* parsed as a regular expression.
*
* <p>Leading spaces on a new line are stripped.
* Trailing spaces are not stripped.</p>
* <p>Leading matches of {@code wrapOn} on a new line are trimmed.
* Trailing matches of {@code wrapOn} on a line are not trimmed.</p>
*
* <table border="1">
* <caption>Examples</caption>
Expand Down Expand Up @@ -783,7 +784,7 @@ public static String wrap(final String str,
* @param newLineStr the string to insert for a new line,
* {@code null} uses the system property line separator
* @param wrapLongWords true if long words (such as URLs) should be wrapped
* @param wrapOn regex expression to be used as a breakable characters,
* @param wrapOn regular expression to be used as a word boundary,
* if blank string is provided a space character will be used
* @return a line with newlines inserted, {@code null} if null input
*/
Expand All @@ -804,85 +805,73 @@ public static String wrap(final String str,
if (StringUtils.isBlank(wrapOn)) {
wrapOn = " ";
}
final Pattern patternToWrapOn = Pattern.compile(wrapOn);

final int inputLineLength = str.length();
int offset = 0;

final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
int matcherSize = -1;

while (offset < inputLineLength) {
int spaceToWrapAt = -1;
Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
if (matcher.find()) {
if (matcher.start() == 0) {
matcherSize = matcher.end();
if (matcherSize != 0) {
offset += matcher.end();
continue;
}
offset += 1;
}
spaceToWrapAt = matcher.start() + offset;
}

// only last line without leading spaces is left
if (inputLineLength - offset <= wrapLength) {
break;
}
final Pattern wrapOnPattern = Pattern.compile(wrapOn);
final Pattern newLineStrPattern = Pattern.compile(newLineStr);

while (matcher.find()) {
spaceToWrapAt = matcher.start() + offset;
}
int lineStart = 0;
int lineEnd = 0;
int nextLineStart = 0;

if (spaceToWrapAt >= offset) {
// normal case
wrappedLine.append(str, offset, spaceToWrapAt);
wrappedLine.append(newLineStr);
offset = spaceToWrapAt + 1;
Matcher newlineStrMatcher = newLineStrPattern.matcher(str);
Matcher wrapOnMatcher = wrapOnPattern.matcher(str);

} else // really long word or URL
if (wrapLongWords) {
if (matcherSize == 0) {
offset--;
}
// wrap really long word one line at a time
wrappedLine.append(str, offset, wrapLength + offset);
int nextForcedWrap = wrapLength;
int nextNewlineStr = newlineStrMatcher.find() ? newlineStrMatcher.start() : inputLineLength;
int nextWrapOn = wrapOnMatcher.find() ? wrapOnMatcher.start() : inputLineLength;

boolean suppressNewline = true;

while(lineStart < inputLineLength) {
// At this point we are always(!) at the beginning of a line

if(!suppressNewline) {
wrappedLine.append(newLineStr);
offset += wrapLength;
matcherSize = -1;
} else {
// do not wrap really long word, just extend beyond limit
matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
if (matcher.find()) {
matcherSize = matcher.end() - matcher.start();
spaceToWrapAt = matcher.start() + offset + wrapLength;
}
suppressNewline = false;
}

if (spaceToWrapAt >= 0) {
if (matcherSize == 0 && offset != 0) {
offset--;
}
wrappedLine.append(str, offset, spaceToWrapAt);
wrappedLine.append(newLineStr);
offset = spaceToWrapAt + 1;
} else {
if (matcherSize == 0 && offset != 0) {
offset--;
}
wrappedLine.append(str, offset, str.length());
offset = inputLineLength;
matcherSize = -1;
}
// Trim all leading instances of wrapOn
while(nextWrapOn == lineStart && lineStart < inputLineLength) {
lineStart = nextLineStart = !wrapOnMatcher.hitEnd() ? wrapOnMatcher.end() : inputLineLength;
nextForcedWrap = lineStart + wrapLength;
nextWrapOn = !wrapOnMatcher.hitEnd() && wrapOnMatcher.find() ? wrapOnMatcher.start() : inputLineLength;
}
}

if (matcherSize == 0 && offset < inputLineLength) {
offset--;
}
if(wrapLongWords && nextForcedWrap < nextNewlineStr && nextForcedWrap < nextWrapOn) {
// We need to wrap the line due to a long word
lineEnd = nextLineStart = nextForcedWrap;
} else if(nextNewlineStr <= nextForcedWrap || (nextNewlineStr > nextForcedWrap && nextNewlineStr <= nextWrapOn)) {
// There is a newLineStr before the length limit of the line,
// or after the length limit and before the next wrapOn,
// so we wrap just after that newline string.
// This preserves trailing instances of wrapOn.
nextLineStart = !newlineStrMatcher.hitEnd() ? newlineStrMatcher.end() : inputLineLength;
lineEnd = nextLineStart;
nextNewlineStr = !newlineStrMatcher.hitEnd() && newlineStrMatcher.find() ? newlineStrMatcher.start() : inputLineLength;
suppressNewline = true; // This saves a call to append()
} else {
// Here we are not forced to wrap due to long word, nor due to
// the existence of a newline string.
// So we can just keep looking for wrapOn,
// until we run out of room on the line
do {
lineEnd = nextWrapOn;
nextLineStart = !wrapOnMatcher.hitEnd() ? wrapOnMatcher.end() : inputLineLength;
nextWrapOn = !wrapOnMatcher.hitEnd() && wrapOnMatcher.find() ? wrapOnMatcher.start() : inputLineLength;
} while(nextWrapOn < inputLineLength && nextWrapOn <= nextForcedWrap);
}

// Whatever is left in line is short enough to just pass through
wrappedLine.append(str, offset, str.length());
// We have found the end of this line, and can append it to the result
wrappedLine.append(str.substring(lineStart, lineEnd));

lineStart = nextLineStart;
nextForcedWrap = nextLineStart + wrapLength;
}

return wrappedLine.toString();
}
Expand Down
Loading