From c14e41637db7b2d4d87b905db4c306268bc0319b Mon Sep 17 00:00:00 2001 From: ifly6 Date: Sun, 13 Dec 2020 14:31:17 -0500 Subject: [PATCH 01/10] Numeric entity unescaper: new behaviour in range [128, 159]. For that range, translate to Windows-1252 encoding; re-throw illegal argument exception with input if restrictions are violated. Add test which tests numeric entities that are improperly encoded in cp-1252, including code points before and after range [128, 159]. The algorithm for numeric entities applies only under very restrictive conditions: the code point must be in the range where ISO 8859-1 and Windows-1252 diverge, it must be a non-hex numeric entity (this is to avoid tripping one of the StringEscapeUtilsTest tests), and it must not be an invalid Windows-1252 point in that range. --- .../translate/NumericEntityUnescaper.java | 37 ++++++++++- .../commons/text/StringEscapeUtilsTest.java | 64 ++++++++++++++----- 2 files changed, 85 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java index d41e5b54b2..795e7de6cc 100644 --- a/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java +++ b/src/main/java/org/apache/commons/text/translate/NumericEntityUnescaper.java @@ -18,8 +18,13 @@ import java.io.IOException; import java.io.Writer; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.nio.charset.CodingErrorAction; import java.util.Arrays; import java.util.EnumSet; +import java.util.HashSet; +import java.util.Set; /** * Translate XML numeric entities of the form &#[xX]?\d+;? to @@ -53,6 +58,9 @@ public enum OPTION { /** EnumSet of OPTIONS, given from the constructor. */ private final EnumSet