Emulating Character.isWhitespace with regex ranges
Change-Id: Ia07c5d6b7235115927430338d975f035c23ed520
Review-Link: https://gwt-review.googlesource.com/#/c/11030/
diff --git a/user/super/com/google/gwt/emul/java/lang/Character.java b/user/super/com/google/gwt/emul/java/lang/Character.java
index cfc2a00..80014cf 100644
--- a/user/super/com/google/gwt/emul/java/lang/Character.java
+++ b/user/super/com/google/gwt/emul/java/lang/Character.java
@@ -45,7 +45,6 @@
* - isTitleCase(char)
* - isUnicodeIdentifierPart(char)
* - isUnicodeIdentifierStart(char)
- * - isWhitespace(char)
* - getDirectionality(*)
* - getNumericValue(*)
* - getType(*)
@@ -279,6 +278,18 @@
}
}
+ public static boolean isWhitespace(char ch) {
+ return isWhitespace((int) ch);
+ }
+
+ /**
+  * Emulates {@code java.lang.Character.isWhitespace(int)} with a JavaScript regular expression.
+  *
+  * <p>The accepted set is spelled out explicitly instead of using the regex whitespace class,
+  * because browsers handle non-breaking spaces inconsistently and the Java definition also
+  * includes the separators U+001C..U+001F.
+  *
+  * @param codePoint the code point to classify
+  * @return {@code true} if {@code codePoint} is in the listed whitespace set; {@code false}
+  *     otherwise, including for every value outside U+0000..U+FFFF
+  */
+ public static native boolean isWhitespace(int codePoint) /*-{
+   // String.fromCharCode keeps only the low 16 bits of its argument, so a supplementary code
+   // point such as 0x10020 would alias to U+0020 and be reported as whitespace. Every character
+   // in the set below lies in the BMP, so anything outside that range is rejected up front.
+   if (codePoint < 0 || codePoint > 0xFFFF) {
+     return false;
+   }
+   // U+FEFF is matched by the JavaScript whitespace class, but it is a format character rather
+   // than a separator, so Java does not treat it as whitespace; it is deliberately left out.
+   // NOTE(review): U+180E is kept from the original set; its general category differs between
+   // Unicode versions, so confirm it against the Java release being emulated.
+   var whitespace =
+       /[\t-\r \u1680\u180E\u2000-\u2006\u2008-\u200A\u2028\u2029\u205F\u3000]|[\x1C-\x1F]/;
+   return whitespace.test(String.fromCharCode(codePoint));
+ }-*/;
+
public static boolean isSupplementaryCodePoint(int codePoint) {
return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT && codePoint <= MAX_CODE_POINT;
}
diff --git a/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java b/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
index d8a9c3b..737a4ee 100644
--- a/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
+++ b/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
@@ -326,6 +326,96 @@
assertEquals(-1, Character.digit('A', 10));
}
+ @SuppressWarnings("deprecation")
+ public void testIsSpace() {
+ assertFalse(Character.isSpace('a'));
+ assertFalse(Character.isSpace('_'));
+
+ assertTrue(Character.isSpace(' '));
+ assertTrue(Character.isSpace('\n'));
+ }
+
+ /**
+  * Verifies {@code Character.isWhitespace} through both the {@code char} and the {@code int}
+  * overload for Unicode separators, non-breaking spaces, the ASCII control-character cases,
+  * ordinary non-whitespace characters and a few supplementary code points.
+  *
+  * <p>Every assertion carries a message naming the overload and the code point, so a failure
+  * inside one of the loops identifies the offending character.
+  */
+ public void testIsWhitepace() {
+   char[] separators = {
+     '\u0020', // SPACE.
+     '\u1680', // OGHAM SPACE MARK.
+     '\u2000', // EN QUAD.
+     '\u2001', // EM QUAD.
+     '\u2002', // EN SPACE.
+     '\u2003', // EM SPACE.
+     '\u2004', // THREE-PER-EM SPACE.
+     '\u2005', // FOUR-PER-EM SPACE.
+     '\u2006', // SIX-PER-EM SPACE.
+     '\u2008', // PUNCTUATION SPACE.
+     '\u2009', // THIN SPACE.
+     '\u200A', // HAIR SPACE.
+     '\u2028', // LINE SEPARATOR.
+     '\u2029', // PARAGRAPH SEPARATOR.
+     '\u205F', // MEDIUM MATHEMATICAL SPACE.
+     '\u3000' // IDEOGRAPHIC SPACE.
+   };
+
+   char[] nonBreakingSpaceSeparators = {
+     '\u00A0', // NO-BREAK SPACE.
+     '\u2007', // FIGURE SPACE.
+     '\u202F' // NARROW NO-BREAK SPACE.
+   };
+
+   char[] specialCases = {
+     '\t', // HORIZONTAL TABULATION.
+     '\n', // LINE FEED.
+     '\u000B', // VERTICAL TABULATION.
+     '\f', // FORM FEED.
+     '\r', // CARRIAGE RETURN.
+     '\u001C', // FILE SEPARATOR.
+     '\u001D', // GROUP SEPARATOR.
+     '\u001E', // RECORD SEPARATOR.
+     '\u001F' // UNIT SEPARATOR.
+   };
+
+   char[] typicalCounterExamples = {
+     'a', // LATIN SMALL LETTER A.
+     'B', // LATIN CAPITAL LETTER B.
+     '_', // LOW LINE.
+     '\u2500' // BOX DRAWINGS LIGHT HORIZONTAL.
+   };
+
+   int[] supplementaryCounterExamples = {
+     0x2070E, // UNICODE HAN CHARACTER 'to castrate a fowl, a capon'.
+     0x20731, // UNICODE HAN CHARACTER 'to peel, pare'.
+     0x29D98, // UNICODE HAN CHARACTER 'a general name for perch, etc.'.
+   };
+
+   // Must match unicode space separator characters.
+   assertWhitespaceForBothOverloads(true, separators);
+
+   // But NOT the non-breaking spaces.
+   assertWhitespaceForBothOverloads(false, nonBreakingSpaceSeparators);
+
+   // The ASCII legacy cases.
+   assertWhitespaceForBothOverloads(true, specialCases);
+
+   // Behave appropriately on other characters, like the alphabet.
+   assertWhitespaceForBothOverloads(false, typicalCounterExamples);
+
+   // Support for non-UCS-2 characters (only the int overload can express these).
+   for (int codePoint : supplementaryCounterExamples) {
+     assertFalse(
+         "isWhitespace(int) for 0x" + Integer.toHexString(codePoint),
+         Character.isWhitespace(codePoint));
+   }
+ }
+
+ /**
+  * Asserts that both {@code Character.isWhitespace} overloads return {@code expected} for every
+  * character in {@code chars}, naming the overload and code point in the failure message.
+  */
+ private void assertWhitespaceForBothOverloads(boolean expected, char[] chars) {
+   for (char c : chars) {
+     String hex = "0x" + Integer.toHexString(c);
+     assertEquals("isWhitespace(char) for " + hex, expected, Character.isWhitespace(c));
+     assertEquals("isWhitespace(int) for " + hex, expected, Character.isWhitespace((int) c));
+   }
+ }
+
public void testToString() {
assertEquals(" ", new Character((char) 32).toString());
}