Emulating Character.isWhitespace with regex ranges

Change-Id: Ia07c5d6b7235115927430338d975f035c23ed520
Review-Link: https://gwt-review.googlesource.com/#/c/11030/
diff --git a/user/super/com/google/gwt/emul/java/lang/Character.java b/user/super/com/google/gwt/emul/java/lang/Character.java
index cfc2a00..80014cf 100644
--- a/user/super/com/google/gwt/emul/java/lang/Character.java
+++ b/user/super/com/google/gwt/emul/java/lang/Character.java
@@ -45,7 +45,6 @@
  *  - isTitleCase(char)
  *  - isUnicodeIdentifierPart(char)
  *  - isUnicodeIdentifierStart(char)
- *  - isWhitespace(char)
  *  - getDirectionality(*)
  *  - getNumericValue(*)
  *  - getType(*)
@@ -279,6 +278,18 @@
     }
   }
 
+  public static boolean isWhitespace(char ch) {
+    return Character.isWhitespace((int) ch); // Delegates to the code point overload.
+  }
+
+  // The regex would just be /\s/, but browsers handle non-breaking spaces inconsistently, and
+  // Java, unlike /\s/, counts U+001C-U+001F as whitespace but not U+FEFF.
+  public static native boolean isWhitespace(int codePoint) /*-{
+    // String.fromCharCode keeps only the low 16 bits, so wider (or negative) values are rejected.
+    return codePoint >= 0 && codePoint <= 0xFFFF && null !== String.fromCharCode(codePoint).match(
+      /[\t-\r \u1680\u180E\u2000-\u2006\u2008-\u200A\u2028\u2029\u205F\u3000]|[\x1C-\x1F]/);
+  }-*/;
+
   public static boolean isSupplementaryCodePoint(int codePoint) {
     return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT && codePoint <= MAX_CODE_POINT;
   }
diff --git a/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java b/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
index d8a9c3b..737a4ee 100644
--- a/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
+++ b/user/test/com/google/gwt/emultest/java/lang/CharacterTest.java
@@ -326,6 +326,96 @@
     assertEquals(-1, Character.digit('A', 10));
   }
 
+  @SuppressWarnings("deprecation")
+  public void testIsSpace() {
+    // Covers the deprecated isSpace: a blank and a line feed pass, a letter and '_' do not.
+    assertTrue(Character.isSpace(' '));
+    assertTrue(Character.isSpace('\n'));
+    assertFalse(Character.isSpace('a'));
+    assertFalse(Character.isSpace('_'));
+  }
+
+  public void testIsWhitepace() {
+    char[] separators = {
+        '\u0020', // SPACE.
+        '\u1680', // OGHAM SPACE MARK.
+        '\u2000', // EN QUAD.
+        '\u2001', // EM QUAD.
+        '\u2002', // EN SPACE.
+        '\u2003', // EM SPACE.
+        '\u2004', // THREE-PER-EM SPACE.
+        '\u2005', // FOUR-PER-EM SPACE.
+        '\u2006', // SIX-PER-EM SPACE.
+        '\u2008', // PUNCTUATION SPACE.
+        '\u2009', // THIN SPACE.
+        '\u200A', // HAIR SPACE.
+        '\u2028', // LINE SEPARATOR.
+        '\u2029', // PARAGRAPH SEPARATOR.
+        '\u205F', // MEDIUM MATHEMATICAL SPACE.
+        '\u3000' // IDEOGRAPHIC SPACE.
+    };
+
+    char[] nonBreakingSpaceSeparators = {
+        '\u00A0', // NO-BREAK SPACE.
+        '\u2007', // FIGURE SPACE.
+        '\u202F' // NARROW NO-BREAK SPACE.
+    };
+
+    char[] specialCases = {
+        '\t', // HORIZONTAL TABULATION.
+        '\n', // LINE FEED.
+        '\u000B', // VERTICAL TABULATION.
+        '\f', // FORM FEED.
+        '\r', // CARRIAGE RETURN.
+        '\u001C', // FILE SEPARATOR.
+        '\u001D', // GROUP SEPARATOR.
+        '\u001E', // RECORD SEPARATOR.
+        '\u001F' // UNIT SEPARATOR.
+    };
+
+    char[] typicalCounterExamples = {
+        'a', // LATIN SMALL LETTER A.
+        'B', // LATIN CAPITAL LETTER B.
+        '_', // LOW LINE.
+        '\u2500' // BOX DRAWINGS LIGHT HORIZONTAL.
+    };
+
+    int[] supplementaryCounterExamples = {
+        0x2070E, // UNICODE HAN CHARACTER 'to castrate a fowl, a capon'.
+        0x20731, // UNICODE HAN CHARACTER 'to peel, pare'.
+        0x29D98 // UNICODE HAN CHARACTER 'a general name for perch, etc.'.
+    };
+
+    // Must match unicode space separator characters; messages name the failing code point.
+    for (char c : separators) {
+      assertTrue("U+" + Integer.toHexString(c), Character.isWhitespace(c));
+      assertTrue("U+" + Integer.toHexString(c) + " as int", Character.isWhitespace((int) c));
+    }
+
+    // But NOT the non-breaking spaces.
+    for (char c : nonBreakingSpaceSeparators) {
+      assertFalse("U+" + Integer.toHexString(c), Character.isWhitespace(c));
+      assertFalse("U+" + Integer.toHexString(c) + " as int", Character.isWhitespace((int) c));
+    }
+
+    // The ASCII legacy cases.
+    for (char c : specialCases) {
+      assertTrue("U+" + Integer.toHexString(c), Character.isWhitespace(c));
+      assertTrue("U+" + Integer.toHexString(c) + " as int", Character.isWhitespace((int) c));
+    }
+
+    // Behave appropriately on other characters, like the alphabet.
+    for (char c : typicalCounterExamples) {
+      assertFalse("U+" + Integer.toHexString(c), Character.isWhitespace(c));
+      assertFalse("U+" + Integer.toHexString(c) + " as int", Character.isWhitespace((int) c));
+    }
+
+    // Support for non-UCS-2 characters (only reachable through the int overload).
+    for (int c : supplementaryCounterExamples) {
+      assertFalse("U+" + Integer.toHexString(c), Character.isWhitespace(c));
+    }
+  }
+
   public void testToString() {
     assertEquals(" ", new Character((char) 32).toString());
   }