Adding RegExp to public GWT (native version, pure Java version, tests)
git-svn-id: https://google-web-toolkit.googlecode.com/svn/trunk@7517 8db76d5a-ed1c-0410-87a9-c151d255dfc7
diff --git a/dev/build.xml b/dev/build.xml
index 5073f0a..523258b 100755
--- a/dev/build.xml
+++ b/dev/build.xml
@@ -28,7 +28,8 @@
<pathelement location="${gwt.tools.lib}/junit/junit-3.8.1.jar" />
</classpath>
</gwt.javac>
- <gwt.javac srcdir="${gwt.root}/user/src" destdir="${javac.junit.out}">
+ <gwt.javac srcdir="${gwt.root}/user/src" destdir="${javac.junit.out}"
+ excludes="**/super/**">
<classpath>
<pathelement location="${javac.out}" />
<pathelement location="${gwt.tools.lib}/tomcat/servlet-api-2.5.jar" />
diff --git a/eclipse/lang/.classpath b/eclipse/lang/.classpath
index 3963a6e..9d67ef8 100644
--- a/eclipse/lang/.classpath
+++ b/eclipse/lang/.classpath
@@ -1,6 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="dev/super/com/google/gwt/dev/jjs/intrinsic"/>
+ <classpathentry kind="src" path="user/src/com/google/gwt/regexp/super"/>
<classpathentry kind="src" path="user/super/com/google/gwt/user/translatable"/>
<classpathentry kind="src" path="user/super/com/google/gwt/benchmarks/translatable"/>
<classpathentry kind="src" path="user/super/com/google/gwt/emul"/>
diff --git a/eclipse/user/.classpath b/eclipse/user/.classpath
index b0d2eec..7382b12 100644
--- a/eclipse/user/.classpath
+++ b/eclipse/user/.classpath
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
- <classpathentry kind="src" path="core/src"/>
+ <classpathentry excluding="**/super/**" kind="src" path="core/src"/>
<classpathentry kind="src" path="core/test_i18n_dollar"/>
<classpathentry kind="src" path="core/test_i18n_bar"/>
<classpathentry kind="src" path="core/javadoc"/>
diff --git a/servlet/build.xml b/servlet/build.xml
index ebbfa40..11aa99d 100755
--- a/servlet/build.xml
+++ b/servlet/build.xml
@@ -13,6 +13,7 @@
<fileset dir="${gwt.user.bin}">
<exclude name="**/rebind/**" />
<exclude name="**/tools/**" />
+ <exclude name="**/super/**" />
<exclude name="com/google/gwt/json/**" />
<exclude name="com/google/gwt/junit/*" />
<exclude name="com/google/gwt/junit/client/GWTTestCase.*" />
diff --git a/tools/api-checker/config/gwt16_20userApi.conf b/tools/api-checker/config/gwt16_20userApi.conf
index de9fa76..7a905d3 100644
--- a/tools/api-checker/config/gwt16_20userApi.conf
+++ b/tools/api-checker/config/gwt16_20userApi.conf
@@ -33,6 +33,7 @@
:**/remote/**\
:**/server/**\
:**/tools/**\
+:user/src/com/google/gwt/regexp/shared/**\
:com/google/gwt/junit/*.java\
:com/google/gwt/junit/client/GWTTestCase.java\
:com/google/gwt/junit/client/impl/GWTRunner.java\
diff --git a/user/build.xml b/user/build.xml
index 202d5f5..ddef745 100755
--- a/user/build.xml
+++ b/user/build.xml
@@ -67,7 +67,7 @@
<target name="compile" description="Compile all class files"
unless="compile.complete">
<mkdir dir="${javac.out}" />
- <gwt.javac>
+ <gwt.javac excludes="**/super/**">
<classpath>
<pathelement location="${gwt.tools.lib}/tomcat/servlet-api-2.5.jar" />
<pathelement location="${gwt.tools.lib}/junit/junit-3.8.1.jar" />
diff --git a/user/src/com/google/gwt/regexp/RegExp.gwt.xml b/user/src/com/google/gwt/regexp/RegExp.gwt.xml
new file mode 100644
index 0000000..285dd66
--- /dev/null
+++ b/user/src/com/google/gwt/regexp/RegExp.gwt.xml
@@ -0,0 +1,19 @@
+<!-- -->
+<!-- Copyright 2010 Google Inc. -->
+<!-- Licensed under the Apache License, Version 2.0 (the "License"); you -->
+<!-- may not use this file except in compliance with the License. You may -->
+<!-- obtain a copy of the License at -->
+<!-- -->
+<!-- http://www.apache.org/licenses/LICENSE-2.0 -->
+<!-- -->
+<!-- Unless required by applicable law or agreed to in writing, software -->
+<!-- distributed under the License is distributed on an "AS IS" BASIS, -->
+<!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -->
+<!-- implied. See the License for the specific language governing permissions and -->
+<!-- limitations under the License. -->
+
+<!-- regular expressions support. -->
+<module>
+ <source path="shared" />
+ <super-source path="super" />
+</module>
diff --git a/user/src/com/google/gwt/regexp/shared/MatchResult.java b/user/src/com/google/gwt/regexp/shared/MatchResult.java
new file mode 100644
index 0000000..a74a98ee
--- /dev/null
+++ b/user/src/com/google/gwt/regexp/shared/MatchResult.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp.shared;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Pure Java implementation of a regular expression match result.
+ */
+public class MatchResult {
+
+ private final List<String> groups;
+ private final int index;
+ private final String input;
+
+ public MatchResult(int index, String input, List<String> groups) {
+ this.index = index;
+ this.input = input;
+ this.groups = new ArrayList<String>(groups);
+ }
+
+ /**
+ * Retrieves the matched string or the given matched group.
+ *
+ * @param index the index of the group to return, 0 to return the whole
+ * matched string; must be between 0 and {@code getGroupCount() - 1}
+ * included
+ * @return The matched string if {@code index} is zero, else the given matched
+ * group. If the given group was optional and did not match, the
+ * behavior is browser-dependent: this method will return {@code null}
+ * or an empty string.
+ */
+ public String getGroup(int index) {
+ return groups.get(index);
+ }
+
+ /**
+ * @return The number of groups, including the matched string, hence greater
+ * than or equal to 1.
+ */
+ public int getGroupCount() {
+ return groups.size();
+ }
+
+ /**
+ * @return The zero-based index of the match in the input string.
+ */
+ public int getIndex() {
+ return index;
+ }
+
+ /**
+ * @return The original input string.
+ */
+ public String getInput() {
+ return input;
+ }
+}
diff --git a/user/src/com/google/gwt/regexp/shared/RegExp.java b/user/src/com/google/gwt/regexp/shared/RegExp.java
new file mode 100644
index 0000000..47daa5e
--- /dev/null
+++ b/user/src/com/google/gwt/regexp/shared/RegExp.java
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp.shared;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A class for regular expressions with features like Javascript's RegExp, plus
+ * Javascript String's replace and split methods (which can take a RegExp
+ * parameter). The pure Java implementation (for server-side use) uses Java's
+ * Pattern class, unavailable under GWT. The super-sourced GWT implementation
+ * simply calls on to the native Javascript classes.
+ * <p>
+ * There are a few small incompatibilities between the two implementations.
+ * Java-specific constructs in the regular expression syntax (e.g. [a-z&&[^bc]],
+ * (?<=foo), \A, \Q) work only in the pure Java implementation, not the GWT
+ * implementation, and are not rejected by either. Also, the Javascript-specific
+ * constructs $` and $' in the replacement expression work only in the GWT
+ * implementation, not the pure Java implementation, which rejects them.
+ */
+public class RegExp {
+
+ // In JS syntax, a \ in the replacement string has no special meaning.
+ // In Java syntax, a \ in the replacement string escapes the next character,
+ // so we have to translate \ to \\ before passing it to Java.
+ private static final Pattern REPLACEMENT_BACKSLASH = Pattern.compile("\\\\");
+ // To get \\, we have to say \\\\\\\\:
+ // \\\\\\\\ --> Java string unescape --> \\\\
+ // \\\\ ---> Pattern replacement unescape in replacement preprocessing --> \\
+ private static final String REPLACEMENT_BACKSLASH_FOR_JAVA = "\\\\\\\\";
+
+ // In JS syntax, a $& in the replacement string stands for the whole match.
+ // In Java syntax, the equivalent is $0, so we have to translate $& to
+ // $0 before passing it to Java. However, we have to watch out for $$&, which
+ // is actually a Javascript $$ (see below) followed by a & with no special
+ // meaning, and must not get translated.
+ private static final Pattern REPLACEMENT_DOLLAR_AMPERSAND =
+ Pattern.compile("((?:^|\\G|[^$])(?:\\$\\$)*)\\$&");
+ private static final String REPLACEMENT_DOLLAR_AMPERSAND_FOR_JAVA = "$1\\$0";
+
+ // In JS syntax, a $` and $' in the replacement string stand for everything
+ // before the match and everything after the match.
+ // In Java syntax, there is no equivalent, so we detect and reject $` and $'.
+ // However, we have to watch out for $$` and $$', which are actually a JS $$
+ // (see below) followed by a ` or ' with no special meaning, and must not be
+ // rejected.
+ private static final Pattern REPLACEMENT_DOLLAR_APOSTROPHE =
+ Pattern.compile("(?:^|[^$])(?:\\$\\$)*\\$[`']");
+
+ // In JS syntax, a $$ in the replacement string stands for a (single) dollar
+ // sign, $.
+ // In Java syntax, the equivalent is \$, so we have to translate $$ to \$
+ // before passing it to Java.
+ private static final Pattern REPLACEMENT_DOLLAR_DOLLAR =
+ Pattern.compile("\\$\\$");
+ // To get \$, we have to say \\\\\\$:
+ // \\\\\\$ --> Java string unescape --> \\\$
+ // \\\$ ---> Pattern replacement unescape in replacement preprocessing --> \$
+ private static final String REPLACEMENT_DOLLAR_DOLLAR_FOR_JAVA = "\\\\\\$";
+
+ /**
+ * Creates a regular expression object from a pattern with no flags.
+ *
+ * @param pattern the Javascript regular expression pattern to compile
+ * @return a new regular expression
+ * @throws RuntimeException if the pattern is invalid
+ */
+ public static RegExp compile(String pattern) {
+ return compile(pattern, "");
+ }
+
+ /**
+ * Creates a regular expression object from a pattern using the given flags.
+ *
+ * @param pattern the Javascript regular expression pattern to compile
+ * @param flags the flags string, containing at most one occurrence of {@code
+ * 'g'} ({@link #getGlobal()}), {@code 'i'} ({@link #getIgnoreCase()}),
+ * or {@code 'm'} ({@link #getMultiline()}).
+ * @return a new regular expression
+ * @throws RuntimeException if the pattern or the flags are invalid
+ */
+ public static RegExp compile(String pattern, String flags) {
+ // Parse flags
+ boolean globalFlag = false;
+ int javaPatternFlags = Pattern.UNIX_LINES;
+ for (char flag : parseFlags(flags)) {
+ switch (flag) {
+ case 'g':
+ globalFlag = true;
+ break;
+
+ case 'i':
+ javaPatternFlags |= Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+ break;
+
+ case 'm':
+ javaPatternFlags |= Pattern.MULTILINE;
+ break;
+
+ default:
+ throw new IllegalArgumentException("Unknown regexp flag: '" + flag
+ + "'");
+ }
+ }
+
+ Pattern javaPattern = Pattern.compile(pattern, javaPatternFlags);
+
+ return new RegExp(pattern, javaPattern, globalFlag);
+ }
+
+ /**
+ * Parses a flags string as a set of characters. Does not reject unknown
+ * flags.
+ *
+ * @param flags the flag string to parse
+ * @return a set of flags
+ * @throws IllegalArgumentException if a flag is duplicated
+ */
+ private static Set<Character> parseFlags(String flags) {
+ Set<Character> flagsSet = new HashSet<Character>(flags.length());
+ for (int flagIndex = 0; flagIndex < flags.length(); flagIndex++) {
+ char flag = flags.charAt(flagIndex);
+ if (!flagsSet.add(flag)) {
+ throw new IllegalArgumentException("Flag cannot be specified twice: '"
+ + flag + "'");
+ }
+ }
+ return flagsSet;
+ }
+
+ private final boolean globalFlag;
+
+ private int lastIndex;
+
+ private final Pattern pattern;
+
+ private final String source;
+
+ private RegExp(String source, Pattern pattern, boolean globalFlag) {
+ this.source = source;
+ this.pattern = pattern;
+ this.globalFlag = globalFlag;
+ lastIndex = 0;
+ }
+
+ /**
+ * Applies the regular expression to the given string. This call affects the
+ * value returned by {@link #getLastIndex()} if the global flag is set.
+ *
+ * @param input the string to apply the regular expression to
+ * @return a match result if the string matches, else {@code null}
+ */
+ public MatchResult exec(String input) {
+ // Start the search at lastIndex if the global flag is true.
+ int searchStartIndex = (globalFlag) ? lastIndex : 0;
+
+ Matcher matcher;
+ if (input == null || searchStartIndex < 0
+ || searchStartIndex > input.length()) {
+ // Avoid exceptions: Javascript is more tolerant than Java
+ matcher = null;
+ } else {
+ matcher = pattern.matcher(input);
+ if (!matcher.find(searchStartIndex)) {
+ matcher = null;
+ }
+ }
+
+ if (matcher != null) {
+ // Match: create a result
+
+ // Retrieve the matched groups.
+ int groupCount = matcher.groupCount();
+ List<String> groups = new ArrayList<String>(1 + groupCount);
+ for (int group = 0; group <= groupCount; group++) {
+ groups.add(matcher.group(group));
+ }
+
+ if (globalFlag) {
+ lastIndex = matcher.end();
+ }
+
+ return new MatchResult(matcher.start(), input, groups);
+ } else {
+ // No match
+ if (globalFlag) {
+ lastIndex = 0;
+ }
+ return null;
+ }
+ }
+
+ /**
+ * @return Whether the regular expression captures all occurrences of the
+ * pattern.
+ */
+ public boolean getGlobal() {
+ return globalFlag;
+ }
+
+ /**
+ * @return Whether the regular expression ignores case.
+ */
+ public boolean getIgnoreCase() {
+ return (pattern.flags() & Pattern.CASE_INSENSITIVE) != 0;
+ }
+
+ /**
+ * @return The zero-based position at which to start the next match. The
+ * return value is not defined if the global flag is not set. After a
+ * call to {@link #exec} or {@link #test}, this method returns the
+ * next position following the most recent match.
+ *
+ * @see #getGlobal()
+ */
+ public int getLastIndex() {
+ return lastIndex;
+ }
+
+ /**
+ * @return Whether '$' and '^' match line returns ('\n' and '\r') in addition
+ * to the beginning or end of the string.
+ */
+ public boolean getMultiline() {
+ return (pattern.flags() & Pattern.MULTILINE) != 0;
+ }
+
+ /**
+ * @return The pattern string of the regular expression.
+ */
+ public String getSource() {
+ return source;
+ }
+
+ /**
+ * Returns the input string with the part(s) matching the regular expression
+ * replaced with the replacement string. If the global flag is set, replaces
+ * all matches of the regular expression. Otherwise, replaces the first match
+ * of the regular expression. As per Javascript semantics, backslashes in the
+ * replacement string get no special treatment, but the replacement string can
+ * use the following special patterns:
+ * <ul>
+ * <li>$1, $2, ... $99 - inserts the n'th group matched by the regular
+ * expression.
+ * <li>$& - inserts the entire string matched by the regular expression.
+ * <li>$$ - inserts a $.
+ * </ul>
+ * Note: $` and $' are *not* supported in the pure Java implementation, and
+ * throw an exception.
+ *
+ * @param input the string in which the regular expression is to be searched.
+ * @param replacement the replacement string.
+ * @return the input string with the regular expression replaced by the
+ * replacement string.
+ * @throws RuntimeException if {@code replacement} is invalid
+ */
+ public String replace(String input, String replacement) {
+ // Replace \ in the replacement with \\ to escape it for Java replace.
+ replacement = REPLACEMENT_BACKSLASH.matcher(replacement).replaceAll(
+ REPLACEMENT_BACKSLASH_FOR_JAVA);
+
+ // Replace the Javascript-ese $& in the replacement with Java-ese $0, but
+ // watch out for $$&, which should stay $$&, to be changed to \$& below.
+ replacement = REPLACEMENT_DOLLAR_AMPERSAND.matcher(replacement).replaceAll(
+ REPLACEMENT_DOLLAR_AMPERSAND_FOR_JAVA);
+
+ // Test for Javascript-ese $` and $', which we do not support in the pure
+ // Java version.
+ if (REPLACEMENT_DOLLAR_APOSTROPHE.matcher(replacement).find()) {
+ throw new UnsupportedOperationException(
+ "$` and $' replacements are not supported");
+ }
+
+ // Replace the Javascript-ese $$ in the replacement with Java-ese \$.
+ replacement = REPLACEMENT_DOLLAR_DOLLAR.matcher(replacement).replaceAll(
+ REPLACEMENT_DOLLAR_DOLLAR_FOR_JAVA);
+
+ return globalFlag ? pattern.matcher(input).replaceAll(replacement)
+ : pattern.matcher(input).replaceFirst(replacement);
+ }
+
+ /**
+ * Sets the zero-based position at which to start the next match.
+ */
+ public void setLastIndex(int lastIndex) {
+ this.lastIndex = lastIndex;
+ }
+
+ /**
+ * Splits the input string around matches of the regular expression. If the
+ * regular expression is completely empty, splits the input string into its
+ * constituent characters. If the regular expression is not empty but matches
+ * an empty string, the results are not well defined.
+ *
+ * @param input the string to be split.
+ * @return the strings split off, any of which may be empty.
+ */
+ public SplitResult split(String input) {
+ return split(input, -1);
+ }
+
+ /**
+ * Splits the input string around matches of the regular expression. If the
+ * regular expression is completely empty, splits the input string into its
+ * constituent characters. If the regular expression is not empty but matches
+ * an empty string, the results are not well defined.
+ *
+ * @param input the string to be split.
+ * @param limit the maximum number of strings to split off and return,
+ * ignoring the rest of the input string. If negative, there is no
+ * limit.
+ * @return the strings split off, any of which may be empty.
+ */
+ public SplitResult split(String input, int limit) {
+ String[] result;
+ if (source.length() == 0) {
+ // Javascript split using a completely empty regular expression splits the
+ // string into its constituent characters.
+ int resultLength = input.length();
+ if (resultLength > limit && limit >= 0) {
+ resultLength = limit;
+ }
+ result = new String[resultLength];
+ for (int i = 0; i < resultLength; i++) {
+ result[i] = input.substring(i, i + 1);
+ }
+ } else {
+ result = pattern.split(input, limit < 0 ? -1 : (limit + 1));
+ if (result.length > limit && limit >= 0) {
+ // Chop off the unsplit part of the string which has been put in
+ // result[limit]. Javascript split does not return it.
+ String[] realResult = new String[limit];
+ for (int i = 0; i < limit; i++) {
+ realResult[i] = result[i];
+ }
+ result = realResult;
+ }
+ }
+ return new SplitResult(result);
+ }
+
+ /**
+ * Determines if the regular expression matches the given string. This call
+ * affects the value returned by {@link #getLastIndex()} if the global flag is
+ * set. Equivalent to: {@code exec(input) != null}
+ *
+ * @param input the string to apply the regular expression to
+ * @return whether the regular expression matches the given string.
+ */
+ public boolean test(String input) {
+ return exec(input) != null;
+ }
+}
diff --git a/user/src/com/google/gwt/regexp/shared/SplitResult.java b/user/src/com/google/gwt/regexp/shared/SplitResult.java
new file mode 100644
index 0000000..66e3e36
--- /dev/null
+++ b/user/src/com/google/gwt/regexp/shared/SplitResult.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp.shared;
+
+/**
+ * Pure Java implementation of a regular expression split result.
+ */
+public class SplitResult {
+
+ private final String[] result;
+
+ public SplitResult(String[] result) {
+ this.result = result;
+ }
+
+ /**
+ * Returns one of the strings split off.
+ *
+ * @param index the index of the string to be returned.
+ * @return The index'th string resulting from the split.
+ */
+ public String get(int index) {
+ return result[index];
+ }
+
+ /**
+ * @return The number of strings split off.
+ */
+ public int length() {
+ return result.length;
+ }
+
+ /**
+ * Sets (overrides) one of the strings split off.
+ *
+ * @param index the index of the string to be set.
+ */
+ public void set(int index, String value) {
+ result[index] = value;
+ }
+}
diff --git a/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/MatchResult.java b/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/MatchResult.java
new file mode 100644
index 0000000..89b03d1
--- /dev/null
+++ b/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/MatchResult.java
@@ -0,0 +1,64 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp.shared;
+
+import com.google.gwt.core.client.JavaScriptObject;
+
+/**
+ * GWT wrapper for Javascript RegExp matching results.
+ */
+public class MatchResult extends JavaScriptObject {
+
+ protected MatchResult() {
+ }
+
+ /**
+ * Retrieves the matched string or the given matched group.
+ *
+ * @param index the index of the group to return, 0 to return the whole
+ * matched string; must be between 0 and {@code getGroupCount() - 1}
+ * included
+ * @return The matched string if {@code index} is zero, else the given matched
+ * group. If the given group was optional and did not match, the
+ * behavior is browser-dependent: this method will return {@code null}
+ * or an empty string.
+ */
+ public final native String getGroup(int index) /*-{
+ return this[index];
+ }-*/;
+
+ /**
+ * @return The number of groups, including the matched string, hence greater
+ * than or equal to 1.
+ */
+ public final native int getGroupCount() /*-{
+ return this.length;
+ }-*/;
+
+ /**
+ * @return The zero-based index of the match in the input string.
+ */
+ public final native int getIndex() /*-{
+ return this.index;
+ }-*/;
+
+ /**
+ * @return The original input string.
+ */
+ public final native String getInput() /*-{
+ return this.input;
+ }-*/;
+}
diff --git a/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/RegExp.java b/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/RegExp.java
new file mode 100644
index 0000000..8307ba8
--- /dev/null
+++ b/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/RegExp.java
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp.shared;
+
+import com.google.gwt.core.client.JavaScriptObject;
+
+/**
+ * GWT wrapper for the Javascript RegExp class extended with the Javascript
+ * String class's replace and split methods, which can take a RegExp parameter.
+ */
+public class RegExp extends JavaScriptObject {
+
+ /**
+ * Creates a regular expression object from a pattern with no flags.
+ *
+ * @param pattern the Javascript regular expression pattern to compile
+ * @return a new regular expression
+ * @throws RuntimeException if the pattern is invalid
+ */
+ public static native RegExp compile(String pattern) /*-{
+ return new RegExp(pattern);
+ }-*/;
+
+ /**
+ * Creates a regular expression object from a pattern using the given flags.
+ *
+ * @param pattern the Javascript regular expression pattern to compile
+ * @param flags the flags string, containing at most one occurrence of {@code
+ * 'g'} ({@link #getGlobal()}), {@code 'i'} ({@link #getIgnoreCase()}),
+ * or {@code 'm'} ({@link #getMultiline()}).
+ * @return a new regular expression
+ * @throws RuntimeException if the pattern or the flags are invalid
+ */
+ public static native RegExp compile(String pattern, String flags) /*-{
+ return new RegExp(pattern, flags);
+ }-*/;
+
+ protected RegExp() {
+ }
+
+ /**
+ * Applies the regular expression to the given string. This call affects the
+ * value returned by {@link #getLastIndex()} if the global flag is set.
+ *
+ * @param input the string to apply the regular expression to
+ * @return a match result if the string matches, else {@code null}
+ */
+ public final native MatchResult exec(String input) /*-{
+ return this.exec(input);
+ }-*/;
+
+ /**
+ * @return Whether the regular expression captures all occurrences of the
+ * pattern.
+ */
+ public final native boolean getGlobal() /*-{
+ return this.global;
+ }-*/;
+
+ /**
+ * @return Whether the regular expression ignores case.
+ */
+ public final native boolean getIgnoreCase() /*-{
+ return this.ignoreCase;
+ }-*/;
+
+ /**
+ * @return The zero-based position at which to start the next match. The
+ * return value is not defined if the global flag is not set. After a
+ * call to {@link #exec} or {@link #test}, this method returns the
+ * next position following the most recent match.
+ *
+ * @see #getGlobal()
+ */
+ public final native int getLastIndex() /*-{
+ return this.lastIndex;
+ }-*/;
+
+ /**
+ * @return Whether '$' and '^' match line returns ('\n' and '\r') in addition
+ * to the beginning or end of the string.
+ */
+ public final native boolean getMultiline() /*-{
+ return this.multiline;
+ }-*/;
+
+ /**
+ * @return The pattern string of the regular expression.
+ */
+ public final native String getSource() /*-{
+ return this.source;
+ }-*/;
+
+ /**
+ * Returns the input string with the part(s) matching the regular expression
+ * replaced with the replacement string. If the global flag is set, replaces
+ * all matches of the regular expression. Otherwise, replaces the first match
+ * of the regular expression. As per Javascript semantics, backslashes in the
+ * replacement string get no special treatment, but the replacement string can
+ * use the following special patterns:
+ * <ul>
+ * <li>$1, $2, ... $99 - inserts the n'th group matched by the regular
+ * expression.
+ * <li>$& - inserts the entire string matched by the regular expression.
+ * <li>$$ - inserts a $.
+ * </ul>
+ *
+ * @param input the string in which the regular expression is to be searched.
+ * @param replacement the replacement string.
+ * @return the input string with the regular expression replaced with the
+ * replacement string.
+ * @throws RuntimeException if {@code replacement} is invalid
+ */
+ public final native String replace(String input, String replacement) /*-{
+ return input.replace(this, replacement);
+ }-*/;
+
+ /**
+ * Sets the zero-based position at which to start the next match.
+ */
+ public final native void setLastIndex(int lastIndex) /*-{
+ this.lastIndex = lastIndex;
+ }-*/;
+
+ /**
+ * Splits the input string around matches of the regular expression. If the
+ * regular expression is completely empty, splits the input string into its
+ * constituent characters. If the regular expression is not empty but matches
+ * an empty string, the results are not well defined.
+ *
+ * @param input the string to be split.
+ *
+ * @return the strings split off, any of which may be empty.
+ */
+ public final native SplitResult split(String input) /*-{
+ return input.split(this);
+ }-*/;
+
+ /**
+ * Splits the input string around matches of the regular expression. If the
+ * regular expression is completely empty, splits the input string into its
+ * constituent characters. If the regular expression is not empty but matches
+ * an empty string, the results are not well defined.
+ *
+ * @param input the string to be split.
+ * @param limit the maximum number of strings to split off and return,
+ * ignoring the rest of the input string. If negative, there is no
+ * limit.
+ *
+ * @return the strings split off, any of which may be empty.
+ */
+ public final native SplitResult split(String input, int limit) /*-{
+ return input.split(this, limit);
+ }-*/;
+
+ /**
+ * Determines if the regular expression matches the given string. This call
+ * affects the value returned by {@link #getLastIndex()} if the global flag is
+ * set. Equivalent to: {@code exec(input) != null}
+ *
+ * @param input the string to apply the regular expression to
+ * @return whether the regular expression matches the given string.
+ */
+ public final native boolean test(String input) /*-{
+ return this.test(input);
+ }-*/;
+}
+
diff --git a/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/SplitResult.java b/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/SplitResult.java
new file mode 100644
index 0000000..a3009a1
--- /dev/null
+++ b/user/src/com/google/gwt/regexp/super/com/google/gwt/regexp/shared/SplitResult.java
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp.shared;
+
+import com.google.gwt.core.client.JsArrayString;
+
+/**
+ * GWT wrapper for Javascript string.split results.
+ */
+public class SplitResult extends JsArrayString {
+
+ protected SplitResult() {
+ }
+}
diff --git a/user/test/com/google/gwt/regexp/RegExpSuite.java b/user/test/com/google/gwt/regexp/RegExpSuite.java
new file mode 100644
index 0000000..f903486
--- /dev/null
+++ b/user/test/com/google/gwt/regexp/RegExpSuite.java
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp;
+
+import com.google.gwt.junit.tools.GWTTestSuite;
+import com.google.gwt.regexp.shared.GwtRegExpTest;
+import com.google.gwt.regexp.shared.RegExpTest;
+
+import junit.framework.Test;
+
+/**
+ * All RegExp tests.
+ */
+public class RegExpSuite {
+ public static Test suite() {
+ GWTTestSuite suite = new GWTTestSuite("All RegExp tests");
+
+ // $JUnit-BEGIN$
+ suite.addTestSuite(RegExpTest.class);
+ suite.addTestSuite(GwtRegExpTest.class);
+ // $JUnit-END$
+
+ return suite;
+ }
+}
diff --git a/user/test/com/google/gwt/regexp/RegExpTestModule.gwt.xml b/user/test/com/google/gwt/regexp/RegExpTestModule.gwt.xml
new file mode 100644
index 0000000..a27cc74
--- /dev/null
+++ b/user/test/com/google/gwt/regexp/RegExpTestModule.gwt.xml
@@ -0,0 +1,19 @@
+<!-- -->
+<!-- Copyright 2010 Google Inc. -->
+<!-- Licensed under the Apache License, Version 2.0 (the "License"); you -->
+<!-- may not use this file except in compliance with the License. You may -->
+<!-- obtain a copy of the License at -->
+<!-- -->
+<!-- http://www.apache.org/licenses/LICENSE-2.0 -->
+<!-- -->
+<!-- Unless required by applicable law or agreed to in writing, software -->
+<!-- distributed under the License is distributed on an "AS IS" BASIS, -->
+<!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -->
+<!-- implied. See the License for the specific language governing permissions and -->
+<!-- limitations under the License. -->
+
+<module>
+ <!-- Inherit the RegExp module under test -->
+ <inherits name="com.google.gwt.regexp.RegExp"/>
+ <source path="shared" />
+</module>
diff --git a/user/test/com/google/gwt/regexp/shared/GwtRegExpTest.java b/user/test/com/google/gwt/regexp/shared/GwtRegExpTest.java
new file mode 100644
index 0000000..21ea58b
--- /dev/null
+++ b/user/test/com/google/gwt/regexp/shared/GwtRegExpTest.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp.shared;
+
+/**
+ * Tests the GWT implementation of RegExp. Performs the tests from {@link
+ * RegExpTest} using the GWT implementation.
+ */
+public class GwtRegExpTest extends RegExpTest {
+
+ @Override
+ public String getModuleName() {
+ return "com.google.gwt.regexp.RegExpTestModule";
+ }
+}
diff --git a/user/test/com/google/gwt/regexp/shared/RegExpTest.java b/user/test/com/google/gwt/regexp/shared/RegExpTest.java
new file mode 100644
index 0000000..7a5fc85
--- /dev/null
+++ b/user/test/com/google/gwt/regexp/shared/RegExpTest.java
@@ -0,0 +1,856 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.regexp.shared;
+
+import com.google.gwt.core.client.GWT;
+import com.google.gwt.junit.DoNotRunWith;
+import com.google.gwt.junit.Platform;
+import com.google.gwt.junit.client.GWTTestCase;
+
+/**
+ * Unit tests for {@link RegExp}.
+ *
+ * <p>
+ * Tests the pure-Java implementation, using a hack that causes a GWTTestCase to
+ * run as a vanilla JUnit TestCase (see {@link #getModuleName()}). The JS
+ * implementation of RegExp is tested by {@link GwtRegExpTest}, which extends
+ * this class.
+ * <p>
+ * The pure Java and GWT implementations are not 100% compatible. Some
+ * discrepancies have been found when writing this test: search for DISCREPANCY
+ * to find them.
+ */
+public class RegExpTest extends GWTTestCase {
+
+ public static final String WORD_CHARACTERS =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_";
+ private static final String NOT_SPACE_CHARACTERS = allAsciiCharsBut(
+ " \f\n\r\t\u000b\u00a0", 255);
+ private static final String SPACE_CHARACTERS = " \f\n\r\t\u000b";
+
+ /**
+ * Returns all characters from 1 to {@code limit} not found in {@code exclude}.
+ *
+ * @param exclude the characters not to return
+ * @param limit the last character to include, typically 127 or 255
+ * @return the remaining characters, in increasing order
+ */
+ private static String allAsciiCharsBut(String exclude, int limit) {
+ StringBuilder sb = new StringBuilder();
+ int last = Math.min(limit, Character.MAX_VALUE); // a char counter would wrap at 0xFFFF and never stop
+ // DISCREPANCY: character 0 is handled differently by Webkit
+ for (int code = 1; code <= last; code++) {
+ if (exclude.indexOf(code) < 0) {
+ sb.append((char) code);
+ }
+ }
+ return sb.toString();
+ }
+
+ private RegExp regExp;
+
+ // This is a hack to force a GWTTestCase to run as a vanilla JUnit TestCase.
+ @Override
+ public String getModuleName() {
+ return null;
+ }
+
+ public void testCompile_duplicatedFlags() {
+ checkCompileThrows("regexp", "igg", true);
+ }
+
+ public void testCompile_unknownFlags() {
+ checkCompileThrows("regexp", "z", true);
+ }
+
+ public void testExec_atLeastNtimes() {
+ regExp = RegExp.compile("a{3,}");
+ checkExecNoMatch("_a_");
+ checkExecNoMatch("_aa_");
+ checkExec("_aaa_", 1, "aaa");
+ checkExec("_aaaab_", 1, "aaaa");
+ }
+
+ /**
+ * Checks that backreferences with two digits are accepted.
+ */
+ public void testExec_backreferenceMoreThanNine() {
+ regExp = RegExp.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(a)(b)\\1\\2\\3\\4\\5\\6\\7\\8\\9\\10\\11");
+ checkExec("123456789ab123456789ab", 0, "123456789ab123456789ab", "1", "2",
+ "3", "4", "5", "6", "7", "8", "9", "a", "b");
+ }
+
+ public void testExec_backreferenceNested() {
+ regExp = RegExp.compile("(([ab])X\\2)([-,])\\1\\3\\2");
+ checkExec("_aXa-aXa-a_", 1, "aXa-aXa-a", "aXa", "a", "-");
+ checkExec("_aXa,aXa,a_", 1, "aXa,aXa,a", "aXa", "a", ",");
+ checkExec("_bXb-bXb-b_", 1, "bXb-bXb-b", "bXb", "b", "-");
+ checkExecNoMatch("_aXa-bXb-a_");
+ checkExecNoMatch("_aXa,aXa,b_");
+ checkExecNoMatch("_aXa,aXa-a_");
+ }
+
+ public void testExec_backreferenceOne() {
+ regExp = RegExp.compile("([ab])X\\1");
+ checkExec("_aXa_", 1, "aXa", "a");
+ checkExec("_bXb_", 1, "bXb", "b");
+ checkExecNoMatch("_aXb_");
+ }
+
+ public void testExec_carriageReturn() {
+ checkAlias("\\r", "\r");
+ }
+
+ public void testExec_characterSetList() {
+ regExp = RegExp.compile("[ace]");
+ checkExec("abcde", 0, "a");
+ checkExec("XXcYY", 2, "c");
+ checkExecNoMatch("bdf");
+ }
+
+ public void testExec_characterSetRange() {
+ regExp = RegExp.compile("[a-ce-z]");
+ checkExec("abcde", 0, "a");
+ checkExec("XXcYY", 2, "c");
+ checkExecNoMatch("XXdYY");
+ }
+
+ // DISCREPANCY: chrome does not support \cX
+ /*
+ public void testExec_controlCharacterInvalid() {
+ // regExp = RegExp.compile("\\c5");
+ // checkExecNoMatch("c5");
+ // DISCREPANCY: Java specificity: \c accepts any character; \cX is an alias
+ // for the character of code: asciiCode(X) ^ 64
+ // checkExecNoMatch(allAsciiCharsBut(""));
+ checkExecNoMatch(allAsciiCharsBut(Character.toString((char)('5' ^ 64)),
+ 255));
+ }
+
+ public void testExec_controlCharacterValid() {
+ regExp = RegExp.compile("\\cM");
+ // DISCREPANCY: does not work on hosted mode
+ // checkExec("\r", 0, "\r");
+ checkExecNoMatch(allAsciiCharsBut("\r", 255));
+ }
+ */
+
+ public void testExec_digit() {
+ checkAlias("\\d", "0123456789");
+ }
+
+ public void testExec_disjunction() {
+ regExp = RegExp.compile("a|abc|def");
+ checkExec("_a_", 1, "a");
+ checkExec("_ab_", 1, "a");
+ checkExec("_abc_", 1, "a"); // First expression has precedence
+ checkExec("_def_", 1, "def");
+ checkExecNoMatch("_de_");
+ }
+
+ public void testExec_dotMultiLine() {
+ testExec_dot("m");
+ }
+
+ public void testExec_dotSingleLine() {
+ testExec_dot("");
+ }
+
+ public void testExec_emptyFlags() {
+ regExp = RegExp.compile("a", "");
+ // DISCREPANCY: IE always sets lastIndex, even in non-global mode
+ // regExp.setLastIndex(42);
+ // checkExec("0a_3a_", 1, 42, "a");
+ checkExec("0a_3a_", 1, "a");
+ }
+
+ public void testExec_escaping() {
+ regExp = RegExp.compile("a\\(b");
+ checkExec("_a(b_", 1, "a(b");
+ checkExecNoMatch("_a\\(b_");
+ }
+
+ public void testExec_formFeed() {
+ checkAlias("\\f", "\f");
+ }
+
+ public void testExec_fromNtoMtimes() {
+ regExp = RegExp.compile("a{2,3}");
+ checkExecNoMatch("_a_");
+ checkExec("_aa_", 1, "aa");
+ checkExec("_aaa_", 1, "aaa");
+ checkExec("_aaaab_", 1, "aaa");
+ }
+
+ public void testExec_global() {
+ regExp = RegExp.compile("a", "g");
+ checkExec("0a_3a_", 1, 2, "a");
+ checkExec("0a_3a_", 4, 5, "a");
+ checkExecNoMatch("0a_3a_", 0);
+ }
+
+ public void testExec_greedinessDisjunction() {
+ regExp = RegExp.compile("a|ab");
+ checkExec("_ab_", 1, "a"); // First expression has precedence
+ }
+
+ public void testExec_greedinessOptionalMatch() {
+ regExp = RegExp.compile("ab?");
+ checkExec("_ab_", 1, "ab"); // Optional match has precedence
+ }
+
+ public void testExec_greedinessSeveralMatches() {
+ regExp = RegExp.compile("a");
+ checkExec("_a_a_", 1, "a"); // First match returned
+ }
+
+ public void testExec_hexa2() {
+ checkAlias("\\x41", "A");
+ checkAlias("\\x42", "B");
+ checkAlias("\\x20", " ");
+ checkAlias("\\xff", "\377");
+ }
+
+ public void testExec_lineFeed() {
+ checkAlias("\\n", "\n");
+ }
+
+ public void testExec_matchBeginningMultiLine() {
+ regExp = RegExp.compile("^ab+", "m");
+ checkExec("abbc", 0, "abb");
+ checkExecNoMatch("_abbc");
+ checkExec("\nabbc\n", 1, "abb");
+ checkExec("xxx\nabbc", 4, "abb");
+ }
+
+ public void testExec_matchBeginningSingleLine() {
+ regExp = RegExp.compile("^ab+");
+ checkExec("abbc", 0, "abb");
+ checkExecNoMatch("_abbc");
+ checkExecNoMatch("\nabbc\n");
+ checkExecNoMatch("xxx\nabbc");
+ }
+
+ public void testExec_matchEndMultiLine() {
+ regExp = RegExp.compile("b+a$", "m");
+ checkExec("cbba", 1, "bba");
+ checkExecNoMatch("cbba_");
+ checkExec("\ncbba\n", 2, "bba");
+ checkExec("cbba\nxxx", 1, "bba");
+ }
+
+ public void testExec_matchEndSingleLine() {
+ regExp = RegExp.compile("b+a$");
+ checkExec("cbba", 1, "bba");
+ checkExecNoMatch("cbba_");
+ // DISCREPANCY: Java specificity: $ matches end of input string even if it
+ // has a trailing \n
+ // checkExecNoMatch("\ncbba\n");
+ // The discrepancy disappears if the trailing \n is doubled.
+ checkExecNoMatch("\ncbba\n\n");
+ checkExecNoMatch("cbba\nxxx");
+ }
+
+ public void testExec_negativeCharacterSetList() {
+ regExp = RegExp.compile("[^ace]");
+ checkExec("abcde", 1, "b");
+ checkExec("aaBcc", 2, "B");
+ checkExec("bdf", 0, "b");
+ }
+
+ public void testExec_negativeCharacterSetRange() {
+ regExp = RegExp.compile("[^a-ce-z]");
+ checkExec("abcde", 3, "d");
+ checkExecNoMatch("xxcyy");
+ checkExec("xxdyy", 2, "d");
+ }
+
+ public void testExec_negativeLookahead() {
+ regExp = RegExp.compile("ab(?!cc)");
+ checkExecNoMatch("_abcc_");
+ checkExecNoMatch("_abccd_");
+ checkExec("_abc_", 1, "ab");
+ }
+
+ public void testExec_negativeLookaheadComplex() {
+ regExp = RegExp.compile("ab(?!cc).+");
+ checkExecNoMatch("_ab");
+ checkExec("_abX", 1, "abX");
+ checkExecNoMatch("_abcc");
+ checkExecNoMatch("_abccc");
+ checkExec("_abXcc", 1, "abXcc");
+ }
+
+ public void testExec_negativeLookaheadNested() {
+ regExp = RegExp.compile("ab(?!cc(?!d))"); // "ab" unless followed by a "cc" that lacks a trailing "d"
+ checkExec("_abccd_", 1, "ab"); // inner (?!d) fails on "d", so the outer (?!...) holds
+ checkExecNoMatch("_abcc_"); // "cc" is not followed by "d", so the outer lookahead rejects
+ }
+
+ public void testExec_nestedCapturingGroups() {
+ regExp = RegExp.compile("a(?:b(?:c+)d)e");
+ checkExec("_abcccde_", 1, "abcccde");
+ // NOTE(review): groups are non-capturing (?:...) despite the test name.
+ regExp = RegExp.compile("a(?:b(?:c+)?d)e");
+ checkExec("_abde_", 1, "abde");
+ checkExec("_abcccde_", 1, "abcccde");
+ checkExecNoMatch("_ac_");
+ }
+
+ public void testExec_noFlags() {
+ regExp = RegExp.compile("a");
+ // DISCREPANCY: IE always sets lastIndex, even in non-global mode
+ // regExp.setLastIndex(42);
+ // checkExec("0a_3a_", 1, 42, "a");
+ checkExec("0a_3a_", 1, "a");
+ }
+
+ public void testExec_nonDigit() {
+ checkAlias("\\D", allAsciiCharsBut("0123456789", 255));
+ }
+
+ public void testExec_nonSpace() {
+ checkAlias("\\S", NOT_SPACE_CHARACTERS, SPACE_CHARACTERS);
+ }
+
+ public void testExec_nonWord() {
+ checkAlias("\\W", allAsciiCharsBut(WORD_CHARACTERS, 127), WORD_CHARACTERS);
+ }
+
+ public void testExec_nonWordBoundary() {
+ regExp = RegExp.compile("\\BX");
+ checkExecNoMatch("ab X cd");
+ checkExecNoMatch("ab\fX cd");
+ checkExecNoMatch("ab\nX cd");
+ checkExecNoMatch("ab\rX cd");
+ checkExecNoMatch("ab\tX cd");
+ checkExecNoMatch("ab\13X cd");
+ checkExecNoMatch("ab\33X cd");
+ checkExecNoMatch("ab\34X cd");
+ for (int i = 0; i <= 32; i++) {
+ checkExecNoMatch("ab" + (char) i + "X cd");
+ }
+ checkExec("aX", 1, "X");
+ checkExec("_X", 1, "X");
+ checkExec(" X aX", 4, "X");
+ }
+
+ public void testExec_nTimes() {
+ regExp = RegExp.compile("a{3}");
+ checkExecNoMatch("_a_");
+ checkExecNoMatch("_aa_");
+ checkExec("_aaa_", 1, "aaa");
+ checkExec("_aaaab_", 1, "aaa");
+ }
+
+ public void testExec_nullInput() {
+ regExp = RegExp.compile("a", "");
+ // DISCREPANCY: IE always sets lastIndex, even in non-global mode
+ // regExp.setLastIndex(42);
+ // checkExecNoMatch(null, 42);
+ checkExecNoMatch(null);
+
+ regExp = RegExp.compile("a", "g");
+ checkExecNoMatch(null, 0);
+ }
+
+ public void testExec_oneCapturingGroup() {
+ regExp = RegExp.compile("a(b+)c");
+ checkExec("_abbbc_", 1, "abbbc", "bbb");
+ checkExecNoMatch("_ac_");
+ }
+
+ public void testExec_oneNonCapturingGroup() {
+ regExp = RegExp.compile("a(?:b+c)?d");
+ checkExec("_abbcd_", 1, "abbcd");
+ checkExec("_ad_", 1, "ad");
+ }
+
+ public void testExec_optionalCapturingGroup() {
+ regExp = RegExp.compile("a(b+)?c");
+ checkExec("_abbbc_", 1, "abbbc", "bbb");
+ checkExec("_ac_", 1, "ac", null);
+ }
+
+ public void testExec_plus() {
+ regExp = RegExp.compile("a[bc]+d");
+ checkExecNoMatch("ad");
+ checkExec("abd", 0, "abd");
+ checkExec("acd", 0, "acd");
+ checkExec("abcbcd", 0, "abcbcd");
+ checkExecNoMatch("abbbec");
+ }
+
+ public void testExec_positiveLookahead() {
+ regExp = RegExp.compile("ab(?=cc)");
+ checkExec("_abcc_", 1, "ab");
+ checkExec("_abccd_", 1, "ab");
+ checkExecNoMatch("_abc_");
+ }
+
+ public void testExec_positiveLookaheadDouble() {
+ regExp = RegExp.compile("ab(?=cc)cc(?=d)");
+ checkExec("_abccd_", 1, "abcc");
+ checkExecNoMatch("_abcc_");
+ }
+
+ public void testExec_positiveLookaheadNested() {
+ regExp = RegExp.compile("ab(?=cc(?=d))");
+ checkExec("_abccd_", 1, "ab");
+ checkExecNoMatch("_abcc_");
+ }
+
+ public void testExec_questionMark() {
+ regExp = RegExp.compile("a[bc]?d");
+ checkExec("ad", 0, "ad");
+ checkExec("abd", 0, "abd");
+ checkExec("acd", 0, "acd");
+ checkExecNoMatch("abcd");
+ checkExecNoMatch("aXd");
+ checkExecNoMatch("ab");
+ }
+
+ public void testExec_space() {
+ checkAlias("\\s", SPACE_CHARACTERS, NOT_SPACE_CHARACTERS);
+ }
+
+ public void testExec_star() {
+ regExp = RegExp.compile("a[bc]*d");
+ checkExec("ad", 0, "ad");
+ checkExec("abd", 0, "abd");
+ checkExec("acd", 0, "acd");
+ checkExec("abcbcd", 0, "abcbcd");
+ checkExecNoMatch("abbbec");
+ }
+
+ public void testExec_tab() {
+ checkAlias("\\t", "\t");
+ }
+
+ public void testExec_unicode2() {
+ // DISCREPANCY: IE bug and Java specificity: '\'u HH is not recognized.
+ // checkAlias("\\u41", "A");
+ // checkAlias("\\u42", "B");
+ // checkAlias("\\u20", " ");
+ // checkAlias("\\uff", "\377");
+ }
+
+ public void testExec_unicode4() {
+ checkAlias("\\u0041", "A");
+ checkAlias("\\u0042", "B");
+ checkAlias("\\u0020", " ");
+ checkAlias("\\u00ff", "\377");
+
+ checkAlias("\\u0100", "\u0100");
+ checkAlias("\\u1234", "\u1234");
+ checkAlias("\\uffff", "\uffff");
+ }
+
+ public void testExec_verticalTab() {
+ checkAlias("\\v", "\u000b");
+ }
+
+ public void testExec_word() {
+ checkAlias("\\w", WORD_CHARACTERS, allAsciiCharsBut(WORD_CHARACTERS, 127));
+ }
+
+ public void testExec_wordBoundary() {
+ regExp = RegExp.compile("\\bX");
+ checkExec("ab X cd", 3, "X");
+ checkExec("ab\fX cd", 3, "X");
+ checkExec("ab\nX cd", 3, "X");
+ checkExec("ab\rX cd", 3, "X");
+ checkExec("ab\tX cd", 3, "X");
+ checkExec("ab\13X cd", 3, "X");
+ checkExec("ab\33X cd", 3, "X");
+ checkExec("ab\34X cd", 3, "X");
+ // DISCREPANCY: character 0 is non-space on Webkit
+ for (int i = 1; i <= 32; i++) {
+ checkExec("ab" + (char) i + "X cd", 3, "X");
+ }
+ checkExecNoMatch("aX");
+ checkExecNoMatch("_X");
+ checkExec("aX X", 3, "X");
+ }
+
+ public void testExec_zero() {
+ // DISCREPANCY: Java specificity: \0 is not recognized.
+ // checkAlias("\\0", "\0");
+ }
+
+ public void testGetGlobal() {
+ assertTrue(RegExp.compile("test", "g").getGlobal());
+ assertFalse(RegExp.compile("test", "im").getGlobal());
+ assertFalse(RegExp.compile("test").getGlobal());
+ }
+
+ public void testGetIgnoreCase() {
+ assertTrue(RegExp.compile("test", "i").getIgnoreCase());
+ assertFalse(RegExp.compile("test", "gm").getIgnoreCase());
+ assertFalse(RegExp.compile("test").getIgnoreCase());
+ }
+
+ public void testGetLastIndex_initiallyZero() {
+ assertEquals(0, RegExp.compile("test").getLastIndex());
+ }
+
+ public void testGetMultiline() {
+ assertTrue(RegExp.compile("test", "m").getMultiline());
+ assertFalse(RegExp.compile("test", "ig").getMultiline());
+ assertFalse(RegExp.compile("test").getMultiline());
+ }
+
+ public void testGetSource() {
+ assertEquals("a(b|c)+d$", RegExp.compile("a(b|c)+d$").getSource());
+ }
+
+ public void testReplace_backslashReplacement() {
+ regExp = RegExp.compile("A+");
+ checkReplace("Abc", "\\x", "\\xbc");
+ }
+
+ @DoNotRunWith({Platform.HtmlUnit})
+ public void testReplace_dollarReplacement() {
+ regExp = RegExp.compile("A+");
+ checkReplace("the A stops here", "$$", "the $ stops here");
+ checkReplace("the A stops here", "$$$$", "the $$ stops here");
+ }
+
+ public void testReplace_doubleDigitGroupReplacement() {
+ regExp = RegExp.compile("(1)(2)(3)(4)(5)(6)(7)(8)(9)(a)(b)(c)");
+ checkReplace("it's 123456789abc.", "[$11]", "it's [b].");
+ }
+
+ public void testReplace_emptyGlobalRegExp() {
+ regExp = RegExp.compile("", "g");
+ checkReplace("abc", "x", "xaxbxcx");
+ }
+
+ public void testReplace_emptyInput() {
+ regExp = RegExp.compile("A+");
+ checkReplace("", "x", "");
+ }
+
+ public void testReplace_emptyRegExp() {
+ regExp = RegExp.compile("");
+ checkReplace("abc", "x", "xabc");
+ }
+
+ public void testReplace_emptyReplacement() {
+ regExp = RegExp.compile("A+");
+ checkReplace("AAA", "", "");
+ }
+
+ public void testReplace_first() {
+ regExp = RegExp.compile("A+");
+ checkReplace("AAA AAA", "x", "x AAA");
+ }
+
+ public void testReplace_global() {
+ regExp = RegExp.compile("A+", "g");
+ checkReplace("AAA AAA", "x", "x x");
+ }
+
+ @DoNotRunWith({Platform.HtmlUnit})
+ public void testReplace_groupAmpersandReplacement() {
+ regExp = RegExp.compile("A(B+)A");
+ checkReplace("he likes ABBBA", "'$&'", "he likes 'ABBBA'");
+ checkReplace("he likes ABBBA", "'$$&'", "he likes '$&'");
+ checkReplace("he likes ABBBA", "'$$$&'", "he likes '$ABBBA'");
+ checkReplace("he likes ABBBA", "'$$$$&'", "he likes '$$&'");
+ checkReplace("he likes ABBBA", "'$$$$$&'", "he likes '$$ABBBA'");
+ checkReplace("he likes ABBBA", "$&!", "he likes ABBBA!");
+ checkReplace("he likes ABBBA", "$$&!", "he likes $&!");
+ checkReplace("he likes ABBBA", "$$$&!", "he likes $ABBBA!");
+ checkReplace("he likes ABBBA", "$$$$&!", "he likes $$&!");
+ checkReplace("he likes ABBBA", "$$$$$&!", "he likes $$ABBBA!");
+ checkReplace("he likes ABBBA", "$&$&", "he likes ABBBAABBBA");
+ checkReplace("he likes ABBBA", "$$&$$&", "he likes $&$&");
+ checkReplace("he likes ABBBA", "$& $&\n$&\n\n$&",
+ "he likes ABBBA ABBBA\nABBBA\n\nABBBA");
+ checkReplace("he likes ABBBA", "$$& $$&\n$$&\n\n$$&",
+ "he likes $& $&\n$&\n\n$&");
+ }
+
+ public void testReplace_groupReplacement() {
+ regExp = RegExp.compile("A(B+)A");
+ checkReplace("he likes ABBBA", "$1", "he likes BBB");
+ }
+
+ public void testReplace_noMatch() {
+ regExp = RegExp.compile("A+");
+ checkReplace("none here", "x", "none here");
+ }
+
+ @DoNotRunWith({Platform.HtmlUnit})
+ public void testReplace_unsupportedReplacement() {
+ regExp = RegExp.compile("foo");
+ checkReplaceThrows("", "$`", true);
+ checkReplaceThrows("", "($`)", true);
+ checkReplaceThrows("", "$$$`", true);
+ checkReplaceThrows("", "($$$`)", true);
+ checkReplaceThrows("", "$$$$$`", true);
+ checkReplaceThrows("", "($$$$$`)", true);
+ checkReplaceThrows("", "\n$`", true);
+ checkReplaceThrows("", "\n\n$`", true);
+ checkReplaceThrows("", " $`", true);
+ checkReplace("foo", "$$`$$`", "$`$`");
+ checkReplace("foo", "$$$$`$$$$`", "$$`$$`");
+ checkReplace("foo", "\n$$`", "\n$`");
+ checkReplace("foo", "\n\n$$`", "\n\n$`");
+ checkReplace("foo", " $$`", " $`");
+ checkReplaceThrows("", "$'", true);
+ checkReplaceThrows("", "($')", true);
+ checkReplaceThrows("", "$$$'", true);
+ checkReplaceThrows("", "($$$')", true);
+ checkReplaceThrows("", "$$$$$'", true);
+ checkReplaceThrows("", "($$$$$')", true);
+ checkReplaceThrows("", "\n$'", true);
+ checkReplaceThrows("", "\n\n$'", true);
+ checkReplaceThrows("", " $'", true);
+ checkReplace("foo", "$$'$$'", "$'$'");
+ checkReplace("foo", "$$$$'$$$$'", "$$'$$'");
+ checkReplace("foo", "\n$$'", "\n$'");
+ checkReplace("foo", "\n\n$$'", "\n\n$'");
+ checkReplace("foo", " $$'", " $'");
+ }
+
+ public void testSetLastIndex() {
+ regExp = RegExp.compile("test");
+ regExp.setLastIndex(3);
+ assertEquals(3, regExp.getLastIndex());
+ }
+
+ public void testSplit_emptyInput() {
+ regExp = RegExp.compile(",");
+ checkSplit("", "");
+ }
+
+ public void testSplit_emptyParts() {
+ regExp = RegExp.compile(",");
+ // DISCREPANCY: IE discards empty parts
+ // checkSplit(",a,,b,", "", "a", "", "b", "");
+ }
+
+ public void testSplit_emptySeparator() {
+ regExp = RegExp.compile("");
+ checkSplit("ab", "a", "b");
+ }
+
+ public void testSplit_emptySeparatorExactLimit() {
+ regExp = RegExp.compile("");
+ checkSplit("ab", 2, "a", "b");
+ }
+
+ public void testSplit_emptySeparatorHighLimit() {
+ regExp = RegExp.compile("");
+ checkSplit("ab", 3, "a", "b");
+ }
+
+ public void testSplit_emptySeparatorLowLimit() {
+ regExp = RegExp.compile("");
+ checkSplit("ab", 1, "a");
+ }
+
+ public void testSplit_emptySeparatorZeroLimit() {
+ regExp = RegExp.compile("");
+ checkSplit("ab", 0);
+ }
+
+ public void testSplit_exactLimit() {
+ regExp = RegExp.compile(",");
+ checkSplit("a,b", 2, "a", "b");
+ }
+
+ public void testSplit_highLimit() {
+ regExp = RegExp.compile(",");
+ checkSplit("a,b", 3, "a", "b");
+ }
+
+ public void testSplit_lowLimit() {
+ regExp = RegExp.compile(",");
+ checkSplit("a,b", 1, "a");
+ }
+
+ public void testSplit_negLimit() {
+ regExp = RegExp.compile(",");
+ checkSplit("a,b", -1, "a", "b");
+ }
+
+ public void testSplit_noMatch() {
+ regExp = RegExp.compile(",");
+ checkSplit("ab", "ab");
+ }
+
+ public void testSplit_regExpSeparator() {
+ regExp = RegExp.compile(" +|,");
+ checkSplit("a b,c", "a", "b", "c");
+ }
+
+ public void testSplit_veryHighLimit() {
+ regExp = RegExp.compile(",");
+ checkSplit("a,b", 1000, "a", "b");
+ }
+
+ public void testSplit_zeroLimit() {
+ regExp = RegExp.compile(",");
+ checkSplit("a,b", 0);
+ }
+
+ public void testTest() {
+ assertTrue(RegExp.compile("test").test("test"));
+ assertTrue(RegExp.compile("test", "i").test("TeSt"));
+ assertFalse(RegExp.compile("test").test("mismatch"));
+ assertTrue(RegExp.compile("[a-z]+").test("abc"));
+ assertFalse(RegExp.compile("[a-z]+").test("42"));
+ }
+
+ /**
+ * Checks that a regular expression matches all characters of a string and no
+ * other.
+ *
+ * @param regexp the regular expression
+ * @param matched all ASCII characters the regexp must exactly match, on all
+ * browsers and in Java
+ */
+ private void checkAlias(String regexp, String matched) {
+ checkAlias(regexp, matched, allAsciiCharsBut(matched, 255));
+ }
+
+ /**
+ * Checks that a regular expression matches all characters of a string and
+ * none of another.
+ *
+ * <p>
+ * In theory {@code matched} and {@code notMatched} should be complementary.
+ * In practice, discrepancies between regular expression implementations
+ * across browsers and in Java force some characters to be ignored, so
+ * {@code matched} may be only a subset of the complement of {@code notMatched}.
+ *
+ * @param regexp the regular expression
+ * @param matched all characters the regexp must match, on all browsers and in
+ * Java
+ * @param notMatched all characters the regexp must not match, on all browsers
+ * and in Java
+ */
+ private void checkAlias(String regexp, String matched, String notMatched) {
+ regExp = RegExp.compile(regexp + "+");
+ checkExec(matched, 0, matched);
+ checkExecNoMatch(notMatched);
+ }
+
+ private void checkCompileThrows(String regexp, String flags,
+ boolean onlyPureJava) {
+ boolean thrown = false;
+ try {
+ RegExp.compile(regexp, flags);
+ } catch (RuntimeException e) {
+ thrown = true; // any RuntimeException counts as the expected rejection
+ }
+ if (!onlyPureJava || !GWT.isClient()) { // with onlyPureJava, only checked outside GWT client code
+ assertTrue("compile(" + regexp + ", " + flags + ") should throw", thrown);
+ }
+ }
+
+ private void checkExec(String input, int expectedIndex,
+ int expectedLastIndex, String... expectedGroups) {
+ MatchResult result = regExp.exec(input);
+ assertNotNull("Match expected", result);
+ assertEquals("Wrong result input", input, result.getInput());
+ assertEquals("Wrong result index", expectedIndex, result.getIndex());
+ // A negative expected last index means lastIndex is not checked.
+ if (expectedLastIndex >= 0) {
+ assertEquals("Wrong last index", expectedLastIndex, regExp.getLastIndex());
+ }
+ int groupCount = expectedGroups.length;
+ assertEquals("Wrong group count", groupCount, result.getGroupCount());
+ for (int i = 0; i < groupCount; i++) {
+ String expected = expectedGroups[i];
+ String actual = result.getGroup(i);
+ // IE sets non-matching groups to "" instead of null, so tolerate that.
+ boolean ieEmptyGroup = expected == null && "".equals(actual);
+ if (!ieEmptyGroup) {
+ assertEquals("Wrong group " + i, expected, actual);
+ }
+ }
+ }
+
+ private void checkExec(String input, int expectedIndex,
+ String... expectedGroups) {
+ checkExec(input, expectedIndex, -1, expectedGroups);
+ }
+
+ private void checkExecNoMatch(String input) {
+ checkExecNoMatch(input, 0);
+ }
+
+ private void checkExecNoMatch(String input, int expectedLastIndex) {
+ MatchResult matchResult = regExp.exec(input);
+ assertNull("No match expected for: " + input, matchResult); // names the input that matched
+ assertEquals("Wrong last index", expectedLastIndex, regExp.getLastIndex());
+ }
+
+ private void checkReplace(String input, String replacement, String expected) {
+ String result = regExp.replace(input, replacement);
+ assertNotNull("Replace result expected", result);
+ assertEquals("Wrong replace result", expected, result);
+ }
+
+ private void checkReplaceThrows(String input, String replacement,
+ boolean onlyPureJava) {
+ boolean thrown = false;
+ try {
+ regExp.replace(input, replacement);
+ } catch (RuntimeException e) {
+ thrown = true; // any RuntimeException counts as the expected rejection
+ }
+ if (!onlyPureJava || !GWT.isClient()) { // with onlyPureJava, only checked outside GWT client code
+ assertTrue("replace with replacement " + replacement + " should throw", thrown);
+ }
+ }
+
+ private void checkSplit(SplitResult splitResult, String[] expectedParts) {
+ assertNotNull("Split result expected", splitResult);
+ assertEquals("Wrong result length", expectedParts.length,
+ splitResult.length());
+ for (int i = 0; i < expectedParts.length; i++) {
+ String expectedPart = expectedParts[i];
+ String actualPart = splitResult.get(i);
+ assertNotNull("Split part expected", actualPart);
+ assertEquals("Wrong split part " + i, expectedPart, actualPart);
+ }
+ }
+
+ private void checkSplit(String input, int limit, String... expectedParts) {
+ SplitResult splitResult = regExp.split(input, limit);
+ checkSplit(splitResult, expectedParts);
+ }
+
+ private void checkSplit(String input, String... expectedParts) {
+ SplitResult splitResult = regExp.split(input);
+ checkSplit(splitResult, expectedParts);
+ }
+
+ private void testExec_dot(String flags) {
+ regExp = RegExp.compile("a.c", flags);
+ checkExecNoMatch("ac");
+ checkExec("abc", 0, "abc");
+ checkExecNoMatch("a\nc");
+ // DISCREPANCY: Firefox bug: '.' should not match '\r'
+ // checkExec("a\rc", 0, "a\rc");
+ checkExecNoMatch("a\r\nc");
+ }
+}