Added all safehtml packages.
Added new safehtml test suites.
Several changes for migration:
* Split EscapeUtils into SafeHtmlUtils and UriUtils
* Added .equals() and .hashCode() to SafeHtmlString
* Updated comments, formatted code
* Removed i18n references
* Removed templates
* Removed .fromPlainText() functionality
* Removed HtmlRenderer functionality
* Added check in SafeHtmlUtils.htmlEscape() to check if a string needs to be escaped, benchmarked this test.
Review at http://gwt-code-reviews.appspot.com/771801
git-svn-id: https://google-web-toolkit.googlecode.com/svn/trunk@8621 8db76d5a-ed1c-0410-87a9-c151d255dfc7
diff --git a/user/src/com/google/gwt/safehtml/SafeHtml.gwt.xml b/user/src/com/google/gwt/safehtml/SafeHtml.gwt.xml
new file mode 100644
index 0000000..9721d70
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/SafeHtml.gwt.xml
@@ -0,0 +1,23 @@
+<!--
+ Copyright 2010 Google Inc.
+
+ Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ use this file except in compliance with the License. You may obtain a copy of
+ the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ License for the specific language governing permissions and limitations under
+ the License.
+-->
+
+<!-- SafeHtml - facilities for avoiding XSS attacks. -->
+<!-- Inherits the core User module and declares the client and shared source paths. -->
+<module>
+ <inherits name='com.google.gwt.user.User'/>
+ <source path="client"/>
+ <source path="shared"/>
+</module>
diff --git a/user/src/com/google/gwt/safehtml/client/HasSafeHtml.java b/user/src/com/google/gwt/safehtml/client/HasSafeHtml.java
new file mode 100644
index 0000000..f9854d5
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/client/HasSafeHtml.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.gwt.safehtml.client;
+
+import com.google.gwt.safehtml.shared.SafeHtml;
+
+/**
+ * Implemented by objects that hold text with HTML markup and allow that
+ * content to be set from the Cross-Site-Scripting-safe HTML markup encapsulated
+ * in a {@link com.google.gwt.safehtml.shared.SafeHtml} object.
+ */
+public interface HasSafeHtml {
+
+ /**
+ * Sets this object's contents via known-safe HTML.
+ *
+ * <p>
+ * The intended effect is the same as invoking a widget's
+ * {@link com.google.gwt.user.client.ui.HasHTML#setHTML(String)} method;
+ * however, the {@link SafeHtml} passed to this method observes the
+ * contract that it can be used in an HTML context without causing unsafe
+ * script execution. Thus, unlike
+ * {@link com.google.gwt.user.client.ui.HasHTML#setHTML(String)}, using this
+ * method is not expected to introduce Cross-Site Scripting vulnerabilities.
+ *
+ * @param html the object's new HTML, represented as a {@link SafeHtml} object
+ */
+ void setSafeHtml(SafeHtml html);
+}
diff --git a/user/src/com/google/gwt/safehtml/shared/HtmlSanitizer.java b/user/src/com/google/gwt/safehtml/shared/HtmlSanitizer.java
new file mode 100644
index 0000000..14b4990
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/shared/HtmlSanitizer.java
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+/**
+ * Sanitizes untrusted HTML.
+ *
+ * <p>
+ * Note on usage: SafeHtml should be used to ensure user input is not executed
+ * in the browser. SafeHtml should not be used to sanitize input before sending
+ * it to the server.
+ */
+public interface HtmlSanitizer {
+
+ /**
+ * Sanitizes a string into a {@link SafeHtml}.
+ *
+ * @param html string containing untrusted HTML
+ * @return the contents of {@code html}, sanitized according to the policy
+ * implemented by this sanitizer
+ */
+ SafeHtml sanitize(String html);
+}
diff --git a/user/src/com/google/gwt/safehtml/shared/OnlyToBeUsedInGeneratedCodeStringBlessedAsSafeHtml.java b/user/src/com/google/gwt/safehtml/shared/OnlyToBeUsedInGeneratedCodeStringBlessedAsSafeHtml.java
new file mode 100644
index 0000000..301a3c2
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/shared/OnlyToBeUsedInGeneratedCodeStringBlessedAsSafeHtml.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+/**
+ * Wraps a string that has been blessed as {@link SafeHtml}.
+ *
+ * <p>
+ * Only generated code should instantiate this class: the code generator is
+ * responsible for guaranteeing that every wrapped string adheres to the
+ * {@link SafeHtml} contract (hence the purposely unwieldy class name).
+ */
+public class OnlyToBeUsedInGeneratedCodeStringBlessedAsSafeHtml
+    implements SafeHtml {
+  private String html;
+
+  /**
+   * @param html the string to wrap; the caller vouches for its safety
+   * @throws NullPointerException if {@code html} is null
+   */
+  public OnlyToBeUsedInGeneratedCodeStringBlessedAsSafeHtml(String html) {
+    if (null == html) {
+      throw new NullPointerException("html is null");
+    }
+    this.html = html;
+  }
+
+  /** {@inheritDoc} */
+  public String asString() {
+    return this.html;
+  }
+
+  /**
+   * Equal to any {@link SafeHtml} whose {@code asString()} equals this one's.
+   */
+  public boolean equals(Object obj) {
+    if (obj instanceof SafeHtml) {
+      return html.equals(((SafeHtml) obj).asString());
+    }
+    return false;
+  }
+
+  /** Returns the hash code of the wrapped string. */
+  public int hashCode() {
+    return html.hashCode();
+  }
+}
diff --git a/user/src/com/google/gwt/safehtml/shared/SafeHtml.java b/user/src/com/google/gwt/safehtml/shared/SafeHtml.java
new file mode 100644
index 0000000..145f3a3
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/shared/SafeHtml.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+import java.io.Serializable;
+
+/**
+ * An object that implements this interface encapsulates HTML that is guaranteed
+ * to be safe to use (with respect to potential Cross-Site-Scripting
+ * vulnerabilities) in an HTML context.
+ * <p>
+ * Note on usage: SafeHtml should be used to ensure user input is not executed
+ * in the browser. SafeHtml should not be used to sanitize input before sending
+ * it to the server.
+ *
+ * <p>
+ * All implementing classes must maintain the class invariant (by design and
+ * implementation and/or convention of use), that invoking {@link #asString()}
+ * on any instance will return a string that is safe to assign to the {@code
+ * .innerHTML} DOM property in a browser (or to use similarly in an "inner HTML"
+ * context), in the sense that doing so must not cause execution of script in
+ * the browser.
+ *
+ * <p>
+ * All implementations must implement equals() and hashCode() consistently
+ * with the results of asString().equals() and asString().hashCode(). The
+ * internal string must not be null.
+ *
+ * <p>
+ * Implementations of this interface must not implement
+ * {@link com.google.gwt.user.client.rpc.IsSerializable}, since deserialization
+ * can result in violation of the class invariant.
+ */
+public interface SafeHtml extends Serializable {
+ /*
+ * Notes regarding serialization: - It may be reasonable to allow
+ * deserialization on the client of objects serialized on the server (i.e. RPC
+ * responses), based on the assumption that server code is trusted and would
+ * not provide a malicious serialized form (if a MitM were able to modify
+ * server responses, the client would be fully compromised in any case).
+ * However, the GWT RPC framework currently does not seem to provide a
+ * facility for restricting deserialization on the Server only (though this
+ * shouldn't be difficult to implement through a custom SerializationPolicy).
+ *
+ * - Some implementations of SafeHtml would in principle be able to enforce
+ * their class invariant on deserialization (e.g., SimpleHtmlSanitizer could
+ * apply HTML sanitization on deserialization). However, the GWT RPC framework
+ * does not provide for an equivalent of readResolve() to enforce the class
+ * invariant on deserialization.
+ */
+
+ /**
+ * Returns this object's contained HTML as a string. Based on this interface's
+ * contract, the returned string will be safe to use in an HTML context.
+ */
+ String asString();
+
+ /**
+ * Compares this object to the specified object.
+ * The result must be consistent with asString().equals().
+ */
+ boolean equals(Object anObject);
+
+ /**
+ * Returns a hash code for this object.
+ * The value must be equal to asString().hashCode().
+ */
+ int hashCode();
+}
diff --git a/user/src/com/google/gwt/safehtml/shared/SafeHtmlBuilder.java b/user/src/com/google/gwt/safehtml/shared/SafeHtmlBuilder.java
new file mode 100644
index 0000000..f2f57dc
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/shared/SafeHtmlBuilder.java
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+/**
+ * A builder that facilitates the building up of XSS-safe HTML from text
+ * snippets. It is used essentially like a {@link StringBuilder}; unlike a
+ * {@link StringBuilder}, it automatically HTML-escapes appended input where
+ * necessary.
+ *
+ * <p>
+ * In addition, it supports methods that allow strings with HTML markup to be
+ * appended without escaping: One can append other {@link SafeHtml} objects, and
+ * one can append constant strings. The method that appends constant strings
+ * ({@link #appendHtmlConstant(String)}) requires a convention of use to be
+ * adhered to in order for this class to adhere to the contract required by
+ * {@link SafeHtml}: The argument expression must be fully determined and known
+ * to be safe at compile time, and the value of the argument must not contain
+ * incomplete HTML tags. See {@link #appendHtmlConstant(String)} for details.
+ *
+ * <p>
+ * The accumulated XSS-safe HTML can be obtained in the form of a
+ * {@link SafeHtml} via the {@link #toSafeHtml()} method.
+ *
+ * <p>
+ * This class is not thread-safe.
+ */
+public final class SafeHtmlBuilder {
+
+  private final StringBuilder sb = new StringBuilder();
+
+  /**
+   * Constructs an empty SafeHtmlBuilder.
+   */
+  public SafeHtmlBuilder() {
+  }
+
+  /*
+   * Boolean and numeric types converted to String are always HTML safe -- no
+   * escaping necessary. A char is not: append(char) escapes its argument.
+   */
+
+  /**
+   * Appends the string representation of a boolean.
+   *
+   * @param b the boolean whose string representation to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder append(boolean b) {
+    sb.append(b);
+    return this;
+  }
+
+  /**
+   * Appends the string representation of a number.
+   *
+   * @param num the number whose string representation to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder append(byte num) {
+    sb.append(num);
+    return this;
+  }
+
+  /**
+   * Appends a single character after HTML-escaping it.
+   *
+   * @param num the character to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder append(char num) {
+    sb.append(SafeHtmlUtils.htmlEscape(String.valueOf(num)));
+    return this;
+  }
+
+  /**
+   * Appends the string representation of a number.
+   *
+   * @param num the number whose string representation to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder append(double num) {
+    sb.append(num);
+    return this;
+  }
+
+  /**
+   * Appends the string representation of a number.
+   *
+   * @param num the number whose string representation to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder append(float num) {
+    sb.append(num);
+    return this;
+  }
+
+  /**
+   * Appends the string representation of a number.
+   *
+   * @param num the number whose string representation to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder append(int num) {
+    sb.append(num);
+    return this;
+  }
+
+  /**
+   * Appends the string representation of a number.
+   *
+   * @param num the number whose string representation to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder append(long num) {
+    sb.append(num);
+    return this;
+  }
+
+  /**
+   * Appends the contents of another {@link SafeHtml} object, without applying
+   * HTML-escaping to it.
+   *
+   * @param html the {@link SafeHtml} to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder append(SafeHtml html) {
+    sb.append(html.asString());
+    return this;
+  }
+
+  /**
+   * Appends a string after HTML-escaping it.
+   *
+   * @param text the string to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder appendEscaped(String text) {
+    sb.append(SafeHtmlUtils.htmlEscape(text));
+    return this;
+  }
+
+  /**
+   * Appends a string consisting of several newline-separated lines after
+   * HTML-escaping it. Each newline in the original string is converted to a
+   * {@code <br>} tag.
+   *
+   * @param text the string to append
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder appendEscapedLines(String text) {
+    sb.append(SafeHtmlUtils.htmlEscape(text).replaceAll("\n", "<br>"));
+    return this;
+  }
+
+  /**
+   * Appends a compile-time-constant string, which will <em>not</em> be escaped.
+   *
+   * <p>
+   * <b>Important</b>: For this class to be able to honor its contract as
+   * required by {@link SafeHtml}, all uses of this method must satisfy the
+   * following requirements:
+   *
+   * <ul>
+   *
+   * <li>The argument expression must be fully determined and known to be safe
+   * at compile time.
+   *
+   * <li>The value of the argument must not contain incomplete HTML tags. I.e.,
+   * the following is not a correct use of this method, because the {@code <a>}
+   * tag is incomplete:
+   *
+   * <pre class="code">
+   * {@code shb.appendHtmlConstant("<a href='").append(url)}</pre>
+   *
+   * </ul>
+   *
+   * @param html the HTML snippet to be appended
+   * @return a reference to this object
+   */
+  public SafeHtmlBuilder appendHtmlConstant(String html) {
+    // TODO(xtof): (hosted-mode only) assert that html satisfies the second
+    // constraint.
+    sb.append(html);
+    return this;
+  }
+
+  /**
+   * Returns the safe HTML accumulated in the builder as a {@link SafeHtml}.
+   */
+  public SafeHtml toSafeHtml() {
+    return new SafeHtmlString(sb.toString());
+  }
+}
diff --git a/user/src/com/google/gwt/safehtml/shared/SafeHtmlString.java b/user/src/com/google/gwt/safehtml/shared/SafeHtmlString.java
new file mode 100644
index 0000000..2d7762a
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/shared/SafeHtmlString.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+/**
+ * The plain string-backed implementation of {@link SafeHtml}.
+ *
+ * <p>
+ * This class is package-private and intended for internal use by the
+ * {@link com.google.gwt.safehtml} package.
+ *
+ * Its equals() and hashCode() operate on the wrapped string, just like
+ * String.equals() and String.hashCode().
+ */
+class SafeHtmlString implements SafeHtml {
+  // NOTE(review): presumably left non-final so that GWT serialization can
+  // populate it after invoking the no-arg constructor -- confirm before changing.
+  private String html;
+
+  /**
+   * Constructs a {@link SafeHtmlString} from a string. Callers are responsible
+   * for ensuring that the string passed as the argument to this constructor
+   * satisfies the constraints of the contract imposed by the {@link SafeHtml}
+   * interface.
+   *
+   * @param html the string to be wrapped as a {@link SafeHtml}; must not be null
+   */
+  SafeHtmlString(String html) {
+    if (null == html) {
+      throw new NullPointerException("html is null");
+    }
+    this.html = html;
+  }
+
+  /**
+   * No-arg constructor for compatibility with GWT serialization.
+   */
+  @SuppressWarnings("unused")
+  private SafeHtmlString() {
+  }
+
+  /** {@inheritDoc} */
+  public String asString() {
+    return this.html;
+  }
+
+  /**
+   * Equal to any {@link SafeHtml} whose {@code asString()} equals this one's.
+   */
+  public boolean equals(Object obj) {
+    if (obj instanceof SafeHtml) {
+      return html.equals(((SafeHtml) obj).asString());
+    }
+    return false;
+  }
+
+  /**
+   * Returns the hash code of the wrapped string.
+   */
+  public int hashCode() {
+    return html.hashCode();
+  }
+}
diff --git a/user/src/com/google/gwt/safehtml/shared/SafeHtmlUtils.java b/user/src/com/google/gwt/safehtml/shared/SafeHtmlUtils.java
new file mode 100644
index 0000000..0502c7e
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/shared/SafeHtmlUtils.java
@@ -0,0 +1,151 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+import com.google.gwt.regexp.shared.RegExp;
+import com.google.gwt.regexp.shared.SplitResult;
+
+/**
+ * Utility class containing static methods for escaping and sanitizing strings.
+ */
+public final class SafeHtmlUtils {
+
+  private static final String HTML_ENTITY_REGEX =
+      "[a-z]+|#[0-9]+|#x[0-9a-fA-F]+";
+
+  /** A {@link SafeHtml} wrapping the empty string. */
+  public static final SafeHtml EMPTY_SAFE_HTML = new SafeHtmlString("");
+
+  private static final RegExp AMP_RE = RegExp.compile("&", "g");
+  private static final RegExp GT_RE = RegExp.compile(">", "g");
+  private static final RegExp LT_RE = RegExp.compile("<", "g");
+  private static final RegExp SQUOT_RE = RegExp.compile("\'", "g");
+  private static final RegExp QUOT_RE = RegExp.compile("\"", "g");
+
+  /**
+   * Returns a SafeHtml constructed from a safe string, i.e. without escaping
+   * the string.
+   *
+   * <p>
+   * <b>Important</b>: For this class to be able to honor its contract as
+   * required by {@link SafeHtml}, all uses of this method must satisfy the
+   * following requirements:
+   *
+   * <ul>
+   * <li>The argument expression must be fully determined and known to be safe
+   * at compile time.
+   * <li>The value of the argument must not contain incomplete HTML tags.
+   * </ul>
+   *
+   * @param s the constant string to wrap, which must not be null
+   * @return a {@link SafeHtml} wrapping {@code s} unchanged
+   */
+  public static SafeHtml fromSafeConstant(String s) {
+    // TODO(pdr): (hosted-mode only) assert that html satisfies the second
+    // constraint.
+    return new SafeHtmlString(s);
+  }
+
+  /**
+   * Returns a SafeHtml containing the escaped string.
+   */
+  public static SafeHtml fromString(String s) {
+    return new SafeHtmlString(htmlEscape(s));
+  }
+
+  /**
+   * HTML-escapes a string. The ampersand is replaced first so that entities
+   * introduced by the later replacements are not escaped a second time.
+   *
+   * Note: The following variants of this function were profiled on FF36,
+   * Chrome6, IE8:
+   * #1) for each case, check indexOf, then use s.replace(regex, string)
+   * #2) for each case, check indexOf, then use s.replaceAll()
+   * #3) check if any metachar is present using a regex, then use #1
+   * #4) for each case, use s.replace(regex, string)
+   * #1 was found to be the fastest, and is used below.
+   *
+   * @param s the string to be escaped
+   * @return the input string, with all occurrences of HTML meta-characters
+   *         replaced with their corresponding HTML Entity References
+   */
+  public static String htmlEscape(String s) {
+    if (s.indexOf("&") != -1) {
+      s = AMP_RE.replace(s, "&amp;");
+    }
+    if (s.indexOf("<") != -1) {
+      s = LT_RE.replace(s, "&lt;");
+    }
+    if (s.indexOf(">") != -1) {
+      s = GT_RE.replace(s, "&gt;");
+    }
+    if (s.indexOf("\"") != -1) {
+      s = QUOT_RE.replace(s, "&quot;");
+    }
+    if (s.indexOf("\'") != -1) {
+      s = SQUOT_RE.replace(s, "&#39;");
+    }
+    return s;
+  }
+
+  /**
+   * HTML-escapes a string, but does not double-escape HTML-entities already
+   * present in the string.
+   *
+   * @param text the string to be escaped
+   * @return the input string, with all occurrences of HTML meta-characters
+   *         replaced with their corresponding HTML Entity References, with the
+   *         exception that ampersand characters are not double-escaped if they
+   *         form the start of an HTML Entity Reference
+   */
+  public static String htmlEscapeAllowEntities(String text) {
+    StringBuilder escaped = new StringBuilder();
+
+    SplitResult splitSegment = AMP_RE.split(text, -1);
+    for (int i = 0, len = splitSegment.length(); i < len; i++) {
+      String segment = splitSegment.get(i);
+      if (i == 0) {
+        /*
+         * The first segment is never part of an entity reference, so we always
+         * escape it. If the input starts with an ampersand, this segment is
+         * empty.
+         */
+        escaped.append(htmlEscape(segment));
+        continue;
+      }
+
+      int entityEnd = segment.indexOf(';');
+      if (entityEnd > 0
+          && segment.substring(0, entityEnd).matches(HTML_ENTITY_REGEX)) {
+        // Append the entity without escaping; re-attach the '&' split() removed.
+        escaped.append("&").append(segment.substring(0, entityEnd + 1));
+
+        // Append the rest of the segment, escaped.
+        escaped.append(htmlEscape(segment.substring(entityEnd + 1)));
+      } else {
+        // The segment did not start with an entity reference, so escape the
+        // ampersand that preceded it as well as the whole segment.
+        escaped.append("&amp;").append(htmlEscape(segment));
+      }
+    }
+
+    return escaped.toString();
+  }
+
+  // prevent instantiation
+  private SafeHtmlUtils() {
+  }
+}
diff --git a/user/src/com/google/gwt/safehtml/shared/SimpleHtmlSanitizer.java b/user/src/com/google/gwt/safehtml/shared/SimpleHtmlSanitizer.java
new file mode 100644
index 0000000..4f761b1
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/shared/SimpleHtmlSanitizer.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+import com.google.gwt.regexp.shared.RegExp;
+import com.google.gwt.regexp.shared.SplitResult;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A simple and relatively inexpensive HTML sanitizer.
+ *
+ * <p>
+ * This sanitizer accepts the subset of HTML consisting of attribute-free tags
+ * in a whitelist (including {@code <b>, <em>, <i>}, etc; for the exact list
+ * consult the source), as well as numeric HTML entities and HTML entity
+ * references. Any HTML metacharacters that do not appear as part of markup in
+ * this subset will be HTML-escaped.
+ */
+public final class SimpleHtmlSanitizer implements HtmlSanitizer {
+
+  private static final SimpleHtmlSanitizer INSTANCE = new SimpleHtmlSanitizer();
+
+  private static final Set<String> TAG_WHITELIST = new HashSet<String>(
+      Arrays.asList("b", "em", "i", "h1", "h2", "h3", "h4", "h5", "h6", "hr",
+          "ul", "ol", "li"));
+
+  private static final RegExp LT_RE = RegExp.compile("<", "g");
+
+  /** Returns the shared sanitizer instance. */
+  public static SimpleHtmlSanitizer getInstance() {
+    return INSTANCE;
+  }
+
+  /**
+   * HTML-sanitizes a string as described in the class documentation: only
+   * whitelisted, attribute-free tags and entity references pass unescaped.
+   *
+   * @param html the untrusted HTML to sanitize
+   * @return the sanitized markup, which is intended to be safe to use (with
+   *         respect to XSS vulnerabilities) in HTML contexts
+   * @throws NullPointerException if {@code html} is null
+   */
+  public static SafeHtml sanitizeHtml(String html) {
+    if (html == null) {
+      throw new NullPointerException("html is null");
+    }
+    return new SafeHtmlString(simpleSanitize(html));
+  }
+
+  /*
+   * Sanitize a string containing simple HTML markup as defined above. The
+   * approach is as follows: We split the string at each occurrence of '<'.
+   * Each segment thus obtained is inspected to determine if the leading '<'
+   * was indeed the start of a whitelisted tag or not. If so, the tag is
+   * emitted unescaped, and the remainder of the segment (which cannot contain
+   * any additional tags) is emitted in escaped form. Otherwise, the leading
+   * '<' and the entire segment are emitted in escaped form.
+   *
+   * In either case, SafeHtmlUtils.htmlEscapeAllowEntities is used to escape,
+   * which escapes HTML but does not double escape existing syntactically valid
+   * HTML entities.
+   */
+  // TODO(xtof): should this be in a utils class?
+  private static String simpleSanitize(String text) {
+    StringBuilder sanitized = new StringBuilder();
+
+    SplitResult splitSegment = LT_RE.split(text, -1);
+    for (int i = 0, len = splitSegment.length(); i < len; i++) {
+      String segment = splitSegment.get(i);
+      if (i == 0) {
+        /*
+         * The first segment is never part of a valid tag; note that if the
+         * input string starts with a tag, we will get an empty segment at the
+         * beginning.
+         */
+        sanitized.append(SafeHtmlUtils.htmlEscapeAllowEntities(segment));
+        continue;
+      }
+
+      // Determine if the current segment is the start of an attribute-free
+      // tag or end-tag in our whitelist.
+      int tagStart = 0; // will be 1 if this turns out to be an end tag.
+      int tagEnd = segment.indexOf('>');
+      String tag = null;
+      boolean isValidTag = false;
+      if (tagEnd > 0) {
+        if (segment.charAt(0) == '/') {
+          tagStart = 1;
+        }
+        tag = segment.substring(tagStart, tagEnd);
+        if (TAG_WHITELIST.contains(tag)) {
+          isValidTag = true;
+        }
+      }
+
+      if (isValidTag) {
+        // append the tag, not escaping it
+        if (tagStart == 0) {
+          sanitized.append('<');
+        } else {
+          // we had seen an end-tag
+          sanitized.append("</");
+        }
+        sanitized.append(tag).append('>');
+
+        // append the rest of the segment, escaping it
+        sanitized.append(SafeHtmlUtils.htmlEscapeAllowEntities(
+            segment.substring(tagEnd + 1)));
+      } else {
+        // not a whitelisted tag: escape the '<' that split() removed, then the segment
+        sanitized.append("&lt;").append(
+            SafeHtmlUtils.htmlEscapeAllowEntities(segment));
+      }
+    }
+    return sanitized.toString();
+  }
+
+  /*
+   * Note: We purposely do not provide a method to create a SafeHtml from
+   * another (arbitrary) SafeHtml via sanitization, as this would permit the
+   * construction of SafeHtml objects that are not stable in the sense that for
+   * a {@code SafeHtml s} it may not be true that {@code s.asString()} equals
+   * {@code SimpleHtmlSanitizer.sanitizeHtml(s.asString()).asString()}. While
+   * this is not currently an issue, it might become one and result in
+   * unexpected behavior if this class were to become serializable and enforce
+   * its class invariant upon deserialization.
+   */
+
+  // prevent external instantiation
+  private SimpleHtmlSanitizer() {
+  }
+
+  /** Instance-level entry point; delegates to {@link #sanitizeHtml(String)}. */
+  public SafeHtml sanitize(String html) {
+    return sanitizeHtml(html);
+  }
+}
diff --git a/user/src/com/google/gwt/safehtml/shared/UriUtils.java b/user/src/com/google/gwt/safehtml/shared/UriUtils.java
new file mode 100644
index 0000000..f477d04
--- /dev/null
+++ b/user/src/com/google/gwt/safehtml/shared/UriUtils.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+/**
+ * Static helpers for validating and sanitizing URIs.
+ */
+public final class UriUtils {
+
+  /**
+   * Extracts the scheme of a URI.
+   *
+   * @param uri the URI to extract the scheme from
+   * @return the URI's scheme, or {@code null} if the URI does not have one
+   */
+  public static String extractScheme(String uri) {
+    int colonIndex = uri.indexOf(':');
+    if (colonIndex >= 0) {
+      String prefix = uri.substring(0, colonIndex);
+      if (prefix.indexOf('/') < 0 && prefix.indexOf('#') < 0) {
+        return prefix;
+      }
+      /*
+       * Otherwise the prefix up to the first ':' contains other URI special
+       * chars, and won't be interpreted as a scheme.
+       *
+       * TODO(xtof): Consider basing this on URL#isValidProtocol or similar;
+       * however I'm worried that being too strict here will effectively
+       * allow dangerous schemes accepted in loosely parsing browsers.
+       */
+    }
+    // No colon at all, or the prefix is not a plausible scheme.
+    return null;
+  }
+
+  /**
+   * Determines if a {@link String} is safe to use as the value of a URI-valued
+   * HTML attribute such as {@code src} or {@code href}.
+   *
+   * <p>
+   * Here "safe" means it can be established that using the URI as the value
+   * of such an attribute cannot result in script execution. Specifically,
+   * this method deems a URI safe if it either does not have a scheme, or its
+   * scheme is one of {@code http, https, ftp, mailto} (compared without
+   * regard to case).
+   *
+   * @param uri the URI to validate
+   * @return {@code true} if {@code uri} is safe in the above sense;
+   *         {@code false} otherwise
+   */
+  public static boolean isSafeUri(String uri) {
+    String scheme = extractScheme(uri);
+    if (scheme == null) {
+      // Scheme-less URIs are deemed safe.
+      return true;
+    }
+    String lower = scheme.toLowerCase();
+    if ("http".equals(lower) || "https".equals(lower) || "ftp".equals(lower)) {
+      return true;
+    }
+    /*
+     * Special care must be taken with the case-insensitive 'i' in the Turkish
+     * locale: i -> to upper in Turkish locale -> İ, and I -> to lower in
+     * Turkish locale -> ı. For this reason there are two checks for mailto:
+     * "mailto" and "MAILTO".
+     * For details, see: http://www.i18nguy.com/unicode/turkish-i18n.html
+     */
+    return "mailto".equals(lower) || "MAILTO".equals(scheme.toUpperCase());
+  }
+
+  /**
+   * Sanitizes a URI.
+   *
+   * <p>
+   * Returns the URI provided if it is safe to use as the value of a URI-valued
+   * HTML attribute according to {@link #isSafeUri}, or "{@code #}" otherwise.
+   *
+   * @param uri the URI to sanitize
+   * @return {@code uri} itself if it is safe, otherwise {@code "#"}
+   */
+  public static String sanitizeUri(String uri) {
+    if (!isSafeUri(uri)) {
+      // Not a whitelisted scheme: substitute the fragment-only URI.
+      return "#";
+    }
+    return uri;
+  }
+
+  // prevent instantiation
+  private UriUtils() {
+  }
+}
diff --git a/user/test/com/google/gwt/safehtml/SafeHtmlGwtSuite.java b/user/test/com/google/gwt/safehtml/SafeHtmlGwtSuite.java
new file mode 100644
index 0000000..1b656e3
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/SafeHtmlGwtSuite.java
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml;
+
+import com.google.gwt.junit.tools.GWTTestSuite;
+import com.google.gwt.safehtml.shared.GwtSafeHtmlStringTest;
+import com.google.gwt.safehtml.shared.GwtSafeHtmlUtilsTest;
+
+import junit.framework.Test;
+
+/**
+ * Test suite for the SafeHtml tests that run as GWTTestCases.
+ */
+public class SafeHtmlGwtSuite {
+ public static Test suite() {
+ GWTTestSuite suite = new GWTTestSuite(
+ "Test suite for SafeHtml GWTTestCases");
+ // Gwt* classes only: their JRE subclasses return a null module name.
+ suite.addTestSuite(GwtSafeHtmlUtilsTest.class);
+ suite.addTestSuite(GwtSafeHtmlStringTest.class);
+
+ return suite;
+ }
+
+ private SafeHtmlGwtSuite() {
+ }
+}
diff --git a/user/test/com/google/gwt/safehtml/SafeHtmlJreSuite.java b/user/test/com/google/gwt/safehtml/SafeHtmlJreSuite.java
new file mode 100644
index 0000000..3bf970e
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/SafeHtmlJreSuite.java
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml;
+
+import com.google.gwt.safehtml.server.UriUtilsTest;
+import com.google.gwt.safehtml.shared.SafeHtmlUtilsTest;
+import com.google.gwt.safehtml.shared.SafeHtmlBuilderTest;
+import com.google.gwt.safehtml.shared.SafeHtmlStringTest;
+import com.google.gwt.safehtml.shared.SimpleHtmlSanitizerTest;
+
+import junit.framework.Test;
+import junit.framework.TestSuite;
+
+/**
+ * Test suite for the SafeHtml tests that run as plain JUnit tests in a JRE.
+ */
+public class SafeHtmlJreSuite {
+ public static Test suite() {
+ TestSuite suite = new TestSuite(
+ "Test suite for SafeHtml tests that require the JRE");
+
+ suite.addTestSuite(SafeHtmlUtilsTest.class);
+ suite.addTestSuite(SafeHtmlBuilderTest.class);
+ suite.addTestSuite(SimpleHtmlSanitizerTest.class);
+ suite.addTestSuite(SafeHtmlStringTest.class);
+ suite.addTestSuite(UriUtilsTest.class);
+
+ return suite;
+ }
+
+ private SafeHtmlJreSuite() {
+ }
+}
diff --git a/user/test/com/google/gwt/safehtml/SafeHtmlTestsModule.gwt.xml b/user/test/com/google/gwt/safehtml/SafeHtmlTestsModule.gwt.xml
new file mode 100644
index 0000000..3cb77d1
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/SafeHtmlTestsModule.gwt.xml
@@ -0,0 +1,17 @@
+<!-- -->
+<!-- Copyright 2007 Google Inc. -->
+<!-- Licensed under the Apache License, Version 2.0 (the "License"); you -->
+<!-- may not use this file except in compliance with the License. You may -->
+<!-- obtain a copy of the License at -->
+<!-- -->
+<!-- http://www.apache.org/licenses/LICENSE-2.0 -->
+<!-- -->
+<!-- Unless required by applicable law or agreed to in writing, software -->
+<!-- distributed under the License is distributed on an "AS IS" BASIS, -->
+<!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -->
+<!-- implied. See the License for the specific language governing permissions and -->
+<!-- limitations under the License. -->
+
+<module>
+ <inherits name="com.google.gwt.safehtml.SafeHtml"/>
+</module>
diff --git a/user/test/com/google/gwt/safehtml/server/UriUtilsTest.java b/user/test/com/google/gwt/safehtml/server/UriUtilsTest.java
new file mode 100644
index 0000000..83e90d4
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/server/UriUtilsTest.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.server;
+
+import com.google.gwt.safehtml.shared.UriUtils;
+
+import junit.framework.TestCase;
+
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Unit tests for UriUtils.
+ */
+public class UriUtilsTest extends TestCase {
+
+ /**
+ * Encapsulates a URI and relevant attributes for use in tests of
+ * {@link UriUtils#sanitizeUri(String)} and related methods.
+ */
+ private static class UriTestCaseSpec {
+ private final String uri;
+ private final String scheme;
+ private final boolean expectUriParseException;
+ private URI parsedUri; // stays null when a parse exception is expected
+
+ /**
+ * Creates a URI test case specification.
+ *
+ * @param uri the URI of this test vector
+ * @param scheme the scheme that is expected to be parsed from {@code uri}
+ * by {@link UriUtils#extractScheme(String)}
+ * @param expectUriParseException true if parsing {@code uri} into a
+ * {@link URI} object is expected to result in a
+ * {@link URISyntaxException}
+ */
+ public UriTestCaseSpec(
+ String uri, String scheme, boolean expectUriParseException) {
+ this.uri = uri;
+ this.scheme = scheme;
+ this.expectUriParseException = expectUriParseException;
+ if (!expectUriParseException) {
+ try {
+ parsedUri = new URI(uri);
+ } catch (URISyntaxException e) {
+ throw new IllegalStateException(
+ "parsing \"" + uri + "\" resulted in unexpected exception: " + e,
+ e);
+ }
+ }
+ }
+
+ public UriTestCaseSpec(String uri, String scheme) {
+ this(uri, scheme, false);
+ }
+
+ public String getUri() {
+ return uri;
+ }
+
+ public URI getParsedUri() {
+ return parsedUri;
+ }
+
+ public String getScheme() {
+ return scheme;
+ }
+
+ public boolean getExpectUriParseException() {
+ return expectUriParseException;
+ }
+ }
+
+ private static final List<UriTestCaseSpec> GOOD_URIS;
+ static {
+ List<UriTestCaseSpec> goodUris = new ArrayList<UriTestCaseSpec>();
+
+ // URIs with no scheme.
+ goodUris.add(new UriTestCaseSpec("bar", null));
+ goodUris.add(new UriTestCaseSpec("/foo/bar", null));
+ goodUris.add(new UriTestCaseSpec("/foo/bar#baz", null));
+ goodUris.add(new UriTestCaseSpec("/foo/bar:baz", null));
+ goodUris.add(new UriTestCaseSpec("#baz", null));
+ goodUris.add(new UriTestCaseSpec("#baz:dooz", null));
+ goodUris.add(new UriTestCaseSpec("foo#baz:dooz", null));
+
+ // URIs with http scheme.
+ goodUris.add(new UriTestCaseSpec("http:foo", "http"));
+ goodUris.add(new UriTestCaseSpec("http://foo.com:80/blah", "http"));
+ goodUris.add(new UriTestCaseSpec("http://foo.com/bar", "http"));
+ goodUris.add(new UriTestCaseSpec("http://foo.com/bar#baz", "http"));
+
+ // URIs with https, ftp, mailto scheme.
+ goodUris.add(new UriTestCaseSpec("mailto:good@good.com", "mailto"));
+ goodUris.add(new UriTestCaseSpec("https://foo.com", "https"));
+ goodUris.add(new UriTestCaseSpec("ftp://foo.com", "ftp"));
+
+ GOOD_URIS = Collections.unmodifiableList(goodUris);
+ }
+
+ private static final List<UriTestCaseSpec> BAD_URIS;
+ static {
+ List<UriTestCaseSpec> badUris = new ArrayList<UriTestCaseSpec>();
+
+ // URIs with defined, bad schemes.
+ badUris.add(new UriTestCaseSpec("javascript:evil", "javascript"));
+ badUris.add(new UriTestCaseSpec("javascript://foo()", "javascript"));
+ badUris.add(new UriTestCaseSpec("javascript:evil#world", "javascript"));
+ badUris.add(new UriTestCaseSpec("javascript:evil/is", "javascript"));
+
+ // URIs with weird schemes, neither of which can be parsed as a URI.
+ badUris.add(
+ new UriTestCaseSpec(" mailto:good@good.com", " mailto", true));
+ badUris.add(new UriTestCaseSpec("ma&ilto:good@good.com", "ma&ilto", true));
+
+ BAD_URIS = Collections.unmodifiableList(badUris);
+ }
+
+ public static void testExtractScheme() {
+ for (UriTestCaseSpec uriSpec : GOOD_URIS) {
+ assertEquals(
+ uriSpec.getScheme(), UriUtils.extractScheme(uriSpec.getUri()));
+ // Verify that the scheme parsed by extractScheme() is the same as
+ // obtained by {@link URI}'s parser.
+ assertEquals(uriSpec.getScheme(), uriSpec.getParsedUri().getScheme());
+ }
+ for (UriTestCaseSpec uriSpec : BAD_URIS) {
+ assertEquals(
+ uriSpec.getScheme(), UriUtils.extractScheme(uriSpec.getUri()));
+ if (!uriSpec.getExpectUriParseException()) {
+ // Verify that the scheme parsed by extractScheme() is the same as
+ // obtained by {@link URI}'s parser (for those URIs that can be parsed
+ // by the latter).
+ assertEquals(uriSpec.getScheme(), uriSpec.getParsedUri().getScheme());
+ }
+ }
+ }
+
+ public static void testIsSafeUri() {
+ for (UriTestCaseSpec uriSpec : GOOD_URIS) {
+ assertTrue(UriUtils.isSafeUri(uriSpec.getUri()));
+ }
+ for (UriTestCaseSpec uriSpec : BAD_URIS) {
+ assertFalse(UriUtils.isSafeUri(uriSpec.getUri()));
+ }
+ }
+
+ public static void testSanitizeUri() {
+ for (UriTestCaseSpec uriSpec : GOOD_URIS) {
+ assertEquals(uriSpec.getUri(), UriUtils.sanitizeUri(uriSpec.getUri()));
+ }
+ for (UriTestCaseSpec uriSpec : BAD_URIS) {
+ assertEquals("#", UriUtils.sanitizeUri(uriSpec.getUri()));
+ }
+ }
+}
diff --git a/user/test/com/google/gwt/safehtml/shared/GwtSafeHtmlStringTest.java b/user/test/com/google/gwt/safehtml/shared/GwtSafeHtmlStringTest.java
new file mode 100644
index 0000000..635545c
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/shared/GwtSafeHtmlStringTest.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+import com.google.gwt.junit.client.GWTTestCase;
+
+/**
+ * GWT unit tests for SafeHtmlString.
+ */
+public class GwtSafeHtmlStringTest extends GWTTestCase {
+
+ // Same wrapped string compares equal; a different string does not.
+ public void testEquals() {
+ SafeHtmlString safe1 = new SafeHtmlString("stringsame");
+ SafeHtmlString safe2 = new SafeHtmlString("stringsame");
+ SafeHtmlString safe3 = new SafeHtmlString("stringdiff");
+ assertEquals(safe1, safe2);
+ assertFalse(safe1.equals(safe3));
+ }
+
+ // hashCode() matches the hash code of the wrapped string.
+ public void testHashCode() {
+ SafeHtmlString safe1 = new SafeHtmlString("stringsame");
+ SafeHtmlString safe3 = new SafeHtmlString("stringdiff");
+ SafeHtmlString safe2 = new SafeHtmlString("stringsame");
+ assertEquals("stringsame".hashCode(), safe1.hashCode());
+ assertEquals(safe1.hashCode(), safe2.hashCode());
+ assertEquals("stringdiff".hashCode(), safe3.hashCode());
+ }
+
+ @Override
+ public String getModuleName() {
+ return "com.google.gwt.safehtml.SafeHtmlTestsModule";
+ }
+}
diff --git a/user/test/com/google/gwt/safehtml/shared/GwtSafeHtmlUtilsTest.java b/user/test/com/google/gwt/safehtml/shared/GwtSafeHtmlUtilsTest.java
new file mode 100644
index 0000000..b5eb126
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/shared/GwtSafeHtmlUtilsTest.java
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2008 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+import com.google.gwt.junit.client.GWTTestCase;
+
+/**
+ * GWT unit tests for SafeHtmlUtils.
+ */
+public class GwtSafeHtmlUtilsTest extends GWTTestCase {
+ // Expected values spell out each escaped character as an HTML entity.
+ public void testEscape_noEscape() {
+ String escaped = SafeHtmlUtils.htmlEscape("foobar");
+ assertEquals("foobar", escaped);
+ }
+
+ public void testEscape_ampersand() {
+ String escaped = SafeHtmlUtils.htmlEscape("foo&bar");
+ assertEquals("foo&amp;bar", escaped);
+ }
+
+ public void testEscape_ampersandAndBrackets() {
+ String escaped = SafeHtmlUtils.htmlEscape("fo<o&b<em>ar");
+ assertEquals("fo&lt;o&amp;b&lt;em&gt;ar", escaped);
+ }
+
+ public void testEscape_allMetaCharacters() {
+ String escaped = SafeHtmlUtils.htmlEscape("f\"bar \'<&em><e/m>oo&bar");
+ assertEquals(
+ "f&quot;bar &#39;&lt;&amp;em&gt;&lt;e/m&gt;oo&amp;bar", escaped);
+ }
+
+ public void testEscape_withEntities1() {
+ String escaped = SafeHtmlUtils.htmlEscapeAllowEntities(
+ "f\"bar \'<&em><e/m>oo&bar");
+ assertEquals(
+ "f&quot;bar &#39;&lt;&amp;em&gt;&lt;e/m&gt;oo&amp;bar", escaped);
+ }
+
+ public void testEscape_withEntities2() {
+ String escaped = SafeHtmlUtils.htmlEscapeAllowEntities("& foo &lt;");
+ assertEquals("&amp; foo &lt;", escaped);
+ }
+
+ public void testEscape_withEntities3() {
+ String escaped = SafeHtmlUtils.htmlEscapeAllowEntities(
+ "<foo> &amp; <em> bar &#39; baz");
+ assertEquals("&lt;foo&gt; &amp; &lt;em&gt; bar &#39; baz", escaped);
+ }
+
+ public void testEscape_withEntities4() {
+ String escaped = SafeHtmlUtils.htmlEscapeAllowEntities(
+ "&foo &amp;&amp; bar &#39; baz&");
+ assertEquals("&amp;foo &amp;&amp; bar &#39; baz&amp;", escaped);
+ }
+
+ public void testEscape_withEntitiesInvalidEntities() {
+ String escaped = SafeHtmlUtils.htmlEscapeAllowEntities(
+ "&a mp;&;&x;&#;&#x;");
+ assertEquals("&amp;a mp;&amp;;&x;&amp;#;&amp;#x;", escaped);
+ }
+
+ @Override
+ public String getModuleName() {
+ return "com.google.gwt.safehtml.SafeHtmlTestsModule";
+ }
+}
diff --git a/user/test/com/google/gwt/safehtml/shared/SafeHtmlBuilderTest.java b/user/test/com/google/gwt/safehtml/shared/SafeHtmlBuilderTest.java
new file mode 100644
index 0000000..b2f3b09
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/shared/SafeHtmlBuilderTest.java
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2008 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for SafeHtmlBuilder.
+ */
+public class SafeHtmlBuilderTest extends TestCase {
+
+ private static final String FOOBARBAZ_HTML = "foo<em>bar</em>baz";
+
+ public void testEmpty() {
+ SafeHtmlBuilder b = new SafeHtmlBuilder();
+ assertEquals("", b.toSafeHtml().asString());
+ }
+
+ public void testFromSafeHtml() {
+ SafeHtml html = new SafeHtmlString(FOOBARBAZ_HTML);
+ SafeHtmlBuilder b = new SafeHtmlBuilder().append(html);
+ assertEquals(FOOBARBAZ_HTML, b.toSafeHtml().asString());
+ }
+
+ public void testAppend() {
+ SafeHtml html = new SafeHtmlString(FOOBARBAZ_HTML);
+ SafeHtmlBuilder b = new SafeHtmlBuilder().appendHtmlConstant(
+ "Yabba dabba &amp; doo\n").appendEscaped("What's up so&so\n").append(
+ html);
+ // Only the appendEscaped() segment is expected in escaped form.
+ String expected = "Yabba dabba &amp; doo\n" + "What&#39;s up so&amp;so\n"
+ + FOOBARBAZ_HTML;
+ assertEquals(expected, b.toSafeHtml().asString());
+ }
+}
diff --git a/user/test/com/google/gwt/safehtml/shared/SafeHtmlStringTest.java b/user/test/com/google/gwt/safehtml/shared/SafeHtmlStringTest.java
new file mode 100644
index 0000000..78a2543
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/shared/SafeHtmlStringTest.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+/**
+ * Runs the {@link GwtSafeHtmlStringTest} tests as plain JUnit tests.
+ */
+public class SafeHtmlStringTest extends GwtSafeHtmlStringTest {
+
+ // This forces a GWTTestCase to run as a vanilla JUnit TestCase.
+ @Override
+ public String getModuleName() {
+ return null;
+ }
+}
diff --git a/user/test/com/google/gwt/safehtml/shared/SafeHtmlUtilsTest.java b/user/test/com/google/gwt/safehtml/shared/SafeHtmlUtilsTest.java
new file mode 100644
index 0000000..b7bf9a2
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/shared/SafeHtmlUtilsTest.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2008 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+/**
+ * Runs the {@link GwtSafeHtmlUtilsTest} tests as plain JUnit tests.
+ */
+public class SafeHtmlUtilsTest extends GwtSafeHtmlUtilsTest {
+
+ // This forces a GWTTestCase to run as a vanilla JUnit TestCase.
+ @Override
+ public String getModuleName() {
+ return null;
+ }
+}
diff --git a/user/test/com/google/gwt/safehtml/shared/SimpleHtmlSanitizerTest.java b/user/test/com/google/gwt/safehtml/shared/SimpleHtmlSanitizerTest.java
new file mode 100644
index 0000000..32983e2
--- /dev/null
+++ b/user/test/com/google/gwt/safehtml/shared/SimpleHtmlSanitizerTest.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2008 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.safehtml.shared;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for SimpleHtmlSanitizer.
+ */
+public class SimpleHtmlSanitizerTest extends TestCase {
+
+ public void testSimple() {
+ // simple case
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("foobar");
+ assertEquals("foobar", html.asString());
+ }
+
+ public void testDontChangeWhiteSpace() {
+ // shouldn't change whitespace or newlines
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml(
+ "things are breezy\nand jolly\tgood");
+ assertEquals("things are breezy\nand jolly\tgood", html.asString());
+ }
+
+ public void testEscapeHtmlMetaCharacters() {
+ // need to escape HTML metacharacters appearing on their own
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("foo < bar & that's good");
+ assertEquals("foo &lt; bar &amp; that&#39;s good", html.asString());
+ }
+
+ public void testDontDoubleEscape() {
+ // but don't double-escape HTML entities
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml(
+ "foo &lt; bar &amp; that&#39;s good");
+ assertEquals("foo &lt; bar &amp; that&#39;s good", html.asString());
+ }
+
+ public void testEscapeLoneMetacharacters() {
+ // escape lone metacharacters (the quotes) next to existing entities
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml(
+ "\"foo &lt; bar &amp; that&#39;s good\"");
+ assertEquals(
+ "&quot;foo &lt; bar &amp; that&#39;s good&quot;", html.asString());
+ }
+
+ public void testDontEscapeValidTags() {
+ // leave simple tags alone
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("foo <em>bar</em>");
+ assertEquals("foo <em>bar</em>", html.asString());
+ }
+
+ public void testTagAtBeginning() {
+ // correctly deal with a tag at the beginning
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("<em>bar</em>");
+ assertEquals("<em>bar</em>", html.asString());
+ }
+
+ public void testNonTagAtBeginning() {
+ // correctly deal with a non-tag at the beginning
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("<yow <em>bar</em>");
+ assertEquals("&lt;yow <em>bar</em>", html.asString());
+ }
+
+ public void testNonTagAtEnd() {
+ // correctly deal with a non-tag at the end
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("<em>bar</em> foo <");
+ assertEquals("<em>bar</em> foo &lt;", html.asString());
+ }
+
+ public void testNullTag() {
+ // correctly deal with bogus empty tag
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("<>bar</em> foo<>");
+ assertEquals("&lt;&gt;bar</em> foo&lt;&gt;", html.asString());
+ }
+
+ public void testNullEndTag() {
+ // correctly deal with bogus empty end tag
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("</>bar</em> foo</>");
+ assertEquals("&lt;/&gt;bar</em> foo&lt;/&gt;", html.asString());
+ }
+
+ public void testSimpleTagsAndHtmlMetaChars() {
+ // mix of simple tags and HTML metacharacters appearing on their own
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml(
+ "foo < bar & that's <b>good</b>");
+ assertEquals("foo &lt; bar &amp; that&#39;s <b>good</b>", html.asString());
+ }
+
+ public void testEvilTags() {
+ // escape tags we don't know
+ SafeHtml html = SimpleHtmlSanitizer.sanitizeHtml("<script>evil()</script>");
+ assertEquals("&lt;script&gt;evil()&lt;/script&gt;", html.asString());
+ }
+}