GWT versions of BidiUtil, BidiFormatter


git-svn-id: https://google-web-toolkit.googlecode.com/svn/trunk@7908 8db76d5a-ed1c-0410-87a9-c151d255dfc7
diff --git a/user/src/com/google/gwt/i18n/I18N.gwt.xml b/user/src/com/google/gwt/i18n/I18N.gwt.xml
index 8f9f256..3f0fa0b 100644
--- a/user/src/com/google/gwt/i18n/I18N.gwt.xml
+++ b/user/src/com/google/gwt/i18n/I18N.gwt.xml
@@ -15,6 +15,9 @@
 <!-- Internationalization support.                                          -->
 <!--                                                                        -->
 <module>
+  <inherits name="com.google.gwt.regexp.RegExp"/>
+  <source path="" includes="client/,shared/" />
+  
   <!-- Browser-sensitive code should use the 'locale' client property. -->
   <!-- 'default' is always defined.                                    -->
   <define-property name="locale" values="default" />
diff --git a/user/src/com/google/gwt/i18n/shared/BidiFormatter.java b/user/src/com/google/gwt/i18n/shared/BidiFormatter.java
new file mode 100644
index 0000000..f52f7ba
--- /dev/null
+++ b/user/src/com/google/gwt/i18n/shared/BidiFormatter.java
@@ -0,0 +1,625 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package com.google.gwt.i18n.shared;
+
+import com.google.gwt.i18n.client.HasDirection.Direction;
+
+/**
+ * Utility class for formatting text for display in a potentially
+ * opposite-direction context without garbling. The direction of the context is
+ * set at formatter creation and the direction of the text can be either
+ * estimated or passed in when known. Provides the following functionality:
+ * <p>
+ * 1. BiDi Wrapping: When text in one language is mixed into a document in
+ * another, opposite-direction language, e.g. when an English business name is
+ * embedded in a Hebrew web page, both the inserted string and the text
+ * following it may be displayed incorrectly unless the inserted string is
+ * explicitly separated from the surrounding text in a "wrapper" that declares
+ * its direction at the start and then resets it back at the end. This wrapping
+ * can be done in HTML mark-up (e.g. a 'span dir=rtl' tag) or - only in contexts
+ * where mark-up cannot be used - in Unicode BiDi formatting codes (LRE|RLE and
+ * PDF). Optionally, the mark-up can be inserted even when the direction is the
+ * same, in order to keep the DOM structure more stable. Providing such wrapping
+ * services is the basic purpose of the BiDi formatter.
+ * <p>
+ * 2. Direction estimation: How does one know whether a string about to be
+ * inserted into surrounding text has the same direction? Well, in many cases,
+ * one knows that this must be the case when writing the code doing the
+ * insertion, e.g. when a localized message is inserted into a localized page.
+ * In such cases there is no need to involve the BiDi formatter at all. In some
+ * other cases, it need not be the same as the context, but is either constant
+ * (e.g. urls are always LTR) or otherwise known. In the remaining cases, e.g.
+ * when the string is user-entered or comes from a database, the language of the
+ * string (and thus its direction) is not known a priori, and must be estimated
+ * at run-time. The BiDi formatter can do this automatically.
+ * <p>
+ * 3. Escaping: When wrapping plain text - i.e. text that is not already HTML or
+ * HTML-escaped - in HTML mark-up, the text must first be HTML-escaped to
+ * prevent XSS attacks and other nasty business. This of course is always true,
+ * but the escaping can not be done after the string has already been wrapped in
+ * mark-up, so the BiDi formatter also serves as a last chance and includes
+ * escaping services.
+ * <p>
+ * Thus, in a single call, the formatter will escape the input string as
+ * specified, determine its direction, and wrap it as necessary. It is then up
+ * to the caller to insert the return value in the output.
+ * 
+ */
+public class BidiFormatter {
+
+  /**
+   * Package-private holder for the direction-related constants used by the
+   * formatter: the "left" / "right" edge names and Unicode BiDi characters.
+   */
+  static final class Format {
+    /**
+     * Name of the left edge, as returned by startEdge() / endEdge().
+     */
+    public static final String LEFT = "left";
+
+    /**
+     * Unicode "Left-To-Right Embedding" (LRE) character, U+202A.
+     */
+    public static final char LRE = '\u202A';
+
+    /**
+     * Unicode "Left-To-Right Mark" (LRM) character, U+200E.
+     */
+    public static final char LRM = '\u200E';
+
+    /**
+     * One-character string holding LRM, for methods that return a String.
+     */
+    public static final String LRM_STRING = Character.toString(LRM);
+
+    /**
+     * Unicode "Pop Directional Formatting" (PDF) character, U+202C.
+     */
+    public static final char PDF = '\u202C';
+
+    /**
+     * Name of the right edge, as returned by startEdge() / endEdge().
+     */
+    public static final String RIGHT = "right";
+
+    /**
+     * Unicode "Right-To-Left Embedding" (RLE) character, U+202B.
+     */
+    public static final char RLE = '\u202B';
+
+    /**
+     * Unicode "Right-To-Left Mark" (RLM) character, U+200F.
+     */
+    public static final char RLM = '\u200F';
+
+    /**
+     * One-character string holding RLM, for methods that return a String.
+     */
+    public static final String RLM_STRING = Character.toString(RLM);
+
+    // Not instantiable.
+    private Format() {
+    }
+  }
+
+  /**
+   * Creates a BidiFormatter for the given context direction that wraps text in
+   * a 'span' only when a 'dir' attribute has to be declared (equivalent to
+   * {@code getInstance(rtlContext, false)}).
+   *
+   * @param rtlContext Whether the context direction is RTL. In one simple use
+   *          case, the context direction would simply be the locale direction,
+   *          which can be retrieved using
+   *          {@code LocaleInfo.getCurrentLocale().isRTL()}
+   * @return a new formatter with RTL context if {@code rtlContext}, else LTR
+   */
+  public static BidiFormatter getInstance(boolean rtlContext) {
+    return getInstance(rtlContext, false);
+  }
+
+  /**
+   * Creates a BidiFormatter for an LTR or RTL context with the desired span
+   * wrapping behavior.
+   *
+   * @param rtlContext Whether the context direction is RTL (otherwise LTR).
+   *          See a simple use case at {@link #getInstance(boolean)}
+   * @param alwaysSpan Whether {@link #spanWrap} (and its variations) should
+   *          always use a 'span' tag, even when the input direction is neutral
+   *          or matches the context, so that the DOM structure is stable
+   * @return a new formatter configured as requested
+   */
+  public static BidiFormatter getInstance(boolean rtlContext,
+      boolean alwaysSpan) {
+    return new BidiFormatter(rtlContext ? Direction.RTL : Direction.LTR,
+        alwaysSpan);
+  }
+
+  /**
+   * Creates a BidiFormatter for the given context direction that wraps text in
+   * a 'span' only when a 'dir' attribute has to be declared (equivalent to
+   * {@code getInstance(contextDir, false)}).
+   *
+   * @param contextDir The context direction. See an example of a simple use
+   *          case at {@link #getInstance(boolean)}. Note: Direction.DEFAULT
+   *          indicates unknown context direction. Try not to use it, since it
+   *          is impossible to reset the direction back to the context when it
+   *          is unknown
+   * @return a new formatter for {@code contextDir}
+   */
+  public static BidiFormatter getInstance(Direction contextDir) {
+    return getInstance(contextDir, false);
+  }
+
+  /**
+   * Creates a BidiFormatter for the given context direction with the desired
+   * span wrapping behavior.
+   *
+   * @param contextDir The context direction. See an example of a simple use
+   *          case at {@link #getInstance(boolean)}. Note: Direction.DEFAULT
+   *          indicates unknown context direction. Try not to use it, since it
+   *          is impossible to reset the direction back to the context when it
+   *          is unknown
+   * @param alwaysSpan Whether {@link #spanWrap} (and its variations) should
+   *          always use a 'span' tag, even when the input direction is neutral
+   *          or matches the context, so that the DOM structure is stable
+   * @return a new formatter configured as requested
+   */
+  public static BidiFormatter getInstance(Direction contextDir,
+      boolean alwaysSpan) {
+    return new BidiFormatter(contextDir, alwaysSpan);
+  }
+
+  private final boolean alwaysSpan;
+  private final Direction contextDir;
+
+  /**
+   * Creates an immutable formatter; use the {@code getInstance} factories.
+   *
+   * @param contextDir The context direction
+   * @param alwaysSpan Whether {@link #spanWrap} (and its variations) should
+   *          always emit a 'span' tag, even when no 'dir' attribute is needed
+   */
+  private BidiFormatter(Direction contextDir, boolean alwaysSpan) {
+    this.contextDir = contextDir;
+    this.alwaysSpan = alwaysSpan;
+  }
+
+  /**
+   * Like {@link #dirAttr(String, boolean)}, but assumes {@code isHtml} is
+   * false, i.e. {@code str} is treated as plain text.
+   *
+   * @param str String whose direction is to be estimated
+   * @return "dir=rtl" for RTL text in non-RTL context; "dir=ltr" for LTR text
+   *         in non-LTR context; else, the empty string.
+   */
+  public String dirAttr(String str) {
+    return dirAttr(str, false);
+  }
+
+  /**
+   * Returns "dir=ltr" or "dir=rtl" according to the direction estimated for
+   * {@code str} by {@link BidiUtils#estimateDirection(String, boolean)}, unless
+   * it matches the context direction, in which case "" is returned.
+   *
+   * @param str String whose direction is to be estimated
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @return "dir=rtl" for RTL text in non-RTL context; "dir=ltr" for LTR text
+   *         in non-LTR context; else, the empty string.
+   */
+  public String dirAttr(String str, boolean isHtml) {
+    return knownDirAttr(BidiUtils.get().estimateDirection(str, isHtml));
+  }
+
+  /**
+   * Names the edge at which text ends in the context direction: "left" for an
+   * RTL context, "right" otherwise (LTR or default / unknown context).
+   */
+  public String endEdge() {
+    return isRtlContext() ? Format.LEFT : Format.RIGHT;
+  }
+
+  /**
+   * Like {@link #estimateDirection(String, boolean)}, but treats {@code str}
+   * as plain text (delegates to {@link BidiUtils#estimateDirection(String)}).
+   *
+   * @param str String whose direction is to be estimated
+   * @return {@code str}'s estimated overall direction
+   */
+  public Direction estimateDirection(String str) {
+    return BidiUtils.get().estimateDirection(str);
+  }
+
+  /**
+   * Estimates the direction of a string by delegating to {@link BidiUtils},
+   * which is documented to use relative word counts. A Direction.DEFAULT
+   * return value indicates completely neutral input.
+   *
+   * @param str String whose direction is to be estimated
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @return {@code str}'s estimated overall direction
+   */
+  public Direction estimateDirection(String str, boolean isHtml) {
+    return BidiUtils.get().estimateDirection(str, isHtml);
+  }
+
+  /**
+   * @return Whether spans are added by {@link #spanWrap} and its variations
+   *         even when no direction needs to be declared, which keeps the
+   *         span structure of the output stable
+   */
+  public boolean getAlwaysSpan() {
+    return alwaysSpan;
+  }
+
+  /**
+   * @return The context direction this formatter was created with
+   */
+  public Direction getContextDir() {
+    return contextDir;
+  }
+
+  /**
+   * @return Whether the context direction is RTL (false for LTR and DEFAULT)
+   */
+  public boolean isRtlContext() {
+    return contextDir == Direction.RTL;
+  }
+
+  /**
+   * Builds the 'dir' attribute to declare for text of a known direction: empty
+   * when {@code dir} equals the context direction or is neither LTR nor RTL.
+   *
+   * @param dir Given direction
+   * @return "dir=rtl" for RTL text in non-RTL context; "dir=ltr" for LTR text
+   *         in non-LTR context; else, the empty string.
+   */
+  public String knownDirAttr(Direction dir) {
+    if (dir == contextDir) {
+      return "";
+    }
+    return dir == Direction.LTR ? "dir=ltr"
+        : dir == Direction.RTL ? "dir=rtl" : "";
+  }
+
+  /**
+   * Returns the Unicode BiDi mark matching the context direction as a string:
+   * LRM for an LTR context, RLM for an RTL context, and the empty string for
+   * a default / unknown context direction.
+   */
+  public String mark() {
+    return contextDir == Direction.LTR ? Format.LRM_STRING
+        : contextDir == Direction.RTL ? Format.RLM_STRING : "";
+  }
+
+  /**
+   * Like {@link #markAfter(String, boolean)}, but assumes {@code isHtml} is
+   * false, i.e. {@code str} is treated as plain text.
+   *
+   * @param str String after which the mark may need to appear
+   * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
+   *         else, the empty string.
+   */
+  public String markAfter(String str) {
+    return markAfter(str, false);
+  }
+
+  /**
+   * Returns a Unicode BiDi mark matching the context direction (LRM or RLM) if
+   * the estimated or the exit direction of {@code str}, after it is passed
+   * through {@code BidiUtils.stripHtmlIfNeeded}, opposes the context; else "".
+   *
+   * @param str String after which the mark may need to appear
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
+   *         else, the empty string.
+   */
+  public String markAfter(String str, boolean isHtml) {
+    str = BidiUtils.get().stripHtmlIfNeeded(str, isHtml);
+    return dirResetIfNeeded(str, BidiUtils.get().estimateDirection(str), false,
+        true);
+  }
+
+  /**
+   * Like {@link #spanWrap(String, boolean, boolean)}, but assumes
+   * {@code isHtml} is false and {@code dirReset} is true.
+   *
+   * @param str The input string
+   * @return {@code str} processed as described for that method.
+   */
+  public String spanWrap(String str) {
+    return spanWrap(str, false, true);
+  }
+
+  /**
+   * Like {@link #spanWrap(String, boolean, boolean)}, but assumes
+   * {@code dirReset} is true.
+   *
+   * @param str The input string
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @return {@code str} processed as described for that method.
+   */
+  public String spanWrap(String str, boolean isHtml) {
+    return spanWrap(str, isHtml, true);
+  }
+
+  /**
+   * Formats a string of unknown direction for use in HTML output of the context
+   * direction, so an opposite-direction string is neither garbled nor garbles
+   * what follows it.
+   * <p>
+   * The algorithm: estimates the direction of input argument {@code str}. In
+   * case its direction doesn't match the context direction, wraps it with a
+   * 'span' tag and adds a "dir" attribute (either 'dir=rtl' or 'dir=ltr').
+   * <p>
+   * If {@link #getAlwaysSpan()} is true, the input is always wrapped with a
+   * 'span', skipping just the dir attribute when it's not needed.
+   * <p>
+   * If {@code dirReset}, and if the overall direction or the exit direction of
+   * {@code str} are opposite to the context direction, a trailing unicode BiDi
+   * mark matching the context direction is appended (LRM or RLM).
+   * <p>
+   * If !{@code isHtml}, HTML-escapes {@code str} regardless of wrapping.
+   *
+   * @param str The input string
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @param dirReset Whether to append a trailing unicode bidi mark matching the
+   *          context direction, when needed, to prevent the possible garbling
+   *          of whatever may follow {@code str}
+   * @return Input string after applying the above processing.
+   */
+  public String spanWrap(String str, boolean isHtml, boolean dirReset) {
+    Direction dir = BidiUtils.get().estimateDirection(str, isHtml);
+    return spanWrapWithKnownDir(dir, str, isHtml, dirReset);
+  }
+
+  /**
+   * Like {@link #spanWrapWithKnownDir(Direction, String, boolean, boolean)},
+   * but assumes {@code isHtml} is false and {@code dirReset} is true.
+   *
+   * @param dir {@code str}'s direction
+   * @param str The input string
+   * @return {@code str} processed as described for that method.
+   */
+  public String spanWrapWithKnownDir(Direction dir, String str) {
+    return spanWrapWithKnownDir(dir, str, false, true);
+  }
+
+  /**
+   * Like {@link #spanWrapWithKnownDir(Direction, String, boolean, boolean)},
+   * but assumes {@code dirReset} is true.
+   *
+   * @param dir {@code str}'s direction
+   * @param str The input string
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @return {@code str} processed as described for that method.
+   */
+  public String spanWrapWithKnownDir(Direction dir, String str, boolean isHtml) {
+    return spanWrapWithKnownDir(dir, str, isHtml, true);
+  }
+
+  /**
+   * Formats a string of given direction for use in HTML output of the context
+   * direction, so an opposite-direction string is neither garbled nor garbles
+   * what follows it.
+   * <p>
+   * The algorithm: if {@code dir} is LTR or RTL and doesn't match the context
+   * direction, wraps {@code str} with a 'span' tag that carries a "dir"
+   * attribute (either 'dir=rtl' or 'dir=ltr'). No estimation is performed.
+   * <p>
+   * If {@link #getAlwaysSpan()} is true, the input is always wrapped with a
+   * 'span', skipping just the dir attribute when it's not needed.
+   * <p>
+   * If {@code dirReset}, and if the overall direction or the exit direction of
+   * {@code str} are opposite to the context direction, a trailing unicode BiDi
+   * mark matching the context direction is appended (LRM or RLM).
+   * <p>
+   * If !{@code isHtml}, HTML-escapes {@code str} regardless of wrapping.
+   *
+   * @param dir {@code str}'s direction
+   * @param str The input string
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @param dirReset Whether to append a trailing unicode bidi mark matching the
+   *          context direction, when needed, to prevent the possible garbling
+   *          of whatever may follow {@code str}
+   * @return Input string after applying the above processing.
+   */
+  public String spanWrapWithKnownDir(Direction dir, String str, boolean isHtml,
+      boolean dirReset) {
+    boolean dirCondition = dir != Direction.DEFAULT && dir != contextDir;
+    String origStr = str;
+    if (!isHtml) {
+      str = htmlEscape(str);
+    }
+
+    StringBuilder result = new StringBuilder();
+    if (alwaysSpan || dirCondition) {
+      result.append("<span");
+      if (dirCondition) {
+        result.append(" ");
+        result.append(dir == Direction.RTL ? "dir=rtl" : "dir=ltr");
+      }
+      result.append('>').append(str).append("</span>");
+    } else {
+      result.append(str);
+    }
+    // origStr is passed (more efficient when isHtml is false).
+    result.append(dirResetIfNeeded(origStr, dir, isHtml, dirReset));
+    return result.toString();
+  }
+
+  /**
+   * Names the edge at which text starts in the context direction: "right" for
+   * an RTL context, "left" otherwise (LTR or default / unknown context).
+   */
+  public String startEdge() {
+    return isRtlContext() ? Format.RIGHT : Format.LEFT;
+  }
+
+  /**
+   * Like {@link #unicodeWrap(String, boolean, boolean)}, but assumes
+   * {@code isHtml} is false and {@code dirReset} is true.
+   *
+   * @param str The input string
+   * @return {@code str} processed as described for that method.
+   */
+  public String unicodeWrap(String str) {
+    return unicodeWrap(str, false, true);
+  }
+
+  /**
+   * Like {@link #unicodeWrap(String, boolean, boolean)}, but assumes
+   * {@code dirReset} is true.
+   *
+   * @param str The input string
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @return {@code str} processed as described for that method.
+   */
+  public String unicodeWrap(String str, boolean isHtml) {
+    return unicodeWrap(str, isHtml, true);
+  }
+
+  /**
+   * Formats a string of unknown direction for use in plain-text output of the
+   * context direction, so an opposite-direction string is neither garbled nor
+   * garbles what follows it. As opposed to {@link #spanWrap}, this makes use of
+   * Unicode BiDi formatting characters. In HTML, its *only* valid use is inside
+   * of elements that do not allow mark-up, e.g. an 'option' tag.
+   * <p>
+   * The algorithm: estimates the direction of input argument {@code str}. In
+   * case it doesn't match the context direction, wraps it with Unicode BiDi
+   * formatting characters: RLE+{@code str}+PDF for RTL text, or
+   * LRE+{@code str}+PDF for LTR text.
+   * <p>
+   * If {@code dirReset}, and if the overall direction or the exit direction
+   * of {@code str} are opposite to the context direction, a trailing unicode
+   * BiDi mark matching the context direction is appended (LRM or RLM).
+   * <p>
+   * Does *not* do HTML-escaping regardless of the value of {@code isHtml}.
+   *
+   * @param str The input string
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @param dirReset Whether to append a trailing unicode bidi mark matching the
+   *          context direction, when needed, to prevent the possible garbling
+   *          of whatever may follow {@code str}
+   * @return Input string after applying the above processing.
+   */
+  public String unicodeWrap(String str, boolean isHtml, boolean dirReset) {
+    Direction dir = BidiUtils.get().estimateDirection(str, isHtml);
+    return unicodeWrapWithKnownDir(dir, str, isHtml, dirReset);
+  }
+
+  /**
+   * Like {@link #unicodeWrapWithKnownDir(Direction, String, boolean, boolean)},
+   * but assumes {@code isHtml} is false and {@code dirReset} is true.
+   *
+   * @param dir {@code str}'s direction
+   * @param str The input string
+   * @return {@code str} processed as described for that method.
+   */
+  public String unicodeWrapWithKnownDir(Direction dir, String str) {
+    return unicodeWrapWithKnownDir(dir, str, false, true);
+  }
+
+  /**
+   * Like {@link #unicodeWrapWithKnownDir(Direction, String, boolean, boolean)},
+   * but assumes {@code dirReset} is true.
+   *
+   * @param dir {@code str}'s direction
+   * @param str The input string
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @return {@code str} processed as described for that method.
+   */
+  public String unicodeWrapWithKnownDir(Direction dir, String str,
+      boolean isHtml) {
+    return unicodeWrapWithKnownDir(dir, str, isHtml, true);
+  }
+
+  /**
+   * Formats a string of given direction for use in plain-text output of the
+   * context direction, so an opposite-direction string is neither garbled nor
+   * garbles what follows it. As opposed to {@link #spanWrapWithKnownDir}, this
+   * makes use of unicode BiDi formatting characters. In HTML, its *only* valid
+   * use is inside of elements that do not allow mark-up, e.g. an 'option' tag.
+   * <p>
+   * The algorithm: if {@code dir} is not DEFAULT and doesn't match the context
+   * direction, wraps {@code str} with Unicode BiDi formatting characters:
+   * RLE+{@code str}+PDF for RTL text, or LRE+{@code str}+PDF otherwise. The
+   * direction is taken from {@code dir}; no estimation is performed.
+   * <p>
+   * If {@code dirReset}, and if the overall direction or the exit direction
+   * of {@code str} are opposite to the context direction, a trailing unicode
+   * BiDi mark matching the context direction is appended (LRM or RLM).
+   * <p>
+   * Does *not* do HTML-escaping regardless of the value of {@code isHtml}.
+   *
+   * @param dir {@code str}'s direction
+   * @param str The input string
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @param dirReset Whether to append a trailing unicode bidi mark matching the
+   *          context direction, when needed, to prevent the possible garbling
+   *          of whatever may follow {@code str}
+   * @return Input string after applying the above processing.
+   */
+  public String unicodeWrapWithKnownDir(Direction dir, String str,
+      boolean isHtml, boolean dirReset) {
+    boolean needsEmbedding = dir != Direction.DEFAULT && dir != contextDir;
+    StringBuilder wrapped = new StringBuilder();
+    if (needsEmbedding) {
+      wrapped.append(dir == Direction.RTL ? Format.RLE : Format.LRE);
+    }
+    wrapped.append(str);
+    if (needsEmbedding) {
+      wrapped.append(Format.PDF);
+    }
+    wrapped.append(dirResetIfNeeded(str, dir, isHtml, dirReset));
+    return wrapped.toString();
+  }
+
+  /**
+   * Picks the trailing Unicode BiDi mark, if any, that restores the context
+   * direction after {@code str}: LRM in an LTR context when {@code str} is RTL
+   * overall or ends with RTL, RLM in an RTL context when {@code str} is LTR
+   * overall or ends with LTR. Returns the empty string in every other case,
+   * including when {@code dirReset} is false or the context is unknown.
+   *
+   * @param str The input string
+   * @param dir {@code str}'s overall direction
+   * @param isHtml Whether {@code str} is HTML / HTML-escaped
+   * @param dirReset Whether to perform the reset
+   * @return A unicode BiDi mark or the empty string.
+   */
+  private String dirResetIfNeeded(String str, Direction dir, boolean isHtml,
+      boolean dirReset) {
+    // The cheap comparisons come first so that the endsWith* regular
+    // expressions run only when they can still change the outcome.
+    if (dirReset && contextDir == Direction.LTR) {
+      boolean exitsRtl = dir == Direction.RTL
+          || BidiUtils.get().endsWithRtl(str, isHtml);
+      return exitsRtl ? Format.LRM_STRING : "";
+    }
+    if (dirReset && contextDir == Direction.RTL) {
+      boolean exitsLtr = dir == Direction.LTR
+          || BidiUtils.get().endsWithLtr(str, isHtml);
+      return exitsLtr ? Format.RLM_STRING : "";
+    }
+    return "";
+  }
+
+  private String htmlEscape(String str) {
+    // TODO(tomerigo): use a proper escaper class. Note: '&' must stay first.
+    return str.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;").replace("\n", "<br>");
+  }
+}
diff --git a/user/src/com/google/gwt/i18n/shared/BidiUtils.java b/user/src/com/google/gwt/i18n/shared/BidiUtils.java
new file mode 100644
index 0000000..8a124ef
--- /dev/null
+++ b/user/src/com/google/gwt/i18n/shared/BidiUtils.java
@@ -0,0 +1,313 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.i18n.shared;
+
+import com.google.gwt.i18n.client.HasDirection.Direction;
+import com.google.gwt.regexp.shared.RegExp;
+import com.google.gwt.regexp.shared.SplitResult;
+
+/**
+ * Utility functions for performing common Bidi tests on strings.
+ */
+public class BidiUtils {
+
+  /**
+   * Character ranges (the inside of a regular expression character class)
+   * approximating strong LTR characters. Not completely correct according to
+   * the Unicode standard; simplified for performance and small code size.
+   * <p>
+   * This is volatile to prevent the compiler from inlining this constant in
+   * various references below.
+   */
+  private static volatile String LTR_CHARS =
+    "A-Za-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02B8\u0300-\u0590\u0800-\u1FFF" +
+    "\u2C00-\uFB1C\uFDFE-\uFE6F\uFEFD-\uFFFF";
+
+  /**
+   * Character ranges (the inside of a regular expression character class)
+   * approximating strong RTL characters. Not completely correct according to
+   * the Unicode standard; simplified for performance and small code size.
+   * <p>
+   * This is volatile to prevent the compiler from inlining this constant in
+   * various references below.
+   */
+  private static volatile String RTL_CHARS =
+      "\u0591-\u07FF\uFB1D-\uFDFD\uFE70-\uFEFC";
+  
+  /**
+   * Matches a string whose first strongly directional character is LTR, i.e.
+   * an LTR character is reached from the start without passing an RTL one.
+   */
+  private static final RegExp FIRST_STRONG_IS_LTR_RE =
+      RegExp.compile("^[^" + RTL_CHARS + "]*[" + LTR_CHARS + ']');
+
+  /**
+   * Matches a string whose first strongly directional character is RTL, i.e.
+   * an RTL character is reached from the start without passing an LTR one.
+   */
+  private static final RegExp FIRST_STRONG_IS_RTL_RE =
+      RegExp.compile("^[^" + LTR_CHARS + "]*[" + RTL_CHARS + ']');
+
+  /**
+   * Matches a string that contains at least one LTR character.
+   */
+  private static final RegExp HAS_ANY_LTR_RE =
+      RegExp.compile("[" + LTR_CHARS + ']');
+
+  /**
+   * Matches a string that contains at least one RTL character.
+   */
+  private static final RegExp HAS_ANY_RTL_RE =
+      RegExp.compile("[" + RTL_CHARS + ']');
+
+  /**
+   * Regular expression to check if a string contains any numerals. Used to
+   * differentiate between completely neutral strings and those containing
+   * numbers, which are weakly LTR.
+   */
+  private static final RegExp HAS_NUMERALS_RE = RegExp.compile("\\d");
+
+  /**
+   * Simplified, global regular expression for an HTML tag (opening or closing)
+   * or an HTML escape. We might want to skip over such expressions when
+   * estimating the text directionality.
+   */
+  private static final RegExp SKIP_HTML_RE =
+      RegExp.compile("<[^>]*>|&[^;]+;", "g");
+
+  /**
+   * The single shared instance of BidiUtils, returned by {@link #get()}.
+   */
+  private static final BidiUtils INSTANCE = new BidiUtils();
+
+  /**
+   * Regular expression to check if a string looks like something that must
+   * always be LTR even in RTL text. Currently this only recognizes strings
+   * starting with "http://". When estimating the directionality of text
+   * containing these, we treat them as weakly LTR, like numbers.
+   */
+  private static final RegExp IS_REQUIRED_LTR_RE = RegExp.compile("^http://.*");
+
+  /**
+   * Regular expression to check if the last strongly-directional character in
+   * a piece of text is LTR.
+   */
+  private static final RegExp LAST_STRONG_IS_LTR_RE =
+      RegExp.compile("[" + LTR_CHARS + "][^" + RTL_CHARS + "]*$");
+
+  /**
+   * Regular expression to check if the last strongly-directional character in
+   * a piece of text is RTL.
+   */
+  private static final RegExp LAST_STRONG_IS_RTL_RE =
+      RegExp.compile("[" + RTL_CHARS + "][^" + LTR_CHARS + "]*$");
+
+  /**
+   * Share of strongly-directional words above which text is estimated as RTL.
+   */
+  private static final float RTL_DETECTION_THRESHOLD = 0.40f;
+
+  /**
+   * Regular expression to split a string on runs of whitespace into "words"
+   * for directionality estimation based on relative word counts.
+   */
+  private static final RegExp WORD_SEPARATOR_RE = RegExp.compile("\\s+");
+
+  /**
+   * Returns the shared BidiUtils instance; no new object is created per call.
+   * @return The shared instance of BidiUtils
+   */
+  public static BidiUtils get() {
+    return INSTANCE;
+  }
+
+  /**
+   * Not instantiable from outside this class; use {@link #get()}.
+   */
+  private BidiUtils() {
+  }
+
+  /**
+   * Like {@link #endsWithLtr(String, boolean)}, but assumes {@code str} is not
+   * HTML / HTML-escaped, so the text is tested as is.
+   */
+  public boolean endsWithLtr(String str) {
+    return LAST_STRONG_IS_LTR_RE.test(str);
+  }
+
+  /**
+   * Checks whether the last strongly-directional character in str is LTR.
+   * @param str the string to check
+   * @param isHtml whether str is HTML / HTML-escaped (see stripHtmlIfNeeded)
+   * @return whether LTR exit directionality was detected
+   */
+  public boolean endsWithLtr(String str, boolean isHtml) {
+    return endsWithLtr(stripHtmlIfNeeded(str, isHtml));
+  }
+
+  /**
+   * Like {@link #endsWithRtl(String, boolean)}, but assumes {@code str} is not
+   * HTML / HTML-escaped.
+   * @param str the string to check
+   * @return whether {@code str} matches {@code LAST_STRONG_IS_RTL_RE}, i.e.
+   *         whether its last strongly-directional character is RTL
+   */
+  public boolean endsWithRtl(String str) {
+    return LAST_STRONG_IS_RTL_RE.test(str);
+  }
+
+  /**
+   * Check whether the last strongly-directional character in the string is RTL.
+   * @param str the string to check
+   * @param isHtml whether str is HTML / HTML-escaped
+   * @return whether RTL exit directionality was detected
+   */
+  public boolean endsWithRtl(String str, boolean isHtml) {
+    return endsWithRtl(stripHtmlIfNeeded(str, isHtml));
+  }
+
+  /**
+   * Plain-text variant of {@link #estimateDirection(String, boolean)}: the
+   * input is assumed not to be HTML / HTML-escaped.
+   * <p>
+   * The string is split on whitespace and each word is classified in this
+   * order: a word whose first strong character is RTL counts as a strong RTL
+   * word; a word matching {@code IS_REQUIRED_LTR_RE} is only weakly LTR; a
+   * word containing any LTR character counts as a strong (non-RTL) word; a
+   * word containing numerals is only weakly LTR.
+   * @param str the string to check
+   * @return RTL if the share of RTL words among the strong words exceeds
+   *         {@code RTL_DETECTION_THRESHOLD}, LTR if there are other strong
+   *         words or only weakly LTR ones, and DEFAULT otherwise
+   */
+  public Direction estimateDirection(String str) {
+    SplitResult words = WORD_SEPARATOR_RE.split(str);
+    int strongWords = 0;
+    int rtlWords = 0;
+    boolean sawWeakLtr = false;
+
+    for (int idx = 0; idx < words.length(); idx++) {
+      String word = words.get(idx);
+      if (startsWithRtl(word)) {
+        rtlWords++;
+        strongWords++;
+        continue;
+      }
+      if (IS_REQUIRED_LTR_RE.test(word)) {
+        sawWeakLtr = true;
+        continue;
+      }
+      if (hasAnyLtr(word)) {
+        strongWords++;
+        continue;
+      }
+      if (HAS_NUMERALS_RE.test(word)) {
+        sawWeakLtr = true;
+      }
+    }
+
+    // No strongly-directional words: fall back on the weak LTR evidence.
+    if (strongWords == 0) {
+      return sawWeakLtr ? Direction.LTR : Direction.DEFAULT;
+    }
+
+    float rtlShare = (float) rtlWords / strongWords;
+    if (rtlShare > RTL_DETECTION_THRESHOLD) {
+      return Direction.RTL;
+    }
+    return Direction.LTR;
+  }
+
+  /**
+   * Estimates the directionality of a string based on relative word counts.
+   * If the number of RTL words is above a certain percentage of the total
+   * number of strongly directional words, returns RTL.
+   * Otherwise, if any words are strongly or weakly LTR, returns LTR.
+   * Otherwise, returns DEFAULT, which is used to mean "neutral".
+   * Numbers are counted as weakly LTR.
+   * @param str the string to check
+   * @param isHtml whether {@code str} is HTML / HTML-escaped. Use this to
+   *        ignore HTML tags and escapes that would otherwise be mistaken for
+   *        LTR text.
+   * @return the string's directionality
+   */
+  public Direction estimateDirection(String str, boolean isHtml) {
+    return estimateDirection(stripHtmlIfNeeded(str, isHtml));
+  }
+
+  /**
+   * Like {@link #hasAnyLtr(String, boolean)}, but assumes {@code str} is not
+   * HTML / HTML-escaped.
+   * @param str the string to be tested
+   * @return whether the string contains any LTR characters
+   */
+  public boolean hasAnyLtr(String str) {
+    return HAS_ANY_LTR_RE.test(str);
+  }
+
+  /**
+   * Checks if the given string has any LTR characters in it.
+   * @param str the string to be tested
+   * @param isHtml whether str is HTML / HTML-escaped
+   * @return whether the string contains any LTR characters
+   */
+  public boolean hasAnyLtr(String str, boolean isHtml) {
+    return hasAnyLtr(stripHtmlIfNeeded(str, isHtml));
+  }
+
+  /**
+   * Like {@link #hasAnyRtl(String, boolean)}, but assumes {@code str} is not
+   * HTML / HTML-escaped.
+   * @param str the string to be tested
+   * @return whether the string contains any RTL characters
+   */
+  public boolean hasAnyRtl(String str) {
+    return HAS_ANY_RTL_RE.test(str);
+  }
+
+  /**
+   * Checks if the given string has any RTL characters in it.
+   * @param isHtml whether str is HTML / HTML-escaped
+   * @param str the string to be tested
+   * @return whether the string contains any RTL characters
+   */
+  public boolean hasAnyRtl(String str, boolean isHtml) {
+    return hasAnyRtl(stripHtmlIfNeeded(str, isHtml));
+  }
+
+  /**
+   * Like {@link #startsWithLtr(String, boolean)}, but assumes {@code str} is
+   * not HTML / HTML-escaped.
+   * @param str the string to check
+   * @return whether {@code str} matches {@code FIRST_STRONG_IS_LTR_RE}; per
+   *         the two-argument overload, this means the first
+   *         strongly-directional character is LTR
+   */
+  public boolean startsWithLtr(String str) {
+    return FIRST_STRONG_IS_LTR_RE.test(str);
+  }
+
+  /**
+   * Check whether the first strongly-directional character in the string is
+   * LTR.
+   * @param str the string to check
+   * @param isHtml whether str is HTML / HTML-escaped
+   * @return whether LTR exit directionality was detected
+   */
+  public boolean startsWithLtr(String str, boolean isHtml) {
+    return startsWithLtr(stripHtmlIfNeeded(str, isHtml));
+  }
+
+  /**
+   * Like {@link #startsWithRtl(String, boolean)}, but assumes {@code str} is
+   * not HTML / HTML-escaped.
+   * @param str the string to check
+   * @return whether {@code str} matches {@code FIRST_STRONG_IS_RTL_RE}; per
+   *         the two-argument overload, this means the first
+   *         strongly-directional character is RTL
+   */
+  public boolean startsWithRtl(String str) {
+    return FIRST_STRONG_IS_RTL_RE.test(str);
+  }
+
+  /**
+   * Check whether the first strongly-directional character in the string is
+   * RTL.
+   * @param str the string to check
+   * @param isHtml whether {@code str} is HTML / HTML-escaped
+   * @return whether RTL exit directionality was detected
+   */
+  public boolean startsWithRtl(String str, boolean isHtml) {
+    return startsWithRtl(stripHtmlIfNeeded(str, isHtml));
+  }
+
+  /**
+   * Prepares text for directionality estimation. When {@code isStripNeeded}
+   * is true, every match of {@code SKIP_HTML_RE} (HTML tags and HTML escapes)
+   * is replaced with a single space; otherwise the input is returned as is.
+   * <p>
+   * Note: this should not be used in other contexts; it is not 100% correct,
+   * but rather a good-enough implementation for directionality estimation
+   * purposes.
+   * @param str the text to process
+   * @param isStripNeeded whether {@code str} is HTML / HTML-escaped and must
+   *        therefore be stripped
+   * @return the stripped text, or {@code str} itself when no stripping is
+   *         needed
+   */
+  String stripHtmlIfNeeded(String str, boolean isStripNeeded) {
+    if (!isStripNeeded) {
+      return str;
+    }
+    return SKIP_HTML_RE.replace(str, " ");
+  }
+}
diff --git a/user/test/com/google/gwt/i18n/I18NSuite.java b/user/test/com/google/gwt/i18n/I18NSuite.java
index 826b1dc..0088aad 100644
--- a/user/test/com/google/gwt/i18n/I18NSuite.java
+++ b/user/test/com/google/gwt/i18n/I18NSuite.java
@@ -44,6 +44,9 @@
 import com.google.gwt.i18n.rebind.MessageFormatParserTest;
 import com.google.gwt.i18n.server.GwtLocaleTest;
 import com.google.gwt.i18n.server.RegionInheritanceTest;
+import com.google.gwt.i18n.shared.BidiFormatterTest;
+import com.google.gwt.i18n.shared.BidiUtilsTest;
+import com.google.gwt.i18n.shared.GwtBidiUtilsTest;
 import com.google.gwt.junit.tools.GWTTestSuite;
 
 import junit.framework.Test;
@@ -58,6 +61,8 @@
     // $JUnit-BEGIN$
     suite.addTestSuite(ArabicPluralsTest.class);
     suite.addTestSuite(AnnotationsTest.class);
+    suite.addTestSuite(BidiFormatterTest.class);
+    suite.addTestSuite(BidiUtilsTest.class);
     suite.addTestSuite(ConstantMapTest.class);
     suite.addTestSuite(CurrencyTest.class);
     suite.addTestSuite(CustomPluralsTest.class);
@@ -67,6 +72,7 @@
     suite.addTestSuite(DateTimeFormat_pl_Test.class);
     suite.addTestSuite(DateTimeParse_en_Test.class);
     suite.addTestSuite(DateTimeParse_zh_CN_Test.class);
+    suite.addTestSuite(GwtBidiUtilsTest.class);
     suite.addTestSuite(GwtLocaleTest.class);
     suite.addTestSuite(I18NTest.class);
     suite.addTestSuite(I18N2Test.class);
diff --git a/user/test/com/google/gwt/i18n/I18NTest_shared.gwt.xml b/user/test/com/google/gwt/i18n/I18NTest_shared.gwt.xml
new file mode 100644
index 0000000..6c81ec0
--- /dev/null
+++ b/user/test/com/google/gwt/i18n/I18NTest_shared.gwt.xml
@@ -0,0 +1,19 @@
+<!--                                                                        -->
+<!-- Copyright 2010 Google Inc.                                             -->
+<!-- Licensed under the Apache License, Version 2.0 (the "License"); you    -->
+<!-- may not use this file except in compliance with the License. You may   -->
+<!-- obtain a copy of the License at                                        -->
+<!--                                                                        -->
+<!-- http://www.apache.org/licenses/LICENSE-2.0                             -->
+<!--                                                                        -->
+<!-- Unless required by applicable law or agreed to in writing, software    -->
+<!-- distributed under the License is distributed on an "AS IS" BASIS,      -->
+<!-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or        -->
+<!-- implied. See the License for the specific language governing           -->
+<!-- permissions and limitations under the License.                         -->
+
+<module>
+  <!-- GWT JUnit support, needed to run test cases inside this module. -->
+  <inherits name='com.google.gwt.junit.JUnit'/>
+  <!-- The i18n module whose shared classes are under test. -->
+  <inherits name="com.google.gwt.i18n.I18N"/>
+  <!-- Translatable test sources live in the "shared" sub-package. -->
+  <source path="shared" />
+</module>
diff --git a/user/test/com/google/gwt/i18n/shared/BidiFormatterTest.java b/user/test/com/google/gwt/i18n/shared/BidiFormatterTest.java
new file mode 100644
index 0000000..72fe789
--- /dev/null
+++ b/user/test/com/google/gwt/i18n/shared/BidiFormatterTest.java
@@ -0,0 +1,287 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.i18n.shared;
+
+import static com.google.gwt.i18n.shared.BidiFormatter.Format.*;
+
+import com.google.gwt.i18n.client.HasDirection.Direction;
+
+import junit.framework.TestCase;
+
+/**
+ * Unit tests for BidiFormatter.
+ * <p>
+ * Runs as a plain JUnit {@link TestCase}. Every test uses three formatters,
+ * created for an LTR, an RTL and a DEFAULT context direction, and feeds them
+ * short and long English (LTR) and Hebrew (RTL) sample strings, with and
+ * without HTML mark-up or HTML escapes. The expected values are spelled out
+ * literally, using the mark and embedding constants statically imported from
+ * {@code BidiFormatter.Format}.
+ */
+public class BidiFormatterTest extends TestCase {
+  static final Direction DEFAULT = Direction.DEFAULT;
+  static final Direction LTR = Direction.LTR;
+  static final Direction RTL = Direction.RTL;
+
+  String en = "abba"; // A single LTR (Latin-letter) word
+  String he = "\u05e0\u05e1"; // A single RTL (Hebrew-letter) word
+  String html = "&lt;"; // The HTML escape for '<'
+  String longEn = "abba sabba gabba "; // Several LTR words
+  String longHe = "\u05e0 \u05e1 \u05e0 "; // Several RTL words
+  BidiFormatter ltrFmt = BidiFormatter.getInstance(LTR); // LTR context
+  BidiFormatter rtlFmt = BidiFormatter.getInstance(RTL); // RTL context
+  BidiFormatter unkFmt = BidiFormatter.getInstance(DEFAULT); // DEFAULT context
+  
+  public void testDirAttr() {
+    // Regular cases:
+    assertEquals("dir=rtl", ltrFmt.dirAttr(he, true));
+    assertEquals("", rtlFmt.dirAttr(he, true));
+    assertEquals("dir=ltr", rtlFmt.dirAttr(en, true));
+    assertEquals("", ltrFmt.dirAttr(en, true));
+
+    // Text contains HTML or HTML-escaping:
+    assertEquals("dir=rtl", ltrFmt.dirAttr(he + "<some sort of an HTML tag>",
+        true));
+    assertEquals("", ltrFmt.dirAttr(he + "<some sort of an HTML tag>", false));
+  }
+
+  public void testEndEdge() {
+    assertEquals(LEFT, rtlFmt.endEdge());
+    assertEquals(RIGHT, ltrFmt.endEdge());
+    assertEquals(RIGHT, unkFmt.endEdge());
+  }
+
+  public void testEstimateDirection() {
+    // Regular cases.
+    assertEquals(DEFAULT, ltrFmt.estimateDirection(""));
+    assertEquals(DEFAULT, rtlFmt.estimateDirection(""));
+    assertEquals(DEFAULT, unkFmt.estimateDirection(""));
+    assertEquals(LTR, ltrFmt.estimateDirection(en));
+    assertEquals(LTR, rtlFmt.estimateDirection(en));
+    assertEquals(LTR, unkFmt.estimateDirection(en));
+    assertEquals(RTL, ltrFmt.estimateDirection(he));
+    assertEquals(RTL, rtlFmt.estimateDirection(he));
+    assertEquals(RTL, unkFmt.estimateDirection(he));
+
+    // Text contains HTML or HTML-escaping.
+    assertEquals(LTR, ltrFmt.estimateDirection("<some sort of tag/>" + he
+        + " &amp;", false));
+    assertEquals(RTL, ltrFmt.estimateDirection(he + "<some sort of tag/>" + he
+        + " &amp;", true));
+  }
+
+  public void testGetContextDir() {
+    assertEquals(LTR, ltrFmt.getContextDir());
+    assertEquals(RTL, rtlFmt.getContextDir());
+    assertEquals(DEFAULT, unkFmt.getContextDir());
+  }
+
+  public void testGetInstanceForRtlContext() {
+    assertEquals(LTR, BidiFormatter.getInstance(false).getContextDir());
+    assertEquals(RTL, BidiFormatter.getInstance(true).getContextDir());
+  }
+
+  public void testIsRtlContext() {
+    assertEquals(false, ltrFmt.isRtlContext());
+    assertEquals(true, rtlFmt.isRtlContext());
+    assertEquals(false, unkFmt.isRtlContext());
+  }
+
+  public void testKnownDirAttr() {
+    // Regular cases:
+    assertEquals("dir=rtl", ltrFmt.knownDirAttr(RTL));
+    assertEquals("", rtlFmt.knownDirAttr(RTL));
+    assertEquals("dir=ltr", rtlFmt.knownDirAttr(LTR));
+    assertEquals("", ltrFmt.knownDirAttr(LTR));
+  }
+
+  public void testMark() {
+    assertEquals(RLM_STRING, rtlFmt.mark());
+    assertEquals(LRM_STRING, ltrFmt.mark());
+    assertEquals("", unkFmt.mark());
+  }
+
+  public void testMarkAfter() {
+    assertEquals("exit dir (RTL) is opposite to context dir (LTR)", LRM_STRING,
+        ltrFmt.markAfter(longEn + he + html, true));
+    assertEquals("exit dir (LTR) is opposite to context dir (RTL)", RLM_STRING,
+        rtlFmt.markAfter(longHe + en, true));
+    assertEquals("exit dir (LTR) doesnt match context dir (DEFAULT)", "",
+        unkFmt.markAfter(longEn + en, true));
+    assertEquals("overall dir (RTL) is opposite to context dir (LTR)",
+        LRM_STRING, ltrFmt.markAfter(longHe + en, true));
+    assertEquals("overall dir (LTR) is opposite to context dir (RTL)",
+        RLM_STRING, rtlFmt.markAfter(longEn + he, true));
+    assertEquals("exit dir and overall dir match context dir (LTR)", "",
+        ltrFmt.markAfter(longEn + he + html, false));
+    assertEquals("exit dir and overall dir matches context dir (RTL)", "",
+        rtlFmt.markAfter(longHe + he, true));
+  }
+
+  public void testSpanWrap() {
+    // The main testing of the logic is done in testSpanWrapWithKnownDir.
+    assertEquals("<span dir=rtl>" + he + "</span>" + LRM, ltrFmt.spanWrap(he,
+        true));
+    assertEquals(he, rtlFmt.spanWrap(he, true));
+    assertEquals("<span dir=ltr>" + en + "</span>" + RLM, rtlFmt.spanWrap(en,
+        true));
+    assertEquals(en, ltrFmt.spanWrap(en, true));
+  }
+
+  public void testSpanWrapWithKnownDir() {
+    assertEquals("overall dir matches context dir (LTR)", en + "&lt;",
+        ltrFmt.spanWrapWithKnownDir(LTR, en + "<"));
+    assertEquals("overall dir matches context dir (LTR), HTML", en + "<br>",
+        ltrFmt.spanWrapWithKnownDir(LTR, en + "<br>", true));
+    assertEquals("overall dir matches context dir (RTL)", he + "&lt;",
+        rtlFmt.spanWrapWithKnownDir(RTL, he + "<"));
+    assertEquals("overall dir matches context dir (RTL), HTML", he
+        + " <some strange tag>", rtlFmt.spanWrapWithKnownDir(RTL, he
+        + " <some strange tag>", true));
+
+    assertEquals("overall dir (RTL) doesnt match context dir (LTR)",
+        "<span dir=rtl>" + he + "</span>" + LRM, ltrFmt.spanWrapWithKnownDir(
+            RTL, he));
+    assertEquals(
+        "overall dir (RTL) doesnt match context dir (LTR), no dirReset",
+        "<span dir=rtl>" + he + "</span>", ltrFmt.spanWrapWithKnownDir(RTL, he,
+            false, false));
+    assertEquals("overall dir (LTR) doesnt match context dir (RTL)",
+        "<span dir=ltr>" + en + "</span>" + RLM, rtlFmt.spanWrapWithKnownDir(
+            LTR, en));
+    assertEquals(
+        "overall dir (LTR) doesnt match context dir (RTL), no dirReset",
+        "<span dir=ltr>" + en + "</span>", rtlFmt.spanWrapWithKnownDir(LTR, en,
+            false, false));
+    assertEquals("overall dir (RTL) doesnt match context dir (unknown)",
+        "<span dir=rtl>" + he + "</span>", unkFmt.spanWrapWithKnownDir(RTL, he));
+    assertEquals(
+        "overall dir (LTR) doesnt match context dir (unknown), no dirReset",
+        "<span dir=ltr>" + en + "</span>", unkFmt.spanWrapWithKnownDir(LTR, en,
+            false, false));
+    assertEquals("overall dir (neutral) doesnt match context dir (LTR)", ".",
+        ltrFmt.spanWrapWithKnownDir(DEFAULT, "."));
+
+    assertEquals("exit dir (but not overall dir) is opposite to context dir",
+        longEn + he + LRM, ltrFmt.spanWrapWithKnownDir(LTR, longEn + he));
+    assertEquals("overall dir (but not exit dir) is opposite to context dir",
+        "<span dir=ltr>" + longEn + he + "</span>" + RLM,
+        rtlFmt.spanWrapWithKnownDir(LTR, longEn + he));
+
+    assertEquals("exit dir (but not overall dir) is opposite to context dir",
+        longEn + he + html + LRM, ltrFmt.spanWrapWithKnownDir(LTR, longEn + he
+            + html, true, true));
+    assertEquals(
+        "overall dir (but not exit dir) is opposite to context dir, dirReset",
+        "<span dir=ltr>" + longEn + he + "</span>" + RLM,
+        rtlFmt.spanWrapWithKnownDir(LTR, longEn + he, true, true));
+
+    assertEquals("plain text overall and exit dir same as context dir",
+        "&lt;br&gt; " + he + " &lt;br&gt;", ltrFmt.spanWrapWithKnownDir(LTR,
+            "<br> " + he + " <br>", false));
+    assertEquals("HTML overall and exit dir opposite to context dir",
+        "<span dir=rtl><br> " + he + " <br></span>" + LRM,
+        ltrFmt.spanWrapWithKnownDir(RTL, "<br> " + he + " <br>", true));
+
+    BidiFormatter ltrAlwaysSpanFmt = BidiFormatter.getInstance(LTR, true);
+    BidiFormatter rtlAlwaysSpanFmt = BidiFormatter.getInstance(RTL, true);
+    BidiFormatter unkAlwaysSpanFmt = BidiFormatter.getInstance(DEFAULT, true);
+
+    assertEquals("alwaysSpan, overall dir matches context dir (LTR)", "<span>"
+        + en + "</span>", ltrAlwaysSpanFmt.spanWrapWithKnownDir(LTR, en));
+    assertEquals(
+        "alwaysSpan, overall dir matches context dir (LTR), no dirReset",
+        "<span>" + en + "</span>", ltrAlwaysSpanFmt.spanWrapWithKnownDir(LTR,
+            en, false, false));
+    assertEquals("alwaysSpan, overall dir matches context dir (RTL)", "<span>"
+        + he + "</span>", rtlAlwaysSpanFmt.spanWrapWithKnownDir(RTL, he));
+    assertEquals(
+        "alwaysSpan, overall dir matches context dir (RTL), no dirReset",
+        "<span>" + he + "</span>", rtlAlwaysSpanFmt.spanWrapWithKnownDir(RTL,
+            he, false, false));
+
+    assertEquals(
+        "alwaysSpan, exit dir (but not overall dir) is opposite to context dir",
+        "<span>" + longEn + he + "</span>" + LRM,
+        ltrAlwaysSpanFmt.spanWrapWithKnownDir(LTR, longEn + he, true, true));
+    assertEquals(
+        "alwaysSpan, overall dir (but not exit dir) is opposite to context dir, dirReset",
+        "<span dir=ltr>" + longEn + he + "</span>" + RLM,
+        rtlAlwaysSpanFmt.spanWrapWithKnownDir(LTR, longEn + he, true, true));
+
+    assertEquals(
+        "alwaysSpan, plain text overall and exit dir same as context dir",
+        "<span>&lt;br&gt; " + he + " &lt;br&gt;</span>",
+        ltrAlwaysSpanFmt.spanWrapWithKnownDir(LTR, "<br> " + he + " <br>",
+            false));
+    assertEquals(
+        "alwaysSpan, HTML overall and exit dir opposite to context dir",
+        "<span dir=rtl><br> " + he + " <br></span>" + LRM,
+        ltrAlwaysSpanFmt.spanWrapWithKnownDir(RTL, "<br> " + he + " <br>", true));
+  }
+
+  public void testStartEdge() {
+    assertEquals(RIGHT, rtlFmt.startEdge());
+    assertEquals(LEFT, ltrFmt.startEdge());
+    assertEquals(LEFT, unkFmt.startEdge());
+  }
+
+  public void testUnicodeWrap() {
+    // The main testing of the logic is done in testUnicodeWrapWithKnownDir.
+    assertEquals(RLE + he + PDF + LRM, ltrFmt.unicodeWrap(he, true));
+    assertEquals(he, rtlFmt.unicodeWrap(he, true));
+    assertEquals(LRE + en + PDF + RLM, rtlFmt.unicodeWrap(en, true));
+    assertEquals(en, ltrFmt.unicodeWrap(en, true));
+  }
+
+  public void testUnicodeWrapWithKnownDir() {
+    assertEquals("overall dir matches context dir (LTR)", en + "<",
+        ltrFmt.unicodeWrapWithKnownDir(LTR, en + "<"));
+    assertEquals("overall dir matches context dir (LTR), HTML", en + "<br>",
+        ltrFmt.unicodeWrapWithKnownDir(LTR, en + "<br>", true));
+    assertEquals("overall dir matches context dir (RTL)", he + "<",
+        rtlFmt.unicodeWrapWithKnownDir(RTL, he + "<"));
+    assertEquals("overall dir matches context dir (RTL), HTML", he
+        + " <some strange tag>", rtlFmt.unicodeWrapWithKnownDir(RTL, he
+        + " <some strange tag>", true));
+
+    assertEquals("overall dir (RTL) doesnt match context dir (LTR), dirReset",
+        RLE + he + PDF + LRM, ltrFmt.unicodeWrapWithKnownDir(RTL, he));
+    assertEquals(
+        "overall dir (RTL) doesnt match context dir (LTR), no dirReset", RLE
+            + he + PDF, ltrFmt.unicodeWrapWithKnownDir(RTL, he, false, false));
+    assertEquals("overall dir (LTR) doesnt match context dir (RTL), dirReset",
+        LRE + en + PDF + RLM, rtlFmt.unicodeWrapWithKnownDir(LTR, en));
+    assertEquals(
+        "overall dir (LTR) doesnt match context dir (RTL), no dirReset", LRE
+            + en + PDF, rtlFmt.unicodeWrapWithKnownDir(LTR, en, false, false));
+    assertEquals(
+        "overall dir (RTL) doesnt match context dir (unknown), dirReset", RLE
+            + he + PDF, unkFmt.unicodeWrapWithKnownDir(RTL, he));
+    assertEquals(
+        "overall dir (LTR) doesnt match context dir (unknown), no dirReset",
+        LRE + en + PDF, unkFmt.unicodeWrapWithKnownDir(LTR, en, false, false));
+    assertEquals(
+        "overall dir (neutral) doesnt match context dir (LTR), dirReset", ".",
+        ltrFmt.unicodeWrapWithKnownDir(DEFAULT, "."));
+
+    assertEquals("exit dir (but not overall dir) is opposite to context dir",
+        longEn + he + LRM, ltrFmt.unicodeWrapWithKnownDir(LTR, longEn + he));
+    assertEquals("overall dir (but not exit dir) is opposite to context dir",
+        LRE + longEn + he + PDF + RLM, rtlFmt.unicodeWrapWithKnownDir(LTR,
+            longEn + he));
+
+    assertEquals("plain text overall and exit dir same as context dir", html
+        + " " + he + " " + html, ltrFmt.unicodeWrapWithKnownDir(LTR, html + " "
+        + he + " " + html, false));
+    assertEquals("HTML overall and exit dir opposite to context dir", RLE
+        + html + " " + he + " " + html + PDF + LRM,
+        ltrFmt.unicodeWrapWithKnownDir(RTL, html + " " + he + " " + html, true));
+  }
+}
diff --git a/user/test/com/google/gwt/i18n/shared/BidiUtilsTest.java b/user/test/com/google/gwt/i18n/shared/BidiUtilsTest.java
new file mode 100644
index 0000000..6fcb420
--- /dev/null
+++ b/user/test/com/google/gwt/i18n/shared/BidiUtilsTest.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.i18n.shared;
+
+import com.google.gwt.i18n.client.HasDirection.Direction;
+import com.google.gwt.junit.client.GWTTestCase;
+
+/**
+ * Unit tests for BidiUtils.
+ * Uses the pure Java implementation of
+ * {@link com.google.gwt.regexp.shared.RegExp}. {@link GwtBidiUtilsTest}
+ * performs all the tests using the GWT version of RegExp. This is needed
+ * due to several differences between the two implementations of RegExp (see
+ * {@link com.google.gwt.regexp.shared.RegExpTest} for details).
+ * <p>
+ * The class extends {@link GWTTestCase} so that {@link GwtBidiUtilsTest} can
+ * inherit every test method and only override {@link #getModuleName()}; here
+ * that method returns {@code null}. Test strings use Latin letters as LTR
+ * text and Hebrew letters (written as Unicode escapes) as RTL text. Because
+ * the test lives in the same package as BidiUtils, it can also call the
+ * package-private {@code stripHtmlIfNeeded}.
+ */
+public class BidiUtilsTest extends GWTTestCase {
+  
+  private static BidiUtils bidiUtils = BidiUtils.get();
+
+  // This is a hack to force a GWTTestCase to run as a vanilla JUnit TestCase.
+  @Override
+  public String getModuleName() {
+    return null;
+  }
+  
+  public void testEndsWithLtr() {
+    assertTrue(bidiUtils.endsWithLtr("a"));
+    assertTrue(bidiUtils.endsWithLtr("abc"));
+    assertTrue(bidiUtils.endsWithLtr("a (!)"));
+    assertTrue(bidiUtils.endsWithLtr("a.1"));
+    assertTrue(bidiUtils.endsWithLtr("http://www.google.com "));
+    assertTrue(bidiUtils.endsWithLtr("\u05e0 \u05e0 \u05e0a"));
+    assertTrue(bidiUtils.endsWithLtr(" \u05e0 \u05e0\u05e1a \u05e2 a !"));
+    assertFalse(bidiUtils.endsWithLtr(""));
+    assertFalse(bidiUtils.endsWithLtr(" "));
+    assertFalse(bidiUtils.endsWithLtr("1"));
+    assertFalse(bidiUtils.endsWithLtr("\u05e0"));
+    assertFalse(bidiUtils.endsWithLtr("\u05e0 1(!)"));
+    assertFalse(bidiUtils.endsWithLtr("a a a \u05e0"));
+    assertFalse(bidiUtils.endsWithLtr("a a abc\u05e0\u05e1def\u05e2. 1"));
+
+    assertTrue(bidiUtils.endsWithLtr("a a abc\u05e0<nasty tag>", false));
+    assertFalse(bidiUtils.endsWithLtr("a a abc\u05e0<nasty tag>", true));
+  }
+
+  public void testEndsWithRtl() {
+    assertTrue(bidiUtils.endsWithRtl("\u05e0"));
+    assertTrue(bidiUtils.endsWithRtl("\u05e0\u05e1\u05e2"));
+    assertTrue(bidiUtils.endsWithRtl("\u05e0 (!)"));
+    assertTrue(bidiUtils.endsWithRtl("\u05e0.1"));
+    assertTrue(bidiUtils.endsWithRtl("http://www.google.com/\u05e0 "));
+    assertTrue(bidiUtils.endsWithRtl("a a a a\u05e0"));
+    assertTrue(bidiUtils.endsWithRtl(" a a a abc\u05e0def\u05e3. 1"));
+    assertFalse(bidiUtils.endsWithRtl(""));
+    assertFalse(bidiUtils.endsWithRtl(" "));
+    assertFalse(bidiUtils.endsWithRtl("1"));
+    assertFalse(bidiUtils.endsWithRtl("a"));
+    assertFalse(bidiUtils.endsWithRtl("a 1(!)"));
+    assertFalse(bidiUtils.endsWithRtl("\u05e0 \u05e0 \u05e0a"));
+    assertFalse(bidiUtils.endsWithRtl("\u05e0 \u05e0\u05e1ab\u05e2 a (!)"));
+
+    assertFalse(bidiUtils.endsWithRtl("a a abc\u05e0<nasty tag>", false));
+    assertTrue(bidiUtils.endsWithRtl("a a abc\u05e0<nasty tag>", true));
+  }
+
+  public void testEstimateDirection() {
+    assertEquals(Direction.DEFAULT, bidiUtils.estimateDirection("", false));
+    assertEquals(Direction.DEFAULT, bidiUtils.estimateDirection(" ", false));
+    assertEquals(Direction.DEFAULT, bidiUtils.estimateDirection("! (...)",
+        false));
+    assertEquals(Direction.LTR, bidiUtils.estimateDirection(
+        "Pure Ascii content", false));
+    assertEquals(Direction.LTR, bidiUtils.estimateDirection("-17.0%", false));
+    assertEquals(Direction.LTR, bidiUtils.estimateDirection("http://foo/bar/",
+        false));
+    assertEquals(Direction.LTR, bidiUtils.estimateDirection(
+        "http://foo/bar/?s=\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"
+        + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0"
+        + "\u05d0\u05d0\u05d0\u05d0\u05d0\u05d0", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection("\u05d0", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "9 \u05d0 -> 17.5, 23, 45, 19", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "http://foo/bar/ \u05d0 http://foo2/bar2/ http://foo3/bar3/", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "\u05d0\u05d9\u05df \u05de\u05de\u05e9 "
+        + "\u05de\u05d4 \u05dc\u05e8\u05d0\u05d5\u05ea: "
+        + "\u05dc\u05d0 \u05e6\u05d9\u05dc\u05de\u05ea\u05d9 "
+        + "\u05d4\u05e8\u05d1\u05d4 \u05d5\u05d2\u05dd \u05d0"
+        + "\u05dd \u05d4\u05d9\u05d9\u05ea\u05d9 \u05de\u05e6\u05dc"
+        + "\u05dd, \u05d4\u05d9\u05d4 \u05e9\u05dd", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "\u05db\u05d0\u05df - http://geek.co.il/gallery/v/2007-06"
+        + " - \u05d0\u05d9\u05df \u05de\u05de\u05e9 \u05de\u05d4 "
+        + "\u05dc\u05e8\u05d0\u05d5\u05ea: \u05dc\u05d0 \u05e6"
+        + "\u05d9\u05dc\u05de\u05ea\u05d9 \u05d4\u05e8\u05d1\u05d4 "
+        + "\u05d5\u05d2\u05dd \u05d0\u05dd \u05d4\u05d9\u05d9\u05ea"
+        + "\u05d9 \u05de\u05e6\u05dc\u05dd, \u05d4\u05d9\u05d4 "
+        + "\u05e9\u05dd \u05d1\u05e2\u05d9\u05e7\u05e8 \u05d4\u05e8"
+        + "\u05d1\u05d4 \u05d0\u05e0\u05e9\u05d9\u05dd. \u05de"
+        + "\u05d4 \u05e9\u05db\u05df - \u05d0\u05e4\u05e9\u05e8 "
+        + "\u05dc\u05e0\u05e6\u05dc \u05d0\u05ea \u05d4\u05d4 "
+        + "\u05d3\u05d6\u05de\u05e0\u05d5\u05ea \u05dc\u05d4\u05e1"
+        + "\u05ea\u05db\u05dc \u05e2\u05dc \u05db\u05de\u05d4 "
+        + "\u05ea\u05de\u05d5\u05e0\u05d5\u05ea \u05de\u05e9\u05e2"
+        + "\u05e9\u05e2\u05d5\u05ea \u05d9\u05e9\u05e0\u05d5\u05ea "
+        + "\u05d9\u05d5\u05ea\u05e8 \u05e9\u05d9\u05e9 \u05dc"
+        + "\u05d9 \u05d1\u05d0\u05ea\u05e8", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "CAPTCHA \u05de\u05e9\u05d5\u05db\u05dc\u05dc "
+        + "\u05de\u05d3\u05d9?", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "Yes Prime Minister \u05e2\u05d3\u05db\u05d5\u05df. "
+        + "\u05e9\u05d0\u05dc\u05d5 \u05d0\u05d5\u05ea\u05d9 "
+        + "\u05de\u05d4 \u05d0\u05e0\u05d9 \u05e8\u05d5\u05e6"
+        + "\u05d4 \u05de\u05ea\u05e0\u05d4 \u05dc\u05d7\u05d2",
+        false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "17.4.02 \u05e9\u05e2\u05d4:13-20 .15-00 .\u05dc\u05d0 "
+        + "\u05d4\u05d9\u05d9\u05ea\u05d9 \u05db\u05d0\u05df.",
+        false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "5710 5720 5730. \u05d4\u05d3\u05dc\u05ea. "
+        + "\u05d4\u05e0\u05e9\u05d9\u05e7\u05d4", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "\u05d4\u05d3\u05dc\u05ea http://www.google.com "
+        + "http://www.gmail.com", false));
+    assertEquals(Direction.LTR, bidiUtils.estimateDirection(
+        "\u05d4\u05d3\u05dc\u05ea <some quite nasty html mark up>", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "\u05d4\u05d3\u05dc\u05ea <some quite nasty html mark up>", true));
+    assertEquals(Direction.LTR, bidiUtils.estimateDirection(
+        "\u05d4\u05d3\u05dc\u05ea &amp; &lt; &gt;", false));
+    assertEquals(Direction.RTL, bidiUtils.estimateDirection(
+        "\u05d4\u05d3\u05dc\u05ea &amp; &lt; &gt;", true));
+  }
+
+  public void testHasAnyLtr() {
+    assertFalse(bidiUtils.hasAnyLtr(""));
+    assertFalse(bidiUtils.hasAnyLtr("\u05e0\u05e1\u05e2"));
+    assertTrue(bidiUtils.hasAnyLtr("\u05e0\u05e1z\u05e2"));
+    assertFalse(bidiUtils.hasAnyLtr("123\t...  \n"));
+  }
+
+  public void testHasAnyRtl() {
+    assertFalse(bidiUtils.hasAnyRtl(""));
+    assertFalse(bidiUtils.hasAnyRtl("abc"));
+    assertTrue(bidiUtils.hasAnyRtl("ab\u05e0c"));
+    assertFalse(bidiUtils.hasAnyRtl("123\t...  \n"));
+  }
+
+  public void testStartsWithLtr() {
+    assertTrue(bidiUtils.startsWithLtr("a"));
+    assertTrue(bidiUtils.startsWithLtr("abc"));
+    assertTrue(bidiUtils.startsWithLtr("(!) a"));
+    assertTrue(bidiUtils.startsWithLtr("1.a"));
+    assertTrue(bidiUtils.startsWithLtr("/a/\u05e0/\u05e1/\u05e2"));
+    assertTrue(bidiUtils.startsWithLtr("a\u05e0 \u05e0 \u05e0"));
+    assertTrue(bidiUtils.startsWithLtr("! a \u05e0 \u05e0\u05e1a \u05e2"));
+    assertFalse(bidiUtils.startsWithLtr(""));
+    assertFalse(bidiUtils.startsWithLtr(" "));
+    assertFalse(bidiUtils.startsWithLtr("1"));
+    assertFalse(bidiUtils.startsWithLtr("\u05e0"));
+    assertFalse(bidiUtils.startsWithLtr("1(!) \u05e0"));
+    assertFalse(bidiUtils.startsWithLtr("\u05e0 a a a"));
+
+    assertTrue(bidiUtils.startsWithLtr("<nasty tag>\u05e0:a a abc", false));
+    assertFalse(bidiUtils.startsWithLtr("<nasty tag>\u05e0:a a abc", true));
+  }
+
+  public void testStartsWithRtl() {
+    assertTrue(bidiUtils.startsWithRtl("\u05e0"));
+    assertTrue(bidiUtils.startsWithRtl("\u05e0\u05e1\u05e2"));
+    assertTrue(bidiUtils.startsWithRtl("(!) \u05e0"));
+    assertTrue(bidiUtils.startsWithRtl("1.\u05e0"));
+    assertTrue(bidiUtils.startsWithRtl("/\u05e0/a/b/c"));
+    assertTrue(bidiUtils.startsWithRtl("\u05e0a a a a"));
+    assertTrue(bidiUtils.startsWithRtl("1. \u05e0. a a a abc\u05e1 def"));
+    assertFalse(bidiUtils.startsWithRtl(""));
+    assertFalse(bidiUtils.startsWithRtl(" "));
+    assertFalse(bidiUtils.startsWithRtl("1"));
+    assertFalse(bidiUtils.startsWithRtl("a"));
+    assertFalse(bidiUtils.startsWithRtl("(!) a"));
+    assertFalse(bidiUtils.startsWithRtl("a \u05e0 \u05e0 \u05e0"));
+
+    assertFalse(bidiUtils.startsWithRtl("<nasty tag>\u05e0:a a abc", false));
+    assertTrue(bidiUtils.startsWithRtl("<nasty tag>\u05e0:a a abc", true));
+  }
+
+  public void testStripHtmlIfNeeded() {
+    String str = "foo&lt;gev<nasty tag/>";
+    String stripped = "foo gev ";
+    assertEquals(stripped, bidiUtils.stripHtmlIfNeeded(str, true));
+    assertEquals(str, bidiUtils.stripHtmlIfNeeded(str, false));
+  }
+}
diff --git a/user/test/com/google/gwt/i18n/shared/GwtBidiUtilsTest.java b/user/test/com/google/gwt/i18n/shared/GwtBidiUtilsTest.java
new file mode 100644
index 0000000..4963b70
--- /dev/null
+++ b/user/test/com/google/gwt/i18n/shared/GwtBidiUtilsTest.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.i18n.shared;
+
+/**
+ * Re-runs every test inherited from {@link BidiUtilsTest} inside a GWT
+ * module, so that the GWT implementation of
+ * {@link com.google.gwt.regexp.shared.RegExp} is exercised as well. This is
+ * needed due to several differences between the two implementations of RegExp
+ * (see {@link com.google.gwt.regexp.shared.RegExpTest} for details).
+ */
+public class GwtBidiUtilsTest extends BidiUtilsTest {
+
+  /** Name of the GWT module in which the inherited tests are run. */
+  private static final String MODULE_NAME =
+      "com.google.gwt.i18n.I18NTest_shared";
+
+  @Override
+  public String getModuleName() {
+    return MODULE_NAME;
+  }
+}