Integrate new, faster, Javascript function clustering

http://gwt-code-reviews.appspot.com/669801

Review by: spoon@google.com

git-svn-id: https://google-web-toolkit.googlecode.com/svn/trunk@8890 8db76d5a-ed1c-0410-87a9-c151d255dfc7
diff --git a/dev/core/src/com/google/gwt/dev/util/editdistance/CharIndex.java b/dev/core/src/com/google/gwt/dev/util/editdistance/CharIndex.java
new file mode 100644
index 0000000..097ccb5
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/util/editdistance/CharIndex.java
@@ -0,0 +1,371 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+/**
+ * A performance-oriented character-indexed map.
+ * Provides a mapping from characters in a limited alphabet
+ * (defined by a CharSequence) to integer values (for array indexing).
+ * Emphasis is on performance (e.g., compared to using a generic HashMap),
+ * in terms of speed of (post-construction) lookup and space required.
+ *
+ * Maps are constructed using a static <tt>getInstance</tt> method that
+ * chooses an efficient implementation for a given string.
+ * The map interface consists of:
+ *    a <tt>size</tt> method for determining the maximum range of indices used;
+ *    a <tt>lookup</tt> method for translating a character to its index; and,
+ *    a <tt>nullElement</tt> method that returns the index that is not
+ *     returned by <tt>lookup</tt> for any characer actually in the map.
+ *
+ * The target application is the mapping of natural-language strings.
+ * Most strings include characters from a single language, and thus
+ * fall into a small slice of the Unicode space.  In particular,
+ * many strings fall into just the Latin series.
+ */
+public abstract class CharIndex {
+  /**
+   * An index based on a garden-variety hash map.
+   * This is the implementation of last recourse (from a
+   * performance perspective).
+   *
+   * TODO(mwyoung): there may be value in an implementation
+   * that falls in between the masked and fullhash variants;
+   * for example, one using a closed hash table with rehashing.
+   */
+  public static class FullHash extends CharIndex {
+    /**
+     * Mutable holder for a character
+     */
+    static class Char {
+      char c;
+      @Override
+      public boolean equals(Object x) {
+        return (x != null) && (((Char)x).c == this.c);
+      }
+      @Override
+      public int hashCode() {
+        return c;
+      }
+      @Override
+      public String toString() {
+        return "'" + c + "'";
+      }
+    }
+    
+    static final int NULL_ELEMENT = 0;
+    protected int lastUsed = NULL_ELEMENT;
+    /**
+     * Mapping from pattern characters to their integer index values
+     */
+    final java.util.HashMap<Char,Integer> map;
+
+    /**
+     * Constructs a full hash-based mapping
+     */
+    FullHash(CharSequence s) {
+      /* Choose a hash size larger at least twice the string length */
+      int len = s.length();
+      int power = Integer.highestOneBit(len);
+
+      /* Create the map */
+      map = new java.util.HashMap<Char,Integer>(
+          power << ((power == len) ? 1 : 2));
+
+      /*
+       * Add characters one at a time to the map
+       */
+
+      Char test = new Char();                   /* (re)used for lookup */
+      for (int i = 0; i < s.length(); i++) {
+        test.c = s.charAt(i);
+        if (map.get(test) == null) {
+          /* Not there... add it */
+          map.put(test, new Integer(++lastUsed));
+
+          /* Grow a new holder for subsequent lookups */
+          test = new Char();
+        }
+      }
+    }
+    
+    @Override
+    public int lookup(char c) {
+      final Char lookupTest = new Char();
+      lookupTest.c = c;
+      Integer result = map.get(lookupTest);
+      return (result != null) ? result.intValue() : 0;
+    }
+
+    @Override
+    public int[] map(CharSequence s, int[] mapped) {
+      /* Create one mutable Char, and reuse it for all lookups */
+      final Char lookupTest = new Char();
+
+      int len = s.length();
+      if (mapped.length < len) {
+        mapped = new int[len];
+      }
+
+      for (int i = 0; i < len; i++) {
+        lookupTest.c = s.charAt(i);
+        Integer result = map.get(lookupTest);
+        mapped[i] = (result != null) ? result.intValue() : NULL_ELEMENT;
+      }
+      return mapped;
+    }
+
+    @Override
+    public int nullElement() {
+      return NULL_ELEMENT;
+    }
+
+    @Override
+    public int size() {
+      return lastUsed + 1;
+    }
+  }
+  
+  /**
+   * An index based on a simple mask: index(c) = (c & MASK).
+   * This allows for a very fast index function for many
+   * languages outside of the ASCII spectrum.  Most languages
+   * fit in a single 8-bit Unicode slice.  Even languages
+   * that cross slices for special characters (such as extended
+   * Latin) often have no collisions under simple masking.
+   */
+  public static class Masked extends CharIndex {
+    /**
+     * Hash table size
+     */
+    static final int SIZE = 0x100;
+
+    /**
+     * Mask used for hashing
+     */
+    static final int MASK = (SIZE - 1);
+
+    /**
+     * Where we may invalid characters: beyond the hash table
+     */
+    static final int NULL_ELEMENT = SIZE;
+
+
+    /**
+     * Generates an instance of this implementation if possible.
+     * @param s pattern string
+     * @return an instance of this CharIndex if the pattern satisfies
+     *         the constraints for this implementation; otherwise, null
+     */
+    static Masked generate(CharSequence s) {
+      /*
+       * This implementation requires that there be no hash collisions
+       * among the pattern characters, using a simple mask-based hash.
+       */
+      char [] contains = new char[SIZE];
+
+      /* Ensure that for all x, hash(contains[x]) != x initially. */
+      contains[0] = (char)1;
+
+      /* Hash characters, recording values seen, rejecting collisions */
+      for (int i = 0; i < s.length(); i++) {
+        char c = s.charAt(i);
+        int index = c & MASK;
+        if (contains[index] != c) {
+          if ((contains[index] & MASK) == index) {
+            return null;
+          }
+          contains[index] = c;
+        }
+      }
+      return new Masked(contains);
+    }
+
+    /**
+     * Closed hash table: characters actually indexed.
+     * Unused cells contain values inappropriate to their index.
+     */
+    final char[] contains;
+
+    /**
+     * Constructor based on hash table built by generate()
+     */
+    private Masked(char [] contains) {
+      this.contains = contains;
+    }
+    
+    @Override
+    public int lookup(char c) {
+      int index = c & MASK;
+      return (c == contains[index]) ? index : NULL_ELEMENT;
+    }
+
+    @Override
+    public int[] map(CharSequence s, int[] mapped) {
+      int len = s.length();
+      if (mapped.length < len) mapped = new int[len];
+      for (int i = 0; i < len; i++) {
+        char c = s.charAt(i);
+        int index = c & MASK;
+        mapped[i] = (c == contains[index]) ? index : NULL_ELEMENT;
+      }
+      return mapped;
+    }
+
+    @Override
+    public int nullElement() { return NULL_ELEMENT; }
+
+    @Override
+    public int size() { return NULL_ELEMENT + 1; }
+  }
+
+  /**
+   * An index based on the identity mapping: index(c) = c.
+   * This requires minimal computation and no additional space
+   * for the basic ASCII character set.
+   */
+  public static class Straight extends CharIndex {
+    /**
+     * The largest character we will map (directly)
+     */
+    static final int MAX = 0x80;
+
+    /** 
+     * A mask used to find characters that fall outside
+     */
+    static final int MASK = ~(MAX - 1);
+
+    /**
+     * A map result we never generate for valid characters
+     */
+    static final int NULL_ELEMENT = MAX;
+
+
+    /**
+     * Generates an instance of this implementation if possible.
+     * @param s pattern string
+     * @return an instance of this CharIndex if the pattern satisfies
+     *         the constraints for this implementation; otherwise, null
+     */
+    static Straight generate(CharSequence s) {
+      /* This implementation requires that all characters fall below MAX */
+      for (int i = 0; i < s.length(); i++) {
+        if ((s.charAt(i) & MASK) != 0) {
+          return null;
+        }
+      }
+
+      return new Straight();
+    }
+
+    /**
+     * Simple private constructor, no state required
+     */
+    private Straight() { }
+
+    @Override
+    public int lookup(char c) {
+      return ((c & MASK) == 0) ? c : NULL_ELEMENT;
+    }
+
+    @Override
+    public int[] map(CharSequence s, int[] mapped) {
+      int len = s.length();
+      if (mapped.length < len) mapped = new int[len];
+      for (int i = 0; i < len; i++) {
+        char c = s.charAt(i);
+        mapped[i] = ((c & MASK) == 0) ? c : NULL_ELEMENT;
+      }
+      return mapped;
+    }
+
+    @Override
+    public int nullElement() {
+      return NULL_ELEMENT;
+    }
+
+    @Override
+    public int size() {
+      return NULL_ELEMENT + 1;
+    }
+  }
+
+  /**
+   * Generates an index for a given alphabet, defined by a character sequence.
+   * An appropriate implementation is chosen for the alphabet.  The
+   * character sequence is allowed to contain multiple instances of
+   * the same character.
+   *
+   * @param s the alphabet to be indexed
+   * @return an index appropriate to this alphabet
+   */
+  public static CharIndex getInstance(CharSequence s) {
+    CharIndex result;
+
+    /* Try fastest mappings first, then more general ones */
+
+    if ((result = Straight.generate(s)) != null) {
+      return result;
+    }
+
+    if ((result = Masked.generate(s)) != null) {
+      return result;
+    }
+
+    /* Full hash always works */
+    return new FullHash(s);
+  }
+  
+  /**
+   * Returns the index (mapping result) for a given character.
+   * If the character is present in the alphabet, then a unique index
+   * is returned; other characters may be mapped to unique
+   * values, *or* may be mapped to a shared <tt>nullElement</tt> value.
+   * @param c character for which an index is sought
+   * @return an integer index for the character
+   */
+  public abstract int lookup(char c);
+
+  /**
+   * Performs lookups on an entire sequence of characters.
+   * This allows an entire string to be mapped with fewer
+   * method invocations, and possibly with some added internal
+   * efficiencies.
+   *
+   * @param s character sequence to be indexed
+   * @param mapped array for storing results
+   * @return an array of indices for the character sequence,
+   *  either the <tt>mapped</tt> parameter or a larger newly-allocated
+   *  array if required
+   */
+  public abstract int[] map(CharSequence s, int[] mapped);
+
+  /**
+   * Returns an index that is not used for any character in
+   * the alphabet.  This index *may* be returned for characters
+   * not in the alphabet.
+   * @return an index not used for a valid character
+   */
+  public abstract int nullElement();
+
+  /**
+   * Returns the maximum index result that can be returned by
+   * the <tt>lookup</tt> function (including the <tt>nullElement</tt> result
+   * if applicable).  No negative values are ever returned
+   * from <tt>lookup</tt>.
+   * @return maximum index returned from any <tt>lookup</tt>
+   */
+  public abstract int size();
+}
diff --git a/dev/core/src/com/google/gwt/dev/util/editdistance/GeneralEditDistance.java b/dev/core/src/com/google/gwt/dev/util/editdistance/GeneralEditDistance.java
new file mode 100644
index 0000000..db3f45a
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/util/editdistance/GeneralEditDistance.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+/**
+ * An engine definition for computing string edit distances, and
+ * implementation generators.
+ *
+ * Generally, edit distance is the minimum number of edit operations required
+ * to transform one string into another.  Being a minimum transform, it
+ * forms a metric space (one that is non-negative, symmetric, and obeys
+ * triangle inequality).
+ *
+ * The most common form is Levenshtein distance, where the set of operations
+ * consists of:
+ *   deleting one character;
+ *   inserting one character; or,
+ *   substituting one character for another,
+ * each having a "cost" of 1.
+ *
+ * Another common form is Damerau-Levenshtein distance, which adds
+ *   transpose two adjacent characters
+ * to the set of operations.  Note that many implementations of
+ * Damerau distance do not account for insertion after transposition;
+ * this "restricted" Damerau distance does not obey triangle inequality,
+ * but is substantially easier to compute than the unrestricted metric.
+ *
+ * Another variation is to apply differential costs for the various
+ * operations.  For example, substitutions could be weighted by
+ * character type (e.g., vowel<=>vowel being a cheaper edit than
+ * vowel<=>consonant).
+ *
+ * This class defines an engine for computing edit distance from
+ * a fixed ("pattern") string to other ("target") strings.  The pattern
+ * is fixed at instantiation time; this allows an implementation to
+ * perform precomputations to make individual target computations faster.
+ *
+ * To allow further performance optimization, the GeneralEditDistance
+ * contract does not require thread-safety.  A <tt>duplicate</tt> method is
+ * defined to allow additional equivalent instances to be created
+ * for separate threads; it is essentially a type-knowledgeable <tt>clone</tt>.
+ */
+public interface GeneralEditDistance {
+  /**
+   * Creates a duplicate (clone) of this distance engine, typically
+   * in order to use it in another thread (as the GeneralEditDistance
+   * contract does not specify thread safety).
+   *
+   * This method differs from Object.clone() in two ways:
+   * it is typed (to GeneralEditDistance); and, it must not throw
+   * an exception.
+   *
+   * @return     a clone of this instance.
+   */
+  GeneralEditDistance duplicate();
+
+  /**
+   * Computes edit distance from the pattern string (used to
+   * construct this GeneralEditDistance instance) to a given target
+   * string, bounded by a limit of interest.
+   *
+   * The specified limit must be non-negative.  If the actual distance
+   * exceeds the limit, this method may return *any* value above the limit;
+   * it need not be the actual distance.  The behavior of
+   * illegal limit values is undefined; an implementation may
+   * return an incorrect result or raise a runtime exception.
+   *
+   * @param target string for which distance is to be compared
+   *        against a predefined pattern
+   * @param limit maximum distance for which exact result is
+   *        required (and beyond which any over-limit result
+   *        can be returned)
+   * @return edit distance from the predefined pattern to the
+   *        target (or any value above limit, if the actual distance
+   *        exceeds the prescribed limit)
+   */
+  int getDistance(CharSequence target, int limit);
+}
diff --git a/dev/core/src/com/google/gwt/dev/util/editdistance/GeneralEditDistances.java b/dev/core/src/com/google/gwt/dev/util/editdistance/GeneralEditDistances.java
new file mode 100644
index 0000000..32ebde2
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/util/editdistance/GeneralEditDistances.java
@@ -0,0 +1,212 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+/**
+ * A collection of instance generators for the GeneralEditDistance interface.
+ */
+public class GeneralEditDistances {
+  /**
+   * Chooses the best implementation of Levenshtein string edit distance
+   * available at the current time.
+   */
+  /*
+   * As of 2007-08-23, the best algorithm known (to the author=mwyoung) for
+   * short strings is one due to Eugene Myers, except for the special case
+   * where the distance limit is 0 or 1.  The Myers algorithm also has good
+   * worst-case performance for long strings when the edit distance is not
+   * reasonably bounded.
+   *
+   * When there is a good bound, a variant of the Ukkonen algorithm due to
+   * Berghel and Roach (modified by Michael Young to use linear space)
+   * is faster for long strings.
+   *
+   * Note that other algorithms that perform better in some cases for running
+   * text searches do not outperform Myers for rigid distance computations.
+   * Notably:
+   *   Navarro/Baeza-Yates (Algorithmica 23,2) simulates an NFA with an
+   *   epsilon-cycle on the initial state (appropriate for running texts)
+   *   and reports success without computing exact distance.  When adjusted
+   *   to a fixed starting point and computing distance, its state machine
+   *   is larger and it underperforms.
+   *
+   *   BITAP (Baeza-Yates/Gonnet, Manber/Wu) also simulates an NFA, and
+   *   Navarro claims that it wins for small patterns and small limits for
+   *   running search.  Experiments with a Java implementation showed that
+   *   it beat Myers on pure string edit distance only for limits where the
+   *   special 0-1 limit applied, where special-case comparison beats all.
+   *
+   * A survey of algorithms for running text search by Navarro appeared
+   * in ACM Computing Surveys 33#1: http://portal.acm.org/citation.cfm?id=375365
+   * Another algorithm (Four Russians) that Navarro claims superior for very
+   * long patterns and high limits was not evaluated for inclusion here.
+   * Filtering algorithms also improve running search, but do not help
+   * for pure edit distance.
+   */
+  private static class Levenshtein implements GeneralEditDistance {
+    /**
+     * Long+bounded implementation class: distance-only Berghel-Roach
+     */
+    private ModifiedBerghelRoachEditDistance berghel;
+
+    /**
+     * Short/unbounded implementation class: Myers bit-parallel
+     */
+    private MyersBitParallelEditDistance myers;
+
+    /**
+     * Saved pattern, for specialized comparisons
+     */    
+    private final CharSequence pattern;
+
+    /**
+     * Length of saved pattern
+     */
+    private final int patternLength;
+
+    private Levenshtein(CharSequence pattern) {
+      this.pattern = pattern;
+      this.patternLength = pattern.length();
+    }
+
+    public GeneralEditDistance duplicate() {
+      Levenshtein dup = new Levenshtein(pattern);
+
+      /* Duplicate the Myers engine, as it is cheaper than rebuilding */
+      if (this.myers != null) {
+        dup.myers = (MyersBitParallelEditDistance) this.myers.duplicate();
+      }
+      
+      /* Do not duplicate the Berghel engine; it provides no savings. */
+
+      return dup;
+    }
+
+    public int getDistance(CharSequence target, int limit) {
+      /* When the limit is 0 or 1, specialized comparisons are much faster. */
+      if (limit <= 1) {
+        return limit == 0 ?
+                 (pattern.equals(target) ? 0 : 1) :
+                 atMostOneError(pattern, target);
+      }
+
+      /*
+       * The best algorithm for long strings depends on the resulting
+       * edit distance (or the limit placed on it).  Without further
+       * information on the likelihood of a low distance, we guess
+       * based on the provided limit.  We currently lean toward using
+       * the Myers algorithm unless we are pretty sure that the
+       * Berghel-Roach algorithm will win (based on the limit).
+       *
+       * Note that when the string lengths are small (fewer characters
+       * than bits in a long), Myers wins regardless of limit.
+       */
+      if ((patternLength > 64)
+          && (limit < (target.length() / 10))) {
+        if (berghel == null) {
+          berghel = ModifiedBerghelRoachEditDistance.getInstance(pattern);
+        }
+        return berghel.getDistance(target, limit);
+      }
+
+      if (myers == null) {
+        myers = MyersBitParallelEditDistance.getInstance(pattern);
+      }
+
+      return myers.getDistance(target, limit);
+    }
+  }
+
+  /**
+   * Compares two strings for at most one insert/delete/substitute difference.
+   * Since operations cannot be composed, a simple case analysis is possible.
+   *
+   * @param s1 one string to be compared
+   * @param s2 the other string to be compared
+   * @return Levenshtein edit distance if no greater than 1;
+   *         otherwise, more than 1
+   */
+  public static int atMostOneError(CharSequence s1, CharSequence s2) {
+    int s1Length = s1.length();
+    int s2Length = s2.length();
+    int errors = 0;             /* running count of edits required */
+
+    switch(s2Length - s1Length) {
+      /*
+       * Strings are the same length.  No single insert/delete is possible;
+       * at most one substitution can be present.
+       */
+      case 0:
+        for (int i = 0; i < s2Length; i++) {
+          if ((s2.charAt(i) != s1.charAt(i)) && (errors++ != 0)) {
+            break;
+          }
+        }
+        return errors;
+
+      /*
+       * Strings differ in length by 1, so we have an insertion
+       * (and therefore cannot have any other substitutions).
+       */
+      case 1: /* s2Length > s1Length */
+        for (int i = 0; i < s1Length; i++) {
+          if (s2.charAt(i) != s1.charAt(i)) {
+            for (; i < s1Length; i++) {
+              if (s2.charAt(i + 1) != s1.charAt(i)) {
+                return 2;
+              }
+            }
+            return 1;
+          }
+        }
+        return 1;
+
+      /* Same as above case, with strings reversed */
+      case -1: /* s1Length > s2Length */
+        for (int i = 0; i < s2Length; i++) {
+          if (s2.charAt(i) != s1.charAt(i)) {
+            for (; i < s2Length; i++) {
+              if (s2.charAt(i) != s1.charAt(i + 1)) {
+                return 2;
+              }
+            }
+            return 1;
+          }
+        }
+        return 1;
+
+      /* Edit distance is at least difference in lengths; more than 1 here. */
+      default:
+        return 2;
+    }
+  }
+
+  /**
+   * Generates an GeneralEditDistance engine for a particular pattern string
+   * based on Levenshtein distance.  Caller must ensure that the
+   * pattern does not change (consider using pattern.toString() if
+   * necessary) as long as the generated object is to be used.
+   *
+   * @param pattern a string from which distance computations are desired
+   * @return an engine for computing Levenshtein distances from that pattern
+   */
+  public static GeneralEditDistance
+      getLevenshteinDistance(CharSequence pattern) {
+    return new Levenshtein(pattern);
+  }
+
+  private GeneralEditDistances() { }
+}
diff --git a/dev/core/src/com/google/gwt/dev/util/editdistance/ModifiedBerghelRoachEditDistance.java b/dev/core/src/com/google/gwt/dev/util/editdistance/ModifiedBerghelRoachEditDistance.java
new file mode 100644
index 0000000..de61af5
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/util/editdistance/ModifiedBerghelRoachEditDistance.java
@@ -0,0 +1,480 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+/**
+ * A modified version of a string edit distance described by Berghel and
+ * Roach that uses only O(d) space and O(n*d) worst-case time, where n is
+ * the pattern string length and d is the edit distance computed.
+ * We achieve the space reduction by keeping only those sub-computations
+ * required to compute edit distance, giving up the ability to
+ * reconstruct the edit path.
+ */
+public class ModifiedBerghelRoachEditDistance implements GeneralEditDistance {
+  /*
+   * This is a modification of the original Berghel-Roach edit
+   * distance (based on prior work by Ukkonen) described in
+   *   ACM Transactions on Information Systems, Vol. 14, No. 1,
+   *   January 1996, pages 94-106.
+   *
+   * I observed that only O(d) prior computations are required
+   * to compute edit distance.  Rather than keeping all prior
+   * f(k,p) results in a matrix, we keep only the two "outer edges"
+   * in the triangular computation pattern that will be used in
+   * subsequent rounds.  We cannot reconstruct the edit path,
+   * but many applications do not require that; for them, this
+   * modification uses less space (and empirically, slightly
+   * less time).
+   *
+   * First, some history behind the algorithm necessary to understand
+   * Berghel-Roach and our modification...
+   *
+   * The traditional algorithm for edit distance uses dynamic programming,
+   * building a matrix of distances for substrings:
+   * D[i,j] holds the distance for string1[0..i]=>string2[0..j].
+   * The matrix is initially populated with the trivial values
+   * D[0,j]=j and D[i,0]=i; and then expanded with the rule:
+   * <pre>
+   *    D[i,j] = min( D[i-1,j]+1,       // insertion
+   *                  D[i,j-1]+1,       // deletion
+   *                  (D[i-1,j-1]
+   *                   + (string1[i]==string2[j])
+   *                      ? 0           // match
+   *                      : 1           // substitution ) )
+   * </pre>
+   *
+   * Ukknonen observed that each diagonal of the matrix must increase
+   * by either 0 or 1 from row to row.  If D[i,j] = p, then the
+   * then the matching rule requires that D[i+x,j+x] = p for all
+   * x where string1[i..i+x) matches string2[j..j+j+x).  Ukkonen
+   * defined a function f(k,p) as the highest row number in which p
+   * appears on the k-th diagonal (those D[i,j] where k=(i-j), noting
+   * that k may be negative).  The final result of the edit
+   * distance is the D[n,m] cell, on the (n-m) diagonal; it is
+   * the value of p for which f(n-m, p) = m.  The function f can
+   * also be computed dynamically, according to a simple recursion:
+   * <pre>
+   *    f(k,p) {
+   *      contains_p = max(f(k-1,p-1), f(k,p-1)+1, f(k+1,p-1)+1)
+   *      while (string1[contains_p] == string2[contains_p + k])
+   *        contains_p++;
+   *      return contains_p;
+   *    }
+   * </pre>
+   * The max() expression finds a row where the k-th diagonal must
+   * contain p by virtue of an edit from the prior, same, or following
+   * diagonal (corresponding to an insert, substitute, or delete);
+   * we need not consider more distant diagonals because row-to-row
+   * and column-to-column changes are at most +/- 1.
+   *
+   * The original Ukkonen algorithm computed f(k,p) roughly as
+   * follows:
+   * <pre>
+   *    for (p = 0; ; p++) {
+   *      compute f(k,p) for all valid k
+   *      if (f(n-m, p) == m) return p;
+   *    }
+   * </pre>
+   *
+   * Berghel and Roach observed that many values of f(k,p) are
+   * computed unnecessarily, and reorganized the computation into
+   * a just-in-time sequence.  In each iteration, we are primarily
+   * interested in the terminating value f(main,p), where main=(n-m)
+   * is the main diagonal.  To compute that we need f(x,p-1) for
+   * three values of x: main-1, main, and main+1.  Those depend on
+   * values for p-2, and so forth.  We will already have computed
+   * f(main,p-1) in the prior round, and thus f(main-1,p-2) and
+   * f(main+1,p-2), and so forth.  The only new values we need to compute
+   * are on the edges: f(main-i,p-i) and f(main+i,p-i).  Noting that
+   * f(k,p) is only meaningful when abs(k) is no greater than p,
+   * one of the Berghel-Roach reviewers noted that we can compute
+   * the bounds for i:
+   * <pre>
+   *    (main+i &le p-i) implies (i &le; (p-main)/2)
+   * </pre>
+   * (where main+i is limited on the positive side) and similarly
+   * <pre>
+   *    (-(main-i) &le p-i) implies (i &le; (p+main)/2).
+   * </pre>
+   * (where main-i is limited on the negative side).
+   *
+   * This reduces the computation sequence to
+   * <pre>
+   *   for (i = (p-main)/2; i > 0; i--) compute f(main+i,p-i);
+   *   for (i = (p+main)/2; i > 0; i--) compute f(main-i,p-i);
+   *   if (f(main, p) == m) return p;
+   * </pre>
+   *
+   * The original Berghel-Roach algorithm recorded prior values
+   * of f(k,p) in a matrix, using O(distance^2) space, enabling
+   * reconstruction of the edit path, but if all we want is the
+   * edit *distance*, we only need to keep O(distance) prior computations.
+   *
+   * The requisite prior k-1, k, and k+1 values are conveniently
+   * computed in the current round and the two preceding it.
+   * For example, on the higher-diagonal side, we compute:
+   * <pre>
+   *    current[i] = f(main+i, p-i)
+   * </pre>
+   * We keep the two prior rounds of results, where p was one and two
+   * smaller.  So, from the preceidng round
+   * <pre>
+   *    last[i] = f(main+i, (p-1)-i)
+   * </pre>
+   *  and from the prior round, but one position back:
+   * <pre>
+   *    prior[i-1] = f(main+(i-1), (p-2)-(i-1))
+   * </pre>
+   * In the current round, one iteration earlier:
+   * <pre>
+   *    current[i+1] = f(main+(i+1), p-(i+1))
+   * </pre>
+   * Note that the distance in all of these evaluates to p-i-1,
+   * and the diagonals are (main+i) and its neighbors... just
+   * what we need.  The lower-diagonal side behaves similarly.
+   *
+   * We need to materialize values that are not computed in prior
+   * rounds, for either of two reasons: <ul>
+   *    <li> Initially, we have no prior rounds, so we need to fill
+   *     all of the "last" and "prior" values for use in the
+   *     first round.  The first round uses only on one side
+   *     of the main diagonal or the other.
+   *    <li> In every other round, we compute one more diagonal than before.
+   * </ul>
+   * In all of these cases, the missing f(k,p) values are for abs(k) > p,
+   * where a real value of f(k,p) is undefined.  [The original Berghel-Roach
+   * algorithm prefills its F matrix with these values, but we fill
+   * them as we go, as needed.]  We define
+   * <pre>
+   *    f(-p-1,p) = p, so that we start diagonal -p with row p,
+   *    f(p+1,p) = -1, so that we start diagonal p with row 0.
+   * </pre>
+   * (We also allow f(p+2,p)=f(-p-2,p)=-1, causing those values to
+   * have no effect in the starting row computation.]
+   *
+   * We only expand the set of diagonals visited every other round,
+   * when (p-main) or (p+main) is even.  We keep track of even/oddness
+   * to save some arithmetic.  The first round is always even, as p=abs(main).
+   * Note that we rename the "f" function to "computeRow" to be Googley.
+   */
+
+  private static final int [] EMPTY_INT_ARRAY = new int[0];
+
+  /**
+   * Creates an instance for computing edit distance from {@code pattern}.
+   * @param pattern string from which distances are measured
+   * @return an instance for computing distances from the pattern
+   */
+  public static ModifiedBerghelRoachEditDistance
+      getInstance(CharSequence pattern) {
+    return getInstance(pattern.toString());
+  }
+
+  /**
+   * Creates an instance for computing edit distance from {@code pattern}.
+   * @param pattern string from which distances are measured
+   * @return an instance for computing distances from the pattern
+   */
+  public static ModifiedBerghelRoachEditDistance
+      getInstance(String pattern) {
+    return new ModifiedBerghelRoachEditDistance(pattern.toCharArray());
+  }
+
+  /*
+   * The current and two preceding sets of Ukkonen f(k,p) values for diagonals
+   * around the main, computed by the main loop of {@code getDistance}.  These
+   * arrays are retained between calls to save allocation costs.  (They are all
+   * initialized to a real array so that we can indiscriminately use length
+   * when ensuring/resizing.)
+   */
+  private int[] currentLeft = EMPTY_INT_ARRAY;
+
+  private int[] currentRight = EMPTY_INT_ARRAY;
+  
+  private int[] lastLeft = EMPTY_INT_ARRAY;
+
+  private int[] lastRight = EMPTY_INT_ARRAY;
+
+  /**
+   * The "pattern" string against which others are compared
+   */
+  private final char[] pattern;
+
+  private int[] priorLeft = EMPTY_INT_ARRAY;
+
+  private int[] priorRight = EMPTY_INT_ARRAY;
+
+  private ModifiedBerghelRoachEditDistance(char[] pattern) {
+    this.pattern = pattern;
+  }
+
+  public ModifiedBerghelRoachEditDistance duplicate() {
+    return new ModifiedBerghelRoachEditDistance(pattern);
+  }
+
+  public int getDistance(CharSequence targetSequence, int limit) {
+    final int targetLength = targetSequence.length();
+
+    /*
+     * Compute the main diagonal number.
+     * The final result lies on this diagonal.
+     */
+    final int main = pattern.length - targetLength;
+
+    /*
+     * Compute our initial distance candidate.
+     * The result cannot be less than the difference in
+     * string lengths, so we start there.
+     */
+    int distance = Math.abs(main);
+    if (distance > limit) {
+      /* More than we wanted.  Give up right away */
+      return Integer.MAX_VALUE;
+    }
+
+    /* Snag a copy of the second string for efficiency */
+    final char[] target = new char[targetLength];
+    for (int i = 0; i < targetLength; i++) {
+      target[i] = targetSequence.charAt(i);
+    }
+
+    /*
+     * In the main loop below, the current{Right,Left} arrays record results
+     * from the current outer loop pass.  The last{Right,Left} and
+     * prior{Right,Left} arrays hold the results from the preceding two passes.
+     * At the end of the outer loop, we shift them around (reusing the prior
+     * array as the current for the next round, to avoid reallocating).
+     * The Right reflects higher-numbered diagonals, Left lower-numbered.
+     */
+
+    /*
+     * Fill in "prior" values for the first two passes through
+     * the distance loop.  Note that we will execute only one side of
+     * the main diagonal in these passes, so we only need
+     * initialize one side of prior values.
+     */
+
+    if (main <= 0) {
+      ensureCapacityRight(distance, false);
+      for (int j = 0; j <= distance; j++) {
+        lastRight[j] = distance - j - 1;  /* Make diagonal -k start in row k */
+        priorRight[j] = -1;
+      }
+    } else {
+      ensureCapacityLeft(distance, false);
+      for (int j = 0; j <= distance; j++) {
+        lastLeft[j] = -1;                 /* Make diagonal +k start in row 0 */
+        priorLeft[j] = -1;
+      }
+    }
+
+    /*
+     * Keep track of even rounds.  Only those rounds consider new diagonals,
+     * and thus only they require artificial "last" values below.
+     */
+    boolean even = true;
+
+    /*
+     * MAIN LOOP: try each successive possible distance until one succeeds.
+     */
+    while (true) {
+      /*
+       * Before calling computeRow(main, distance), we need to fill in
+       * missing cache elements.  See the high-level description above.
+       */
+
+      /*
+       * Higher-numbered diagonals
+       */
+
+      int offDiagonal = (distance - main) / 2;
+      ensureCapacityRight(offDiagonal, true);
+
+      if (even) {
+        /* Higher diagonals start at row 0 */
+        lastRight[offDiagonal] = -1;
+      }
+
+      int immediateRight = -1;
+      for (; offDiagonal > 0; offDiagonal--) {
+        currentRight[offDiagonal] = immediateRight = computeRow(
+            (main + offDiagonal), 
+            (distance - offDiagonal),
+            pattern,
+            target,
+            priorRight[offDiagonal - 1],
+            lastRight[offDiagonal],
+            immediateRight);
+      }
+
+
+      /*
+       * Lower-numbered diagonals
+       */
+
+      offDiagonal = (distance + main) / 2;
+      ensureCapacityLeft(offDiagonal, true);
+
+      if (even) {
+        /* Lower diagonals, fictitious values for f(-x-1,x) = x */
+        lastLeft[offDiagonal] = (distance - main) / 2 - 1;
+      }
+
+      int immediateLeft = even ? -1 : (distance - main) / 2;
+
+      for (; offDiagonal > 0; offDiagonal--) {
+        currentLeft[offDiagonal] = immediateLeft = computeRow(
+            (main - offDiagonal),
+            (distance - offDiagonal),
+            pattern, target,
+            immediateLeft,
+            lastLeft[offDiagonal],
+            priorLeft[offDiagonal - 1]);
+      }
+
+      /*
+       * We are done if the main diagonal has distance in the last row.
+       */
+      int mainRow = computeRow(main, distance, pattern, target,
+                               immediateLeft, lastLeft[0], immediateRight);
+
+      if ((mainRow == targetLength) || (++distance > limit) || (distance < 0)) {
+        break;
+      }
+
+      /* The [0] element goes to both sides. */
+      currentLeft[0] = currentRight[0] = mainRow;
+
+      /* Rotate rows around for next round: current=>last=>prior (=>current) */
+      int[] tmp = priorLeft;
+      priorLeft = lastLeft;
+      lastLeft = currentLeft;
+      currentLeft = priorLeft;
+
+      tmp = priorRight;
+      priorRight = lastRight;
+      lastRight = currentRight;
+      currentRight = tmp;
+
+      /* Update evenness, too */
+      even = !even;
+    }
+
+    return distance;
+  }
+
+  /**
+   * Computes the highest row in which the distance {@code p} appears
+   * in diagonal {@code k} of the edit distance computation for
+   * strings {@code a} and {@code b}.  The diagonal number is
+   * represented by the difference in the indices for the two strings;
+   * it can range from {@code -b.length()} through {@code a.length()}.
+   *
+   * More precisely, this computes the highest value x such that
+   * <pre>
+   *     p = edit-distance(a[0:(x+k)), b[0:x)).
+   * </pre>
+   *
+   * This is the "f" function described by Ukkonen.
+   *
+   * The caller must assure that abs(k) &le; p, the only values for
+   * which this is well-defined.
+   *
+   * The implementation depends on the cached results of prior
+   * computeRow calls for diagonals k-1, k, and k+1 for distance p-1.
+   * These must be supplied in {@code knownLeft}, {@code knownAbove},
+   * and {@code knownRight}, respectively.
+   * @param k diagonal number
+   * @param p edit distance
+   * @param a one string to be compared
+   * @param b other string to be compared
+   * @param knownLeft value of {@code computeRow(k-1, p-1, ...)}
+   * @param knownAbove value of {@code computeRow(k, p-1, ...)}
+   * @param knownRight value of {@code computeRow(k+1, p-1, ...)}
+   */
+  private int computeRow(int k, int p, char[] a, char[] b,
+                         int knownLeft, int knownAbove, int knownRight) {
+    assert (Math.abs(k) <= p);
+    assert (p >= 0);
+
+    /*
+     * Compute our starting point using the recurrance.
+     * That is, find the first row where the desired edit distance
+     * appears in our diagonal.  This is at least one past
+     * the highest row for 
+     */
+    int t;
+    if (p == 0) {
+      t = 0;
+    } else {
+      /*
+       * We look at the adjacent diagonals for the next lower edit distance.
+       * We can start in the next row after the prior result from
+       * our own diagonal (the "substitute" case), or the next diagonal
+       * ("delete"), but only the same row as the prior result from
+       * the prior diagonal ("insert").
+       */
+      t = Math.max(Math.max(knownAbove, knownRight) + 1, knownLeft);
+    }
+
+    /*
+     * Look down our diagonal for matches to find the maximum
+     * row with edit-distance p.
+     */
+    int tmax = Math.min(b.length, (a.length - k));
+
+    while ((t < tmax) && b[t] == a[t + k]) {
+      t++;
+    }
+
+    return t;
+  }
+
+  /*
+   * Ensures that the Left arrays can be indexed through {@code index},
+   * inclusively, resizing (and copying) as necessary.
+   */
+  private void ensureCapacityLeft(int index, boolean copy) {
+    if (currentLeft.length <= index) {
+      index++;
+      priorLeft = resize(priorLeft, index, copy);
+      lastLeft = resize(lastLeft, index, copy);
+      currentLeft = resize(currentLeft, index, false);
+    }
+  }
+
+  /*
+   * Ensures that the Right arrays can be indexed through {@code index},
+   * inclusively, resizing (and copying) as necessary.
+   */
+  private void ensureCapacityRight(int index, boolean copy) {
+    if (currentRight.length <= index) {
+      index++;
+      priorRight = resize(priorRight, index, copy);
+      lastRight = resize(lastRight, index, copy);
+      currentRight = resize(currentRight, index, false);
+    }
+  }
+
+  /* Resize an array, copying old contents if requested */
+  private int[] resize(int[] array, int size, boolean copy) {
+    int[] result = new int[size];
+    if (copy) {
+      System.arraycopy(array, 0, result, 0, array.length);
+    }
+    return result;
+  }
+}
diff --git a/dev/core/src/com/google/gwt/dev/util/editdistance/MyersBitParallelEditDistance.java b/dev/core/src/com/google/gwt/dev/util/editdistance/MyersBitParallelEditDistance.java
new file mode 100644
index 0000000..9e02ebd
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/util/editdistance/MyersBitParallelEditDistance.java
@@ -0,0 +1,517 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+
+/**
+ * Computes Levenshtein string-edit distance using the
+ * algorithm of Eugene Myers (see "A fast bit-vector algorithm for
+ * approximate string matching based on dynamic progamming",
+ * Journal of the ACM, 46(3): 395-415, 1999), using the reformulations
+ * of Heikki Hyyro (see "Explaining and Extending the Bit-parallel
+ * Approximate String Matching Algorithm of Myers") along with
+ * Gonzalo Navarro ("Faster Bit-Parallel Approximate String Matching",
+ * Proc. 13th Combinatorial Pattern Matching (CPM'2002)).
+ */
+public abstract class MyersBitParallelEditDistance
+    implements GeneralEditDistance, Cloneable {
+  /*
+   * The Myers algorithm is based on an adaptation of the traditional
+   * dynamic programming algorithm, in which a matrix of distances
+   * is computed column-wise (or row-wise).  For strings of length
+   * M and N, an MxN matrix (D) is filled with distances for substrings:
+   * D[i,j] holds the distance for string1[0..i]=>string2[0..j].
+   * The matrix is initially populated with the trivial values
+   * D[0,j]=j and D[i,0]=i; and then expanded with the rule:
+   *    D[i,j] = min( D[i-1,j]+1,       // insertion
+   *                  D[i,j-1]+1,       // deletion
+   *                  (D[i-1,j-1]
+   *                   + (string1[i]==string2[j])
+   *                      ? 0           // match
+   *                      : 1           // substitution ) )
+   *
+   * The Myers algorithm takes advantage of the observation that
+   * the difference from one matrix position to its higher diagonal
+   * is either 0 or 1, and consequently row-to-row and column-to-column
+   * differences are 1, 0, or -1.  It organizes whole columns of
+   * differences into bit arrays, computing new columns with
+   * arithmetic on whole words.  The edit distance is computed
+   * by tracking the column changes in the last row.
+   *
+   * The description below follows the naming from the Hyrro papers above.
+   * The pattern string (length M) is arranged vertically; the
+   * string to be matched (length N) is horizontal.  Columns are
+   * represented by bit-arrays with (1<<j) set for row i.  The
+   * inter-cell differences are grouped into column-variables:
+   * VN has bits set when the transition from row i-1 to row i is
+   * negative; VP has bits set when it is positive; HN and HP
+   * have bits set for column j-1 to j transitions that are
+   * negative or positive, respectively.  DZ has bits set when
+   * the diagonal from (i-1,j-1) to (i,j) is zero.
+   *
+   * The computation proceeds as follows:
+   *    We start at column 0, where all VP bits are 1.  The distance
+   *    is accumulated in row M, with initial value M in the matrix.
+   *
+   *    For each new column, we compute the diagonal-zero (DZ) first.
+   *    This happens under three circumstances:
+   *
+   *      when the pattern matches in this position ("match" above):
+   *        We make use of a bitmask, one for each character
+   *        in the alphabet, that has bits set wherever that character
+   *        appears in the pattern.  These bitmasks are pre-computed
+   *        when the pattern is established (in the class constructor).
+   *        In the column loop, we simply look up the bitmask for the
+   *        character in this column.  We call this PM.
+   *
+   *      when the prior column has a negative vertical transition
+   *       in this row ("deletion"):
+   *        This is simply the VN bit array from the prior column.
+   *    
+   *      when the row above has a negative transition in this column
+   *       ("insertion").
+   *        This happens when the diagonal coming into the cell above
+   *        (in *this* column) is zero but the vertical transition
+   *        in the preceding column for that row is positive.  That is:
+   *
+   *                D[i-2,j-1]
+   *                    |     \
+   *                    +1       0
+   *                    |           \
+   *                D[i-1,j-1]  -1 -> D[i-1,j]
+   *                    |     \         |
+   *                    ?        0      +1
+   *                    |            \  |
+   *                D[i,j-1]          D[i,j]
+   *
+   *        That is DZ[i,j] = DZ[i-1,j] & VP[i-1,j-1].
+   *        Note that this is recursive in the DZ[*,j] bit array, but we
+   *        want to compute the whole array at once.  We need an expression
+   *        that will propagate a 1 bit down the DZ column when the associated
+   *        VP bit is set.  Noting that the DZ[*,0] is the least significant
+   *        bit, this means propagating to more significant bits.  The carry bit
+   *        of an addition has this behavior.  If we set
+   *            DZ = (PM | VN)          // first two cases
+   *        then we can propagate bits up iff the corresponding VP bit
+   *        is set with:
+   *            DZ |= (((DZ & VP) + VP) ^ VP);
+   *        The (DZ&VP) piece covers our required condition on VP.  Adding
+   *        the VP bits causes any 1 bits from (DZ&VP) to result in a carry,
+   *        but also introduces a 1 bit where VP=1 but DZ=0, which are then
+   *        cleared with the ^VP piece.  The propagated bits are |ed into the
+   *        base case to get a final result.
+   *
+   *    Once we have DZ, we can compute HP and HN.  Note that VP and VN have
+   *    not yet been updated, and so refer to the preceding column; DZ refers
+   *    to the current column.
+   *      HN is true when DZ is true in this column but VP was true
+   *        in the prior column (HN = DZ & VP)
+   *      HP is true when: the prior VN is true (otherwise, the diagonal would
+   *        be negative, and that cannot happen); or, DZ is false (meaning
+   *        the diagonal increased by 1) and the prior-column VP is also false
+   *        (HP = VN | ~(DZ|VP))
+   *
+   *    Once we have HP and HN, we can tally the effect on the last row (adding
+   *    or subtracting one, respectively).
+   *
+   *    Finally, we can compute VP and VN for this column, in preparation
+   *    for the next round.  These computations are analagous to the ones for
+   *    HN and HP above, except that there, the incoming V values already
+   *    reflected the prior column and DZ this one, but now the H and DZ values
+   *    reflect the same column.  So, we first shift the H values to associate
+   *    with the proper bits from DZ.
+   */
+
+  /**
+   * A trivial implementation for the zero-length string
+   */
+  static class Empty extends MyersBitParallelEditDistance {
+    Empty(CharSequence s) {
+      super(s);
+    }
+
+    @Override
+    public GeneralEditDistance duplicate() {
+      return this;      /* thread-safe */
+    }
+
+    @Override
+    public int getDistance(CharSequence s, int k) {
+      return s.length();
+    }
+  }
+
+  /**
+   * An implementation using multiple integers for each column.
+   * This follows the pattern for the single-word implementation, with
+   * these changes:
+   *    sums/shifts must propagate from one word to the next;
+   *    to make that easier (and cheaper), we use only (SIZE-1) bits per word;
+   */
+  static class Multi extends MyersBitParallelEditDistance {
+    /* How many integers we use per column */
+    int count;
+    
+    /**
+     * Where the last-row bit lives -- only in the last array slot, though
+     */
+    final int lastBitPosition;
+
+    /**
+     * Bitmaps for pattern string: [pattern_index][column_index]
+     */
+    final int[][] positions;
+
+    int[] verticalNegativesReusable;
+    /* Reusable arrays for vertical changes */
+    int[] verticalPositivesReusable;
+
+    /**
+     * A mask with those bits set
+     */
+    final int wordMask = (-1 >>> 1);
+
+    /**
+     * How many bits we use per word
+     */
+    final int wordSize = Integer.SIZE - 1;
+
+    /**
+     * Constructs a multi-word engine
+     */
+    Multi(CharSequence s) {
+      super(s);
+
+      /* Compute number of words to use (rounding up) */
+      count = (m + wordSize - 1) / wordSize;
+
+      /* Precompute bitmaps for pattern string */
+      positions = PatternBitmap.map(s, idx, new int [idx.size()][], wordSize);
+
+      /* Where in last word does last row appear */
+      lastBitPosition = (1 << ((m - 1) % wordSize));
+      
+      /* Initialize scratchpad items */
+      perThreadInit();
+    }
+
+    @Override
+    public int getDistance(CharSequence s, int k) {
+      indices = idx.map(s, indices);
+
+      /* Initialize verticalPositive to all bits on, verticalNegative all off */
+      int[] verticalPositives = verticalPositivesReusable;
+      java.util.Arrays.fill(verticalPositives, wordMask);
+      int[] verticalNegatives = verticalNegativesReusable;
+      java.util.Arrays.fill(verticalNegatives, 0);
+
+      int distance = m;
+      int len = s.length();
+
+      /* We can only miss the distance-- below this many times: */
+      int maxMisses = k + len - m;
+      if (maxMisses < 0) maxMisses = Integer.MAX_VALUE;
+
+      outer:
+      for (int j = 0; j < len; j++) {
+        int[] position = positions[indices[j]];
+
+        /* Carry bits from one word to the next */
+        int sum = 0;
+        int horizontalPositiveShift = 1;
+        int horizontalNegativeShift = 0;
+
+        /* Iterate through words for this column */
+        for (int i = 0; i < count; i++) {
+          int verticalNegative = verticalNegatives[i];
+          int patternMatch = (position[i] | verticalNegative);
+          int verticalPositive = verticalPositives[i];
+          sum = (verticalPositive & patternMatch)
+                + (verticalPositive) + (sum >>> wordSize);
+          int diagonalZero = ((sum & wordMask) ^ verticalPositive)
+                             | patternMatch;
+          int horizontalPositive = (verticalNegative
+                                   | ~(diagonalZero | verticalPositive));
+          int horizontalNegative = diagonalZero & verticalPositive;
+
+          if (i == (count - 1)) {            /* only last bit in last word */
+            if ((horizontalNegative & lastBitPosition) != 0) {
+              distance--;
+            } else if ((horizontalPositive & lastBitPosition) != 0) {
+              distance++;
+              if ((maxMisses -= 2) < 0) {
+                break outer;
+              }
+            } else if (--maxMisses < 0) {
+              break outer;
+            }
+          }
+
+          horizontalPositive = ((horizontalPositive << 1)
+                               | horizontalPositiveShift);
+          horizontalPositiveShift = (horizontalPositive >>> wordSize);
+
+          horizontalNegative = ((horizontalNegative << 1)
+                               | horizontalNegativeShift);
+          horizontalNegativeShift = (horizontalNegative >>> wordSize);
+
+          verticalPositives[i] = (horizontalNegative
+                                  | ~(diagonalZero | horizontalPositive))
+                                 & wordMask;
+          verticalNegatives[i] = (diagonalZero & horizontalPositive) & wordMask;
+        }
+      }
+      return distance;
+    }
+
+    @Override
+    protected void perThreadInit() {
+      super.perThreadInit();
+
+      /* Allocate verticalPositive/verticalNegative arrays */
+      verticalPositivesReusable = new int[count];
+      verticalNegativesReusable = new int[count];
+    }
+  }
+
+  /*
+   * The following code is duplicated with both "int" and "long"
+   * as the word primitive type.  To improve maintainability, all instances
+   * of the word type are marked with "WORD" in comments.  This
+   * allows the "long" version to be regenerated from the "int" version
+   * through a mechanical replacement.  To make this work, the
+   * class name also needs to include the primitive type (without
+   * altered capitalization) and the WORD marker -- this is a quite
+   * intentional violation of the usual class naming rules.
+   */
+  /**
+   * An implementation using "int" as the word size.
+   */
+  // Replace "int/*WORD*/" with "long/*WORD*/" to generate the long version.
+  static class TYPEint/*WORD*/ extends MyersBitParallelEditDistance {
+    final int/*WORD*/ lastBitPosition;
+    final int/*WORD*/[] map;
+
+    @SuppressWarnings("cast")
+    TYPEint/*WORD*/(CharSequence s) {
+      super(s);
+      /* Precompute bitmaps for this pattern */
+      map = PatternBitmap.map(s, idx, new int/*WORD*/[idx.size()]);
+      /* Compute the bit that represents a change in the last row */
+      lastBitPosition = (((int/*WORD*/)1) << (m - 1));
+    }
+
+    @Override
+    public int getDistance(CharSequence s, int k) {
+      int len = s.length();
+
+      /* Quick check based on length */
+      if (((len - m) > k) || ((m - len) > k))
+        return k + 1;
+
+      /* Map characters to their integer positions in the bitmap array */
+      indices = idx.map(s, indices);
+
+      /* Initially, vertical change is all positive (none negative) */
+      int/*WORD*/ verticalPositive = -1;
+      int/*WORD*/ verticalNegative = 0;
+      int distance = m;
+
+      /* We can only miss the "distance--" below this many times: */
+      int maxMisses = k + len - m;
+      if (maxMisses < 0) maxMisses = Integer.MAX_VALUE;
+
+      for (int j = 0; j < len; j++) {
+        /* Where is diagonal zero: matches, or prior VN; plus recursion */
+        int/*WORD*/ diagonalZero = map[indices[j]] | verticalNegative;
+        diagonalZero |= (((diagonalZero & verticalPositive) + verticalPositive)
+                        ^ verticalPositive);
+
+        /* Compute horizontal changes */
+        int/*WORD*/ horizontalPositive = verticalNegative
+                                         | ~(diagonalZero | verticalPositive);
+        int/*WORD*/ horizontalNegative = diagonalZero & verticalPositive;
+
+        /* Update final distance based on horizontal changes */
+        if ((horizontalNegative & lastBitPosition) != 0) {
+          distance--;
+        } else if ((horizontalPositive & lastBitPosition) != 0) {
+          distance++;
+          if ((maxMisses -= 2) < 0) {
+            break;
+          }
+        } else if (--maxMisses < 0) {
+          break;
+        }
+
+        /* Shift Hs to next row, compute new Vs analagously to Hs above */
+        horizontalPositive = (horizontalPositive << 1) | 1;
+        verticalPositive = (horizontalNegative << 1)
+                           | ~(diagonalZero | horizontalPositive);
+        verticalNegative = diagonalZero & horizontalPositive;
+      }
+      return distance;
+    }
+  }
+
+  /**
+   * An implementation using "long" as the word size.
+   * GENERATED MECHANICALLY FROM THE "int" VERSION ABOVE.
+   * DO NOT EDIT THIS ONE -- EDIT ABOVE AND REAPPLY QUERY/REPLACE.
+   */
+  static class TYPElong/*WORD*/ extends MyersBitParallelEditDistance {
+    final long/*WORD*/ lastBitPosition;
+    final long/*WORD*/[] map;
+
+    @SuppressWarnings("cast")
+    TYPElong/*WORD*/(CharSequence s) {
+      super(s);
+      /* Precompute bitmaps for this pattern */
+      map = PatternBitmap.map(s, idx, new long/*WORD*/[idx.size()]);
+      /* Compute the bit that represents a change in the last row */
+      lastBitPosition = (((long/*WORD*/)1) << (m - 1));
+    }
+
+    @Override
+    public int getDistance(CharSequence s, int k) {
+      int len = s.length();
+
+      /* Quick check based on length */
+      if (((len - m) > k) || ((m - len) > k))
+        return k + 1;
+
+      /* Map characters to their integer positions in the bitmap array */
+      indices = idx.map(s, indices);
+
+      /* Initially, vertical change is all positive (none negative) */
+      long/*WORD*/ verticalPositive = -1;
+      long/*WORD*/ verticalNegative = 0;
+      int distance = m;
+
+      /* We can only miss the "distance--" below this many times: */
+      int maxMisses = k + len - m;
+      if (maxMisses < 0) maxMisses = Integer.MAX_VALUE;
+
+      for (int j = 0; j < len; j++) {
+        /* Where is diagonal zero: matches, or prior VN; plus recursion */
+        long/*WORD*/ diagonalZero = map[indices[j]] | verticalNegative;
+        diagonalZero |= (((diagonalZero & verticalPositive) + verticalPositive)
+                        ^ verticalPositive);
+
+        /* Compute horizontal changes */
+        long/*WORD*/ horizontalPositive = verticalNegative
+                                         | ~(diagonalZero | verticalPositive);
+        long/*WORD*/ horizontalNegative = diagonalZero & verticalPositive;
+
+        /* Update final distance based on horizontal changes */
+        if ((horizontalNegative & lastBitPosition) != 0) {
+          distance--;
+        } else if ((horizontalPositive & lastBitPosition) != 0) {
+          distance++;
+          if ((maxMisses -= 2) < 0) {
+            break;
+          }
+        } else if (--maxMisses < 0) {
+          break;
+        }
+
+        /* Shift Hs to next row, compute new Vs analagously to Hs above */
+        horizontalPositive = (horizontalPositive << 1) | 1;
+        verticalPositive = (horizontalNegative << 1)
+                           | ~(diagonalZero | horizontalPositive);
+        verticalNegative = diagonalZero & horizontalPositive;
+      }
+      return distance;
+    }
+  }
+  
+  /**
+   * Chooses an appropriate implementation for a given pattern string.
+   * @param s pattern string
+   * @return distance calculator appropriate for the pattern
+   */
+  public static MyersBitParallelEditDistance getInstance(CharSequence s) {
+    int m = s.length();
+    return (m <= Integer.SIZE) ?
+             ((m == 0) ? new Empty(s) : new TYPEint(s)) :
+           (s.length() <= Long.SIZE) ?
+             new TYPElong(s) :
+             new Multi(s);
+  }
+
+  /**
+   * Tests a computation manually
+   */
+  public static void main(String[] args) {
+    MyersBitParallelEditDistance b = getInstance(args[0]);
+    int k = args.length > 2 ? Integer.parseInt(args[2]) : 0;
+    System.out.println("Result: " + b.getDistance(args[1], k));
+  }
+
+  /**
+   * Index mapping for pattern string
+   */
+  final CharIndex idx;
+
+  /**
+   * Reusable array of indices for target strings
+   */
+  int[] indices = new int[0];
+
+  /**
+   * Length of pattern
+   */
+  final int m;
+
+  /**
+   * Constructs a distance calculator for a given string
+   */
+  protected MyersBitParallelEditDistance(CharSequence s) {
+    m = s.length();
+    idx = CharIndex.getInstance(s);
+  }
+
+  public GeneralEditDistance duplicate() {
+    try {
+      return (MyersBitParallelEditDistance)clone();
+    } catch (CloneNotSupportedException x) { /*IMPOSSIBLE */
+      throw new IllegalStateException("Cloneable object would not clone");
+    }
+  }
+
+  /**
+   * Computes distance from the pattern to a given string, bounded by
+   * a limiting distance @see(GeneralEditDistance.getDistance(CharSequence,int))
+   */
+  public abstract int getDistance(CharSequence s, int k);
+
+  @Override
+  protected Object clone() throws CloneNotSupportedException {
+    Object obj = super.clone();
+
+    /* Re-initialize any non-thread-safe parts */
+    ((MyersBitParallelEditDistance)obj).perThreadInit();
+
+    return obj;
+  }
+
+  /**
+   * Initializes items that cannot be shared among threads
+   */
+  protected void perThreadInit() {
+    indices = new int[0];
+  }
+}
diff --git a/dev/core/src/com/google/gwt/dev/util/editdistance/PatternBitmap.java b/dev/core/src/com/google/gwt/dev/util/editdistance/PatternBitmap.java
new file mode 100644
index 0000000..5ee1733
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/util/editdistance/PatternBitmap.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright 2010 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+
+/**
+ * A bitmap array generator for the positions that a given character
+ * appears within a pattern.  The array of bitmaps is indexed by a
+ * CharIndex that represents the alphabet for the pattern.
+ * One bitmap is produced for each character.  The bitmap for
+ * a character "c" has the <tt>(1<<i)</tt> bit set for each position "i"
+ * where that character appears in the pattern.
+ *
+ * Several implementations are provided for different string
+ * sizes: int, long, int[].  The caller provides a (zeroed) array of the
+ * desired type, sized according to the CharIndex; the array is
+ * returned as a convenience.
+ */
+public class PatternBitmap {
+  /**
+   * Computes a pattern bitmap for a short character sequence.
+   * For each character in the alphabet (represented by a CharIndex),
+   * bits are set where that character appears in the sequence.
+   * For this generator, the pattern must fit in an "int": the
+   * character sequence must not be longer than Integer.SIZE.
+   * @param s the character sequence defining the bits to be set
+   * @param idx the alphabet for which bitmaps should be produced
+   * @param result an array (of size <tt>idx.size()</tt> or greater) to
+   *        hold the resulting bitmaps
+   * @return the result array passed as a parameter, for convenience
+   */
+  public static int[] map(CharSequence s, CharIndex idx, int[] result) {
+    int len = s.length();
+    assert (len <= Integer.SIZE);
+    for (int i = 0; i < len; i++) {
+      result[idx.lookup(s.charAt(i))] |= (1 << i);
+    }
+    return result;
+  }
+
+  /**
+   * Computes a pattern bitmap for a character sequence of any length.
+   * For each character in the alphabet (represented by a CharIndex),
+   * bits are set where that character appears in the sequence.
+   * Each (per-character) bitmap is represented by an "int[]".
+   * @param s the character sequence defining the bits to be set
+   * @param idx the alphabet for which bitmaps should be produced
+   * @param result an array (of size <tt>idx.size()</tt> or greater) to
+   *        hold the resulting bitmaps
+   * @param width how many bits to be use in each word, no more
+   *        than Integer.SIZE
+   * @return the result array passed as a parameter, for convenience
+   */
+  public static int[][] map(CharSequence s, CharIndex idx,
+                             int[][] result, int width) {
+    assert (width <= Integer.SIZE);
+    int len = s.length();
+    int rowSize = (len + width - 1) / width;
+
+    /*
+     * Use one zero-filled bitmap for alphabet characters not in the pattern
+     */
+    int[] nullElement = new int[rowSize];
+    java.util.Arrays.fill(result, nullElement);
+
+    int wordIndex = 0;          /* Which word we are on now */
+    int bitWithinWord = 0;      /* Which bit within that word */
+
+    for (int i = 0; i < s.length(); i++) {
+      int[] r = result[idx.lookup(s.charAt(i))];
+      if (r == nullElement) {
+
+        /* Create a separate zero-filled bitmap for this alphabet character */
+        r = result[idx.lookup(s.charAt(i))] = new int[rowSize];
+      }
+      r[wordIndex] |= (1 << bitWithinWord);
+
+      /* Step to the next bit (and word if appropriate) */
+      if (++bitWithinWord == width) {
+        bitWithinWord = 0;
+        wordIndex++;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Computes a pattern bitmap for a medium character sequence.
+   * For each character in the alphabet (represented by a CharIndex),
+   * bits are set where that character appears in the sequence.
+   * For this generator, the pattern must fit in a "long": the
+   * character sequence must not be longer than Long.SIZE.
+   * @param s the character sequence defining the bits to be set
+   * @param idx the alphabet for which bitmaps should be produced
+   * @param result an array (of size <tt>idx.size()</tt> or greater) to
+   *        hold the resulting bitmaps
+   * @return the result array passed as a parameter, for convenience
+   */
+  public static long[] map(CharSequence s, CharIndex idx, long[] result) {
+    int len = s.length();
+    assert (len <= Long.SIZE);
+    for (int i = 0; i < len; i++) {
+      result[idx.lookup(s.charAt(i))] |= (1L << i);
+    }
+    return result;
+  }
+  private PatternBitmap() { /* Prevent instantiation */ }
+}
diff --git a/dev/core/test/com/google/gwt/dev/util/editdistance/CharIndexTest.java b/dev/core/test/com/google/gwt/dev/util/editdistance/CharIndexTest.java
new file mode 100644
index 0000000..cdd2490
--- /dev/null
+++ b/dev/core/test/com/google/gwt/dev/util/editdistance/CharIndexTest.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+import com.google.gwt.dev.util.editdistance.CharIndex;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link CharIndex}.
+ */
+public class CharIndexTest extends TestCase {
+  
+  /**
+   * Tests a mapping where the masked reduction fails (where
+   * the low-order bits of several characters overlap.
+   */
+  public void testFullHash() throws Exception {
+    String string = "abc\u0001\u0101\u0201\u0301";
+    CharIndex idx = CharIndex.getInstance(string+string);
+    assertEquals(idx.getClass(), CharIndex.FullHash.class);
+    assertEquals(idx.lookup('c'), 3);
+    assertEquals(idx.nullElement(), 0);
+
+    /* because string has no duplicates: */
+    assertEquals(idx.size(), string.length() + 1);
+
+    generalVerify(idx, string, "xyz012\u0123\u1234");
+  }
+
+  /** Tests fullhash.Char overridden methods */
+  public void testFullHashChar() {
+    CharIndex.FullHash.Char x = new CharIndex.FullHash.Char();
+    x.c = 'A';
+    assertEquals("'A'", x.toString());
+  }
+
+  /**
+   * Tests a masked mapping (where characters all fall within
+   * a slice of Unicode defined by a simple mask).
+   */
+  public void testMasked() throws Exception {
+    String string = "\u0141\u0142\u0171\u0131";
+    CharIndex idx = CharIndex.getInstance(string+string);
+    assertEquals(idx.getClass(), CharIndex.Masked.class);
+    assertEquals(idx.lookup('\u0141'), 0x0041);
+    assertEquals(idx.size(), (idx.nullElement()+1));
+    generalVerify(idx, string, "xyz012\u0123\u1234");
+  }
+
+  /**
+   * Tests a straight mapping (where characters are all less
+   * than some maximum, including at least ASCII.
+   */
+  public void testStraight() throws Exception {
+    String string = "abcdandmore";
+    CharIndex idx = CharIndex.getInstance(string+string);
+    assertEquals(idx.getClass(), CharIndex.Straight.class);
+    assertEquals(idx.lookup('a'), 'a');
+    assertEquals(idx.size(), (idx.nullElement()+1));
+    generalVerify(idx, string, "xyz012\u0123\u1234");
+  }
+
+  /**
+   * Verifies abstract properties of any CharIndex
+   * @param idx index to test
+   * @param string characters in the pattern for that index
+   * @param more characters not in the pattern
+   */
+  void generalVerify(CharIndex idx, String string, String more) {
+    /* Test the map() method */
+    int[] mapped = idx.map(string, new int[0]);
+
+    for (int i = 0; i < string.length(); i++) {
+      char ci = string.charAt(i);
+      for (int j = 0; j < string.length(); j++) {
+        char cj = string.charAt(j);
+
+        /* Each character in the pattern should get a unique index */
+        assertTrue((mapped[i] == idx.lookup(cj)) == (ci == cj));
+      }
+      for (int j = 0; j < more.length(); j++) {
+        char cj = more.charAt(j);
+
+        /*
+         * Characters not in the pattern should not match any that
+         * are in the pattern (but they may match one another).
+         */
+        assertTrue(idx.lookup(ci) != idx.lookup(cj));
+      }
+    }
+  }
+}
diff --git a/dev/core/test/com/google/gwt/dev/util/editdistance/GeneralEditDistanceTest.java b/dev/core/test/com/google/gwt/dev/util/editdistance/GeneralEditDistanceTest.java
new file mode 100644
index 0000000..b0d9afe
--- /dev/null
+++ b/dev/core/test/com/google/gwt/dev/util/editdistance/GeneralEditDistanceTest.java
@@ -0,0 +1,327 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+import com.google.gwt.dev.util.editdistance.GeneralEditDistance;
+import com.google.gwt.dev.util.editdistance.GeneralEditDistances;
+
+import junit.framework.TestCase;
+
+/**
+ * Test cases for the GeneralEditDistance class.
+ */
+public class GeneralEditDistanceTest extends TestCase {
+
+  /**
+   * A basic set of tests for any unit-cost Levenshtein edit distance
+   * implementation.
+   */
+  protected static abstract class AbstractLevenshteinTestCase extends TestCase {
+    Factory g;
+    AbstractLevenshteinTestCase(Factory g) {
+     this.g = g;
+    }
+
+    /**
+     * Tests a Levenshtein engine against the DP-based computation
+     * for a bunch of string pairs.
+     */
+    public void testLevenshteinOnWords() {
+      for (int i = 0; i < words.length; i++) {
+        for (int j = 0; j < words.length; j++) {
+          GeneralEditDistance ed = g.getInstance(words[i]);
+          specificAlgorithmVerify(ed,
+                                  words[i], words[j],
+                                  wordsDistances[i][j]);
+        }
+      }
+    }
+
+    /** Tests Levenshtein edit distance on a longer pattern */
+    public void testLongerPattern() {
+      genericLevenshteinVerify("abcdefghijklmnopqrstuvwxyz",
+                               "abcefghijklMnopqrStuvwxyz..",
+                               5 /* dMS.. */);
+    }
+
+    /** Tests Levenshtein edit distance on a very short pattern */
+    public void testShortPattern() {
+      genericLevenshteinVerify("short", "shirt", 1);
+    }
+
+    /** Verifies zero-length behavior */
+    public void testZeroLengthPattern() {
+      String nonEmpty = "target";
+      genericLevenshteinVerify("", nonEmpty, nonEmpty.length());
+      genericLevenshteinVerify(nonEmpty, "", nonEmpty.length());
+    }
+
+    /** Tests the default Levenshtein engine on a pair of strings */
+    void genericLevenshteinVerify(CharSequence s1, CharSequence s2,
+                                  int expectedResult) {
+      specificAlgorithmVerify(g.getInstance(s1),
+                              s1, s2, expectedResult);
+    }
+  }
+
+  /**
+   * A base class for other reusable tests
+   */
+  public interface Factory {
+    public GeneralEditDistance getInstance(CharSequence s1);
+  }
+
+  /** Test of the "best-of-breed" Levenshtein engine (getLevenshteinDistance) */
+  public static class GenericLevenshteinTest extends AbstractLevenshteinTestCase {
+    public GenericLevenshteinTest() {
+      super(new Factory() {
+          public GeneralEditDistance getInstance(CharSequence s1) {
+            return GeneralEditDistances.getLevenshteinDistance(s1);
+          }
+        });
+    }
+  }
+
+  /** A very large string for testing. */
+  public final static String MAGNA =
+        "We have granted to God, and by this our present Charter have " +
+        "confirmed, for Us and our Heirs for ever, that the Church of " +
+        "England shall be free, and shall have all her whole Rights and " +
+        "Liberties inviolable.  We have granted also, and given to all " +
+        "the Freemen of our Realm, for Us and our Heirs for ever, these " +
+        "Liberties under-written, to have and to hold to them and their " +
+        "Heirs, of Us and our Heirs for ever.";
+
+  /**
+   * A small set of words for testing, including at least some of
+   * each of these: empty, very short, more than 32/64 character,
+   * punctuation, non-ASCII characters
+   */
+  static String[] words = { "", "a", "b", "c", "ab", "ace",
+    "fortressing",      "inadequately", "prank",        "authored",
+    "fortresing",       "inadeqautely", "prang",        "awthered",
+    "cruller's",        "fanatic",      "Laplace",      "recollections",
+    "Kevlar",           "underpays",    "jalape\u00f1o","ch\u00e2telaine",
+    "kevlar",           "overpaid",     "jalapeno",     "chatelaine",
+    "A survey of algorithms for running text search by Navarro appeared",
+    "in ACM Computing Surveys 33#1: http://portal.acm.org/citation.cfm?...",
+    "Another algorithm (Four Russians) that Navarro",
+    "long patterns and high limits was not evaluated for inclusion here.",
+    "long patterns and low limits were evaluated for inclusion here.",
+    "Filtering algorithms also improve running search",
+    "for pure edit distance."
+  };
+
+  static int[][] wordsDistances = new int[words.length][words.length];
+  static {
+    int[][] expect = wordsDistances;
+    for (int i = 0; i < words.length; i++) {
+      for (int j = 0; j < i; j++) {
+        expect[i][j] = GeneralEditDistanceTest
+                       .dynamicProgrammingLevenshtein(words[i],words[j]);
+        expect[j][i] = expect[i][j];
+      }
+    }
+  }
+  /**
+   * Computes Levenshtein edit distance using the far-from-optimal
+   * dynamic programming technique.  This is here purely to verify
+   * the results of better algorithms.
+   */
+  public static int dynamicProgrammingLevenshtein(String s1, String s2) {
+    int[] lastRow = new int[s1.length()+1];
+    for (int i = 0; i < lastRow.length; i++) {
+      lastRow[i] = i;
+    }
+    for (int j = 0; j < s2.length(); j++) {
+      int[] thisRow = new int[lastRow.length];
+      thisRow[0] = j + 1;
+      char s2c = s2.charAt(j);
+      for (int i = 1; i < thisRow.length; i++) {
+        thisRow[i] = Math.min(Math.min(lastRow[i]+1,thisRow[i-1]+1),
+                              lastRow[i-1] + ((s2c == s1.charAt(i-1)) ? 0 : 1));
+      }
+      lastRow = thisRow;
+    }
+    return lastRow[lastRow.length-1];
+  }
+
+  /**
+   * Performs some edits on a string in a StringBuilder.
+   * @param b string to be modified in place
+   * @param alphabet some characters guaranteed not to be in the original
+   * @param replaces how many single-character replacements to try
+   * @param inserts how many characters to insert
+   * @return the number of edits actually performed
+   */
+  public static int performSomeEdits(StringBuilder b,
+                                     String alphabet,
+                                     int replaces, int inserts) {
+    java.util.Random r = new java.util.Random(768614336404564651L);
+    int edits = 0;
+
+    for (int i = 0; i < inserts; i++) {
+      int where = r.nextInt(b.length());
+      b.insert(where, alphabet.charAt(r.nextInt(alphabet.length())));
+      edits++;
+    }
+    for (int i = 0; i < replaces; i++) {
+      int where = r.nextInt(b.length());
+      if (alphabet.indexOf(b.charAt(where)) < 0) {
+        String sub = "" + alphabet.charAt(r.nextInt(alphabet.length()));
+        b.replace(where, (where + 1), sub);
+        edits++;
+      }
+    }
+    return edits;
+  }
+  
+  /**
+   * Generates a long random alphabetic string,
+   * suitable for use with testSomeEdits (using digits for the alphabet).
+   * @param size desired string length
+   * @param seed random number generator seed
+   * @return random alphabetic string of the requested length
+   */
+  static String generateRandomString(int size, Long seed) {
+    char[] alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".toCharArray();
+
+    /* Create a (repeatable) random string from the alphabet */
+    java.util.Random rand = new java.util.Random(seed);
+    StringBuilder huge = new StringBuilder(size);
+    for (int i = 0; i < size; i++) {
+      huge.append(alphabet[rand.nextInt(alphabet.length)]);
+    }
+    return huge.toString();
+  }
+
+  /** Exercises an edit distance engine across a wide range of limit values */
+  static void genericVerification(GeneralEditDistance ed,
+                                  CharSequence s1, CharSequence s2,
+                                  int expectedResult) {
+
+    if (s1.length() < 500) {
+      /*
+       * For small strings, try every limit 
+       */
+      int max = Math.max(s1.length(), s2.length()) + 2;
+      for (int k = 0; k < max; k++) {
+        verifyResult(s1, s2, expectedResult, k,
+                     ed.getDistance(s2, k));
+      }
+    } else {
+      /*
+       * For big strings, try a sampling of limits:
+       *   0 to 3,
+       *   another 4 on either side of the expected result
+       *   s2 length
+       */
+      for (int k = 0; k < 4; k++) {
+        verifyResult(s1, s2, expectedResult, k,
+                     ed.getDistance(s2, k));
+      }
+      for (int k = Math.max(4, expectedResult - 4); k < expectedResult + 4; k++) {
+        verifyResult(s1, s2, expectedResult, k,
+                     ed.getDistance(s2, k));
+      }
+      verifyResult(s1, s2, expectedResult, s2.length(),
+                   ed.getDistance(s2, s2.length()));
+    }
+
+    /* Always try near MAX_VALUE */
+    assertEquals(ed.getDistance(s2, Integer.MAX_VALUE-1), expectedResult);
+    assertEquals(ed.getDistance(s2, Integer.MAX_VALUE), expectedResult);
+  }
+
+  /** Tests a specific engine on a pair of strings */
+  static void specificAlgorithmVerify(GeneralEditDistance ed,
+                                      CharSequence s1, CharSequence s2,
+                                      int expectedResult) {
+    genericVerification(ed, s1, s2, expectedResult);
+
+    /* Try again with the same instance */
+    genericVerification(ed, s1, s2, expectedResult);
+
+    /* Try again with a duplicate instance */
+    genericVerification(ed.duplicate(), s1, s2, expectedResult);
+  }
+
+  /**
+   * Verifies the distance between an original string and some
+   * number of simple edits on it.  The distance is assumed to
+   * be unit-cost Levenshtein distance.
+   */
+  static void testSomeEdits(Factory g, String original,
+                            int replaces, int inserts) {
+    StringBuilder modified = new StringBuilder(original);
+    int edits = performSomeEdits(modified, "0123456789", replaces, inserts);
+
+    GeneralEditDistanceTest.specificAlgorithmVerify(
+        g.getInstance(original), original, modified, edits);
+
+    GeneralEditDistanceTest.specificAlgorithmVerify(
+        g.getInstance(modified), modified, original, edits);
+
+    GeneralEditDistanceTest.specificAlgorithmVerify(
+        g.getInstance(modified).duplicate(), modified, original, edits);
+  }
+
+  /**
+   * Verifies a single edit distance result.
+   * If the expected distance is within limit, result must b
+   * be correct; otherwise, result must be over limit.
+   *
+   * @param s1 one string compared
+   * @param s2 other string compared
+   * @param expectedResult correct distance from s1 to s2
+   * @param k limit applied to computation
+   * @param d distance computed
+   */
+  static void verifyResult(CharSequence s1, CharSequence s2,
+                           int expectedResult, int k, int d) {
+    if (k >= expectedResult) {
+      assertEquals("Distance from " + s1 + " to " + s2
+                    + " should be " + expectedResult
+                    + " (within limit=" + k + ") but was " + d,
+                   expectedResult, d);
+    } else {
+      assertTrue("Distance from " + s1 + " to " + s2
+                 + " should be " + expectedResult
+                 + " (exceeding limit=" + k + ") but was " + d,
+                 d > k);
+    }
+  }
+
+  /** Tests special case of Levenshtein edit distance with small limits */
+  public void testAtMostOneError() {
+    assertEquals(
+        GeneralEditDistances.atMostOneError("unchanged", "unchanged"),
+        0);
+
+    StringBuilder un = new StringBuilder("un");
+    assertEquals(
+        GeneralEditDistances.atMostOneError("unchanged", un.append("changed")),
+        0);
+    
+    assertEquals(GeneralEditDistances.atMostOneError("shirt", "short"), 1);
+    assertEquals(GeneralEditDistances.atMostOneError("shirt", "shirts"), 1);
+    assertEquals(GeneralEditDistances.atMostOneError("sort", "short"), 1);
+
+    assertTrue(GeneralEditDistances.atMostOneError("short", "verylong") > 1);
+    assertTrue(GeneralEditDistances.atMostOneError("short", "sharp") > 1);
+    assertTrue(GeneralEditDistances.atMostOneError("small", "malls") > 1);
+  }
+}
diff --git a/dev/core/test/com/google/gwt/dev/util/editdistance/ModifiedBerghelRoachEditDistanceTest.java b/dev/core/test/com/google/gwt/dev/util/editdistance/ModifiedBerghelRoachEditDistanceTest.java
new file mode 100644
index 0000000..bed7b27
--- /dev/null
+++ b/dev/core/test/com/google/gwt/dev/util/editdistance/ModifiedBerghelRoachEditDistanceTest.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+import com.google.gwt.dev.util.editdistance.GeneralEditDistance;
+import com.google.gwt.dev.util.editdistance.ModifiedBerghelRoachEditDistance;
+import com.google.gwt.dev.util.editdistance.GeneralEditDistanceTest.AbstractLevenshteinTestCase;
+
+import static com.google.gwt.dev.util.editdistance.GeneralEditDistanceTest.MAGNA;
+import static com.google.gwt.dev.util.editdistance.GeneralEditDistanceTest.generateRandomString;
+import static com.google.gwt.dev.util.editdistance.GeneralEditDistanceTest.testSomeEdits;
+
+/**
+ * Test cases for the ModifiedBerghelRoachEditDistance class.
+ *
+ * The bulk of the test is provided by the superclass, for
+ * which we provide GeneralEditDistance instances.
+ *
+ * Since Berghel-Roach is superior for longer strings with moderately
+ * low edit distances, we try a few of those specifically.
+ * This Modified form uses less space, and can handle yet larger ones.
+ */
+public class ModifiedBerghelRoachEditDistanceTest extends junit.framework.TestCase {
+  /** Basic Levenshtein tests for ModifedBerghelRoachEditDistance */
+  public static class Basic extends AbstractLevenshteinTestCase {
+    Basic() {
+      super(new Factory());
+    }
+  }
+  private static class Factory implements GeneralEditDistanceTest.Factory {
+    public GeneralEditDistance getInstance(CharSequence s) {
+      return ModifiedBerghelRoachEditDistance.getInstance(s.toString());
+    }
+  }
+
+  static final Factory FACTORY = new Factory();
+
+  public void testHugeEdit() {
+    final int SIZE = 10000;
+    final long SEED = 1;
+
+    testSomeEdits(FACTORY, generateRandomString(SIZE, SEED), (SIZE / 50), (SIZE / 50));
+  }
+
+  public void testHugeString() {
+    /*
+     * An even larger size is feasible, but the test would no longer
+     * qualify as "small".
+     */
+    final int SIZE = 20000;
+    final long SEED = 1;
+
+    testSomeEdits(FACTORY, generateRandomString(SIZE, SEED), 30, 25);
+  }
+
+  public void testLongString() {
+    testSomeEdits(FACTORY, MAGNA, 8, 10);
+  }
+
+  public void testLongStringMoreEdits() {
+    testSomeEdits(FACTORY, MAGNA, 40, 30);
+  }
+}
diff --git a/dev/core/test/com/google/gwt/dev/util/editdistance/MyersBitParallelEditDistanceTest.java b/dev/core/test/com/google/gwt/dev/util/editdistance/MyersBitParallelEditDistanceTest.java
new file mode 100644
index 0000000..9998788
--- /dev/null
+++ b/dev/core/test/com/google/gwt/dev/util/editdistance/MyersBitParallelEditDistanceTest.java
@@ -0,0 +1,213 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+import com.google.gwt.dev.util.editdistance.GeneralEditDistance;
+import com.google.gwt.dev.util.editdistance.MyersBitParallelEditDistance;
+
+import junit.framework.TestCase;
+
+/**
+ * Test cases for the MyersBitParallelEditDistance class.
+ *
+ * Note that the GeneralEditDistance class tests provide
+ * good general coverage of this class as well.
+ *
+ * The tests here look for boundary conditions in the
+ * specific algorithm: near 32- and 64-bit edges.
+ */
+public class MyersBitParallelEditDistanceTest extends TestCase {
+
+  /** Generates an instance (just a notational shorthand) */
+  static MyersBitParallelEditDistance generate(String pattern) {
+    return MyersBitParallelEditDistance.getInstance(pattern);
+  }
+
+  public void test32end() {
+    String s1 = "abcdefghijklmnopqrstuvwxyz012345";
+    String s2 = "abcdefghijklmnopqrstuvwxyz01234";
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2,
+                                                1);
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2+"x",
+                                                1);
+  }
+
+  public void test32start() {
+    String s1 = "abcdefghijklmnopqrstuvwxyz012345";
+    String s2 = "Abcdefghijklmnopqrstuvwxyz012345";
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2,
+                                                1);
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2.substring(1),
+                                                1);
+  }
+
+  public void test32various() {
+    String s1 = "abcdefghijklmnopqrstuvwxyz012345";
+    String s2 = "abcdeghijklmNopqrstu@vwxyz1234";
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2,
+                                                5 /*fN@05*/);
+  }
+  
+  /*
+   * Some tests with strings exactly 32 characters long -- just enough
+   * for an "int" bitmap.
+   */
+
+  public void test33end() {
+    String s1 = "abcdefghijklmnopqrstuvwxyz0123456";
+    String s2 = "abcdefghijklmnopqrstuvwxyz012345";
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2,
+                                                1);
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2+"x",
+                                                1);
+  }
+
+  public void test33start() {
+    String s1 = "abcdefghijklmnopqrstuvwxyz0123456";
+    String s2 = "Abcdefghijklmnopqrstuvwxyz0123456";
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2,
+                                                1);
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2.substring(1),
+                                                1);
+  }
+
+  public void test64end() {
+    String s1 = "abcdefghijklmnopqrstuvwxyz0123456";
+    String s2 = "abcdefghijklmnopqrstuvwxyz012345";
+    GeneralEditDistanceTest.genericVerification(generate(s1+s1),
+                                                s1+s1, s1+s2,
+                                                1);
+    GeneralEditDistanceTest.genericVerification(generate(s1+s1),
+                                                s1+s1, s1+s2+"x",
+                                                1);
+  }
+
+  public void test64middle() {
+    String s1 = "abcdefghijklmnopqrstuvwxyz0123456";
+    String s2 = "abcdefghijklmnopqrstuvwxyz012345";
+    GeneralEditDistanceTest.genericVerification(generate(s1+s1),
+                                                s1+s1, s1+"x"+s2,
+                                                2);
+    GeneralEditDistanceTest.genericVerification(generate(s1+s1),
+                                                s1+s1, s2+"x"+s1,
+                                                1);
+  }
+
+  public void test64start() {
+    String s1 = "abcdefghijklmnopqrstuvwxyz0123456";
+    String s2 = "Abcdefghijklmnopqrstuvwxyz0123456";
+    GeneralEditDistanceTest.genericVerification(generate(s1+s1),
+                                                s1+s1, s2+s1,
+                                                1);
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2.substring(1),
+                                                1);
+  }
+
+  public void testbig() {
+    String s1base = "abcdefghijklmnopqrstuvwxyz0123456";
+    String s1 = s1base + s1base + s1base + s1base + s1base;
+    String s2 = s1base
+                + "abcdeghijklmNopqrstu@vwxyz12346"
+                + s1base
+                + "bcdefGhijklmno$pqrstuvwxyz012345"
+                + s1base;
+
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2, 
+                                                5+4 /*fN@05, aG$6*/);
+  }
+
+  /** Verifies the choice of bit array sizing */
+  public void testBitArraySizing() {
+    String thirtyTwo = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+    assertEquals(generate("").getClass(),
+                 MyersBitParallelEditDistance.Empty.class);
+    assertEquals(generate(thirtyTwo).getClass(),
+                 MyersBitParallelEditDistance.TYPEint.class);
+    assertEquals(generate(thirtyTwo+"x").getClass(),
+                 MyersBitParallelEditDistance.TYPElong.class);
+    assertEquals(generate(thirtyTwo+thirtyTwo).getClass(),
+                 MyersBitParallelEditDistance.TYPElong.class);
+    assertEquals(generate(thirtyTwo+thirtyTwo+"x").getClass(),
+                 MyersBitParallelEditDistance.Multi.class);
+  }
+
+  /** Tests an "impossible" exception path */
+  public void testInternalCloneNotSupportedException() {
+    try {
+      new MyersBitParallelEditDistance.Multi("not really impossible") {
+        @Override
+        public Object clone() throws CloneNotSupportedException {
+          throw new CloneNotSupportedException("do the impossible");
+        }
+      }.duplicate();
+      assertTrue("Failed to throw exception", false);
+    } catch (IllegalStateException x) {
+      /* EXPECTED RESULT */
+    }
+  }
+
+  /** Test main programs to make sure they do not die unnaturally */
+  public void testMainProgramsForSanity() {
+    MyersBitParallelEditDistance.main(new String[] { "yes", "no", "5" });
+    MyersBitParallelEditDistance.Multi.main(new String[] { "yes", "no", "5" });
+  }
+
+  /** Test on a variety of word pairs, reusing an instance appropriately */
+  public void testOnWordSet() {
+    String [] words = GeneralEditDistanceTest.words;
+    int [][] expect = GeneralEditDistanceTest.wordsDistances;
+    for (int i = 0; i < words.length; i++) {
+      GeneralEditDistance engine = generate(words[i]);
+      for (int j = 0; j <= i; j++) {
+        GeneralEditDistanceTest.genericVerification(engine,
+                                                    words[i], words[j],
+                                                    expect[i][j]);
+      }
+    }
+  }
+
+  /** Tests a short pattern and target */
+  public void testShort() {
+    String s1 = "short";
+    String s2 = "snorts";
+    GeneralEditDistanceTest.genericVerification(generate(s1),
+                                                s1, s2, 2);
+  }
+
+  /** Tests zero-length patterns and targets; distance is other length */
+  public void testZeroLength() {
+    assertEquals(0, generate("").getDistance("", 1));
+
+    String other = "other";
+    GeneralEditDistanceTest.genericVerification(generate(""),
+                                                "", other,
+                                                other.length());
+    GeneralEditDistanceTest.genericVerification(generate(other),
+                                                other, "",
+                                                other.length());
+  }
+}
diff --git a/dev/core/test/com/google/gwt/dev/util/editdistance/PatternBitmapTest.java b/dev/core/test/com/google/gwt/dev/util/editdistance/PatternBitmapTest.java
new file mode 100644
index 0000000..8941315
--- /dev/null
+++ b/dev/core/test/com/google/gwt/dev/util/editdistance/PatternBitmapTest.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2010 Google Inc.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.util.editdistance;
+
+import com.google.gwt.dev.util.editdistance.CharIndex;
+import com.google.gwt.dev.util.editdistance.PatternBitmap;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for {@link PatternBitmap}.
+ */
+public class PatternBitmapTest extends TestCase {
+  
+  /** Reverses a string */
+  public static String reverse(String s) {
+    StringBuilder b = new StringBuilder();
+    for (int i = s.length(); --i >= 0;) {
+      b.append(s.charAt(i));
+    }
+    return b.toString();
+  }
+
+  /**
+   * Tests the int[] form of mapping.
+   */
+  public void testInteger() throws Exception {
+    String string = "abcdandmore";
+    CharIndex idx = CharIndex.getInstance(string);
+
+    int[] map = PatternBitmap.map(string, idx, new int[idx.size()]);
+
+    /* Spot check some */
+    assertEquals(0x11, map[idx.lookup('a')]);
+    assertEquals(0x02, map[idx.lookup('b')]);
+    assertEquals(0x04, map[idx.lookup('c')]);
+    assertEquals(0x48, map[idx.lookup('d')]);
+
+    /* Check all others for zero/non-zero */
+    for (int i = 0; i < 0xffff; i++) {
+      char c = (char)i;
+      int where = string.indexOf(c);
+      if (where >= 0) {
+        assertTrue("Map for pattern character '" + c + "' should be non-zero",
+                     (map[idx.lookup(c)] & (1<<where)) != 0);
+      } else {
+        assertEquals("Map for unused character '" + c + "' should be zero",
+                     0, map[idx.lookup(c)]);
+      }
+    }
+  }
+
+  /**
+   * Tests the int[][] form of mapping.
+   */
+  public void testIntegerArray() throws Exception {
+    String string = "x012345678901234567890123456789"
+                  + "01234567890123456789x0123456789"
+                  + "01234x567890123456789";
+    int wordSize = 31;
+
+    CharIndex idx = CharIndex.getInstance(string);
+
+    int[][] map = PatternBitmap.map(string, idx, new int[idx.size()][],
+                                     wordSize);
+
+    /* Spot check some */
+    assertEquals(1<<0,  map[idx.lookup('x')][0]);
+    assertEquals(1<<20, map[idx.lookup('x')][1]);
+    assertEquals(1<<5,  map[idx.lookup('x')][2]);
+
+    /* Check all others for null element/not */
+    int[] notThere = map[idx.lookup('\u0000')];
+    for (int i = 0; i < 0xffff; i++) {
+      char c = (char)i;
+      int where = string.indexOf(c);
+      if (where >= 0) {
+        assertTrue("Map for pattern character '" + c + "'"
+                   + " should be non-zero",
+                   (map[idx.lookup(c)] != notThere));
+        int bit = map[idx.lookup(c)][where/wordSize] & (1 << (where%wordSize));
+        assertTrue("Map for pattern character '" + c + "'"
+                   + " should have " + where + " bit on",
+                   (bit != 0));
+      } else {
+        assertEquals("Map for unused character '" + c + "' should be none",
+                     notThere, map[idx.lookup(c)]);
+      }
+    }
+  }
+
+  /**
+   * Tests the long[] form of mapping.
+   */
+  public void testLong() throws Exception {
+    String string = reverse("The quick brown fox jumps over the lazy dog.");
+    CharIndex idx = CharIndex.getInstance(string);
+
+    long[] map = PatternBitmap.map(string, idx, new long[idx.size()]);
+
+    /* Spot check some */
+    long e = Long.parseLong("00100000000000000000000000001000010000000000", 2);
+             /* ............ The quick brown fox jumps over the lazy dog. */
+
+    assertEquals(e,  map[idx.lookup('e')]);
+
+    long o = Long.parseLong("00000000000010000100000000100000000000000100", 2);
+             /* ............ The quick brown fox jumps over the lazy dog. */
+    assertEquals(o, map[idx.lookup('o')]);
+
+    /* Check all others for zero/non-zero */
+    for (int i = 0; i < 0xffff; i++) {
+      char c = (char)i;
+      int where = string.indexOf(c);
+      if (where >= 0) {
+        assertTrue("Map for pattern character '" + c + "' should be non-zero",
+                   (map[idx.lookup(c)] & (1L<<where)) != 0);
+      } else {
+        assertEquals("Map for unused character '" + c + "' should be zero",
+                     0, map[idx.lookup(c)]);
+      }
+    }
+  }
+}