Fix the worst concurrent modification problems in compiler memory-light set/map.

Due to a bug in com.google.gwt.dev.util.collect.HashMap.put(), we were eagerly growing the underlying table even when the key was already mapped.  This causes an iterators to be invalidated on a non-structural change.  Of course, we weren't actually tracking structural changes, which would cause the iterator to quietly continue, returning a 'null' keyed entry when it should not have, and possibly duplicating / skipping other entries.

1) I fixed the bug in HashMap.put() and HashSet.add() to defer growth until we're sure the key doesn't exist already.

2) Added basic concurrent modification tracking to the associated iterators, in a way that doesn't add additional memory to the collections themselves.

3) Other various and sundry cleanups.

http://gwt-code-reviews.appspot.com/1384804

Review by: zundel@google.com

git-svn-id: https://google-web-toolkit.googlecode.com/svn/trunk@9858 8db76d5a-ed1c-0410-87a9-c151d255dfc7
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/HashMap.java b/dev/core/src/com/google/gwt/dev/util/collect/HashMap.java
index c02df9e..d59f04a 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/HashMap.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/HashMap.java
@@ -22,6 +22,7 @@
 import java.util.AbstractCollection;
 import java.util.AbstractSet;
 import java.util.Collection;
+import java.util.ConcurrentModificationException;
 import java.util.Iterator;
 import java.util.Map;
 import java.util.NoSuchElementException;
@@ -43,39 +44,44 @@
    */
   private static final int INITIAL_TABLE_SIZE = 4;
 
-  private class EntryIterator implements Iterator<Entry<K, V>> {
+  private abstract class BaseIterator<E> implements Iterator<E> {
+    private Object[] coModCheckKeys = keys;
     private int index = 0;
     private int last = -1;
 
-    {
-      advanceToItem();
-    }
-
     public boolean hasNext() {
+      if (coModCheckKeys != keys) {
+        throw new ConcurrentModificationException();
+      }
+      advanceToItem();
       return index < keys.length;
     }
 
-    public Entry<K, V> next() {
+    public E next() {
       if (!hasNext()) {
         throw new NoSuchElementException();
       }
       last = index;
-      Entry<K, V> toReturn = new HashEntry(index++);
-      advanceToItem();
-      return toReturn;
+      return iteratorItem(index++);
     }
 
     public void remove() {
       if (last < 0) {
         throw new IllegalStateException();
       }
+      if (coModCheckKeys != keys) {
+        throw new ConcurrentModificationException();
+      }
       internalRemove(last);
       if (keys[last] != null) {
+        // Hole was plugged.
         index = last;
       }
       last = -1;
     }
 
+    protected abstract E iteratorItem(int index);
+
     private void advanceToItem() {
       for (; index < keys.length; ++index) {
         if (keys[index] != null) {
@@ -85,6 +91,13 @@
     }
   }
 
+  private class EntryIterator extends BaseIterator<Entry<K, V>> {
+    @Override
+    protected Entry<K, V> iteratorItem(int index) {
+      return new HashEntry(index);
+    }
+  }
+
   private class EntrySet extends AbstractSet<Entry<K, V>> {
     @Override
     public boolean add(Entry<K, V> entry) {
@@ -95,7 +108,7 @@
 
     @Override
     public boolean addAll(Collection<? extends Entry<K, V>> c) {
-      HashMap.this.ensureSizeFor(size() + c.size());
+      HashMap.this.resizeForJoin(c.size());
       return super.addAll(c);
     }
 
@@ -201,46 +214,11 @@
     }
   }
 
-  private class KeyIterator implements Iterator<K> {
-    private int index = 0;
-    private int last = -1;
-
-    {
-      advanceToItem();
-    }
-
-    public boolean hasNext() {
-      return index < keys.length;
-    }
-
+  private class KeyIterator extends BaseIterator<K> {
     @SuppressWarnings("unchecked")
-    public K next() {
-      if (!hasNext()) {
-        throw new NoSuchElementException();
-      }
-      last = index;
-      Object toReturn = unmaskNullKey(keys[index++]);
-      advanceToItem();
-      return (K) toReturn;
-    }
-
-    public void remove() {
-      if (last < 0) {
-        throw new IllegalStateException();
-      }
-      internalRemove(last);
-      if (keys[last] != null) {
-        index = last;
-      }
-      last = -1;
-    }
-
-    private void advanceToItem() {
-      for (; index < keys.length; ++index) {
-        if (keys[index] != null) {
-          return;
-        }
-      }
+    @Override
+    protected K iteratorItem(int index) {
+      return (K) unmaskNullKey(keys[index]);
     }
   }
 
@@ -297,46 +275,11 @@
     }
   }
 
-  private class ValueIterator implements Iterator<V> {
-    private int index = 0;
-    private int last = -1;
-
-    {
-      advanceToItem();
-    }
-
-    public boolean hasNext() {
-      return index < keys.length;
-    }
-
+  private class ValueIterator extends BaseIterator<V> {
     @SuppressWarnings("unchecked")
-    public V next() {
-      if (!hasNext()) {
-        throw new NoSuchElementException();
-      }
-      last = index;
-      Object toReturn = values[index++];
-      advanceToItem();
-      return (V) toReturn;
-    }
-
-    public void remove() {
-      if (last < 0) {
-        throw new IllegalStateException();
-      }
-      internalRemove(last);
-      if (keys[last] != null) {
-        index = last;
-      }
-      last = -1;
-    }
-
-    private void advanceToItem() {
-      for (; index < keys.length; ++index) {
-        if (keys[index] != null) {
-          return;
-        }
-      }
+    @Override
+    protected V iteratorItem(int index) {
+      return (V) values[index];
     }
   }
 
@@ -446,7 +389,7 @@
     }
 
     initTable(newCapacity);
-    internalPutAll(m);
+    putAll(m);
   }
 
   public void clear() {
@@ -517,14 +460,18 @@
 
   @SuppressWarnings("unchecked")
   public V put(K key, V value) {
-    ensureSizeFor(size + 1);
     int index = findKeyOrEmpty(key);
     if (keys[index] == null) {
-      ++size;
+      // Not in the map, may need to grow.
+      if (ensureSizeFor(++size)) {
+        // If we had to grow the table, must recompute the index.
+        index = findKeyOrEmpty(key);
+      }
       keys[index] = maskNullKey(key);
       values[index] = value;
       return null;
     } else {
+      // In the map, set a new value;
       Object previousValue = values[index];
       values[index] = value;
       return (V) previousValue;
@@ -532,8 +479,10 @@
   }
 
   public void putAll(Map<? extends K, ? extends V> m) {
-    ensureSizeFor(size + m.size());
-    internalPutAll(m);
+    resizeForJoin(m.size());
+    for (Entry<? extends K, ? extends V> entry : m.entrySet()) {
+      put(entry.getKey(), entry.getValue());
+    }
   }
 
   @SuppressWarnings("unchecked")
@@ -644,9 +593,9 @@
    * Ensures the map is large enough to contain the specified number of entries.
    * Default access to avoid synthetic accessors from inner classes.
    */
-  void ensureSizeFor(int expectedSize) {
+  boolean ensureSizeFor(int expectedSize) {
     if (keys.length * 3 >= expectedSize * 4) {
-      return;
+      return false;
     }
 
     int newCapacity = keys.length << 1;
@@ -670,6 +619,7 @@
         values[newIndex] = oldValues[i];
       }
     }
+    return true;
   }
 
   /**
@@ -727,6 +677,27 @@
     plugHole(index);
   }
 
+  /**
+   * Resizes this map to accommodate the minimum size required to join this map
+   * with another map. This is an optimization to prevent multiple resizes
+   * during the join operation. Naively, it would seem like we should resize to
+   * hold {@code (size + otherSize)}. However, the incoming map might have
+   * duplicates with this map; it might even be all duplicates. The correct
+   * behavior when the incoming map is all duplicates is NOT to resize, and
+   * therefore not to invalidate any iterators.
+   * <p>
+   * In practice, this strategy results in a worst-case of two resizes. In the
+   * worst case, where {@code size} and {@code otherSize} are roughly equal and
+   * the sets are completely disjoint, we might do 1 initial rehash and then 1
+   * additional rehash down the road. But this is an edge case that requires
+   * getting unlucky on both boundaries. Most of the time, we do either 1
+   * initial rehash or 1 down the road, because doubling the capacity generally
+   * allows this map to absorb an equally-sized disjoint map.
+   */
+  boolean resizeForJoin(int sizeOther) {
+    return ensureSizeFor(Math.max(size, sizeOther));
+  }
+
   private int getKeyIndex(Object k) {
     int h = keyHashCode(k);
     // Copied from Apache's AbstractHashedMap; prevents power-of-two collisions.
@@ -743,21 +714,6 @@
     values = new Object[capacity];
   }
 
-  private void internalPutAll(Map<? extends K, ? extends V> m) {
-    for (Entry<? extends K, ? extends V> entry : m.entrySet()) {
-      K key = entry.getKey();
-      V value = entry.getValue();
-      int index = findKeyOrEmpty(key);
-      if (keys[index] == null) {
-        ++size;
-        keys[index] = maskNullKey(key);
-        values[index] = value;
-      } else {
-        values[index] = value;
-      }
-    }
-  }
-
   /**
    * Tricky, we left a hole in the map, which we have to fill. The only way to
    * do this is to search forwards through the map shuffling back values that
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/HashSet.java b/dev/core/src/com/google/gwt/dev/util/collect/HashSet.java
index 6476e6d..f3e26ed 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/HashSet.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/HashSet.java
@@ -22,6 +22,7 @@
 import java.lang.reflect.Array;
 import java.util.AbstractSet;
 import java.util.Collection;
+import java.util.ConcurrentModificationException;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 
@@ -33,14 +34,15 @@
 public class HashSet<E> extends AbstractSet<E> implements Serializable {
 
   private class SetIterator implements Iterator<E> {
+    private Object[] coModCheckTable = table;
     private int index = 0;
     private int last = -1;
 
-    public SetIterator() {
-      advanceToItem();
-    }
-
     public boolean hasNext() {
+      if (coModCheckTable != table) {
+        throw new ConcurrentModificationException();
+      }
+      advanceToItem();
       return index < table.length;
     }
 
@@ -50,17 +52,19 @@
         throw new NoSuchElementException();
       }
       last = index;
-      E toReturn = (E) unmaskNull(table[index++]);
-      advanceToItem();
-      return toReturn;
+      return (E) unmaskNull(table[index++]);
     }
 
     public void remove() {
       if (last < 0) {
         throw new IllegalStateException();
       }
+      if (coModCheckTable != table) {
+        throw new ConcurrentModificationException();
+      }
       internalRemove(last);
       if (table[last] != null) {
+        // Hole was plugged.
         index = last;
       }
       last = -1;
@@ -124,12 +128,32 @@
     super.addAll(c);
   }
 
+  /**
+   * Works just like {@link #HashSet(Collection)}, but for arrays. Used to avoid
+   * having to synthesize a collection in {@link Sets}.
+   */
+  HashSet(E[] c) {
+    int newCapacity = INITIAL_TABLE_SIZE;
+    int expectedSize = c.length;
+    while (newCapacity * 3 < expectedSize * 4) {
+      newCapacity <<= 1;
+    }
+
+    table = new Object[newCapacity];
+    for (E e : c) {
+      add(e);
+    }
+  }
+
   @Override
   public boolean add(E e) {
-    ensureSizeFor(size + 1);
     int index = findOrEmpty(e);
     if (table[index] == null) {
-      ++size;
+      // Not in the map, may need to grow.
+      if (ensureSizeFor(++size)) {
+        // If we had to grow the table, must recompute the index.
+        index = findOrEmpty(e);
+      }
       table[index] = maskNull(e);
       return true;
     }
@@ -138,7 +162,7 @@
 
   @Override
   public boolean addAll(Collection<? extends E> c) {
-    ensureSizeFor(size + c.size());
+    resizeForJoin(c.size());
     return super.addAll(c);
   }
 
@@ -239,21 +263,6 @@
   }
 
   /**
-   * Works just like {@link #addAll(Collection)}, but for arrays. Used to avoid
-   * having to synthesize a collection in {@link Sets}.
-   */
-  void addAll(E[] elements) {
-    ensureSizeFor(size + elements.length);
-    for (E e : elements) {
-      int index = findOrEmpty(e);
-      if (table[index] == null) {
-        ++size;
-        table[index] = maskNull(e);
-      }
-    }
-  }
-
-  /**
    * Removes the item at the specified index, and performs internal management
    * to make sure we don't wind up with a hole in the table. Default access to
    * avoid synthetic accessors from inner classes.
@@ -267,9 +276,9 @@
   /**
    * Ensures the set is large enough to contain the specified number of entries.
    */
-  private void ensureSizeFor(int expectedSize) {
+  private boolean ensureSizeFor(int expectedSize) {
     if (table.length * 3 >= expectedSize * 4) {
-      return;
+      return false;
     }
 
     int newCapacity = table.length << 1;
@@ -290,6 +299,7 @@
         table[newIndex] = o;
       }
     }
+    return true;
   }
 
   /**
@@ -391,6 +401,27 @@
     doReadObject(in);
   }
 
+  /**
+   * Resizes this set to accommodate the minimum size required to join this set
+   * with another set. This is an optimization to prevent multiple resizes
+   * during the join operation. Naively, it would seem like we should resize to
+   * hold {@code (size + otherSize)}. However, the incoming set might have
+   * duplicates with this set; it might even be all duplicates. The correct
+   * behavior when the incoming set is all duplicates is NOT to resize, and
+   * therefore not to invalidate any iterators.
+   * <p>
+   * In practice, this strategy results in a worst-case of two resizes. In the
+   * worst case, where {@code size} and {@code otherSize} are roughly equal and
+   * the sets are completely disjoint, we might do 1 initial rehash and then 1
+   * additional rehash down the road. But this is an edge case that requires
+   * getting unlucky on both boundaries. Most of the time, we do either 1
+   * initial rehash or 1 down the road, because doubling the capacity generally
+   * allows this set to absorb an equally-sized disjoint set.
+   */
+  private void resizeForJoin(int sizeOther) {
+    ensureSizeFor(Math.max(size, sizeOther));
+  }
+
   private void writeObject(ObjectOutputStream out) throws IOException {
     out.defaultWriteObject();
     doWriteObject(out);
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/IdentityHashSet.java b/dev/core/src/com/google/gwt/dev/util/collect/IdentityHashSet.java
index bdf673d..b6d3f7d 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/IdentityHashSet.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/IdentityHashSet.java
@@ -33,6 +33,14 @@
     super(c);
   }
 
+  /**
+   * Works just like {@link #HashSet(Collection)}, but for arrays. Used to avoid
+   * having to synthesize a collection in {@link IdentitySets}.
+   */
+  IdentityHashSet(E[] c) {
+    super(c);
+  }
+
   @Override
   protected boolean itemEquals(Object a, Object b) {
     return a == b;
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/IdentitySets.java b/dev/core/src/com/google/gwt/dev/util/collect/IdentitySets.java
index 9edccb5..273ae83 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/IdentitySets.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/IdentitySets.java
@@ -146,9 +146,7 @@
       case 1:
         return create(items[0]);
       default:
-        IdentityHashSet<T> result = new IdentityHashSet<T>();
-        result.addAll(items);
-        return result;
+        return new IdentityHashSet<T>(items);
     }
   }
 
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/Sets.java b/dev/core/src/com/google/gwt/dev/util/collect/Sets.java
index 8cb8064..0ffbaa9 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/Sets.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/Sets.java
@@ -93,9 +93,7 @@
       case 1:
         return create(items[0]);
       default:
-        HashSet<T> result = new HashSet<T>();
-        result.addAll(items);
-        return result;
+        return new HashSet<T>(items);
     }
   }