Fix the worst concurrent modification problems in compiler memory-light set/map.
Due to a bug in com.google.gwt.dev.util.collect.HashMap.put(), we were eagerly growing the underlying table even when the key was already mapped. This caused iterators to be invalidated on a non-structural change. Of course, we weren't actually tracking structural changes, which would cause the iterator to quietly continue, returning a 'null' keyed entry when it should not have, and possibly duplicating / skipping other entries.
1) I fixed the bug in HashMap.put() and HashSet.add() to defer growth until we're sure the key doesn't exist already.
2) Added basic concurrent modification tracking to the associated iterators, in a way that doesn't add additional memory to the collections themselves.
3) Other various and sundry cleanups.
http://gwt-code-reviews.appspot.com/1384804
Review by: zundel@google.com
git-svn-id: https://google-web-toolkit.googlecode.com/svn/trunk@9858 8db76d5a-ed1c-0410-87a9-c151d255dfc7
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/HashMap.java b/dev/core/src/com/google/gwt/dev/util/collect/HashMap.java
index c02df9e..d59f04a 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/HashMap.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/HashMap.java
@@ -22,6 +22,7 @@
import java.util.AbstractCollection;
import java.util.AbstractSet;
import java.util.Collection;
+import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
@@ -43,39 +44,44 @@
*/
private static final int INITIAL_TABLE_SIZE = 4;
- private class EntryIterator implements Iterator<Entry<K, V>> {
+ private abstract class BaseIterator<E> implements Iterator<E> {
+ private Object[] coModCheckKeys = keys;
private int index = 0;
private int last = -1;
- {
- advanceToItem();
- }
-
public boolean hasNext() {
+ if (coModCheckKeys != keys) {
+ throw new ConcurrentModificationException();
+ }
+ advanceToItem();
return index < keys.length;
}
- public Entry<K, V> next() {
+ public E next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
last = index;
- Entry<K, V> toReturn = new HashEntry(index++);
- advanceToItem();
- return toReturn;
+ return iteratorItem(index++);
}
public void remove() {
if (last < 0) {
throw new IllegalStateException();
}
+ if (coModCheckKeys != keys) {
+ throw new ConcurrentModificationException();
+ }
internalRemove(last);
if (keys[last] != null) {
+ // Hole was plugged.
index = last;
}
last = -1;
}
+ protected abstract E iteratorItem(int index);
+
private void advanceToItem() {
for (; index < keys.length; ++index) {
if (keys[index] != null) {
@@ -85,6 +91,13 @@
}
}
+ private class EntryIterator extends BaseIterator<Entry<K, V>> {
+ @Override
+ protected Entry<K, V> iteratorItem(int index) {
+ return new HashEntry(index);
+ }
+ }
+
private class EntrySet extends AbstractSet<Entry<K, V>> {
@Override
public boolean add(Entry<K, V> entry) {
@@ -95,7 +108,7 @@
@Override
public boolean addAll(Collection<? extends Entry<K, V>> c) {
- HashMap.this.ensureSizeFor(size() + c.size());
+ HashMap.this.resizeForJoin(c.size());
return super.addAll(c);
}
@@ -201,46 +214,11 @@
}
}
- private class KeyIterator implements Iterator<K> {
- private int index = 0;
- private int last = -1;
-
- {
- advanceToItem();
- }
-
- public boolean hasNext() {
- return index < keys.length;
- }
-
+ private class KeyIterator extends BaseIterator<K> {
@SuppressWarnings("unchecked")
- public K next() {
- if (!hasNext()) {
- throw new NoSuchElementException();
- }
- last = index;
- Object toReturn = unmaskNullKey(keys[index++]);
- advanceToItem();
- return (K) toReturn;
- }
-
- public void remove() {
- if (last < 0) {
- throw new IllegalStateException();
- }
- internalRemove(last);
- if (keys[last] != null) {
- index = last;
- }
- last = -1;
- }
-
- private void advanceToItem() {
- for (; index < keys.length; ++index) {
- if (keys[index] != null) {
- return;
- }
- }
+ @Override
+ protected K iteratorItem(int index) {
+ return (K) unmaskNullKey(keys[index]);
}
}
@@ -297,46 +275,11 @@
}
}
- private class ValueIterator implements Iterator<V> {
- private int index = 0;
- private int last = -1;
-
- {
- advanceToItem();
- }
-
- public boolean hasNext() {
- return index < keys.length;
- }
-
+ private class ValueIterator extends BaseIterator<V> {
@SuppressWarnings("unchecked")
- public V next() {
- if (!hasNext()) {
- throw new NoSuchElementException();
- }
- last = index;
- Object toReturn = values[index++];
- advanceToItem();
- return (V) toReturn;
- }
-
- public void remove() {
- if (last < 0) {
- throw new IllegalStateException();
- }
- internalRemove(last);
- if (keys[last] != null) {
- index = last;
- }
- last = -1;
- }
-
- private void advanceToItem() {
- for (; index < keys.length; ++index) {
- if (keys[index] != null) {
- return;
- }
- }
+ @Override
+ protected V iteratorItem(int index) {
+ return (V) values[index];
}
}
@@ -446,7 +389,7 @@
}
initTable(newCapacity);
- internalPutAll(m);
+ putAll(m);
}
public void clear() {
@@ -517,14 +460,18 @@
@SuppressWarnings("unchecked")
public V put(K key, V value) {
- ensureSizeFor(size + 1);
int index = findKeyOrEmpty(key);
if (keys[index] == null) {
- ++size;
+ // Not in the map, may need to grow.
+ if (ensureSizeFor(++size)) {
+ // If we had to grow the table, must recompute the index.
+ index = findKeyOrEmpty(key);
+ }
keys[index] = maskNullKey(key);
values[index] = value;
return null;
} else {
+      // In the map, set a new value.
Object previousValue = values[index];
values[index] = value;
return (V) previousValue;
@@ -532,8 +479,10 @@
}
public void putAll(Map<? extends K, ? extends V> m) {
- ensureSizeFor(size + m.size());
- internalPutAll(m);
+ resizeForJoin(m.size());
+ for (Entry<? extends K, ? extends V> entry : m.entrySet()) {
+ put(entry.getKey(), entry.getValue());
+ }
}
@SuppressWarnings("unchecked")
@@ -644,9 +593,9 @@
* Ensures the map is large enough to contain the specified number of entries.
* Default access to avoid synthetic accessors from inner classes.
*/
- void ensureSizeFor(int expectedSize) {
+ boolean ensureSizeFor(int expectedSize) {
if (keys.length * 3 >= expectedSize * 4) {
- return;
+ return false;
}
int newCapacity = keys.length << 1;
@@ -670,6 +619,7 @@
values[newIndex] = oldValues[i];
}
}
+ return true;
}
/**
@@ -727,6 +677,27 @@
plugHole(index);
}
+ /**
+ * Resizes this map to accommodate the minimum size required to join this map
+ * with another map. This is an optimization to prevent multiple resizes
+ * during the join operation. Naively, it would seem like we should resize to
+ * hold {@code (size + otherSize)}. However, the incoming map might have
+ * duplicates with this map; it might even be all duplicates. The correct
+ * behavior when the incoming map is all duplicates is NOT to resize, and
+ * therefore not to invalidate any iterators.
+ * <p>
+ * In practice, this strategy results in a worst-case of two resizes. In the
+ * worst case, where {@code size} and {@code otherSize} are roughly equal and
+ * the sets are completely disjoint, we might do 1 initial rehash and then 1
+ * additional rehash down the road. But this is an edge case that requires
+ * getting unlucky on both boundaries. Most of the time, we do either 1
+ * initial rehash or 1 down the road, because doubling the capacity generally
+ * allows this map to absorb an equally-sized disjoint map.
+ */
+ boolean resizeForJoin(int sizeOther) {
+ return ensureSizeFor(Math.max(size, sizeOther));
+ }
+
private int getKeyIndex(Object k) {
int h = keyHashCode(k);
// Copied from Apache's AbstractHashedMap; prevents power-of-two collisions.
@@ -743,21 +714,6 @@
values = new Object[capacity];
}
- private void internalPutAll(Map<? extends K, ? extends V> m) {
- for (Entry<? extends K, ? extends V> entry : m.entrySet()) {
- K key = entry.getKey();
- V value = entry.getValue();
- int index = findKeyOrEmpty(key);
- if (keys[index] == null) {
- ++size;
- keys[index] = maskNullKey(key);
- values[index] = value;
- } else {
- values[index] = value;
- }
- }
- }
-
/**
* Tricky, we left a hole in the map, which we have to fill. The only way to
* do this is to search forwards through the map shuffling back values that
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/HashSet.java b/dev/core/src/com/google/gwt/dev/util/collect/HashSet.java
index 6476e6d..f3e26ed 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/HashSet.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/HashSet.java
@@ -22,6 +22,7 @@
import java.lang.reflect.Array;
import java.util.AbstractSet;
import java.util.Collection;
+import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;
@@ -33,14 +34,15 @@
public class HashSet<E> extends AbstractSet<E> implements Serializable {
private class SetIterator implements Iterator<E> {
+ private Object[] coModCheckTable = table;
private int index = 0;
private int last = -1;
- public SetIterator() {
- advanceToItem();
- }
-
public boolean hasNext() {
+ if (coModCheckTable != table) {
+ throw new ConcurrentModificationException();
+ }
+ advanceToItem();
return index < table.length;
}
@@ -50,17 +52,19 @@
throw new NoSuchElementException();
}
last = index;
- E toReturn = (E) unmaskNull(table[index++]);
- advanceToItem();
- return toReturn;
+ return (E) unmaskNull(table[index++]);
}
public void remove() {
if (last < 0) {
throw new IllegalStateException();
}
+ if (coModCheckTable != table) {
+ throw new ConcurrentModificationException();
+ }
internalRemove(last);
if (table[last] != null) {
+ // Hole was plugged.
index = last;
}
last = -1;
@@ -124,12 +128,32 @@
super.addAll(c);
}
+ /**
+ * Works just like {@link #HashSet(Collection)}, but for arrays. Used to avoid
+ * having to synthesize a collection in {@link Sets}.
+ */
+ HashSet(E[] c) {
+ int newCapacity = INITIAL_TABLE_SIZE;
+ int expectedSize = c.length;
+ while (newCapacity * 3 < expectedSize * 4) {
+ newCapacity <<= 1;
+ }
+
+ table = new Object[newCapacity];
+ for (E e : c) {
+ add(e);
+ }
+ }
+
@Override
public boolean add(E e) {
- ensureSizeFor(size + 1);
int index = findOrEmpty(e);
if (table[index] == null) {
- ++size;
+ // Not in the map, may need to grow.
+ if (ensureSizeFor(++size)) {
+ // If we had to grow the table, must recompute the index.
+ index = findOrEmpty(e);
+ }
table[index] = maskNull(e);
return true;
}
@@ -138,7 +162,7 @@
@Override
public boolean addAll(Collection<? extends E> c) {
- ensureSizeFor(size + c.size());
+ resizeForJoin(c.size());
return super.addAll(c);
}
@@ -239,21 +263,6 @@
}
/**
- * Works just like {@link #addAll(Collection)}, but for arrays. Used to avoid
- * having to synthesize a collection in {@link Sets}.
- */
- void addAll(E[] elements) {
- ensureSizeFor(size + elements.length);
- for (E e : elements) {
- int index = findOrEmpty(e);
- if (table[index] == null) {
- ++size;
- table[index] = maskNull(e);
- }
- }
- }
-
- /**
* Removes the item at the specified index, and performs internal management
* to make sure we don't wind up with a hole in the table. Default access to
* avoid synthetic accessors from inner classes.
@@ -267,9 +276,9 @@
/**
* Ensures the set is large enough to contain the specified number of entries.
*/
- private void ensureSizeFor(int expectedSize) {
+ private boolean ensureSizeFor(int expectedSize) {
if (table.length * 3 >= expectedSize * 4) {
- return;
+ return false;
}
int newCapacity = table.length << 1;
@@ -290,6 +299,7 @@
table[newIndex] = o;
}
}
+ return true;
}
/**
@@ -391,6 +401,27 @@
doReadObject(in);
}
+ /**
+ * Resizes this set to accommodate the minimum size required to join this set
+ * with another set. This is an optimization to prevent multiple resizes
+ * during the join operation. Naively, it would seem like we should resize to
+ * hold {@code (size + otherSize)}. However, the incoming set might have
+ * duplicates with this set; it might even be all duplicates. The correct
+ * behavior when the incoming set is all duplicates is NOT to resize, and
+ * therefore not to invalidate any iterators.
+ * <p>
+ * In practice, this strategy results in a worst-case of two resizes. In the
+ * worst case, where {@code size} and {@code otherSize} are roughly equal and
+ * the sets are completely disjoint, we might do 1 initial rehash and then 1
+ * additional rehash down the road. But this is an edge case that requires
+ * getting unlucky on both boundaries. Most of the time, we do either 1
+ * initial rehash or 1 down the road, because doubling the capacity generally
+ * allows this set to absorb an equally-sized disjoint set.
+ */
+ private void resizeForJoin(int sizeOther) {
+ ensureSizeFor(Math.max(size, sizeOther));
+ }
+
private void writeObject(ObjectOutputStream out) throws IOException {
out.defaultWriteObject();
doWriteObject(out);
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/IdentityHashSet.java b/dev/core/src/com/google/gwt/dev/util/collect/IdentityHashSet.java
index bdf673d..b6d3f7d 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/IdentityHashSet.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/IdentityHashSet.java
@@ -33,6 +33,14 @@
super(c);
}
+ /**
+   * Works just like {@link #IdentityHashSet(Collection)}, but for arrays. Used to avoid
+ * having to synthesize a collection in {@link IdentitySets}.
+ */
+ IdentityHashSet(E[] c) {
+ super(c);
+ }
+
@Override
protected boolean itemEquals(Object a, Object b) {
return a == b;
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/IdentitySets.java b/dev/core/src/com/google/gwt/dev/util/collect/IdentitySets.java
index 9edccb5..273ae83 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/IdentitySets.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/IdentitySets.java
@@ -146,9 +146,7 @@
case 1:
return create(items[0]);
default:
- IdentityHashSet<T> result = new IdentityHashSet<T>();
- result.addAll(items);
- return result;
+ return new IdentityHashSet<T>(items);
}
}
diff --git a/dev/core/src/com/google/gwt/dev/util/collect/Sets.java b/dev/core/src/com/google/gwt/dev/util/collect/Sets.java
index 8cb8064..0ffbaa9 100644
--- a/dev/core/src/com/google/gwt/dev/util/collect/Sets.java
+++ b/dev/core/src/com/google/gwt/dev/util/collect/Sets.java
@@ -93,9 +93,7 @@
case 1:
return create(items[0]);
default:
- HashSet<T> result = new HashSet<T>();
- result.addAll(items);
- return result;
+ return new HashSet<T>(items);
}
}