Adds a Text->Text transformation pass which sorts functions by size and clusters them by edit distance. It also integrates the IE7 block-size fix by introducing inner block scopes whenever 32767 top-level statements are added to the output.

Reduction on compressed output of Showcase by up to 20%.

Reviewed by: spoon, scottb

git-svn-id: https://google-web-toolkit.googlecode.com/svn/trunk@5972 8db76d5a-ed1c-0410-87a9-c151d255dfc7
diff --git a/dev/core/src/com/google/gwt/dev/jjs/JavaToJavaScriptCompiler.java b/dev/core/src/com/google/gwt/dev/jjs/JavaToJavaScriptCompiler.java
index 4c94359..da19855 100644
--- a/dev/core/src/com/google/gwt/dev/jjs/JavaToJavaScriptCompiler.java
+++ b/dev/core/src/com/google/gwt/dev/jjs/JavaToJavaScriptCompiler.java
@@ -16,6 +16,7 @@
 package com.google.gwt.dev.jjs;
 
 import com.google.gwt.core.ext.PropertyOracle;
+import com.google.gwt.core.ext.SelectionProperty;
 import com.google.gwt.core.ext.TreeLogger;
 import com.google.gwt.core.ext.UnableToCompleteException;
 import com.google.gwt.core.ext.linker.ArtifactSet;
@@ -56,6 +57,7 @@
 import com.google.gwt.dev.jjs.impl.CastNormalizer;
 import com.google.gwt.dev.jjs.impl.CatchBlockNormalizer;
 import com.google.gwt.dev.jjs.impl.CodeSplitter;
+import com.google.gwt.dev.jjs.impl.CodeSplitter.MultipleDependencyGraphRecorder;
 import com.google.gwt.dev.jjs.impl.ControlFlowAnalyzer;
 import com.google.gwt.dev.jjs.impl.DeadCodeElimination;
 import com.google.gwt.dev.jjs.impl.EqualityNormalizer;
@@ -66,6 +68,8 @@
 import com.google.gwt.dev.jjs.impl.GenerateJavaScriptAST;
 import com.google.gwt.dev.jjs.impl.JavaScriptObjectNormalizer;
 import com.google.gwt.dev.jjs.impl.JavaToJavaScriptMap;
+import com.google.gwt.dev.jjs.impl.JsFunctionClusterer;
+import com.google.gwt.dev.jjs.impl.JsIEBlockTextTransformer;
 import com.google.gwt.dev.jjs.impl.JsoDevirtualizer;
 import com.google.gwt.dev.jjs.impl.LongCastNormalizer;
 import com.google.gwt.dev.jjs.impl.LongEmulationNormalizer;
@@ -81,7 +85,6 @@
 import com.google.gwt.dev.jjs.impl.SourceGenerationVisitor;
 import com.google.gwt.dev.jjs.impl.TypeMap;
 import com.google.gwt.dev.jjs.impl.TypeTightener;
-import com.google.gwt.dev.jjs.impl.CodeSplitter.MultipleDependencyGraphRecorder;
 import com.google.gwt.dev.js.EvalFunctionsAtTopScope;
 import com.google.gwt.dev.js.JsBreakUpLargeVarStatements;
 import com.google.gwt.dev.js.JsIEBlockSizeVisitor;
@@ -313,7 +316,23 @@
 
       // Work around an IE7 bug,
       // http://code.google.com/p/google-web-toolkit/issues/detail?id=1440
-      JsIEBlockSizeVisitor.exec(jsProgram);
+      // note, JsIEBlockTextTransformer now handles restructuring top level
+      // blocks, this class now handles non-top level blocks only. 
+      SelectionProperty userAgentProperty = null;
+      for (PropertyOracle oracle : propertyOracles) {
+        userAgentProperty = oracle.getSelectionProperty(logger, "user.agent");
+        if (userAgentProperty != null) {
+          break;
+        }
+      }
+      // if user agent is known or ie6, split overly large blocks
+      boolean splitBlocks = userAgentProperty == null || (
+          userAgentProperty != null &&
+              "ie6".equals(userAgentProperty.getCurrentValue()));
+
+      if (splitBlocks) {
+        JsIEBlockSizeVisitor.exec(jsProgram);
+      }
       JsBreakUpLargeVarStatements.exec(jsProgram, propertyOracles);
 
       // (12) Generate the final output text.
@@ -324,7 +343,7 @@
       List<Map<Range, SourceInfo>> sourceInfoMaps = options.isSoycExtra()
           ? new ArrayList<Map<Range, SourceInfo>>() : null;
       generateJavaScriptCode(options, jsProgram, map, js, ranges,
-          sizeBreakdowns, sourceInfoMaps);
+          sizeBreakdowns, sourceInfoMaps, splitBlocks);
 
       PermutationResult toReturn = new PermutationResultImpl(js,
           makeSymbolMap(symbolTable), ranges, permutationId);
@@ -817,11 +836,12 @@
    *          JavaScript
    * @param sourceInfoMaps An array to hold the source info maps for that
    *          JavaScript
+   * @param splitBlocks true if current permutation is for IE6 or unknown
    */
   private static void generateJavaScriptCode(JJSOptions options,
       JsProgram jsProgram, JavaToJavaScriptMap jjsMap, String[] js,
       StatementRanges[] ranges, SizeBreakdown[] sizeBreakdowns,
-      List<Map<Range, SourceInfo>> sourceInfoMaps) {
+      List<Map<Range, SourceInfo>> sourceInfoMaps, boolean splitBlocks) {
     for (int i = 0; i < js.length; i++) {
       DefaultTextOutput out = new DefaultTextOutput(
           options.getOutput().shouldMinimize());
@@ -832,14 +852,31 @@
         v = new JsSourceGenerationVisitorWithSizeBreakdown(out, jjsMap);
       }
       v.accept(jsProgram.getFragmentBlock(i));
-      js[i] = out.toString();
+      
+      /**
+       * Reorder function decls to improve compression ratios. Also restructures
+       * the top level blocks into sub-blocks if they exceed 32767 statements.
+       */
+      JsFunctionClusterer clusterer = new JsFunctionClusterer(out.toString(),
+          v.getStatementRanges());
+      // only cluster for obfuscated mode
+      if (options.getOutput() == JsOutputOption.OBFUSCATED) {
+        clusterer.exec();
+      }
+      // rewrite top-level blocks to limit the number of statements
+      JsIEBlockTextTransformer ieXformer = new JsIEBlockTextTransformer(
+          clusterer);
+      if (splitBlocks) {
+        ieXformer.exec();
+      }
+      js[i] = ieXformer.getJs();
       if (sizeBreakdowns != null) {
         sizeBreakdowns[i] = v.getSizeBreakdown();
       }
       if (sourceInfoMaps != null) {
         sourceInfoMaps.add(((JsReportGenerationVisitor) v).getSourceInfoMap());
       }
-      ranges[i] = v.getStatementRanges();
+      ranges[i] = ieXformer.getStatementRanges();
     }
   }
 
diff --git a/dev/core/src/com/google/gwt/dev/jjs/impl/JsAbstractTextTransformer.java b/dev/core/src/com/google/gwt/dev/jjs/impl/JsAbstractTextTransformer.java
new file mode 100644
index 0000000..8f738de
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/jjs/impl/JsAbstractTextTransformer.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.jjs.impl;
+
+import com.google.gwt.core.ext.linker.StatementRanges;
+import com.google.gwt.core.ext.linker.impl.StandardStatementRanges;
+
+import java.util.ArrayList;
+
+/**
+ * Base class for transforming program text.
+ */
+public abstract class JsAbstractTextTransformer {
+
+  protected String js;
+
+  protected StatementRanges statementRanges;
+
+  public JsAbstractTextTransformer(String js, StatementRanges statementRanges) {
+    this.js = js;
+    this.statementRanges = statementRanges;
+  }
+
+  public JsAbstractTextTransformer(JsAbstractTextTransformer xformer) {
+    this.js = xformer.getJs();
+    this.statementRanges = xformer.getStatementRanges();
+  }
+
+  public abstract void exec();
+
+  public String getJs() {
+    return js;
+  }
+
+  public StatementRanges getStatementRanges() {
+    return statementRanges;
+  }
+
+  protected void addStatement(String code, StringBuilder newJs,
+      ArrayList<Integer> starts, ArrayList<Integer> ends) {
+    beginStatement(newJs, starts);
+    newJs.append(code);
+    endStatement(newJs, ends);
+  }
+
+  protected void beginStatement(StringBuilder newJs,
+      ArrayList<Integer> starts) {
+    starts.add(newJs.length());
+  }
+
+  /**
+   * Called if any operations need to be performed before all statements have
+   * been processed.
+   */
+  protected void beginStatements(StringBuilder newJs, ArrayList<Integer> starts,
+      ArrayList<Integer> ends) {
+  }
+
+  protected void endStatement(StringBuilder newJs, ArrayList<Integer> ends) {
+    ends.add(newJs.length());
+  }
+
+  /**
+   * Called if any operations need to be performed after all statements have
+   * been processed.
+   */
+  protected void endStatements(StringBuilder newJs, ArrayList<Integer> starts,
+      ArrayList<Integer> ends) {
+  }
+
+  protected String getJsForRange(int stmtIndex) {
+    return js.substring(statementRanges.start(stmtIndex),
+        statementRanges.end(stmtIndex));
+  }
+
+  /**
+   * Dump functions and fragments back into a new JS string, and calculate a new
+   * StatementRanges object.
+   */
+  protected void recomputeJsAndStatementRanges(int[] stmtIndices) {
+
+    StringBuilder newJs = new StringBuilder();
+    ArrayList<Integer> starts = new ArrayList<Integer>();
+    ArrayList<Integer> ends = new ArrayList<Integer>();
+
+    beginStatements(newJs, starts, ends);
+    for (int i = 0; i < stmtIndices.length; i++) {
+      String code = getJsForRange(stmtIndices[i]);
+      addStatement(code, newJs, starts, ends);
+    }
+    endStatements(newJs, starts, ends);
+
+    assert
+        starts.size() == ends.size() :
+        "Size mismatch between start and" + " end statement ranges.";
+    assert starts.get(0) == 0 && ends.get(ends.size() - 1) ==
+        newJs.length() :
+        "statement ranges don't cover entire JS output string.";
+
+    StandardStatementRanges newRanges = new StandardStatementRanges(starts,
+        ends);
+    js = newJs.toString();
+    statementRanges = newRanges;
+  }
+}
diff --git a/dev/core/src/com/google/gwt/dev/jjs/impl/JsFunctionClusterer.java b/dev/core/src/com/google/gwt/dev/jjs/impl/JsFunctionClusterer.java
new file mode 100644
index 0000000..9e56801
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/jjs/impl/JsFunctionClusterer.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.jjs.impl;
+
+import com.google.gwt.core.ext.linker.StatementRanges;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.LinkedList;
+import java.util.Iterator;
+
+/**
+ * Re-orders function declarations according to a given metric and clustering
+ * algorithm in order to boost gzip/deflation compression efficiency. This
+ * version uses the edit-distance algorithm as a metric, and a semi-greedy
+ * strategy for grouping functions together. 
+ */
+public class JsFunctionClusterer extends JsAbstractTextTransformer {
+
+  /**
+   * Limit edit-distance search to MAX_DIST.
+   */
+  private static final int MAX_DIST = 10;
+
+  private static final int MAX_DISTANCE_LIMIT = 100;
+
+  private List<Integer> functionIndices;
+
+  private int[] clusteredIndices;
+
+  public JsFunctionClusterer(String js, StatementRanges statementRanges) {
+    super(js, statementRanges);
+  }
+
+  public JsFunctionClusterer(JsAbstractTextTransformer xformer) {
+    super(xformer);
+  }
+
+  public void exec() {
+    functionIndices = new LinkedList<Integer>();
+
+    // gather up all of the indices of function decl statements
+    for (int i = 0; i < statementRanges.numStatements(); i++) {
+      String code = getJsForRange(i);
+      if (code.startsWith("function")) {
+        functionIndices.add(i);
+      }
+    }
+
+    // sort the indices according to size of statement range
+    Collections.sort(functionIndices, new Comparator<Integer>() {
+      public int compare(Integer index1, Integer index2) {
+        return stmtSize(index1) - (stmtSize(index2));
+      }
+    });
+
+    // used to hold the new output order
+    clusteredIndices = new int[functionIndices.size()];
+    int currentFunction = 0;
+
+    // remove the first function and stick it in the output array
+    clusteredIndices[currentFunction] = functionIndices.get(0);
+    functionIndices.remove(0);
+
+    while (!functionIndices.isEmpty()) {
+
+      // get the last outputted function to match against
+      String currentCode = getJsForRange(clusteredIndices[currentFunction]);
+      int bestDistance = 99999;
+      int bestIndex = 0;
+      int bestFunction = 0;
+
+      Iterator<Integer> it = functionIndices.iterator();
+      int count = 0;
+      // search up to MAX_DIST functions for the best match
+      while (it.hasNext() &&
+             count < Math.min(MAX_DIST, functionIndices.size())) {
+        int functionIndex = it.next();
+        String testCode = getJsForRange(functionIndex);
+        int distanceLimit = Math.min(bestDistance, MAX_DISTANCE_LIMIT);
+        int dist = levdist(currentCode, testCode, distanceLimit);
+        if (dist < bestDistance) {
+          bestDistance = dist;
+          bestIndex = count;
+          bestFunction = functionIndex;
+        }
+        count++;
+      }
+      // output the best match and remove it from worklist of functions
+      currentFunction++;
+      clusteredIndices[currentFunction] = bestFunction;
+      functionIndices.remove(bestIndex);
+    }
+
+    recomputeJsAndStatementRanges(clusteredIndices);
+  }
+
+  @Override
+  protected void endStatements(StringBuilder newJs, ArrayList<Integer> starts,
+      ArrayList<Integer> ends) {
+    // Then output everything else that is not a function.
+    for (int i = 0; i < statementRanges.numStatements(); i++) {
+      String code = getJsForRange(i);
+      if (!code.startsWith("function")) {
+        addStatement(code, newJs, starts, ends);
+      }
+    }
+    super.endStatements(newJs, starts, ends);
+  }
+
+  /**
+   * Compute the Levenshtein distance between two strings, up to a distance
+   * limit.
+   */
+  private int levdist(String str1, String str2, int limit) {
+    if (str1.length() > str2.length()) {
+      return levdist(str2, str1, limit);
+    }
+    if (str1.length() == 0) {
+      return str2.length();
+    }
+    if (Math.abs(str1.length() - str2.length()) >= limit) {
+      return limit;
+    }
+
+    int str1len = str1.length();
+    int str2len = str2.length();
+
+    int lastRow[] = new int[str2len + 1];
+    int nextRow[] = new int[str2len + 1];
+
+    for (int j = 0; j <= Math.min(str2len, limit + 1); j++) {
+      lastRow[j] = j;
+    }
+
+    for (int i = 1; i <= str1len; i++) {
+      nextRow[0] = i;
+
+      if (i >= limit) {
+        nextRow[i - limit] = limit;
+      }
+      if (i >= limit + 1) {
+        nextRow[i - limit - 1] = limit;
+      }
+      if (i + limit <= str2len) {
+        nextRow[i + limit] = limit;
+      }
+      if (i + limit + 1 <= str2len) {
+        nextRow[i + limit + 1] = limit;
+      }
+
+      char c1 = str1.charAt(i - 1);
+
+      int j = Math.max(1, (i - limit + 1));
+      int jmax = Math.min(str2len, (i + limit - 1));
+
+      while (j <= jmax) {
+        char c2 = str2.charAt(j - 1);
+        int costSwap = c1 == c2 ? 0 : 1;
+        nextRow[j] = Math.min(Math.min(lastRow[j] + 1, nextRow[j - 1] + 1),
+            lastRow[j - 1] + costSwap);
+        j = j + 1;
+      }
+      int tmpRow[] = nextRow;
+      nextRow = lastRow;
+      lastRow = tmpRow;
+    }
+    return lastRow[Math.min(str2len, limit)];
+  }
+
+  private int stmtSize(int index1) {
+    return statementRanges.end(index1) - statementRanges.start(index1);
+  }
+}
diff --git a/dev/core/src/com/google/gwt/dev/jjs/impl/JsIEBlockTextTransformer.java b/dev/core/src/com/google/gwt/dev/jjs/impl/JsIEBlockTextTransformer.java
new file mode 100644
index 0000000..1088cff
--- /dev/null
+++ b/dev/core/src/com/google/gwt/dev/jjs/impl/JsIEBlockTextTransformer.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2009 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.google.gwt.dev.jjs.impl;
+
+import com.google.gwt.core.ext.linker.StatementRanges;
+
+import java.util.ArrayList;
+
+/**
+ * Limits top-level blocks to MAX_BLOCK_SIZE statements. 
+ */
+public class JsIEBlockTextTransformer extends JsAbstractTextTransformer {
+
+  // uncomment to test
+
+  //  private static final int MAX_BLOCK_SIZE = 10;
+  private static final int MAX_BLOCK_SIZE = 1 << 15 - 1;
+
+  private boolean doSplits;
+
+  private int currentStatementCount;
+
+  public JsIEBlockTextTransformer(String js, StatementRanges statementRanges) {
+    super(js, statementRanges);
+  }
+
+  public JsIEBlockTextTransformer(JsAbstractTextTransformer xformer) {
+    super(xformer);
+  }
+
+  /**
+   * Do not perform clustering, only fix up IE7 block issue.
+   */
+  public void exec() {
+    doSplits = statementRanges.numStatements() > MAX_BLOCK_SIZE;
+    if (doSplits) {
+      int statementIndices[] = new int[statementRanges.numStatements()];
+      for (int i = 0; i < statementRanges.numStatements(); i++) {
+        statementIndices[i] = i;
+      }
+      recomputeJsAndStatementRanges(statementIndices);
+    }
+  }
+
+  /**
+   * Record start of statement, and optionally inject new open block.
+   */
+  protected void beginStatement(StringBuilder newJs,
+      ArrayList<Integer> starts) {
+    if (doSplits && currentStatementCount == 0) {
+      super.beginStatement(newJs, starts);
+      newJs.append('{');
+    } else if (!doSplits) {
+      super.beginStatement(newJs, starts);
+    }
+  }
+
+  @Override
+  protected void beginStatements(StringBuilder newJs, ArrayList<Integer> starts,
+      ArrayList<Integer> ends) {
+    super.beginStatements(newJs, starts, ends);
+    currentStatementCount = 0;
+  }
+
+  /**
+   * Record end of statement, and optionally inject close block, if block is
+   * full.
+   */
+  protected void endStatement(StringBuilder newJs, ArrayList<Integer> ends) {
+    currentStatementCount++;
+    if (doSplits && currentStatementCount == MAX_BLOCK_SIZE) {
+      newJs.append('}');
+      super.endStatement(newJs, ends);
+      currentStatementCount = 0;
+    } else if (!doSplits) {
+      super.endStatement(newJs, ends);
+    }
+  }
+
+  /**
+   * Used to close a trailing block which never filled.
+   */
+  @Override
+  protected void endStatements(StringBuilder newJs, ArrayList<Integer> starts,
+      ArrayList<Integer> ends) {
+    optionallyCloseLastBlock(newJs, ends);
+    super.endStatements(newJs, starts, ends);
+  }
+
+  /**
+   * Close last block if it never filled.
+   */
+  private void optionallyCloseLastBlock(StringBuilder newJs,
+      ArrayList<Integer> ends) {
+    if (doSplits && currentStatementCount > 1
+        && currentStatementCount < MAX_BLOCK_SIZE) {
+      newJs.append("}");
+      ends.add(newJs.length());
+    }
+  }
+}
\ No newline at end of file
diff --git a/dev/core/src/com/google/gwt/dev/js/JsIEBlockSizeVisitor.java b/dev/core/src/com/google/gwt/dev/js/JsIEBlockSizeVisitor.java
index 8927798..8ff10c1 100644
--- a/dev/core/src/com/google/gwt/dev/js/JsIEBlockSizeVisitor.java
+++ b/dev/core/src/com/google/gwt/dev/js/JsIEBlockSizeVisitor.java
@@ -33,7 +33,7 @@
  * within a JsBlock. This visitor will restructure blocks and other block-like
  * structures with too many statements in order to reduce the total number of
  * statements that appear within any given block to fewer than
- * {@value #MAX_BLOCK_SIZE} statements by creating nested blocks:
+ * {@value #MAX_BLOCK_SIZE} statements by creating nested blocks:  
  * 
  * <pre>
  * {
@@ -60,7 +60,10 @@
 
     @Override
     public void endVisit(JsBlock x, JsContext<JsStatement> ctx) {
-      restructure(x.getStatements());
+      // JsFunctionClusterer handles restructuring top-level statement blocks
+      if (!x.isGlobalBlock()) {
+        restructure(x.getStatements());
+      }
     }
 
     @Override