blob: 4e61d6e517fb722abee581a397077e5b9880b066 [file] [log] [blame]
/*
* Copyright 2009 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.gwt.dev.jjs.impl;
import com.google.gwt.core.ext.linker.StatementRanges;
import com.google.gwt.core.ext.soyc.Range;
import com.google.gwt.dev.jjs.SourceInfo;
import com.google.gwt.dev.util.editdistance.GeneralEditDistance;
import com.google.gwt.dev.util.editdistance.GeneralEditDistances;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.regex.Pattern;
/**
* Re-orders function declarations according to a given metric and clustering
* algorithm in order to boost gzip/deflation compression efficiency. This
* version uses the edit-distance algorithm as a metric, and a semi-greedy
* strategy for grouping functions together.
*/
public class JsFunctionClusterer extends JsAbstractTextTransformer {
/**
* Used by isFunctionDeclaration to check a statement is a function
* declaration or not. This should match standard declarations, such as
* "function a() { ... }" and "jslink.a=function() { ... }". The latter form
* is typically emitted by the cross-site linker.
*/
private static final Pattern functionDeclarationPattern = Pattern
.compile("function |[a-zA-Z][.$_a-zA-Z0-9]*=function");
/**
* Functions which have an edit-distance greater than this limit are
* considered equally different.
*/
private static final int MAX_DISTANCE_LIMIT = 100;
/**
* Maximum number of functions to search for minimal edit-distance before
* giving up.
*/
private static final int SEARCH_LIMIT = 10;
/**
* Tells whether a statement is a function declaration or not.
*/
private static boolean isFunctionDeclaration(String code) {
return functionDeclarationPattern.matcher(code).lookingAt();
}
/**
* Number of function declarations found.
*/
private int numFunctions;
/**
* The statement indices after clustering. The element at index j represents
* the index of the statement in the original code that is moved to index j
* in the new code after clustering.
*/
private int[] reorderedIndices;
public JsFunctionClusterer(JsAbstractTextTransformer xformer) {
super(xformer);
}
public JsFunctionClusterer(String js, StatementRanges statementRanges,
Map<Range, SourceInfo> sourceInfoMap) {
super(js, statementRanges, sourceInfoMap);
}
@Override
public void exec() {
LinkedList<Integer> functionIndices = new LinkedList<Integer>();
// gather up all of the indices of function decl statements
for (int i = 0; i < statementRanges.numStatements(); i++) {
String code = getJsForRange(i);
if (isFunctionDeclaration(code)) {
functionIndices.add(i);
}
}
numFunctions = functionIndices.size();
if (functionIndices.size() < 2) {
// No need to sort 0 or 1 functions.
return;
}
// sort the indices according to size of statement range
Collections.sort(functionIndices, new Comparator<Integer>() {
public int compare(Integer index1, Integer index2) {
return stmtSize(index1) - (stmtSize(index2));
}
});
// used to hold the new output order
int[] clusteredIndices = new int[functionIndices.size()];
int currentFunction = 0;
// remove the first function and stick it in the output array
clusteredIndices[currentFunction] = functionIndices.get(0);
functionIndices.remove(0);
while (!functionIndices.isEmpty()) {
// get the last outputted function to match against
String currentCode = getJsForRange(clusteredIndices[currentFunction]);
final GeneralEditDistance editDistance =
GeneralEditDistances.getLevenshteinDistance(currentCode);
int bestIndex = 0;
int bestFunction = functionIndices.getFirst();
int bestDistance = MAX_DISTANCE_LIMIT;
int count = 0;
for (int functionIndex : functionIndices) {
if (count >= SEARCH_LIMIT) {
break;
}
String testCode = getJsForRange(functionIndex);
int dist = editDistance.getDistance(testCode, bestDistance);
if (dist < bestDistance) {
bestDistance = dist;
bestIndex = count;
bestFunction = functionIndex;
}
count++;
}
// output the best match and remove it from worklist of functions
currentFunction++;
clusteredIndices[currentFunction] = bestFunction;
functionIndices.remove(bestIndex);
}
reorderedIndices = Arrays.copyOf(clusteredIndices, statementRanges.numStatements());
recomputeJsAndStatementRanges(clusteredIndices);
}
/**
* Returns the array of reordered statement indices after clustering.
* @return The array of indices, where the element at index j represents
* the index of the statement in the original code that is moved to index j
* in the new code after clustering.
*/
public int[] getReorderedIndices() {
return reorderedIndices;
}
@Override
protected void endStatements(StringBuilder newJs, ArrayList<Integer> starts,
ArrayList<Integer> ends) {
int j = numFunctions;
// Then output everything else that is not a function.
for (int i = 0; i < statementRanges.numStatements(); i++) {
String code = getJsForRange(i);
if (!isFunctionDeclaration(code)) {
addStatement(j, code, newJs, starts, ends);
reorderedIndices[j] = i;
j++;
}
}
super.endStatements(newJs, starts, ends);
}
/**
* Fixes the index ranges of individual expressions in the generated
* JS after function clustering has reordered statements. Loops over
* each expression, determines which statement it falls in, and shifts
* the indices according to where that statement moved.
*/
@Override
protected void updateSourceInfoMap() {
if (sourceInfoMap != null) {
// create mapping of statement ranges
Map<Range, Range> statementShifts = new HashMap<Range, Range>();
for (int j = 0; j < statementRanges.numStatements(); j++) {
int permutedStart = statementRanges.start(j);
int permutedEnd = statementRanges.end(j);
int originalStart = originalStatementRanges.start(reorderedIndices[j]);
int originalEnd = originalStatementRanges.end(reorderedIndices[j]);
statementShifts.put(new Range(originalStart, originalEnd),
new Range(permutedStart, permutedEnd));
}
Range[] oldStatementRanges = statementShifts.keySet().toArray(new Range[0]);
Arrays.sort(oldStatementRanges, Range.SOURCE_ORDER_COMPARATOR);
Range[] oldExpressionRanges = sourceInfoMap.keySet().toArray(new Range[0]);
Arrays.sort(oldExpressionRanges, Range.SOURCE_ORDER_COMPARATOR);
// iterate over expression ranges and shift
Map<Range, SourceInfo> updatedInfoMap = new HashMap<Range, SourceInfo>();
Range entireProgram =
new Range(0, oldStatementRanges[oldStatementRanges.length - 1].getEnd());
for (int i = 0, j = 0; j < oldExpressionRanges.length; j++) {
Range oldExpression = oldExpressionRanges[j];
if (oldExpression.equals(entireProgram)) {
updatedInfoMap.put(oldExpression, sourceInfoMap.get(oldExpression));
continue;
}
if (!oldStatementRanges[i].contains(oldExpressionRanges[j])) {
// expression should fall in the next statement
i++;
assert oldStatementRanges[i].contains(oldExpressionRanges[j]);
}
Range oldStatement = oldStatementRanges[i];
Range newStatement = statementShifts.get(oldStatement);
int shift = newStatement.getStart() - oldStatement.getStart();
Range oldExpressionRange = oldExpressionRanges[j];
Range newExpressionRange = new Range(oldExpressionRange.getStart() + shift,
oldExpressionRange.getEnd() + shift);
updatedInfoMap.put(newExpressionRange, sourceInfoMap.get(oldExpressionRange));
}
sourceInfoMap = updatedInfoMap;
}
}
private int stmtSize(int index1) {
return statementRanges.end(index1) - statementRanges.start(index1);
}
}