blob: 9ee8454248ebdad5b2fec5b7c50785cac8a8215e [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.gwt.dev.util.editdistance;
/**
* A bitmap array generator for the positions that a given character
* appears within a pattern. The array of bitmaps is indexed by a
* CharIndex that represents the alphabet for the pattern.
* One bitmap is produced for each character. The bitmap for
* a character "c" has the <tt>(1<<i)</tt> bit set for each position "i"
* where that character appears in the pattern.
*
* Several implementations are provided for different string
* sizes: int, long, int[]. The caller provides a (zeroed) array of the
* desired type, sized according to the CharIndex; the array is
* returned as a convenience.
*/
public class PatternBitmap {
/**
* Computes a pattern bitmap for a short character sequence.
* For each character in the alphabet (represented by a CharIndex),
* bits are set where that character appears in the sequence.
* For this generator, the pattern must fit in an "int": the
* character sequence must not be longer than Integer.SIZE.
* @param s the character sequence defining the bits to be set
* @param idx the alphabet for which bitmaps should be produced
* @param result an array (of size <tt>idx.size()</tt> or greater) to
* hold the resulting bitmaps
* @return the result array passed as a parameter, for convenience
*/
public static int[] map(CharSequence s, CharIndex idx, int[] result) {
int len = s.length();
assert (len <= Integer.SIZE);
for (int i = 0; i < len; i++) {
result[idx.lookup(s.charAt(i))] |= (1 << i);
}
return result;
}
/**
* Computes a pattern bitmap for a character sequence of any length.
* For each character in the alphabet (represented by a CharIndex),
* bits are set where that character appears in the sequence.
* Each (per-character) bitmap is represented by an "int[]".
* @param s the character sequence defining the bits to be set
* @param idx the alphabet for which bitmaps should be produced
* @param result an array (of size <tt>idx.size()</tt> or greater) to
* hold the resulting bitmaps
* @param width how many bits to be use in each word, no more
* than Integer.SIZE
* @return the result array passed as a parameter, for convenience
*/
public static int[][] map(CharSequence s, CharIndex idx,
int[][] result, int width) {
assert (width <= Integer.SIZE);
int len = s.length();
int rowSize = (len + width - 1) / width;
/*
* Use one zero-filled bitmap for alphabet characters not in the pattern
*/
int[] nullElement = new int[rowSize];
java.util.Arrays.fill(result, nullElement);
int wordIndex = 0; /* Which word we are on now */
int bitWithinWord = 0; /* Which bit within that word */
for (int i = 0; i < s.length(); i++) {
int[] r = result[idx.lookup(s.charAt(i))];
if (r == nullElement) {
/* Create a separate zero-filled bitmap for this alphabet character */
r = result[idx.lookup(s.charAt(i))] = new int[rowSize];
}
r[wordIndex] |= (1 << bitWithinWord);
/* Step to the next bit (and word if appropriate) */
if (++bitWithinWord == width) {
bitWithinWord = 0;
wordIndex++;
}
}
return result;
}
/**
* Computes a pattern bitmap for a medium character sequence.
* For each character in the alphabet (represented by a CharIndex),
* bits are set where that character appears in the sequence.
* For this generator, the pattern must fit in a "long": the
* character sequence must not be longer than Long.SIZE.
* @param s the character sequence defining the bits to be set
* @param idx the alphabet for which bitmaps should be produced
* @param result an array (of size <tt>idx.size()</tt> or greater) to
* hold the resulting bitmaps
* @return the result array passed as a parameter, for convenience
*/
public static long[] map(CharSequence s, CharIndex idx, long[] result) {
int len = s.length();
assert (len <= Long.SIZE);
for (int i = 0; i < len; i++) {
result[idx.lookup(s.charAt(i))] |= (1L << i);
}
return result;
}
private PatternBitmap() { /* Prevent instantiation */ }
}