| /* |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| * use this file except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| * License for the specific language governing permissions and limitations under |
| * the License. |
| */ |
| package com.google.gwt.dev.util.editdistance; |
| |
| /** |
| * A bitmap array generator for the positions that a given character |
| * appears within a pattern. The array of bitmaps is indexed by a |
| * CharIndex that represents the alphabet for the pattern. |
| * One bitmap is produced for each character. The bitmap for |
| * a character "c" has the <tt>(1<<i)</tt> bit set for each position "i" |
| * where that character appears in the pattern. |
| * |
| * Several implementations are provided for different string |
| * sizes: int, long, int[]. The caller provides a (zeroed) array of the |
| * desired type, sized according to the CharIndex; the array is |
| * returned as a convenience. |
| */ |
| public class PatternBitmap { |
| /** |
| * Computes a pattern bitmap for a short character sequence. |
| * For each character in the alphabet (represented by a CharIndex), |
| * bits are set where that character appears in the sequence. |
| * For this generator, the pattern must fit in an "int": the |
| * character sequence must not be longer than Integer.SIZE. |
| * @param s the character sequence defining the bits to be set |
| * @param idx the alphabet for which bitmaps should be produced |
| * @param result an array (of size <tt>idx.size()</tt> or greater) to |
| * hold the resulting bitmaps |
| * @return the result array passed as a parameter, for convenience |
| */ |
| public static int[] map(CharSequence s, CharIndex idx, int[] result) { |
| int len = s.length(); |
| assert (len <= Integer.SIZE); |
| for (int i = 0; i < len; i++) { |
| result[idx.lookup(s.charAt(i))] |= (1 << i); |
| } |
| return result; |
| } |
| |
| /** |
| * Computes a pattern bitmap for a character sequence of any length. |
| * For each character in the alphabet (represented by a CharIndex), |
| * bits are set where that character appears in the sequence. |
| * Each (per-character) bitmap is represented by an "int[]". |
| * @param s the character sequence defining the bits to be set |
| * @param idx the alphabet for which bitmaps should be produced |
| * @param result an array (of size <tt>idx.size()</tt> or greater) to |
| * hold the resulting bitmaps |
| * @param width how many bits to be use in each word, no more |
| * than Integer.SIZE |
| * @return the result array passed as a parameter, for convenience |
| */ |
| public static int[][] map(CharSequence s, CharIndex idx, |
| int[][] result, int width) { |
| assert (width <= Integer.SIZE); |
| int len = s.length(); |
| int rowSize = (len + width - 1) / width; |
| |
| /* |
| * Use one zero-filled bitmap for alphabet characters not in the pattern |
| */ |
| int[] nullElement = new int[rowSize]; |
| java.util.Arrays.fill(result, nullElement); |
| |
| int wordIndex = 0; /* Which word we are on now */ |
| int bitWithinWord = 0; /* Which bit within that word */ |
| |
| for (int i = 0; i < s.length(); i++) { |
| int[] r = result[idx.lookup(s.charAt(i))]; |
| if (r == nullElement) { |
| |
| /* Create a separate zero-filled bitmap for this alphabet character */ |
| r = result[idx.lookup(s.charAt(i))] = new int[rowSize]; |
| } |
| r[wordIndex] |= (1 << bitWithinWord); |
| |
| /* Step to the next bit (and word if appropriate) */ |
| if (++bitWithinWord == width) { |
| bitWithinWord = 0; |
| wordIndex++; |
| } |
| } |
| return result; |
| } |
| |
| /** |
| * Computes a pattern bitmap for a medium character sequence. |
| * For each character in the alphabet (represented by a CharIndex), |
| * bits are set where that character appears in the sequence. |
| * For this generator, the pattern must fit in a "long": the |
| * character sequence must not be longer than Long.SIZE. |
| * @param s the character sequence defining the bits to be set |
| * @param idx the alphabet for which bitmaps should be produced |
| * @param result an array (of size <tt>idx.size()</tt> or greater) to |
| * hold the resulting bitmaps |
| * @return the result array passed as a parameter, for convenience |
| */ |
| public static long[] map(CharSequence s, CharIndex idx, long[] result) { |
| int len = s.length(); |
| assert (len <= Long.SIZE); |
| for (int i = 0; i < len; i++) { |
| result[idx.lookup(s.charAt(i))] |= (1L << i); |
| } |
| return result; |
| } |
| private PatternBitmap() { /* Prevent instantiation */ } |
| } |