blob: 2ad632448766f245f04075435b14521e1cdde578 [file] [log] [blame]
* Copyright 2009 Google Inc.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
import java.util.regex.Matcher;
import java.util.regex.Pattern;
* An HTML context-aware parser for a simple HTML template language.
* <p>
* This parser parses templates consisting of HTML markup, with template
* variables of the form {@code "{n}"}. For example, a template might look like,
* <pre> {@code
* <span style="{0}"><a href="{1}/{2}">{3}</a></span>
* }</pre>
* <p>
* The parser is lenient, and will accept HTML that is not well-formed; the
* accepted set of HTML is similar to what is typically accepted by browsers.
* However, the following constraints on the HTML template are enforced:
* <ol>
* <li>Template variables may not appear in a JavaScript context (inside a
* {@code <script>} tag, or in an event handler such as {@code onClick}).</li>
* <li>Template variables may not appear inside HTML comments.</li>
* <li>If a template variable appears inside the value of an attribute, the
* value must be enclosed in quotes.</li>
* <li>Template variables may not appear in the context of an attribute name,
* nor elsewhere inside a tag except within a quoted attribute value.
* </li>
* <li>The template must end in "inner HTML" context, and not inside a tag or
* attribute.</li>
* </ol>
* <p>
* The parser produces a parsed form of the template (returned as a
* {@link ParsedHtmlTemplate}) consisting of a sequence of chunks corresponding
* to the literal strings and parameters of the template. The parser is HTML
* context aware and tags each parameter with its parameter index as well as a
* {@link HtmlContext} that corresponds to the HTML context in which the
* parameter occurs in the template.
* <p>
* The following contexts are recognized and instantiated:
* <dl>
* <dt>{@link HtmlContext.Type#TEXT}
* <dd>This context corresponds to basic inner text. In the above example,
* parameter #3 would be tagged with this context.
* <dt>{@link HtmlContext.Type#URL_START}
* <dd>This context corresponds to a parameter that appears at the very start of
* a URL-valued HTML attribute's value; in the above example this applies to
* parameter #1.
* <dt>{@link HtmlContext.Type#CSS_ATTRIBUTE}
* <dd>This context corresponds to a parameter that appears in the context of a
* {@code style} attribute; in the above example this applies to
* parameter #0.
* <dt>{@link HtmlContext.Type#ATTRIBUTE_VALUE}
* <dd>This context corresponds to a parameter that appears within an attribute
* and is not in one of the more specific in-attribute contexts above. In
* the example, this applies to parameter #2.
* <dt>{@link HtmlContext.Type#CSS}
* <dd>This context corresponds to a parameter that appears within a
* {@code <style>} tag.
* </dl>
* <p>
* For attribute contexts, the {@code tag} and {@code attribute} properties
* of the context are set to the name of the enclosing tag and attribute,
* respectively.
* <p>
* The implementation is subject to the following limitation:
* <p>
* There is no escaping mechanism for the parameter syntax, i.e. it is
* impossible to write a template that results in a literal output chunk
* containing a substring of the form "{@code {0}}".
* <p>
* This class is not thread safe.
final class HtmlTemplateParser {
* Pattern to find template parameter references.
private static final Pattern TEMPLATE_PARAM_PATTERN =
// The TreeLogger supplied to the constructor.
private final TreeLogger logger;
// Created once in the constructor and handed out by getParsedTemplate().
private final ParsedHtmlTemplate parsedTemplate;
// Obtained from HtmlParserFactory.createParser(); its state is queried to
// classify the HTML context of each template parameter.
private final HtmlParser streamHtmlParser;
* The template string being parsed.
private String template;
* The index in the template up to which the template has been parsed.
* <p>
* Used for error reporting.
private int parsePosition;
* Creates a {@link HtmlTemplateParser}.
* @param logger the {@link TreeLogger} to log to
public HtmlTemplateParser(TreeLogger logger) {
// Keep the caller's logger and set up the two collaborators this parser
// owns: a new ParsedHtmlTemplate and a stream HTML parser instance from
// HtmlParserFactory.
this.logger = logger;
this.parsedTemplate = new ParsedHtmlTemplate();
this.streamHtmlParser = HtmlParserFactory.createParser();
* Returns the parsed representation of the template.
public ParsedHtmlTemplate getParsedTemplate() {
// Returns the field directly (no defensive copy); this is the same instance
// that was created in the constructor.
return parsedTemplate;
* Parses a {@link String} that may contain template parameters of the form
* {@code {n}} into corresponding literal and parameter
* {@link ParsedHtmlTemplate.TemplateChunk}s.
* @param template the template {@link String} to parse
* @throws UnableToCompleteException if an unrecoverable parse error occurs
// @VisibleForTesting
void parseTemplate(String template) throws UnableToCompleteException {
// NOTE(review): several statements in this method appear truncated in this
// listing (the call receiving the literal substring, the argument to
// Integer.parseInt, the call receiving the ParameterChunk, and the call
// receiving the error message) -- confirm against the full source.

// Remember the input and reset the progress marker; both are read by
// getTemplateParsedSoFar() when error messages are built.
this.template = template;
this.parsePosition = 0;
Matcher match = TEMPLATE_PARAM_PATTERN.matcher(template);
// Offset just past the most recent parameter reference (0 before the first).
int endOfPreviousMatch = 0;
while (match.find()) {
if (match.start() > endOfPreviousMatch) {
// There is a non-empty string between the previous match and this
// match; add this as a literal chunk to the parsed representation.
template.substring(endOfPreviousMatch, match.start()));
parsePosition = match.start();
int paramIndex = Integer.parseInt(;
// Move the marker past the parameter reference before the context lookup
// below, so that getTemplateParsedSoFar() includes the reference itself in
// any error text produced by getHtmlContextFromParseState().
parsePosition = match.end();
// Pair the parameter index with the HTML context the stream parser is
// currently in.
new ParameterChunk(getHtmlContextFromParseState(), paramIndex));
endOfPreviousMatch = match.end();
// Add a literal chunk for the substring after the last match, if any.
if (endOfPreviousMatch < template.length()) {
// The stream parser must be back in STATE_TEXT at this point; any other
// state is rejected with UnableToCompleteException.
if (!streamHtmlParser.getState().equals(HtmlParser.STATE_TEXT)) {
"Template does not end in inner-HTML context: " + template);
throw new UnableToCompleteException();
* Determines the {@link HtmlContext} in the parser's current state.
* <p>
* This method translates from the stream HTML parser's internal state
* representation to our HTML context representation, and is intended to be
* invoked at the point where a template variable is encountered.
* <p>
* This method checks for certain illegal/unsupported template constructs,
* such as template variables that occur in an un-quoted attribute (see this
* class' class documentation for details).
* @throws UnableToCompleteException if an illegal/unsupported template
* construct is encountered
private HtmlContext getHtmlContextFromParseState()
throws UnableToCompleteException {
// NOTE(review): the calls that consume the message strings below (error
// reporting and the tag/attribute sanity checks) are not visible in this
// listing -- confirm against the full source.

// A stream parser already in its error state cannot yield a usable context.
if (streamHtmlParser.getState().equals(HtmlParser.STATE_ERROR)) {
"Parsing template resulted in parse error: "
+ getTemplateParsedSoFar());
throw new UnableToCompleteException();
// Parameters in any JavaScript context are rejected.
if (streamHtmlParser.inJavascript()) {
"Template variables in javascript context are not supported: "
+ getTemplateParsedSoFar());
throw new UnableToCompleteException();
// Parameters inside HTML comments are rejected.
if (streamHtmlParser.getState().equals(HtmlParser.STATE_COMMENT)) {
"Template variables inside HTML comments are not supported: "
+ getTemplateParsedSoFar());
throw new UnableToCompleteException();
// Text state outside of CSS maps to TEXT. Text state while inCss() is true
// does not match here and is picked up by the later inCss() branch.
} else if (streamHtmlParser.getState().equals(HtmlParser.STATE_TEXT)
&& !streamHtmlParser.inCss()) {
return new HtmlContext(HtmlContext.Type.TEXT);
// Inside an attribute value: record the enclosing tag and attribute names
// so they can be attached to the resulting context.
} else if (streamHtmlParser.getState().equals(HtmlParser.STATE_VALUE)) {
final String tag = streamHtmlParser.getTag();
final String attribute = streamHtmlParser.getAttribute();
// Per the messages, tag and attribute are expected to be non-empty here.
"streamHtmlParser.getTag() should not be empty while in "
+ "attribute value context; at %s", getTemplateParsedSoFar());
"streamHtmlParser.getAttribute() should not be empty while in "
+ "attribute value context; at %s", getTemplateParsedSoFar());
// Parameters are only accepted in quoted attribute values.
if (!streamHtmlParser.isAttributeQuoted()) {
"Template variable in unquoted attribute value: "
+ getTemplateParsedSoFar());
throw new UnableToCompleteException();
// Most specific attribute context wins: the URL-start check is made first,
// then CSS, and everything else is a generic attribute value.
if (streamHtmlParser.isUrlStart()) {
return new HtmlContext(HtmlContext.Type.URL_START, tag, attribute);
} else if (streamHtmlParser.inCss()) {
return new HtmlContext(HtmlContext.Type.CSS_ATTRIBUTE, tag, attribute);
} else {
return new HtmlContext(
HtmlContext.Type.ATTRIBUTE_VALUE, tag, attribute);
// In CSS but not in an attribute value: plain CSS context, created without
// tag/attribute names.
} else if (streamHtmlParser.inCss()) {
return new HtmlContext(HtmlContext.Type.CSS);
// Inside a tag or an attribute (outside of its value): rejected.
} else if (streamHtmlParser.getState().equals(HtmlParser.STATE_TAG)
|| streamHtmlParser.inAttribute()) {
"Template variables in tags or in attribute names are not supported: "
+ getTemplateParsedSoFar());
throw new UnableToCompleteException();
// Fallback for any parser state not matched above.
// NOTE(review): the message below misspells "unhandled" and has no
// separator between the text and the appended state value.
"unhandeled/illegal parse state" + streamHtmlParser.getState());
throw new UnableToCompleteException();
* Returns the prefix of the template string that has been parsed so far.
private String getTemplateParsedSoFar() {
// parsePosition is updated by parseTemplate as it advances; everything
// before that index is what gets quoted in error messages.
return template.substring(0, parsePosition);
* Feeds a literal string to the stream parser and appends it to the parsed
* template representation.
* @param segment the template segment to parse and append to the parsed
* template representation
* @throws UnableToCompleteException if an unrecoverable parse error occurs
private void parseAndAppendTemplateSegment(String segment)
throws UnableToCompleteException {
// NOTE(review): the statements inside the try block and the call that
// receives the message below are not visible in this listing -- confirm
// against the full source.
try {
} catch (ParseException cause) {
// The message names both the failing segment and the whole template, and
// the ParseException is passed along with it. The exception thrown to the
// caller is created without arguments, so it does not carry the cause.
"Parse exception when parsing segment '" + segment + "' of template '"
+ template + "'", cause);
throw new UnableToCompleteException();