Parser.java
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.esigate.parser;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.http.HttpResponse;
import org.esigate.HttpErrorPage;
import org.esigate.impl.DriverRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Regular-expression driven tag parser.
 *
 * <p>
 * The input is scanned for matches of a {@link Pattern}; every match is treated as a tag and dispatched to the first
 * {@link ElementType} that recognizes it, while the text between matches is forwarded unchanged to the parser
 * context.
 */
public class Parser {
    private static final Logger LOG = LoggerFactory.getLogger(Parser.class);
    private final Pattern pattern;
    private final List<ElementType> elementTypes;
    private DriverRequest httpRequest;
    private HttpResponse httpResponse;

    /**
     * Creates a Parser with a given regular expression pattern and ElementTypes.
     *
     * <p>
     * The supplied element types are consulted in the given order; an {@link UnknownElementType} is always appended
     * as the last candidate.
     *
     * @param pattern
     *            The regular expression Pattern used to locate tags in the input
     * @param elementTypes
     *            The element types, in priority order
     */
    public Parser(Pattern pattern, ElementType... elementTypes) {
        this.pattern = pattern;
        this.elementTypes = new ArrayList<>(elementTypes.length + 1);
        Collections.addAll(this.elementTypes, elementTypes);
        this.elementTypes.add(new UnknownElementType());
    }

    /**
     * Parses the whole CharSequence.
     *
     * <p>
     * Text located between two matches of the pattern (and after the last match) is handed to the parser context as
     * characters. Each match is handled either as the end tag of the current element, when the context reports it as
     * such, or as the start tag of a new element. Start tags reported as self closing by their element type are ended
     * immediately.
     *
     * @param in
     *            The CharSequence to parse
     * @param out
     *            The Appendable to write the result to
     * @throws IOException
     *             if raised while writing to the output or while processing an element
     * @throws HttpErrorPage
     *             if raised while processing an element
     * @throws IllegalStateException
     *             if no registered element type recognizes a matched tag
     */
    public void parse(CharSequence in, Appendable out) throws IOException, HttpErrorPage {
        ParserContextImpl ctx = new ParserContextImpl(out, httpRequest, httpResponse);
        Matcher matcher = pattern.matcher(in);
        int currentPosition = 0;
        while (matcher.find()) {
            String tag = matcher.group();
            // Forward the plain text that precedes this tag, then move past the tag.
            ctx.characters(in, currentPosition, matcher.start());
            currentPosition = matcher.end();
            if (ctx.isCurrentTagEnd(tag)) {
                // check if this is the end tag for current element
                LOG.info("Processing end tag {}", tag);
                ctx.endElement(tag);
            } else {
                // if not, it is an opening tag for a new element
                LOG.info("Processing start tag {}", tag);
                ElementType type = findElementType(tag);
                if (type == null) {
                    // The trailing UnknownElementType is presumably a catch-all, so this should not happen; fail
                    // with an explicit message instead of a bare NullPointerException if it ever does.
                    throw new IllegalStateException("No element type found for tag " + tag);
                }
                Element element = type.newInstance();
                ctx.startElement(type, element, tag);
                if (type.isSelfClosing(tag)) {
                    ctx.endElement(tag);
                }
            }
        }
        // we reached the end of input
        ctx.characters(in, currentPosition, in.length());
    }

    /**
     * Returns the first registered element type that accepts the tag as a start tag, or {@code null} if none does.
     */
    private ElementType findElementType(String tag) {
        for (ElementType candidate : elementTypes) {
            if (candidate.isStartTag(tag)) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Sets the request that is passed to the parser context created by {@link #parse(CharSequence, Appendable)}.
     *
     * @param httpRequest
     *            The request
     */
    public void setHttpRequest(DriverRequest httpRequest) {
        this.httpRequest = httpRequest;
    }

    /**
     * Sets the response that is passed to the parser context created by {@link #parse(CharSequence, Appendable)}.
     * Without a call to this method the context receives {@code null} as response.
     *
     * @param httpResponse
     *            The response
     */
    public void setHttpResponse(HttpResponse httpResponse) {
        this.httpResponse = httpResponse;
    }
}