1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.esigate.parser;
17
18 import java.io.IOException;
19 import java.util.ArrayList;
20 import java.util.Collections;
21 import java.util.List;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.apache.http.HttpResponse;
26 import org.esigate.HttpErrorPage;
27 import org.esigate.impl.DriverRequest;
28 import org.slf4j.Logger;
29 import org.slf4j.LoggerFactory;
30
31 public class Parser {
32 private static final Logger LOG = LoggerFactory.getLogger(Parser.class);
33 private final Pattern pattern;
34 private final List<ElementType> elementTypes;
35 private DriverRequest httpRequest;
36 private HttpResponse httpResponse;
37
38
39
40
41
42
43
44
45
46 public Parser(Pattern pattern, ElementType... elementTypes) {
47 this.pattern = pattern;
48 this.elementTypes = new ArrayList<>(elementTypes.length + 1);
49 Collections.addAll(this.elementTypes, elementTypes);
50 this.elementTypes.add(new UnknownElementType());
51
52 }
53
54
55
56
57
58
59
60
61
62
63
64 public void parse(CharSequence in, Appendable out) throws IOException, HttpErrorPage {
65 ParserContextImpl ctx = new ParserContextImpl(out, httpRequest, httpResponse);
66 Matcher matcher = pattern.matcher(in);
67 int currentPosition = 0;
68 while (matcher.find()) {
69 String tag = matcher.group();
70 ctx.characters(in, currentPosition, matcher.start());
71 currentPosition = matcher.end();
72 if (ctx.isCurrentTagEnd(tag)) {
73
74 LOG.info("Processing end tag {}", tag);
75 ctx.endElement(tag);
76 } else {
77
78 LOG.info("Processing start tag {}", tag);
79 ElementType type = null;
80 for (ElementType t : elementTypes) {
81 if (t.isStartTag(tag)) {
82 type = t;
83 break;
84 }
85 }
86 Element element = type.newInstance();
87 ctx.startElement(type, element, tag);
88 if (type.isSelfClosing(tag)) {
89 ctx.endElement(tag);
90 }
91
92 }
93 }
94
95 ctx.characters(in, currentPosition, in.length());
96 }
97
98 public void setHttpRequest(DriverRequest httpRequest) {
99 this.httpRequest = httpRequest;
100 }
101
102 }