1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.esigate.impl;
17
18 import java.net.URI;
19 import java.util.regex.Matcher;
20 import java.util.regex.Pattern;
21
22 import org.apache.commons.lang3.StringEscapeUtils;
23 import org.apache.commons.lang3.StringUtils;
24 import org.esigate.util.UriUtils;
25 import org.slf4j.Logger;
26 import org.slf4j.LoggerFactory;
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43 public class UrlRewriter {
44 private static final Logger LOG = LoggerFactory.getLogger(UrlRewriter.class);
45
46 private static final Pattern URL_PATTERN = Pattern.compile(
47 "<([^\\!:>]+)(src|href|action|background|content)\\s*=\\s*('[^<']*'|\"[^<\"]*\")([^>]*)>",
48 Pattern.CASE_INSENSITIVE);
49
50 private static final Pattern JAVASCRIPT_CONCATENATION_PATTERN = Pattern.compile(
51 "\\+\\s*'|\\+\\s*\"|'\\s*\\+|\"\\s*\\+", Pattern.CASE_INSENSITIVE);
52
53 private static final Pattern META_REFRESH_PATTERN = Pattern.compile(
54 "<\\s*meta([^>]+)http-equiv\\s*=\\s*(\"|')refresh(\"|')", Pattern.CASE_INSENSITIVE);
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72 public UrlRewriter() {
73 }
74
75
76
77
78
79
80
81
82
83
84
85
86
87 public String rewriteReferer(String referer, String baseUrl, String visibleBaseUrl) {
88 URI uri = UriUtils.createURI(referer);
89
90
91 if (!baseUrl.endsWith("/")) {
92 baseUrl = baseUrl + "/";
93 }
94 URI baseUri = UriUtils.createURI(baseUrl);
95
96
97 if (!visibleBaseUrl.endsWith("/")) {
98 visibleBaseUrl = visibleBaseUrl + "/";
99 }
100 URI visibleBaseUri = UriUtils.createURI(visibleBaseUrl);
101
102
103 URI relativeUri = visibleBaseUri.relativize(uri);
104
105 if (relativeUri.equals(uri)) {
106 LOG.debug("url kept unchanged: [{}]", referer);
107 return referer;
108 }
109
110 URI result = baseUri.resolve(relativeUri);
111 LOG.debug("referer fixed: [{}] -> [{}]", referer, result);
112 return result.toString();
113 }
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133 public String rewriteUrl(String url, String requestUrl, String baseUrl, String visibleBaseUrl, boolean absolute) {
134
135
136
137
138 if (url.startsWith("$(")) {
139 return url;
140 }
141
142
143 if (!baseUrl.endsWith("/")) {
144 baseUrl = baseUrl + "/";
145 }
146 URI baseUri = UriUtils.createURI(baseUrl);
147
148
149 if (!visibleBaseUrl.endsWith("/")) {
150 visibleBaseUrl = visibleBaseUrl + "/";
151 }
152 URI visibleBaseUri = UriUtils.createURI(visibleBaseUrl);
153
154
155 URI requestUri;
156 if (requestUrl.startsWith(visibleBaseUrl)) {
157 requestUri = UriUtils.createURI(requestUrl);
158 } else {
159 requestUri = UriUtils.concatPath(baseUri, requestUrl);
160 }
161
162
163 URI uri = UriUtils.resolve(url, requestUri);
164
165 uri = uri.normalize();
166
167
168 URI relativeUri = baseUri.relativize(uri);
169
170 if (relativeUri.equals(uri)) {
171 LOG.debug("url kept unchanged: [{}]", url);
172 return url;
173 }
174
175 URI result = visibleBaseUri.resolve(relativeUri);
176
177
178 if (!absolute) {
179 result = UriUtils.removeServer(result);
180 }
181 LOG.debug("url fixed: [{}] -> [{}]", url, result);
182 return result.toString();
183 }
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202 public CharSequence rewriteHtml(CharSequence input, String requestUrl, String baseUrlParam, String visibleBaseUrl,
203 boolean absolute) {
204 StringBuffer result = new StringBuffer(input.length());
205 Matcher m = URL_PATTERN.matcher(input);
206 while (m.find()) {
207 String url = input.subSequence(m.start(3) + 1, m.end(3) - 1).toString();
208 String tag = m.group(0);
209 String quote = input.subSequence(m.end(3) - 1, m.end(3)).toString();
210
211
212 String trimmedUrl = StringUtils.trim(url);
213
214 String rewrittenUrl = url;
215
216 trimmedUrl = unescapeHtml(trimmedUrl);
217
218 if (trimmedUrl.isEmpty()) {
219 LOG.debug("empty url kept unchanged");
220 } else if (trimmedUrl.startsWith("#")) {
221 LOG.debug("anchor url kept unchanged: [{}]", url);
222 } else if (JAVASCRIPT_CONCATENATION_PATTERN.matcher(trimmedUrl).find()) {
223 LOG.debug("url in javascript kept unchanged: [{}]", url);
224 } else if (m.group(2).equalsIgnoreCase("content")) {
225 if (META_REFRESH_PATTERN.matcher(tag).find()) {
226 rewrittenUrl = rewriteRefresh(trimmedUrl, requestUrl, baseUrlParam, visibleBaseUrl);
227 rewrittenUrl = escapeHtml(rewrittenUrl);
228 LOG.debug("refresh url [{}] rewritten [{}]", url, rewrittenUrl);
229 } else {
230 LOG.debug("content attribute kept unchanged: [{}]", url);
231 }
232 } else {
233 rewrittenUrl = rewriteUrl(trimmedUrl, requestUrl, baseUrlParam, visibleBaseUrl, absolute);
234 rewrittenUrl = escapeHtml(rewrittenUrl);
235 LOG.debug("url [{}] rewritten [{}]", url, rewrittenUrl);
236 }
237
238 m.appendReplacement(result, "");
239 result.append("<");
240 result.append(m.group(1));
241 result.append(m.group(2));
242 result.append("=");
243 result.append(quote);
244 result.append(rewrittenUrl);
245 result.append(quote);
246 if (m.groupCount() > 3) {
247 result.append(m.group(4));
248 }
249 result.append(">");
250 }
251
252 m.appendTail(result);
253
254 return result;
255 }
256
257 private String unescapeHtml(String url) {
258
259 url = StringEscapeUtils.unescapeHtml4(url);
260 return url;
261 }
262
263 private String escapeHtml(String url) {
264
265 url = StringEscapeUtils.escapeHtml4(url);
266
267 url = url.replaceAll("'", "'");
268 url = url.replaceAll("\"", """);
269 return url;
270 }
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288 public String rewriteRefresh(String input, String requestUrl, String baseUrl, String visibleBaseUrl) {
289
290
291 int urlPosition = input.indexOf("url=");
292 if (urlPosition >= 0) {
293 String urlValue = input.substring(urlPosition + "url=".length());
294 String targetUrlValue = rewriteUrl(urlValue, requestUrl, baseUrl, visibleBaseUrl, true);
295 return input.substring(0, urlPosition) + "url=" + targetUrlValue;
296 } else {
297 return input;
298 }
299 }
300 }