// DefaultCharset.java
package org.esigate.extension;
import java.util.Collection;
import java.util.Properties;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.Header;
import org.esigate.Driver;
import org.esigate.Parameters;
import org.esigate.events.Event;
import org.esigate.events.EventDefinition;
import org.esigate.events.EventManager;
import org.esigate.events.IEventListener;
import org.esigate.events.impl.FetchEvent;
import org.esigate.util.Parameter;
import org.esigate.util.ParameterString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This extension adds a default charset to responses that lack the charset attribute in their Content-Type header.
* Only responses with a parsable MIME type are modified:
*
* <pre>
* Content-Type: text/html
* </pre>
*
* becomes
*
* <pre>
* Content-Type: text/html; charset=utf-8
* </pre>
*
* <p>
* Default charset can be set in esigate.properties using
*
* <pre>
* driverid.defaultCharset = utf-8
* </pre>
*
* @author Nicolas Richeton
*
*/
public class DefaultCharset implements Extension, IEventListener {
    private static final Logger LOG = LoggerFactory.getLogger(DefaultCharset.class);

    /** Configuration parameter "defaultCharset"; declared here with "ISO-8859-1" as its default value. */
    public static final Parameter<String> PARAM_DEFAULT_CHARSET = new ParameterString("defaultCharset", "ISO-8859-1");

    /** Content type fragments read from {@link Parameters#PARSABLE_CONTENT_TYPES} during {@link #init}. */
    private Collection<String> parsableContentTypes;

    /** Charset name read from {@link #PARAM_DEFAULT_CHARSET} during {@link #init}. */
    private String defaultCharset;

    /**
     * Registers this instance as a listener for {@link EventManager#EVENT_FETCH_POST} and loads the parsable content
     * types and the default charset from the supplied properties.
     *
     * @param driver
     *            driver whose event manager receives the registration
     * @param properties
     *            configuration the two parameters are read from
     */
    @Override
    public void init(Driver driver, Properties properties) {
        driver.getEventManager().register(EventManager.EVENT_FETCH_POST, this);

        parsableContentTypes = Parameters.PARSABLE_CONTENT_TYPES.getValue(properties);
        defaultCharset = PARAM_DEFAULT_CHARSET.getValue(properties);

        String summary = "Will use " + defaultCharset + " as default charset for " + parsableContentTypes.toString();
        LOG.info(summary);
    }

    /**
     * Appends <code>; charset=&lt;default&gt;</code> to the Content-Type header of the fetched response when that
     * header exists, does not already mention a charset and matches one of the parsable content types.
     *
     * @param arg0
     *            definition of the received event (not used)
     * @param arg1
     *            the received event; it is cast to {@link FetchEvent}
     * @return always <code>true</code>
     */
    @Override
    public boolean event(EventDefinition arg0, Event arg1) {
        FetchEvent fetchEvent = (FetchEvent) arg1;
        Header header = fetchEvent.getHttpResponse().getFirstHeader("Content-Type");

        // Without a Content-Type header there is nothing to complete.
        if (header != null) {
            String headerValue = header.getValue();

            // Leave the header alone if it already mentions a charset; otherwise complete it for parsable types only.
            boolean charsetMissing = !StringUtils.containsIgnoreCase(headerValue, "charset");
            if (charsetMissing && matchesParsableType(headerValue)) {
                fetchEvent.getHttpResponse().setHeader("Content-Type", headerValue + "; charset=" + defaultCharset);
            }
        }

        return true;
    }

    /**
     * Tells whether the header value contains, ignoring case, at least one of the configured parsable content types.
     */
    private boolean matchesParsableType(String headerValue) {
        for (String candidate : parsableContentTypes) {
            if (StringUtils.containsIgnoreCase(headerValue, candidate)) {
                return true;
            }
        }
        return false;
    }
}