package com.cloudera.flume.handlers.text;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.flume.core.Event;
import com.cloudera.flume.core.EventImpl;
import com.cloudera.flume.core.Event.Priority;
import com.cloudera.util.Clock;
import com.cloudera.util.NetUtils;
// slf4j logger shared by all instances; the category is taken from
// ApacheAccessLogFormat (presumably the enclosing class -- its declaration is
// not visible from this part of the file).
static final Logger LOG =
    LoggerFactory.getLogger(ApacheAccessLogFormat.class);
/**
 * Compiled once and reused: matches one complete access-log line, anchored at
 * both ends.
 *
 * Capture groups, in order:
 * <ul>
 * <li>1-3: three whitespace-free tokens (group 1 is stored as "client")</li>
 * <li>4: the text between the square brackets (the timestamp)</li>
 * <li>5: the first double-quoted string (used as the event body)</li>
 * <li>6, 7: two whitespace-free tokens (stored as "req_result" and
 * "req_size")</li>
 * <li>8: optional trailing section holding two more double-quoted strings,
 * captured as groups 9 ("referrer") and 10 ("browser"); all three groups are
 * null when the section is missing</li>
 * </ul>
 */
static final Pattern APACHE_PAT = Pattern.compile(
    "^(\\S+) (\\S+) (\\S+) \\[(.*?)\\] \\\"(.*?)\\\" (\\S+) (\\S+)( \\\"(.*?)\\\" \\\"(.*?)\\\")?$");
/**
 * Parser for the bracketed timestamp of a log line, e.g.
 * "10/Oct/2000:13:55:36 -0700".
 *
 * The locale is pinned to English because the "MMM" month abbreviation is
 * locale-sensitive: without an explicit locale the JVM default is used, and
 * abbreviations such as "Oct" or "Feb" do not parse under, for example, a
 * French or German default locale.
 *
 * SimpleDateFormat is not synchronized; this is a per-instance field, so it is
 * only safe as long as a single instance is not used from several threads at
 * once.
 */
final SimpleDateFormat APACHE_DF = new SimpleDateFormat(
    "dd/MMM/yyyy:HH:mm:ss zzzzz", Locale.ENGLISH);
/**
 * Turns one access-log line into an {@link Event}.
 *
 * The line is matched against APACHE_PAT. On a match the event is built with:
 * the first quoted string (group 5) as body, the parsed bracketed timestamp
 * (group 4) as event time, priority INFO, Clock.nanos() as the nanosecond
 * stamp, and NetUtils.localhost() as host. The attribute map always carries
 * "service" (the constant "apache"), "client", "req_result" and "req_size";
 * "referrer" and "browser" are added only when the line contains them and
 * they are not the "-" placeholder.
 *
 * @param s a single log line; may be null
 * @return the event, or null when s is null, does not match APACHE_PAT, or
 *         carries a timestamp APACHE_DF cannot parse (the latter is logged at
 *         WARN level)
 */
public Event (String s) {
  if (s == null) {
    // Pattern.matcher(null) would throw; treat null like any other input
    // that is not an access-log line.
    return null;
  }

  Matcher m = APACHE_PAT.matcher(s);
  if (!m.matches()) {
    return null;
  }

  try {
    // The parsed Date is used directly; the offset in the log line is already
    // folded into the resulting epoch milliseconds.
    Date timestamp = APACHE_DF.parse(m.group(4));
    String host = NetUtils.localhost();

    // NOTE(review): getBytes() without a charset uses the platform default
    // encoding; left as-is because consumers of these fields are not visible
    // here -- confirm before switching to a fixed charset.
    Map<String, byte[]> fields = new HashMap<String, byte[]>();
    fields.put("service", "apache".getBytes());
    fields.put("client", m.group(1).getBytes());
    fields.put("req_result", m.group(6).getBytes());
    fields.put("req_size", m.group(7).getBytes());
    addOptionalField(fields, "referrer", m.group(9));
    addOptionalField(fields, "browser", m.group(10));

    return new EventImpl(m.group(5).getBytes(), timestamp.getTime(),
        Priority.INFO, Clock.nanos(), host, fields);
  } catch (ParseException e) {
    LOG.warn("Failed to parse apache access log line: '" + s + "'", e);
    return null;
  }
}

/**
 * Stores value under key unless it is missing from the line (null) or is the
 * "-" placeholder.
 */
private static void addOptionalField(Map<String, byte[]> fields, String key,
    String value) {
  if (value != null && !value.equals("-")) {
    fields.put(key, value.getBytes());
  }
}
}