We needed to analyze how often a web shopper used more than one device to manage her shopping cart within a given time tolerance. We store our Apache access logs in Splunk, so this post shows how you can query Splunk, then take the results and analyze them with an in-memory database in Python.
from pydblite.sqlite import Database, Table
import splunklib.client as client
import splunklib.results as results
# Connect to Splunk and run a blocking oneshot search for all
# access_combined events from the ecom web hosts that mention one
# client IP, keeping only the raw event text.
# NOTE(review): hard-coded credentials -- move these to environment
# variables or a config file before sharing or committing this script.
service = client.connect(host="cmhlpsplksea01",
                         port="8089",
                         username="showard",
                         password="**********")
# Pass the search window and count as plain keyword arguments rather
# than an unpacked dict; count=0 asks Splunk for all matching events.
rr = results.ResultsReader(service.jobs.oneshot(
    'search host="cmhlpecomweb*" sourcetype=access_combined 104.137.126.226 | fields _raw',
    earliest_time="2017-01-12T15:30:00.000-05:00",
    latest_time="2017-01-12T17:30:00.000-05:00",
    count=0))
# Build a throwaway SQLite database in RAM and define the "sessions"
# table that will hold one row per access-log event.
db = Database(":memory:")
table = Table("sessions", db)
columns = (("ts", "TEXT"), ("ip_address", "TEXT"), ("session_id", "TEXT"))
table.create(*columns)
table.open()
orders = dict()
for result in rr:
agents = result['_raw'].split('\t')
table.insert(ts=agents[0],ip_address=agents[13], session_id=agents[14])
assert rr.is_preview == False
for r in (r for r in table if r['ts'].find('2017:17:2') > -1):
print "found", r