We had a need to analyze how often a web shopper used more than one device to manage her shopping cart within a given time tolerance. We store our Apache access logs in Splunk, so this post is simply a pointer to how you can query Splunk, take the results, and analyze them with an in-memory database in Python.
from pydblite.sqlite import Database, Table import splunklib.client as client import splunklib.results as results service = client.connect(host="cmhlpsplksea01",port="8089",username="showard",password="**********") rr = results.ResultsReader(service.jobs.oneshot("search host=\"cmhlpecomweb*\" sourcetype=access_combined 104.137.126.226 | fields _raw", **{"earliest_time":"2017-01-12T15:30:00.000-05:00", "latest_time":"2017-01-12T17:30:00.000-05:00", "count": 0})) db = Database(":memory:") table = Table("sessions", db) table.create(('ts', 'TEXT'), ('ip_address', 'TEXT'), ('session_id', 'TEXT')) table.open() orders = dict() for result in rr: agents = result['_raw'].split('\t') table.insert(ts=agents[0],ip_address=agents[13], session_id=agents[14]) assert rr.is_preview == False for r in (r for r in table if r['ts'].find('2017:17:2') > -1): print "found", r