Searching splunk and analyzing results with PyDbLite

We had a need to analyze how often a web shopper used more than one device to manage her shopping cart within a given time tolerance. We store our Apache access logs in Splunk, so this post is simply a pointer to how you can query Splunk, and take the results and analyze them with an in-memory database in python.

from pydblite.sqlite import Database, Table
import splunklib.client as client
import splunklib.results as results

service = client.connect(host="cmhlpsplksea01",port="8089",username="showard",password="**********")
rr = results.ResultsReader(service.jobs.oneshot("search host=\"cmhlpecomweb*\" sourcetype=access_combined 104.137.126.226 | fields _raw", 
                                               **{"earliest_time":"2017-01-12T15:30:00.000-05:00",
                                                  "latest_time":"2017-01-12T17:30:00.000-05:00",
                                                  "count": 0}))

db = Database(":memory:")
table = Table("sessions", db)
table.create(('ts', 'TEXT'), ('ip_address', 'TEXT'), ('session_id', 'TEXT'))
table.open()

orders = dict()
for result in rr:
  agents = result['_raw'].split('\t')
  table.insert(ts=agents[0],ip_address=agents[13], session_id=agents[14])

assert rr.is_preview == False

for r in (r for r in table if r['ts'].find('2017:17:2') > -1):
  print "found", r

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.