I am attempting to parse the results of a search using the Python SDK. I can successfully run a search against Splunk and return the results as XML output:
#!c:/Python26/python.exe -u
import splunk.client as client
import sys, datetime
from pprint import pprint
HOST = "1.2.3.4"
PORT = 8090
USERNAME = "admin"
PASSWORD = "aaaaaaaa"
service = client.connect(
    host=HOST,
    port=PORT,
    username=USERNAME,
    password=PASSWORD)
oldtime = datetime.datetime.now()
search = 'search index="firewall_rules" table=rule disable=no | dedup name,host |table host,name,rulegroup,appcode,_time | head 2'
job = service.jobs.create(search, exec_mode="blocking")
#job = service.jobs.create(search)
while True:
    # poll until the job finishes (only needed with the non-blocking create above)
    stats = job.read('isDone')
    if stats['isDone'] == '1':
        break
content = str(job.results(output_mode='xml'))
print "Output: %s" % content
newtime = datetime.datetime.now()
print "Elapsed Time: %s" % (newtime - oldtime)
Output: <?xml version='1.0' encoding='UTF-8'?>
<results preview='0'>
<meta>
<fieldOrder>
<field>host</field>
<field>name</field>
<field>rulegroup</field>
<field>appcode</field>
<field>_time</field>
</fieldOrder>
</meta>
<result offset='0'>
<field k='host'>
<value><text>FW1</text></value>
</field>
<field k='name'>
<value><text>'Deny All'</text></value>
</field>
<field k='rulegroup'>
<value><text>''</text></value>
</field>
<field k='_time'>
<value><text>2011-12-03T03:24:02.000-05:00</text></value>
</field>
</result>
<result offset='1'>
<field k='host'>
<value><text>FW2</text></value>
</field>
<field k='name'>
<value><text>WEB1</text></value>
</field>
<field k='rulegroup'>
<value><text>WEB1</text></value>
</field>
<field k='appcode'>
<value><text>ABC123</text></value>
</field>
<field k='_time'>
<value><text>2011-12-03T03:24:02.000-05:00</text></value>
</field>
</result>
</results>
Elapsed Time: 0:00:04.500000
I have attempted to parse the XML output into a structure that I can use to insert into another system (MySQL). I have tried xml.dom.minidom and ElementTree without success. Is there an easier way to access the values with Python?
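For reference, this is roughly the traversal I have been attempting with ElementTree (a sketch, assuming the full XML string from the script above is in content):

import xml.etree.ElementTree as ET

root = ET.fromstring(content)
rows = []
for result in root.findall('result'):
    row = {}
    for field in result.findall('field'):
        # each <field k='...'> wraps one or more <value><text> elements
        texts = [t.text for t in field.findall('value/text')]
        row[field.get('k')] = ', '.join(texts)
    rows.append(row)
pprint(rows)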
Here is my latest attempt using the Python SDK; SplunkDev helped out a great deal with this.
#!c:/Python26/python.exe -u
import splunk.client as client
import splunk.results as results
import sys, datetime
from pprint import pprint
HOST = "localhost"
PORT = 8090
USERNAME = "admin"
PASSWORD = "abc123"
service = client.connect(
    host=HOST,
    port=PORT,
    username=USERNAME,
    password=PASSWORD)
# ----------------------------------------
oldtime = datetime.datetime.now()
search = 'search index="sidewinder_rules" table=rule disable=no earliest=-8d| dedup name,cluster |table cluster,name,rulegroup,sourcetype,appcode,_time'
job = service.jobs.create(search, exec_mode="blocking", max_count=5000)
job_results = job.results(count=0, output_mode="xml")
reader = results.ResultsReader(job_results)
num_results = 0
for kind, result in reader:
    if kind == results.RESULT:
        num_results += 1
        host = result.get("cluster", 0)
        rulegroup = result.get("rulegroup", 0)
        name = result.get("name", 0)
        sourcetype = result.get("sourcetype", 0)
        appcode = result.get("appcode", 0)
        pprint(host)
newtime = datetime.datetime.now()
print "Elapsed Time: %s" % (newtime - oldtime)
I still have to make some improvements suggested by SplunkDev, but it works for my needs. Passing count=0 to job.results() returns more than the default 100 rows, and max_count=5000 on the job caps the result set at 5000 rows.
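For very large result sets, the results can also be read in pages instead of all at once. This is a sketch, not from the original post; it assumes job.results() forwards offset to the REST endpoint the same way it forwards count (the endpoint supports both), and the 1000-row page size is an arbitrary choice:

page_size = 1000   # arbitrary page size for this sketch
offset = 0
while True:
    page = job.results(count=page_size, offset=offset, output_mode="xml")
    n = 0
    for kind, result in results.ResultsReader(page):
        if kind == results.RESULT:
            n += 1
            # process result here, e.g. result.get("cluster", 0)
    if n < page_size:
        break   # last page came back short, so we are done
    offset += page_size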
You can change the output mode to csv, which gives you results that are already parsed into rows:
import splunklib.client as client
import splunklib.results as results
import csv
service = client.connect(
    host=HOST,
    port=PORT,
    username=un,
    password=pwd)
query = """search {+enter your query here}"""
results_kwargs = {
    "earliest_time": "-30min",
    # or "earliest_time": datetime.datetime(2015, 6, 29).isoformat()
    "latest_time": "now",
    "search_mode": "normal",
    "output_mode": "csv"
}
oneshotsearch_results = service.jobs.oneshot(query, **results_kwargs)
with open('myresults.csv', 'w') as f:
    f.write(oneshotsearch_results.read())
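Since the response body is plain CSV, you can also parse it in memory with the csv module (which is why csv is imported above) rather than writing it to a file. A sketch, with host and _time as example field names:

import StringIO   # on Python 3 use io.StringIO instead

data = oneshotsearch_results.read()   # note: the response body can only be read once
for row in csv.DictReader(StringIO.StringIO(data)):
    print row.get("host"), row.get("_time")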
Great! http://dev.splunk.com is cool.
There is an example here:
http://wiki.splunk.com/Dev:PySearch
You might find it useful to use xmlDoc.
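For example, here is a minimal sketch against the results XML shown in the question, assuming the XML string is in content and reading "xmlDoc" as xml.dom.minidom:

import xml.dom.minidom

doc = xml.dom.minidom.parseString(content)
for result in doc.getElementsByTagName('result'):
    row = {}
    for field in result.getElementsByTagName('field'):
        # each <field k='...'> holds <value><text>...</text></value> children
        texts = [t.firstChild.data
                 for t in field.getElementsByTagName('text')
                 if t.firstChild is not None]
        row[field.getAttribute('k')] = ', '.join(texts)
    print row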