Hi there,
I am trying to use the Python Splunk SDK to query results from a search and return a specific field that I would normally see when I run the same search in Splunk Web. For some reason my fields don't appear to be separated when I run the search through the SDK; it only returns the _raw message plus the default Splunk fields. Is there a way to return the fields that are normally extracted by the field extractors in Splunk Web when using service.jobs.export()? Here is the field extractor for the _raw message:
suricata\[(?P<pid>\d+)\]: \[(?P<gid>\d+):(?P<sid>\d+):(?P<rev>\d+)\] (?P<msg>.+) \[Classification:(?P<classification>.+)\] \[Priority:(?P<priority> \d+)\] \{(?P<proto>.+)\} (?P<src_ip>[\d.]{7,15}):(?P<src_port>\d+) -> (?P<dst_ip>[\d.]{7,15}):(?P<dst_port>\d+)$
Code:

import splunklib.client as client
import splunklib.results as results
import prettyprint
import time
import json

HOST = 'secrets'
PORT = 8089
USERNAME = 'moresecrets'
PASSWORD = 'evenmoresecrets'

service = client.connect(
    host=HOST,
    port=PORT,
    username=USERNAME,
    password=PASSWORD)

kwargs_export = {
    "search_mode": "normal",
    "preview": False,
}

searchquery = "search index=main sourcetype=suricata earliest= -1m"

resultexport = service.jobs.export(searchquery, **kwargs_export)

for i in results.ResultsReader(resultexport):
    time.sleep(5)
    if isinstance(i, dict) and 'ET' in i['_raw']:
        prettyprint.pp(i)
Results:
{
"_bkt": "main~7~343A73CE-58CA-4278-9DAC-CDF498B5BBF1",
"_cd": "7:4405902",
"_indextime": "1482452940",
"_raw": "Dec 22 17:28:56 2.2.2.2 Dec 22 17:28:56 myserver suricata[24186]: [1:2006402:10] ET POLICY Incoming Basic Auth Base64 HTTP Password detected unencrypted [Classification: Potential Corporate Privacy Violation] [Priority: 1] {TCP} 4.5.6.7:40247 -> 1.2.3.4:80",
"_serial": "28",
"_si": [
"secrets.server",
"main"
],
"_sourcetype": "suricata",
"_time": "2016-12-22 17:28:56.000 MST",
"host": "x.x.x.x",
"index": "main",
"linecount": "1",
"source": "udp:514",
"sourcetype": "suricata",
"splunk_server": "secrets"
}
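For what it's worth, the extractor itself seems fine: applying the same regex client-side to that _raw does pull out all the fields, so the problem looks like getting Splunk to return them through the SDK rather than the pattern. A quick client-side check would be something like this, run inside the loop above:

import re

# the field extractor from above, applied client-side
PATTERN = re.compile(
    r"suricata\[(?P<pid>\d+)\]: \[(?P<gid>\d+):(?P<sid>\d+):(?P<rev>\d+)\] "
    r"(?P<msg>.+) \[Classification:(?P<classification>.+)\] "
    r"\[Priority:(?P<priority> \d+)\] \{(?P<proto>.+)\} "
    r"(?P<src_ip>[\d.]{7,15}):(?P<src_port>\d+) -> "
    r"(?P<dst_ip>[\d.]{7,15}):(?P<dst_port>\d+)$")

m = PATTERN.search(i['_raw'])   # i is the dict from the ResultsReader loop above
if m:
    print(m.group('src_ip'), m.group('dst_ip'), m.group('msg'))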
Try your search with
kwargs_export = {
    "output_mode": "csv"
}
It should give you all the fields as a dict.
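Roughly, that idea looks like this. This is only a sketch, reusing the connection settings and search from the question and parsing the streamed CSV with csv.DictReader; the field names I print are the ones from your extractor:

import csv
import io

import splunklib.client as client

HOST = 'secrets'            # same connection settings as in the question
PORT = 8089
USERNAME = 'moresecrets'
PASSWORD = 'evenmoresecrets'

service = client.connect(host=HOST, port=PORT, username=USERNAME, password=PASSWORD)

kwargs_export = {
    "search_mode": "normal",
    "preview": False,
    "output_mode": "csv",   # ask the export endpoint for CSV instead of the default XML
}

resultexport = service.jobs.export("search index=main sourcetype=suricata earliest=-1m",
                                   **kwargs_export)

# The export endpoint streams the CSV body; read it all and let csv.DictReader
# turn each row into a dict keyed by column name, which should include the
# search-time extracted fields (src_ip, dst_ip, msg, ...).
body = resultexport.read().decode("utf-8")
for row in csv.DictReader(io.StringIO(body)):
    if 'ET' in row.get("_raw", ""):
        print(row.get("src_ip"), row.get("dst_ip"), row.get("msg"))

Note that search/jobs/export streams results as they are found, so the CSV can arrive in chunks; reading the whole body first keeps the sketch simple.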
I was successful in getting results by using the following code, but let me know if it's not working for you:
from splunklib.binding import connect
from splunklib.modularinput.utils import parse_parameters
from splunklib import client, results
import time


def cleanup_tail(options):
    """ cleanup the tail of a recovery """
    if options['format'] == "csv":
        options['fd'].write(b"\n")
    elif options['format'] == "xml":
        options['fd'].write(b"\n</results>\n")
    else:
        options['fd'].write(b"\n]\n")


def export(options, service, index):
    """ main export method: export any number of indexes """
    start = options['start']
    end = options['end']
    fixtail = options['fixtail']
    once = True

    squery = "search "
    squery = squery + "index=%s" % options['index']
    if start != "":
        squery = squery + " earliest_time=%s" % start
    if end != "":
        squery = squery + " latest_time=%s" % end
    print(squery)

    success = False
    while not success:
        # issue query to splunkd
        # count=0 overrides the maximum number of events
        # returned (normally 50K) regardless of what the .conf
        # file for splunkd says.
        result = service.get('search/jobs/export',
                             search=squery,
                             output_mode=options['format'],
                             timeout=60,
                             earliest_time="0.000",
                             time_format="%s.%Q",
                             count=0)
        print(result.status)
        if result.status != 200:
            print("warning: export job failed: %d, sleep/retry" % result.status)
            time.sleep(60)
        else:
            success = True
            # write export file
            while True:
                if fixtail and once:
                    cleanup_tail(options)
                    once = False
                content = result.body.read()
                if len(content) == 0:
                    break
                options['fd'].write(content)
                options['fd'].write("\n".encode("utf-8"))
                options['fd'].flush()


# fill in your own connection details, index, and output file before running
options = {"host": None,
           "port": None,
           "username": None,
           "password": None,
           "format": "csv",
           "fixtail": False,
           "index": "main",                  # index to export
           "start": "",                      # optional earliest_time
           "end": "",                        # optional latest_time
           "fd": open("export.csv", "wb")}   # output file, opened in binary mode

service = connect(**options)
export(options, service, options['index'])
Hi @zhatsispgx & @burtica
Have you tried adding | table * to the search in your Python code?
For example, using the searchquery from the question:
searchquery = "search index=main sourcetype=suricata earliest= -1m | table * "
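If that does the trick, the dicts coming back from ResultsReader should then carry the extracted fields alongside the defaults; roughly (reusing the service, kwargs_export, and results names from the question):

resultexport = service.jobs.export(searchquery, **kwargs_export)
for i in results.ResultsReader(resultexport):
    if isinstance(i, dict):
        # these keys come from the field extractor shown in the question
        print(i.get('src_ip'), i.get('src_port'), i.get('dst_ip'), i.get('dst_port'), i.get('msg'))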
Thanks
I have the same issue. I tried adding "| table *", but then job["eventFieldCount"] comes back as 0. Before I added rf the count was 11; after adding rf the count was 16, but the fields listed in rf were not in the results.
The Splunk documentation for REST searches implies that the rf parameter can do this:
"Use the rf parameter to add required
fields to a search. Adding fields
guarantees results for the endpoints
that return events and a summary. (The
required_fields parameter has been
deprecated in favor of the rf
parameter.)"
Older Splunk Answers posts advise using the fields command in the search to make sure the required fields are returned, something like the sketch below...
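A rough sketch of that suggestion, reusing the service, kwargs_export, and results names from the question and listing the field names from the extractor explicitly (whether jobs.export() then surfaces them is the open question):

# explicitly ask for the extracted fields instead of (or in addition to) table *
searchquery = ("search index=main sourcetype=suricata earliest=-1m "
               "| fields pid, gid, sid, rev, msg, classification, priority, "
               "proto, src_ip, src_port, dst_ip, dst_port")

resultexport = service.jobs.export(searchquery, **kwargs_export)
for i in results.ResultsReader(resultexport):
    if isinstance(i, dict):
        print(i.get('src_ip'), i.get('dst_ip'), i.get('msg'))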
I have the same problem as @zhatsispgx...
I added the rf parameter to **kwargs_export and it still didn't work. I'm not sure if this is a bug or what.
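For reference, this is roughly what I mean by adding rf to **kwargs_export (the field names are the ones from the extractor earlier in the thread):

kwargs_export = {
    "search_mode": "normal",
    "preview": False,
    # rf can be given multiple times on the REST API; the SDK encodes a list
    # as repeated rf parameters
    "rf": ["src_ip", "src_port", "dst_ip", "dst_port", "msg"],
}
resultexport = service.jobs.export(searchquery, **kwargs_export)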