I'm fetching some data from an API via a Python script and passing it to Splunk, but Splunk is not parsing the JSON. I've tested my output with a JSON parser and it reports no errors. If I set the sourcetype to a custom one, I receive the events as plain text, but when I set the sourcetype to _json I get a line-breaking error (expected : \).
Below is the Python script. I'm also using json.dumps while printing. Currently I'm writing to a file and ingesting it with a monitor input.
# This script fetches data from the VirusTotal API and passes it to Splunk.
# Checkpointing is enabled to drop duplicate events.
import json,requests,sys,time,os
from datetime import datetime

proxies = { 'https': 'http://security-proxy.emea.svc.corpintra.net:3128' }
url = "https://www.virustotal.com/api/v3/intelligence/hunting_notifications"
params = {
    'limit' : 40,
    'count_limit' : 10000
}
headers = {
    "Accept": "application/json",
    "x-apikey": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
}
current_time = datetime.now()
file_path = f'/opt/splunk/etc/apps/infy_ta_virustotal_livehunt_validation/bin/data/'
complete_name = file_path + f'livehunt_{time.strftime("%Y_%m_%d_%H_%M_%S")}'
keys_filename = f'/opt/splunk/etc/apps/infy_ta_virustotal_livehunt_validation/bin/keys.txt'

def write_new_keys_in_file(keys_filename, keys_to_be_indexed):
    try:
        with open(keys_filename, 'w') as file:
            for key in keys_to_be_indexed:
                file.write(str(key))
                file.write('\n')
    except Exception as e:
        print(e)

def get_indexed_key(keys_filename):
    try:
        with open(keys_filename, 'r') as file:
            indexed_keys = file.read().splitlines()
            return indexed_keys
    except Exception as e:
        with open(keys_filename, 'w') as file:
            indexed_keys = []
            return indexed_keys

def get_json_data(url, headers, params, proxies):
    try:
        response = requests.get(url=url, headers=headers, params=params, proxies=proxies).json()
        return response
    except Exception as e:
        print(e)
        sys.exit(1)

def write_to_file(complete_name, data):
    with open(complete_name, 'a') as f:
        json.dump(data, f)
        f.write('\n')

def stream_to_splunk(json_response, indexed_keys, complete_name):
    try:
        keys_to_be_indexed = []
        events_to_be_indexed = []
        for item in json_response['data']:
            keys_to_be_indexed.append(item['id'])
            if item['id'] not in indexed_keys:
                write_to_file(complete_name=complete_name, data=item)
                events_to_be_indexed.append(item)
        print(json.dumps(events_to_be_indexed, indent=4, sort_keys=True)) if len(events_to_be_indexed) else 1==1
        return keys_to_be_indexed
    except Exception as e:
        print(e)

def main():
    try:
        json_response = get_json_data(url=url, headers=headers, params=params, proxies=proxies)
        indexed_keys = get_indexed_key(keys_filename=keys_filename)
        keys_to_be_indexed = stream_to_splunk(json_response=json_response, indexed_keys=indexed_keys, complete_name=complete_name)
        write_new_keys_in_file(keys_filename=keys_filename, keys_to_be_indexed=keys_to_be_indexed)
    except Exception as e:
        print(e)

if __name__ == "__main__":
    main()
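For reference, one way to reproduce the "tested with a JSON parser" check the way Splunk's monitor input sees the data is to validate the written file line by line. This is only a sketch: the validate_livehunt.py name is made up, and the argument would be one of the livehunt_* files produced by the script above.

import json
import sys

# Sketch: check a livehunt output file line by line, the way a per-line JSON
# parser would read it. Usage: python validate_livehunt.py <path-to-livehunt-file>
path = sys.argv[1]

with open(path) as f:
    for lineno, line in enumerate(f, start=1):
        line = line.strip()
        if not line:
            continue
        try:
            json.loads(line)
        except json.JSONDecodeError as e:
            print(f"line {lineno}: {e}")

Any line that fails here is a likely candidate for the line-breaking error on the Splunk side.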
[
    {
        "attributes": {
            "date": 1651251035,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "67 69 65 0D 0A 72 6F 62 65 72 74 6F 2E 67 61 73 gie..roberto.gas\n70 61 72 65 74 74 69 40 64 61 69 6D 6C 65 72 2E paretti@daimler.\n63 6F 6D 3A 4D 61 72 69 6E 69 6E 68 61 33 35 0D com:Marininha35.",
            "tags": [
                "abc_mail",
                "30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973",
        "links": {
            "self": "https://www.virustotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973"
        },
        "type": "hunting_notification"
    },
    {
        "attributes": {
            "date": 1651248256,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "62 75 70 6B 69 73 73 2E 6E 65 74 3A 54 68 65 72 bupkiss.net:Ther\n61 70 79 0D 0A 61 6C 6C 69 2E 62 65 79 40 64 61 apy..alli.bey@da\n69 6D 6C 65 72 2E 63 6F 6D 3A 4E 6F 6F 72 6A 65 imler.com:Noorje",
            "source_country": "SA",
            "source_key": "bbf46859",
            "tags": [
                "abc_mail",
                "6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349",
        "links": {
            "self": "https://www.virustotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349"
        },
        "type": "hunting_notification"
    }
]
My issue has been resolved. The solution shared is indeed correct: print each JSON event on a new line (a simple print() in Python adds the newline automatically), with no comma between events.
But I'm still getting the error below:
5-12-2022 12:47:37.258 +0000 ERROR JsonLineBreaker [20720 parsing] - JSON StreamId:12396707386195060580 had parsing error:Unexpected character while looking for value: '}' - data_source="/opt/splunk/etc/apps/my_app/bin/virustotal_livehunt.py", data_host="my_host_name", data_sourcetype="_json"
Now the script is printing in the suggested format, but I'm still getting the line-breaking error:
{
    "attributes": "value"
    "type": "hunting_notification"
}
{
    "attributes": "value"
    "type": "hunting_notification"
}
{
    "attributes": "value"
    "type": "hunting_notification"
}

@vikramnain1995 - The reason is that your data is not in the format Splunk's JSON line breaker expects.
Each event should be a standalone JSON object, i.e. it should start with "{".
So the right format should look something like this:
{
    "attributes": {
        "date": 1651251035,
        "match_in_subfile": false,
        ....
}
{
    "attributes": {
        "date": 1651248256,
        "match_in_subfile": false,
        "rule_name": "yara_abcd",
        "rule_tags": [],
        .....
}
Notice that we are not wrapping the events in "[" and "]" brackets, and there is no comma between two different events.
With this format, Splunk should be able to parse the events properly.
I hope this helps!
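As a rough sketch of what that output pattern looks like in Python (assuming json_response has the same "data" list as the sample response above), each notification would be printed as one compact JSON object per line, with no surrounding brackets and no commas between events:

import json

def print_events_per_line(json_response):
    # One compact JSON object per line; no "[" / "]" wrapper and no comma
    # between events, so each printed line is a standalone event for Splunk.
    for item in json_response.get('data', []):
        print(json.dumps(item))

print() appends the trailing newline, so nothing else is needed to separate the events.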
Please find the error below:
04-29-2022 16:28:09.410 +0000 ERROR JsonLineBreaker [3843466 parsing] - JSON StreamId:12469636465500470991 had parsing error:Unexpected character while expecting '"': '\'' - data_source="/opt/splunk/etc/apps/mytestingapp_ta_virustotal_livehunt_validation/bin/virustotal_livehunt.py", data_host="my_search_head", data_sourcetype="_json"
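For what it's worth, a stray single quote in an otherwise JSON-looking stream often comes from something being printed with Python's default str()/repr() (for example a dict or an exception object) rather than json.dumps, since repr uses single quotes while JSON requires double quotes. A quick illustration:

import json

item = {"type": "hunting_notification"}

print(item)              # {'type': 'hunting_notification'}  <- repr, not valid JSON
print(json.dumps(item))  # {"type": "hunting_notification"}  <- valid JSON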
Above is the output of the script.
