Getting Data In

Why is Splunk not parsing JSON output from scripted inputs?

vikramnain1995
Explorer

I'm fetching some data from an API via a Python script and passing it to Splunk, but Splunk is not parsing the JSON. I've tested the output with a JSON parser and it reports no errors. If I set the source type to a custom one, the events are indexed as plain text, but when I set the source type to _json I get a line-breaking error (expected : \).

Below is the Python script. I'm using json.dumps when printing. I'm also writing the output to a file and ingesting it with a monitor input.

 

 

# This script fetches data from the VirusTotal API and passes it to Splunk.
# Checkpointing is used to drop duplicate events.

import json, requests, sys, time
from datetime import datetime


 


proxies = { 'https': 'http://security-proxy.emea.svc.corpintra.net:3128' }

url = "https://www.virustotal.com/api/v3/intelligence/hunting_notifications"

params = { 'limit' : 40, 
           'count_limit' : 10000
         }

headers = {
    "Accept": "application/json",
    "x-apikey": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
}

current_time = datetime.now()
file_path = '/opt/splunk/etc/apps/infy_ta_virustotal_livehunt_validation/bin/data/'
complete_name = file_path + f'livehunt_{time.strftime("%Y_%m_%d_%H_%M_%S")}'
keys_filename = '/opt/splunk/etc/apps/infy_ta_virustotal_livehunt_validation/bin/keys.txt'


def write_new_keys_in_file(keys_filename, keys_to_be_indexed):
    # Persist the notification IDs seen in this run so the next run can skip them (checkpointing)
    try:
        with open(keys_filename, 'w') as file:
            for key in keys_to_be_indexed:
                file.write(str(key))
                file.write('\n')
    except Exception as e:
        print(e)

def get_indexed_key(keys_filename):
    # Read the IDs indexed in previous runs; if the checkpoint file is missing, create it and return an empty list
    try:
        with open(keys_filename, 'r') as file:
            indexed_keys = file.read().splitlines()
        return indexed_keys
    except Exception as e:
        with open(keys_filename, 'w') as file:
            indexed_keys = []
        return indexed_keys

def get_json_data(url, headers, params, proxies):
    # Call the VirusTotal Livehunt notifications endpoint and return the decoded JSON response
    try:
        response = requests.get(url=url, headers=headers, params=params, proxies=proxies).json()
        return response
    except Exception as e:
        print(e)
        sys.exit(1)

def write_to_file(complete_name, data):
    # Append one notification as a single JSON object followed by a newline
    with open(complete_name, 'a') as f:
        json.dump(data, f)
        f.write('\n')

def stream_to_splunk(json_response, indexed_keys, complete_name):
    try:
        keys_to_be_indexed = []
        events_to_be_indexed = []

        for item in json_response['data']:
            keys_to_be_indexed.append(item['id'])
            if item['id'] not in indexed_keys:
                write_to_file(complete_name=complete_name, data=item)
                events_to_be_indexed.append(item)

        # Print the new events to stdout so the scripted input picks them up
        if events_to_be_indexed:
            print(json.dumps(events_to_be_indexed, indent=4, sort_keys=True))
        return keys_to_be_indexed
    except Exception as e:
        print(e)
    
def main():
    try:
        json_response = get_json_data(url=url, headers=headers, params=params, proxies=proxies)
        indexed_keys = get_indexed_key(keys_filename=keys_filename)
        keys_to_be_indexed = stream_to_splunk(json_response=json_response, indexed_keys=indexed_keys, complete_name=complete_name)
        write_new_keys_in_file(keys_filename=keys_filename, keys_to_be_indexed=keys_to_be_indexed)
    except Exception as e:
        print(e)

if __name__ == "__main__":
    main()

 

 

 


vikramnain1995
Explorer

My issue has been resolved. The shared solution is correct: print each JSON event on its own line (a plain print() in Python adds the newline automatically), with no comma between events.
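
For anyone hitting the same thing, roughly what the final print looks like (a minimal sketch, using the same events_to_be_indexed list built in stream_to_splunk() above): each event becomes one compact JSON object per line, with no enclosing brackets and no commas between events.

# Hypothetical replacement for the indent=4 dump of the whole list in stream_to_splunk():
# emit one compact JSON object per line, nothing else.
for event in events_to_be_indexed:
    print(json.dumps(event, sort_keys=True))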


vikramnain1995
Explorer

But I'm still getting the error below:

5-12-2022 12:47:37.258 +0000 ERROR JsonLineBreaker [20720 parsing] - JSON StreamId:12396707386195060580 had parsing error:Unexpected character while looking for value: '}' - data_source="/opt/splunk/etc/apps/my_app/bin/virustotal_livehunt.py", data_host="my_host_name", data_sourcetype="_json"


vikramnain1995
Explorer

Now the script is printing in the suggested format, but I'm still getting the line-breaking error:

{
    "attributes": "value"
    "type": "hunting_notification"
}
{
    "attributes": "value"
    "type": "hunting_notification"
}
{
    "attributes": "value"
    "type": "hunting_notification"
}

VatsalJagani
SplunkTrust

@vikramnain1995 - The reason is that your output is not in the format Splunk's JSON line breaker expects.

For the _json source type, each event should be its own JSON object starting with "{", not one large JSON array.

So the output should look something like:

{
        "attributes": {
            "date": 1651251035,
            "match_in_subfile": false,
....
}
{
        "attributes": {
            "date": 1651248256,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
.....
}

 

Notice that there is no enclosing "[" bracket and also no comma between the two events.

With this format, Splunk should be able to break and parse the events properly.
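
As a rough sketch only (not your exact script), a loop like the one below would produce that shape, assuming the notifications sit under the "data" key of the response as in the output you posted: each item is dumped as its own JSON document instead of dumping the whole list at once.

# Rough sketch: emit each notification separately so there is no enclosing
# "[" / "]" and no comma between events.
for item in json_response["data"]:
    print(json.dumps(item))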

 

I hope this helps!!!!


vikramnain1995
Explorer

Please find the error for this below.

04-29-2022 16:28:09.410 +0000 ERROR JsonLineBreaker [3843466 parsing] - JSON StreamId:12469636465500470991 had parsing error:Unexpected character while expecting '"': '\'' - data_source="/opt/splunk/etc/apps/mytestingapp_ta_virustotal_livehunt_validation/bin/virustotal_livehunt.py", data_host="my_search_head", data_sourcetype="_json"


vikramnain1995
Explorer

The JSON below is the output of the script.


vikramnain1995
Explorer
[
    {
        "attributes": {
            "date": 1651251035,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "67 69 65 0D 0A 72 6F 62 65 72 74 6F 2E 67 61 73  gie..roberto.gas\n70 61 72 65 74 74 69 40 64 61 69 6D 6C 65 72 2E  paretti@daimler.\n63 6F 6D 3A 4D 61 72 69 6E 69 6E 68 61 33 35 0D  com:Marininha35.",
            "tags": [
                "abc_mail",
                "30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973",
        "links": {
            "self": "https://www.virSAtotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973"
        },
        "type": "hunting_notification"
    },
    {
        "attributes": {
            "date": 1651248256,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "62 75 70 6B 69 73 73 2E 6E 65 74 3A 54 68 65 72  bupkiss.net:Ther\n61 70 79 0D 0A 61 6C 6C 69 2E 62 65 79 40 64 61  apy..alli.bey@da\n69 6D 6C 65 72 2E 63 6F 6D 3A 4E 6F 6F 72 6A 65  imler.com:Noorje",
            "source_country": "SA",
            "source_key": "bbf46859",
            "tags": [
                "abc_mail",
                "6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349",
        "links": {
            "self": "https://www.virSAtotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349"
        },
        "type": "hunting_notification"
    }
]