Getting Data In

Why is Splunk is not parsing json output from scripted inputs?

vikramnain1995
Explorer

I'm fetching some data from API via a python script and passing it to Splunk. it's is not paring the JSON format. I've tested my output with JSON parser with no error. If I set the source type to some custom I'm receiving events as a text. but when I'm putting source type as _json it is giving line breaking error expected : \ 

Below is the python script. I'm using json.dumps also while printing. Now I'm writing to the file and fetching with monitor. 

 

 

# This sript is fetching data from virustotal api and passing to splunk.
# checkpointing is enabled to drop duplicate events

import json,requests,sys,time,os
from datetime import datetime


 


proxies = { 'https': 'http://security-proxy.emea.svc.corpintra.net:3128' }

url = "https://www.virustotal.com/api/v3/intelligence/hunting_notifications"

params = { 'limit' : 40, 
           'count_limit' : 10000
         }

headers = {
    "Accept": "application/json",
    "x-apikey": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
}

current_time = datetime.now()
file_path = f'/opt/splunk/etc/apps/infy_ta_virustotal_livehunt_validation/bin/data/'
complete_name = file_path + f'livehunt_{time.strftime("%Y_%m_%d_%H_%M_%S")}'
keys_filename = f'/opt/splunk/etc/apps/infy_ta_virustotal_livehunt_validation/bin/keys.txt'


def write_new_keys_in_file(keys_filename, keys_to_be_indexed):
    try:
        
        with open(keys_filename, 'w') as file:
            for key in keys_to_be_indexed:
                file.write(str(key))
                file.write('\n')
    except  Exception as e:
        print(e)
    

def get_indexed_key(keys_filename):
    try:
              
        with open(keys_filename, 'r') as file:
            indexed_keys = file.read().splitlines()
        return indexed_keys
            
        
    except Exception as e:
        with open(keys_filename, 'w') as file:
            indexed_keys = []
        return indexed_keys
        

def get_json_data(url, headers, params, proxies):
    try:
        
        response = requests.get(url = url, headers=headers,params = params, proxies=proxies).json()
        return response
    except Exception as e:
        print(e)
        sys.exit(1)

def write_to_file(complete_name, data):
    with open(complete_name, 'a') as f:
        json.dump(data, f)
        f.write('\n')
    

def stream_to_splunk(json_response,indexed_keys, complete_name):
    try:
        
        keys_to_be_indexed = []
        events_to_be_indexed = []
        
        for item in json_response['data']:
            keys_to_be_indexed.append(item['id'])
            if item['id'] not in indexed_keys:

                write_to_file(complete_name = complete_name, data = item)
                events_to_be_indexed.append(item)
        print(json.dumps(events_to_be_indexed, indent = 4, sort_keys = True)) if len(events_to_be_indexed) else 1==1
        return keys_to_be_indexed
    except Exception as e:
        print(e)
            
    
def main():
    try:
        
        json_response = get_json_data(url = url, headers = headers, params = params, proxies = proxies)
        
        indexed_keys = get_indexed_key(keys_filename = keys_filename)
        
        keys_to_be_indexed = stream_to_splunk(json_response = json_response, indexed_keys = indexed_keys, complete_name = complete_name)
        
        write_new_keys_in_file(keys_filename = keys_filename, keys_to_be_indexed = keys_to_be_indexed)
    
    except Exception as e:
        print(e)

if __name__ == "__main__":
    main()

 

 

 

Labels (2)
0 Karma
1 Solution

vikramnain1995
Explorer
[
    {
        "attributes": {
            "date": 1651251035,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "67 69 65 0D 0A 72 6F 62 65 72 74 6F 2E 67 61 73  gie..roberto.gas\n70 61 72 65 74 74 69 *begin_highlight*40 64 61 69 6D 6C 65 72 2E*end_highlight*  paretti*begin_highlight*@daimler.*end_highlight*\n*begin_highlight*63 6F 6D *end_highlight*3A 4D 61 72 69 6E 69 6E 68 61 33 35 0D  *begin_highlight*com*end_highlight*:Marininha35.",
            "tags": [
                "abc_mail",
                "30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973",
        "links": {
            "self": "https://www.virSAtotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973"
        },
        "type": "hunting_notification"
    },
    {
        "attributes": {
            "date": 1651248256,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "62 75 70 6B 69 73 73 2E 6E 65 74 3A 54 68 65 72  bupkiss.net:Ther\n61 70 79 0D 0A 61 6C 6C 69 2E 62 65 79 *begin_highlight*40 64 61*end_highlight*  apy..alli.bey*begin_highlight*@da*end_highlight*\n*begin_highlight*69 6D 6C 65 72 2E 63 6F 6D *end_highlight*3A 4E 6F 6F 72 6A 65  *begin_highlight*imler.com*end_highlight*:Noorje",
            "source_country": "SA",
            "source_key": "bbf46859",
            "tags": [
                "abc_mail",
                "6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349",
        "links": {
            "self": "https://www.virSAtotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349"
        },
        "type": "hunting_notification"
    }
]

View solution in original post

0 Karma

vikramnain1995
Explorer

my issue has been resolved. solution shared is correct only. print each json event in newline(python does automatically in simple print()  ). no comma.

0 Karma

vikramnain1995
Explorer

but still getting below error

5-12-2022 12:47:37.258 +0000 ERROR JsonLineBreaker [20720 parsing] - JSON StreamId:12396707386195060580 had parsing error:Unexpected character while looking for value: '}' - data_source="/opt/splunk/etc/apps/my_app/bin/virustotal_livehunt.py", data_host="my_host_name", data_sourcetype="_json"

0 Karma

vikramnain1995
Explorer

Now script is printing in suggested order but still linebreaking error.

{
    "attributes": "value"
    "type": "hunting_notification"
}
{
    "attributes": "value"
    "type": "hunting_notification"
}
{
    "attributes": "value"
    "type": "hunting_notification"
}
0 Karma

VatsalJagani
SplunkTrust
SplunkTrust

@vikramnain1995 - The reason is that your data is not correct JSON format.

JSON format always starts with "{".

So, the right JSON format should look something like:

{
        "attributes": {
            "date": 1651251035,
            "match_in_subfile": false,
....
}
{
        "attributes": {
            "date": 1651248256,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
.....
}

 

You see we are not putting "[" bracket here and also there is no comma between two different events.

This should be able to parse the events properly.

 

I hope this helps!!!!

0 Karma

vikramnain1995
Explorer

Please find the error for this. 

04-29-2022 16:28:09.410 +0000 ERROR JsonLineBreaker [3843466 parsing] - JSON StreamId:12469636465500470991 had parsing error:Unexpected character while expecting '"': '\'' - data_source="/opt/splunk/etc/apps/mytestingapp_ta_virustotal_livehunt_validation/bin/virustotal_livehunt.py", data_host="my_search_head", data_sourcetype="_json"

0 Karma

vikramnain1995
Explorer

above is the output of the script

0 Karma

vikramnain1995
Explorer
[
    {
        "attributes": {
            "date": 1651251035,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "67 69 65 0D 0A 72 6F 62 65 72 74 6F 2E 67 61 73  gie..roberto.gas\n70 61 72 65 74 74 69 *begin_highlight*40 64 61 69 6D 6C 65 72 2E*end_highlight*  paretti*begin_highlight*@daimler.*end_highlight*\n*begin_highlight*63 6F 6D *end_highlight*3A 4D 61 72 69 6E 69 6E 68 61 33 35 0D  *begin_highlight*com*end_highlight*:Marininha35.",
            "tags": [
                "abc_mail",
                "30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973",
        "links": {
            "self": "https://www.virSAtotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973"
        },
        "type": "hunting_notification"
    },
    {
        "attributes": {
            "date": 1651248256,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "62 75 70 6B 69 73 73 2E 6E 65 74 3A 54 68 65 72  bupkiss.net:Ther\n61 70 79 0D 0A 61 6C 6C 69 2E 62 65 79 *begin_highlight*40 64 61*end_highlight*  apy..alli.bey*begin_highlight*@da*end_highlight*\n*begin_highlight*69 6D 6C 65 72 2E 63 6F 6D *end_highlight*3A 4E 6F 6F 72 6A 65  *begin_highlight*imler.com*end_highlight*:Noorje",
            "source_country": "SA",
            "source_key": "bbf46859",
            "tags": [
                "abc_mail",
                "6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349",
        "links": {
            "self": "https://www.virSAtotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349"
        },
        "type": "hunting_notification"
    }
]
0 Karma
Get Updates on the Splunk Community!

Index This | I am a number, but when you add ‘G’ to me, I go away. What number am I?

March 2024 Edition Hayyy Splunk Education Enthusiasts and the Eternally Curious!  We’re back with another ...

What’s New in Splunk App for PCI Compliance 5.3.1?

The Splunk App for PCI Compliance allows customers to extend the power of their existing Splunk solution with ...

Extending Observability Content to Splunk Cloud

Register to join us !   In this Extending Observability Content to Splunk Cloud Tech Talk, you'll see how to ...