<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Why is Splunk is not parsing json output from scripted inputs? in Getting Data In</title>
    <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/597584#M104324</link>
    <description>&lt;P&gt;but still getting below error&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;5-12-2022 12:47:37.258 +0000 ERROR JsonLineBreaker [20720 parsing] - JSON StreamId:12396707386195060580 had parsing error:Unexpected character while looking for value: '}' - data_source="/opt/splunk/etc/apps/my_app/bin/virustotal_livehunt.py", data_host="my_host_name", data_sourcetype="_json"&lt;/STRONG&gt;&lt;/P&gt;</description>
    <pubDate>Thu, 12 May 2022 13:00:19 GMT</pubDate>
    <dc:creator>vikramnain1995</dc:creator>
    <dc:date>2022-05-12T13:00:19Z</dc:date>
    <item>
      <title>Why is Splunk is not parsing json output from scripted inputs?</title>
      <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596008#M104079</link>
      <description>&lt;P&gt;I'm fetching some data from API via a python script and passing it to Splunk. it's is not paring the JSON format. I've tested my output with JSON parser with no error. If I set the source type to some custom I'm receiving events as a text. but when I'm putting source type as _json it is giving line breaking error expected : \&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Below is the python script. I'm using json.dumps also while printing. Now I'm writing to the file and fetching with monitor.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="python"&gt;# This sript is fetching data from virustotal api and passing to splunk.
# checkpointing is enabled to drop duplicate events

import json,requests,sys,time,os
from datetime import datetime


 


proxies = { 'https': 'http://security-proxy.emea.svc.corpintra.net:3128' }

url = "https://www.virustotal.com/api/v3/intelligence/hunting_notifications"

params = { 'limit' : 40, 
           'count_limit' : 10000
         }

headers = {
    "Accept": "application/json",
    "x-apikey": "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
}

current_time = datetime.now()
file_path = f'/opt/splunk/etc/apps/infy_ta_virustotal_livehunt_validation/bin/data/'
complete_name = file_path + f'livehunt_{time.strftime("%Y_%m_%d_%H_%M_%S")}'
keys_filename = f'/opt/splunk/etc/apps/infy_ta_virustotal_livehunt_validation/bin/keys.txt'


def write_new_keys_in_file(keys_filename, keys_to_be_indexed):
    try:
        
        with open(keys_filename, 'w') as file:
            for key in keys_to_be_indexed:
                file.write(str(key))
                file.write('\n')
    except  Exception as e:
        print(e)
    

def get_indexed_key(keys_filename):
    try:
              
        with open(keys_filename, 'r') as file:
            indexed_keys = file.read().splitlines()
        return indexed_keys
            
        
    except Exception as e:
        with open(keys_filename, 'w') as file:
            indexed_keys = []
        return indexed_keys
        

def get_json_data(url, headers, params, proxies):
    try:
        
        response = requests.get(url = url, headers=headers,params = params, proxies=proxies).json()
        return response
    except Exception as e:
        print(e)
        sys.exit(1)

def write_to_file(complete_name, data):
    with open(complete_name, 'a') as f:
        json.dump(data, f)
        f.write('\n')
    

def stream_to_splunk(json_response,indexed_keys, complete_name):
    try:
        
        keys_to_be_indexed = []
        events_to_be_indexed = []
        
        for item in json_response['data']:
            keys_to_be_indexed.append(item['id'])
            if item['id'] not in indexed_keys:

                write_to_file(complete_name = complete_name, data = item)
                events_to_be_indexed.append(item)
        print(json.dumps(events_to_be_indexed, indent = 4, sort_keys = True)) if len(events_to_be_indexed) else 1==1
        return keys_to_be_indexed
    except Exception as e:
        print(e)
            
    
def main():
    try:
        
        json_response = get_json_data(url = url, headers = headers, params = params, proxies = proxies)
        
        indexed_keys = get_indexed_key(keys_filename = keys_filename)
        
        keys_to_be_indexed = stream_to_splunk(json_response = json_response, indexed_keys = indexed_keys, complete_name = complete_name)
        
        write_new_keys_in_file(keys_filename = keys_filename, keys_to_be_indexed = keys_to_be_indexed)
    
    except Exception as e:
        print(e)

if __name__ == "__main__":
    main()&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 30 Apr 2022 19:50:50 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596008#M104079</guid>
      <dc:creator>vikramnain1995</dc:creator>
      <dc:date>2022-04-30T19:50:50Z</dc:date>
    </item>
    <item>
      <title>Re: splunk is not parsing json output from scripted inputs</title>
      <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596009#M104080</link>
      <description>&lt;LI-CODE lang="python"&gt;[
    {
        "attributes": {
            "date": 1651251035,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "67 69 65 0D 0A 72 6F 62 65 72 74 6F 2E 67 61 73  gie..roberto.gas\n70 61 72 65 74 74 69 *begin_highlight*40 64 61 69 6D 6C 65 72 2E*end_highlight*  paretti*begin_highlight*@daimler.*end_highlight*\n*begin_highlight*63 6F 6D *end_highlight*3A 4D 61 72 69 6E 69 6E 68 61 33 35 0D  *begin_highlight*com*end_highlight*:Marininha35.",
            "tags": [
                "abc_mail",
                "30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973",
        "links": {
            "self": "https://www.virSAtotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-30fa32f991d028fe69442b6aa60ea7fafc7a21af3107b66e04922c087329de83-1651250973"
        },
        "type": "hunting_notification"
    },
    {
        "attributes": {
            "date": 1651248256,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
            "snippet": "62 75 70 6B 69 73 73 2E 6E 65 74 3A 54 68 65 72  bupkiss.net:Ther\n61 70 79 0D 0A 61 6C 6C 69 2E 62 65 79 *begin_highlight*40 64 61*end_highlight*  apy..alli.bey*begin_highlight*@da*end_highlight*\n*begin_highlight*69 6D 6C 65 72 2E 63 6F 6D *end_highlight*3A 4E 6F 6F 72 6A 65  *begin_highlight*imler.com*end_highlight*:Noorje",
            "source_country": "SA",
            "source_key": "bbf46859",
            "tags": [
                "abc_mail",
                "6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd",
                "yara_abcd"
            ]
        },
        "id": "6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349",
        "links": {
            "self": "https://www.virSAtotal.com/api/v3/intelligence/hunting_notifications/6716443993997312-ecc78fb335be2a817b2f607bf0b18511-6ab4a7246878ad33b3e342e0f9dedf8b977016e212c0393b365d6eb81b9090cd-1651244349"
        },
        "type": "hunting_notification"
    }
]
&lt;/LI-CODE&gt;</description>
      <pubDate>Sat, 30 Apr 2022 02:40:40 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596009#M104080</guid>
      <dc:creator>vikramnain1995</dc:creator>
      <dc:date>2022-04-30T02:40:40Z</dc:date>
    </item>
    <item>
      <title>Re: splunk is not parsing json output from scripted inputs</title>
      <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596010#M104081</link>
      <description>&lt;P&gt;above is the output of the script&lt;/P&gt;</description>
      <pubDate>Sat, 30 Apr 2022 02:41:57 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596010#M104081</guid>
      <dc:creator>vikramnain1995</dc:creator>
      <dc:date>2022-04-30T02:41:57Z</dc:date>
    </item>
    <item>
      <title>Re: Why is Splunk is not parsing json output from scripted inputs?</title>
      <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596085#M104090</link>
      <description>&lt;P&gt;Please find the error for this.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;04-29-2022 16:28:09.410 +0000 ERROR JsonLineBreaker [3843466 parsing] - JSON StreamId:12469636465500470991 had parsing error:Unexpected character while expecting '"': '\'' - data_source="/opt/splunk/etc/apps/mytestingapp_ta_virustotal_livehunt_validation/bin/virustotal_livehunt.py", data_host="my_search_head", data_sourcetype="_json"&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 02 May 2022 04:29:59 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596085#M104090</guid>
      <dc:creator>vikramnain1995</dc:creator>
      <dc:date>2022-05-02T04:29:59Z</dc:date>
    </item>
    <item>
      <title>Re: Why is Splunk is not parsing json output from scripted inputs?</title>
      <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596087#M104091</link>
      <description>&lt;P&gt;&lt;a href="https://community.splunk.com/t5/user/viewprofilepage/user-id/245394"&gt;@vikramnain1995&lt;/a&gt;&amp;nbsp;- The reason is that your data is not correct JSON format.&lt;/P&gt;&lt;P&gt;JSON format always starts with "{".&lt;/P&gt;&lt;P&gt;So, the right JSON format should look something like:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;{
        "attributes": {
            "date": 1651251035,
            "match_in_subfile": false,
....
}
{
        "attributes": {
            "date": 1651248256,
            "match_in_subfile": false,
            "rule_name": "yara_abcd",
            "rule_tags": [],
.....
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;You see we are not putting "[" bracket here and also there is no comma between two different events.&lt;/P&gt;&lt;P&gt;This should be able to parse the events properly.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I hope this helps!!!!&lt;/P&gt;</description>
      <pubDate>Mon, 02 May 2022 04:49:37 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/596087#M104091</guid>
      <dc:creator>VatsalJagani</dc:creator>
      <dc:date>2022-05-02T04:49:37Z</dc:date>
    </item>
    <item>
      <title>Re: Why is Splunk is not parsing json output from scripted inputs?</title>
      <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/597583#M104323</link>
      <description>&lt;P&gt;Now script is printing in suggested order but still linebreaking error.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;{
    "attributes": "value"
    "type": "hunting_notification"
}
{
    "attributes": "value"
    "type": "hunting_notification"
}
{
    "attributes": "value"
    "type": "hunting_notification"
}&lt;/LI-CODE&gt;</description>
      <pubDate>Thu, 12 May 2022 12:59:34 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/597583#M104323</guid>
      <dc:creator>vikramnain1995</dc:creator>
      <dc:date>2022-05-12T12:59:34Z</dc:date>
    </item>
    <item>
      <title>Re: Why is Splunk is not parsing json output from scripted inputs?</title>
      <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/597584#M104324</link>
      <description>&lt;P&gt;but still getting below error&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;5-12-2022 12:47:37.258 +0000 ERROR JsonLineBreaker [20720 parsing] - JSON StreamId:12396707386195060580 had parsing error:Unexpected character while looking for value: '}' - data_source="/opt/splunk/etc/apps/my_app/bin/virustotal_livehunt.py", data_host="my_host_name", data_sourcetype="_json"&lt;/STRONG&gt;&lt;/P&gt;</description>
      <pubDate>Thu, 12 May 2022 13:00:19 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/597584#M104324</guid>
      <dc:creator>vikramnain1995</dc:creator>
      <dc:date>2022-05-12T13:00:19Z</dc:date>
    </item>
    <item>
      <title>Re: Why is Splunk is not parsing json output from scripted inputs?</title>
      <link>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/597590#M104326</link>
      <description>&lt;P&gt;my issue has been resolved. solution shared is correct only. print each json event in newline(python does automatically in simple print()&amp;nbsp; ). no comma.&lt;/P&gt;</description>
      <pubDate>Thu, 12 May 2022 13:49:38 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Getting-Data-In/Why-is-Splunk-is-not-parsing-json-output-from-scripted-inputs/m-p/597590#M104326</guid>
      <dc:creator>vikramnain1995</dc:creator>
      <dc:date>2022-05-12T13:49:38Z</dc:date>
    </item>
  </channel>
</rss>

