<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Different events number while searching via python sdk in Splunk Search</title>
    <link>https://community.splunk.com/t5/Splunk-Search/Different-events-number-while-searching-via-python-sdk/m-p/560291#M159235</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I am using python SDK to search with this configuration:&lt;/P&gt;&lt;PRE&gt;query_kwargs = {&lt;SPAN&gt;'earliest_time'&lt;/SPAN&gt;: earliest&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt;                &lt;SPAN&gt;'latest_time'&lt;/SPAN&gt;: latest&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt;                &lt;SPAN&gt;'results_preview'&lt;/SPAN&gt;: &lt;SPAN&gt;False,&lt;BR /&gt;&lt;/SPAN&gt;                &lt;SPAN&gt;'search_mode'&lt;/SPAN&gt;: &lt;SPAN&gt;'normal'&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt;                &lt;SPAN&gt;'status_buckets'&lt;/SPAN&gt;: &lt;SPAN&gt;2&lt;BR /&gt;&lt;/SPAN&gt;                }&lt;BR /&gt;job =splunk_client.jobs.create(query&lt;SPAN&gt;, &lt;/SPAN&gt;**query_kwargs)&lt;BR /&gt;&lt;BR /&gt;&lt;STRONG&gt;As per the Splunk documentation (&lt;A href="https://dev.splunk.com/enterprise/docs/devtools/python/sdk-python/howtousesplunkpython/howtorunsearchespython/" target="_blank" rel="noopener"&gt;https://dev.splunk.com/enterprise/docs/devtools/python/sdk-python/howtousesplunkpython/howtorunsearchespython/&lt;/A&gt;)&lt;BR /&gt;&lt;BR /&gt;I do the following:&lt;/STRONG&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;while True&lt;/SPAN&gt;:&lt;BR /&gt;&lt;SPAN&gt;while not &lt;/SPAN&gt;job.is_ready():&lt;BR /&gt;&lt;SPAN&gt;pass&lt;BR /&gt;&lt;/SPAN&gt; stats = {&lt;BR /&gt;&lt;SPAN&gt;'isDone'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'isDone'&lt;/SPAN&gt;]&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt; &lt;SPAN&gt;'doneProgress'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'doneProgress'&lt;/SPAN&gt;]&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt; &lt;SPAN&gt;'scanCount'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'scanCount'&lt;/SPAN&gt;]&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt; &lt;SPAN&gt;'eventCount'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'eventCount'&lt;/SPAN&gt;]&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt; &lt;SPAN&gt;'resultCount'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'resultCount'&lt;/SPAN&gt;]&lt;BR /&gt;}&lt;BR /&gt;&lt;BR /&gt;progress = 
&lt;SPAN&gt;float&lt;/SPAN&gt;(stats[&lt;SPAN&gt;'doneProgress'&lt;/SPAN&gt;])*&lt;SPAN&gt;100&lt;BR /&gt;&lt;/SPAN&gt; scanned = &lt;SPAN&gt;int&lt;/SPAN&gt;(stats[&lt;SPAN&gt;'scanCount'&lt;/SPAN&gt;])&lt;BR /&gt;matched = &lt;SPAN&gt;int&lt;/SPAN&gt;(stats[&lt;SPAN&gt;'eventCount'&lt;/SPAN&gt;])&lt;BR /&gt;result_count = &lt;SPAN&gt;int&lt;/SPAN&gt;(stats[&lt;SPAN&gt;'resultCount'&lt;/SPAN&gt;])&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;if &lt;/SPAN&gt;verbose:&lt;BR /&gt;status = (&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;\r&lt;/SPAN&gt;&lt;SPAN&gt;%03.1f%% | %d scanned | %d matched | %d results" &lt;/SPAN&gt;% (progress&lt;SPAN&gt;, &lt;/SPAN&gt;scanned&lt;SPAN&gt;, &lt;/SPAN&gt;matched&lt;SPAN&gt;, &lt;/SPAN&gt;result_count))&lt;BR /&gt;sys.stdout.write(status)&lt;BR /&gt;sys.stdout.flush()&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;if &lt;/SPAN&gt;job[&lt;SPAN&gt;"isDone"&lt;/SPAN&gt;] == &lt;SPAN&gt;"1"&lt;/SPAN&gt;:&lt;BR /&gt;&lt;SPAN&gt;if &lt;/SPAN&gt;verbose:&lt;BR /&gt;sys.stdout.write(&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;\n&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;)&lt;BR /&gt;&lt;SPAN&gt;break&lt;BR /&gt;&lt;/SPAN&gt; time.sleep(&lt;SPAN&gt;2&lt;/SPAN&gt;)&amp;nbsp;&lt;/PRE&gt;&lt;P&gt;Then once the job is finished I do this:&lt;/P&gt;&lt;PRE&gt;offset = &lt;SPAN&gt;0&lt;BR /&gt;&lt;/SPAN&gt;max_event_count = &lt;SPAN&gt;50000&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;&lt;BR /&gt;&lt;/SPAN&gt;total_results = []&lt;BR /&gt;first_50k_results = &lt;SPAN&gt;self&lt;/SPAN&gt;.get_results(job&lt;SPAN&gt;, &lt;/SPAN&gt;offset&lt;SPAN&gt;, &lt;/SPAN&gt;max_event_count)&lt;BR /&gt;total_results.extend(first_50k_results)&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;while &lt;/SPAN&gt;offset &amp;lt;= number_of_results:&lt;BR /&gt;    offset += max_event_count&lt;BR /&gt;    intermediate_result = &lt;SPAN&gt;self&lt;/SPAN&gt;.get_results(job&lt;SPAN&gt;, &lt;/SPAN&gt;offset&lt;SPAN&gt;, &lt;/SPAN&gt;max_event_count)&lt;BR /&gt;    total_results.extend(intermediate_result)&lt;BR /&gt;&lt;BR 
/&gt;&lt;/PRE&gt;&lt;PRE&gt;&lt;SPAN&gt;def &lt;/SPAN&gt;&lt;SPAN&gt;get_results&lt;/SPAN&gt;(&lt;SPAN&gt;self&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;job&lt;SPAN&gt;, &lt;/SPAN&gt;offset&lt;SPAN&gt;, &lt;/SPAN&gt;max_event_count):&lt;BR /&gt;    logger.info(&lt;SPAN&gt;"collecting results,please wait . . "&lt;/SPAN&gt;)&lt;BR /&gt;    results_list = []&lt;BR /&gt;    kwargs_paginate = {&lt;SPAN&gt;"count"&lt;/SPAN&gt;: max_event_count&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"offset"&lt;/SPAN&gt;: offset}&lt;BR /&gt;    &lt;SPAN&gt;for &lt;/SPAN&gt;result &lt;SPAN&gt;in &lt;/SPAN&gt;results.ResultsReader(job.results(**kwargs_paginate)):&lt;BR /&gt;        results_list.append(result)&lt;BR /&gt;    &lt;SPAN&gt;return &lt;/SPAN&gt;results_list&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The issue is that the number of events that the python search returns is different from the number of events that the search in the Splunk console returns.&lt;/P&gt;&lt;P&gt;Can you please advise what I am doing wrong?&lt;/P&gt;&lt;P&gt;Please note that I am using&amp;nbsp;&lt;SPAN&gt;explicit index= in my search&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 21 Jul 2021 13:35:47 GMT</pubDate>
    <dc:creator>osnathy83</dc:creator>
    <dc:date>2021-07-21T13:35:47Z</dc:date>
    <item>
      <title>Different events number while searching via python sdk</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Different-events-number-while-searching-via-python-sdk/m-p/560291#M159235</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I am using python SDK to search with this configuration:&lt;/P&gt;&lt;PRE&gt;query_kwargs = {&lt;SPAN&gt;'earliest_time'&lt;/SPAN&gt;: earliest&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt;                &lt;SPAN&gt;'latest_time'&lt;/SPAN&gt;: latest&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt;                &lt;SPAN&gt;'results_preview'&lt;/SPAN&gt;: &lt;SPAN&gt;False,&lt;BR /&gt;&lt;/SPAN&gt;                &lt;SPAN&gt;'search_mode'&lt;/SPAN&gt;: &lt;SPAN&gt;'normal'&lt;/SPAN&gt;&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt;                &lt;SPAN&gt;'status_buckets'&lt;/SPAN&gt;: &lt;SPAN&gt;2&lt;BR /&gt;&lt;/SPAN&gt;                }&lt;BR /&gt;job =splunk_client.jobs.create(query&lt;SPAN&gt;, &lt;/SPAN&gt;**query_kwargs)&lt;BR /&gt;&lt;BR /&gt;&lt;STRONG&gt;As per the Splunk documentation (&lt;A href="https://dev.splunk.com/enterprise/docs/devtools/python/sdk-python/howtousesplunkpython/howtorunsearchespython/" target="_blank" rel="noopener"&gt;https://dev.splunk.com/enterprise/docs/devtools/python/sdk-python/howtousesplunkpython/howtorunsearchespython/&lt;/A&gt;)&lt;BR /&gt;&lt;BR /&gt;I do the following:&lt;/STRONG&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;while True&lt;/SPAN&gt;:&lt;BR /&gt;&lt;SPAN&gt;while not &lt;/SPAN&gt;job.is_ready():&lt;BR /&gt;&lt;SPAN&gt;pass&lt;BR /&gt;&lt;/SPAN&gt; stats = {&lt;BR /&gt;&lt;SPAN&gt;'isDone'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'isDone'&lt;/SPAN&gt;]&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt; &lt;SPAN&gt;'doneProgress'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'doneProgress'&lt;/SPAN&gt;]&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt; &lt;SPAN&gt;'scanCount'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'scanCount'&lt;/SPAN&gt;]&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt; &lt;SPAN&gt;'eventCount'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'eventCount'&lt;/SPAN&gt;]&lt;SPAN&gt;,&lt;BR /&gt;&lt;/SPAN&gt; &lt;SPAN&gt;'resultCount'&lt;/SPAN&gt;: job[&lt;SPAN&gt;'resultCount'&lt;/SPAN&gt;]&lt;BR /&gt;}&lt;BR /&gt;&lt;BR /&gt;progress = 
&lt;SPAN&gt;float&lt;/SPAN&gt;(stats[&lt;SPAN&gt;'doneProgress'&lt;/SPAN&gt;])*&lt;SPAN&gt;100&lt;BR /&gt;&lt;/SPAN&gt; scanned = &lt;SPAN&gt;int&lt;/SPAN&gt;(stats[&lt;SPAN&gt;'scanCount'&lt;/SPAN&gt;])&lt;BR /&gt;matched = &lt;SPAN&gt;int&lt;/SPAN&gt;(stats[&lt;SPAN&gt;'eventCount'&lt;/SPAN&gt;])&lt;BR /&gt;result_count = &lt;SPAN&gt;int&lt;/SPAN&gt;(stats[&lt;SPAN&gt;'resultCount'&lt;/SPAN&gt;])&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;if &lt;/SPAN&gt;verbose:&lt;BR /&gt;status = (&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;\r&lt;/SPAN&gt;&lt;SPAN&gt;%03.1f%% | %d scanned | %d matched | %d results" &lt;/SPAN&gt;% (progress&lt;SPAN&gt;, &lt;/SPAN&gt;scanned&lt;SPAN&gt;, &lt;/SPAN&gt;matched&lt;SPAN&gt;, &lt;/SPAN&gt;result_count))&lt;BR /&gt;sys.stdout.write(status)&lt;BR /&gt;sys.stdout.flush()&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;if &lt;/SPAN&gt;job[&lt;SPAN&gt;"isDone"&lt;/SPAN&gt;] == &lt;SPAN&gt;"1"&lt;/SPAN&gt;:&lt;BR /&gt;&lt;SPAN&gt;if &lt;/SPAN&gt;verbose:&lt;BR /&gt;sys.stdout.write(&lt;SPAN&gt;"&lt;/SPAN&gt;&lt;SPAN&gt;\n&lt;/SPAN&gt;&lt;SPAN&gt;"&lt;/SPAN&gt;)&lt;BR /&gt;&lt;SPAN&gt;break&lt;BR /&gt;&lt;/SPAN&gt; time.sleep(&lt;SPAN&gt;2&lt;/SPAN&gt;)&amp;nbsp;&lt;/PRE&gt;&lt;P&gt;Then once the job is finished I do this:&lt;/P&gt;&lt;PRE&gt;offset = &lt;SPAN&gt;0&lt;BR /&gt;&lt;/SPAN&gt;max_event_count = &lt;SPAN&gt;50000&lt;BR /&gt;&lt;/SPAN&gt;&lt;SPAN&gt;&lt;BR /&gt;&lt;/SPAN&gt;total_results = []&lt;BR /&gt;first_50k_results = &lt;SPAN&gt;self&lt;/SPAN&gt;.get_results(job&lt;SPAN&gt;, &lt;/SPAN&gt;offset&lt;SPAN&gt;, &lt;/SPAN&gt;max_event_count)&lt;BR /&gt;total_results.extend(first_50k_results)&lt;BR /&gt;&lt;BR /&gt;&lt;SPAN&gt;while &lt;/SPAN&gt;offset &amp;lt;= number_of_results:&lt;BR /&gt;    offset += max_event_count&lt;BR /&gt;    intermediate_result = &lt;SPAN&gt;self&lt;/SPAN&gt;.get_results(job&lt;SPAN&gt;, &lt;/SPAN&gt;offset&lt;SPAN&gt;, &lt;/SPAN&gt;max_event_count)&lt;BR /&gt;    total_results.extend(intermediate_result)&lt;BR /&gt;&lt;BR 
/&gt;&lt;/PRE&gt;&lt;PRE&gt;&lt;SPAN&gt;def &lt;/SPAN&gt;&lt;SPAN&gt;get_results&lt;/SPAN&gt;(&lt;SPAN&gt;self&lt;/SPAN&gt;&lt;SPAN&gt;, &lt;/SPAN&gt;job&lt;SPAN&gt;, &lt;/SPAN&gt;offset&lt;SPAN&gt;, &lt;/SPAN&gt;max_event_count):&lt;BR /&gt;    logger.info(&lt;SPAN&gt;"collecting results,please wait . . "&lt;/SPAN&gt;)&lt;BR /&gt;    results_list = []&lt;BR /&gt;    kwargs_paginate = {&lt;SPAN&gt;"count"&lt;/SPAN&gt;: max_event_count&lt;SPAN&gt;, &lt;/SPAN&gt;&lt;SPAN&gt;"offset"&lt;/SPAN&gt;: offset}&lt;BR /&gt;    &lt;SPAN&gt;for &lt;/SPAN&gt;result &lt;SPAN&gt;in &lt;/SPAN&gt;results.ResultsReader(job.results(**kwargs_paginate)):&lt;BR /&gt;        results_list.append(result)&lt;BR /&gt;    &lt;SPAN&gt;return &lt;/SPAN&gt;results_list&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The issue is that the number of events that the python search returns is different from the number of events that the search in the Splunk console returns.&lt;/P&gt;&lt;P&gt;Can you please advise what I am doing wrong?&lt;/P&gt;&lt;P&gt;Please note that I am using&amp;nbsp;&lt;SPAN&gt;explicit index= in my search&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 21 Jul 2021 13:35:47 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Different-events-number-while-searching-via-python-sdk/m-p/560291#M159235</guid>
      <dc:creator>osnathy83</dc:creator>
      <dc:date>2021-07-21T13:35:47Z</dc:date>
    </item>
  </channel>
</rss>

