<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to find duplicates on multiple fields? in Splunk Search</title>
    <link>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629752#M218770</link>
    <description>&lt;P&gt;I have logs with the following three fields:&lt;/P&gt;
&lt;P&gt;-category&amp;nbsp;&lt;/P&gt;
&lt;P&gt;-price&amp;nbsp;&lt;/P&gt;
&lt;P&gt;-requestID (unique per entry)&lt;/P&gt;
&lt;P&gt;I want to find all requestIDs for entries that have BOTH the same category and price within a 1-hour time span.&lt;/P&gt;
&lt;P&gt;I started off with this query:&amp;nbsp;&lt;/P&gt;
&lt;P&gt;index=foo component="shop-service" | streamstats count as dupes by category, price&lt;BR /&gt;| search dupes&amp;gt; 1&lt;/P&gt;
&lt;P&gt;But I cannot seem to calculate the duplicate entries or tie them to the requestID.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Mon, 06 Feb 2023 17:56:28 GMT</pubDate>
    <dc:creator>sejiweji</dc:creator>
    <dc:date>2023-02-06T17:56:28Z</dc:date>
    <item>
      <title>How to find duplicates on multiple fields?</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629752#M218770</link>
      <description>&lt;P&gt;I have logs with the following three fields:&lt;/P&gt;
&lt;P&gt;-category&amp;nbsp;&lt;/P&gt;
&lt;P&gt;-price&amp;nbsp;&lt;/P&gt;
&lt;P&gt;-requestID (unique per entry)&lt;/P&gt;
&lt;P&gt;I want to find all requestIDs for entries that have BOTH the same category and price within a 1-hour time span.&lt;/P&gt;
&lt;P&gt;I started off with this query:&amp;nbsp;&lt;/P&gt;
&lt;P&gt;index=foo component="shop-service" | streamstats count as dupes by category, price&lt;BR /&gt;| search dupes&amp;gt; 1&lt;/P&gt;
&lt;P&gt;But I cannot seem to calculate the duplicate entries or tie them to the requestID.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 06 Feb 2023 17:56:28 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629752#M218770</guid>
      <dc:creator>sejiweji</dc:creator>
      <dc:date>2023-02-06T17:56:28Z</dc:date>
    </item>
    <item>
      <title>Re: Find duplicates on multiple fields</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629755#M218773</link>
      <description>&lt;LI-CODE lang="markup"&gt;index=foo component="shop-service"
| stats list(requestID) count as dupes by category, price
| where dupes &amp;gt; 1&lt;/LI-CODE&gt;</description>
      <pubDate>Mon, 06 Feb 2023 15:17:19 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629755#M218773</guid>
      <dc:creator>PaulPanther</dc:creator>
      <dc:date>2023-02-06T15:17:19Z</dc:date>
    </item>
    <item>
      <title>Re: Find duplicates on multiple fields</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629757#M218775</link>
      <description>&lt;P&gt;Try eventstats not streamstats&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;index=foo component="shop-service" | eventstats count as dupes by category, price
| search dupes&amp;gt; 1&lt;/LI-CODE&gt;</description>
      <pubDate>Mon, 06 Feb 2023 15:23:24 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629757#M218775</guid>
      <dc:creator>ITWhisperer</dc:creator>
      <dc:date>2023-02-06T15:23:24Z</dc:date>
    </item>
    <item>
      <title>Re: How to find duplicates on multiple fields?</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629818#M218806</link>
      <description>&lt;P&gt;I assume you are searching a time window longer than 1 hour if you are using streamstats. If you are only searching 60 minutes, then stats will work.&lt;/P&gt;&lt;P&gt;To collect the requestIDs, use values(requestID) in the streamstats command&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;index=foo component="shop-service" 
| streamstats time_window=1h values(requestID) as requestIDs by category price
| where mvcount(requestIDs) &amp;gt; 1&lt;/LI-CODE&gt;&lt;P&gt;This will collect all unique requestIDs that have the same category and price and the mvcount() does the &amp;gt; 1 test.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Note that there are event limitations using streamstats with long time windows, see the docs, so be aware.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 07 Feb 2023 02:36:19 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-find-duplicates-on-multiple-fields/m-p/629818#M218806</guid>
      <dc:creator>bowesmana</dc:creator>
      <dc:date>2023-02-07T02:36:19Z</dc:date>
    </item>
  </channel>
</rss>

