<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Dedup is extremely Slow in Splunk Search</title>
    <link>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746573#M241675</link>
    <description>&lt;P&gt;first is "closer" to dedup since it keeps the first event in the event pipeline for each unique value of the dedup'd field(s)&lt;/P&gt;</description>
    <pubDate>Tue, 20 May 2025 16:19:04 GMT</pubDate>
    <dc:creator>ITWhisperer</dc:creator>
    <dc:date>2025-05-20T16:19:04Z</dc:date>
    <item>
      <title>Dedup is extremely Slow</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746550#M241666</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;&lt;P&gt;I have a Search that is taking 5 min to complete when looking at only the last 24 hrs.&amp;nbsp; If possible, could someone help me figure out how I can improve this Search?&amp;nbsp; I am in need of deduping by SessionId and combining&amp;nbsp; 3 fields into a single field.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;source="mobilepro-test"
| dedup Session.SessionId
| strcat UserInfo.UserId " " Location.Site " " Session.StartTime label
| table Session.SessionId, label&lt;/LI-CODE&gt;&lt;P&gt;It looks like it's the dedup that is causing the slowness, but I have no idea how to improve that.&lt;/P&gt;&lt;P&gt;Thanks for any help on this one,&lt;/P&gt;&lt;P&gt;Tom&lt;/P&gt;</description>
      <pubDate>Tue, 20 May 2025 13:27:43 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746550#M241666</guid>
      <dc:creator>tdavison76</dc:creator>
      <dc:date>2025-05-20T13:27:43Z</dc:date>
    </item>
    <item>
      <title>Re: Dedup is extremely Slow</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746552#M241667</link>
      <description>&lt;P&gt;You could try stats&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;source="mobilepro-test"
| stats first(UserInfo.UserId) as UserInfo.UserId first(Location.Site) as Location.Site first(Session.StartTime) as Session.StartTime by Session.SessionId
| strcat UserInfo.UserId " " Location.Site " " Session.StartTime label
| table Session.SessionId, label&lt;/LI-CODE&gt;</description>
      <pubDate>Tue, 20 May 2025 13:43:31 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746552#M241667</guid>
      <dc:creator>ITWhisperer</dc:creator>
      <dc:date>2025-05-20T13:43:31Z</dc:date>
    </item>
    <item>
      <title>Re: Dedup is extremely Slow</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746560#M241671</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;a href="https://community.splunk.com/t5/user/viewprofilepage/user-id/267744"&gt;@tdavison76&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I would recommend using stats for this instead, see below:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;source="mobilepro-test"
| strcat UserInfo.UserId " " Location.Site " " Session.StartTime label
| stats latest(label) as label by Session.SessionId&lt;/LI-CODE&gt;&lt;P&gt;You could switch the order of strcat to save on processing multiple strcat:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;source="mobilepro-test"
| stats latest(UserInfo.UserId) as UserInfo_UserId, latest(Location.Site) as Location_Site, latest(Session.StartTime) AS Session_StartTime by Session.SessionId
| strcat UserInfo_UserId " " Location_Site " " Session_StartTime label
| table Session.SessionId, label&lt;/LI-CODE&gt;&lt;P&gt;Note: We are using "latest" here which keeps the most recent event.&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-unicode-emoji" title=":glowing_star:"&gt;🌟&lt;/span&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Did this answer help you?&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;If so, please consider:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Adding karma to show it was useful&lt;/LI&gt;&lt;LI&gt;Marking it as the solution if it resolved your issue&lt;/LI&gt;&lt;LI&gt;Commenting if you need any clarification&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;Your feedback encourages the volunteers in this community to continue contributing&lt;/P&gt;</description>
      <pubDate>Tue, 20 May 2025 14:35:44 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746560#M241671</guid>
      <dc:creator>livehybrid</dc:creator>
      <dc:date>2025-05-20T14:35:44Z</dc:date>
    </item>
    <item>
      <title>Re: Dedup is extremely Slow</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746566#M241673</link>
      <description>&lt;P&gt;As a side note, completely irrelevant to the original problem - I'm wondering whether there will be any noticeable performance difference between first(something) and latest(something) in case of a default base search returning results in reverse chronological order.&lt;/P&gt;</description>
      <pubDate>Tue, 20 May 2025 15:22:54 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746566#M241673</guid>
      <dc:creator>PickleRick</dc:creator>
      <dc:date>2025-05-20T15:22:54Z</dc:date>
    </item>
    <item>
      <title>Re: Dedup is extremely Slow</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746569#M241674</link>
      <description>&lt;P&gt;That's a good point&amp;nbsp;&lt;a href="https://community.splunk.com/t5/user/viewprofilepage/user-id/231884"&gt;@PickleRick&lt;/a&gt;&amp;nbsp;- for some reason I've always used latest, mainly in case there is any reason that events don't get returned with the most recent first (e.g. sorting of some sort, changes to _time, lookups, appends etc) but I suppose stats will stop looking after the first event if using first() but could read all events to check it's still the "latest".&lt;BR /&gt;I might try this on a big dataset to see if it makes much difference!&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 20 May 2025 15:26:23 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746569#M241674</guid>
      <dc:creator>livehybrid</dc:creator>
      <dc:date>2025-05-20T15:26:23Z</dc:date>
    </item>
    <item>
      <title>Re: Dedup is extremely Slow</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746573#M241675</link>
      <description>&lt;P&gt;first is "closer" to dedup since it keeps the first event in the event pipeline for each unique value of the dedup'd field(s)&lt;/P&gt;</description>
      <pubDate>Tue, 20 May 2025 16:19:04 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Dedup-is-extremely-Slow/m-p/746573#M241675</guid>
      <dc:creator>ITWhisperer</dc:creator>
      <dc:date>2025-05-20T16:19:04Z</dc:date>
    </item>
  </channel>
</rss>

