<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic How to associate two data set when one set need to lookup for associated key but the other data set doesn't need ? in Splunk Search</title>
    <link>https://community.splunk.com/t5/Splunk-Search/How-to-associate-two-data-set-when-one-set-need-to-lookup-for/m-p/669288#M229555</link>
    <description>&lt;P&gt;Dear All,&lt;/P&gt;&lt;P&gt;I have one index and I use this index to store messages and summary report as well.&lt;/P&gt;&lt;P&gt;In report="report_b", it stores the running case name and the used device id(DEV_ID) in timestamp _time.&lt;/P&gt;&lt;P&gt;ex.&lt;/P&gt;&lt;TABLE border="1" width="100%"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;_time&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;DEV_ID&lt;/TD&gt;&lt;TD width="12.5%" height="25px"&gt;case_name&lt;/TD&gt;&lt;TD width="12.5%"&gt;case_action&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;01:00&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;111&lt;/TD&gt;&lt;TD width="12.5%" height="25px"&gt;ping111.py&lt;/TD&gt;&lt;TD width="12.5%"&gt;start&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;01:20&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;111&lt;/TD&gt;&lt;TD width="12.5%" height="25px"&gt;ping111.py&lt;/TD&gt;&lt;TD width="12.5%"&gt;end&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;02:00&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;222&lt;/TD&gt;&lt;TD width="12.5%" height="25px"&gt;ping222.py&lt;/TD&gt;&lt;TD width="12.5%"&gt;start&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;02:30&lt;/TD&gt;&lt;TD&gt;222&lt;/TD&gt;&lt;TD&gt;ping222.py&lt;/TD&gt;&lt;TD&gt;end&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;02:40&lt;/TD&gt;&lt;TD&gt;111&lt;/TD&gt;&lt;TD&gt;ping222.py&lt;/TD&gt;&lt;TD&gt;start&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;03:00&lt;/TD&gt;&lt;TD&gt;111&lt;/TD&gt;&lt;TD&gt;ping222.py&lt;/TD&gt;&lt;TD&gt;end&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;For Message_Name="event_a",&amp;nbsp; it is stored in index=A as below:&lt;/P&gt;&lt;TABLE border="1" width="56.25%"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;_time&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;LOG_ID&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;Message_Name&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;01:10&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;01&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;event_a&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD height="25px"&gt;02:50&lt;/TD&gt;&lt;TD height="25px"&gt;02&lt;/TD&gt;&lt;TD height="25px"&gt;event_a&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&lt;BR /&gt;I would like to associate the case that is running when the event_a is sent.&lt;BR /&gt;So I use the code below:&lt;BR /&gt;Firstly, to find out the device id(DEV_ID) associated with this log(LOG_ID)&amp;nbsp;&lt;BR /&gt;Secondly, to associate event_a and case_name by DEV_ID&lt;BR /&gt;Finally, list those event_a only.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;(index=A Message_Name="event_a") OR (index=A report="report_b")
| lookup table_A.csv LOG_ID OUTPUT DEV_ID
| sort 0 + _time 
| streamstats current=false last(case_name) as last_case_name, , last(case_action) as last_case_action by DEV_ID 
| eval case_name=if(isnull(case_name) AND last_case_action="start",last_case_name,case_name)

| where isnotnull(Message_Name)
| table _time Message_Name LOG_ID DEV_ID case_name&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;BR /&gt;The output would be:&lt;/P&gt;&lt;TABLE border="1" width="666px"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="146.516px" height="25px"&gt;_time&lt;/TD&gt;&lt;TD width="126.359px" height="25px"&gt;Message_Name&lt;/TD&gt;&lt;TD width="148.938px" height="25px"&gt;LOG_ID&lt;/TD&gt;&lt;TD width="148.75px" height="25px"&gt;DEV_ID&lt;/TD&gt;&lt;TD width="94.4375px" height="25px"&gt;case_name&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="146.516px" height="25px"&gt;01:10&lt;/TD&gt;&lt;TD width="126.359px" height="25px"&gt;event_a&lt;/TD&gt;&lt;TD width="148.938px" height="25px"&gt;01&lt;/TD&gt;&lt;TD width="148.75px" height="25px"&gt;111&lt;/TD&gt;&lt;TD width="94.4375px" height="25px"&gt;&lt;FONT color="#3366FF"&gt;ping111.py&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="146.516px" height="25px"&gt;02:50&lt;/TD&gt;&lt;TD width="126.359px" height="25px"&gt;event_a&lt;/TD&gt;&lt;TD width="148.938px" height="25px"&gt;02&lt;/TD&gt;&lt;TD width="148.75px" height="25px"&gt;111&lt;/TD&gt;&lt;TD width="94.4375px" height="25px"&gt;&lt;FONT color="#3366FF"&gt;ping222.py&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The code works fine but the amount of data is huge so the lookup command takes a very long time.&amp;nbsp; Furthermore, actually, it is no need to apply lookup command for report="report_b".&lt;BR /&gt;(index=A Message_Name="event_a") : 150000 records in 24 hour&lt;BR /&gt;(index=A report="report_b") : 700000 records in 24 hour&lt;BR /&gt;&lt;BR /&gt;Is there any way to rewrite the code to make lookup only apply on events belongs to&amp;nbsp;(index=A Message_Name="event_a") ? try to use subsearch, append, appendpipe to restrict find associated DEV_ID first but not working.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you so much.&lt;/P&gt;</description>
    <pubDate>Tue, 21 Nov 2023 06:12:06 GMT</pubDate>
    <dc:creator>Jouman</dc:creator>
    <dc:date>2023-11-21T06:12:06Z</dc:date>
    <item>
      <title>How to associate two data set when one set need to lookup for associated key but the other data set doesn't need ?</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-associate-two-data-set-when-one-set-need-to-lookup-for/m-p/669288#M229555</link>
      <description>&lt;P&gt;Dear All,&lt;/P&gt;&lt;P&gt;I have one index and I use this index to store messages and summary report as well.&lt;/P&gt;&lt;P&gt;In report="report_b", it stores the running case name and the used device id(DEV_ID) in timestamp _time.&lt;/P&gt;&lt;P&gt;ex.&lt;/P&gt;&lt;TABLE border="1" width="100%"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;_time&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;DEV_ID&lt;/TD&gt;&lt;TD width="12.5%" height="25px"&gt;case_name&lt;/TD&gt;&lt;TD width="12.5%"&gt;case_action&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;01:00&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;111&lt;/TD&gt;&lt;TD width="12.5%" height="25px"&gt;ping111.py&lt;/TD&gt;&lt;TD width="12.5%"&gt;start&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;01:20&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;111&lt;/TD&gt;&lt;TD width="12.5%" height="25px"&gt;ping111.py&lt;/TD&gt;&lt;TD width="12.5%"&gt;end&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;02:00&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;222&lt;/TD&gt;&lt;TD width="12.5%" height="25px"&gt;ping222.py&lt;/TD&gt;&lt;TD width="12.5%"&gt;start&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;02:30&lt;/TD&gt;&lt;TD&gt;222&lt;/TD&gt;&lt;TD&gt;ping222.py&lt;/TD&gt;&lt;TD&gt;end&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;02:40&lt;/TD&gt;&lt;TD&gt;111&lt;/TD&gt;&lt;TD&gt;ping222.py&lt;/TD&gt;&lt;TD&gt;start&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD&gt;03:00&lt;/TD&gt;&lt;TD&gt;111&lt;/TD&gt;&lt;TD&gt;ping222.py&lt;/TD&gt;&lt;TD&gt;end&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;For Message_Name="event_a",&amp;nbsp; it is stored in index=A as below:&lt;/P&gt;&lt;TABLE border="1" width="56.25%"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;_time&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;LOG_ID&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;Message_Name&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="25%" height="25px"&gt;01:10&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;01&lt;/TD&gt;&lt;TD width="25%" height="25px"&gt;event_a&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD height="25px"&gt;02:50&lt;/TD&gt;&lt;TD height="25px"&gt;02&lt;/TD&gt;&lt;TD height="25px"&gt;event_a&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&lt;BR /&gt;I would like to associate the case that is running when the event_a is sent.&lt;BR /&gt;So I use the code below:&lt;BR /&gt;Firstly, to find out the device id(DEV_ID) associated with this log(LOG_ID)&amp;nbsp;&lt;BR /&gt;Secondly, to associate event_a and case_name by DEV_ID&lt;BR /&gt;Finally, list those event_a only.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;(index=A Message_Name="event_a") OR (index=A report="report_b")
| lookup table_A.csv LOG_ID OUTPUT DEV_ID
| sort 0 + _time 
| streamstats current=false last(case_name) as last_case_name, , last(case_action) as last_case_action by DEV_ID 
| eval case_name=if(isnull(case_name) AND last_case_action="start",last_case_name,case_name)

| where isnotnull(Message_Name)
| table _time Message_Name LOG_ID DEV_ID case_name&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;BR /&gt;The output would be:&lt;/P&gt;&lt;TABLE border="1" width="666px"&gt;&lt;TBODY&gt;&lt;TR&gt;&lt;TD width="146.516px" height="25px"&gt;_time&lt;/TD&gt;&lt;TD width="126.359px" height="25px"&gt;Message_Name&lt;/TD&gt;&lt;TD width="148.938px" height="25px"&gt;LOG_ID&lt;/TD&gt;&lt;TD width="148.75px" height="25px"&gt;DEV_ID&lt;/TD&gt;&lt;TD width="94.4375px" height="25px"&gt;case_name&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="146.516px" height="25px"&gt;01:10&lt;/TD&gt;&lt;TD width="126.359px" height="25px"&gt;event_a&lt;/TD&gt;&lt;TD width="148.938px" height="25px"&gt;01&lt;/TD&gt;&lt;TD width="148.75px" height="25px"&gt;111&lt;/TD&gt;&lt;TD width="94.4375px" height="25px"&gt;&lt;FONT color="#3366FF"&gt;ping111.py&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;TR&gt;&lt;TD width="146.516px" height="25px"&gt;02:50&lt;/TD&gt;&lt;TD width="126.359px" height="25px"&gt;event_a&lt;/TD&gt;&lt;TD width="148.938px" height="25px"&gt;02&lt;/TD&gt;&lt;TD width="148.75px" height="25px"&gt;111&lt;/TD&gt;&lt;TD width="94.4375px" height="25px"&gt;&lt;FONT color="#3366FF"&gt;ping222.py&lt;/FONT&gt;&lt;/TD&gt;&lt;/TR&gt;&lt;/TBODY&gt;&lt;/TABLE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;The code works fine but the amount of data is huge so the lookup command takes a very long time.&amp;nbsp; Furthermore, actually, it is no need to apply lookup command for report="report_b".&lt;BR /&gt;(index=A Message_Name="event_a") : 150000 records in 24 hour&lt;BR /&gt;(index=A report="report_b") : 700000 records in 24 hour&lt;BR /&gt;&lt;BR /&gt;Is there any way to rewrite the code to make lookup only apply on events belongs to&amp;nbsp;(index=A Message_Name="event_a") ? try to use subsearch, append, appendpipe to restrict find associated DEV_ID first but not working.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thank you so much.&lt;/P&gt;</description>
      <pubDate>Tue, 21 Nov 2023 06:12:06 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-associate-two-data-set-when-one-set-need-to-lookup-for/m-p/669288#M229555</guid>
      <dc:creator>Jouman</dc:creator>
      <dc:date>2023-11-21T06:12:06Z</dc:date>
    </item>
    <item>
      <title>Re: How to associate two data set when one set need to lookup for associated key but the other data set doesn't need ?</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-associate-two-data-set-when-one-set-need-to-lookup-for/m-p/669293#M229556</link>
      <description>&lt;P&gt;Just note: Often times it is better to describe your use case than trying to "fix" SPL. &amp;nbsp;Are you sure it is lookup that slows the search, not sort? &amp;nbsp;Sorting large amount of data is expensive in many ways while lookup is a very efficient command.&lt;/P&gt;&lt;P&gt;If you must try to not lookup in report_b, you can append after lookup.&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;(index=A Message_Name="event_a")
| lookup table_A.csv LOG_ID OUTPUT DEV_ID
append [search index=A report="report_b"]
| sort 0 + _time 
| streamstats current=false last(case_name) as last_case_name, , last(case_action) as last_case_action by DEV_ID 
| eval case_name=if(isnull(case_name) AND last_case_action="start",last_case_name,case_name)

| where isnotnull(Message_Name)
| table _time Message_Name LOG_ID DEV_ID case_name&lt;/LI-CODE&gt;&lt;P&gt;Not sure how much this can speed search up, however.&lt;/P&gt;</description>
      <pubDate>Tue, 21 Nov 2023 07:02:25 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-associate-two-data-set-when-one-set-need-to-lookup-for/m-p/669293#M229556</guid>
      <dc:creator>yuanliu</dc:creator>
      <dc:date>2023-11-21T07:02:25Z</dc:date>
    </item>
    <item>
      <title>Re: How to associate two data set when one set need to lookup for associated key but the other data set doesn't need ?</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-associate-two-data-set-when-one-set-need-to-lookup-for/m-p/669299#M229558</link>
      <description>&lt;P&gt;It is more likely that your performance issue is caused by the sort+streamstats rather than the lookup&lt;/P&gt;&lt;P&gt;Here is an example that does not use sort or streamstats - it may or may not work in your data, but the principle is to use stats. You can run this example and it will give you your results.&amp;nbsp;&lt;/P&gt;&lt;P&gt;The piece you would want is shown by the comment before the fields statement.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;| makeresults format=csv data="_time,DEV_ID,case_name,case_action
01:00,111,ping111.py,start
01:20,111,ping111.py,end
02:00,222,ping222.py,start
02:30,222,ping222.py,end
02:40,111,ping222.py,start
03:00,111,ping222.py,end"
| eval _time=strptime("2023-11-21 "._time.":00", "%F %T")
| append [ 
  | makeresults format=csv data="_time,LOG_ID,Message_Name
01:10,01,event_a
02:50,02,event_a"
  | eval _time=strptime("2023-11-21 "._time.":00", "%F %T")
  | eval DEV_ID=111
]

``` So use your first two lines of your search and then the following```
| fields _time DEV_ID case_name case_action LOG_ID Message_Name
| eval t=if(isnull(LOG_ID),printf("%d##%s##%s", _time, case_action, case_name), null())
| eval lt=if(isnull(LOG_ID),null,printf("%d##%s##%s", _time, LOG_ID, Message_Name))
| fields - LOG_ID Message_Name case_*
| stats values(*) as * by DEV_ID
| where isnotnull(lt)
| mvexpand lt
| eval s=split(lt, "##")
| eval _time=mvindex(s, 0), LOG_ID=mvindex(s, 1), Message_Name=mvindex(s,2)
| rex field=t max_match=0 "(?&amp;lt;report_time&amp;gt;\d+)##(?&amp;lt;case_action&amp;gt;[^#]*)##(?&amp;lt;case_name&amp;gt;.*)"
| eval min_ix=-1
| eval c = 0
| foreach mode=multivalue report_time [ eval min_ix=if(_time &amp;gt; '&amp;lt;&amp;lt;ITEM&amp;gt;&amp;gt;', c, min_ix), c=c+1 ]
| eval case_name=if(min_ix&amp;gt;=0, mvindex(case_name, min_ix), "unknown")
| eval case_action=if(min_ix&amp;gt;=0, mvindex(case_action, min_ix), "unknown")
| fields - s lt t c min_ix report_time
| table _time Message_Name LOG_ID DEV_ID case_name&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 21 Nov 2023 07:44:50 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-associate-two-data-set-when-one-set-need-to-lookup-for/m-p/669299#M229558</guid>
      <dc:creator>bowesmana</dc:creator>
      <dc:date>2023-11-21T07:44:50Z</dc:date>
    </item>
  </channel>
</rss>

