Splunk Enterprise

mstats join taking too long with joins - how can i make it faster?

robertlynch2020
Motivator

Hi

I have the following complex statement with multiple mstats.

The issue is I think I have to do joins to get the data to work for me correctly, however, this is expencive for time. It takes 5 seconds to get back 1 hour. I want to get back 10 hours. 

Is there any other way I can pull multiple data and bring them together without using a join?

 

 

| mstats min("mx.process.cpu.utilization") as cpuPerc min("mx.process.threads") as nbOfThreads min("mx.process.memory.usage") as memoryCons min("mx.process.file_descriptors") as nbOfOpenFiles min("mx.process.up.time") as upTime avg("mx.process.creation.time") as creationTime WHERE "index"="metrics_test" AND mx.env=http://mx20267vm:15000 span=30s BY pid cmd service.type host.name service.name replica.name component.name threshold 
| rename "service.name" as service_name 
| rename "replica.name" as replica_name 
| rename "service.type" as service_type 
| eval T_NbOfThreads=if(isnull(nbOfThreads),"",threshold) 
| eval T_MemoryCons=if(isnull(memoryCons),"",threshold) 
| eval T_NbOfOpenFiles=if(isnull(nbOfOpenFiles),"",threshold) 
| stats values(cpuPerc) as cpuPerc values(nbOfThreads) as nbOfThreads values(memoryCons) as memoryCons values(nbOfOpenFiles) as nbOfOpenFiles values(upTime) as upTime values(creationTime) as creationTime values(T_NbOfOpenFiles) as T_NbOfOpenFiles values(T_MemoryCons) as T_MemoryCons values(T_CpuPerc) as T_CpuPerc values(T_NbOfThreads) as T_NbOfThreads by _time pid cmd service_type host.name service_name replica_name component.name 
| eval Process_Name=((service_name . " # ") . replica_name) 
| sort 0 - _time 
| dedup _time pid 
| join type=left Process_Name _time 
    [| mstats min("mx.replica.status") as Replica WHERE "index"="metrics_test" AND mx.env=http://mx20267vm:15000 span=30s BY "service.name" replica.name service.type 
    | rename "service.name" as service_name 
    | rename "replica.name" as replica_name 
    | eval Process_Name=((service_name . " # ") . replica_name) 
    | table Process_Name, Replica, "service.type", _time 
    | sort 0 - _time 
    | dedup _time Process_Name] 
| sort Process_Name _time 
| table _time, Process_Name, Replica 
| streamstats last(Replica) as Replica 
| sort - _time 
| append maxout=200000 
    [| mstats min("mx.replica.status") as Replica WHERE "index"="metrics_test" AND mx.env=http://mx20267vm:15000 span=30s BY "service.name" replica.name service.type 
    | rename "service.name" as service_name 
    | rename "replica.name" as replica_name 
    | rename "service.type" as service_type 
    | eval Process_Name=((service_name . " # ") . replica_name) 
    | table Process_Name, Replica, service_type, _time 
    | sort 0 - _time 
    | dedup _time Process_Name 
    | join type=left Process_Name,_time 
        [| mstats min("mx.process.cpu.utilization") as cpuPerc min("mx.process.threads") as nbOfThreads min("mx.process.memory.usage") as memoryCons min("mx.process.file_descriptors") as nbOfOpenFiles min("mx.process.up.time") as upTime avg("mx.process.creation.time") as creationTime WHERE "index"="metrics_test" AND mx.env=http://mx20267vm:15000 span=30s BY pid cmd service.type host.name service.name replica.name component.name threshold 
        | rename "service.name" as service_name 
        | rename "replica.name" as replica_name 
        | rename "service.type" as service_type 
        | eval T_NbOfThreads=if(isnull(nbOfThreads),"",threshold) 
        | eval T_MemoryCons=if(isnull(memoryCons),"",threshold) 
        | eval T_NbOfOpenFiles=if(isnull(nbOfOpenFiles),"",threshold) 
        | stats values(cpuPerc) as cpuPerc values(nbOfThreads) as nbOfThreads values(memoryCons) as memoryCons values(nbOfOpenFiles) as nbOfOpenFiles values(upTime) as upTime values(creationTime) as creationTime values(T_NbOfOpenFiles) as T_NbOfOpenFiles values(T_MemoryCons) as T_MemoryCons values(T_CpuPerc) as T_CpuPerc values(T_NbOfThreads) as T_NbOfThreads by _time pid cmd service_type host.name service_name replica_name component.name 
        | eval Process_Name=((service_name . " # ") . replica_name) 
        | sort 0 - _time 
        | dedup _time pid] 
    | rex field=Process_Name "(?<service_name2>.*) # (?<replica_name2>.*)" 
    | eval service_name=if(isnull(service_name),service_name2,service_name) 
    | eval replica_name=if(isnull(replica_name),replica_name2,replica_name)] 
| sort 0 - _time 
| dedup _time pid 
| join type=left 
    [| mstats min("mx.process.resources.status") as Resources WHERE "index"="metrics_test" AND mx.env=http://mx20267vm:15000 span=30s BY "service.name" replica.name 
    | rename "service.name" as service_name 
    | rename "replica.name" as replica_name 
    | eval Process_Name=((service_name . " # ") . replica_name) 
    | sort 0 - _time 
    | dedup _time Process_Name 
    | table Process_Name, Status, Resources 
    | eval Resources=rtrim(Resources,substr(Resources,-7)) 
    | eval Resources=if((Resources == ""),0,Resources)] 
| eval Status=(Resources * Replica) 
| eval Status=if((Status == 4),2,if((Status == 0),0,1)) 
| eval Replica=case((Process_Name == "xmlserver # xmlserver"),"2",(Process_Name == "zookeeper # zookeeper"),"2",(Process_Name == "fileserver # fileserver"),"2",true(),Replica) 
| search Process_Name!="*ANT_TASK*" 
| eval Replica=if((Replica == 1),0,Replica) 
| timechart min(Replica) as Process_Status

 

 

 

 

 

Labels (1)
Tags (1)
0 Karma
Get Updates on the Splunk Community!

.conf24 | Day 0

Hello Splunk Community! My name is Chris, and I'm based in Canberra, Australia's capital, and I travelled for ...

Enhance Security Visibility with Splunk Enterprise Security 7.1 through Threat ...

(view in My Videos)Struggling with alert fatigue, lack of context, and prioritization around security ...

Troubleshooting the OpenTelemetry Collector

  In this tech talk, you’ll learn how to troubleshoot the OpenTelemetry collector - from checking the ...