**2017 Update for Splunk 6.3+ - from the upcoming Information Theory Suite for Splunk**
Revised entropy.py for the SCPv2 API. Make sure "splunklib" is in the app's bin directory. (No error handling.)
#!/usr/bin/env python
# coding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
#####
#
# entropy.py
# Part of: Information Theory Suite for Splunk
#
# commands.conf for SCPv2 chunked (Splunk 6.3+); the SCPv1 config below is still recommended in order to get all records from stats buffers
"""
[entropy]
filename = entropy.py
chunked = true
"""
#
# commands.conf for SCPv1 (recommended for now due to stats and tstats + chunked limitations)
"""
[entropy]
filename = entropy.py
enableheader = true
outputheader = true
requires_srinfo = true
stderr_dest = message
supports_getinfo = true
supports_rawargs = true
supports_multivalues = true
"""
#####
# - rshoward
# Credit to http://stackoverflow.com/questions/2979174/how-do-i-compute-the-approximate-entropy-of-a-bit-string
# and the Revelation codebase (GPL, https://github.com/mikelolasagasti/revelation) for the entropy function
####
import math
import sys
from collections import Counter

from splunklib.searchcommands import dispatch, StreamingCommand, Configuration, Option, validators
def entropy(string):
    """Calculate the Shannon entropy of a string, in bits per symbol.

    Args:
        string: The sequence of symbols (normally a text string) to measure.

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct symbol, as a non-negative float. Empty input yields ``0.0``.
    """
    total = len(string)
    if not total:
        return 0.0
    log_2 = math.log(2.0)
    bits = 0.0
    # Count every symbol in a single pass; calling string.count() once per
    # distinct symbol rescans the whole input each time.
    for occurrences in Counter(string).values():
        p = float(occurrences) / total
        # Subtracting each term (instead of negating the final sum) keeps a
        # single-symbol string at 0.0 rather than -0.0.
        bits -= p * math.log(p) / log_2
    return bits
def entropy_ideal(length):
    """Calculate the ideal Shannon entropy of a string with the given length.

    The ideal case treats every one of the ``length`` symbols as distinct, so
    each symbol has probability ``1 / length``.

    Args:
        length: Number of symbols in the string; expected to be a
            non-negative integer such as the result of ``len()``.

    Returns:
        The ideal entropy in bits as a float. Lengths 0 and 1 yield ``0.0``.

    Raises:
        ValueError: If ``length`` is negative.
    """
    if length < 0:
        raise ValueError("length must be non-negative")
    if length <= 1:
        # Length 0 would divide by zero and length 1 would evaluate to -0.0;
        # neither string can carry any information.
        return 0.0
    prob = 1.0 / length
    ideal = -1.0 * length * prob * math.log(prob) / math.log(2.0)
    return ideal
def _field_text(value):
    """Return ``value`` as text, or ``None`` when it is not a single text value."""
    if isinstance(value, bytes):
        # Undecodable bytes are replaced instead of aborting the whole search.
        return value.decode("utf-8", "replace")
    if isinstance(value, type(u"")):
        return value
    return None


@Configuration()
class EntropyCommand(StreamingCommand):
    """Calculates entropy and ideal values for a given set of fields.

    ##Syntax

    .. code-block::
        entropy includeideal=<bool> includedistance=<bool> <field-list>

    ##Description

    Calculate Shannon Entropy for the given field list. Values will be returned into the data stream as
    entropy_<field-name>. Optionally, you can include the ideal entropy for the given field's string length
    (entropy_ideal_<field-name>), as well as the distance between the field's entropy value and the ideal
    entropy for said field's string length (entropy_ideal_distance_<field-name>).

    Records that do not carry one of the listed fields, or carry it with a value that is not a single
    text value, are passed through without entropy fields for that field name.

    ##Example

    Search for records where the entropy of any of the fields in a list are close to the ideal entropy.

    .. code-block::
        index=proxy_sg | entropy includeideal=T includedistance=T host path file options | search entropy_ideal_distance* > -1
    """

    includeideal = Option(
        doc='\n'
            '**Syntax:** **includeideal=***<boolean>*\n'
            '**Description:** Calculate and include the ideal entropy for the given field(s) length',
        require=False, validate=validators.Boolean())

    includedistance = Option(
        doc='\n'
            '**Syntax:** **includedistance=***<boolean>*\n'
            '**Description:** Calculate and include the field\'s entropy distance from ideal entropy for the given field(s) length',
        require=False, validate=validators.Boolean())

    def stream(self, records):
        """Add the entropy fields to each record and yield it.

        Args:
            records: Iterable of mapping-like records from the search pipeline.

        Yields:
            Every input record, with ``entropy_<field>`` (and, when requested,
            ``entropy_ideal_<field>`` / ``entropy_ideal_distance_<field>``)
            set for each listed field that holds a text value.
        """
        self.logger.debug('EntropyCommand init via: %s', self)  # logs command line
        for record in records:
            for fieldname in self.fieldnames:
                try:
                    raw_value = record[fieldname]
                except KeyError:
                    # Not every event carries every requested field.
                    continue
                text = _field_text(raw_value)
                if text is None:
                    continue

                actual = entropy(text)
                record["entropy_" + fieldname] = actual

                if self.includeideal or self.includedistance:
                    # An empty value has no ideal entropy to compare against;
                    # guard here so a zero length is never passed on.
                    ideal = entropy_ideal(len(text)) if text else 0.0
                    if self.includeideal:
                        record["entropy_ideal_" + fieldname] = ideal
                    if self.includedistance:
                        record["entropy_ideal_distance_" + fieldname] = float(actual) - float(ideal)
            yield record
# Script entry point: hand the command class, the process arguments and the
# standard streams to splunklib's dispatcher.
# NOTE(review): dispatch() presumably only executes the command when the module
# name passed in is "__main__" — confirm against the splunklib documentation.
dispatch(EntropyCommand, sys.argv, sys.stdin, sys.stdout, __name__)
Original post from 2011 :
Use the following code for a custom command. Quoting the original source: "Shannon's entropy equation is the standard method of calculation. Here is a simple implementation in Python, shamelessly copied from the Revelation codebase, and thus GPL licensed:"
def entropy(string):
    """Calculates the Shannon entropy of a string, in bits per character.

    Returns a non-negative float; an empty string yields 0.0.
    """
    length = len(string)
    if length == 0:
        return 0.0
    # Tally each character in a single pass instead of calling
    # string.count() once per distinct character.
    counts = {}
    for char in string:
        counts[char] = counts.get(char, 0) + 1
    log_2 = math.log(2.0)
    result = 0.0
    for count in counts.values():
        p = float(count) / length
        # Subtract term by term so a one-character alphabet gives 0.0, not -0.0.
        result -= p * math.log(p) / log_2
    return result
And
def entropy_ideal(length):
    """Calculates the ideal Shannon entropy of a string with given length.

    ``length`` is expected to be a non-negative character count. Lengths 0
    and 1 return 0.0; a negative length raises ValueError.
    """
    if length < 0:
        raise ValueError("length must be non-negative")
    if length <= 1:
        # Avoids dividing by zero for an empty string and -0.0 for one character.
        return 0.0
    prob = 1.0 / length
    return -1.0 * length * prob * math.log(prob) / math.log(2.0)
EDIT: Completed streaming custom command. Already found some hidden call-backs in proxy logs.
Usage: "...| entropy [field]"
Will add a field called entropy_[field] with the shannon entropy value of the field.
If no field is specified, `_raw` is used, which creates a field named `entropy__raw` (with two underscores).
This is easily modified to take multiple fields, or it can be used as-is by chaining the command, like "...| entropy | entropy uri_host | entropy uri_path"
1) create /opt/splunk/etc/apps/search/bin/entropy.py
import splunk.Intersplunk as si
import math, sys, os, re
import cPickle, bisect
from string import atoi
import socket, struct, csv
# Attribute names computed by this command; the output column for each is
# named "<attribute>_<field>".
ATTRIBUTES = ['entropy']

# isGetInfo reports whether this is a getinfo call and returns the argument
# list to use from here on.
is_get_info, sys.argv = si.isGetInfo(sys.argv)
keywords, options = si.getKeywordsAndOptions()

# The first keyword names the field to measure; with no keyword fall back to _raw.
en_field = keywords[0] if keywords else "_raw"

if is_get_info:
    # NOTE(review): the positional flags presumably follow Intersplunk's
    # outputInfo signature (streaming, generating, retainsevents, ...) — confirm.
    si.outputInfo(True, False, True, False, None, True)
def entropy(string):
    """Calculates the Shannon entropy of a string, in bits per character.

    Returns a non-negative float; an empty string yields 0.0.
    """
    size = len(string)
    if not size:
        return 0.0
    # One pass over the input; string.count() per distinct character would
    # rescan the whole string every time.
    tally = {}
    for symbol in string:
        tally[symbol] = tally.get(symbol, 0) + 1
    ln2 = math.log(2.0)
    total = 0.0
    for seen in tally.values():
        prob = float(seen) / size
        # Accumulate by subtraction so a uniform string gives 0.0, not -0.0.
        total -= prob * math.log(prob) / ln2
    return total
def entropy_ideal(length):
    """Calculates the ideal Shannon entropy of a string with given length.

    ``length`` is expected to be a non-negative character count. Lengths 0
    and 1 return 0.0; a negative length raises ValueError.
    (Not called by this script yet.)
    """
    if length < 0:
        raise ValueError("length must be non-negative")
    if length <= 1:
        # Avoids dividing by zero for an empty string and -0.0 for one character.
        return 0.0
    prob = 1.0 / length
    return -1.0 * length * prob * math.log(prob) / math.log(2.0)
# Strip command header: consume lines up to and including the first blank
# line (or end of input) so the CSV reader starts at the column row.
while sys.stdin.readline().strip():
    pass

reader = csv.DictReader(sys.stdin)

# Work on a copy of the input columns. fieldnames is None when there is no
# CSV payload at all, and appending to the reader's own list would make it
# fill the added columns with None on every row.
headers = list(reader.fieldnames or [])
if en_field not in headers:
    headers.append(en_field)
for attribute in ATTRIBUTES:
    column = "%s_%s" % (attribute, en_field)
    # The column already exists when the command is chained on the same field.
    if column not in headers:
        headers.append(column)

writer = csv.DictWriter(sys.stdout, headers)
writer.writer.writerow(headers)

for row in reader:
    value = row.get(en_field)
    # Rows without the field keep an empty result column instead of crashing.
    if value is not None:
        for attribute in ATTRIBUTES:
            row["%s_%s" % (attribute, en_field)] = entropy(value)
    writer.writerow(row)
2) Add to /opt/splunk/etc/apps/search/local/commands.conf
[entropy]
filename = entropy.py
overrides_timeorder = false
retainsevents = true
streaming = true
supports_getinfo = true
3) Restart Splunk
4) PROFIT!
... View more