I think that is not possible with Eventgen.
Here is what I did to generate events like they would appear in reality:
created a CSV file containing real events (e.g. by searching for events already in Splunk, reordering them, and then exporting to CSV) with the columns "_time", "_raw", "host", "index", "sourcetype"
created a modular input as described here: http://dev.splunk.com/view/python-sdk/SP-CAAAER3
at the bottom you find the implementation of my replay script
configured an input of that modular input and pointed it to the CSV file created above
The script replays / regenerates events the way they originally appeared over time: the time span between consecutive events is kept the same, but the timestamp of each generated event is set to "now".
import sys
import os
import csv
import datetime
import time
from splunklib.modularinput import *
class Replay(Script):
    """Modular input that replays the events of a CSV file in an endless loop.

    Each CSV row must provide the columns "_time", "_raw", "host", "index"
    and "sourcetype". The pause between two emitted events equals the
    difference between their "_time" values, while the timestamp written to
    each emitted event is the current wall-clock time.
    """

    # Layout of the "_time" column, e.g. "2014-05-01 08:00:00.000 CEST".
    _TIME_FORMAT = "%Y-%m-%d %H:%M:%S.%f %Z"
    # Same layout without the trailing time zone name (parsing fallback).
    _TIME_FORMAT_NO_ZONE = "%Y-%m-%d %H:%M:%S.%f"
    # Minimum number of seconds between two "generated ... events" log lines.
    _SUMMARY_INTERVAL_SECONDS = 60
    # Pause before re-reading a file that yielded no rows at all.
    _EMPTY_FILE_RETRY_SECONDS = 1.0

    def get_scheme(self):
        """Return the scheme describing this input and its "path" argument."""
        scheme = Scheme("CSV Replay")
        scheme.description = "Replay events in CSV file using the current timestamp"
        scheme.use_external_validation = True
        scheme.use_single_instance = False

        path_argument = Argument("path")
        path_argument.data_type = Argument.data_type_string
        path_argument.description = "Path to the file containing the samples."
        path_argument.required_on_create = True
        scheme.add_argument(path_argument)

        return scheme

    def validate_input(self, validation_definition):
        """Check that the configured "path" points to an existing file.

        Raises:
            ValueError: if no regular file exists at the configured path.
        """
        path = validation_definition.parameters["path"]
        if not os.path.isfile(path):
            raise ValueError("File at path '%s' does not exist." % path)

    @staticmethod
    def _parse_event_time(value):
        """Parse one "_time" cell into a datetime.

        strptime() accepts only a few names for %Z (UTC, GMT and the names
        in time.tzname of the running machine), so a value such as
        "... CEST" fails on hosts in another zone. In that case the trailing
        zone token is dropped and the remainder is parsed; callers only use
        differences between parsed values.

        Raises:
            ValueError: if the value matches neither layout.
        """
        try:
            return datetime.datetime.strptime(value, Replay._TIME_FORMAT)
        except ValueError:
            without_zone = value.rsplit(" ", 1)[0]
            return datetime.datetime.strptime(
                without_zone, Replay._TIME_FORMAT_NO_ZONE
            )

    def stream_events(self, inputs, ew):
        """Replay the configured CSV file forever, writing events to ``ew``.

        The inner ``while True`` never terminates, so only the first entry of
        ``inputs.inputs`` is ever processed by one call.
        NOTE(review): presumably fine because the scheme sets
        use_single_instance = False -- confirm one input per process.
        """
        # .items() instead of .iteritems() so this runs on Python 2 and 3.
        for input_name, input_item in inputs.inputs.items():
            path = input_item["path"]
            last_summary_time = time.time()
            generated_events_since_last_summary = 0
            ew.log("INFO", "start generating events")

            while True:
                ew.log("INFO", "begin reading from file %s" % path)
                rows_in_pass = 0

                with open(path, 'r') as f:
                    reader = csv.DictReader(f)
                    last_event_time = None
                    last_event_yield_time = None

                    for row in reader:
                        rows_in_pass += 1

                        event_time = self._parse_event_time(row["_time"])
                        if last_event_time is None:
                            last_event_time = event_time
                        event_time_difference = (
                            event_time - last_event_time
                        ).total_seconds()

                        now = time.time()
                        if last_event_yield_time is None:
                            last_event_yield_time = now

                        # Wait until the same gap as in the recording has
                        # passed since the previous event was emitted; never
                        # sleep a negative amount (late or out-of-order rows).
                        event_yield_time = last_event_yield_time + event_time_difference
                        yield_time_difference = max(event_yield_time - now, 0.0)
                        time.sleep(yield_time_difference)

                        last_event_yield_time = now = time.time()
                        last_event_time = event_time

                        event = Event()
                        event.stanza = input_name
                        event.data = row["_raw"].strip()
                        event.time = "%.3f" % now
                        event.host = row["host"]
                        event.index = row["index"]
                        event.source = "data_replay"
                        event.sourceType = row["sourcetype"]
                        ew.write_event(event)

                        generated_events_since_last_summary += 1
                        seconds_since_last_summary = now - last_summary_time
                        if seconds_since_last_summary >= self._SUMMARY_INTERVAL_SECONDS:
                            ew.log(
                                "INFO",
                                "generated %s events since %s seconds"
                                % (
                                    generated_events_since_last_summary,
                                    seconds_since_last_summary,
                                ),
                            )
                            last_summary_time = now
                            generated_events_since_last_summary = 0

                ew.log("INFO", "reached end of file %s" % (path))

                if rows_in_pass == 0:
                    # Without any row there was no sleep in this pass; pause
                    # so an empty file does not cause a busy loop of re-reads
                    # and log lines.
                    time.sleep(self._EMPTY_FILE_RETRY_SECONDS)
# Script entry point: run the modular input with the command-line arguments
# and use the value returned by run() as the process exit status.
if __name__ == "__main__":
    exit_code = Replay().run(sys.argv)
    sys.exit(exit_code)
... View more