Hi all,
I'm trying to integrate a Python script I wrote in Splunk in order to perform an external lookup to a SQL Server database. In order to do so, I used the pyodbc module installed on my OS Python 2.7 (I'm using Windows Server 2012).
I then followed advices written here to try and test my script. Everything works fine and I'm able to enrich my input data with the query I provided.
When involved in the integration with Splunk (defining a lookup in transforms.conf and so on) I faced the problem of using a wrapper script to call my system Python (the one with the pyodbc module installed) which in turns call my enrichment script. I followed these solutions ( first and second ) to no avail.
What happens is that i run my lookup but it seems like the os.execv command doesn't run for some reason (I tried to add some logging but had no luck. The enrichment script is never called). If I call my wrapper from CLI with "splunk cmd python", it works great and returns me my enriched data.
I attach the two scripts, hoping to get help to solve this strange issue.
Regards,
Stefano
wrapper.py
import os, sys
import logging
### LOGGING ###
LOG_FILENAME = 'C:\\Program Files\\Splunk\\var\\log\\lookupWrapper.log'
#create logger
logger = logging.getLogger("lookupWrapper")
logger.setLevel(logging.DEBUG)
#create file handler and set level to debug
fh = logging.FileHandler(LOG_FILENAME)
fh.setLevel(logging.DEBUG)
#create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
#add formatter to ch and fh
fh.setFormatter(formatter)
#add ch and fh to logger
logger.addHandler(fh)
logger.debug('========Wrapper=========')
### DELETING ENV VARS ###
# Remove problematic environmental variables if they exist.
for envvar in ("PYTHONPATH", "LD_LIBRARY_PATH"):
if envvar in os.environ:
del os.environ[envvar]
logger.debug("env vars unset")
### USING OS INTERPRETER ###
python_executable = "C:\Python27\python.exe"
real_script = os.path.join(os.path.dirname(__file__), "test_wrapper.py")
os.execv(python_executable, [ python_executable, real_script ] + sys.argv[1:])
db_data_enrichment.py
###############################################################
##### IMPORTS #####
###############################################################
import sys
import os
import csv
import pyodbc
import ConfigParser
import logging
LOG_FILENAME = 'C:\\Program Files\\Splunk\\var\\log\\lookup.log'
###############################################################
##### CLASSES #####
###############################################################
# Specific class to manage DB connections using pyodbc
class ConnectionManager(object):
'''
classdocs
'''
def __init__(self, config):
'''
Constructor
'''
self.driver = config.get('connection','driver')
self.host = config.get('connection','host')
self.port = config.get('connection','port')
self.database = config.get('connection','database')
self.user = config.get('connection','user')
self.password = config.get('connection','password')
self.conn = pyodbc.connect('DRIVER={' + self.driver + '};SERVER=' + self.host + '\\' + self.database + ',' + self.port + ';;UID=' + self.user + ';PWD=' + self.password)
self.cursor = self.conn.cursor()
self.query = config.get('query','sql')
# perform lookup with given fields
def lookup(self, query_params, output_keys):
i = 0
enriched_data = {}
self.cursor.execute(self.query, query_params)
try:
data = self.cursor.fetchone()
for key in output_keys:
enriched_data[key] = data[i]
i =+ 1
except:
sys.stderr.write("Error executing query. Returning empty result.")
pass
return enriched_data
def dummy_lookup(self, query_params, output_keys):
i = 0
d = {}
for key in output_keys:
d[key] = "prova" + str(i)
i =+ 1
return d
def close(self):
self.cursor.close()
self.conn.close()
###############################################################
##### MAIN #####
###############################################################
if __name__ == '__main__':
### LOGGING ###
logger = logging.getLogger("lookup")
logger.setLevel(logging.DEBUG)
#create file handler and set level to debug
fh = logging.FileHandler(LOG_FILENAME)
fh.setLevel(logging.DEBUG)
#create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
#add formatter to ch and fh
fh.setFormatter(formatter)
#add ch and fh to logger
logger.addHandler(fh)
logger.debug('========Lookup=========')
### CONFIGURATION ###
cp = ConfigParser.RawConfigParser()
cp.read('config.properties')
iparams = cp.get('params','input_params').split('\n')
oparams = cp.get('params','output_params').split('\n')
if len(sys.argv) != (len(iparams + oparams) + 1):
sys.stderr.write("Usage: python db_data_enrichment.py " + " ".join(map(str,iparams)) + " " + " ".join(map(str,oparams)) + "\n")
sys.exit(0)
# establishing connection to database
cm = ConnectionManager(cp)
### VARIABLES ###
r = csv.reader(sys.stdin)
w = None
header = []
first = True
i = 0
### EXECUTION ###
for line in r:
if first:
header = line
for elem in iparams + oparams:
if elem not in header:
print "All fields must exist in CSV data"
sys.exit(0)
csv.writer(sys.stdout).writerow(header)
w = csv.DictWriter(sys.stdout, header)
first = False
continue
result = {}
i = 0
while i < len(header):
if i < len(line):
result[header[i]] = line[i]
else:
result[header[i]] = ''
i += 1
w.writerow(dict(result, **cm.dummy_lookup([result[k] for k in iparams if k in result], oparams)))
cm.cursor.commit()
### CLOSE CONNECTION ###
cm.close()
I finally found the solution (hack was in this line - sys.stdout.write(",".join(header) + "\n") # hack, because py 2.6 doesn't have writeheader
) :
###############################################################
##### MAIN #####
###############################################################
if __name__ == '__main__':
### CONFIGURATION ###
cp = ConfigParser.RawConfigParser()
cp.read('gcv.properties')
iparams = cp.get('params','input_params').split('\n')
oparams = cp.get('params','output_params').split('\n')
if len(sys.argv) != (len(iparams + oparams) + 1):
sys.stderr.write("Usage: python db_data_enrichment.py " + " ".join(map(str,iparams)) + " " + " ".join(map(str,oparams)) + "\n")
sys.stderr.write("Instead, given: " + " ".join(sys.argv[1:]))
sys.exit(0)
# establishing connection to database
cm = ConnectionManager(cp)
### VARIABLES ###
r = csv.DictReader(sys.stdin)
header = r.fieldnames
w = None
first = True
i = 0
sys.stdout.write(",".join(header) + "\n") # hack, because py 2.6 doesn't have writeheader
w = csv.DictWriter(sys.stdout, header)
### EXECUTION ###
for line in r:
w.writerow(dict(line, **cm.lookup([line[k] for k in iparams if k in line], oparams)))
cm.cursor.commit()
### CLOSE CONNECTION ###
cm.close()
I finally found the solution (hack was in this line - sys.stdout.write(",".join(header) + "\n") # hack, because py 2.6 doesn't have writeheader
) :
###############################################################
##### MAIN #####
###############################################################
if __name__ == '__main__':
### CONFIGURATION ###
cp = ConfigParser.RawConfigParser()
cp.read('gcv.properties')
iparams = cp.get('params','input_params').split('\n')
oparams = cp.get('params','output_params').split('\n')
if len(sys.argv) != (len(iparams + oparams) + 1):
sys.stderr.write("Usage: python db_data_enrichment.py " + " ".join(map(str,iparams)) + " " + " ".join(map(str,oparams)) + "\n")
sys.stderr.write("Instead, given: " + " ".join(sys.argv[1:]))
sys.exit(0)
# establishing connection to database
cm = ConnectionManager(cp)
### VARIABLES ###
r = csv.DictReader(sys.stdin)
header = r.fieldnames
w = None
first = True
i = 0
sys.stdout.write(",".join(header) + "\n") # hack, because py 2.6 doesn't have writeheader
w = csv.DictWriter(sys.stdout, header)
### EXECUTION ###
for line in r:
w.writerow(dict(line, **cm.lookup([line[k] for k in iparams if k in line], oparams)))
cm.cursor.commit()
### CLOSE CONNECTION ###
cm.close()