Solved: Python wrapper for external command lookup doesn't...

stefano_guidoba · ‎10-15-2013

Hi all,

I'm trying to integrate a Python script I wrote in Splunk in order to perform an external lookup to a SQL Server database. In order to do so, I used the pyodbc module installed on my OS Python 2.7 (I'm using Windows Server 2012).

I then followed advices written here to try and test my script. Everything works fine and I'm able to enrich my input data with the query I provided.
When involved in the integration with Splunk (defining a lookup in transforms.conf and so on) I faced the problem of using a wrapper script to call my system Python (the one with the pyodbc module installed) which in turns call my enrichment script. I followed these solutions ( first and second ) to no avail.

What happens is that i run my lookup but it seems like the os.execv command doesn't run for some reason (I tried to add some logging but had no luck. The enrichment script is never called). If I call my wrapper from CLI with "splunk cmd python", it works great and returns me my enriched data.

I attach the two scripts, hoping to get help to solve this strange issue.
Regards,
Stefano

wrapper.py

import os, sys
import logging


### LOGGING ###
LOG_FILENAME = 'C:\\Program Files\\Splunk\\var\\log\\lookupWrapper.log'
#create logger
logger = logging.getLogger("lookupWrapper")
logger.setLevel(logging.DEBUG)
#create file handler and set level to debug
fh = logging.FileHandler(LOG_FILENAME)
fh.setLevel(logging.DEBUG)
#create formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s -  %(message)s")
#add formatter to ch and fh
fh.setFormatter(formatter)
#add ch and fh to logger
logger.addHandler(fh)
logger.debug('========Wrapper=========')


### DELETING ENV VARS ###
# Remove problematic environmental variables if they exist.
for envvar in ("PYTHONPATH", "LD_LIBRARY_PATH"):
    if envvar in os.environ:
        del os.environ[envvar]

logger.debug("env vars unset")


### USING OS INTERPRETER ###
python_executable = "C:\Python27\python.exe"

real_script = os.path.join(os.path.dirname(__file__), "test_wrapper.py")

os.execv(python_executable, [ python_executable, real_script ] + sys.argv[1:])

db_data_enrichment.py

###############################################################
##### IMPORTS #####
###############################################################

import sys
import os
import csv
import pyodbc
import ConfigParser
import logging


LOG_FILENAME = 'C:\\Program Files\\Splunk\\var\\log\\lookup.log'


###############################################################
##### CLASSES #####
###############################################################

# Specific class to manage DB connections using pyodbc
class ConnectionManager(object):
    '''
    classdocs
    '''


    def __init__(self, config):
        '''
        Constructor
        '''

        self.driver = config.get('connection','driver')
        self.host = config.get('connection','host')
        self.port = config.get('connection','port')
        self.database = config.get('connection','database')
        self.user = config.get('connection','user')
        self.password = config.get('connection','password')

        self.conn = pyodbc.connect('DRIVER={' + self.driver + '};SERVER=' + self.host + '\\' + self.database +  ',' + self.port + ';;UID=' + self.user + ';PWD=' + self.password)
        self.cursor = self.conn.cursor()

        self.query = config.get('query','sql')


    # perform lookup with given fields
    def lookup(self, query_params, output_keys):

        i = 0
        enriched_data = {}

        self.cursor.execute(self.query, query_params)

        try:
            data = self.cursor.fetchone()
            for key in output_keys:
                enriched_data[key] = data[i]
                i =+ 1

        except:
            sys.stderr.write("Error executing query. Returning empty result.")
            pass

        return enriched_data


    def dummy_lookup(self, query_params, output_keys):

        i = 0
        d = {}

        for key in output_keys:
            d[key] = "prova" + str(i)
            i =+ 1

        return d


    def close(self):
        self.cursor.close()
        self.conn.close()




###############################################################
##### MAIN #####
###############################################################

if __name__ == '__main__':


    ### LOGGING ###
    logger = logging.getLogger("lookup")
    logger.setLevel(logging.DEBUG)
    #create file handler and set level to debug
    fh = logging.FileHandler(LOG_FILENAME)
    fh.setLevel(logging.DEBUG)
    #create formatter
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s -  %(message)s")
    #add formatter to ch and fh
    fh.setFormatter(formatter)
    #add ch and fh to logger
    logger.addHandler(fh)
    logger.debug('========Lookup=========')


    ### CONFIGURATION ###
    cp = ConfigParser.RawConfigParser()
    cp.read('config.properties')
    iparams = cp.get('params','input_params').split('\n')
    oparams = cp.get('params','output_params').split('\n')

    if len(sys.argv) != (len(iparams + oparams) + 1):
        sys.stderr.write("Usage: python db_data_enrichment.py " + " ".join(map(str,iparams)) + " " + " ".join(map(str,oparams)) + "\n")
        sys.exit(0)

    # establishing connection to database
    cm = ConnectionManager(cp)



    ### VARIABLES ###
    r = csv.reader(sys.stdin)
    w = None
    header = []
    first = True
    i = 0



    ### EXECUTION ###
    for line in r:
        if first:
            header = line
            for elem in iparams + oparams:
                if elem not in header:
                    print "All fields must exist in CSV data"
                    sys.exit(0)
            csv.writer(sys.stdout).writerow(header)
            w = csv.DictWriter(sys.stdout, header)
            first = False
            continue

        result = {}
        i = 0
        while i < len(header):
            if i < len(line):
                result[header[i]] = line[i]
            else:
                result[header[i]] = ''
            i += 1

        w.writerow(dict(result, **cm.dummy_lookup([result[k] for k in iparams if k in result], oparams)))
        cm.cursor.commit()


    ### CLOSE CONNECTION ###
    cm.close()

stefano_guidoba · ‎04-18-2014

I finally found the solution (hack was in this line - sys.stdout.write(",".join(header) + "\n") # hack, because py 2.6 doesn't have writeheader) :

###############################################################
##### MAIN #####
###############################################################

if __name__ == '__main__':

### CONFIGURATION ###
cp = ConfigParser.RawConfigParser()
cp.read('gcv.properties')
iparams = cp.get('params','input_params').split('\n')
oparams = cp.get('params','output_params').split('\n')

if len(sys.argv) != (len(iparams + oparams) + 1):
    sys.stderr.write("Usage: python db_data_enrichment.py " + " ".join(map(str,iparams)) + " " + " ".join(map(str,oparams)) + "\n")
    sys.stderr.write("Instead, given: " + " ".join(sys.argv[1:]))
    sys.exit(0)

# establishing connection to database
cm = ConnectionManager(cp)

### VARIABLES ###
r = csv.DictReader(sys.stdin)
header = r.fieldnames
w = None
first = True
i = 0

sys.stdout.write(",".join(header) + "\n") # hack, because py 2.6 doesn't have writeheader
w = csv.DictWriter(sys.stdout, header)

### EXECUTION ###
for line in r:

    w.writerow(dict(line, **cm.lookup([line[k] for k in iparams if k in line], oparams)))
    cm.cursor.commit()

### CLOSE CONNECTION ###
cm.close()

View solution in original post

stefano_guidoba · ‎04-18-2014

I finally found the solution (hack was in this line - sys.stdout.write(",".join(header) + "\n") # hack, because py 2.6 doesn't have writeheader) :

###############################################################
##### MAIN #####
###############################################################

if __name__ == '__main__':

### CONFIGURATION ###
cp = ConfigParser.RawConfigParser()
cp.read('gcv.properties')
iparams = cp.get('params','input_params').split('\n')
oparams = cp.get('params','output_params').split('\n')

if len(sys.argv) != (len(iparams + oparams) + 1):
    sys.stderr.write("Usage: python db_data_enrichment.py " + " ".join(map(str,iparams)) + " " + " ".join(map(str,oparams)) + "\n")
    sys.stderr.write("Instead, given: " + " ".join(sys.argv[1:]))
    sys.exit(0)

# establishing connection to database
cm = ConnectionManager(cp)

### VARIABLES ###
r = csv.DictReader(sys.stdin)
header = r.fieldnames
w = None
first = True
i = 0

sys.stdout.write(",".join(header) + "\n") # hack, because py 2.6 doesn't have writeheader
w = csv.DictWriter(sys.stdout, header)

### EXECUTION ###
for line in r:

    w.writerow(dict(line, **cm.lookup([line[k] for k in iparams if k in line], oparams)))
    cm.cursor.commit()

### CLOSE CONNECTION ###
cm.close()

Python wrapper for external command lookup doesn't properly work

Extending Observability Content to Splunk Cloud

More Control Over Your Monitoring Costs with Archived Metrics!

New in Observability Cloud - Explicit Bucket Histograms