Hi,
We have the Cisco Nexus 9k Add-on for Splunk Enterprise app configured in our application to collect stats from our Nexus 9K switches. The collect.py script is configured to run every 5 minutes for all the switches, collect the data and load it into Splunk. The script ran properly when we had fewer than 25 switches configured. But when the number of switches doubled to 50, we stopped receiving data in Splunk every 5 minutes; instead, there is a drop in events from the switches. UDP 514 logs from the same number of switches are being collected and loaded into Splunk properly; the only problem is with the collect.py script. Is there any way to troubleshoot or fix this issue?
The collect.py script:
#All rights reserved
import sys,os,csv,getopt
import json
import re
from datetime import datetime
import splunk.entity as entity
import logging
# Root logger with a stream handler (stderr by default); only ERROR and
# above are emitted.
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler())
logger.setLevel("ERROR")
try:
    # Make the "utils" directory that sits next to this script importable
    # before loading the NX-API helpers from it.
    utils_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),'utils')
    sys.path.extend([utils_path])
    # Star import: the functions below use NXAPITransport, which is
    # presumably provided by nxapi_utils -- TODO confirm.
    from nxapi_utils import *
except Exception as e:
    # Log the failure, then re-raise so the script does not continue
    # without its helper module.
    logger.error("Nexus Error: Error importing the required module: %s",str(e))
    raise
""" global variables """
# Module-level state, filled in by _parse_command_line_arguments():
#   cmdFile - value of the "-inputFile" option
#   command - value of the "-cmd" option
#   dev_ip  - value of the "-device" option
#   device  - declared global in _execute(), where it is the loop variable
# All four start out as the empty string.
cmdFile = command = dev_ip = device = ''
""" Display data in JSON format"""
def _display_data(device, component, jsonElement):
    """Print one event built from *jsonElement* as a JSON line on stdout.

    The element is copied through a JSON dump/load round trip, so the
    caller's object is never modified.  When the copy is a dict, every text
    value that both starts and ends with a double quote has that outer pair
    of quotes removed.  The copy is then wrapped in an envelope with the
    keys "timestamp", "component", "device" and "Row_info" and printed.

    Args:
        device: Value stored in the event's "device" field.
        component: Value stored in the event's "component" field.
        jsonElement: JSON-serialisable row data (dict, list, ...).

    Returns:
        Always 1.
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so the
    # same check works on both interpreters without naming ``unicode``.
    text_type = type(u"")

    row_info = json.loads(json.dumps(jsonElement, ensure_ascii=False))
    if isinstance(row_info, dict):
        for key, value in list(row_info.items()):
            if (isinstance(value, text_type)
                    and value.startswith('"') and value.endswith('"')):
                row_info[key] = value[1:-1]

    # datetime.now() is naive, so the trailing %z renders as an empty string.
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S%z')
    response = {"timestamp": current_time, "component": component,
                "device": device, "Row_info": row_info}
    # Parenthesised single-argument print behaves the same as the Python 2
    # print statement and is valid Python 3.
    print(json.dumps(response, ensure_ascii=False))
    return 1
""" Split JSON response"""
def _split_json(device, component, jsonData, tableName, rowName):
    """Emit one event per row stored under jsonData[tableName][rowName].

    Nothing is emitted when *tableName* is not a key of *jsonData*.  The row
    entry may be a list (each element is passed to _display_data) or a
    single dict (passed to _display_data once); any other type is ignored.

    Returns:
        Always 1.
    """
    if tableName not in jsonData:
        return 1

    rows = jsonData[tableName][rowName]
    if type(rows) is dict:
        # A lone row is handled exactly like a one-element list of rows.
        rows = [rows]
    elif type(rows) is not list:
        return 1

    for row in rows:
        _display_data(device, component, row)
    return 1
""" execute CLI"""
def _emit_event(device, component, row_info):
    """Print a single event carrying *row_info* as one JSON line on stdout."""
    # datetime.now() is naive, so the trailing %z renders as an empty string.
    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S%z')
    response = {"timestamp": current_time, "component": component,
                "device": device, "Row_info": row_info}
    print(json.dumps(response, ensure_ascii=False))


def _split_tables(device, component, container):
    """Hand every key of *container* containing "TABLE" to _split_json.

    The matching row key is the table key with "TABLE" replaced by "ROW".
    """
    for key in list(container):
        if "TABLE" in key:
            _split_json(device, component, container, key,
                        key.replace("TABLE", "ROW"))


def _execute_command(command, device, component='N/A'):
    """Run *command* through NXAPITransport.clid and print the result as events.

    The command output is parsed as JSON (presumably NX-API returns a JSON
    object -- TODO confirm against nxapi_utils).  From the parsed object:

    * top-level text values are collected and emitted together in a single
      event (a list of one-key dicts) via _display_data;
    * top-level dict values are expanded: each inner text or dict value is
      emitted as its own event, then the inner "TABLE" keys are split into
      row events;
    * top-level "TABLE" keys are split into row events last.

    Values of any other type (numbers, lists, ...) are ignored.

    A failure to run the command or to parse its output is logged and the
    function returns without emitting anything, so one bad response does
    not abort the remaining commands and devices.

    Args:
        command: CLI command to run.
        device: Value stored in each event's "device" field and used in
            error messages.
        component: Value stored in each event's "component" field.

    Returns:
        None.
    """
    # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
    text_type = type(u"")

    try:
        cmd_out = NXAPITransport.clid(command)
    except Exception as e:
        logger.error("Nexus Error: Not able to Execute command through NXAPI: %s, DEVICE IP: %s, COMMAND: %s",
                     str(e), str(device), str(command))
        return
    if cmd_out is None:
        return

    try:
        cmd_json = json.loads(cmd_out)
    except (TypeError, ValueError) as e:
        logger.error("Nexus Error: Not able to parse NXAPI response as JSON: %s, DEVICE IP: %s, COMMAND: %s",
                     str(e), str(device), str(command))
        return
    if not isinstance(cmd_json, dict):
        # A JSON null is silently skipped; any other non-object response
        # cannot be walked key by key, so report it.
        if cmd_json is not None:
            logger.error("Nexus Error: Unexpected NXAPI response type: %s, DEVICE IP: %s, COMMAND: %s",
                         type(cmd_json).__name__, str(device), str(command))
        return

    top_level_values = []
    for key, value in cmd_json.items():
        if "TABLE" in key:
            continue  # top-level tables are handled after this loop
        if isinstance(value, text_type):
            top_level_values.append({key: value})
        elif isinstance(value, dict):
            # Inner scalars and dicts first, then the inner tables.
            for inner_key, inner_value in value.items():
                if "TABLE" in inner_key:
                    continue
                if isinstance(inner_value, text_type):
                    _emit_event(device, component, {inner_key: inner_value})
                elif isinstance(inner_value, dict):
                    _emit_event(device, component, inner_value)
            _split_tables(device, component, value)

    if top_level_values:
        _display_data(device, component, top_level_values)
    _split_tables(device, component, cmd_json)
def _getCredentials(sessionKey):
    """Return the credentials stored in Splunk for this add-on.

    Lists the ['admin', 'passwords'] entities in the 'TA_cisco-Nexus-9k'
    namespace and keeps those whose ACL app is that same add-on.

    Args:
        sessionKey: Session key passed to splunk.entity.getEntities.

    Returns:
        dict mapping a device name to ``[username, clear_password]``.  The
        device name is the part of the entity name before the first ':',
        stripped of surrounding whitespace.  An empty dict is returned when
        the lookup fails (the error is logged).
    """
    myapp = 'TA_cisco-Nexus-9k'
    try:
        # list all credentials
        entities = entity.getEntities(['admin', 'passwords'], namespace=myapp,
                                      owner='nobody', sessionKey=sessionKey)
    except Exception as e:
        logger.error("Nexus Error: Could not get %s credentials from splunk. Error: %s",
                     myapp, str(e))
        # Without entities there is nothing to iterate; report "no devices"
        # instead of failing later on an unbound name.
        return {}

    device_credentials = {}
    for name, stored in entities.items():
        if str(stored['eai:acl']['app']) != myapp:
            continue
        device = str(name.split(':')[0]).strip()
        device_credentials[device] = [stored['username'], stored['clear_password']]
    return device_credentials
""" prepare execution """
def _read_command_file(path):
    """Return the (command, component) pairs listed in the file at *path*.

    Each non-blank line must have the form "command,component".  Blank lines
    are skipped; lines that do not split into exactly two comma-separated
    fields are logged and skipped.
    """
    entries = []
    with open(path, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            fields = line.split(',')
            if len(fields) != 2:
                logger.error("Nexus Error: Skipping malformed line in command file %s: %s",
                             str(path), line)
                continue
            entries.append((fields[0].strip(), fields[1]))
    return entries


def _prepare_and_execute():
    """Run the configured command(s) on every device that has credentials.

    Reads the splunkd session key from the first line of stdin, fetches the
    stored credentials, then for each device initialises NXAPITransport and
    runs either every command listed in the module-level ``cmdFile``
    (resolved relative to this script's directory) or, if no file is set,
    the single module-level ``command``.

    Devices whose transport cannot be initialised are logged and skipped.
    Exits the process when no session key is received.
    """
    sessionKey = sys.stdin.readline().strip()
    if not sessionKey:
        logger.error("Nexus Error: Did not receive a session key from splunkd.")
        sys.exit()

    device_credentials = _getCredentials(sessionKey)
    # Parsed command file; loaded on first use and shared by all devices so
    # the file is opened (and closed) once per run.
    cmd_entries = None

    for device in device_credentials.keys():
        username = device_credentials[device][0]
        password = device_credentials[device][1]
        target_url = "https://" + str(device) + "/ins"
        try:
            # NOTE(review): devices are handled one after another, so an
            # unresponsive device can hold up the rest for as long as this
            # timeout allows (presumably seconds -- confirm in nxapi_utils).
            NXAPITransport.init(target_url=target_url, username=username,
                                password=password, timeout=600)
        except Exception as e:
            logger.error("Nexus Error: Not able to connect to NXAPI: %s, DEVICE IP: %s", str(e), str(device))
            continue

        if cmdFile:
            if cmd_entries is None:
                script_dir = os.path.dirname(os.path.realpath(__file__))
                cmd_entries = _read_command_file(os.path.join(script_dir, cmdFile))
            for cmd_text, component in cmd_entries:
                _execute_command(command=cmd_text, device=device, component=component)
        elif command:
            _execute_command(command=command, device=device)
""" main method """
def main(argv):
    """Validate *argv*, parse it into the module globals and run the collection.

    Nothing is parsed or executed when _validate_argumnets rejects the
    arguments.
    """
    arg_count = len(argv)
    if not _validate_argumnets(argv):
        return
    _parse_command_line_arguments(argv, arg_count)
    _execute(argv, arg_count)
""" Validate command line arguments """
def _validate_argumnets(argv):
    """Return True when no element of *argv* is empty (falsy).

    On the first empty element an error is logged and False is returned.
    """
    found_empty = any(not argument for argument in argv)
    if found_empty:
        logger.error("Nexus Error: Empty argument found. Please provide appropriate command line arguments.")
        return False
    return True
""" Parse command line arguments"""
def _parse_command_line_arguments(argv, length_of_argv):
    """Store the recognised command line options in the module globals.

    * two arguments: "-inputFile <file>" sets ``cmdFile``; "-cmd <command>"
      sets ``command``;
    * more than two arguments: a leading "-cmd <command>" sets ``command``
      and "-device <value>" in third position sets ``dev_ip``.

    With fewer than two arguments an error is logged and the process exits.
    Any exception raised while reading the arguments is logged and re-raised.
    """
    global cmdFile, command, dev_ip, inputcsv

    if not length_of_argv > 1:
        logger.error("Nexus Error: Unrecognized command line arguments")
        sys.exit()

    try:
        if length_of_argv == 2:
            if "-inputFile" == argv[0]:
                cmdFile = argv[1]
            elif "-cmd" == argv[0]:
                command = argv[1]
        elif length_of_argv > 2:
            if "-cmd" == argv[0]:
                command = argv[1]
            if "-device" == argv[2]:
                dev_ip = argv[3]
    except Exception as e:
        logger.error("Nexus Error: Please enter valid arguments. %s", str(e))
        raise
""" execute method has following user input category:
a) devices b) cmdFile c) command
"""
def _execute(argv, length_of_argv):
    """Run the collection for the parsed command line.

    When more than two arguments were given and the module-level ``dev_ip``
    is set, the module-level ``command`` is run only on the listed devices
    (comma-separated in ``dev_ip``) that also have stored credentials.  In
    every other case _prepare_and_execute() handles all devices.

    In device mode the splunkd session key is read from the first line of
    stdin; the process exits when none is received.  A device whose
    transport cannot be initialised is logged and skipped, so the command
    is never run on a transport left over from a previous device.

    Args:
        argv: Command line arguments (not used directly here).
        length_of_argv: Number of command line arguments.
    """
    if not (length_of_argv > 2 and dev_ip):
        # User input is a command file or a single command for all devices.
        _prepare_and_execute()
        return

    # User input is an explicit list of device(s).  Surrounding whitespace
    # is dropped so "a, b" matches the stored device names.
    requested_ips = [ip.strip() for ip in dev_ip.split(",")]

    sessionKey = sys.stdin.readline().strip()
    if not sessionKey:
        logger.error("Nexus Error: Did not receive a session key from splunkd.")
        sys.exit()

    device_credentials = _getCredentials(sessionKey)
    for known_device in device_credentials.keys():
        for ip in requested_ips:
            if ip != known_device:
                continue
            username = device_credentials[known_device][0]
            password = device_credentials[known_device][1]
            target_url = "https://" + str(known_device) + "/ins"
            try:
                NXAPITransport.init(target_url=target_url, username=username,
                                    password=password, timeout=20)
            except Exception as e:
                logger.error("Not able to connect to NXAPI: %s, DEVICE IP: %s", str(e), str(ip))
                # Skip this device: running the command now would use
                # whatever transport was initialised before.
                continue
            _execute_command(command=command, device=ip)
if __name__ == "__main__":
    # Script entry point: pass everything after the script name to main().
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
Splitting up the Cisco_Nexus_9k app, with 10 switches each, fixed the issue.
Splitting up the Cisco_Nexus_9k app, with 10 switches each, fixed the issue.
@rupeshmano If your problem is resolved, please accept the answer to help future readers.