We're seeing an issue on our indexer cluster where ~25% of events are duplicated. The raw logs do not contain duplicates, nor are there duplicate or overlapping monitor stanzas. When looking at bucket ID, Index Time, and Splunk Server, all are identical across the duplicates.
Our indexers are clustered, and we're running Enterprise version 6.6.3 on Windows Server 2012 R2.
Here's our aggregated outputs.conf from a Universal Forwarder:
\splunk btool outputs list
[syslog]
maxEventSize = 1024
priority = <13>
type = udp
[tcpout]
ackTimeoutOnShutdown = 30
autoLBFrequency = 30
autoLBVolume = 0
blockOnCloning = true
blockWarnThreshold = 100
cipherSuite = ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:AES256-GCM-SHA384:AES128-GCM-SHA256:AES128-SHA256:ECDH-ECDSA-AES256-GCM-SHA384:ECDH-ECDSA-AES128-GCM-SHA256:ECDH-ECDSA-AES256-SHA384:ECDH-ECDSA-AES128-SHA256
compressed = false
connectionTimeout = 20
defaultGroup = gd_indexers
disabled = false
dropClonedEventsOnQueueFull = 5
dropEventsOnQueueFull = -1
ecdhCurves = prime256v1, secp384r1, secp521r1
forceTimebasedAutoLB = false
forwardedindex.0.whitelist = .*
forwardedindex.1.blacklist = _.*
forwardedindex.2.whitelist = (_audit|_introspection|_internal|_telemetry)
forwardedindex.filter.disable = false
heartbeatFrequency = 30
indexAndForward = false
maxConnectionsPerIndexer = 2
maxFailuresPerInterval = 2
maxQueueSize = auto
readTimeout = 300
secsInFailureInterval = 1
sendCookedData = true
sslQuietShutdown = false
sslVersions = tls1.2
tcpSendBufSz = 0
useACK = true
writeTimeout = 300
[tcpout:gd_indexers]
server = <List of Internal IPs>
If anyone can suggest an avenue for troubleshooting, it would be greatly appreciated. Please also let me know if I can provide more relevant information.
... View more