<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: hunk search-time field extraction not working in Splunk Search</title>
    <link>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153164#M42970</link>
    <description>&lt;P&gt;Looks like this page formatted the escape characters.  Here's my original question: &lt;A href="http://pastebin.ca/3023980"&gt;http://pastebin.ca/3023980&lt;/A&gt;&lt;/P&gt;</description>
    <pubDate>Wed, 10 Jun 2015 20:40:17 GMT</pubDate>
    <dc:creator>suarezry</dc:creator>
    <dc:date>2015-06-10T20:40:17Z</dc:date>
    <item>
      <title>hunk search-time field extraction not working</title>
      <link>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153163#M42969</link>
      <description>&lt;P&gt;Hunk v6.2.2 to hortonworks hadoop v2.2.4.2.  My search-time field extraction for client_host is not consistent.  It will return too few results or none at all.  For example, if I search "index=hadoop client_host=10.0.0.10" in the last 4 hours (at 4pm eastern time) I get no results.  Can someone help troubleshoot?&lt;/P&gt;

&lt;P&gt;Raw logs in &lt;CODE&gt;/myprovider/syslogs/2015/2015-06-10_datacollector2.txt&lt;/CODE&gt; contain:&lt;/P&gt;

&lt;P&gt;2015-06-10T20:13:33Z    syslog.tcp  {"message":"&amp;lt;14&amp;gt;Jun 10 16:07:03 WIN-VQCJADNQOGL MSWinEventLog\t1\tMicrosoft-Windows-LanguagePackSetup/Operational\t71\tWed Jun 10 16:07:03 2015\t4001\tMicrosoft-Windows-LanguagePackSetup\tSYSTEM\tUser\tInformation\tWIN-VQCJADNQOGL\tLanguage Pack cleanup functionality\t\tLPRemove terminating.\t16\r","client_host":"10.0.0.10"}&lt;BR /&gt;
2015-06-10T20:13:33Z    syslog.tcp  {"message":"&amp;lt;14&amp;gt;Jun 10 16:07:03 WIN-VQCJADNQOGL MSWinEventLog\t1\tMicrosoft-Windows-MUI/Operational\t72\tWed Jun 10 16:07:03 2015\t3003\tMicrosoft-Windows-MUI\tSYSTEM\tUser\tInformation\tWIN-VQCJADNQOGL\tMUI resource cache builder\t\tMUI resource cache builder has been called with the following parameters: (null).\t29\r","client_host":"10.0.0.10"}&lt;BR /&gt;
2015-06-10T20:13:45Z    syslog.tcp  {"message":"&amp;lt;14&amp;gt;Jun 10 16:07:13 WIN-VQCJADNQOGL MSWinEventLog\t1\tMicrosoft-Windows-MUI/Operational\t73\tWed Jun 10 16:07:13 2015\t3007\tMicrosoft-Windows-MUI\tSYSTEM\tUser\tInformation\tWIN-VQCJADNQOGL\tMUI resource cache builder\t\tNew resource cache built and installed on system. New cache index is 5, live cache index is 5 and config is set to 3.\t30\r","client_host":"10.0.0.10"}&lt;/P&gt;

&lt;P&gt;My Hunk config:&lt;/P&gt;

&lt;H3&gt;indexes.conf&lt;/H3&gt;

&lt;PRE&gt;&lt;CODE&gt;[provider:myprovider]
vix.command.arg.3 = $SPLUNK_HOME/bin/jars/SplunkMR-s6.0-hy2.0.jar
vix.env.HADOOP_HOME = /usr/hdp/2.2.4.2-2/hadoop
vix.env.JAVA_HOME = /usr/lib/jvm/java-7-openjdk-amd64
vix.family = hadoop
vix.fs.default.name = hdfs://hadoop-namenode1.internal:8020
vix.mapreduce.framework.name = yarn
vix.mapred.child.java.opts = -server -Xmx1024m
vix.output.buckets.max.network.bandwidth = 0
vix.splunk.home.hdfs = /user/splunk/myprovider
vix.yarn.resourcemanager.address = hadoop-namenode2.internal:8050
vix.yarn.resourcemanager.scheduler.address = hadoop-namenode2.internal:8030
vix.yarn.application.classpath = /etc/hadoop/conf,/usr/hdp/2.2.4.2-2/hadoop/client/*,/usr/hdp/2.2.4.2-2/hadoop/lib/*,/usr/hdp/2.2.4.2-2/hadoop-hdfs/*,/usr/hdp/2.2.4.2-2/hadoop-hdfs/lib/*,/usr/hdp/2.2.4.2-2/hadoop-yarn/*,/usr/hdp/2.2.4.2-2/hadoop-yarn/lib/*
vix.splunk.home.datanode = /user/splunk/splunk-search1/
vix.splunk.setup.package = /opt/hunk/hunk-6.2.2-257696-linux-2.6-x86_64.tgz

[hadoop]
vix.input.1.path = /myprovider/syslogs/...
vix.provider = myprovider
vix.input.1.accept = \.txt$
vix.input.1.et.format = yyyyMMdd
vix.input.1.et.offset = 3600
vix.input.1.et.regex = /myprovider/syslogs/(\d+)/\d+-(\d+)-(\d+)_\w+\.txt
vix.input.1.lt.format = yyyyMMdd
vix.input.1.lt.offset = 86400
vix.input.1.lt.regex = /myprovider/syslogs/(\d+)/\d+-(\d+)-(\d+)_\w+\.txt
&lt;/CODE&gt;&lt;/PRE&gt;

&lt;H3&gt;props.conf&lt;/H3&gt;

&lt;PRE&gt;&lt;CODE&gt;[source::/myprovider/syslogs/*/*]
EXTRACT-clienthost = client_host\"\:\"(?&amp;lt;client_host&amp;gt;\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\"

sourcetype = hadoop
priority = 100
ANNOTATE_PUNCT = false
SHOULD_LINEMERGE = false
MAX_TIMESTAMP_LOOKAHEAD = 30
TIME_PREFIX = ^
TIME_FORMAT = %Y-%m-%dT%H:%M:%SZ
TZ=UTC
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 28 Sep 2020 20:15:03 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153163#M42969</guid>
      <dc:creator>suarezry</dc:creator>
      <dc:date>2020-09-28T20:15:03Z</dc:date>
    </item>
    <item>
      <title>Re: hunk search-time field extraction not working</title>
      <link>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153164#M42970</link>
      <description>&lt;P&gt;Looks like this page formatted the escape characters.  Here's my original question: &lt;A href="http://pastebin.ca/3023980"&gt;http://pastebin.ca/3023980&lt;/A&gt;&lt;/P&gt;</description>
      <pubDate>Wed, 10 Jun 2015 20:40:17 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153164#M42970</guid>
      <dc:creator>suarezry</dc:creator>
      <dc:date>2015-06-10T20:40:17Z</dc:date>
    </item>
    <item>
      <title>Re: hunk search-time field extraction not working</title>
      <link>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153165#M42971</link>
      <description>&lt;P&gt;Is your data being sourcetyped correctly, i.e., does the sourcetype field return a value of hadoop for these events? If so, I would add a field extraction definition to the hadoop sourcetype stanza in props.conf on your search head:&lt;/P&gt;

&lt;P&gt;$SPLUNK_HOME/etc/system/local/props.conf&lt;BR /&gt;
(or props.conf in the app of your choice) $SPLUNK_HOME/etc/apps/appofyourchoice/local/props.conf&lt;/P&gt;

&lt;P&gt;[hadoop]&lt;/P&gt;

&lt;PRE&gt;&lt;CODE&gt;EXTRACT-client_host = (?m)client_host":"(?&amp;lt;client_host&amp;gt;\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"}
&lt;/CODE&gt;&lt;/PRE&gt;

&lt;P&gt;Then restart Splunk on your search head:&lt;BR /&gt;
cd $SPLUNK_HOME/bin&lt;BR /&gt;
./splunk restart&lt;/P&gt;

&lt;P&gt;to validate, you can run a search like this:&lt;BR /&gt;
index=* sourcetype=hadoop | stats count by client_host&lt;/P&gt;

&lt;P&gt;If not and you want to do the extraction on the source field, this should work:&lt;/P&gt;

&lt;P&gt;on your search head:&lt;BR /&gt;
$SPLUNK_HOME/etc/system/local/props.conf&lt;BR /&gt;
(or props.conf in the app of your choice) $SPLUNK_HOME/etc/apps/appofyourchoice/local/props.conf&lt;/P&gt;

&lt;P&gt;[source::/myprovider/syslogs/...]&lt;/P&gt;

&lt;PRE&gt;&lt;CODE&gt;EXTRACT-client_host = (?m)client_host":"(?&amp;lt;client_host&amp;gt;\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"}
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Mon, 28 Sep 2020 20:12:52 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153165#M42971</guid>
      <dc:creator>rphillips_splk</dc:creator>
      <dc:date>2020-09-28T20:12:52Z</dc:date>
    </item>
    <item>
      <title>Re: hunk search-time field extraction not working</title>
      <link>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153166#M42972</link>
      <description>&lt;P&gt;Can you try a) replacing the stanza name and, more importantly, b) removing the unnecessary backslashes before " in the extraction regex? If that works, given that the data looks partially like JSON, I'd recommend that you also allow optional spaces in the regex between : and ".&lt;/P&gt;

&lt;PRE&gt;&lt;CODE&gt;[source::/myprovider/syslogs/...]
EXTRACT-clienthost = client_host":"(?&amp;lt;client_host&amp;gt;\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})"
&lt;/CODE&gt;&lt;/PRE&gt;</description>
      <pubDate>Thu, 11 Jun 2015 06:40:38 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153166#M42972</guid>
      <dc:creator>Ledion_Bitincka</dc:creator>
      <dc:date>2015-06-11T06:40:38Z</dc:date>
    </item>
    <item>
      <title>Re: hunk search-time field extraction not working</title>
      <link>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153167#M42973</link>
      <description>&lt;P&gt;The regex and stanza are shown correctly (i.e., the formatting was not mangled).&lt;/P&gt;</description>
      <pubDate>Thu, 11 Jun 2015 06:41:36 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153167#M42973</guid>
      <dc:creator>Ledion_Bitincka</dc:creator>
      <dc:date>2015-06-11T06:41:36Z</dc:date>
    </item>
    <item>
      <title>Re: hunk search-time field extraction not working</title>
      <link>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153168#M42974</link>
      <description>&lt;P&gt;Thanks!  Looks like my regex was off.&lt;/P&gt;</description>
      <pubDate>Thu, 11 Jun 2015 14:04:57 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/hunk-search-time-field-extraction-not-working/m-p/153168#M42974</guid>
      <dc:creator>suarezry</dc:creator>
      <dc:date>2015-06-11T14:04:57Z</dc:date>
    </item>
  </channel>
</rss>

