<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Extract data from URL string in Splunk Search</title>
    <link>https://community.splunk.com/t5/Splunk-Search/Extract-data-from-URL-string/m-p/419753#M120693</link>
    <description>&lt;P&gt;Greetings @Vfinney,&lt;/P&gt;

&lt;P&gt;Please try the following run-anywhere search.&lt;/P&gt;

&lt;PRE&gt;&lt;CODE&gt;| makeresults
| eval _raw="7/30/19 1:29:52.000 PM Jul 30 13:29:52 10.140.24.233 Jul 30 13:29:52 Access_Logs_Splunk: Info: 1564511389.352 80 10.140.6.27 TCP_MISS/204 793 GET &lt;A href="http://dmp.truoptik.com/239e300e6dca3b53/sync.gif?dm=ib.adnxs.com&amp;amp;fck=6298473322644763945" target="test_blank"&gt;http://dmp.truoptik.com/239e300e6dca3b53/sync.gif?dm=ib.adnxs.com&amp;amp;fck=6298473322644763945&lt;/A&gt; \"DOL\sroth@KDOL_Web_Auth\" DIRECT/dmp.truoptik.com - DEFAULT_CASE_12-KDOL_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,\"-\",-,-,-,-,\"-\",-,-,-,\"-\",-,-,\"-\",\"-\",-,-,-,-,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",79.30,0,-,\"-\",\"-\",-,\"-\",-,-,\"-\",\"-\",-,-,\"-\"&amp;gt; -"
| append [ makeresults | eval _raw="7/30/19 1:29:42.000 PM Jul 30 13:29:42 10.140.24.233 Jul 30 13:29:42 Access_Logs_Splunk: Info: 1564511379.248 324 10.140.10.21 TCP_MISS/206 1587824 GET &lt;A href="http://r14---sn-bvvbaxjpl.gvt1.com/edgedl/release2/chrome/AOnIEhGH7WaH0jVMgWzb_TU_76.0.3809.87/76.0.3809.87_75.0.3770.142_chrome_updater.exe?cms_redirect=yes&amp;amp;mip=165.201.56.130&amp;amp;mm=28&amp;amp;mn=sn-bvvbax-hjpl&amp;amp;ms=nvh&amp;amp;mt=1564511187&amp;amp;mv=m&amp;amp;mvi=13&amp;amp;nh=EAE&amp;amp;pl=16&amp;amp;shardbypass=yes" target="test_blank"&gt;http://r14---sn-bvvbaxjpl.gvt1.com/edgedl/release2/chrome/AOnIEhGH7WaH0jVMgWzb_TU_76.0.3809.87/76.0.3809.87_75.0.3770.142_chrome_updater.exe?cms_redirect=yes&amp;amp;mip=165.201.56.130&amp;amp;mm=28&amp;amp;mn=sn-bvvbax-hjpl&amp;amp;ms=nvh&amp;amp;mt=1564511187&amp;amp;mv=m&amp;amp;mvi=13&amp;amp;nh=EAE&amp;amp;pl=16&amp;amp;shardbypass=yes&lt;/A&gt; \"DOL\dingels@KDOL_Web_Auth\" DIRECT/r14---sn-bvvbax-hjpl.gvt1.com application/octet-stream DEFAULT_CASE_12-KDOL_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,\"-\",-,-,-,-,\"-\",-,-,-,\"-\",-,-,\"-\",\"-\",-,-,-,-,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",39205.53,0,-,\"-\",\"-\",-,\"-\",-,-,\"-\",\"-\",-,-,\"-\"&amp;gt; -" ]
| append [ makeresults | eval _raw="7/30/19 1:33:50.000 PM Jul 30 13:33:50 10.140.24.234 Jul 30 13:33:50 Access_Logs_Splunk: Info: 1564511627.609 1779 10.140.4.14 TCP_MISS/200 3461685 GET &lt;A href="http://workforce-ks.com/wp-content/uploads/2015/05/08.01.2019-One-Stop-Advisory-Council-Meeting-Packet.pdf" target="test_blank"&gt;http://workforce-ks.com/wp-content/uploads/2015/05/08.01.2019-One-Stop-Advisory-Council-Meeting-Packet.pdf&lt;/A&gt; \"DOL\nstruckhoff@KDOL_Web_Auth\" DIRECT/workforce-ks.com application/pdf DEFAULT_CASE_12-Social_Media_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,\"-\",-,-,-,-,\"-\",-,-,-,\"-\",-,-,\"-\",\"-\",-,-,-,-,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",15566.88,0,-,\"-\",\"-\",-,\"-\",-,-,\"-\",\"-\",-,-,\"-\"&amp;gt; -" ]
| append [ makeresults | eval _raw="7/30/19 1:33:11.000 PM  Jul 30 13:33:11 10.140.24.234 Jul 30 13:33:11 Access_Logs_Splunk: Info: 1564511588.080 44 10.140.4.104 TCP_MISS/200 35005 GET &lt;A href="http://ts.intra.dol.ks.gov/Files/PDF/EmployeeRecognition.pdf" target="test_blank"&gt;http://ts.intra.dol.ks.gov/Files/PDF/EmployeeRecognition.pdf&lt;/A&gt; \"DOL\njanco@KDOL_Web_Auth\" DIRECT/ts.intra.dol.ks.gov application/pdf DEFAULT_CASE_12-Social_Media_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,\"-\",-,-,-,-,\"-\",-,-,-,\"-\",-,-,\"-\",\"-\",-,-,-,-,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",6364.55,0,-,\"-\",\"-\",-,\"-\",-,-,\"-\",\"-\",-,-,\"-\"&amp;gt; -" ]
| rex field=_raw     "GET (?&amp;lt;Full_URL&amp;gt;https?://[^\s]+)"
| rex field=Full_URL "(?&amp;lt;URL&amp;gt;https?://[^/]+/)"
| rex field=Full_URL "/(?&amp;lt;Filename&amp;gt;[^/]+)(?&amp;lt;Filetype&amp;gt;\.(gif|exe|pdf))\??"
| table URL Filename Filetype
&lt;/CODE&gt;&lt;/PRE&gt;

&lt;P&gt;These are the results:&lt;/P&gt;

&lt;PRE&gt;&lt;CODE&gt;URL                                    Filename                                              Filetype
&lt;A href="http://dmp.truoptik.com/" target="test_blank"&gt;http://dmp.truoptik.com/&lt;/A&gt;               sync                                                  .gif
&lt;A href="http://r14---sn-bvvbaxjpl.gvt1.com/" target="test_blank"&gt;http://r14---sn-bvvbaxjpl.gvt1.com/&lt;/A&gt;    76.0.3809.87_75.0.3770.142_chrome_updater             .exe
&lt;A href="http://workforce-ks.com/" target="test_blank"&gt;http://workforce-ks.com/&lt;/A&gt;               08.01.2019-One-Stop-Advisory-Council-Meeting-Packet   .pdf
&lt;A href="http://ts.intra.dol.ks.gov/" target="test_blank"&gt;http://ts.intra.dol.ks.gov/&lt;/A&gt;            EmployeeRecognition                                   .pdf
&lt;/CODE&gt;&lt;/PRE&gt;

&lt;P&gt;Assumptions:&lt;BR /&gt;
- URL is always preceded by "GET " and does not contain spaces.&lt;BR /&gt;
- Filename does not contain spaces or the "/" character.&lt;BR /&gt;
- Filetype is either &lt;CODE&gt;.gif&lt;/CODE&gt;, &lt;CODE&gt;.exe&lt;/CODE&gt;, or &lt;CODE&gt;.pdf&lt;/CODE&gt;. You can add &lt;CODE&gt;|&lt;/CODE&gt; and the new extension after &lt;CODE&gt;gif|exe|pdf&lt;/CODE&gt; to add others.&lt;/P&gt;</description>
    <pubDate>Tue, 30 Jul 2019 19:36:33 GMT</pubDate>
    <dc:creator>jacobpevans</dc:creator>
    <dc:date>2019-07-30T19:36:33Z</dc:date>
    <item>
      <title>Extract data from URL string</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Extract-data-from-URL-string/m-p/419752#M120692</link>
      <description>&lt;P&gt;I am trying to extract the file types, file names, and URLs from proxy logs for monitoring purposes.  Here is what I'm looking for.  Thanks in advance for any and all assistance. &lt;/P&gt;

&lt;P&gt;URL                                                 Filetype    Filename&lt;BR /&gt;
&lt;A href="http://dmp.truoptik.com" target="_blank"&gt;http://dmp.truoptik.com&lt;/A&gt;            .gif             sync&lt;BR /&gt;
&lt;A href="http://r14---sn-bvvbax-hjpl.gvt1.com" target="_blank"&gt;http://r14---sn-bvvbax-hjpl.gvt1.com&lt;/A&gt;  .exe           Chrome_updater&lt;BR /&gt;
&lt;A href="http://workforce-ks.com/" target="_blank"&gt;http://workforce-ks.com/&lt;/A&gt;           .pdf           2019-One-Stop-Advisory-Council-Meeting-Packet&lt;/P&gt;

&lt;P&gt;Proxy logs examples:&lt;BR /&gt;
7/30/19&lt;BR /&gt;
1:29:52.000 PM&lt;BR /&gt;&lt;BR /&gt;
Jul 30 13:29:52 10.140.24.233 Jul 30 13:29:52 Access_Logs_Splunk: Info: 1564511389.352 80 10.140.6.27 TCP_MISS/204 793 GET &lt;A href="http://dmp.truoptik.com/239e300e6dca3b53/sync.gif?dm=ib.adnxs.com&amp;amp;fck=6298473322644763945" target="_blank"&gt;http://dmp.truoptik.com/239e300e6dca3b53/sync.gif?dm=ib.adnxs.com&amp;amp;fck=6298473322644763945&lt;/A&gt; "DOL\sroth@KDOL_Web_Auth" DIRECT/dmp.truoptik.com - DEFAULT_CASE_12-KDOL_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,"-",-,-,-,-,"-",-,-,-,"-",-,-,"-","-",-,-,-,-,"-","-","-","-","-","-",79.30,0,-,"-","-",-,"-",-,-,"-","-",-,-,"-"&amp;gt; -&lt;/P&gt;

&lt;P&gt;7/30/19 1:29:42.000 PM Jul 30 13:29:42 10.140.24.233 Jul 30 13:29:42 Access_Logs_Splunk: Info: 1564511379.248 324 10.140.10.21 TCP_MISS/206 1587824 GET &lt;A href="http://r14---sn-bvvbax-hjpl.gvt1.com/edgedl/release2/chrome/AOnIEhGH7WaH0jVMgWzb_TU_76.0.3809.87/76.0.3809.87_75.0.3770.142_chrome_updater.exe?cms_redirect=yes&amp;amp;mip=165.201.56.130&amp;amp;mm=28&amp;amp;mn=sn-bvvbax-hjpl&amp;amp;ms=nvh&amp;amp;mt=1564511187&amp;amp;mv=m&amp;amp;mvi=13&amp;amp;nh=EAE&amp;amp;pl=16&amp;amp;shardbypass=yes" target="_blank"&gt;http://r14---sn-bvvbax-hjpl.gvt1.com/edgedl/release2/chrome/AOnIEhGH7WaH0jVMgWzb_TU_76.0.3809.87/76.0.3809.87_75.0.3770.142_chrome_updater.exe?cms_redirect=yes&amp;amp;mip=165.201.56.130&amp;amp;mm=28&amp;amp;mn=sn-bvvbax-hjpl&amp;amp;ms=nvh&amp;amp;mt=1564511187&amp;amp;mv=m&amp;amp;mvi=13&amp;amp;nh=EAE&amp;amp;pl=16&amp;amp;shardbypass=yes&lt;/A&gt; "DOL\dingels@KDOL_Web_Auth" DIRECT/r14---sn-bvvbax-hjpl.gvt1.com application/octet-stream DEFAULT_CASE_12-KDOL_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,"-",-,-,-,-,"-",-,-,-,"-",-,-,"-","-",-,-,-,-,"-","-","-","-","-","-",39205.53,0,-,"-","-",-,"-",-,-,"-","-",-,-,"-"&amp;gt; -&lt;/P&gt;

&lt;P&gt;7/30/19&lt;BR /&gt;
1:33:50.000 PM&lt;BR /&gt;&lt;BR /&gt;
Jul 30 13:33:50 10.140.24.234 Jul 30 13:33:50 Access_Logs_Splunk: Info: 1564511627.609 1779 10.140.4.14 TCP_MISS/200 3461685 GET &lt;A href="http://workforce-ks.com/wp-content/uploads/2015/05/08.01.2019-One-Stop-Advisory-Council-Meeting-Packet.pdf" target="_blank"&gt;http://workforce-ks.com/wp-content/uploads/2015/05/08.01.2019-One-Stop-Advisory-Council-Meeting-Packet.pdf&lt;/A&gt; "DOL\nstruckhoff@KDOL_Web_Auth" DIRECT/workforce-ks.com application/pdf DEFAULT_CASE_12-Social_Media_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,"-",-,-,-,-,"-",-,-,-,"-",-,-,"-","-",-,-,-,-,"-","-","-","-","-","-",15566.88,0,-,"-","-",-,"-",-,-,"-","-",-,-,"-"&amp;gt; -&lt;/P&gt;

&lt;P&gt;7/30/19&lt;BR /&gt;
1:33:11.000 PM&lt;BR /&gt;&lt;BR /&gt;
Jul 30 13:33:11 10.140.24.234 Jul 30 13:33:11 Access_Logs_Splunk: Info: 1564511588.080 44 10.140.4.104 TCP_MISS/200 35005 GET &lt;A href="http://ts.intra.dol.ks.gov/Files/PDF/EmployeeRecognition.pdf" target="_blank"&gt;http://ts.intra.dol.ks.gov/Files/PDF/EmployeeRecognition.pdf&lt;/A&gt; "DOL\njanco@KDOL_Web_Auth" DIRECT/ts.intra.dol.ks.gov application/pdf DEFAULT_CASE_12-Social_Media_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,"-",-,-,-,-,"-",-,-,-,"-",-,-,"-","-",-,-,-,-,"-","-","-","-","-","-",6364.55,0,-,"-","-",-,"-",-,-,"-","-",-,-,"-"&amp;gt; -&lt;/P&gt;</description>
      <pubDate>Wed, 30 Sep 2020 01:32:20 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Extract-data-from-URL-string/m-p/419752#M120692</guid>
      <dc:creator>Vfinney</dc:creator>
      <dc:date>2020-09-30T01:32:20Z</dc:date>
    </item>
    <item>
      <title>Re: Extract data from URL string</title>
      <link>https://community.splunk.com/t5/Splunk-Search/Extract-data-from-URL-string/m-p/419753#M120693</link>
      <description>&lt;P&gt;Greetings @Vfinney,&lt;/P&gt;

&lt;P&gt;Please try the following run-anywhere search.&lt;/P&gt;

&lt;PRE&gt;&lt;CODE&gt;| makeresults
| eval _raw="7/30/19 1:29:52.000 PM Jul 30 13:29:52 10.140.24.233 Jul 30 13:29:52 Access_Logs_Splunk: Info: 1564511389.352 80 10.140.6.27 TCP_MISS/204 793 GET &lt;A href="http://dmp.truoptik.com/239e300e6dca3b53/sync.gif?dm=ib.adnxs.com&amp;amp;fck=6298473322644763945" target="test_blank"&gt;http://dmp.truoptik.com/239e300e6dca3b53/sync.gif?dm=ib.adnxs.com&amp;amp;fck=6298473322644763945&lt;/A&gt; \"DOL\sroth@KDOL_Web_Auth\" DIRECT/dmp.truoptik.com - DEFAULT_CASE_12-KDOL_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,\"-\",-,-,-,-,\"-\",-,-,-,\"-\",-,-,\"-\",\"-\",-,-,-,-,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",79.30,0,-,\"-\",\"-\",-,\"-\",-,-,\"-\",\"-\",-,-,\"-\"&amp;gt; -"
| append [ makeresults | eval _raw="7/30/19 1:29:42.000 PM Jul 30 13:29:42 10.140.24.233 Jul 30 13:29:42 Access_Logs_Splunk: Info: 1564511379.248 324 10.140.10.21 TCP_MISS/206 1587824 GET &lt;A href="http://r14---sn-bvvbaxjpl.gvt1.com/edgedl/release2/chrome/AOnIEhGH7WaH0jVMgWzb_TU_76.0.3809.87/76.0.3809.87_75.0.3770.142_chrome_updater.exe?cms_redirect=yes&amp;amp;mip=165.201.56.130&amp;amp;mm=28&amp;amp;mn=sn-bvvbax-hjpl&amp;amp;ms=nvh&amp;amp;mt=1564511187&amp;amp;mv=m&amp;amp;mvi=13&amp;amp;nh=EAE&amp;amp;pl=16&amp;amp;shardbypass=yes" target="test_blank"&gt;http://r14---sn-bvvbaxjpl.gvt1.com/edgedl/release2/chrome/AOnIEhGH7WaH0jVMgWzb_TU_76.0.3809.87/76.0.3809.87_75.0.3770.142_chrome_updater.exe?cms_redirect=yes&amp;amp;mip=165.201.56.130&amp;amp;mm=28&amp;amp;mn=sn-bvvbax-hjpl&amp;amp;ms=nvh&amp;amp;mt=1564511187&amp;amp;mv=m&amp;amp;mvi=13&amp;amp;nh=EAE&amp;amp;pl=16&amp;amp;shardbypass=yes&lt;/A&gt; \"DOL\dingels@KDOL_Web_Auth\" DIRECT/r14---sn-bvvbax-hjpl.gvt1.com application/octet-stream DEFAULT_CASE_12-KDOL_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,\"-\",-,-,-,-,\"-\",-,-,-,\"-\",-,-,\"-\",\"-\",-,-,-,-,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",39205.53,0,-,\"-\",\"-\",-,\"-\",-,-,\"-\",\"-\",-,-,\"-\"&amp;gt; -" ]
| append [ makeresults | eval _raw="7/30/19 1:33:50.000 PM Jul 30 13:33:50 10.140.24.234 Jul 30 13:33:50 Access_Logs_Splunk: Info: 1564511627.609 1779 10.140.4.14 TCP_MISS/200 3461685 GET &lt;A href="http://workforce-ks.com/wp-content/uploads/2015/05/08.01.2019-One-Stop-Advisory-Council-Meeting-Packet.pdf" target="test_blank"&gt;http://workforce-ks.com/wp-content/uploads/2015/05/08.01.2019-One-Stop-Advisory-Council-Meeting-Packet.pdf&lt;/A&gt; \"DOL\nstruckhoff@KDOL_Web_Auth\" DIRECT/workforce-ks.com application/pdf DEFAULT_CASE_12-Social_Media_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,\"-\",-,-,-,-,\"-\",-,-,-,\"-\",-,-,\"-\",\"-\",-,-,-,-,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",15566.88,0,-,\"-\",\"-\",-,\"-\",-,-,\"-\",\"-\",-,-,\"-\"&amp;gt; -" ]
| append [ makeresults | eval _raw="7/30/19 1:33:11.000 PM  Jul 30 13:33:11 10.140.24.234 Jul 30 13:33:11 Access_Logs_Splunk: Info: 1564511588.080 44 10.140.4.104 TCP_MISS/200 35005 GET &lt;A href="http://ts.intra.dol.ks.gov/Files/PDF/EmployeeRecognition.pdf" target="test_blank"&gt;http://ts.intra.dol.ks.gov/Files/PDF/EmployeeRecognition.pdf&lt;/A&gt; \"DOL\njanco@KDOL_Web_Auth\" DIRECT/ts.intra.dol.ks.gov application/pdf DEFAULT_CASE_12-Social_Media_Access_Policy-KDOL_Web_Identity-NONE-NONE-NONE-DefaultGroup &amp;lt;-,-,-,\"-\",-,-,-,-,\"-\",-,-,-,\"-\",-,-,\"-\",\"-\",-,-,-,-,\"-\",\"-\",\"-\",\"-\",\"-\",\"-\",6364.55,0,-,\"-\",\"-\",-,\"-\",-,-,\"-\",\"-\",-,-,\"-\"&amp;gt; -" ]
| rex field=_raw     "GET (?&amp;lt;Full_URL&amp;gt;https?://[^\s]+)"
| rex field=Full_URL "(?&amp;lt;URL&amp;gt;https?://[^/]+/)"
| rex field=Full_URL "/(?&amp;lt;Filename&amp;gt;[^/]+)(?&amp;lt;Filetype&amp;gt;\.(gif|exe|pdf))\??"
| table URL Filename Filetype
&lt;/CODE&gt;&lt;/PRE&gt;

&lt;P&gt;These are the results:&lt;/P&gt;

&lt;PRE&gt;&lt;CODE&gt;URL                                    Filename                                              Filetype
&lt;A href="http://dmp.truoptik.com/" target="test_blank"&gt;http://dmp.truoptik.com/&lt;/A&gt;               sync                                                  .gif
&lt;A href="http://r14---sn-bvvbaxjpl.gvt1.com/" target="test_blank"&gt;http://r14---sn-bvvbaxjpl.gvt1.com/&lt;/A&gt;    76.0.3809.87_75.0.3770.142_chrome_updater             .exe
&lt;A href="http://workforce-ks.com/" target="test_blank"&gt;http://workforce-ks.com/&lt;/A&gt;               08.01.2019-One-Stop-Advisory-Council-Meeting-Packet   .pdf
&lt;A href="http://ts.intra.dol.ks.gov/" target="test_blank"&gt;http://ts.intra.dol.ks.gov/&lt;/A&gt;            EmployeeRecognition                                   .pdf
&lt;/CODE&gt;&lt;/PRE&gt;

&lt;P&gt;Assumptions:&lt;BR /&gt;
- URL is always preceded by "GET " and does not contain spaces.&lt;BR /&gt;
- Filename does not contain spaces or the "/" character.&lt;BR /&gt;
- Filetype is either &lt;CODE&gt;.gif&lt;/CODE&gt;, &lt;CODE&gt;.exe&lt;/CODE&gt;, or &lt;CODE&gt;.pdf&lt;/CODE&gt;. You can add &lt;CODE&gt;|&lt;/CODE&gt; and the new extension after &lt;CODE&gt;gif|exe|pdf&lt;/CODE&gt; to add others.&lt;/P&gt;</description>
      <pubDate>Tue, 30 Jul 2019 19:36:33 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/Extract-data-from-URL-string/m-p/419753#M120693</guid>
      <dc:creator>jacobpevans</dc:creator>
      <dc:date>2019-07-30T19:36:33Z</dc:date>
    </item>
  </channel>
</rss>

