<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: How to check if a field contains unicode in Splunk Search</title>
    <link>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746161#M241598</link>
    <description>&lt;P&gt;&lt;a href="https://community.splunk.com/t5/user/viewprofilepage/user-id/269035"&gt;@Iris_Pi&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="kiran_panchavat_0-1747208902837.png" style="width: 400px;"&gt;&lt;img src="https://community.splunk.com/t5/image/serverpage/image-id/39006i3BB86E3707DFB23B/image-size/medium?v=v2&amp;amp;px=400" role="button" title="kiran_panchavat_0-1747208902837.png" alt="kiran_panchavat_0-1747208902837.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Wed, 14 May 2025 07:48:33 GMT</pubDate>
    <dc:creator>kiran_panchavat</dc:creator>
    <dc:date>2025-05-14T07:48:33Z</dc:date>
    <item>
      <title>How to check if a field contains unicode</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746159#M241597</link>
      <description>&lt;P&gt;Hello Everyone,&lt;/P&gt;&lt;P&gt;I want to check if a field called "from_header_displayname" contains any Unicode characters.&lt;/P&gt;&lt;P&gt;Below is the event source; this example event contains the Unicode escape "\u0445":&lt;BR /&gt;&lt;STRONG&gt;"from_header_displayname": "'support@\u0445.comx.com'&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;And the following is what I see in the web console; the Unicode escape has been translated into "x" (note: it's not the real letter x, but something that looks like x in another language):&lt;BR /&gt;&lt;STRONG&gt;from_header_displayname: 'support@х.comx.com'&lt;/STRONG&gt;&lt;/P&gt;&lt;P&gt;I used the following search, but with no luck:&lt;BR /&gt;index=email | regex from_header_displayname="[\u0000-\uffff]"&lt;BR /&gt;&lt;FONT color="#FF0000"&gt;&lt;SPAN&gt;Error in 'SearchOperator:regex': The regex '[\u0000-\uffff]' is invalid. Regex: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u.&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;Please advise what I should use in this case.&lt;/P&gt;&lt;P&gt;Thanks in advance.&lt;/P&gt;&lt;P&gt;Regards,&lt;BR /&gt;Iris&lt;/P&gt;</description>
      <pubDate>Wed, 14 May 2025 07:39:31 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746159#M241597</guid>
      <dc:creator>Iris_Pi</dc:creator>
      <dc:date>2025-05-14T07:39:31Z</dc:date>
    </item>
    <item>
      <title>Re: How to check if a field contains unicode</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746161#M241598</link>
      <description>&lt;P&gt;&lt;a href="https://community.splunk.com/t5/user/viewprofilepage/user-id/269035"&gt;@Iris_Pi&lt;/a&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;span class="lia-inline-image-display-wrapper lia-image-align-inline" image-alt="kiran_panchavat_0-1747208902837.png" style="width: 400px;"&gt;&lt;img src="https://community.splunk.com/t5/image/serverpage/image-id/39006i3BB86E3707DFB23B/image-size/medium?v=v2&amp;amp;px=400" role="button" title="kiran_panchavat_0-1747208902837.png" alt="kiran_panchavat_0-1747208902837.png" /&gt;&lt;/span&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 14 May 2025 07:48:33 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746161#M241598</guid>
      <dc:creator>kiran_panchavat</dc:creator>
      <dc:date>2025-05-14T07:48:33Z</dc:date>
    </item>
    <item>
      <title>Re: How to check if a field contains unicode</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746165#M241599</link>
      <description>&lt;P&gt;Unicode includes ASCII characters, so 0000-ffff would include all 16 bit characters. If you are looking for any 16 bit characters you could do either of these&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;| eval hasUncode=if(match(string, "[^[:ascii:]]"), "HAS-NON-ASCII", "ASCII")
| eval hasUncode=if(match(string, "[^\x00-\xff]"), "HAS-16 BIT CHARS", "8-BIT")&lt;/LI-CODE&gt;&lt;P&gt;The first character class is ascii and is checking for any characters NOT in the ascii range (0x00-0x7f) and the second is checking for any non 8 bit characters.&lt;/P&gt;&lt;P&gt;So, this example which includes your lower case Cyrillic x&amp;nbsp;&amp;nbsp;demonstrates&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;| makeresults 
| eval string=printf("{\"from_header_displayname\": \"'support@%c.comx.com'\"}", 1024+69)
| eval hasUncode1=if(match(string, "[^[:ascii:]]"), "HAS-NON-ASCII", "ASCII")
| eval hasUncode2=if(match(string, "[^\x00-\xff]"), "HAS-16-BIT", "8 BIT")&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Wed, 14 May 2025 08:19:49 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746165#M241599</guid>
      <dc:creator>bowesmana</dc:creator>
      <dc:date>2025-05-14T08:19:49Z</dc:date>
    </item>
    <item>
      <title>Re: How to check if a field contains unicode</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746166#M241600</link>
      <description>&lt;P&gt;To check if a field contains Unicode characters, you can use the regex command with a regular expression that matches non-ASCII characters, but if you're wanting to do filtering you might be better with something like match.&lt;/P&gt;&lt;PRE&gt;index=email 
| eval is_unicode = if(match(from_header_displayname, "[^\x00-\x7F]"), "true", "false")
| where is_unicode="true"&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;This search uses the match function to check if the from_header_displayname field contains any characters outside the ASCII range (\x00-\x7F). If it does, the is_unicode field is set to "true".&lt;/P&gt;&lt;P&gt;Alternatively, you can directly filter the events using the where command with the match function.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;PRE&gt;index=email 
| where match(from_header_displayname, "[^\x00-\x7F]")&lt;/PRE&gt;&lt;P&gt;Here is another working example:&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;| makeresults 
| eval from_header_displayname="support@\u0445.comx.com" 
| eval from_header_displayname_unicode="support@х.comx.com" 
| table from_header_displayname from_header_displayname_unicode 
| eval unicode_detected_raw=if(match(from_header_displayname,"[^\x00-\x7F]"),"Yes","No") 
| eval unicode_detected_unicode=if(match(from_header_displayname_unicode,"[^\x00-\x7F]"),"Yes","No")
| table from_header_displayname unicode_detected_raw from_header_displayname_unicode unicode_detected_unicode&lt;/LI-CODE&gt;&lt;P&gt;Both of these approaches will help you identify events where the from_header_displayname field contains Unicode characters.&lt;/P&gt;&lt;P&gt;&lt;span class="lia-unicode-emoji" title=":glowing_star:"&gt;🌟&lt;/span&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;&lt;STRONG&gt;Did this answer help you?&lt;/STRONG&gt;&lt;SPAN&gt;&amp;nbsp;&lt;/SPAN&gt;If so, please consider:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Adding karma to show it was useful&lt;/LI&gt;&lt;LI&gt;Marking it as the solution if it resolved your issue&lt;/LI&gt;&lt;LI&gt;Commenting if you need any clarification&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;Your feedback encourages the volunteers in this community to continue contributing.&lt;/P&gt;</description>
      <pubDate>Wed, 14 May 2025 08:26:22 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746166#M241600</guid>
      <dc:creator>livehybrid</dc:creator>
      <dc:date>2025-05-14T08:26:22Z</dc:date>
    </item>
    <item>
      <title>Re: How to check if a field contains unicode</title>
      <link>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746214#M241603</link>
      <description>&lt;P&gt;Thank you all for your replies! It helps!&lt;/P&gt;</description>
      <pubDate>Thu, 15 May 2025 01:56:38 GMT</pubDate>
      <guid>https://community.splunk.com/t5/Splunk-Search/How-to-check-if-a-field-contains-unicode/m-p/746214#M241603</guid>
      <dc:creator>Iris_Pi</dc:creator>
      <dc:date>2025-05-15T01:56:38Z</dc:date>
    </item>
  </channel>
</rss>

