[ Index ] |
PHP Cross Reference of WordPress |
[Summary view] [Print] [Text view]
1 <?php 2 /** 3 * Main WordPress Formatting API. 4 * 5 * Handles many functions for formatting output. 6 * 7 * @package WordPress 8 */ 9 10 /** 11 * Replaces common plain text characters with formatted entities. 12 * 13 * Returns given text with transformations of quotes into smart quotes, apostrophes, 14 * dashes, ellipses, the trademark symbol, and the multiplication symbol. 15 * 16 * As an example, 17 * 18 * 'cause today's effort makes it worth tomorrow's "holiday" ... 19 * 20 * Becomes: 21 * 22 * ’cause today’s effort makes it worth tomorrow’s “holiday” … 23 * 24 * Code within certain HTML blocks are skipped. 25 * 26 * Do not use this function before the {@see 'init'} action hook; everything will break. 27 * 28 * @since 0.71 29 * 30 * @global array $wp_cockneyreplace Array of formatted entities for certain common phrases. 31 * @global array $shortcode_tags 32 * 33 * @param string $text The text to be formatted. 34 * @param bool $reset Set to true for unit testing. Translated patterns will reset. 35 * @return string The string replaced with HTML entities. 36 */ 37 function wptexturize( $text, $reset = false ) { 38 global $wp_cockneyreplace, $shortcode_tags; 39 static $static_characters = null, 40 $static_replacements = null, 41 $dynamic_characters = null, 42 $dynamic_replacements = null, 43 $default_no_texturize_tags = null, 44 $default_no_texturize_shortcodes = null, 45 $run_texturize = true, 46 $apos = null, 47 $prime = null, 48 $double_prime = null, 49 $opening_quote = null, 50 $closing_quote = null, 51 $opening_single_quote = null, 52 $closing_single_quote = null, 53 $open_q_flag = '<!--oq-->', 54 $open_sq_flag = '<!--osq-->', 55 $apos_flag = '<!--apos-->'; 56 57 // If there's nothing to do, just stop. 58 if ( empty( $text ) || false === $run_texturize ) { 59 return $text; 60 } 61 62 // Set up static variables. Run once only. 63 if ( $reset || ! isset( $static_characters ) ) { 64 /** 65 * Filters whether to skip running wptexturize(). 
66 * 67 * Returning false from the filter will effectively short-circuit wptexturize() 68 * and return the original text passed to the function instead. 69 * 70 * The filter runs only once, the first time wptexturize() is called. 71 * 72 * @since 4.0.0 73 * 74 * @see wptexturize() 75 * 76 * @param bool $run_texturize Whether to short-circuit wptexturize(). 77 */ 78 $run_texturize = apply_filters( 'run_wptexturize', $run_texturize ); 79 if ( false === $run_texturize ) { 80 return $text; 81 } 82 83 /* translators: Opening curly double quote. */ 84 $opening_quote = _x( '“', 'opening curly double quote' ); 85 /* translators: Closing curly double quote. */ 86 $closing_quote = _x( '”', 'closing curly double quote' ); 87 88 /* translators: Apostrophe, for example in 'cause or can't. */ 89 $apos = _x( '’', 'apostrophe' ); 90 91 /* translators: Prime, for example in 9' (nine feet). */ 92 $prime = _x( '′', 'prime' ); 93 /* translators: Double prime, for example in 9" (nine inches). */ 94 $double_prime = _x( '″', 'double prime' ); 95 96 /* translators: Opening curly single quote. */ 97 $opening_single_quote = _x( '‘', 'opening curly single quote' ); 98 /* translators: Closing curly single quote. */ 99 $closing_single_quote = _x( '’', 'closing curly single quote' ); 100 101 /* translators: En dash. */ 102 $en_dash = _x( '–', 'en dash' ); 103 /* translators: Em dash. */ 104 $em_dash = _x( '—', 'em dash' ); 105 106 $default_no_texturize_tags = array( 'pre', 'code', 'kbd', 'style', 'script', 'tt' ); 107 $default_no_texturize_shortcodes = array( 'code' ); 108 109 // If a plugin has provided an autocorrect array, use it. 110 if ( isset( $wp_cockneyreplace ) ) { 111 $cockney = array_keys( $wp_cockneyreplace ); 112 $cockneyreplace = array_values( $wp_cockneyreplace ); 113 } else { 114 /* 115 * translators: This is a comma-separated list of words that defy the syntax of quotations in normal use, 116 * for example... 'We do not have enough words yet'... is a typical quoted phrase. 
But when we write 117 * lines of code 'til we have enough of 'em, then we need to insert apostrophes instead of quotes. 118 */ 119 $cockney = explode( 120 ',', 121 _x( 122 "'tain't,'twere,'twas,'tis,'twill,'til,'bout,'nuff,'round,'cause,'em", 123 'Comma-separated list of words to texturize in your language' 124 ) 125 ); 126 127 $cockneyreplace = explode( 128 ',', 129 _x( 130 '’tain’t,’twere,’twas,’tis,’twill,’til,’bout,’nuff,’round,’cause,’em', 131 'Comma-separated list of replacement words in your language' 132 ) 133 ); 134 } 135 136 $static_characters = array_merge( array( '...', '``', '\'\'', ' (tm)' ), $cockney ); 137 $static_replacements = array_merge( array( '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); 138 139 // Pattern-based replacements of characters. 140 // Sort the remaining patterns into several arrays for performance tuning. 141 $dynamic_characters = array( 142 'apos' => array(), 143 'quote' => array(), 144 'dash' => array(), 145 ); 146 $dynamic_replacements = array( 147 'apos' => array(), 148 'quote' => array(), 149 'dash' => array(), 150 ); 151 $dynamic = array(); 152 $spaces = wp_spaces_regexp(); 153 154 // '99' and '99" are ambiguous among other patterns; assume it's an abbreviated year at the end of a quotation. 155 if ( "'" !== $apos || "'" !== $closing_single_quote ) { 156 $dynamic[ '/\'(\d\d)\'(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_single_quote; 157 } 158 if ( "'" !== $apos || '"' !== $closing_quote ) { 159 $dynamic[ '/\'(\d\d)"(?=\Z|[.,:;!?)}\-\]]|>|' . $spaces . ')/' ] = $apos_flag . '$1' . $closing_quote; 160 } 161 162 // '99 '99s '99's (apostrophe) But never '9 or '99% or '999 or '99.0. 163 if ( "'" !== $apos ) { 164 $dynamic['/\'(?=\d\d(?:\Z|(?![%\d]|[.,]\d)))/'] = $apos_flag; 165 } 166 167 // Quoted numbers like '0.42'. 168 if ( "'" !== $opening_single_quote && "'" !== $closing_single_quote ) { 169 $dynamic[ '/(?<=\A|' . $spaces . ')\'(\d[.,\d]*)\'/' ] = $open_sq_flag . '$1' . 
$closing_single_quote; 170 } 171 172 // Single quote at start, or preceded by (, {, <, [, ", -, or spaces. 173 if ( "'" !== $opening_single_quote ) { 174 $dynamic[ '/(?<=\A|[([{"\-]|<|' . $spaces . ')\'/' ] = $open_sq_flag; 175 } 176 177 // Apostrophe in a word. No spaces, double apostrophes, or other punctuation. 178 if ( "'" !== $apos ) { 179 $dynamic[ '/(?<!' . $spaces . ')\'(?!\Z|[.,:;!?"\'(){}[\]\-]|&[lg]t;|' . $spaces . ')/' ] = $apos_flag; 180 } 181 182 $dynamic_characters['apos'] = array_keys( $dynamic ); 183 $dynamic_replacements['apos'] = array_values( $dynamic ); 184 $dynamic = array(); 185 186 // Quoted numbers like "42". 187 if ( '"' !== $opening_quote && '"' !== $closing_quote ) { 188 $dynamic[ '/(?<=\A|' . $spaces . ')"(\d[.,\d]*)"/' ] = $open_q_flag . '$1' . $closing_quote; 189 } 190 191 // Double quote at start, or preceded by (, {, <, [, -, or spaces, and not followed by spaces. 192 if ( '"' !== $opening_quote ) { 193 $dynamic[ '/(?<=\A|[([{\-]|<|' . $spaces . ')"(?!' . $spaces . ')/' ] = $open_q_flag; 194 } 195 196 $dynamic_characters['quote'] = array_keys( $dynamic ); 197 $dynamic_replacements['quote'] = array_values( $dynamic ); 198 $dynamic = array(); 199 200 // Dashes and spaces. 201 $dynamic['/---/'] = $em_dash; 202 $dynamic[ '/(?<=^|' . $spaces . ')--(?=$|' . $spaces . ')/' ] = $em_dash; 203 $dynamic['/(?<!xn)--/'] = $en_dash; 204 $dynamic[ '/(?<=^|' . $spaces . ')-(?=$|' . $spaces . ')/' ] = $en_dash; 205 206 $dynamic_characters['dash'] = array_keys( $dynamic ); 207 $dynamic_replacements['dash'] = array_values( $dynamic ); 208 } 209 210 // Must do this every time in case plugins use these filters in a context sensitive manner. 211 /** 212 * Filters the list of HTML elements not to texturize. 213 * 214 * @since 2.8.0 215 * 216 * @param string[] $default_no_texturize_tags An array of HTML element names. 
217 */ 218 $no_texturize_tags = apply_filters( 'no_texturize_tags', $default_no_texturize_tags ); 219 /** 220 * Filters the list of shortcodes not to texturize. 221 * 222 * @since 2.8.0 223 * 224 * @param string[] $default_no_texturize_shortcodes An array of shortcode names. 225 */ 226 $no_texturize_shortcodes = apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes ); 227 228 $no_texturize_tags_stack = array(); 229 $no_texturize_shortcodes_stack = array(); 230 231 // Look for shortcodes and HTML elements. 232 233 preg_match_all( '@\[/?([^<>&/\[\]\x00-\x20=]++)@', $text, $matches ); 234 $tagnames = array_intersect( array_keys( $shortcode_tags ), $matches[1] ); 235 $found_shortcodes = ! empty( $tagnames ); 236 $shortcode_regex = $found_shortcodes ? _get_wptexturize_shortcode_regex( $tagnames ) : ''; 237 $regex = _get_wptexturize_split_regex( $shortcode_regex ); 238 239 $textarr = preg_split( $regex, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY ); 240 241 foreach ( $textarr as &$curl ) { 242 // Only call _wptexturize_pushpop_element if $curl is a delimiter. 243 $first = $curl[0]; 244 if ( '<' === $first ) { 245 if ( '<!--' === substr( $curl, 0, 4 ) ) { 246 // This is an HTML comment delimiter. 247 continue; 248 } else { 249 // This is an HTML element delimiter. 250 251 // Replace each & with & unless it already looks like an entity. 252 $curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&', $curl ); 253 254 _wptexturize_pushpop_element( $curl, $no_texturize_tags_stack, $no_texturize_tags ); 255 } 256 } elseif ( '' === trim( $curl ) ) { 257 // This is a newline between delimiters. Performance improves when we check this. 258 continue; 259 260 } elseif ( '[' === $first && $found_shortcodes && 1 === preg_match( '/^' . $shortcode_regex . '$/', $curl ) ) { 261 // This is a shortcode delimiter. 262 263 if ( '[[' !== substr( $curl, 0, 2 ) && ']]' !== substr( $curl, -2 ) ) { 264 // Looks like a normal shortcode. 
265 _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes ); 266 } else { 267 // Looks like an escaped shortcode. 268 continue; 269 } 270 } elseif ( empty( $no_texturize_shortcodes_stack ) && empty( $no_texturize_tags_stack ) ) { 271 // This is neither a delimiter, nor is this content inside of no_texturize pairs. Do texturize. 272 273 $curl = str_replace( $static_characters, $static_replacements, $curl ); 274 275 if ( false !== strpos( $curl, "'" ) ) { 276 $curl = preg_replace( $dynamic_characters['apos'], $dynamic_replacements['apos'], $curl ); 277 $curl = wptexturize_primes( $curl, "'", $prime, $open_sq_flag, $closing_single_quote ); 278 $curl = str_replace( $apos_flag, $apos, $curl ); 279 $curl = str_replace( $open_sq_flag, $opening_single_quote, $curl ); 280 } 281 if ( false !== strpos( $curl, '"' ) ) { 282 $curl = preg_replace( $dynamic_characters['quote'], $dynamic_replacements['quote'], $curl ); 283 $curl = wptexturize_primes( $curl, '"', $double_prime, $open_q_flag, $closing_quote ); 284 $curl = str_replace( $open_q_flag, $opening_quote, $curl ); 285 } 286 if ( false !== strpos( $curl, '-' ) ) { 287 $curl = preg_replace( $dynamic_characters['dash'], $dynamic_replacements['dash'], $curl ); 288 } 289 290 // 9x9 (times), but never 0x9999. 291 if ( 1 === preg_match( '/(?<=\d)x\d/', $curl ) ) { 292 // Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one! 293 $curl = preg_replace( '/\b(\d(?(?<=0)[\d\.,]+|[\d\.,]*))x(\d[\d\.,]*)\b/', '$1×$2', $curl ); 294 } 295 296 // Replace each & with & unless it already looks like an entity. 297 $curl = preg_replace( '/&(?!#(?:\d+|x[a-f0-9]+);|[a-z1-4]{1,8};)/i', '&', $curl ); 298 } 299 } 300 301 return implode( '', $textarr ); 302 } 303 304 /** 305 * Implements a logic tree to determine whether or not "7'." represents seven feet, 306 * then converts the special char into either a prime char or a closing quote char. 
307 * 308 * @since 4.3.0 309 * 310 * @param string $haystack The plain text to be searched. 311 * @param string $needle The character to search for such as ' or ". 312 * @param string $prime The prime char to use for replacement. 313 * @param string $open_quote The opening quote char. Opening quote replacement must be 314 * accomplished already. 315 * @param string $close_quote The closing quote char to use for replacement. 316 * @return string The $haystack value after primes and quotes replacements. 317 */ 318 function wptexturize_primes( $haystack, $needle, $prime, $open_quote, $close_quote ) { 319 $spaces = wp_spaces_regexp(); 320 $flag = '<!--wp-prime-or-quote-->'; 321 $quote_pattern = "/$needle(?=\\Z|[.,:;!?)}\\-\\]]|>|" . $spaces . ')/'; 322 $prime_pattern = "/(?<=\\d)$needle/"; 323 $flag_after_digit = "/(?<=\\d)$flag/"; 324 $flag_no_digit = "/(?<!\\d)$flag/"; 325 326 $sentences = explode( $open_quote, $haystack ); 327 328 foreach ( $sentences as $key => &$sentence ) { 329 if ( false === strpos( $sentence, $needle ) ) { 330 continue; 331 } elseif ( 0 !== $key && 0 === substr_count( $sentence, $close_quote ) ) { 332 $sentence = preg_replace( $quote_pattern, $flag, $sentence, -1, $count ); 333 if ( $count > 1 ) { 334 // This sentence appears to have multiple closing quotes. Attempt Vulcan logic. 335 $sentence = preg_replace( $flag_no_digit, $close_quote, $sentence, -1, $count2 ); 336 if ( 0 === $count2 ) { 337 // Try looking for a quote followed by a period. 338 $count2 = substr_count( $sentence, "$flag." ); 339 if ( $count2 > 0 ) { 340 // Assume the rightmost quote-period match is the end of quotation. 341 $pos = strrpos( $sentence, "$flag." ); 342 } else { 343 // When all else fails, make the rightmost candidate a closing quote. 344 // This is most likely to be problematic in the context of bug #18549. 
345 $pos = strrpos( $sentence, $flag ); 346 } 347 $sentence = substr_replace( $sentence, $close_quote, $pos, strlen( $flag ) ); 348 } 349 // Use conventional replacement on any remaining primes and quotes. 350 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 351 $sentence = preg_replace( $flag_after_digit, $prime, $sentence ); 352 $sentence = str_replace( $flag, $close_quote, $sentence ); 353 } elseif ( 1 == $count ) { 354 // Found only one closing quote candidate, so give it priority over primes. 355 $sentence = str_replace( $flag, $close_quote, $sentence ); 356 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 357 } else { 358 // No closing quotes found. Just run primes pattern. 359 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 360 } 361 } else { 362 $sentence = preg_replace( $prime_pattern, $prime, $sentence ); 363 $sentence = preg_replace( $quote_pattern, $close_quote, $sentence ); 364 } 365 if ( '"' === $needle && false !== strpos( $sentence, '"' ) ) { 366 $sentence = str_replace( '"', $close_quote, $sentence ); 367 } 368 } 369 370 return implode( $open_quote, $sentences ); 371 } 372 373 /** 374 * Searches for disabled element tags. Pushes element to stack on tag open 375 * and pops on tag close. 376 * 377 * Assumes first char of `$text` is tag opening and last char is tag closing. 378 * Assumes second char of `$text` is optionally `/` to indicate closing as in `</html>`. 379 * 380 * @since 2.9.0 381 * @access private 382 * 383 * @param string $text Text to check. Must be a tag like `<html>` or `[shortcode]`. 384 * @param string[] $stack Array of open tag elements. 385 * @param string[] $disabled_elements Array of tag names to match against. Spaces are not allowed in tag names. 386 */ 387 function _wptexturize_pushpop_element( $text, &$stack, $disabled_elements ) { 388 // Is it an opening tag or closing tag? 
389 if ( isset( $text[1] ) && '/' !== $text[1] ) { 390 $opening_tag = true; 391 $name_offset = 1; 392 } elseif ( 0 === count( $stack ) ) { 393 // Stack is empty. Just stop. 394 return; 395 } else { 396 $opening_tag = false; 397 $name_offset = 2; 398 } 399 400 // Parse out the tag name. 401 $space = strpos( $text, ' ' ); 402 if ( false === $space ) { 403 $space = -1; 404 } else { 405 $space -= $name_offset; 406 } 407 $tag = substr( $text, $name_offset, $space ); 408 409 // Handle disabled tags. 410 if ( in_array( $tag, $disabled_elements, true ) ) { 411 if ( $opening_tag ) { 412 /* 413 * This disables texturize until we find a closing tag of our type 414 * (e.g. <pre>) even if there was invalid nesting before that. 415 * 416 * Example: in the case <pre>sadsadasd</code>"baba"</pre> 417 * "baba" won't be texturized. 418 */ 419 420 array_push( $stack, $tag ); 421 } elseif ( end( $stack ) == $tag ) { 422 array_pop( $stack ); 423 } 424 } 425 } 426 427 /** 428 * Replaces double line breaks with paragraph elements. 429 * 430 * A group of regex replaces used to identify text formatted with newlines and 431 * replace double line breaks with HTML paragraph tags. The remaining line breaks 432 * after conversion become `<br />` tags, unless `$br` is set to '0' or 'false'. 433 * 434 * @since 0.71 435 * 436 * @param string $text The text which has to be formatted. 437 * @param bool $br Optional. If set, this will convert all remaining line breaks 438 * after paragraphing. Line breaks within `<script>`, `<style>`, 439 * and `<svg>` tags are not affected. Default true. 440 * @return string Text which has been converted into correct paragraph tags. 441 */ 442 function wpautop( $text, $br = true ) { 443 $pre_tags = array(); 444 445 if ( trim( $text ) === '' ) { 446 return ''; 447 } 448 449 // Just to make things a little easier, pad the end. 450 $text = $text . "\n"; 451 452 /* 453 * Pre tags shouldn't be touched by autop. 
454 * Replace pre tags with placeholders and bring them back after autop. 455 */ 456 if ( strpos( $text, '<pre' ) !== false ) { 457 $text_parts = explode( '</pre>', $text ); 458 $last_part = array_pop( $text_parts ); 459 $text = ''; 460 $i = 0; 461 462 foreach ( $text_parts as $text_part ) { 463 $start = strpos( $text_part, '<pre' ); 464 465 // Malformed HTML? 466 if ( false === $start ) { 467 $text .= $text_part; 468 continue; 469 } 470 471 $name = "<pre wp-pre-tag-$i></pre>"; 472 $pre_tags[ $name ] = substr( $text_part, $start ) . '</pre>'; 473 474 $text .= substr( $text_part, 0, $start ) . $name; 475 $i++; 476 } 477 478 $text .= $last_part; 479 } 480 // Change multiple <br>'s into two line breaks, which will turn into paragraphs. 481 $text = preg_replace( '|<br\s*/?>\s*<br\s*/?>|', "\n\n", $text ); 482 483 $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'; 484 485 // Add a double line break above block-level opening tags. 486 $text = preg_replace( '!(<' . $allblocks . '[\s/>])!', "\n\n$1", $text ); 487 488 // Add a double line break below block-level closing tags. 489 $text = preg_replace( '!(</' . $allblocks . '>)!', "$1\n\n", $text ); 490 491 // Add a double line break after hr tags, which are self closing. 492 $text = preg_replace( '!(<hr\s*?/?>)!', "$1\n\n", $text ); 493 494 // Standardize newline characters to "\n". 495 $text = str_replace( array( "\r\n", "\r" ), "\n", $text ); 496 497 // Find newlines in all elements and add placeholders. 498 $text = wp_replace_in_html_tags( $text, array( "\n" => ' <!-- wpnl --> ' ) ); 499 500 // Collapse line breaks before and after <option> elements so they don't get autop'd. 
501 if ( strpos( $text, '<option' ) !== false ) { 502 $text = preg_replace( '|\s*<option|', '<option', $text ); 503 $text = preg_replace( '|</option>\s*|', '</option>', $text ); 504 } 505 506 /* 507 * Collapse line breaks inside <object> elements, before <param> and <embed> elements 508 * so they don't get autop'd. 509 */ 510 if ( strpos( $text, '</object>' ) !== false ) { 511 $text = preg_replace( '|(<object[^>]*>)\s*|', '$1', $text ); 512 $text = preg_replace( '|\s*</object>|', '</object>', $text ); 513 $text = preg_replace( '%\s*(</?(?:param|embed)[^>]*>)\s*%', '$1', $text ); 514 } 515 516 /* 517 * Collapse line breaks inside <audio> and <video> elements, 518 * before and after <source> and <track> elements. 519 */ 520 if ( strpos( $text, '<source' ) !== false || strpos( $text, '<track' ) !== false ) { 521 $text = preg_replace( '%([<\[](?:audio|video)[^>\]]*[>\]])\s*%', '$1', $text ); 522 $text = preg_replace( '%\s*([<\[]/(?:audio|video)[>\]])%', '$1', $text ); 523 $text = preg_replace( '%\s*(<(?:source|track)[^>]*>)\s*%', '$1', $text ); 524 } 525 526 // Collapse line breaks before and after <figcaption> elements. 527 if ( strpos( $text, '<figcaption' ) !== false ) { 528 $text = preg_replace( '|\s*(<figcaption[^>]*>)|', '$1', $text ); 529 $text = preg_replace( '|</figcaption>\s*|', '</figcaption>', $text ); 530 } 531 532 // Remove more than two contiguous line breaks. 533 $text = preg_replace( "/\n\n+/", "\n\n", $text ); 534 535 // Split up the contents into an array of strings, separated by double line breaks. 536 $paragraphs = preg_split( '/\n\s*\n/', $text, -1, PREG_SPLIT_NO_EMPTY ); 537 538 // Reset $text prior to rebuilding. 539 $text = ''; 540 541 // Rebuild the content as a string, wrapping every bit with a <p>. 542 foreach ( $paragraphs as $paragraph ) { 543 $text .= '<p>' . trim( $paragraph, "\n" ) . "</p>\n"; 544 } 545 546 // Under certain strange conditions it could create a P of entirely whitespace. 
547 $text = preg_replace( '|<p>\s*</p>|', '', $text ); 548 549 // Add a closing <p> inside <div>, <address>, or <form> tag if missing. 550 $text = preg_replace( '!<p>([^<]+)</(div|address|form)>!', '<p>$1</p></$2>', $text ); 551 552 // If an opening or closing block element tag is wrapped in a <p>, unwrap it. 553 $text = preg_replace( '!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', '$1', $text ); 554 555 // In some cases <li> may get wrapped in <p>, fix them. 556 $text = preg_replace( '|<p>(<li.+?)</p>|', '$1', $text ); 557 558 // If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>. 559 $text = preg_replace( '|<p><blockquote([^>]*)>|i', '<blockquote$1><p>', $text ); 560 $text = str_replace( '</blockquote></p>', '</p></blockquote>', $text ); 561 562 // If an opening or closing block element tag is preceded by an opening <p> tag, remove it. 563 $text = preg_replace( '!<p>\s*(</?' . $allblocks . '[^>]*>)!', '$1', $text ); 564 565 // If an opening or closing block element tag is followed by a closing <p> tag, remove it. 566 $text = preg_replace( '!(</?' . $allblocks . '[^>]*>)\s*</p>!', '$1', $text ); 567 568 // Optionally insert line breaks. 569 if ( $br ) { 570 // Replace newlines that shouldn't be touched with a placeholder. 571 $text = preg_replace_callback( '/<(script|style|svg).*?<\/\\1>/s', '_autop_newline_preservation_helper', $text ); 572 573 // Normalize <br> 574 $text = str_replace( array( '<br>', '<br/>' ), '<br />', $text ); 575 576 // Replace any new line characters that aren't preceded by a <br /> with a <br />. 577 $text = preg_replace( '|(?<!<br />)\s*\n|', "<br />\n", $text ); 578 579 // Replace newline placeholders with newlines. 580 $text = str_replace( '<WPPreserveNewline />', "\n", $text ); 581 } 582 583 // If a <br /> tag is after an opening or closing block tag, remove it. 584 $text = preg_replace( '!(</?' . $allblocks . 
'[^>]*>)\s*<br />!', '$1', $text ); 585 586 // If a <br /> tag is before a subset of opening or closing block tags, remove it. 587 $text = preg_replace( '!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $text ); 588 $text = preg_replace( "|\n</p>$|", '</p>', $text ); 589 590 // Replace placeholder <pre> tags with their original content. 591 if ( ! empty( $pre_tags ) ) { 592 $text = str_replace( array_keys( $pre_tags ), array_values( $pre_tags ), $text ); 593 } 594 595 // Restore newlines in all elements. 596 if ( false !== strpos( $text, '<!-- wpnl -->' ) ) { 597 $text = str_replace( array( ' <!-- wpnl --> ', '<!-- wpnl -->' ), "\n", $text ); 598 } 599 600 return $text; 601 } 602 603 /** 604 * Separates HTML elements and comments from the text. 605 * 606 * @since 4.2.4 607 * 608 * @param string $input The text which has to be formatted. 609 * @return string[] Array of the formatted text. 610 */ 611 function wp_html_split( $input ) { 612 return preg_split( get_html_split_regex(), $input, -1, PREG_SPLIT_DELIM_CAPTURE ); 613 } 614 615 /** 616 * Retrieves the regular expression for an HTML element. 617 * 618 * @since 4.4.0 619 * 620 * @return string The regular expression 621 */ 622 function get_html_split_regex() { 623 static $regex; 624 625 if ( ! isset( $regex ) ) { 626 // phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation 627 $comments = 628 '!' // Start of comment, after the <. 629 . '(?:' // Unroll the loop: Consume everything until --> is found. 630 . '-(?!->)' // Dash not followed by end of comment. 631 . '[^\-]*+' // Consume non-dashes. 632 . ')*+' // Loop possessively. 633 . '(?:-->)?'; // End of comment. If not found, match all input. 634 635 $cdata = 636 '!\[CDATA\[' // Start of comment, after the <. 637 . '[^\]]*+' // Consume non-]. 638 . '(?:' // Unroll the loop: Consume everything until ]]> is found. 639 . '](?!]>)' // One ] not followed by end of comment. 640 . '[^\]]*+' // Consume non-]. 
641 . ')*+' // Loop possessively. 642 . '(?:]]>)?'; // End of comment. If not found, match all input. 643 644 $escaped = 645 '(?=' // Is the element escaped? 646 . '!--' 647 . '|' 648 . '!\[CDATA\[' 649 . ')' 650 . '(?(?=!-)' // If yes, which type? 651 . $comments 652 . '|' 653 . $cdata 654 . ')'; 655 656 $regex = 657 '/(' // Capture the entire match. 658 . '<' // Find start of element. 659 . '(?' // Conditional expression follows. 660 . $escaped // Find end of escaped element. 661 . '|' // ...else... 662 . '[^>]*>?' // Find end of normal element. 663 . ')' 664 . ')/'; 665 // phpcs:enable 666 } 667 668 return $regex; 669 } 670 671 /** 672 * Retrieves the combined regular expression for HTML and shortcodes. 673 * 674 * @access private 675 * @ignore 676 * @internal This function will be removed in 4.5.0 per Shortcode API Roadmap. 677 * @since 4.4.0 678 * 679 * @param string $shortcode_regex Optional. The result from _get_wptexturize_shortcode_regex(). 680 * @return string The regular expression 681 */ 682 function _get_wptexturize_split_regex( $shortcode_regex = '' ) { 683 static $html_regex; 684 685 if ( ! isset( $html_regex ) ) { 686 // phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation 687 $comment_regex = 688 '!' // Start of comment, after the <. 689 . '(?:' // Unroll the loop: Consume everything until --> is found. 690 . '-(?!->)' // Dash not followed by end of comment. 691 . '[^\-]*+' // Consume non-dashes. 692 . ')*+' // Loop possessively. 693 . '(?:-->)?'; // End of comment. If not found, match all input. 694 695 $html_regex = // Needs replaced with wp_html_split() per Shortcode API Roadmap. 696 '<' // Find start of element. 697 . '(?(?=!--)' // Is this a comment? 698 . $comment_regex // Find end of comment. 699 . '|' 700 . '[^>]*>?' // Find end of element. If not found, match all input. 701 . ')'; 702 // phpcs:enable 703 } 704 705 if ( empty( $shortcode_regex ) ) { 706 $regex = '/(' . $html_regex . 
')/'; 707 } else { 708 $regex = '/(' . $html_regex . '|' . $shortcode_regex . ')/'; 709 } 710 711 return $regex; 712 } 713 714 /** 715 * Retrieves the regular expression for shortcodes. 716 * 717 * @access private 718 * @ignore 719 * @since 4.4.0 720 * 721 * @param string[] $tagnames Array of shortcodes to find. 722 * @return string The regular expression 723 */ 724 function _get_wptexturize_shortcode_regex( $tagnames ) { 725 $tagregexp = implode( '|', array_map( 'preg_quote', $tagnames ) ); 726 $tagregexp = "(?:$tagregexp)(?=[\\s\\]\\/])"; // Excerpt of get_shortcode_regex(). 727 // phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound -- don't remove regex indentation 728 $regex = 729 '\[' // Find start of shortcode. 730 . '[\/\[]?' // Shortcodes may begin with [/ or [[. 731 . $tagregexp // Only match registered shortcodes, because performance. 732 . '(?:' 733 . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. 734 . '|' 735 . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. 736 . ')*+' // Possessive critical. 737 . '\]' // Find end of shortcode. 738 . '\]?'; // Shortcodes may end with ]]. 739 // phpcs:enable 740 741 return $regex; 742 } 743 744 /** 745 * Replaces characters or phrases within HTML elements only. 746 * 747 * @since 4.2.3 748 * 749 * @param string $haystack The text which has to be formatted. 750 * @param array $replace_pairs In the form array('from' => 'to', ...). 751 * @return string The formatted text. 752 */ 753 function wp_replace_in_html_tags( $haystack, $replace_pairs ) { 754 // Find all elements. 755 $textarr = wp_html_split( $haystack ); 756 $changed = false; 757 758 // Optimize when searching for one item. 759 if ( 1 === count( $replace_pairs ) ) { 760 // Extract $needle and $replace. 761 foreach ( $replace_pairs as $needle => $replace ) { 762 } 763 764 // Loop through delimiters (elements) only. 
765 for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { 766 if ( false !== strpos( $textarr[ $i ], $needle ) ) { 767 $textarr[ $i ] = str_replace( $needle, $replace, $textarr[ $i ] ); 768 $changed = true; 769 } 770 } 771 } else { 772 // Extract all $needles. 773 $needles = array_keys( $replace_pairs ); 774 775 // Loop through delimiters (elements) only. 776 for ( $i = 1, $c = count( $textarr ); $i < $c; $i += 2 ) { 777 foreach ( $needles as $needle ) { 778 if ( false !== strpos( $textarr[ $i ], $needle ) ) { 779 $textarr[ $i ] = strtr( $textarr[ $i ], $replace_pairs ); 780 $changed = true; 781 // After one strtr() break out of the foreach loop and look at next element. 782 break; 783 } 784 } 785 } 786 } 787 788 if ( $changed ) { 789 $haystack = implode( $textarr ); 790 } 791 792 return $haystack; 793 } 794 795 /** 796 * Newline preservation help function for wpautop(). 797 * 798 * @since 3.1.0 799 * @access private 800 * 801 * @param array $matches preg_replace_callback matches array 802 * @return string 803 */ 804 function _autop_newline_preservation_helper( $matches ) { 805 return str_replace( "\n", '<WPPreserveNewline />', $matches[0] ); 806 } 807 808 /** 809 * Don't auto-p wrap shortcodes that stand alone. 810 * 811 * Ensures that shortcodes are not wrapped in `<p>...</p>`. 812 * 813 * @since 2.9.0 814 * 815 * @global array $shortcode_tags 816 * 817 * @param string $text The content. 818 * @return string The filtered content. 819 */ 820 function shortcode_unautop( $text ) { 821 global $shortcode_tags; 822 823 if ( empty( $shortcode_tags ) || ! is_array( $shortcode_tags ) ) { 824 return $text; 825 } 826 827 $tagregexp = implode( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) ); 828 $spaces = wp_spaces_regexp(); 829 830 // phpcs:disable Squiz.Strings.ConcatenationSpacing.PaddingFound,WordPress.WhiteSpace.PrecisionAlignment.Found -- don't remove regex indentation 831 $pattern = 832 '/' 833 . '<p>' // Opening paragraph. 834 . '(?:' . $spaces . 
')*+' // Optional leading whitespace. 835 . '(' // 1: The shortcode. 836 . '\\[' // Opening bracket. 837 . "($tagregexp)" // 2: Shortcode name. 838 . '(?![\\w-])' // Not followed by word character or hyphen. 839 // Unroll the loop: Inside the opening shortcode tag. 840 . '[^\\]\\/]*' // Not a closing bracket or forward slash. 841 . '(?:' 842 . '\\/(?!\\])' // A forward slash not followed by a closing bracket. 843 . '[^\\]\\/]*' // Not a closing bracket or forward slash. 844 . ')*?' 845 . '(?:' 846 . '\\/\\]' // Self closing tag and closing bracket. 847 . '|' 848 . '\\]' // Closing bracket. 849 . '(?:' // Unroll the loop: Optionally, anything between the opening and closing shortcode tags. 850 . '[^\\[]*+' // Not an opening bracket. 851 . '(?:' 852 . '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag. 853 . '[^\\[]*+' // Not an opening bracket. 854 . ')*+' 855 . '\\[\\/\\2\\]' // Closing shortcode tag. 856 . ')?' 857 . ')' 858 . ')' 859 . '(?:' . $spaces . ')*+' // Optional trailing whitespace. 860 . '<\\/p>' // Closing paragraph. 861 . '/'; 862 // phpcs:enable 863 864 return preg_replace( $pattern, '$1', $text ); 865 } 866 867 /** 868 * Checks to see if a string is utf8 encoded. 869 * 870 * NOTE: This function checks for 5-Byte sequences, UTF8 871 * has Bytes Sequences with a maximum length of 4. 872 * 873 * @author bmorel at ssi dot fr (modified) 874 * @since 1.2.1 875 * 876 * @param string $str The string to be checked 877 * @return bool True if $str fits a UTF-8 model, false otherwise. 
*/
function seems_utf8( $str ) {
	mbstring_binary_safe_encoding();
	$length = strlen( $str );
	reset_mbstring_encoding();

	// Each model is: bit mask, expected bits under the mask, number of continuation bytes.
	$lead_byte_models = array(
		array( 0x80, 0x00, 0 ), // 0bbbbbbb
		array( 0xE0, 0xC0, 1 ), // 110bbbbb
		array( 0xF0, 0xE0, 2 ), // 1110bbbb
		array( 0xF8, 0xF0, 3 ), // 11110bbb
		array( 0xFC, 0xF8, 4 ), // 111110bb
		array( 0xFE, 0xFC, 5 ), // 1111110b
	);

	$position = 0;
	while ( $position < $length ) {
		$lead      = ord( $str[ $position ] );
		$remaining = null;

		foreach ( $lead_byte_models as $model ) {
			if ( ( $lead & $model[0] ) === $model[1] ) {
				$remaining = $model[2];
				break;
			}
		}

		if ( null === $remaining ) {
			return false; // Does not match any model.
		}

		// Every announced continuation byte must exist and look like 10bbbbbb.
		for ( ; $remaining > 0; --$remaining ) {
			++$position;
			if ( $position === $length || 0x80 !== ( ord( $str[ $position ] ) & 0xC0 ) ) {
				return false;
			}
		}

		++$position;
	}

	return true;
}

/**
 * Converts a number of special characters into their HTML entities.
 *
 * Specifically deals with: `&`, `<`, `>`, `"`, and `'`.
 *
 * `$quote_style` can be set to ENT_COMPAT to encode `"` to
 * `&quot;`, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
 *
 * @since 1.2.2
 * @since 5.5.0 `$quote_style` also accepts `ENT_XML1`.
 * @access private
 *
 * @param string       $string        The text which is to be encoded.
 * @param int|string   $quote_style   Optional. Converts double quotes if set to ENT_COMPAT,
 *                                    both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES.
 *                                    Converts single and double quotes, as well as converting HTML
 *                                    named entities (that are not also XML named entities) to their
 *                                    code points if set to ENT_XML1. Also compatible with old values;
 *                                    converting single quotes if set to 'single',
 *                                    double if set to 'double' or both if otherwise set.
 *                                    Default is ENT_NOQUOTES.
 * @param false|string $charset       Optional. The character encoding of the string. Default false.
* @param bool         $double_encode Optional. Whether to encode existing HTML entities. Default false.
 * @return string The encoded text with HTML entities.
 */
function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Don't bother if there are no specialchars - saves some processing.
	if ( ! preg_match( '/[&<>"\']/', $string ) ) {
		return $string;
	}

	// Account for the previous behaviour of the function when the $quote_style is not an accepted value.
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ENT_XML1 === $quote_style ) {
		$quote_style = ENT_QUOTES | ENT_XML1;
	} elseif ( ! in_array( $quote_style, array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	// Store the site charset as a static to avoid multiple calls to wp_load_alloptions().
	if ( ! $charset ) {
		static $_charset = null;
		if ( ! isset( $_charset ) ) {
			$alloptions = wp_load_alloptions();
			$_charset   = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
		}
		$charset = $_charset;
	}

	if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ), true ) ) {
		$charset = 'UTF-8';
	}

	// Remember the caller's choice: the legacy 'single' value is handled after htmlspecialchars().
	$_quote_style = $quote_style;

	if ( 'double' === $quote_style ) {
		$quote_style  = ENT_COMPAT;
		$_quote_style = ENT_COMPAT;
	} elseif ( 'single' === $quote_style ) {
		$quote_style = ENT_NOQUOTES;
	}

	if ( ! $double_encode ) {
		// Guarantee every &entity; is valid, convert &garbage; into &amp;garbage;
		// This is required for PHP < 5.4.0 because ENT_HTML401 flag is unavailable.
		$string = wp_kses_normalize_entities( $string, ( $quote_style & ENT_XML1 ) ? 'xml' : 'html' );
	}

	$string = htmlspecialchars( $string, $quote_style, $charset, $double_encode );

	// Back-compat: 'single' encodes only single quotes, which htmlspecialchars() cannot do on its own.
	if ( 'single' === $_quote_style ) {
		$string = str_replace( "'", '&#039;', $string );
	}

	return $string;
}

/**
 * Converts a number of HTML entities into their special characters.
 *
 * Specifically deals with: `&`, `<`, `>`, `"`, and `'`.
 *
 * `$quote_style` can be set to ENT_COMPAT to decode `"` entities,
 * or ENT_QUOTES to do both `"` and `'`. Default is ENT_NOQUOTES where no quotes are decoded.
 *
 * Named, decimal and hexadecimal forms are recognised, including numeric
 * entities written with extra leading zeros (for example `&#0039;`).
 *
 * @since 2.8.0
 *
 * @param string     $string      The text which is to be decoded.
 * @param string|int $quote_style Optional. Converts double quotes if set to ENT_COMPAT,
 *                                both single and double if set to ENT_QUOTES or
 *                                none if set to ENT_NOQUOTES.
 *                                Also compatible with old _wp_specialchars() values;
 *                                converting single quotes if set to 'single',
 *                                double if set to 'double' or both if otherwise set.
 *                                Default is ENT_NOQUOTES.
 * @return string The decoded text without HTML entities.
 */
function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Don't bother if there are no entities - saves a lot of processing.
	if ( strpos( $string, '&' ) === false ) {
		return $string;
	}

	// Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value.
	if ( empty( $quote_style ) ) {
		$quote_style = ENT_NOQUOTES;
	} elseif ( ! in_array( $quote_style, array( ENT_NOQUOTES, ENT_COMPAT, ENT_QUOTES, 'single', 'double' ), true ) ) {
		$quote_style = ENT_QUOTES;
	}

	// More complete than get_html_translation_table( HTML_SPECIALCHARS ).
	// The *_preg tables normalize zero-padded numeric entities to the exact
	// spelling used as a key in the matching translation table.
	$single      = array(
		'&#039;' => '\'',
		'&#x27;' => '\'',
	);
	$single_preg = array(
		'/&#0*39;/'   => '&#039;',
		'/&#x0*27;/i' => '&#x27;',
	);
	$double      = array(
		'&quot;' => '"',
		'&#034;' => '"',
		'&#x22;' => '"',
	);
	$double_preg = array(
		'/&#0*34;/'   => '&#034;',
		'/&#x0*22;/i' => '&#x22;',
	);
	$others      = array(
		'&lt;'   => '<',
		'&#060;' => '<',
		'&gt;'   => '>',
		'&#062;' => '>',
		'&amp;'  => '&',
		'&#038;' => '&',
		'&#x26;' => '&',
	);
	$others_preg = array(
		'/&#0*60;/'   => '&#060;',
		'/&#0*62;/'   => '&#062;',
		'/&#0*38;/'   => '&#038;',
		'/&#x0*26;/i' => '&#x26;',
	);

	if ( ENT_QUOTES === $quote_style ) {
		$translation      = array_merge( $single, $double, $others );
		$translation_preg = array_merge( $single_preg, $double_preg, $others_preg );
	} elseif ( ENT_COMPAT === $quote_style || 'double' === $quote_style ) {
		$translation      = array_merge( $double, $others );
		$translation_preg = array_merge( $double_preg, $others_preg );
	} elseif ( 'single' === $quote_style ) {
		$translation      = array_merge( $single, $others );
		$translation_preg = array_merge( $single_preg, $others_preg );
	} elseif ( ENT_NOQUOTES === $quote_style ) {
		$translation      = $others;
		$translation_preg = $others_preg;
	}

	// Remove zero padding on numeric entities.
	$string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string );

	// Replace characters according to translation table.
	return strtr( $string, $translation );
}

/**
 * Checks for invalid UTF8 in a string.
 *
 * @since 2.8.0
 *
 * @param string $string The text which is to be checked.
 * @param bool   $strip  Optional. Whether to attempt to strip out invalid UTF8. Default false.
 * @return string The checked text.
*/
function wp_check_invalid_utf8( $string, $strip = false ) {
	$string = (string) $string;

	if ( 0 === strlen( $string ) ) {
		return '';
	}

	// Store the site charset as a static to avoid multiple calls to get_option().
	static $is_utf8 = null;
	if ( ! isset( $is_utf8 ) ) {
		$is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ), true );
	}
	// Nothing is validated for sites that do not use a UTF-8 charset.
	if ( ! $is_utf8 ) {
		return $string;
	}

	// Check for support for utf8 in the installed PCRE library once and store the result in a static.
	static $utf8_pcre = null;
	if ( ! isset( $utf8_pcre ) ) {
		// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
		$utf8_pcre = @preg_match( '/^./u', 'a' );
	}
	// We can't demand utf8 in the PCRE installation, so just return the string in those cases.
	if ( ! $utf8_pcre ) {
		return $string;
	}

	// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged -- preg_match fails when it encounters invalid UTF8 in $string.
	if ( 1 === @preg_match( '/^./us', $string ) ) {
		return $string;
	}

	// Attempt to strip the bad chars if requested (not recommended).
	if ( $strip && function_exists( 'iconv' ) ) {
		$stripped = iconv( 'utf-8', 'utf-8', $string );

		// iconv() returns false when the conversion fails; keep the documented string return type.
		return false === $stripped ? '' : $stripped;
	}

	return '';
}

/**
 * Encodes the Unicode values to be used in the URI.
 *
 * @since 1.5.0
 * @since 5.8.3 Added the `encode_ascii_characters` parameter.
 *
 * @param string $utf8_string             String to encode.
 * @param int    $length                  Max length of the string
 * @param bool   $encode_ascii_characters Whether to encode ascii characters such as < " '
 * @return string String with Unicode encoded for URI.
*/
function utf8_uri_encode( $utf8_string, $length = 0, $encode_ascii_characters = false ) {
	$encoded         = '';
	$pending_octets  = array();
	$expected_octets = 1;
	$encoded_length  = 0;

	mbstring_binary_safe_encoding();
	$byte_count = strlen( $utf8_string );
	reset_mbstring_encoding();

	for ( $offset = 0; $offset < $byte_count; ++$offset ) {
		$byte = ord( $utf8_string[ $offset ] );

		// ASCII bytes are copied through, or percent-encoded on request.
		if ( $byte < 128 ) {
			$piece = chr( $byte );
			if ( $encode_ascii_characters ) {
				$piece = rawurlencode( $piece );
			}
			$piece_length = strlen( $piece );

			if ( $length && ( $encoded_length + $piece_length ) > $length ) {
				break;
			}

			$encoded        .= $piece;
			$encoded_length += $piece_length;
			continue;
		}

		// First byte of a new sequence: its value decides how many octets are collected.
		if ( ! $pending_octets ) {
			if ( $byte < 224 ) {
				$expected_octets = 2;
			} elseif ( $byte < 240 ) {
				$expected_octets = 3;
			} else {
				$expected_octets = 4;
			}
		}

		$pending_octets[] = $byte;

		// Each octet is written as "%xx" (3 characters); stop before exceeding the limit.
		if ( $length && ( $encoded_length + ( $expected_octets * 3 ) ) > $length ) {
			break;
		}

		if ( count( $pending_octets ) == $expected_octets ) {
			foreach ( $pending_octets as $octet ) {
				$encoded .= '%' . dechex( $octet );
			}

			$encoded_length += $expected_octets * 3;

			$pending_octets  = array();
			$expected_octets = 1;
		}
	}

	return $encoded;
}

1203 /** 1204 * Converts all accent characters to ASCII characters. 1205 * 1206 * If there are no accent characters, then the string given is just returned. 
1207 * 1208 * **Accent characters converted:** 1209 * 1210 * Currency signs: 1211 * 1212 * | Code | Glyph | Replacement | Description | 1213 * | -------- | ----- | ----------- | ------------------- | 1214 * | U+00A3 | £ | (empty) | British Pound sign | 1215 * | U+20AC | € | E | Euro sign | 1216 * 1217 * Decompositions for Latin-1 Supplement: 1218 * 1219 * | Code | Glyph | Replacement | Description | 1220 * | ------- | ----- | ----------- | -------------------------------------- | 1221 * | U+00AA | ª | a | Feminine ordinal indicator | 1222 * | U+00BA | º | o | Masculine ordinal indicator | 1223 * | U+00C0 | À | A | Latin capital letter A with grave | 1224 * | U+00C1 | Á | A | Latin capital letter A with acute | 1225 * | U+00C2 |  | A | Latin capital letter A with circumflex | 1226 * | U+00C3 | à | A | Latin capital letter A with tilde | 1227 * | U+00C4 | Ä | A | Latin capital letter A with diaeresis | 1228 * | U+00C5 | Å | A | Latin capital letter A with ring above | 1229 * | U+00C6 | Æ | AE | Latin capital letter AE | 1230 * | U+00C7 | Ç | C | Latin capital letter C with cedilla | 1231 * | U+00C8 | È | E | Latin capital letter E with grave | 1232 * | U+00C9 | É | E | Latin capital letter E with acute | 1233 * | U+00CA | Ê | E | Latin capital letter E with circumflex | 1234 * | U+00CB | Ë | E | Latin capital letter E with diaeresis | 1235 * | U+00CC | Ì | I | Latin capital letter I with grave | 1236 * | U+00CD | Í | I | Latin capital letter I with acute | 1237 * | U+00CE | Î | I | Latin capital letter I with circumflex | 1238 * | U+00CF | Ï | I | Latin capital letter I with diaeresis | 1239 * | U+00D0 | Ð | D | Latin capital letter Eth | 1240 * | U+00D1 | Ñ | N | Latin capital letter N with tilde | 1241 * | U+00D2 | Ò | O | Latin capital letter O with grave | 1242 * | U+00D3 | Ó | O | Latin capital letter O with acute | 1243 * | U+00D4 | Ô | O | Latin capital letter O with circumflex | 1244 * | U+00D5 | Õ | O | Latin capital letter O with tilde | 1245 * | U+00D6 | Ö 
| O | Latin capital letter O with diaeresis | 1246 * | U+00D8 | Ø | O | Latin capital letter O with stroke | 1247 * | U+00D9 | Ù | U | Latin capital letter U with grave | 1248 * | U+00DA | Ú | U | Latin capital letter U with acute | 1249 * | U+00DB | Û | U | Latin capital letter U with circumflex | 1250 * | U+00DC | Ü | U | Latin capital letter U with diaeresis | 1251 * | U+00DD | Ý | Y | Latin capital letter Y with acute | 1252 * | U+00DE | Þ | TH | Latin capital letter Thorn | 1253 * | U+00DF | ß | s | Latin small letter sharp s | 1254 * | U+00E0 | à | a | Latin small letter a with grave | 1255 * | U+00E1 | á | a | Latin small letter a with acute | 1256 * | U+00E2 | â | a | Latin small letter a with circumflex | 1257 * | U+00E3 | ã | a | Latin small letter a with tilde | 1258 * | U+00E4 | ä | a | Latin small letter a with diaeresis | 1259 * | U+00E5 | å | a | Latin small letter a with ring above | 1260 * | U+00E6 | æ | ae | Latin small letter ae | 1261 * | U+00E7 | ç | c | Latin small letter c with cedilla | 1262 * | U+00E8 | è | e | Latin small letter e with grave | 1263 * | U+00E9 | é | e | Latin small letter e with acute | 1264 * | U+00EA | ê | e | Latin small letter e with circumflex | 1265 * | U+00EB | ë | e | Latin small letter e with diaeresis | 1266 * | U+00EC | ì | i | Latin small letter i with grave | 1267 * | U+00ED | í | i | Latin small letter i with acute | 1268 * | U+00EE | î | i | Latin small letter i with circumflex | 1269 * | U+00EF | ï | i | Latin small letter i with diaeresis | 1270 * | U+00F0 | ð | d | Latin small letter Eth | 1271 * | U+00F1 | ñ | n | Latin small letter n with tilde | 1272 * | U+00F2 | ò | o | Latin small letter o with grave | 1273 * | U+00F3 | ó | o | Latin small letter o with acute | 1274 * | U+00F4 | ô | o | Latin small letter o with circumflex | 1275 * | U+00F5 | õ | o | Latin small letter o with tilde | 1276 * | U+00F6 | ö | o | Latin small letter o with diaeresis | 1277 * | U+00F8 | ø | o | Latin small letter o with 
stroke | 1278 * | U+00F9 | ù | u | Latin small letter u with grave | 1279 * | U+00FA | ú | u | Latin small letter u with acute | 1280 * | U+00FB | û | u | Latin small letter u with circumflex | 1281 * | U+00FC | ü | u | Latin small letter u with diaeresis | 1282 * | U+00FD | ý | y | Latin small letter y with acute | 1283 * | U+00FE | þ | th | Latin small letter Thorn | 1284 * | U+00FF | ÿ | y | Latin small letter y with diaeresis | 1285 * 1286 * Decompositions for Latin Extended-A: 1287 * 1288 * | Code | Glyph | Replacement | Description | 1289 * | ------- | ----- | ----------- | ------------------------------------------------- | 1290 * | U+0100 | Ā | A | Latin capital letter A with macron | 1291 * | U+0101 | ā | a | Latin small letter a with macron | 1292 * | U+0102 | Ă | A | Latin capital letter A with breve | 1293 * | U+0103 | ă | a | Latin small letter a with breve | 1294 * | U+0104 | Ą | A | Latin capital letter A with ogonek | 1295 * | U+0105 | ą | a | Latin small letter a with ogonek | 1296 * | U+0106 | Ć | C | Latin capital letter C with acute | 1297 * | U+0107 | ć | c | Latin small letter c with acute | 1298 * | U+0108 | Ĉ | C | Latin capital letter C with circumflex | 1299 * | U+0109 | ĉ | c | Latin small letter c with circumflex | 1300 * | U+010A | Ċ | C | Latin capital letter C with dot above | 1301 * | U+010B | ċ | c | Latin small letter c with dot above | 1302 * | U+010C | Č | C | Latin capital letter C with caron | 1303 * | U+010D | č | c | Latin small letter c with caron | 1304 * | U+010E | Ď | D | Latin capital letter D with caron | 1305 * | U+010F | ď | d | Latin small letter d with caron | 1306 * | U+0110 | Đ | D | Latin capital letter D with stroke | 1307 * | U+0111 | đ | d | Latin small letter d with stroke | 1308 * | U+0112 | Ē | E | Latin capital letter E with macron | 1309 * | U+0113 | ē | e | Latin small letter e with macron | 1310 * | U+0114 | Ĕ | E | Latin capital letter E with breve | 1311 * | U+0115 | ĕ | e | Latin small letter e with 
breve | 1312 * | U+0116 | Ė | E | Latin capital letter E with dot above | 1313 * | U+0117 | ė | e | Latin small letter e with dot above | 1314 * | U+0118 | Ę | E | Latin capital letter E with ogonek | 1315 * | U+0119 | ę | e | Latin small letter e with ogonek | 1316 * | U+011A | Ě | E | Latin capital letter E with caron | 1317 * | U+011B | ě | e | Latin small letter e with caron | 1318 * | U+011C | Ĝ | G | Latin capital letter G with circumflex | 1319 * | U+011D | ĝ | g | Latin small letter g with circumflex | 1320 * | U+011E | Ğ | G | Latin capital letter G with breve | 1321 * | U+011F | ğ | g | Latin small letter g with breve | 1322 * | U+0120 | Ġ | G | Latin capital letter G with dot above | 1323 * | U+0121 | ġ | g | Latin small letter g with dot above | 1324 * | U+0122 | Ģ | G | Latin capital letter G with cedilla | 1325 * | U+0123 | ģ | g | Latin small letter g with cedilla | 1326 * | U+0124 | Ĥ | H | Latin capital letter H with circumflex | 1327 * | U+0125 | ĥ | h | Latin small letter h with circumflex | 1328 * | U+0126 | Ħ | H | Latin capital letter H with stroke | 1329 * | U+0127 | ħ | h | Latin small letter h with stroke | 1330 * | U+0128 | Ĩ | I | Latin capital letter I with tilde | 1331 * | U+0129 | ĩ | i | Latin small letter i with tilde | 1332 * | U+012A | Ī | I | Latin capital letter I with macron | 1333 * | U+012B | ī | i | Latin small letter i with macron | 1334 * | U+012C | Ĭ | I | Latin capital letter I with breve | 1335 * | U+012D | ĭ | i | Latin small letter i with breve | 1336 * | U+012E | Į | I | Latin capital letter I with ogonek | 1337 * | U+012F | į | i | Latin small letter i with ogonek | 1338 * | U+0130 | İ | I | Latin capital letter I with dot above | 1339 * | U+0131 | ı | i | Latin small letter dotless i | 1340 * | U+0132 | IJ | IJ | Latin capital ligature IJ | 1341 * | U+0133 | ij | ij | Latin small ligature ij | 1342 * | U+0134 | Ĵ | J | Latin capital letter J with circumflex | 1343 * | U+0135 | ĵ | j | Latin small letter j with 
circumflex | 1344 * | U+0136 | Ķ | K | Latin capital letter K with cedilla | 1345 * | U+0137 | ķ | k | Latin small letter k with cedilla | 1346 * | U+0138 | ĸ | k | Latin small letter Kra | 1347 * | U+0139 | Ĺ | L | Latin capital letter L with acute | 1348 * | U+013A | ĺ | l | Latin small letter l with acute | 1349 * | U+013B | Ļ | L | Latin capital letter L with cedilla | 1350 * | U+013C | ļ | l | Latin small letter l with cedilla | 1351 * | U+013D | Ľ | L | Latin capital letter L with caron | 1352 * | U+013E | ľ | l | Latin small letter l with caron | 1353 * | U+013F | Ŀ | L | Latin capital letter L with middle dot | 1354 * | U+0140 | ŀ | l | Latin small letter l with middle dot | 1355 * | U+0141 | Ł | L | Latin capital letter L with stroke | 1356 * | U+0142 | ł | l | Latin small letter l with stroke | 1357 * | U+0143 | Ń | N | Latin capital letter N with acute | 1358 * | U+0144 | ń | n | Latin small letter N with acute | 1359 * | U+0145 | Ņ | N | Latin capital letter N with cedilla | 1360 * | U+0146 | ņ | n | Latin small letter n with cedilla | 1361 * | U+0147 | Ň | N | Latin capital letter N with caron | 1362 * | U+0148 | ň | n | Latin small letter n with caron | 1363 * | U+0149 | ʼn | n | Latin small letter n preceded by apostrophe | 1364 * | U+014A | Ŋ | N | Latin capital letter Eng | 1365 * | U+014B | ŋ | n | Latin small letter Eng | 1366 * | U+014C | Ō | O | Latin capital letter O with macron | 1367 * | U+014D | ō | o | Latin small letter o with macron | 1368 * | U+014E | Ŏ | O | Latin capital letter O with breve | 1369 * | U+014F | ŏ | o | Latin small letter o with breve | 1370 * | U+0150 | Ő | O | Latin capital letter O with double acute | 1371 * | U+0151 | ő | o | Latin small letter o with double acute | 1372 * | U+0152 | Œ | OE | Latin capital ligature OE | 1373 * | U+0153 | œ | oe | Latin small ligature oe | 1374 * | U+0154 | Ŕ | R | Latin capital letter R with acute | 1375 * | U+0155 | ŕ | r | Latin small letter r with acute | 1376 * | U+0156 | Ŗ | R 
| Latin capital letter R with cedilla | 1377 * | U+0157 | ŗ | r | Latin small letter r with cedilla | 1378 * | U+0158 | Ř | R | Latin capital letter R with caron | 1379 * | U+0159 | ř | r | Latin small letter r with caron | 1380 * | U+015A | Ś | S | Latin capital letter S with acute | 1381 * | U+015B | ś | s | Latin small letter s with acute | 1382 * | U+015C | Ŝ | S | Latin capital letter S with circumflex | 1383 * | U+015D | ŝ | s | Latin small letter s with circumflex | 1384 * | U+015E | Ş | S | Latin capital letter S with cedilla | 1385 * | U+015F | ş | s | Latin small letter s with cedilla | 1386 * | U+0160 | Š | S | Latin capital letter S with caron | 1387 * | U+0161 | š | s | Latin small letter s with caron | 1388 * | U+0162 | Ţ | T | Latin capital letter T with cedilla | 1389 * | U+0163 | ţ | t | Latin small letter t with cedilla | 1390 * | U+0164 | Ť | T | Latin capital letter T with caron | 1391 * | U+0165 | ť | t | Latin small letter t with caron | 1392 * | U+0166 | Ŧ | T | Latin capital letter T with stroke | 1393 * | U+0167 | ŧ | t | Latin small letter t with stroke | 1394 * | U+0168 | Ũ | U | Latin capital letter U with tilde | 1395 * | U+0169 | ũ | u | Latin small letter u with tilde | 1396 * | U+016A | Ū | U | Latin capital letter U with macron | 1397 * | U+016B | ū | u | Latin small letter u with macron | 1398 * | U+016C | Ŭ | U | Latin capital letter U with breve | 1399 * | U+016D | ŭ | u | Latin small letter u with breve | 1400 * | U+016E | Ů | U | Latin capital letter U with ring above | 1401 * | U+016F | ů | u | Latin small letter u with ring above | 1402 * | U+0170 | Ű | U | Latin capital letter U with double acute | 1403 * | U+0171 | ű | u | Latin small letter u with double acute | 1404 * | U+0172 | Ų | U | Latin capital letter U with ogonek | 1405 * | U+0173 | ų | u | Latin small letter u with ogonek | 1406 * | U+0174 | Ŵ | W | Latin capital letter W with circumflex | 1407 * | U+0175 | ŵ | w | Latin small letter w with circumflex | 1408 * | 
U+0176 | Ŷ | Y | Latin capital letter Y with circumflex | 1409 * | U+0177 | ŷ | y | Latin small letter y with circumflex | 1410 * | U+0178 | Ÿ | Y | Latin capital letter Y with diaeresis | 1411 * | U+0179 | Ź | Z | Latin capital letter Z with acute | 1412 * | U+017A | ź | z | Latin small letter z with acute | 1413 * | U+017B | Ż | Z | Latin capital letter Z with dot above | 1414 * | U+017C | ż | z | Latin small letter z with dot above | 1415 * | U+017D | Ž | Z | Latin capital letter Z with caron | 1416 * | U+017E | ž | z | Latin small letter z with caron | 1417 * | U+017F | ſ | s | Latin small letter long s | 1418 * | U+01A0 | Ơ | O | Latin capital letter O with horn | 1419 * | U+01A1 | ơ | o | Latin small letter o with horn | 1420 * | U+01AF | Ư | U | Latin capital letter U with horn | 1421 * | U+01B0 | ư | u | Latin small letter u with horn | 1422 * | U+01CD | Ǎ | A | Latin capital letter A with caron | 1423 * | U+01CE | ǎ | a | Latin small letter a with caron | 1424 * | U+01CF | Ǐ | I | Latin capital letter I with caron | 1425 * | U+01D0 | ǐ | i | Latin small letter i with caron | 1426 * | U+01D1 | Ǒ | O | Latin capital letter O with caron | 1427 * | U+01D2 | ǒ | o | Latin small letter o with caron | 1428 * | U+01D3 | Ǔ | U | Latin capital letter U with caron | 1429 * | U+01D4 | ǔ | u | Latin small letter u with caron | 1430 * | U+01D5 | Ǖ | U | Latin capital letter U with diaeresis and macron | 1431 * | U+01D6 | ǖ | u | Latin small letter u with diaeresis and macron | 1432 * | U+01D7 | Ǘ | U | Latin capital letter U with diaeresis and acute | 1433 * | U+01D8 | ǘ | u | Latin small letter u with diaeresis and acute | 1434 * | U+01D9 | Ǚ | U | Latin capital letter U with diaeresis and caron | 1435 * | U+01DA | ǚ | u | Latin small letter u with diaeresis and caron | 1436 * | U+01DB | Ǜ | U | Latin capital letter U with diaeresis and grave | 1437 * | U+01DC | ǜ | u | Latin small letter u with diaeresis and grave | 1438 * 1439 * Decompositions for Latin Extended-B: 
1440 * 1441 * | Code | Glyph | Replacement | Description | 1442 * | -------- | ----- | ----------- | ----------------------------------------- | 1443 * | U+0218 | Ș | S | Latin capital letter S with comma below | 1444 * | U+0219 | ș | s | Latin small letter s with comma below | 1445 * | U+021A | Ț | T | Latin capital letter T with comma below | 1446 * | U+021B | ț | t | Latin small letter t with comma below | 1447 * 1448 * Vowels with diacritic (Chinese, Hanyu Pinyin): 1449 * 1450 * | Code | Glyph | Replacement | Description | 1451 * | -------- | ----- | ----------- | ----------------------------------------------------- | 1452 * | U+0251 | ɑ | a | Latin small letter alpha | 1453 * | U+1EA0 | Ạ | A | Latin capital letter A with dot below | 1454 * | U+1EA1 | ạ | a | Latin small letter a with dot below | 1455 * | U+1EA2 | Ả | A | Latin capital letter A with hook above | 1456 * | U+1EA3 | ả | a | Latin small letter a with hook above | 1457 * | U+1EA4 | Ấ | A | Latin capital letter A with circumflex and acute | 1458 * | U+1EA5 | ấ | a | Latin small letter a with circumflex and acute | 1459 * | U+1EA6 | Ầ | A | Latin capital letter A with circumflex and grave | 1460 * | U+1EA7 | ầ | a | Latin small letter a with circumflex and grave | 1461 * | U+1EA8 | Ẩ | A | Latin capital letter A with circumflex and hook above | 1462 * | U+1EA9 | ẩ | a | Latin small letter a with circumflex and hook above | 1463 * | U+1EAA | Ẫ | A | Latin capital letter A with circumflex and tilde | 1464 * | U+1EAB | ẫ | a | Latin small letter a with circumflex and tilde | 1465 * | U+1EAC | Ậ | A | Latin capital letter A with circumflex and dot below | 1466 * | U+1EAD | ậ | a | Latin small letter a with circumflex and dot below | 1467 * | U+1EAE | Ắ | A | Latin capital letter A with breve and acute | 1468 * | U+1EAF | ắ | a | Latin small letter a with breve and acute | 1469 * | U+1EB0 | Ằ | A | Latin capital letter A with breve and grave | 1470 * | U+1EB1 | ằ | a | Latin small letter a with breve and 
grave | 1471 * | U+1EB2 | Ẳ | A | Latin capital letter A with breve and hook above | 1472 * | U+1EB3 | ẳ | a | Latin small letter a with breve and hook above | 1473 * | U+1EB4 | Ẵ | A | Latin capital letter A with breve and tilde | 1474 * | U+1EB5 | ẵ | a | Latin small letter a with breve and tilde | 1475 * | U+1EB6 | Ặ | A | Latin capital letter A with breve and dot below | 1476 * | U+1EB7 | ặ | a | Latin small letter a with breve and dot below | 1477 * | U+1EB8 | Ẹ | E | Latin capital letter E with dot below | 1478 * | U+1EB9 | ẹ | e | Latin small letter e with dot below | 1479 * | U+1EBA | Ẻ | E | Latin capital letter E with hook above | 1480 * | U+1EBB | ẻ | e | Latin small letter e with hook above | 1481 * | U+1EBC | Ẽ | E | Latin capital letter E with tilde | 1482 * | U+1EBD | ẽ | e | Latin small letter e with tilde | 1483 * | U+1EBE | Ế | E | Latin capital letter E with circumflex and acute | 1484 * | U+1EBF | ế | e | Latin small letter e with circumflex and acute | 1485 * | U+1EC0 | Ề | E | Latin capital letter E with circumflex and grave | 1486 * | U+1EC1 | ề | e | Latin small letter e with circumflex and grave | 1487 * | U+1EC2 | Ể | E | Latin capital letter E with circumflex and hook above | 1488 * | U+1EC3 | ể | e | Latin small letter e with circumflex and hook above | 1489 * | U+1EC4 | Ễ | E | Latin capital letter E with circumflex and tilde | 1490 * | U+1EC5 | ễ | e | Latin small letter e with circumflex and tilde | 1491 * | U+1EC6 | Ệ | E | Latin capital letter E with circumflex and dot below | 1492 * | U+1EC7 | ệ | e | Latin small letter e with circumflex and dot below | 1493 * | U+1EC8 | Ỉ | I | Latin capital letter I with hook above | 1494 * | U+1EC9 | ỉ | i | Latin small letter i with hook above | 1495 * | U+1ECA | Ị | I | Latin capital letter I with dot below | 1496 * | U+1ECB | ị | i | Latin small letter i with dot below | 1497 * | U+1ECC | Ọ | O | Latin capital letter O with dot below | 1498 * | U+1ECD | ọ | o | Latin small letter o with dot 
below | 1499 * | U+1ECE | Ỏ | O | Latin capital letter O with hook above | 1500 * | U+1ECF | ỏ | o | Latin small letter o with hook above | 1501 * | U+1ED0 | Ố | O | Latin capital letter O with circumflex and acute | 1502 * | U+1ED1 | ố | o | Latin small letter o with circumflex and acute | 1503 * | U+1ED2 | Ồ | O | Latin capital letter O with circumflex and grave | 1504 * | U+1ED3 | ồ | o | Latin small letter o with circumflex and grave | 1505 * | U+1ED4 | Ổ | O | Latin capital letter O with circumflex and hook above | 1506 * | U+1ED5 | ổ | o | Latin small letter o with circumflex and hook above | 1507 * | U+1ED6 | Ỗ | O | Latin capital letter O with circumflex and tilde | 1508 * | U+1ED7 | ỗ | o | Latin small letter o with circumflex and tilde | 1509 * | U+1ED8 | Ộ | O | Latin capital letter O with circumflex and dot below | 1510 * | U+1ED9 | ộ | o | Latin small letter o with circumflex and dot below | 1511 * | U+1EDA | Ớ | O | Latin capital letter O with horn and acute | 1512 * | U+1EDB | ớ | o | Latin small letter o with horn and acute | 1513 * | U+1EDC | Ờ | O | Latin capital letter O with horn and grave | 1514 * | U+1EDD | ờ | o | Latin small letter o with horn and grave | 1515 * | U+1EDE | Ở | O | Latin capital letter O with horn and hook above | 1516 * | U+1EDF | ở | o | Latin small letter o with horn and hook above | 1517 * | U+1EE0 | Ỡ | O | Latin capital letter O with horn and tilde | 1518 * | U+1EE1 | ỡ | o | Latin small letter o with horn and tilde | 1519 * | U+1EE2 | Ợ | O | Latin capital letter O with horn and dot below | 1520 * | U+1EE3 | ợ | o | Latin small letter o with horn and dot below | 1521 * | U+1EE4 | Ụ | U | Latin capital letter U with dot below | 1522 * | U+1EE5 | ụ | u | Latin small letter u with dot below | 1523 * | U+1EE6 | Ủ | U | Latin capital letter U with hook above | 1524 * | U+1EE7 | ủ | u | Latin small letter u with hook above | 1525 * | U+1EE8 | Ứ | U | Latin capital letter U with horn and acute | 1526 * | U+1EE9 | ứ | u | 
Latin small letter u with horn and acute | 1527 * | U+1EEA | Ừ | U | Latin capital letter U with horn and grave | 1528 * | U+1EEB | ừ | u | Latin small letter u with horn and grave | 1529 * | U+1EEC | Ử | U | Latin capital letter U with horn and hook above | 1530 * | U+1EED | ử | u | Latin small letter u with horn and hook above | 1531 * | U+1EEE | Ữ | U | Latin capital letter U with horn and tilde | 1532 * | U+1EEF | ữ | u | Latin small letter u with horn and tilde | 1533 * | U+1EF0 | Ự | U | Latin capital letter U with horn and dot below | 1534 * | U+1EF1 | ự | u | Latin small letter u with horn and dot below | 1535 * | U+1EF2 | Ỳ | Y | Latin capital letter Y with grave | 1536 * | U+1EF3 | ỳ | y | Latin small letter y with grave | 1537 * | U+1EF4 | Ỵ | Y | Latin capital letter Y with dot below | 1538 * | U+1EF5 | ỵ | y | Latin small letter y with dot below | 1539 * | U+1EF6 | Ỷ | Y | Latin capital letter Y with hook above | 1540 * | U+1EF7 | ỷ | y | Latin small letter y with hook above | 1541 * | U+1EF8 | Ỹ | Y | Latin capital letter Y with tilde | 1542 * | U+1EF9 | ỹ | y | Latin small letter y with tilde | 1543 * 1544 * German (`de_DE`), German formal (`de_DE_formal`), German (Switzerland) formal (`de_CH`), 1545 * German (Switzerland) informal (`de_CH_informal`), and German (Austria) (`de_AT`) locales: 1546 * 1547 * | Code | Glyph | Replacement | Description | 1548 * | -------- | ----- | ----------- | --------------------------------------- | 1549 * | U+00C4 | Ä | Ae | Latin capital letter A with diaeresis | 1550 * | U+00E4 | ä | ae | Latin small letter a with diaeresis | 1551 * | U+00D6 | Ö | Oe | Latin capital letter O with diaeresis | 1552 * | U+00F6 | ö | oe | Latin small letter o with diaeresis | 1553 * | U+00DC | Ü | Ue | Latin capital letter U with diaeresis | 1554 * | U+00FC | ü | ue | Latin small letter u with diaeresis | 1555 * | U+00DF | ß | ss | Latin small letter sharp s | 1556 * 1557 * Danish (`da_DK`) locale: 1558 * 1559 * | Code | Glyph | 
Replacement | Description                             |
 * | -------- | ----- | ----------- | --------------------------------------- |
 * | U+00C6   | Æ     | Ae          | Latin capital letter AE                 |
 * | U+00E6   | æ     | ae          | Latin small letter ae                   |
 * | U+00D8   | Ø     | Oe          | Latin capital letter O with stroke      |
 * | U+00F8   | ø     | oe          | Latin small letter o with stroke        |
 * | U+00C5   | Å     | Aa          | Latin capital letter A with ring above  |
 * | U+00E5   | å     | aa          | Latin small letter a with ring above    |
 *
 * Catalan (`ca`) locale:
 *
 * | Code     | Glyph | Replacement | Description                             |
 * | -------- | ----- | ----------- | --------------------------------------- |
 * | U+00B7   | l·l   | ll          | Flown dot (between two Ls)              |
 *
 * Serbian (`sr_RS`) and Bosnian (`bs_BA`) locales:
 *
 * | Code     | Glyph | Replacement | Description                             |
 * | -------- | ----- | ----------- | --------------------------------------- |
 * | U+0110   | Đ     | DJ          | Latin capital letter D with stroke      |
 * | U+0111   | đ     | dj          | Latin small letter d with stroke        |
 *
 * @since 1.2.1
 * @since 4.6.0 Added locale support for `de_CH`, `de_CH_informal`, and `ca`.
 * @since 4.7.0 Added locale support for `sr_RS`.
 * @since 4.8.0 Added locale support for `bs_BA`.
 * @since 5.7.0 Added locale support for `de_AT`.
 * @since 6.0.0 Added the `$locale` parameter.
 *
 * @param string $string Text that might have accent characters.
 * @param string $locale Optional. The locale to use for accent removal. Some character
 *                       replacements depend on the locale being used (e.g. 'de_DE').
 *                       Defaults to the current locale.
 * @return string Filtered string with replaced "nice" characters.
 */
function remove_accents( $string, $locale = '' ) {
	// Strings without any high-bit byte have nothing to transliterate.
	if ( ! preg_match( '/[\x80-\xff]/', $string ) ) {
		return $string;
	}

	if ( seems_utf8( $string ) ) {
		$chars = array(
			// Decompositions for Latin-1 Supplement.
			'ª' => 'a', 'º' => 'o',
			'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A',
			'Æ' => 'AE', 'Ç' => 'C',
			'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
			'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
			'Ð' => 'D', 'Ñ' => 'N',
			'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O',
			'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
			'Ý' => 'Y', 'Þ' => 'TH', 'ß' => 's',
			'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a',
			'æ' => 'ae', 'ç' => 'c',
			'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
			'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
			'ð' => 'd', 'ñ' => 'n',
			'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 'ø' => 'o',
			'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
			'ý' => 'y', 'þ' => 'th', 'ÿ' => 'y', 'Ø' => 'O',
			// Decompositions for Latin Extended-A.
			'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A', 'ą' => 'a',
			'Ć' => 'C', 'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c', 'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c',
			'Ď' => 'D', 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd',
			'Ē' => 'E', 'ē' => 'e', 'Ĕ' => 'E', 'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E', 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e',
			'Ĝ' => 'G', 'ĝ' => 'g', 'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G', 'ģ' => 'g',
			'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h',
			'Ĩ' => 'I', 'ĩ' => 'i', 'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I', 'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I', 'ı' => 'i',
			// U+0132 / U+0133 ligatures: the key must be the single code point, not the letters "IJ"/"ij".
			'Ĳ' => 'IJ', 'ĳ' => 'ij',
			'Ĵ' => 'J', 'ĵ' => 'j',
			'Ķ' => 'K', 'ķ' => 'k', 'ĸ' => 'k',
			'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l', 'Ľ' => 'L', 'ľ' => 'l', 'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L', 'ł' => 'l',
			'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N', 'ņ' => 'n', 'Ň' => 'N', 'ň' => 'n',
			// U+0149 (n preceded by apostrophe) as a single code point.
			'ŉ' => 'n',
			'Ŋ' => 'N', 'ŋ' => 'n',
			'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O', 'ő' => 'o',
			'Œ' => 'OE', 'œ' => 'oe',
			'Ŕ' => 'R', 'ŕ' => 'r', 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r',
			'Ś' => 'S', 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's', 'Š' => 'S', 'š' => 's',
			'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T', 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't',
			'Ũ' => 'U', 'ũ' => 'u', 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u',
			'Ů' => 'U', 'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u',
			'Ŵ' => 'W', 'ŵ' => 'w',
			'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y',
			'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z', 'Ž' => 'Z', 'ž' => 'z',
			'ſ' => 's',
			// Decompositions for Latin Extended-B.
			'Ș' => 'S', 'ș' => 's', 'Ț' => 'T', 'ț' => 't',
			// Euro sign.
			'€' => 'E',
			// GBP (Pound) sign.
			'£' => '',
			// Vowels with diacritic (Vietnamese).
			// Unmarked.
			'Ơ' => 'O', 'ơ' => 'o', 'Ư' => 'U', 'ư' => 'u',
			// Grave accent.
			'Ầ' => 'A', 'ầ' => 'a', 'Ằ' => 'A', 'ằ' => 'a',
			'Ề' => 'E', 'ề' => 'e',
			'Ồ' => 'O', 'ồ' => 'o', 'Ờ' => 'O', 'ờ' => 'o',
			'Ừ' => 'U', 'ừ' => 'u',
			'Ỳ' => 'Y', 'ỳ' => 'y',
			// Hook.
			'Ả' => 'A', 'ả' => 'a', 'Ẩ' => 'A', 'ẩ' => 'a', 'Ẳ' => 'A', 'ẳ' => 'a',
			'Ẻ' => 'E', 'ẻ' => 'e', 'Ể' => 'E', 'ể' => 'e',
			'Ỉ' => 'I', 'ỉ' => 'i',
			'Ỏ' => 'O', 'ỏ' => 'o', 'Ổ' => 'O', 'ổ' => 'o', 'Ở' => 'O', 'ở' => 'o',
			'Ủ' => 'U', 'ủ' => 'u', 'Ử' => 'U', 'ử' => 'u',
			'Ỷ' => 'Y', 'ỷ' => 'y',
			// Tilde.
			'Ẫ' => 'A', 'ẫ' => 'a', 'Ẵ' => 'A', 'ẵ' => 'a',
			'Ẽ' => 'E', 'ẽ' => 'e', 'Ễ' => 'E', 'ễ' => 'e',
			'Ỗ' => 'O', 'ỗ' => 'o', 'Ỡ' => 'O', 'ỡ' => 'o',
			'Ữ' => 'U', 'ữ' => 'u',
			'Ỹ' => 'Y', 'ỹ' => 'y',
			// Acute accent.
			'Ấ' => 'A', 'ấ' => 'a', 'Ắ' => 'A', 'ắ' => 'a',
			'Ế' => 'E', 'ế' => 'e',
			'Ố' => 'O', 'ố' => 'o', 'Ớ' => 'O', 'ớ' => 'o',
			'Ứ' => 'U', 'ứ' => 'u',
			// Dot below.
			'Ạ' => 'A', 'ạ' => 'a', 'Ậ' => 'A', 'ậ' => 'a', 'Ặ' => 'A', 'ặ' => 'a',
			'Ẹ' => 'E', 'ẹ' => 'e', 'Ệ' => 'E', 'ệ' => 'e',
			'Ị' => 'I', 'ị' => 'i',
			'Ọ' => 'O', 'ọ' => 'o', 'Ộ' => 'O', 'ộ' => 'o', 'Ợ' => 'O', 'ợ' => 'o',
			'Ụ' => 'U', 'ụ' => 'u', 'Ự' => 'U', 'ự' => 'u',
			'Ỵ' => 'Y', 'ỵ' => 'y',
			// Vowels with diacritic (Chinese, Hanyu Pinyin).
			'ɑ' => 'a',
			// Macron.
			'Ǖ' => 'U', 'ǖ' => 'u',
			// Acute accent.
			'Ǘ' => 'U', 'ǘ' => 'u',
			// Caron.
			'Ǎ' => 'A', 'ǎ' => 'a', 'Ǐ' => 'I', 'ǐ' => 'i', 'Ǒ' => 'O', 'ǒ' => 'o',
			'Ǔ' => 'U', 'ǔ' => 'u', 'Ǚ' => 'U', 'ǚ' => 'u',
			// Grave accent.
			'Ǜ' => 'U', 'ǜ' => 'u',
		);

		// Used for locale-specific rules.
		if ( empty( $locale ) ) {
			$locale = get_locale();
		}

		/*
		 * German has various locales (de_DE, de_CH, de_AT, ...) with formal and informal variants.
		 * There is no 3-letter locale like 'def', so checking for 'de' instead of 'de_' is safe,
		 * since 'de' itself would be a valid locale too.
		 */
		if ( str_starts_with( $locale, 'de' ) ) {
			$chars['Ä'] = 'Ae';
			$chars['ä'] = 'ae';
			$chars['Ö'] = 'Oe';
			$chars['ö'] = 'oe';
			$chars['Ü'] = 'Ue';
			$chars['ü'] = 'ue';
			$chars['ß'] = 'ss';
		} elseif ( 'da_DK' === $locale ) {
			$chars['Æ'] = 'Ae';
			$chars['æ'] = 'ae';
			$chars['Ø'] = 'Oe';
			$chars['ø'] = 'oe';
			$chars['Å'] = 'Aa';
			$chars['å'] = 'aa';
		} elseif ( 'ca' === $locale ) {
			$chars['l·l'] = 'll';
		} elseif ( 'sr_RS' === $locale || 'bs_BA' === $locale ) {
			$chars['Đ'] = 'DJ';
			$chars['đ'] = 'dj';
		}

		$string = strtr( $string, $chars );
	} else {
		$chars = array();
		// Assume ISO-8859-1 if not UTF-8.
		$chars['in'] = "\x80\x83\x8a\x8e\x9a\x9e"
			. "\x9f\xa2\xa5\xb5\xc0\xc1\xc2"
			. "\xc3\xc4\xc5\xc7\xc8\xc9\xca"
			. "\xcb\xcc\xcd\xce\xcf\xd1\xd2"
			. "\xd3\xd4\xd5\xd6\xd8\xd9\xda"
			. "\xdb\xdc\xdd\xe0\xe1\xe2\xe3"
			. "\xe4\xe5\xe7\xe8\xe9\xea\xeb"
			. "\xec\xed\xee\xef\xf1\xf2\xf3"
			. "\xf4\xf5\xf6\xf8\xf9\xfa\xfb"
			. "\xfc\xfd\xff";

		// One output byte per input byte above; both strings are 65 bytes long.
		$chars['out'] = 'EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy';

		$string = strtr( $string, $chars['in'], $chars['out'] );

		// Bytes that expand to two ASCII letters are handled separately.
		$double_chars        = array();
		$double_chars['in']  = array( "\x8c", "\x9c", "\xc6", "\xd0", "\xde", "\xdf", "\xe6", "\xf0", "\xfe" );
		$double_chars['out'] = array( 'OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th' );
		$string              = str_replace( $double_chars['in'], $double_chars['out'], $string );
	}

	return $string;
}

/**
 * Sanitizes a filename, replacing whitespace with dashes.
 *
 * Removes special characters that are illegal in filenames on certain
 * operating systems and special characters requiring special escaping
 * to manipulate at the command line. Replaces spaces and consecutive
 * dashes with a single dash. Trims period, dash and underscore from beginning
 * and end of filename. It is not guaranteed that this function will return a
 * filename that is allowed to be uploaded.
 *
 * @since 2.1.0
 *
 * @param string $filename The filename to be sanitized.
 * @return string The sanitized filename.
 */
function sanitize_file_name( $filename ) {
	$filename_raw = $filename;
	$filename     = remove_accents( $filename );

	$special_chars = array( '?', '[', ']', '/', '\\', '=', '<', '>', ':', ';', ',', "'", '"', '&', '$', '#', '*', '(', ')', '|', '~', '`', '!', '{', '}', '%', '+', '’', '«', '»', '”', '“', chr( 0 ) );

	// Check for support for utf8 in the installed PCRE library once and store the result in a static.
	static $utf8_pcre = null;
	if ( ! isset( $utf8_pcre ) ) {
		// phpcs:ignore WordPress.PHP.NoSilencedErrors.Discouraged
		$utf8_pcre = @preg_match( '/^./u', 'a' );
	}

	if ( ! seems_utf8( $filename ) ) {
		$_ext     = pathinfo( $filename, PATHINFO_EXTENSION );
		$_name    = pathinfo( $filename, PATHINFO_FILENAME );
		$filename = sanitize_title_with_dashes( $_name ) . '.' . $_ext;
	}

	// Turn non-breaking spaces into plain spaces so they collapse to dashes below.
	if ( $utf8_pcre ) {
		$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
	}

	/**
	 * Filters the list of characters to remove from a filename.
	 *
	 * @since 2.8.0
	 *
	 * @param string[] $special_chars Array of characters to remove.
	 * @param string   $filename_raw  The original filename to be sanitized.
	 */
	$special_chars = apply_filters( 'sanitize_file_name_chars', $special_chars, $filename_raw );

	$filename = str_replace( $special_chars, '', $filename );
	$filename = str_replace( array( '%20', '+' ), '-', $filename );
	$filename = preg_replace( '/[\r\n\t -]+/', '-', $filename );
	$filename = trim( $filename, '.-_' );

	// A dot-less name that is itself a known extension gets a placeholder base name.
	if ( ! str_contains( $filename, '.' ) ) {
		$mime_types = wp_get_mime_types();
		$filetype   = wp_check_filetype( 'test.' . $filename, $mime_types );
		if ( $filetype['ext'] === $filename ) {
			$filename = 'unnamed-file.' . $filetype['ext'];
		}
	}

	// Split the filename into a base and extension[s].
	$parts = explode( '.', $filename );

	// Return if only one extension.
	if ( count( $parts ) <= 2 ) {
		/** This filter is documented in wp-includes/formatting.php */
		return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
	}

	// Process multiple extensions.
	$filename  = array_shift( $parts );
	$extension = array_pop( $parts );
	$mimes     = get_allowed_mime_types();

	/*
	 * Loop over any intermediate extensions. Postfix them with a trailing underscore
	 * if they are a 2 - 5 character long alpha string not in the allowed extension list.
	 */
	foreach ( (array) $parts as $part ) {
		$filename .= '.' . $part;

		if ( preg_match( '/^[a-zA-Z]{2,5}\d?$/', $part ) ) {
			$allowed = false;
			foreach ( $mimes as $ext_preg => $mime_match ) {
				$ext_preg = '!^(' . $ext_preg . ')$!i';
				if ( preg_match( $ext_preg, $part ) ) {
					$allowed = true;
					break;
				}
			}
			if ( ! $allowed ) {
				$filename .= '_';
			}
		}
	}

	$filename .= '.' . $extension;

	/**
	 * Filters a sanitized filename string.
	 *
	 * @since 2.8.0
	 *
	 * @param string $filename     Sanitized filename.
	 * @param string $filename_raw The filename prior to sanitization.
	 */
	return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
}

/**
 * Sanitizes a username, stripping out unsafe characters.
 *
 * Removes tags, octets, entities, and if strict is enabled, will only keep
 * alphanumeric, _, space, ., -, @. After sanitizing, it passes the username,
 * raw username (the username in the parameter), and the value of $strict as
 * parameters for the {@see 'sanitize_user'} filter.
 *
 * @since 2.0.0
 *
 * @param string $username The username to be sanitized.
 * @param bool   $strict   Optional. If set limits $username to specific characters.
 *                         Default false.
 * @return string The sanitized username, after passing through filters.
 */
function sanitize_user( $username, $strict = false ) {
	$raw_username = $username;
	$username     = wp_strip_all_tags( $username );
	$username     = remove_accents( $username );
	// Kill octets.
	$username = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $username );
	// Kill entities.
	$username = preg_replace( '/&.+?;/', '', $username );

	// If strict, reduce to ASCII for max portability.
	if ( $strict ) {
		$username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username );
	}

	$username = trim( $username );
	// Consolidate contiguous whitespace.
	$username = preg_replace( '|\s+|', ' ', $username );

	/**
	 * Filters a sanitized username string.
	 *
	 * @since 2.0.1
	 *
	 * @param string $username     Sanitized username.
	 * @param string $raw_username The username prior to sanitization.
	 * @param bool   $strict       Whether to limit the sanitization to specific characters.
	 */
	return apply_filters( 'sanitize_user', $username, $raw_username, $strict );
}

/**
 * Sanitizes a string key.
 *
 * Keys are used as internal identifiers. Lowercase alphanumeric characters,
 * dashes, and underscores are allowed.
 *
 * @since 3.0.0
 *
 * @param string $key String key.
 * @return string Sanitized key.
 */
function sanitize_key( $key ) {
	$sanitized_key = '';

	// Non-scalar input (arrays, objects, null) yields an empty key.
	if ( is_scalar( $key ) ) {
		$sanitized_key = strtolower( $key );
		$sanitized_key = preg_replace( '/[^a-z0-9_\-]/', '', $sanitized_key );
	}

	/**
	 * Filters a sanitized key string.
	 *
	 * @since 3.0.0
	 *
	 * @param string $sanitized_key Sanitized key.
	 * @param string $key           The key prior to sanitization.
	 */
	return apply_filters( 'sanitize_key', $sanitized_key, $key );
}

/**
 * Sanitizes a string into a slug, which can be used in URLs or HTML attributes.
 *
 * By default, converts accent characters to ASCII characters and further
 * limits the output to alphanumeric characters, underscore (_) and dash (-)
 * through the {@see 'sanitize_title'} filter.
 *
 * If `$title` is empty and `$fallback_title` is set, the latter will be used.
2181 * 2182 * @since 1.0.0 2183 * 2184 * @param string $title The string to be sanitized. 2185 * @param string $fallback_title Optional. A title to use if $title is empty. Default empty. 2186 * @param string $context Optional. The operation for which the string is sanitized. 2187 * When set to 'save', the string runs through remove_accents(). 2188 * Default 'save'. 2189 * @return string The sanitized string. 2190 */ 2191 function sanitize_title( $title, $fallback_title = '', $context = 'save' ) { 2192 $raw_title = $title; 2193 2194 if ( 'save' === $context ) { 2195 $title = remove_accents( $title ); 2196 } 2197 2198 /** 2199 * Filters a sanitized title string. 2200 * 2201 * @since 1.2.0 2202 * 2203 * @param string $title Sanitized title. 2204 * @param string $raw_title The title prior to sanitization. 2205 * @param string $context The context for which the title is being sanitized. 2206 */ 2207 $title = apply_filters( 'sanitize_title', $title, $raw_title, $context ); 2208 2209 if ( '' === $title || false === $title ) { 2210 $title = $fallback_title; 2211 } 2212 2213 return $title; 2214 } 2215 2216 /** 2217 * Sanitizes a title with the 'query' context. 2218 * 2219 * Used for querying the database for a value from URL. 2220 * 2221 * @since 3.1.0 2222 * 2223 * @param string $title The string to be sanitized. 2224 * @return string The sanitized string. 2225 */ 2226 function sanitize_title_for_query( $title ) { 2227 return sanitize_title( $title, '', 'query' ); 2228 } 2229 2230 /** 2231 * Sanitizes a title, replacing whitespace and a few other characters with dashes. 2232 * 2233 * Limits the output to alphanumeric characters, underscore (_) and dash (-). 2234 * Whitespace becomes a dash. 2235 * 2236 * @since 1.2.0 2237 * 2238 * @param string $title The title to be sanitized. 2239 * @param string $raw_title Optional. Not used. Default empty. 2240 * @param string $context Optional. The operation for which the string is sanitized. 
2241 * When set to 'save', additional entities are converted to hyphens 2242 * or stripped entirely. Default 'display'. 2243 * @return string The sanitized title. 2244 */ 2245 function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) { 2246 $title = strip_tags( $title ); 2247 // Preserve escaped octets. 2248 $title = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $title ); 2249 // Remove percent signs that are not part of an octet. 2250 $title = str_replace( '%', '', $title ); 2251 // Restore octets. 2252 $title = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $title ); 2253 2254 if ( seems_utf8( $title ) ) { 2255 if ( function_exists( 'mb_strtolower' ) ) { 2256 $title = mb_strtolower( $title, 'UTF-8' ); 2257 } 2258 $title = utf8_uri_encode( $title, 200 ); 2259 } 2260 2261 $title = strtolower( $title ); 2262 2263 if ( 'save' === $context ) { 2264 // Convert  , &ndash, and &mdash to hyphens. 2265 $title = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $title ); 2266 // Convert  , &ndash, and &mdash HTML entities to hyphens. 2267 $title = str_replace( array( ' ', ' ', '–', '–', '—', '—' ), '-', $title ); 2268 // Convert forward slash to hyphen. 2269 $title = str_replace( '/', '-', $title ); 2270 2271 // Strip these characters entirely. 2272 $title = str_replace( 2273 array( 2274 // Soft hyphens. 2275 '%c2%ad', 2276 // ¡ and ¿. 2277 '%c2%a1', 2278 '%c2%bf', 2279 // Angle quotes. 2280 '%c2%ab', 2281 '%c2%bb', 2282 '%e2%80%b9', 2283 '%e2%80%ba', 2284 // Curly quotes. 2285 '%e2%80%98', 2286 '%e2%80%99', 2287 '%e2%80%9c', 2288 '%e2%80%9d', 2289 '%e2%80%9a', 2290 '%e2%80%9b', 2291 '%e2%80%9e', 2292 '%e2%80%9f', 2293 // Bullet. 2294 '%e2%80%a2', 2295 // ©, ®, °, &hellip, and &trade. 2296 '%c2%a9', 2297 '%c2%ae', 2298 '%c2%b0', 2299 '%e2%80%a6', 2300 '%e2%84%a2', 2301 // Acute accents. 2302 '%c2%b4', 2303 '%cb%8a', 2304 '%cc%81', 2305 '%cd%81', 2306 // Grave accent, macron, caron. 
2307 '%cc%80', 2308 '%cc%84', 2309 '%cc%8c', 2310 // Non-visible characters that display without a width. 2311 '%e2%80%8b', // Zero width space. 2312 '%e2%80%8c', // Zero width non-joiner. 2313 '%e2%80%8d', // Zero width joiner. 2314 '%e2%80%8e', // Left-to-right mark. 2315 '%e2%80%8f', // Right-to-left mark. 2316 '%e2%80%aa', // Left-to-right embedding. 2317 '%e2%80%ab', // Right-to-left embedding. 2318 '%e2%80%ac', // Pop directional formatting. 2319 '%e2%80%ad', // Left-to-right override. 2320 '%e2%80%ae', // Right-to-left override. 2321 '%ef%bb%bf', // Byte order mark. 2322 ), 2323 '', 2324 $title 2325 ); 2326 2327 // Convert non-visible characters that display with a width to hyphen. 2328 $title = str_replace( 2329 array( 2330 '%e2%80%80', // En quad. 2331 '%e2%80%81', // Em quad. 2332 '%e2%80%82', // En space. 2333 '%e2%80%83', // Em space. 2334 '%e2%80%84', // Three-per-em space. 2335 '%e2%80%85', // Four-per-em space. 2336 '%e2%80%86', // Six-per-em space. 2337 '%e2%80%87', // Figure space. 2338 '%e2%80%88', // Punctuation space. 2339 '%e2%80%89', // Thin space. 2340 '%e2%80%8a', // Hair space. 2341 '%e2%80%a8', // Line separator. 2342 '%e2%80%a9', // Paragraph separator. 2343 '%e2%80%af', // Narrow no-break space. 2344 ), 2345 '-', 2346 $title 2347 ); 2348 2349 // Convert × to 'x'. 2350 $title = str_replace( '%c3%97', 'x', $title ); 2351 } 2352 2353 // Kill entities. 2354 $title = preg_replace( '/&.+?;/', '', $title ); 2355 $title = str_replace( '.', '-', $title ); 2356 2357 $title = preg_replace( '/[^%a-z0-9 _-]/', '', $title ); 2358 $title = preg_replace( '/\s+/', '-', $title ); 2359 $title = preg_replace( '|-+|', '-', $title ); 2360 $title = trim( $title, '-' ); 2361 2362 return $title; 2363 } 2364 2365 /** 2366 * Ensures a string is a valid SQL 'order by' clause. 2367 * 2368 * Accepts one or more columns, with or without a sort order (ASC / DESC). 2369 * e.g. 'column_1', 'column_1, column_2', 'column_1 ASC, column_2 DESC' etc. 
2370 * 2371 * Also accepts 'RAND()'. 2372 * 2373 * @since 2.5.1 2374 * 2375 * @param string $orderby Order by clause to be validated. 2376 * @return string|false Returns $orderby if valid, false otherwise. 2377 */ 2378 function sanitize_sql_orderby( $orderby ) { 2379 if ( preg_match( '/^\s*(([a-z0-9_]+|`[a-z0-9_]+`)(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_`])|$))+$/i', $orderby ) || preg_match( '/^\s*RAND\(\s*\)\s*$/i', $orderby ) ) { 2380 return $orderby; 2381 } 2382 return false; 2383 } 2384 2385 /** 2386 * Sanitizes an HTML classname to ensure it only contains valid characters. 2387 * 2388 * Strips the string down to A-Z,a-z,0-9,_,-. If this results in an empty 2389 * string then it will return the alternative value supplied. 2390 * 2391 * @todo Expand to support the full range of CDATA that a class attribute can contain. 2392 * 2393 * @since 2.8.0 2394 * 2395 * @param string $class The classname to be sanitized 2396 * @param string $fallback Optional. The value to return if the sanitization ends up as an empty string. 2397 * Defaults to an empty string. 2398 * @return string The sanitized value 2399 */ 2400 function sanitize_html_class( $class, $fallback = '' ) { 2401 // Strip out any %-encoded octets. 2402 $sanitized = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $class ); 2403 2404 // Limit to A-Z, a-z, 0-9, '_', '-'. 2405 $sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized ); 2406 2407 if ( '' === $sanitized && $fallback ) { 2408 return sanitize_html_class( $fallback ); 2409 } 2410 /** 2411 * Filters a sanitized HTML class string. 2412 * 2413 * @since 2.8.0 2414 * 2415 * @param string $sanitized The sanitized HTML class. 2416 * @param string $class HTML class before sanitization. 2417 * @param string $fallback The fallback string. 2418 */ 2419 return apply_filters( 'sanitize_html_class', $sanitized, $class, $fallback ); 2420 } 2421 2422 /** 2423 * Converts lone & characters into `&` (a.k.a. 
`&#038;`)
 *
 * @since 0.71
 *
 * @param string $content    String of characters to be converted.
 * @param string $deprecated Not used.
 * @return string Converted string.
 */
function convert_chars( $content, $deprecated = '' ) {
	if ( ! empty( $deprecated ) ) {
		_deprecated_argument( __FUNCTION__, '0.71' );
	}

	if ( str_contains( $content, '&' ) ) {
		/*
		 * Match an ampersand that starts neither a numeric reference ('&#') nor a
		 * named entity (the look-ahead), and emit it as the numeric reference '&#038;'.
		 */
		$content = preg_replace( '/&([^#])(?![a-z1-4]{1,8};)/i', '&#038;$1', $content );
	}

	return $content;
}

/**
 * Converts invalid Unicode references range to valid range.
 *
 * Numeric character references 128-159 are rewritten to the references of the
 * characters Windows CP1252 places at those positions; positions with no
 * CP1252 character are removed.
 *
 * @since 4.3.0
 *
 * @param string $content String with entities that need converting.
 * @return string Converted string.
 */
function convert_invalid_entities( $content ) {
	$wp_htmltranswinuni = array(
		'&#128;' => '&#8364;', // The Euro sign.
		'&#129;' => '',
		'&#130;' => '&#8218;', // These are Windows CP1252 specific characters.
		'&#131;' => '&#402;',  // They would look weird on non-Windows browsers.
		'&#132;' => '&#8222;',
		'&#133;' => '&#8230;',
		'&#134;' => '&#8224;',
		'&#135;' => '&#8225;',
		'&#136;' => '&#710;',
		'&#137;' => '&#8240;',
		'&#138;' => '&#352;',
		'&#139;' => '&#8249;',
		'&#140;' => '&#338;',
		'&#141;' => '',
		'&#142;' => '&#381;',
		'&#143;' => '',
		'&#144;' => '',
		'&#145;' => '&#8216;',
		'&#146;' => '&#8217;',
		'&#147;' => '&#8220;',
		'&#148;' => '&#8221;',
		'&#149;' => '&#8226;',
		'&#150;' => '&#8211;',
		'&#151;' => '&#8212;',
		'&#152;' => '&#732;',
		'&#153;' => '&#8482;',
		'&#154;' => '&#353;',
		'&#155;' => '&#8250;',
		'&#156;' => '&#339;',
		'&#157;' => '',
		'&#158;' => '&#382;',
		'&#159;' => '',
	);

	// Every key starts with '&#1', so that prefix is a cheap pre-check before strtr().
	if ( str_contains( $content, '&#1' ) ) {
		$content = strtr( $content, $wp_htmltranswinuni );
	}

	return $content;
}

/**
 * Balances tags if forced to, or if the 'use_balanceTags' option is set to true.
 *
 * @since 0.71
 *
 * @param string $text  Text to be balanced
 * @param bool   $force If true, forces balancing, ignoring the value of the option. Default false.
2501 * @return string Balanced text 2502 */ 2503 function balanceTags( $text, $force = false ) { // phpcs:ignore WordPress.NamingConventions.ValidFunctionName.FunctionNameInvalid 2504 if ( $force || (int) get_option( 'use_balanceTags' ) === 1 ) { 2505 return force_balance_tags( $text ); 2506 } else { 2507 return $text; 2508 } 2509 } 2510 2511 /** 2512 * Balances tags of string using a modified stack. 2513 * 2514 * @since 2.0.4 2515 * @since 5.3.0 Improve accuracy and add support for custom element tags. 2516 * 2517 * @author Leonard Lin <leonard@acm.org> 2518 * @license GPL 2519 * @copyright November 4, 2001 2520 * @version 1.1 2521 * @todo Make better - change loop condition to $text in 1.2 2522 * @internal Modified by Scott Reilly (coffee2code) 02 Aug 2004 2523 * 1.1 Fixed handling of append/stack pop order of end text 2524 * Added Cleaning Hooks 2525 * 1.0 First Version 2526 * 2527 * @param string $text Text to be balanced. 2528 * @return string Balanced text. 2529 */ 2530 function force_balance_tags( $text ) { 2531 $tagstack = array(); 2532 $stacksize = 0; 2533 $tagqueue = ''; 2534 $newtext = ''; 2535 // Known single-entity/self-closing tags. 2536 $single_tags = array( 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track', 'wbr' ); 2537 // Tags that can be immediately nested within themselves. 2538 $nestable_tags = array( 'article', 'aside', 'blockquote', 'details', 'div', 'figure', 'object', 'q', 'section', 'span' ); 2539 2540 // WP bug fix for comments - in case you REALLY meant to type '< !--'. 2541 $text = str_replace( '< !--', '< !--', $text ); 2542 // WP bug fix for LOVE <3 (and other situations with '<' before a number). 2543 $text = preg_replace( '#<([0-9]{1})#', '<$1', $text ); 2544 2545 /** 2546 * Matches supported tags. 2547 * 2548 * To get the pattern as a string without the comments paste into a PHP 2549 * REPL like `php -a`. 
2550 * 2551 * @see https://html.spec.whatwg.org/#elements-2 2552 * @see https://html.spec.whatwg.org/multipage/custom-elements.html#valid-custom-element-name 2553 * 2554 * @example 2555 * ~# php -a 2556 * php > $s = [paste copied contents of expression below including parentheses]; 2557 * php > echo $s; 2558 */ 2559 $tag_pattern = ( 2560 '#<' . // Start with an opening bracket. 2561 '(/?)' . // Group 1 - If it's a closing tag it'll have a leading slash. 2562 '(' . // Group 2 - Tag name. 2563 // Custom element tags have more lenient rules than HTML tag names. 2564 '(?:[a-z](?:[a-z0-9._]*)-(?:[a-z0-9._-]+)+)' . 2565 '|' . 2566 // Traditional tag rules approximate HTML tag names. 2567 '(?:[\w:]+)' . 2568 ')' . 2569 '(?:' . 2570 // We either immediately close the tag with its '>' and have nothing here. 2571 '\s*' . 2572 '(/?)' . // Group 3 - "attributes" for empty tag. 2573 '|' . 2574 // Or we must start with space characters to separate the tag name from the attributes (or whitespace). 2575 '(\s+)' . // Group 4 - Pre-attribute whitespace. 2576 '([^>]*)' . // Group 5 - Attributes. 2577 ')' . 2578 '>#' // End with a closing bracket. 2579 ); 2580 2581 while ( preg_match( $tag_pattern, $text, $regex ) ) { 2582 $full_match = $regex[0]; 2583 $has_leading_slash = ! empty( $regex[1] ); 2584 $tag_name = $regex[2]; 2585 $tag = strtolower( $tag_name ); 2586 $is_single_tag = in_array( $tag, $single_tags, true ); 2587 $pre_attribute_ws = isset( $regex[4] ) ? $regex[4] : ''; 2588 $attributes = trim( isset( $regex[5] ) ? $regex[5] : $regex[3] ); 2589 $has_self_closer = '/' === substr( $attributes, -1 ); 2590 2591 $newtext .= $tagqueue; 2592 2593 $i = strpos( $text, $full_match ); 2594 $l = strlen( $full_match ); 2595 2596 // Clear the shifter. 2597 $tagqueue = ''; 2598 if ( $has_leading_slash ) { // End tag. 2599 // If too many closing tags. 2600 if ( $stacksize <= 0 ) { 2601 $tag = ''; 2602 // Or close to be safe $tag = '/' . $tag. 
2603 2604 // If stacktop value = tag close value, then pop. 2605 } elseif ( $tagstack[ $stacksize - 1 ] === $tag ) { // Found closing tag. 2606 $tag = '</' . $tag . '>'; // Close tag. 2607 array_pop( $tagstack ); 2608 $stacksize--; 2609 } else { // Closing tag not at top, search for it. 2610 for ( $j = $stacksize - 1; $j >= 0; $j-- ) { 2611 if ( $tagstack[ $j ] === $tag ) { 2612 // Add tag to tagqueue. 2613 for ( $k = $stacksize - 1; $k >= $j; $k-- ) { 2614 $tagqueue .= '</' . array_pop( $tagstack ) . '>'; 2615 $stacksize--; 2616 } 2617 break; 2618 } 2619 } 2620 $tag = ''; 2621 } 2622 } else { // Begin tag. 2623 if ( $has_self_closer ) { // If it presents itself as a self-closing tag... 2624 // ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such 2625 // and immediately close it with a closing tag (the tag will encapsulate no text as a result). 2626 if ( ! $is_single_tag ) { 2627 $attributes = trim( substr( $attributes, 0, -1 ) ) . "></$tag"; 2628 } 2629 } elseif ( $is_single_tag ) { // Else if it's a known single-entity tag but it doesn't close itself, do so. 2630 $pre_attribute_ws = ' '; 2631 $attributes .= '/'; 2632 } else { // It's not a single-entity tag. 2633 // If the top of the stack is the same as the tag we want to push, close previous tag. 2634 if ( $stacksize > 0 && ! in_array( $tag, $nestable_tags, true ) && $tagstack[ $stacksize - 1 ] === $tag ) { 2635 $tagqueue = '</' . array_pop( $tagstack ) . '>'; 2636 $stacksize--; 2637 } 2638 $stacksize = array_push( $tagstack, $tag ); 2639 } 2640 2641 // Attributes. 2642 if ( $has_self_closer && $is_single_tag ) { 2643 // We need some space - avoid <br/> and prefer <br />. 2644 $pre_attribute_ws = ' '; 2645 } 2646 2647 $tag = '<' . $tag . $pre_attribute_ws . $attributes . '>'; 2648 // If already queuing a close tag, then put this tag on too. 2649 if ( ! empty( $tagqueue ) ) { 2650 $tagqueue .= $tag; 2651 $tag = ''; 2652 } 2653 } 2654 $newtext .= substr( $text, 0, $i ) . 
$tag; 2655 $text = substr( $text, $i + $l ); 2656 } 2657 2658 // Clear tag queue. 2659 $newtext .= $tagqueue; 2660 2661 // Add remaining text. 2662 $newtext .= $text; 2663 2664 while ( $x = array_pop( $tagstack ) ) { 2665 $newtext .= '</' . $x . '>'; // Add remaining tags to close. 2666 } 2667 2668 // WP fix for the bug with HTML comments. 2669 $newtext = str_replace( '< !--', '<!--', $newtext ); 2670 $newtext = str_replace( '< !--', '< !--', $newtext ); 2671 2672 return $newtext; 2673 } 2674 2675 /** 2676 * Acts on text which is about to be edited. 2677 * 2678 * The $content is run through esc_textarea(), which uses htmlspecialchars() 2679 * to convert special characters to HTML entities. If `$richedit` is set to true, 2680 * it is simply a holder for the {@see 'format_to_edit'} filter. 2681 * 2682 * @since 0.71 2683 * @since 4.4.0 The `$richedit` parameter was renamed to `$rich_text` for clarity. 2684 * 2685 * @param string $content The text about to be edited. 2686 * @param bool $rich_text Optional. Whether `$content` should be considered rich text, 2687 * in which case it would not be passed through esc_textarea(). 2688 * Default false. 2689 * @return string The text after the filter (and possibly htmlspecialchars()) has been run. 2690 */ 2691 function format_to_edit( $content, $rich_text = false ) { 2692 /** 2693 * Filters the text to be formatted for editing. 2694 * 2695 * @since 1.2.0 2696 * 2697 * @param string $content The text, prior to formatting for editing. 2698 */ 2699 $content = apply_filters( 'format_to_edit', $content ); 2700 if ( ! $rich_text ) { 2701 $content = esc_textarea( $content ); 2702 } 2703 return $content; 2704 } 2705 2706 /** 2707 * Add leading zeros when necessary. 2708 * 2709 * If you set the threshold to '4' and the number is '10', then you will get 2710 * back '0010'. If you set the threshold to '4' and the number is '5000', then you 2711 * will get back '5000'. 
/**
 * Pads a value with leading zeros up to a minimum width.
 *
 * With a threshold of 4, the number 10 comes back as '0010', while 5000 comes
 * back as '5000' because it already fills the requested width.
 *
 * The value is formatted through sprintf() as a string (`%s`), so no zeros are
 * added once the value is at least `$threshold` characters long.
 *
 * @since 0.71
 *
 * @param int $number    Number to pad with zeros when it is shorter than the threshold.
 * @param int $threshold Minimum number of characters the result should have.
 * @return string The value, left-padded with zeros where needed.
 */
function zeroise( $number, $threshold ) {
	$format = '%0' . $threshold . 's';

	return sprintf( $format, $number );
}

/**
 * Adds backslashes before letters and before a number at the start of a string.
 *
 * Every ASCII letter (A-Z, a-z) is prefixed with a backslash. When the string
 * starts with a digit, two backslashes are prepended to it first.
 *
 * @since 0.71
 *
 * @param string $string Value to which backslashes will be added.
 * @return string String with backslashes inserted.
 */
function backslashit( $string ) {
	$starts_with_digit = isset( $string[0] ) && $string[0] >= '0' && $string[0] <= '9';

	if ( $starts_with_digit ) {
		$string = '\\\\' . $string;
	}

	return addcslashes( $string, 'A..Za..z' );
}

/**
 * Appends a single trailing forward slash.
 *
 * Any trailing forward slashes or backslashes are stripped by
 * untrailingslashit() first, so the result never ends in a doubled slash.
 *
 * Primarily intended for paths, although nothing path-specific is done here.
 *
 * @since 1.2.0
 *
 * @param string $string What to add the trailing slash to.
 * @return string String with trailing slash added.
 */
function trailingslashit( $string ) {
	$without_slash = untrailingslashit( $string );

	return $without_slash . '/';
}
/**
 * Removes trailing forward slashes and backslashes if they exist.
 *
 * Primarily intended for paths, although nothing path-specific is done here.
 *
 * @since 2.2.0
 *
 * @param string $string What to remove the trailing slashes from.
 * @return string String without the trailing slashes.
 */
function untrailingslashit( $string ) {
	$slash_characters = '/\\';

	return rtrim( $string, $slash_characters );
}

/**
 * Adds slashes to a string or to the strings within an array.
 *
 * This is a straight pass-through to wp_slash().
 *
 * @since 0.71
 *
 * @param string|array $gpc String or array of data to slash.
 * @return string|array Slashed `$gpc`.
 */
function addslashes_gpc( $gpc ) {
	$slashed = wp_slash( $gpc );

	return $slashed;
}

/**
 * Navigates through an array, object, or scalar, and removes slashes from the values.
 *
 * Delegates to map_deep() with stripslashes_from_strings_only() as the callback.
 *
 * @since 2.0.0
 *
 * @param mixed $value The value to be stripped.
 * @return mixed Stripped value.
 */
function stripslashes_deep( $value ) {
	$callback = 'stripslashes_from_strings_only';

	return map_deep( $value, $callback );
}

/**
 * Callback function for `stripslashes_deep()` which strips slashes from strings.
 *
 * Values that are not strings are handed back untouched.
 *
 * @since 4.4.0
 *
 * @param mixed $value The array or string to be stripped.
 * @return mixed The stripped value.
 */
function stripslashes_from_strings_only( $value ) {
	if ( ! is_string( $value ) ) {
		return $value;
	}

	return stripslashes( $value );
}

/**
 * Navigates through an array, object, or scalar, and encodes the values to be used in a URL.
 *
 * Delegates to map_deep() with urlencode() as the callback.
 *
 * @since 2.2.0
 *
 * @param mixed $value The array or string to be encoded.
 * @return mixed The encoded value.
 */
function urlencode_deep( $value ) {
	$callback = 'urlencode';

	return map_deep( $value, $callback );
}
/**
 * Navigates through an array, object, or scalar, and raw-encodes the values to be used in a URL.
 *
 * Delegates to map_deep() with rawurlencode() as the callback.
 *
 * @since 3.4.0
 *
 * @param mixed $value The array or string to be encoded.
 * @return mixed The encoded value.
 */
function rawurlencode_deep( $value ) {
	return map_deep( $value, 'rawurlencode' );
}

/**
 * Navigates through an array, object, or scalar, and decodes URL-encoded values.
 *
 * Delegates to map_deep() with urldecode() as the callback.
 *
 * @since 4.4.0
 *
 * @param mixed $value The array or string to be decoded.
 * @return mixed The decoded value.
 */
function urldecode_deep( $value ) {
	return map_deep( $value, 'urldecode' );
}

/**
 * Converts email address characters to HTML entities to block spam bots.
 *
 * Each byte of the address is randomly emitted in one of these forms:
 *
 *  - a decimal numeric character reference (`&#NN;`),
 *  - the literal character,
 *  - a percent-encoded hex pair (`%xx`), only when `$hex_encoding` is 1.
 *
 * Any `@` that was left as a literal character is replaced with `&#64;` at the
 * end, so the returned string never contains a plain at sign.
 *
 * @since 0.71
 *
 * @param string $email_address Email address.
 * @param int    $hex_encoding  Optional. Set to 1 to enable hex encoding.
 * @return string Converted email address.
 */
function antispambot( $email_address, $hex_encoding = 0 ) {
	$email_no_spam_address = '';

	for ( $i = 0, $len = strlen( $email_address ); $i < $len; $i++ ) {
		// 0 and 1 are always possible; 2 only when hex encoding is enabled.
		$j = rand( 0, 1 + $hex_encoding );

		if ( 0 === $j ) {
			$email_no_spam_address .= '&#' . ord( $email_address[ $i ] ) . ';';
		} elseif ( 1 === $j ) {
			$email_no_spam_address .= $email_address[ $i ];
		} elseif ( 2 === $j ) {
			$email_no_spam_address .= '%' . zeroise( dechex( ord( $email_address[ $i ] ) ), 2 );
		}
	}

	// Never leave a literal at sign behind, whichever form the loop picked for it.
	return str_replace( '@', '&#64;', $email_no_spam_address );
}
/**
 * Callback to convert URI match to HTML A element.
 *
 * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
 *
 * @since 2.3.2
 * @access private
 *
 * @param array $matches Single Regex Match.
 * @return string HTML A element with URI address.
 */
function _make_url_clickable_cb( $matches ) {
	$url    = $matches[2];
	$suffix = $matches[3];

	// A trailing closing parenthesis is moved into the URL when the URL itself contains an
	// opening one; the balancing loop below then decides whether it really belongs there.
	if ( ')' === $suffix && strpos( $url, '(' ) ) {
		$url   .= $suffix;
		$suffix = '';
	}

	// Include parentheses in the URL only if paired.
	while ( substr_count( $url, '(' ) < substr_count( $url, ')' ) ) {
		$suffix = strrchr( $url, ')' ) . $suffix;
		$url    = substr( $url, 0, strrpos( $url, ')' ) );
	}

	$url = esc_url( $url );
	if ( empty( $url ) ) {
		return $matches[0];
	}

	$rel = ( 'comment_text' === current_filter() ) ? 'nofollow ugc' : 'nofollow';

	/**
	 * Filters the rel value that is added to URL matches converted to links.
	 *
	 * @since 5.3.0
	 *
	 * @param string $rel The rel value.
	 * @param string $url The matched URL being converted to a link tag.
	 */
	$rel = esc_attr( apply_filters( 'make_clickable_rel', $rel, $url ) );

	return $matches[1] . '<a href="' . $url . '" rel="' . $rel . '">' . $url . '</a>' . $suffix;
}

/**
 * Callback to convert URL match to HTML A element.
 *
 * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
 *
 * @since 2.3.2
 * @access private
 *
 * @param array $matches Single Regex Match.
 * @return string HTML A element with URL address.
 */
function _make_web_ftp_clickable_cb( $matches ) {
	$dest     = 'http://' . $matches[2];
	$trailing = '';

	// A trailing [.,;:)] is kept out of the URL and re-appended after the link.
	$last_char = substr( $dest, -1 );
	if ( in_array( $last_char, array( '.', ',', ';', ':', ')' ), true ) ) {
		$trailing = $last_char;
		$dest     = substr( $dest, 0, -1 );
	}

	$dest = esc_url( $dest );
	if ( empty( $dest ) ) {
		return $matches[0];
	}

	$rel = ( 'comment_text' === current_filter() ) ? 'nofollow ugc' : 'nofollow';

	/** This filter is documented in wp-includes/formatting.php */
	$rel = esc_attr( apply_filters( 'make_clickable_rel', $rel, $dest ) );

	return $matches[1] . '<a href="' . $dest . '" rel="' . $rel . '">' . $dest . '</a>' . $trailing;
}

/**
 * Callback to convert email address match to HTML A element.
 *
 * This function was backported from 2.5.0 to 2.3.2. Regex callback for make_clickable().
 *
 * @since 2.3.2
 * @access private
 *
 * @param array $matches Single Regex Match.
 * @return string HTML A element with email address.
 */
function _make_email_clickable_cb( $matches ) {
	$address = $matches[2] . '@' . $matches[3];
	$link    = '<a href="mailto:' . $address . '">' . $address . '</a>';

	return $matches[1] . $link;
}

/**
 * Converts plaintext URI to HTML links.
 *
 * Converts URI, www and ftp, and email addresses. Finishes by fixing links
 * within links.
 *
 * Text inside `<code>`, `<pre>`, `<script>` and `<style>` elements, and the HTML tags
 * themselves, are passed through untouched.
 *
 * @since 0.71
 *
 * @param string $text Content to convert URIs.
 * @return string Content with converted URIs.
 */
function make_clickable( $text ) {
	// Opening tags of elements whose content must not be linkified, and their closing tags.
	$opening_patterns = array( '|^<code[\s>]|i', '|^<pre[\s>]|i', '|^<script[\s>]|i', '|^<style[\s>]|i' );
	$closing_tags     = array( '</code>', '</pre>', '</script>', '</style>' );

	// The regex is a non-anchored pattern and does not have a single fixed starting character.
	// Tell PCRE to spend more time optimizing since, when used on a page load, it will probably be used several times.
	$url_clickable = '~
		([\\s(<.,;:!?])                                # 1: Leading whitespace, or punctuation.
		(                                              # 2: URL.
			[\\w]{1,20}+://                                # Scheme and hier-part prefix.
			(?=\S{1,2000}\s)                               # Limit to URLs less than about 2000 characters long.
			[\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]*+         # Non-punctuation URL character.
			(?:                                            # Unroll the Loop: Only allow puctuation URL character if followed by a non-punctuation URL character.
				[\'.,;:!?)]                                    # Punctuation URL character.
				[\\w\\x80-\\xff#%\\~/@\\[\\]*(+=&$-]++         # Non-punctuation URL character.
			)*
		)
		(\)?)                                          # 3: Trailing closing parenthesis (for parethesis balancing post processing).
	~xS';

	$r               = '';
	$nested_code_pre = 0; // Keep track of how many levels link is nested inside <pre> or <code>.
	$textarr         = preg_split( '/(<[^<>]+>)/', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // Split out HTML tags.

	foreach ( $textarr as $piece ) {
		$opens_protected_element = false;
		foreach ( $opening_patterns as $opening_pattern ) {
			if ( preg_match( $opening_pattern, $piece ) ) {
				$opens_protected_element = true;
				break;
			}
		}

		if ( $opens_protected_element ) {
			$nested_code_pre++;
		} elseif ( $nested_code_pre && in_array( strtolower( $piece ), $closing_tags, true ) ) {
			$nested_code_pre--;
		}

		// Protected content, empty pieces, and tags that do not start with a URL scheme are copied as-is.
		if ( $nested_code_pre || empty( $piece ) || ( '<' === $piece[0] && ! preg_match( '|^<\s*[\w]{1,20}+://|', $piece ) ) ) {
			$r .= $piece;
			continue;
		}

		// Long strings might contain expensive edge cases, so break them up.
		if ( strlen( $piece ) > 10000 ) {
			// 2100: Extra room for scheme and leading and trailing parentheses.
			foreach ( _split_str_by_whitespace( $piece, 2100 ) as $chunk ) {
				if ( strlen( $chunk ) > 2101 ) {
					$r .= $chunk; // Too big, no whitespace: bail.
				} else {
					$r .= make_clickable( $chunk );
				}
			}
			continue;
		}

		$ret = " $piece "; // Pad with whitespace to simplify the regexes.

		$ret = preg_replace_callback( $url_clickable, '_make_url_clickable_cb', $ret );
		$ret = preg_replace_callback( '#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]+)#is', '_make_web_ftp_clickable_cb', $ret );
		$ret = preg_replace_callback( '#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret );

		$r .= substr( $ret, 1, -1 ); // Remove our whitespace padding.
	}

	// Cleanup of accidental links within links.
	return preg_replace( '#(<a([ \r\n\t]+[^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i', '$1$3</a>', $r );
}
/**
 * Breaks a string into chunks by splitting at whitespace characters.
 *
 * The length of each returned chunk is as close to the specified length goal as possible,
 * with the caveat that each chunk includes its trailing delimiter.
 * Chunks longer than the goal are guaranteed to not have any inner whitespace.
 *
 * Joining the returned chunks with empty delimiters reconstructs the input string losslessly.
 * This includes a final chunk consisting of the single character '0'.
 *
 * Input string must have no null characters (or eventual transformations on output chunks
 * must not care about null characters).
 *
 *     _split_str_by_whitespace( 'aaa bbb ccc', 5 ) === array( 'aaa ', 'bbb ', 'ccc' )
 *     _split_str_by_whitespace( 'abcdefgh ij', 3 ) === array( 'abcdefgh ', 'ij' ) // No inner whitespace to split on.
 *
 * @since 3.4.0
 * @access private
 *
 * @param string $string The string to split.
 * @param int    $goal   The desired chunk length.
 * @return array Numeric array of chunks.
 */
function _split_str_by_whitespace( $string, $goal ) {
	$chunks = array();

	// Work on a copy in which every whitespace character is a NUL byte, so one strpos() finds any of them.
	$string_nullspace = strtr( $string, "\r\n\t\v\f ", "\000\000\000\000\000\000" );

	while ( $goal < strlen( $string_nullspace ) ) {
		// Prefer the last whitespace character within the goal length...
		$pos = strrpos( substr( $string_nullspace, 0, $goal + 1 ), "\000" );

		if ( false === $pos ) {
			// ...otherwise take the first one after it, or stop when there is none left.
			$pos = strpos( $string_nullspace, "\000", $goal + 1 );
			if ( false === $pos ) {
				break;
			}
		}

		$chunks[]         = substr( $string, 0, $pos + 1 );
		$string           = substr( $string, $pos + 1 );
		$string_nullspace = substr( $string_nullspace, $pos + 1 );
	}

	// Compare against the empty string rather than testing truthiness: a remainder of '0' is a real chunk.
	// The is_string() guard covers PHP versions where substr() returns false at the end of the string.
	if ( is_string( $string ) && '' !== $string ) {
		$chunks[] = $string;
	}

	return $chunks;
}

/**
 * Callback to add a rel attribute to HTML A element.
 *
 * Links whose href is an http(s) URL on the same host as home_url() are returned unchanged.
 * For every other link, an already existing rel attribute is merged with `$rel` (duplicates
 * removed) and the attribute list is rebuilt, to prevent invalidating (X)HTML.
 *
 * @since 5.3.0
 *
 * @param array  $matches Single match.
 * @param string $rel     The rel attribute to add.
 * @return string HTML A element with the added rel attribute.
 */
function wp_rel_callback( $matches, $rel ) {
	$text = $matches[1];
	$atts = wp_kses_hair( $matches[1], wp_allowed_protocols() );

	if ( ! empty( $atts['href'] ) ) {
		// wp_parse_url() yields no string for a missing component; cast before lowercasing.
		$href_scheme = strtolower( (string) wp_parse_url( $atts['href']['value'], PHP_URL_SCHEME ) );

		if ( in_array( $href_scheme, array( 'http', 'https' ), true ) ) {
			$href_host = strtolower( (string) wp_parse_url( $atts['href']['value'], PHP_URL_HOST ) );
			$home_host = strtolower( (string) wp_parse_url( home_url(), PHP_URL_HOST ) );

			if ( $href_host === $home_host ) {
				return "<a $text>";
			}
		}
	}

	if ( ! empty( $atts['rel'] ) ) {
		$parts     = array_map( 'trim', explode( ' ', $atts['rel']['value'] ) );
		$rel_array = array_map( 'trim', explode( ' ', $rel ) );
		$parts     = array_unique( array_merge( $parts, $rel_array ) );
		$rel       = implode( ' ', $parts );
		unset( $atts['rel'] );

		// Rebuild the remaining attributes; the merged rel is appended below.
		$html = '';
		foreach ( $atts as $name => $value ) {
			if ( isset( $value['vless'] ) && 'y' === $value['vless'] ) {
				$html .= $name . ' ';
			} else {
				$html .= "{$name}=\"" . esc_attr( $value['value'] ) . '" ';
			}
		}
		$text = trim( $html );
	}

	return "<a $text rel=\"" . esc_attr( $rel ) . '">';
}

/**
 * Adds `rel="nofollow"` string to all HTML A elements in content.
 *
 * @since 1.5.0
 *
 * @param string $text Content that may contain HTML A elements.
 * @return string Converted content.
 */
function wp_rel_nofollow( $text ) {
	// This is a pre-save filter, so text is already escaped.
	$text = stripslashes( $text );
	$text = preg_replace_callback(
		'|<a (.+?)>|i',
		static function( $matches ) {
			return wp_rel_callback( $matches, 'nofollow' );
		},
		$text
	);
	return wp_slash( $text );
}

/**
 * Callback to add `rel="nofollow"` string to HTML A element.
 *
 * @since 2.3.0
 * @deprecated 5.3.0 Use wp_rel_callback()
 *
 * @param array $matches Single match.
 * @return string HTML A Element with `rel="nofollow"`.
 */
function wp_rel_nofollow_callback( $matches ) {
	return wp_rel_callback( $matches, 'nofollow' );
}

/**
 * Adds `rel="nofollow ugc"` string to all HTML A elements in content.
 *
 * @since 5.3.0
 *
 * @param string $text Content that may contain HTML A elements.
 * @return string Converted content.
 */
function wp_rel_ugc( $text ) {
	// This is a pre-save filter, so text is already escaped.
	$text = stripslashes( $text );
	$text = preg_replace_callback(
		'|<a (.+?)>|i',
		static function( $matches ) {
			return wp_rel_callback( $matches, 'nofollow ugc' );
		},
		$text
	);
	return wp_slash( $text );
}

/**
 * Adds `rel="noopener"` to all HTML A elements that have a target.
 *
 * `<script>` and `<style>` elements are cut out before the links are processed and
 * put back in their original positions afterwards, so their content is never altered.
 *
 * @since 5.1.0
 * @since 5.6.0 Removed 'noreferrer' relationship.
 *
 * @param string $text Content that may contain HTML A elements.
 * @return string Converted content.
 */
function wp_targeted_link_rel( $text ) {
	// Don't run (more expensive) regex if no links with targets.
	if ( stripos( $text, 'target' ) === false || stripos( $text, '<a ' ) === false || is_serialized( $text ) ) {
		return $text;
	}

	$script_and_style_regex = '/<(script|style).*?<\/\\1>/si';

	preg_match_all( $script_and_style_regex, $text, $matches );
	$extra_parts = $matches[0];
	$html_parts  = preg_split( $script_and_style_regex, $text );

	// Interleave: processed HTML part, then the script/style element that followed it.
	$text = '';
	foreach ( $html_parts as $i => $part ) {
		$text .= preg_replace_callback( '|<a\s([^>]*target\s*=[^>]*)>|i', 'wp_targeted_link_rel_callback', $part );

		if ( isset( $extra_parts[ $i ] ) ) {
			$text .= $extra_parts[ $i ];
		}
	}

	return $text;
}
/**
 * Callback to add `rel="noopener"` string to HTML A element.
 *
 * Will not duplicate an existing 'noopener' value to avoid invalidating the HTML.
 *
 * The attribute string is treated as slash-escaped when it contains no unescaped quote;
 * in that case the quotes are unescaped for parsing and escaped again before returning.
 *
 * @since 5.1.0
 * @since 5.6.0 Removed 'noreferrer' relationship.
 *
 * @param array $matches Single match.
 * @return string HTML A Element with `rel="noopener"` in addition to any existing values.
 */
function wp_targeted_link_rel_callback( $matches ) {
	$untouched_html = $matches[1];
	$link_html      = $untouched_html;

	// Consider the HTML escaped if there are no unescaped quotes.
	$has_unescaped_quote = (bool) preg_match( '/(^|[^\\\\])[\'"]/', $link_html );
	$is_escaped          = ! $has_unescaped_quote;

	if ( $is_escaped ) {
		// Replace only the quotes so that they are parsable by wp_kses_hair(), leave the rest as is.
		$link_html = preg_replace( '/\\\\([\'"])/', '$1', $link_html );
	}

	$atts = wp_kses_hair( $link_html, wp_allowed_protocols() );

	/**
	 * Filters the rel values that are added to links with `target` attribute.
	 *
	 * @since 5.1.0
	 *
	 * @param string $rel       The rel values.
	 * @param string $link_html The matched content of the link tag including all HTML attributes.
	 */
	$rel = apply_filters( 'wp_targeted_link_rel', 'noopener', $link_html );

	// Return early if no rel values to be added or if no actual target attribute.
	if ( ! $rel || ! isset( $atts['target'] ) ) {
		return '<a ' . $untouched_html . '>';
	}

	// Merge with the values already present, dropping duplicates.
	if ( isset( $atts['rel'] ) ) {
		$combined  = $atts['rel']['value'] . ' ' . $rel;
		$all_parts = preg_split( '/\s/', $combined, -1, PREG_SPLIT_NO_EMPTY );
		$rel       = implode( ' ', array_unique( $all_parts ) );
	}

	$atts['rel']['whole'] = 'rel="' . esc_attr( $rel ) . '"';

	$link_html = implode( ' ', array_column( $atts, 'whole' ) );

	if ( $is_escaped ) {
		$link_html = preg_replace( '/[\'"]/', '\\\\$0', $link_html );
	}

	return '<a ' . $link_html . '>';
}
/**
 * Adds all filters modifying the rel attribute of targeted links.
 *
 * Hooks wp_targeted_link_rel() onto each of the pre-save filters listed below.
 *
 * @since 5.1.0
 */
function wp_init_targeted_link_rel_filters() {
	$filters = array(
		'title_save_pre',
		'content_save_pre',
		'excerpt_save_pre',
		'content_filtered_save_pre',
		'pre_comment_content',
		'pre_term_description',
		'pre_link_description',
		'pre_link_notes',
		'pre_user_description',
	);

	foreach ( $filters as $filter ) {
		add_filter( $filter, 'wp_targeted_link_rel' );
	}
}

/**
 * Removes all filters modifying the rel attribute of targeted links.
 *
 * Unhooks wp_targeted_link_rel() from the same filters that
 * wp_init_targeted_link_rel_filters() adds it to.
 *
 * @since 5.1.0
 */
function wp_remove_targeted_link_rel_filters() {
	$filters = array(
		'title_save_pre',
		'content_save_pre',
		'excerpt_save_pre',
		'content_filtered_save_pre',
		'pre_comment_content',
		'pre_term_description',
		'pre_link_description',
		'pre_link_notes',
		'pre_user_description',
	);

	foreach ( $filters as $filter ) {
		remove_filter( $filter, 'wp_targeted_link_rel' );
	}
}

/**
 * Converts one smiley code to the icon graphic file equivalent.
 *
 * Callback handler for convert_smilies().
 *
 * Looks up one smiley code in the $wpsmiliestrans global array and returns an
 * `<img>` string for that smiley. Entries that do not end in a known image file
 * extension are returned as they are. A code that is not present in
 * $wpsmiliestrans is left in the text exactly as it was matched.
 *
 * @since 2.8.0
 *
 * @global array $wpsmiliestrans
 *
 * @param array $matches Single match. Smiley code to convert to image.
 * @return string Image string for smiley.
 */
function translate_smiley( $matches ) {
	global $wpsmiliestrans;

	if ( empty( $matches ) ) {
		return '';
	}

	$matched_text = reset( $matches );
	$smiley       = trim( $matched_text );

	// Unknown code: keep the original text instead of reading an undefined array key.
	if ( ! isset( $wpsmiliestrans[ $smiley ] ) ) {
		return $matched_text;
	}

	$img = $wpsmiliestrans[ $smiley ];

	$matches    = array();
	$ext        = preg_match( '/\.([^.]+)$/', $img, $matches ) ? strtolower( $matches[1] ) : false;
	$image_exts = array( 'jpg', 'jpeg', 'jpe', 'gif', 'png', 'webp' );

	// Don't convert smilies that aren't images - they're probably emoji.
	if ( ! in_array( $ext, $image_exts, true ) ) {
		return $img;
	}

	/**
	 * Filters the Smiley image URL before it's used in the image element.
	 *
	 * @since 2.9.0
	 *
	 * @param string $smiley_url URL for the smiley image.
	 * @param string $img        Filename for the smiley image.
	 * @param string $site_url   Site URL, as returned by site_url().
	 */
	$src_url = apply_filters( 'smilies_src', includes_url( "images/smilies/$img" ), $img, site_url() );

	return sprintf( '<img src="%s" alt="%s" class="wp-smiley" style="height: 1em; max-height: 1em;" />', esc_url( $src_url ), esc_attr( $smiley ) );
}

/**
 * Converts text equivalent of smilies to images.
 *
 * Will only convert smilies if the option 'use_smilies' is true and the global
 * used in the function isn't empty. HTML tags, and the content of `<code>`, `<pre>`,
 * `<style>`, `<script>` and `<textarea>` elements, are left untouched.
 *
 * @since 0.71
 *
 * @global string|array $wp_smiliessearch
 *
 * @param string $text Content to convert smilies from text.
 * @return string Converted content with text smilies replaced with images.
 */
function convert_smilies( $text ) {
	global $wp_smiliessearch;
	$output = '';
	if ( get_option( 'use_smilies' ) && ! empty( $wp_smiliessearch ) ) {
		// HTML loop taken from texturize function, could possibly be consolidated.
		$textarr = preg_split( '/(<.*>)/U', $text, -1, PREG_SPLIT_DELIM_CAPTURE ); // Capture the tags as well as in between.
		$stop    = count( $textarr ); // Loop stuff.

		// Ignore processing of specific tags.
		$tags_to_ignore       = 'code|pre|style|script|textarea';
		$ignore_block_element = '';

		for ( $i = 0; $i < $stop; $i++ ) {
			$content = $textarr[ $i ];

			// If we're in an ignore block, wait until we find its closing tag.
			if ( '' === $ignore_block_element && preg_match( '/^<(' . $tags_to_ignore . ')[^>]*>/', $content, $matches ) ) {
				$ignore_block_element = $matches[1];
			}

			// If it's not a tag and not in ignore block.
			if ( '' === $ignore_block_element && strlen( $content ) > 0 && '<' !== $content[0] ) {
				$content = preg_replace_callback( $wp_smiliessearch, 'translate_smiley', $content );
			}

			// Did we exit ignore block?
			if ( '' !== $ignore_block_element && '</' . $ignore_block_element . '>' === $content ) {
				$ignore_block_element = '';
			}

			$output .= $content;
		}
	} else {
		// Return default text.
		$output = $text;
	}
	return $output;
}

/**
 * Verifies that an email is valid.
 *
 * Does not grok i18n domains. Not RFC compliant.
 *
 * Every outcome, success or failure, is passed through the {@see 'is_email'} filter
 * together with a context string naming the check that decided it.
 *
 * @since 0.71
 *
 * @param string $email      Email address to verify.
 * @param bool   $deprecated Deprecated.
 * @return string|false Valid email address on success, false on failure.
 */
function is_email( $email, $deprecated = false ) {
	if ( ! empty( $deprecated ) ) {
		_deprecated_argument( __FUNCTION__, '3.0.0' );
	}

	// Test for the minimum length the email can be.
	if ( strlen( $email ) < 6 ) {
		/**
		 * Filters whether an email address is valid.
		 *
		 * This filter is evaluated under several different contexts, such as 'email_too_short',
		 * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
		 * 'domain_no_periods', 'sub_hyphen_limits', 'sub_invalid_chars', or no specific context.
		 *
		 * @since 2.8.0
		 *
		 * @param string|false $is_email The email address if successfully passed the is_email() checks, false otherwise.
		 * @param string       $email    The email address being checked.
		 * @param string       $context  Context under which the email was tested.
		 */
		return apply_filters( 'is_email', false, $email, 'email_too_short' );
	}

	// Test for an @ character after the first position.
	if ( strpos( $email, '@', 1 ) === false ) {
		/** This filter is documented in wp-includes/formatting.php */
		return apply_filters( 'is_email', false, $email, 'email_no_at' );
	}

	// Split out the local and domain parts.
	list( $local, $domain ) = explode( '@', $email, 2 );

	// LOCAL PART
	// Test for invalid characters.
	if ( ! preg_match( '/^[a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]+$/', $local ) ) {
		/** This filter is documented in wp-includes/formatting.php */
		return apply_filters( 'is_email', false, $email, 'local_invalid_chars' );
	}

	// DOMAIN PART
	// Test for sequences of periods.
	if ( preg_match( '/\.{2,}/', $domain ) ) {
		/** This filter is documented in wp-includes/formatting.php */
		return apply_filters( 'is_email', false, $email, 'domain_period_sequence' );
	}

	// Test for leading and trailing periods and whitespace.
	if ( trim( $domain, " \t\n\r\0\x0B." ) !== $domain ) {
		/** This filter is documented in wp-includes/formatting.php */
		return apply_filters( 'is_email', false, $email, 'domain_period_limits' );
	}

	// Split the domain into subs.
	$subs = explode( '.', $domain );

	// Assume the domain will have at least two subs.
	if ( 2 > count( $subs ) ) {
		/** This filter is documented in wp-includes/formatting.php */
		return apply_filters( 'is_email', false, $email, 'domain_no_periods' );
	}

	// Loop through each sub.
	foreach ( $subs as $sub ) {
		// Test for leading and trailing hyphens and whitespace.
		if ( trim( $sub, " \t\n\r\0\x0B-" ) !== $sub ) {
			/** This filter is documented in wp-includes/formatting.php */
			return apply_filters( 'is_email', false, $email, 'sub_hyphen_limits' );
		}

		// Test for invalid characters.
		if ( ! preg_match( '/^[a-z0-9-]+$/i', $sub ) ) {
			/** This filter is documented in wp-includes/formatting.php */
			return apply_filters( 'is_email', false, $email, 'sub_invalid_chars' );
		}
	}

	// Congratulations, your email made it!
	/** This filter is documented in wp-includes/formatting.php */
	return apply_filters( 'is_email', $email, $email, null );
}

/**
 * Converts to ASCII from email subjects.
 *
 * Looks for a Q-encoded word (`=?charset?Q?text?=`) in the string. When one is found,
 * only its text is returned, with underscores turned into spaces and `=XX` hex
 * sequences decoded. Strings without such a word are returned unchanged.
 *
 * @since 1.2.0
 *
 * @param string $string Subject line.
 * @return string Converted string to ASCII.
 */
function wp_iso_descrambler( $string ) {
	/* this may only work with iso-8859-1, I'm afraid */
	if ( ! preg_match( '#\=\?(.+)\?Q\?(.+)\?\=#i', $string, $matches ) ) {
		return $string;
	}

	$subject = str_replace( '_', ' ', $matches[2] );

	return preg_replace_callback( '#\=([0-9a-f]{2})#i', '_wp_iso_convert', $subject );
}

/**
 * Helper function to convert hex encoded chars to ASCII.
 *
 * @since 3.1.0
 * @access private
 *
 * @param array $match The preg_replace_callback matches array.
 * @return string Converted chars.
 */
function _wp_iso_convert( $match ) {
	return chr( hexdec( strtolower( $match[1] ) ) );
}
/**
 * Given a date in the timezone of the site, returns that date in UTC.
 *
 * Requires and returns a date in the Y-m-d H:i:s format.
 * Return format can be overridden using the $format parameter.
 *
 * When the date cannot be parsed, the Unix epoch formatted with `$format` is returned.
 *
 * @since 1.2.0
 *
 * @param string $string The date to be converted, in the timezone of the site.
 * @param string $format The format string for the returned date. Default 'Y-m-d H:i:s'.
 * @return string Formatted version of the date, in UTC.
 */
function get_gmt_from_date( $string, $format = 'Y-m-d H:i:s' ) {
	$datetime = date_create( $string, wp_timezone() );

	if ( false === $datetime ) {
		return gmdate( $format, 0 );
	}

	return $datetime->setTimezone( new DateTimeZone( 'UTC' ) )->format( $format );
}

/**
 * Given a date in UTC or GMT timezone, returns that date in the timezone of the site.
 *
 * Requires a date in the Y-m-d H:i:s format.
 * Default return format of 'Y-m-d H:i:s' can be overridden using the `$format` parameter.
 *
 * When the date cannot be parsed, the Unix epoch formatted with `$format` is returned.
 *
 * @since 1.2.0
 *
 * @param string $string The date to be converted, in UTC or GMT timezone.
 * @param string $format The format string for the returned date. Default 'Y-m-d H:i:s'.
 * @return string Formatted version of the date, in the site's timezone.
 */
function get_date_from_gmt( $string, $format = 'Y-m-d H:i:s' ) {
	$datetime = date_create( $string, new DateTimeZone( 'UTC' ) );

	if ( false === $datetime ) {
		return gmdate( $format, 0 );
	}

	return $datetime->setTimezone( wp_timezone() )->format( $format );
}

/**
 * Given an ISO 8601 timezone, returns its UTC offset in seconds.
 *
 * Both the basic form (`+0530`) and the extended form with a colon (`+05:30`)
 * are understood. Any leading character other than '+' is treated as a minus sign.
 *
 * @since 1.5.0
 *
 * @param string $timezone Either 'Z' for 0 offset or '±hhmm' / '±hh:mm'.
 * @return int|float The offset in seconds.
 */
function iso8601_timezone_to_offset( $timezone ) {
	// $timezone is either 'Z' or '[+|-]hhmm' (optionally '[+|-]hh:mm').
	if ( 'Z' === $timezone ) {
		return 0;
	}

	$sign = ( '+' === substr( $timezone, 0, 1 ) ) ? 1 : -1;

	// Drop the sign and an optional colon so hours and minutes sit at fixed positions.
	$digits  = str_replace( ':', '', (string) substr( (string) $timezone, 1 ) );
	$hours   = (int) substr( $digits, 0, 2 );
	$minutes = (int) substr( $digits, 2, 2 ) / 60; // Expressed as a fraction of an hour.

	return $sign * HOUR_IN_SECONDS * ( $hours + $minutes );
}

/**
 * Given an ISO 8601 (Ymd\TH:i:sO) date, returns a MySQL DateTime (Y-m-d H:i:s) format used by post_date[_gmt].
 *
 * @since 1.5.0
 *
 * @param string $date_string Date and time in ISO 8601 format {@link https://en.wikipedia.org/wiki/ISO_8601}.
 * @param string $timezone    Optional. If set to 'gmt' returns the result in UTC. Default 'user'.
 * @return string|false The date and time in MySQL DateTime format - Y-m-d H:i:s, or false on failure.
 */
function iso8601_to_datetime( $date_string, $timezone = 'user' ) {
	$timezone    = strtolower( $timezone );
	$wp_timezone = wp_timezone();
	$datetime    = date_create( $date_string, $wp_timezone ); // Timezone is ignored if input has one.

	if ( false === $datetime ) {
		return false;
	}

	if ( 'gmt' === $timezone ) {
		return $datetime->setTimezone( new DateTimeZone( 'UTC' ) )->format( 'Y-m-d H:i:s' );
	}

	if ( 'user' === $timezone ) {
		return $datetime->setTimezone( $wp_timezone )->format( 'Y-m-d H:i:s' );
	}

	// Any other $timezone value is not supported.
	return false;
}

/**
 * Strips out all characters that are not allowable in an email.
 *
 * @since 1.5.0
 *
 * @param string $email Email address to filter.
 * @return string Filtered email address.
 */
function sanitize_email( $email ) {
	// Test for the minimum length the email can be.
	if ( strlen( $email ) < 6 ) {
		/**
		 * Filters a sanitized email address.
		 *
		 * This filter is evaluated under several contexts, including 'email_too_short',
		 * 'email_no_at', 'local_invalid_chars', 'domain_period_sequence', 'domain_period_limits',
		 * 'domain_no_periods', 'domain_no_valid_subs', or no context.
3682 * 3683 * @since 2.8.0 3684 * 3685 * @param string $sanitized_email The sanitized email address. 3686 * @param string $email The email address, as provided to sanitize_email(). 3687 * @param string|null $message A message to pass to the user. null if email is sanitized. 3688 */ 3689 return apply_filters( 'sanitize_email', '', $email, 'email_too_short' ); 3690 } 3691 3692 // Test for an @ character after the first position. 3693 if ( strpos( $email, '@', 1 ) === false ) { 3694 /** This filter is documented in wp-includes/formatting.php */ 3695 return apply_filters( 'sanitize_email', '', $email, 'email_no_at' ); 3696 } 3697 3698 // Split out the local and domain parts. 3699 list( $local, $domain ) = explode( '@', $email, 2 ); 3700 3701 // LOCAL PART 3702 // Test for invalid characters. 3703 $local = preg_replace( '/[^a-zA-Z0-9!#$%&\'*+\/=?^_`{|}~\.-]/', '', $local ); 3704 if ( '' === $local ) { 3705 /** This filter is documented in wp-includes/formatting.php */ 3706 return apply_filters( 'sanitize_email', '', $email, 'local_invalid_chars' ); 3707 } 3708 3709 // DOMAIN PART 3710 // Test for sequences of periods. 3711 $domain = preg_replace( '/\.{2,}/', '', $domain ); 3712 if ( '' === $domain ) { 3713 /** This filter is documented in wp-includes/formatting.php */ 3714 return apply_filters( 'sanitize_email', '', $email, 'domain_period_sequence' ); 3715 } 3716 3717 // Test for leading and trailing periods and whitespace. 3718 $domain = trim( $domain, " \t\n\r\0\x0B." ); 3719 if ( '' === $domain ) { 3720 /** This filter is documented in wp-includes/formatting.php */ 3721 return apply_filters( 'sanitize_email', '', $email, 'domain_period_limits' ); 3722 } 3723 3724 // Split the domain into subs. 3725 $subs = explode( '.', $domain ); 3726 3727 // Assume the domain will have at least two subs. 
3728 if ( 2 > count( $subs ) ) { 3729 /** This filter is documented in wp-includes/formatting.php */ 3730 return apply_filters( 'sanitize_email', '', $email, 'domain_no_periods' ); 3731 } 3732 3733 // Create an array that will contain valid subs. 3734 $new_subs = array(); 3735 3736 // Loop through each sub. 3737 foreach ( $subs as $sub ) { 3738 // Test for leading and trailing hyphens. 3739 $sub = trim( $sub, " \t\n\r\0\x0B-" ); 3740 3741 // Test for invalid characters. 3742 $sub = preg_replace( '/[^a-z0-9-]+/i', '', $sub ); 3743 3744 // If there's anything left, add it to the valid subs. 3745 if ( '' !== $sub ) { 3746 $new_subs[] = $sub; 3747 } 3748 } 3749 3750 // If there aren't 2 or more valid subs. 3751 if ( 2 > count( $new_subs ) ) { 3752 /** This filter is documented in wp-includes/formatting.php */ 3753 return apply_filters( 'sanitize_email', '', $email, 'domain_no_valid_subs' ); 3754 } 3755 3756 // Join valid subs into the new domain. 3757 $domain = implode( '.', $new_subs ); 3758 3759 // Put the email back together. 3760 $sanitized_email = $local . '@' . $domain; 3761 3762 // Congratulations, your email made it! 3763 /** This filter is documented in wp-includes/formatting.php */ 3764 return apply_filters( 'sanitize_email', $sanitized_email, $email, null ); 3765 } 3766 3767 /** 3768 * Determines the difference between two timestamps. 3769 * 3770 * The difference is returned in a human readable format such as "1 hour", 3771 * "5 mins", "2 days". 3772 * 3773 * @since 1.5.0 3774 * @since 5.3.0 Added support for showing a difference in seconds. 3775 * 3776 * @param int $from Unix timestamp from which the difference begins. 3777 * @param int $to Optional. Unix timestamp to end the time difference. Default becomes time() if not set. 3778 * @return string Human readable time difference. 
/**
 * Describes the gap between two timestamps in words.
 *
 * The result uses the largest fitting unit, for example "1 hour",
 * "5 mins" or "2 days". The direction of the difference is ignored.
 *
 * @since 1.5.0
 * @since 5.3.0 Added support for showing a difference in seconds.
 *
 * @param int $from Unix timestamp from which the difference begins.
 * @param int $to   Optional. Unix timestamp to end the time difference. Default becomes time() if not set.
 * @return string Human readable time difference.
 */
function human_time_diff( $from, $to = 0 ) {
	if ( empty( $to ) ) {
		$to = time();
	}

	$diff = (int) abs( $to - $from );

	// Each branch is only reached when $diff is at least the previous unit,
	// and every count is clamped to a minimum of 1.
	if ( $diff < MINUTE_IN_SECONDS ) {
		$secs = max( 1, $diff );
		/* translators: Time difference between two dates, in seconds. %s: Number of seconds. */
		$since = sprintf( _n( '%s second', '%s seconds', $secs ), $secs );
	} elseif ( $diff < HOUR_IN_SECONDS ) {
		$mins = max( 1, round( $diff / MINUTE_IN_SECONDS ) );
		/* translators: Time difference between two dates, in minutes (min=minute). %s: Number of minutes. */
		$since = sprintf( _n( '%s min', '%s mins', $mins ), $mins );
	} elseif ( $diff < DAY_IN_SECONDS ) {
		$hours = max( 1, round( $diff / HOUR_IN_SECONDS ) );
		/* translators: Time difference between two dates, in hours. %s: Number of hours. */
		$since = sprintf( _n( '%s hour', '%s hours', $hours ), $hours );
	} elseif ( $diff < WEEK_IN_SECONDS ) {
		$days = max( 1, round( $diff / DAY_IN_SECONDS ) );
		/* translators: Time difference between two dates, in days. %s: Number of days. */
		$since = sprintf( _n( '%s day', '%s days', $days ), $days );
	} elseif ( $diff < MONTH_IN_SECONDS ) {
		$weeks = max( 1, round( $diff / WEEK_IN_SECONDS ) );
		/* translators: Time difference between two dates, in weeks. %s: Number of weeks. */
		$since = sprintf( _n( '%s week', '%s weeks', $weeks ), $weeks );
	} elseif ( $diff < YEAR_IN_SECONDS ) {
		$months = max( 1, round( $diff / MONTH_IN_SECONDS ) );
		/* translators: Time difference between two dates, in months. %s: Number of months. */
		$since = sprintf( _n( '%s month', '%s months', $months ), $months );
	} else {
		$years = max( 1, round( $diff / YEAR_IN_SECONDS ) );
		/* translators: Time difference between two dates, in years. %s: Number of years. */
		$since = sprintf( _n( '%s year', '%s years', $years ), $years );
	}

	/**
	 * Filters the human readable difference between two timestamps.
	 *
	 * @since 4.0.0
	 *
	 * @param string $since The difference in human readable text.
	 * @param int    $diff  The difference in seconds.
	 * @param int    $from  Unix timestamp from which the difference begins.
	 * @param int    $to    Unix timestamp to end the time difference.
	 */
	return apply_filters( 'human_time_diff', $since, $diff, $from, $to );
}

/**
 * Generates an excerpt from the content, if needed.
 *
 * Returns a maximum of 55 words with an ellipsis appended if necessary.
 *
 * The 55 word limit can be modified by plugins/themes using the {@see 'excerpt_length'} filter
 * The ' […]' string can be modified by plugins/themes using the {@see 'excerpt_more'} filter
 *
 * @since 1.5.0
 * @since 5.2.0 Added the `$post` parameter.
 *
 * @param string             $text Optional. The excerpt. If set to empty, an excerpt is generated.
 * @param WP_Post|object|int $post Optional. WP_Post instance or Post ID/object. Default null.
 * @return string The excerpt.
 */
/**
 * Generates an excerpt from the content, if needed.
 *
 * Returns a maximum of 55 words with an ellipsis appended if necessary.
 *
 * The 55 word limit can be modified by plugins/themes using the {@see 'excerpt_length'} filter
 * The ' [&hellip;]' string can be modified by plugins/themes using the {@see 'excerpt_more'} filter
 *
 * @since 1.5.0
 * @since 5.2.0 Added the `$post` parameter.
 *
 * @param string             $text Optional. The excerpt. If set to empty, an excerpt is generated.
 * @param WP_Post|object|int $post Optional. WP_Post instance or Post ID/object. Default null.
 * @return string The excerpt.
 */
function wp_trim_excerpt( $text = '', $post = null ) {
	$raw_excerpt = $text;

	// Only build an excerpt when the caller did not supply one.
	if ( '' === trim( $text ) ) {
		$post = get_post( $post );
		$text = get_the_content( '', false, $post );

		$text = strip_shortcodes( $text );
		$text = excerpt_remove_blocks( $text );

		/** This filter is documented in wp-includes/post-template.php */
		$text = apply_filters( 'the_content', $text );

		// Escape a literal CDATA terminator so the excerpt cannot close an enclosing CDATA section.
		$text = str_replace( ']]>', ']]&gt;', $text );

		/* translators: Maximum number of words used in a post excerpt. */
		$excerpt_length = (int) _x( '55', 'excerpt_length' );

		/**
		 * Filters the maximum number of words in a post excerpt.
		 *
		 * @since 2.7.0
		 *
		 * @param int $number The maximum number of words. Default 55.
		 */
		$excerpt_length = (int) apply_filters( 'excerpt_length', $excerpt_length );

		/**
		 * Filters the string in the "more" link displayed after a trimmed excerpt.
		 *
		 * @since 2.9.0
		 *
		 * @param string $more_string The string shown within the more link.
		 */
		$excerpt_more = apply_filters( 'excerpt_more', ' ' . '[&hellip;]' );
		$text         = wp_trim_words( $text, $excerpt_length, $excerpt_more );
	}

	/**
	 * Filters the trimmed excerpt string.
	 *
	 * @since 2.8.0
	 *
	 * @param string $text        The trimmed text.
	 * @param string $raw_excerpt The text prior to trimming.
	 */
	return apply_filters( 'wp_trim_excerpt', $text, $raw_excerpt );
}

/**
 * Trims text to a certain number of words.
 *
 * This function is localized. For languages that count 'words' by the individual
 * character (such as East Asian languages), the $num_words argument will apply
 * to the number of individual characters.
 *
 * @since 3.3.0
 *
 * @param string $text      Text to trim.
 * @param int    $num_words Number of words. Default 55.
 * @param string $more      Optional.
 */
/**
 * Cuts text down to a maximum number of words.
 *
 * This function is localized. For languages that count 'words' by the individual
 * character (such as East Asian languages), the $num_words argument will apply
 * to the number of individual characters.
 *
 * @since 3.3.0
 *
 * @param string $text      Text to trim.
 * @param int    $num_words Number of words. Default 55.
 * @param string $more      Optional. What to append if $text needs to be trimmed. Default '…'.
 * @return string Trimmed text.
 */
function wp_trim_words( $text, $num_words = 55, $more = null ) {
	if ( null === $more ) {
		$more = __( '…' );
	}

	$original_text = $text;
	$text          = wp_strip_all_tags( $text );
	$num_words     = (int) $num_words;

	/*
	 * translators: If your word count is based on single characters (e.g. East Asian characters),
	 * enter 'characters_excluding_spaces' or 'characters_including_spaces'. Otherwise, enter 'words'.
	 * Do not translate into your own language.
	 */
	$count_type = _x( 'words', 'Word count type. Do not translate!' );

	// Per-character counting is only used when the blog charset is UTF-8.
	$per_character = strpos( $count_type, 'characters' ) === 0
		&& preg_match( '/^utf\-?8$/i', get_option( 'blog_charset' ) );

	// One element more than the limit is collected so an overflow can be detected.
	if ( $per_character ) {
		$text = trim( preg_replace( "/[\n\r\t ]+/", ' ', $text ), ' ' );
		preg_match_all( '/./u', $text, $words_array );
		$words_array = array_slice( $words_array[0], 0, $num_words + 1 );
		$sep         = '';
	} else {
		$words_array = preg_split( "/[\n\r\t ]+/", $text, $num_words + 1, PREG_SPLIT_NO_EMPTY );
		$sep         = ' ';
	}

	$was_trimmed = count( $words_array ) > $num_words;

	if ( $was_trimmed ) {
		// Discard the overflow element.
		array_pop( $words_array );
	}

	$text = implode( $sep, $words_array );

	if ( $was_trimmed ) {
		$text = $text . $more;
	}

	/**
	 * Filters the text content after words have been trimmed.
	 *
	 * @since 3.3.0
	 *
	 * @param string $text          The trimmed text.
	 * @param int    $num_words     The number of words to trim the text to. Default 55.
	 * @param string $more          An optional string to append to the end of the trimmed text, e.g. ….
	 * @param string $original_text The text before it was trimmed.
	 */
	return apply_filters( 'wp_trim_words', $text, $num_words, $more, $original_text );
}

/**
 * Converts named entities into numbered entities.
 */
/**
 * Converts named entities into numbered entities.
 *
 * Each HTML named character reference in the lookup table (for example
 * `&copy;`) is replaced by its decimal numeric character reference
 * (`&#169;`). A literal `|` is replaced by `&#124;` as well.
 *
 * @since 1.5.1
 *
 * @param string $text The text within which entities will be converted.
 * @return string Text with converted entities.
 */
function ent2ncr( $text ) {

	/**
	 * Filters text before named entities are converted into numbered entities.
	 *
	 * A non-null string must be returned for the filter to be evaluated.
	 *
	 * @since 3.3.0
	 *
	 * @param string|null $converted_text The text to be converted. Default null.
	 * @param string      $text           The text prior to entity conversion.
	 */
	$filtered = apply_filters( 'pre_ent2ncr', null, $text );
	if ( null !== $filtered ) {
		return $filtered;
	}

	// Keys are entity names, values are numeric references.
	// '&brkbar;', '&die;' and '&hibar;' are aliases that share a code point with the preceding entry.
	$to_ncr = array(
		'&quot;' => '&#34;',
		'&amp;' => '&#38;',
		'&lt;' => '&#60;',
		'&gt;' => '&#62;',
		'|' => '&#124;',
		'&nbsp;' => '&#160;',
		'&iexcl;' => '&#161;',
		'&cent;' => '&#162;',
		'&pound;' => '&#163;',
		'&curren;' => '&#164;',
		'&yen;' => '&#165;',
		'&brvbar;' => '&#166;',
		'&brkbar;' => '&#166;',
		'&sect;' => '&#167;',
		'&uml;' => '&#168;',
		'&die;' => '&#168;',
		'&copy;' => '&#169;',
		'&ordf;' => '&#170;',
		'&laquo;' => '&#171;',
		'&not;' => '&#172;',
		'&shy;' => '&#173;',
		'&reg;' => '&#174;',
		'&macr;' => '&#175;',
		'&hibar;' => '&#175;',
		'&deg;' => '&#176;',
		'&plusmn;' => '&#177;',
		'&sup2;' => '&#178;',
		'&sup3;' => '&#179;',
		'&acute;' => '&#180;',
		'&micro;' => '&#181;',
		'&para;' => '&#182;',
		'&middot;' => '&#183;',
		'&cedil;' => '&#184;',
		'&sup1;' => '&#185;',
		'&ordm;' => '&#186;',
		'&raquo;' => '&#187;',
		'&frac14;' => '&#188;',
		'&frac12;' => '&#189;',
		'&frac34;' => '&#190;',
		'&iquest;' => '&#191;',
		'&Agrave;' => '&#192;',
		'&Aacute;' => '&#193;',
		'&Acirc;' => '&#194;',
		'&Atilde;' => '&#195;',
		'&Auml;' => '&#196;',
		'&Aring;' => '&#197;',
		'&AElig;' => '&#198;',
		'&Ccedil;' => '&#199;',
		'&Egrave;' => '&#200;',
		'&Eacute;' => '&#201;',
		'&Ecirc;' => '&#202;',
		'&Euml;' => '&#203;',
		'&Igrave;' => '&#204;',
		'&Iacute;' => '&#205;',
		'&Icirc;' => '&#206;',
		'&Iuml;' => '&#207;',
		'&ETH;' => '&#208;',
		'&Ntilde;' => '&#209;',
		'&Ograve;' => '&#210;',
		'&Oacute;' => '&#211;',
		'&Ocirc;' => '&#212;',
		'&Otilde;' => '&#213;',
		'&Ouml;' => '&#214;',
		'&times;' => '&#215;',
		'&Oslash;' => '&#216;',
		'&Ugrave;' => '&#217;',
		'&Uacute;' => '&#218;',
		'&Ucirc;' => '&#219;',
		'&Uuml;' => '&#220;',
		'&Yacute;' => '&#221;',
		'&THORN;' => '&#222;',
		'&szlig;' => '&#223;',
		'&agrave;' => '&#224;',
		'&aacute;' => '&#225;',
		'&acirc;' => '&#226;',
		'&atilde;' => '&#227;',
		'&auml;' => '&#228;',
		'&aring;' => '&#229;',
		'&aelig;' => '&#230;',
		'&ccedil;' => '&#231;',
		'&egrave;' => '&#232;',
		'&eacute;' => '&#233;',
		'&ecirc;' => '&#234;',
		'&euml;' => '&#235;',
		'&igrave;' => '&#236;',
		'&iacute;' => '&#237;',
		'&icirc;' => '&#238;',
		'&iuml;' => '&#239;',
		'&eth;' => '&#240;',
		'&ntilde;' => '&#241;',
		'&ograve;' => '&#242;',
		'&oacute;' => '&#243;',
		'&ocirc;' => '&#244;',
		'&otilde;' => '&#245;',
		'&ouml;' => '&#246;',
		'&divide;' => '&#247;',
		'&oslash;' => '&#248;',
		'&ugrave;' => '&#249;',
		'&uacute;' => '&#250;',
		'&ucirc;' => '&#251;',
		'&uuml;' => '&#252;',
		'&yacute;' => '&#253;',
		'&thorn;' => '&#254;',
		'&yuml;' => '&#255;',
		'&OElig;' => '&#338;',
		'&oelig;' => '&#339;',
		'&Scaron;' => '&#352;',
		'&scaron;' => '&#353;',
		'&Yuml;' => '&#376;',
		'&fnof;' => '&#402;',
		'&circ;' => '&#710;',
		'&tilde;' => '&#732;',
		'&Alpha;' => '&#913;',
		'&Beta;' => '&#914;',
		'&Gamma;' => '&#915;',
		'&Delta;' => '&#916;',
		'&Epsilon;' => '&#917;',
		'&Zeta;' => '&#918;',
		'&Eta;' => '&#919;',
		'&Theta;' => '&#920;',
		'&Iota;' => '&#921;',
		'&Kappa;' => '&#922;',
		'&Lambda;' => '&#923;',
		'&Mu;' => '&#924;',
		'&Nu;' => '&#925;',
		'&Xi;' => '&#926;',
		'&Omicron;' => '&#927;',
		'&Pi;' => '&#928;',
		'&Rho;' => '&#929;',
		'&Sigma;' => '&#931;',
		'&Tau;' => '&#932;',
		'&Upsilon;' => '&#933;',
		'&Phi;' => '&#934;',
		'&Chi;' => '&#935;',
		'&Psi;' => '&#936;',
		'&Omega;' => '&#937;',
		'&alpha;' => '&#945;',
		'&beta;' => '&#946;',
		'&gamma;' => '&#947;',
		'&delta;' => '&#948;',
		'&epsilon;' => '&#949;',
		'&zeta;' => '&#950;',
		'&eta;' => '&#951;',
		'&theta;' => '&#952;',
		'&iota;' => '&#953;',
		'&kappa;' => '&#954;',
		'&lambda;' => '&#955;',
		'&mu;' => '&#956;',
		'&nu;' => '&#957;',
		'&xi;' => '&#958;',
		'&omicron;' => '&#959;',
		'&pi;' => '&#960;',
		'&rho;' => '&#961;',
		'&sigmaf;' => '&#962;',
		'&sigma;' => '&#963;',
		'&tau;' => '&#964;',
		'&upsilon;' => '&#965;',
		'&phi;' => '&#966;',
		'&chi;' => '&#967;',
		'&psi;' => '&#968;',
		'&omega;' => '&#969;',
		'&thetasym;' => '&#977;',
		'&upsih;' => '&#978;',
		'&piv;' => '&#982;',
		'&ensp;' => '&#8194;',
		'&emsp;' => '&#8195;',
		'&thinsp;' => '&#8201;',
		'&zwnj;' => '&#8204;',
		'&zwj;' => '&#8205;',
		'&lrm;' => '&#8206;',
		'&rlm;' => '&#8207;',
		'&ndash;' => '&#8211;',
		'&mdash;' => '&#8212;',
		'&lsquo;' => '&#8216;',
		'&rsquo;' => '&#8217;',
		'&sbquo;' => '&#8218;',
		'&ldquo;' => '&#8220;',
		'&rdquo;' => '&#8221;',
		'&bdquo;' => '&#8222;',
		'&dagger;' => '&#8224;',
		'&Dagger;' => '&#8225;',
		'&bull;' => '&#8226;',
		'&hellip;' => '&#8230;',
		'&permil;' => '&#8240;',
		'&prime;' => '&#8242;',
		'&Prime;' => '&#8243;',
		'&lsaquo;' => '&#8249;',
		'&rsaquo;' => '&#8250;',
		'&oline;' => '&#8254;',
		'&frasl;' => '&#8260;',
		'&euro;' => '&#8364;',
		'&image;' => '&#8465;',
		'&weierp;' => '&#8472;',
		'&real;' => '&#8476;',
		'&trade;' => '&#8482;',
		'&alefsym;' => '&#8501;',
		'&crarr;' => '&#8629;',
		'&lArr;' => '&#8656;',
		'&uArr;' => '&#8657;',
		'&rArr;' => '&#8658;',
		'&dArr;' => '&#8659;',
		'&hArr;' => '&#8660;',
		'&forall;' => '&#8704;',
		'&part;' => '&#8706;',
		'&exist;' => '&#8707;',
		'&empty;' => '&#8709;',
		'&nabla;' => '&#8711;',
		'&isin;' => '&#8712;',
		'&notin;' => '&#8713;',
		'&ni;' => '&#8715;',
		'&prod;' => '&#8719;',
		'&sum;' => '&#8721;',
		'&minus;' => '&#8722;',
		'&lowast;' => '&#8727;',
		'&radic;' => '&#8730;',
		'&prop;' => '&#8733;',
		'&infin;' => '&#8734;',
		'&ang;' => '&#8736;',
		'&and;' => '&#8743;',
		'&or;' => '&#8744;',
		'&cap;' => '&#8745;',
		'&cup;' => '&#8746;',
		'&int;' => '&#8747;',
		'&there4;' => '&#8756;',
		'&sim;' => '&#8764;',
		'&cong;' => '&#8773;',
		'&asymp;' => '&#8776;',
		'&ne;' => '&#8800;',
		'&equiv;' => '&#8801;',
		'&le;' => '&#8804;',
		'&ge;' => '&#8805;',
		'&sub;' => '&#8834;',
		'&sup;' => '&#8835;',
		'&nsub;' => '&#8836;',
		'&sube;' => '&#8838;',
		'&supe;' => '&#8839;',
		'&oplus;' => '&#8853;',
		'&otimes;' => '&#8855;',
		'&perp;' => '&#8869;',
		'&sdot;' => '&#8901;',
		'&lceil;' => '&#8968;',
		'&rceil;' => '&#8969;',
		'&lfloor;' => '&#8970;',
		'&rfloor;' => '&#8971;',
		'&lang;' => '&#9001;',
		'&rang;' => '&#9002;',
		'&larr;' => '&#8592;',
		'&uarr;' => '&#8593;',
		'&rarr;' => '&#8594;',
		'&darr;' => '&#8595;',
		'&harr;' => '&#8596;',
		'&loz;' => '&#9674;',
		'&spades;' => '&#9824;',
		'&clubs;' => '&#9827;',
		'&hearts;' => '&#9829;',
		'&diams;' => '&#9830;',
	);

	return str_replace( array_keys( $to_ncr ), array_values( $to_ncr ), $text );
}

/**
 * Formats text for the editor.
 *
 * Generally the browsers treat everything inside a textarea as text, but
 * it is still a good idea to HTML entity encode `<`, `>` and `&` in the content.
 *
 * The filter {@see 'format_for_editor'} is applied here. If `$text` is empty the
 * filter will be applied to an empty string.
 *
 * @since 4.3.0
 *
 * @see _WP_Editors::editor()
 *
 * @param string $text           The text to be formatted.
 * @param string $default_editor The default editor for the current user.
 *                               It is usually either 'html' or 'tinymce'.
 * @return string The formatted text after filter is applied.
 */
/**
 * Prepares text for display inside the editor.
 *
 * Generally the browsers treat everything inside a textarea as text, but
 * it is still a good idea to HTML entity encode `<`, `>` and `&` in the content.
 *
 * The filter {@see 'format_for_editor'} is applied here. If `$text` is empty the
 * filter will be applied to an empty string.
 *
 * @since 4.3.0
 *
 * @see _WP_Editors::editor()
 *
 * @param string $text           The text to be formatted.
 * @param string $default_editor The default editor for the current user.
 *                               It is usually either 'html' or 'tinymce'.
 * @return string The formatted text after filter is applied.
 */
function format_for_editor( $text, $default_editor = null ) {
	// Falsy text is passed to the filter untouched.
	if ( $text ) {
		$charset = get_option( 'blog_charset' );
		$text    = htmlspecialchars( $text, ENT_NOQUOTES, $charset );
	}

	/**
	 * Filters the text after it is formatted for the editor.
	 *
	 * @since 4.3.0
	 *
	 * @param string $text           The formatted text.
	 * @param string $default_editor The default editor for the current user.
	 *                               It is usually either 'html' or 'tinymce'.
	 */
	return apply_filters( 'format_for_editor', $text, $default_editor );
}

/**
 * Removes every occurrence of $search from $subject, including occurrences
 * that only appear once an earlier removal has joined their pieces together.
 *
 * The replacement is repeated until a pass removes nothing,
 * e.g. $subject = '%0%0%0DDD', $search ='%0D', $result ='' rather than the '%0%0DD' that
 * str_replace would return
 *
 * @since 2.8.1
 * @access private
 *
 * @param string|array $search  The value being searched for, otherwise known as the needle.
 *                              An array may be used to designate multiple needles.
 * @param string       $subject The string being searched and replaced on, otherwise known as the haystack.
 * @return string The string with the replaced values.
 */
function _deep_replace( $search, $subject ) {
	$subject = (string) $subject;

	// str_replace() reports how many replacements a pass made; stop at zero.
	do {
		$subject = str_replace( $search, '', $subject, $replaced );
	} while ( $replaced );

	return $subject;
}

/**
 * Escapes data for use in a MySQL query.
 *
 * Usually you should prepare queries using wpdb::prepare().
 * Sometimes, spot-escaping is required or useful. One example
 * is preparing an array for use in an IN clause.
 *
 * NOTE: Since 4.8.3, '%' characters will be replaced with a placeholder string,
 * this prevents certain SQLi attacks from taking place.
 */
/**
 * Escapes data for use in a MySQL query.
 *
 * Usually you should prepare queries using wpdb::prepare().
 * Sometimes, spot-escaping is required or useful. One example
 * is preparing an array for use in an IN clause.
 *
 * NOTE: Since 4.8.3, '%' characters will be replaced with a placeholder string,
 * this prevents certain SQLi attacks from taking place. This change in behaviour
 * may cause issues for code that expects the return value of esc_sql() to be useable
 * for other purposes.
 *
 * @since 2.8.0
 *
 * @global wpdb $wpdb WordPress database abstraction object.
 *
 * @param string|array $data Unescaped data
 * @return string|array Escaped data
 */
function esc_sql( $data ) {
	// Delegates entirely to the global database object.
	return $GLOBALS['wpdb']->_escape( $data );
}

/**
 * Checks and cleans a URL.
 *
 * A number of characters are removed from the URL. If the URL is for displaying
 * (the default behaviour) ampersands are also replaced. The {@see 'clean_url'} filter
 * is applied to the returned cleaned URL.
 *
 * @since 2.8.0
 *
 * @param string   $url       The URL to be cleaned.
 * @param string[] $protocols Optional. An array of acceptable protocols.
 *                            Defaults to return value of wp_allowed_protocols().
 * @param string   $_context  Private. Use esc_url_raw() for database usage.
 * @return string The cleaned URL after the {@see 'clean_url'} filter is applied.
 *                An empty string is returned if `$url` specifies a protocol other than
 *                those in `$protocols`, or if `$url` contains an empty string.
 */
/**
 * Checks and cleans a URL.
 *
 * A number of characters are removed from the URL. If the URL is for displaying
 * (the default behaviour) ampersands and single quotes are also replaced by
 * numeric character references. The {@see 'clean_url'} filter is applied to the
 * returned cleaned URL.
 *
 * @since 2.8.0
 *
 * @param string   $url       The URL to be cleaned.
 * @param string[] $protocols Optional. An array of acceptable protocols.
 *                            Defaults to return value of wp_allowed_protocols().
 * @param string   $_context  Private. Use esc_url_raw() for database usage.
 * @return string The cleaned URL after the {@see 'clean_url'} filter is applied.
 *                An empty string is returned if `$url` specifies a protocol other than
 *                those in `$protocols`, or if `$url` contains an empty string.
 */
function esc_url( $url, $protocols = null, $_context = 'display' ) {
	$original_url = $url;

	if ( '' === $url ) {
		return $url;
	}

	$url = str_replace( ' ', '%20', ltrim( $url ) );
	$url = preg_replace( '|[^a-z0-9-~+_.?#=!&;,/:%@$\|*\'()\[\]\\x80-\\xff]|i', '', $url );

	// Stripping may have left nothing behind.
	if ( '' === $url ) {
		return $url;
	}

	// Encoded CR/LF sequences are removed from everything except mailto: links.
	if ( 0 !== stripos( $url, 'mailto:' ) ) {
		$strip = array( '%0d', '%0a', '%0D', '%0A' );
		$url   = _deep_replace( $strip, $url );
	}

	$url = str_replace( ';//', '://', $url );

	/*
	 * If the URL doesn't appear to contain a scheme, we presume
	 * it needs http:// prepended (unless it's a relative link
	 * starting with /, # or ?, or a PHP file).
	 */
	if ( strpos( $url, ':' ) === false && ! in_array( $url[0], array( '/', '#', '?' ), true ) &&
		! preg_match( '/^[a-z0-9-]+?\.php/i', $url ) ) {
		$url = 'http://' . $url;
	}

	// Replace ampersands and single quotes only when displaying.
	if ( 'display' === $_context ) {
		$url = wp_kses_normalize_entities( $url );
		$url = str_replace( '&amp;', '&#038;', $url );
		$url = str_replace( "'", '&#039;', $url );
	}

	// Percent-encode square brackets that appear after the scheme/credentials/host/port prefix.
	if ( ( false !== strpos( $url, '[' ) ) || ( false !== strpos( $url, ']' ) ) ) {

		$parsed = wp_parse_url( $url );
		$front  = '';

		if ( isset( $parsed['scheme'] ) ) {
			$front .= $parsed['scheme'] . '://';
		} elseif ( '/' === $url[0] ) {
			$front .= '//';
		}

		if ( isset( $parsed['user'] ) ) {
			$front .= $parsed['user'];
		}

		if ( isset( $parsed['pass'] ) ) {
			$front .= ':' . $parsed['pass'];
		}

		if ( isset( $parsed['user'] ) || isset( $parsed['pass'] ) ) {
			$front .= '@';
		}

		if ( isset( $parsed['host'] ) ) {
			$front .= $parsed['host'];
		}

		if ( isset( $parsed['port'] ) ) {
			$front .= ':' . $parsed['port'];
		}

		$end_dirty = str_replace( $front, '', $url );
		$end_clean = str_replace( array( '[', ']' ), array( '%5B', '%5D' ), $end_dirty );
		$url       = str_replace( $end_dirty, $end_clean, $url );

	}

	// URLs starting with '/' skip the protocol check.
	if ( '/' === $url[0] ) {
		$good_protocol_url = $url;
	} else {
		if ( ! is_array( $protocols ) ) {
			$protocols = wp_allowed_protocols();
		}
		$good_protocol_url = wp_kses_bad_protocol( $url, $protocols );

		// Reject the URL outright if the protocol check had to alter it.
		if ( strtolower( $good_protocol_url ) != strtolower( $url ) ) {
			return '';
		}
	}

	/**
	 * Filters a string cleaned and escaped for output as a URL.
	 *
	 * @since 2.3.0
	 *
	 * @param string $good_protocol_url The cleaned URL to be returned.
	 * @param string $original_url      The URL prior to cleaning.
	 * @param string $_context          If 'display', replace ampersands and single quotes only.
	 */
	return apply_filters( 'clean_url', $good_protocol_url, $original_url, $_context );
}

/**
 * Performs esc_url() for database or redirect usage.
 *
 * @since 2.8.0
 *
 * @see esc_url()
 *
 * @param string   $url       The URL to be cleaned.
 * @param string[] $protocols Optional. An array of acceptable protocols.
 *                            Defaults to return value of wp_allowed_protocols().
 * @return string The cleaned URL after esc_url() is run with the 'db' context.
 */
function esc_url_raw( $url, $protocols = null ) {
	return esc_url( $url, $protocols, 'db' );
}

/**
 * Performs esc_url() for database or redirect usage.
 *
 * This function is an alias for esc_url_raw().
 *
 * @since 2.3.1
 * @since 2.8.0 Deprecated in favor of esc_url_raw().
 * @since 5.9.0 Restored (un-deprecated).
 *
 * @see esc_url_raw()
 *
 * @param string   $url       The URL to be cleaned.
 * @param string[] $protocols Optional. An array of acceptable protocols.
 */
/**
 * Performs esc_url() for database or redirect usage.
 *
 * This function is an alias for esc_url_raw().
 *
 * @since 2.3.1
 * @since 2.8.0 Deprecated in favor of esc_url_raw().
 * @since 5.9.0 Restored (un-deprecated).
 *
 * @see esc_url_raw()
 *
 * @param string   $url       The URL to be cleaned.
 * @param string[] $protocols Optional. An array of acceptable protocols.
 *                            Defaults to return value of wp_allowed_protocols().
 * @return string The cleaned URL after esc_url() is run with the 'db' context.
 */
function sanitize_url( $url, $protocols = null ) {
	return esc_url_raw( $url, $protocols );
}

/**
 * Converts entities, while preserving already-encoded entities.
 *
 * @link https://www.php.net/htmlentities Borrowed from the PHP Manual user notes.
 *
 * @since 1.2.2
 *
 * @param string $myHTML The text to be converted.
 * @return string Converted text.
 */
function htmlentities2( $myHTML ) {
	$translation_table = get_html_translation_table( HTML_ENTITIES, ENT_QUOTES );

	// Leave ampersands alone during translation so existing entities are not double-encoded.
	$translation_table[ chr( 38 ) ] = '&';

	// Afterwards, encode only those ampersands that do not start an entity.
	return preg_replace( '/&(?![A-Za-z]{0,4}\w{2,3};|#[0-9]{2,3};)/', '&amp;', strtr( $myHTML, $translation_table ) );
}

/**
 * Escapes single quotes, `"`, `<`, `>`, `&`, and fixes line endings.
 *
 * Escapes text strings for echoing in JS. It is intended to be used for inline JS
 * (in a tag attribute, for example `onclick="..."`). Note that the strings have to
 * be in single quotes. The {@see 'js_escape'} filter is also applied here.
 *
 * @since 2.8.0
 *
 * @param string $text The text to be escaped.
 * @return string Escaped text.
 */
function esc_js( $text ) {
	$safe_text = wp_check_invalid_utf8( $text );
	$safe_text = _wp_specialchars( $safe_text, ENT_COMPAT );

	// Turn numeric references for the apostrophe (decimal 39 / hex 27) back into a literal quote.
	$safe_text = preg_replace( '/&#(x)?0*(?(1)27|39);?/i', "'", stripslashes( $safe_text ) );

	// Drop carriage returns, then slash-escape and turn newlines into a literal "\n".
	$safe_text = str_replace( "\r", '', $safe_text );
	$safe_text = str_replace( "\n", '\\n', addslashes( $safe_text ) );

	/**
	 * Filters a string cleaned and escaped for output in JavaScript.
	 *
	 * Text passed to esc_js() is stripped of invalid or special characters,
	 * and properly slashed for output.
	 *
	 * @since 2.0.6
	 *
	 * @param string $safe_text The text after it has been escaped.
	 * @param string $text      The text prior to being escaped.
	 */
	return apply_filters( 'js_escape', $safe_text, $text );
}

/**
 * Escaping for HTML blocks.
 *
 * @since 2.8.0
 *
 * @param string $text The text to be escaped.
 * @return string Escaped text, after the {@see 'esc_html'} filter is applied.
 */
function esc_html( $text ) {
	$safe_text = wp_check_invalid_utf8( $text );
	$safe_text = _wp_specialchars( $safe_text, ENT_QUOTES );

	/**
	 * Filters a string cleaned and escaped for output in HTML.
	 *
	 * Text passed to esc_html() is stripped of invalid or special characters
	 * before output.
	 *
	 * @since 2.8.0
	 *
	 * @param string $safe_text The text after it has been escaped.
	 * @param string $text      The text prior to being escaped.
	 */
	return apply_filters( 'esc_html', $safe_text, $text );
}

/**
 * Escaping for HTML attributes.
 *
 * @since 2.8.0
 *
 * @param string $text The text to be escaped.
 * @return string Escaped text, after the {@see 'attribute_escape'} filter is applied.
 */
function esc_attr( $text ) {
	$safe_text = wp_check_invalid_utf8( $text );
	$safe_text = _wp_specialchars( $safe_text, ENT_QUOTES );

	/**
	 * Filters a string cleaned and escaped for output in an HTML attribute.
	 *
	 * Text passed to esc_attr() is stripped of invalid or special characters
	 * before output.
	 *
	 * @since 2.0.6
	 *
	 * @param string $safe_text The text after it has been escaped.
	 * @param string $text      The text prior to being escaped.
	 */
	return apply_filters( 'attribute_escape', $safe_text, $text );
}

/**
 * Escaping for textarea values.
 */
/**
 * Escapes a value for output inside a textarea element.
 *
 * @since 3.1.0
 *
 * @param string $text The text to be escaped.
 * @return string Escaped text, after the {@see 'esc_textarea'} filter is applied.
 */
function esc_textarea( $text ) {
	$charset   = get_option( 'blog_charset' );
	$safe_text = htmlspecialchars( $text, ENT_QUOTES, $charset );

	/**
	 * Filters a string cleaned and escaped for output in a textarea element.
	 *
	 * @since 3.1.0
	 *
	 * @param string $safe_text The text after it has been escaped.
	 * @param string $text      The text prior to being escaped.
	 */
	return apply_filters( 'esc_textarea', $safe_text, $text );
}

/**
 * Escapes text for output in XML, leaving CDATA sections untouched.
 *
 * @since 5.5.0
 *
 * @param string $text Text to escape.
 * @return string Escaped text.
 */
function esc_xml( $text ) {
	$safe_text = wp_check_invalid_utf8( $text );

	$cdata_regex = '\<\!\[CDATA\[.*?\]\]\>';
	$regex       = <<<EOF
/
	(?=.*?{$cdata_regex})                 # lookahead that will match anything followed by a CDATA Section
	(?<non_cdata_followed_by_cdata>(.*?)) # the "anything" matched by the lookahead
	(?<cdata>({$cdata_regex}))            # the CDATA Section matched by the lookahead

|	                                      # alternative

	(?<non_cdata>(.*))                    # non-CDATA Section
/sx
EOF;

	// Escapes whatever lies outside a CDATA section and passes the section itself through.
	$escape_outside_cdata = static function( $matches ) {
		if ( ! isset( $matches[0] ) ) {
			return '';
		}

		if ( isset( $matches['non_cdata'] ) ) {
			// escape HTML entities in the non-CDATA Section.
			return _wp_specialchars( $matches['non_cdata'], ENT_XML1 );
		}

		// Return the CDATA Section unchanged, escape HTML entities in the rest.
		$leading = _wp_specialchars( $matches['non_cdata_followed_by_cdata'], ENT_XML1 );

		return $leading . $matches['cdata'];
	};

	$safe_text = (string) preg_replace_callback( $regex, $escape_outside_cdata, $safe_text );

	/**
	 * Filters a string cleaned and escaped for output in XML.
	 *
	 * Text passed to esc_xml() is stripped of invalid or special characters
	 * before output. HTML named character references are converted to their
	 * equivalent code points.
	 *
	 * @since 5.5.0
	 *
	 * @param string $safe_text The text after it has been escaped.
	 * @param string $text      The text prior to being escaped.
	 */
	return apply_filters( 'esc_xml', $safe_text, $text );
}

/**
 * Escapes an HTML tag name.
 *
 * Everything except ASCII letters, digits, underscores and colons is removed
 * and the result is lowercased.
 *
 * @since 2.5.0
 *
 * @param string $tag_name The tag name to escape.
 * @return string The escaped tag name, after the {@see 'tag_escape'} filter is applied.
 */
function tag_escape( $tag_name ) {
	$stripped = preg_replace( '/[^a-zA-Z0-9_:]/', '', $tag_name );
	$safe_tag = strtolower( $stripped );

	/**
	 * Filters a string cleaned and escaped for output as an HTML tag.
	 *
	 * @since 2.8.0
	 *
	 * @param string $safe_tag The tag name after it has been escaped.
	 * @param string $tag_name The text before it was escaped.
	 */
	return apply_filters( 'tag_escape', $safe_tag, $tag_name );
}

/**
 * Converts full URL paths to absolute paths.
 *
 * Removes the http or https protocols and the domain. Keeps the path '/' at the
 * beginning, so it isn't a true relative link, but from the web root base.
 *
 * @since 2.1.0
 * @since 4.1.0 Support was added for relative URLs.
 *
 * @param string $link Full URL path.
 * @return string Absolute path.
 */
function wp_make_link_relative( $link ) {
	// Optional scheme, then '//host'; only what follows the host is kept.
	$scheme_and_host = '|^(https?:)?//[^/]+(/?.*)|i';

	return preg_replace( $scheme_and_host, '$2', $link );
}

/**
 * Sanitizes various option values based on the nature of the option.
 *
 * This is basically a switch statement which will pass $value through a number
 * of functions depending on the $option.
 *
 * @since 2.0.5
 *
 * @global wpdb $wpdb WordPress database abstraction object.
 *
 * @param string $option The name of the option.
 * @param string $value  The unsanitised value.
 * @return string Sanitized value.
 */
*/
function sanitize_option( $option, $value ) {
	global $wpdb;

	// Untouched input, kept for the emoji retry and for the filter at the end.
	$raw_value = $value;
	// Remains null while the value is acceptable; any string (even '') marks a failure.
	$failure = null;

	// Shared first step of many branches: run the value through
	// $wpdb->strip_invalid_text_for_column() for the options table's option_value column.
	$strip_invalid_text = static function ( $text ) use ( $wpdb ) {
		return $wpdb->strip_invalid_text_for_column( $wpdb->options, 'option_value', $text );
	};

	switch ( $option ) {
		case 'admin_email':
		case 'new_admin_email':
			$value = $strip_invalid_text( $value );
			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
				break;
			}

			$value = sanitize_email( $value );
			if ( ! is_email( $value ) ) {
				$failure = __( 'The email address entered did not appear to be a valid email address. Please enter a valid email address.' );
			}
			break;

		case 'thumbnail_size_w':
		case 'thumbnail_size_h':
		case 'medium_size_w':
		case 'medium_size_h':
		case 'medium_large_size_w':
		case 'medium_large_size_h':
		case 'large_size_w':
		case 'large_size_h':
		case 'mailserver_port':
		case 'comment_max_links':
		case 'page_on_front':
		case 'page_for_posts':
		case 'rss_excerpt_length':
		case 'default_category':
		case 'default_email_category':
		case 'default_link_category':
		case 'close_comments_days_old':
		case 'comments_per_page':
		case 'thread_comments_depth':
		case 'users_can_register':
		case 'start_of_week':
		case 'site_icon':
			$value = absint( $value );
			break;

		case 'posts_per_page':
		case 'posts_per_rss':
			$value = (int) $value;
			if ( 0 === $value ) {
				// Zero is not usable here; fall back to a single item.
				$value = 1;
			} elseif ( $value < -1 ) {
				// -1 is let through as-is; anything lower is flipped to its positive counterpart.
				$value = abs( $value );
			}
			break;

		case 'default_ping_status':
		case 'default_comment_status':
			// Options that if not there have 0 value but need to be something like "closed".
			if ( '' === $value || '0' == $value ) {
				$value = 'closed';
			}
			break;

		case 'blogdescription':
		case 'blogname':
			$value = $strip_invalid_text( $value );
			if ( $value !== $raw_value ) {
				// Something was stripped: retry with emoji encoded by wp_encode_emoji().
				$value = $strip_invalid_text( wp_encode_emoji( $raw_value ) );
			}

			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
				break;
			}

			$value = esc_html( $value );
			break;

		case 'blog_charset':
			$value = preg_replace( '/[^a-zA-Z0-9_-]/', '', $value ); // Strips slashes.
			break;

		case 'blog_public':
			// This is the value if the settings checkbox is not checked on POST. Don't rely on this.
			$value = ( null === $value ) ? 1 : (int) $value;
			break;

		case 'date_format':
		case 'time_format':
		case 'mailserver_url':
		case 'mailserver_login':
		case 'mailserver_pass':
		case 'upload_path':
			$value = $strip_invalid_text( $value );
			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
				break;
			}

			$value = wp_kses_data( strip_tags( $value ) );
			break;

		case 'ping_sites':
			$lines   = explode( "\n", $value );
			$trimmed = array_filter( array_map( 'trim', $lines ) );
			$urls    = array_filter( array_map( 'esc_url_raw', $trimmed ) );
			$value   = implode( "\n", $urls );
			break;

		case 'gmt_offset':
			$value = preg_replace( '/[^0-9:.-]/', '', $value ); // Strips slashes.
			break;

		case 'siteurl':
			$value = $strip_invalid_text( $value );
			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
			} elseif ( preg_match( '#http(s?)://(.+)#i', $value ) ) {
				$value = esc_url_raw( $value );
			} else {
				$failure = __( 'The WordPress address you entered did not appear to be a valid URL. Please enter a valid URL.' );
			}
			break;

		case 'home':
			$value = $strip_invalid_text( $value );
			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
			} elseif ( preg_match( '#http(s?)://(.+)#i', $value ) ) {
				$value = esc_url_raw( $value );
			} else {
				$failure = __( 'The Site address you entered did not appear to be a valid URL. Please enter a valid URL.' );
			}
			break;

		case 'WPLANG':
			$allowed = get_available_languages();
			if ( ! is_multisite() && defined( 'WPLANG' ) && '' !== WPLANG && 'en_US' !== WPLANG ) {
				$allowed[] = WPLANG;
			}

			// A non-empty value outside the allowed list is replaced by the stored option.
			if ( ! in_array( $value, $allowed, true ) && ! empty( $value ) ) {
				$value = get_option( $option );
			}
			break;

		case 'illegal_names':
			$value = $strip_invalid_text( $value );
			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
				break;
			}

			if ( ! is_array( $value ) ) {
				$value = explode( ' ', $value );
			}

			$value = array_values( array_filter( array_map( 'trim', $value ) ) );

			if ( ! $value ) {
				$value = '';
			}
			break;

		case 'limited_email_domains':
		case 'banned_email_domains':
			$value = $strip_invalid_text( $value );
			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
				break;
			}

			if ( ! is_array( $value ) ) {
				$value = explode( "\n", $value );
			}

			$domains = array_values( array_filter( array_map( 'trim', $value ) ) );
			$value   = array();

			foreach ( $domains as $domain ) {
				// Skip entries containing '--' or '..'.
				if ( preg_match( '/(--|\.\.)/', $domain ) ) {
					continue;
				}

				// Keep entries made up only of letters, digits, hyphens and dots.
				if ( preg_match( '|^([a-zA-Z0-9-\.])+$|', $domain ) ) {
					$value[] = $domain;
				}
			}

			if ( ! $value ) {
				$value = '';
			}
			break;

		case 'timezone_string':
			$allowed_zones = timezone_identifiers_list();
			if ( ! in_array( $value, $allowed_zones, true ) && ! empty( $value ) ) {
				$failure = __( 'The timezone you have entered is not valid. Please select a valid timezone.' );
			}
			break;

		case 'permalink_structure':
		case 'category_base':
		case 'tag_base':
			$value = $strip_invalid_text( $value );
			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
			} else {
				$value = str_replace( 'http://', '', esc_url_raw( $value ) );
			}

			// A non-empty permalink structure must contain at least one %tag%.
			if ( 'permalink_structure' === $option && null === $failure
				&& '' !== $value && ! preg_match( '/%[^\/%]+%/', $value )
			) {
				$failure = sprintf(
					/* translators: %s: Documentation URL. */
					__( 'A structure tag is required when using custom permalinks. <a href="%s">Learn more</a>' ),
					__( 'https://wordpress.org/support/article/using-permalinks/#choosing-your-permalink-structure' )
				);
			}
			break;

		case 'default_role':
			if ( ! get_role( $value ) && get_role( 'subscriber' ) ) {
				$value = 'subscriber';
			}
			break;

		case 'moderation_keys':
		case 'disallowed_keys':
			$value = $strip_invalid_text( $value );
			if ( is_wp_error( $value ) ) {
				$failure = $value->get_error_message();
				break;
			}

			$keys  = array_filter( array_map( 'trim', explode( "\n", $value ) ) );
			$value = implode( "\n", array_unique( $keys ) );
			break;
	}

	if ( null !== $failure ) {
		// A WP_Error with an empty message still needs something to show.
		if ( '' === $failure && is_wp_error( $value ) ) {
			/* translators: 1: Option name, 2: Error code. */
			$failure = sprintf( __( 'Could not sanitize the %1$s option. Error code: %2$s' ), $option, $value->get_error_code() );
		}

		// On failure the currently stored option value is returned instead.
		$value = get_option( $option );
		if ( function_exists( 'add_settings_error' ) ) {
			add_settings_error( $option, "invalid_{$option}", $failure );
		}
	}

	/**
	 * Filters an option value following sanitization.
	 *
	 * @since 2.3.0
	 * @since 4.3.0 Added the `$original_value` parameter.
	 *
	 * @param string $value          The sanitized option value.
	 * @param string $option         The option name.
	 * @param string $original_value The original value passed to the function.
	 */
	return apply_filters( "sanitize_option_{$option}", $value, $option, $raw_value );
}

/**
 * Maps a function to all non-iterable elements of an array or an object.
 *
 * This is similar to `array_walk_recursive()` but acts upon objects too.
 *
 * @since 4.4.0
 *
 * @param mixed    $value    The array, object, or scalar.
 * @param callable $callback The function to map onto $value.
 * @return mixed The value with the callback applied to all non-arrays and non-objects inside it.
*/
function map_deep( $value, $callback ) {
	// Arrays: recurse into every element and hand back the rebuilt array.
	if ( is_array( $value ) ) {
		foreach ( $value as $key => $element ) {
			$value[ $key ] = map_deep( $element, $callback );
		}

		return $value;
	}

	// Objects: recurse into each property reported by get_object_vars(),
	// writing the results back onto the same object.
	if ( is_object( $value ) ) {
		foreach ( get_object_vars( $value ) as $name => $current ) {
			$value->$name = map_deep( $current, $callback );
		}

		return $value;
	}

	// Anything else is a leaf: apply the callback directly.
	return call_user_func( $callback, $value );
}

/**
 * Parses a query-style string into an array of variables.
 *
 * The input is cast to string and handed to parse_str(); the result is then
 * passed through the {@see 'wp_parse_str'} filter.
 *
 * @since 2.2.1
 *
 * @param string $string The string to be parsed.
 * @param array  $array  Variables will be stored in this array.
 */
function wp_parse_str( $string, &$array ) {
	$query = (string) $string;
	parse_str( $query, $array );

	/**
	 * Filters the array of variables derived from a parsed string.
	 *
	 * @since 2.2.1
	 *
	 * @param array $array The array populated with variables.
	 */
	$array = apply_filters( 'wp_parse_str', $array );
}

/**
 * Converts lone less than signs.
 *
 * KSES already converts lone greater than signs.
 *
 * @since 2.3.0
 *
 * @param string $text Text to be converted.
 * @return string Converted text.
 */
function wp_pre_kses_less_than( $text ) {
	// Matches a '<' up to the next '<' (not consumed), a closing '>', or end of text.
	$tag_like = '%<[^>]*?((?=<)|>|$)%';

	return preg_replace_callback( $tag_like, 'wp_pre_kses_less_than_callback', $text );
}

/**
 * Callback function used by preg_replace.
 *
 * @since 2.3.0
 *
 * @param string[] $matches Populated by matches to preg_replace.
 * @return string The text returned after esc_html if needed.
*/
function wp_pre_kses_less_than_callback( $matches ) {
	$candidate = $matches[0];

	// A match containing '>' is left alone; only an unclosed '<' run is escaped.
	$is_closed = false !== strpos( $candidate, '>' );

	return $is_closed ? $candidate : esc_html( $candidate );
}

/**
 * Removes non-allowable HTML from parsed block attribute values when filtering
 * in the post context.
 *
 * @since 5.3.1
 *
 * @param string         $string            Content to be run through KSES.
 * @param array[]|string $allowed_html      An array of allowed HTML elements
 *                                          and attributes, or a context name
 *                                          such as 'post'.
 * @param string[]       $allowed_protocols Array of allowed URL protocols.
 * @return string Filtered text to run through KSES.
 */
function wp_pre_kses_block_attributes( $string, $allowed_html, $allowed_protocols ) {
	/*
	 * `filter_block_content` is expected to call `wp_kses`. Temporarily remove
	 * the filter to avoid recursion.
	 */
	remove_filter( 'pre_kses', 'wp_pre_kses_block_attributes', 10 );
	$filtered = filter_block_content( $string, $allowed_html, $allowed_protocols );
	add_filter( 'pre_kses', 'wp_pre_kses_block_attributes', 10, 3 );

	return $filtered;
}

/**
 * WordPress implementation of PHP sprintf() with filters.
 *
 * The pattern is walked fragment by fragment, where a fragment runs from one
 * '%' up to the next. Each fragment that starts with a specifier is offered to
 * the {@see 'wp_sprintf'} filter before falling back to sprintf().
 *
 * @since 2.5.0
 * @since 5.3.0 Formalized the existing and already documented `...$args` parameter
 *              by adding it to the function signature.
 *
 * @link https://www.php.net/sprintf
 *
 * @param string $pattern The string which formatted args are inserted.
 * @param mixed  ...$args Arguments to be formatted into the $pattern string.
 * @return string The formatted string.
 */
function wp_sprintf( $pattern, ...$args ) {
	$length   = strlen( $pattern );
	$offset   = 0;
	$output   = '';
	$next_arg = 0;

	while ( $offset < $length ) {
		// A single remaining character cannot start a specifier: copy it and finish.
		if ( $length - 1 === $offset ) {
			$output .= substr( $pattern, -1 );
			break;
		}

		// An escaped percent sign collapses to one literal '%'.
		if ( '%%' === substr( $pattern, $offset, 2 ) ) {
			$output .= '%';
			$offset += 2;
			continue;
		}

		// The current fragment runs up to (not including) the next '%', or to the end.
		$next_percent = strpos( $pattern, '%', $offset + 1 );
		$stop         = ( false === $next_percent ) ? $length : $next_percent;
		$fragment     = substr( $pattern, $offset, $stop - $offset );

		// Fragment has a specifier.
		if ( '%' === $pattern[ $offset ] ) {
			if ( preg_match( '/^%(\d+)\$/', $fragment, $matches ) ) {
				// Numbered argument: sprintf() positions are 1-based, $args is 0-based.
				$position = $matches[1] - 1;
				$arg      = isset( $args[ $position ] ) ? $args[ $position ] : '';
				$fragment = str_replace( "%{$matches[1]}$", '%', $fragment );
			} else {
				// Unnumbered: take the next argument in order.
				$arg = isset( $args[ $next_arg ] ) ? $args[ $next_arg ] : '';
				++$next_arg;
			}

			/**
			 * Filters a fragment from the pattern passed to wp_sprintf().
			 *
			 * If the fragment is unchanged, then sprintf() will be run on the fragment.
			 *
			 * @since 2.5.0
			 *
			 * @param string $fragment A fragment from the pattern.
			 * @param string $arg      The argument.
			 */
			$filtered = apply_filters( 'wp_sprintf', $fragment, $arg );

			// A fragment the filter left untouched falls through to plain sprintf().
			$fragment = ( $filtered != $fragment ) ? $filtered : sprintf( $fragment, (string) $arg );
		}

		// Append to result and move to next fragment.
		$output .= $fragment;
		$offset  = $stop;
	}

	return $output;
}

/**
 * Localizes list items before the rest of the content.
 *
 * The '%l' must be the first characters of the pattern, which can then contain
 * the rest of the content. The list items will have ', ', ', and', and ' and '
 * added depending on the amount of list items in the $args parameter.
*
 * @since 2.5.0
 *
 * @param string $pattern Content containing '%l' at the beginning.
 * @param array  $args    List items to prepend to the content and replace '%l'.
 * @return string Localized list items and rest of the content.
 */
function wp_sprintf_l( $pattern, $args ) {
	// Patterns that do not start with '%l' are handed back untouched.
	$prefix = substr( $pattern, 0, 2 );
	if ( '%l' !== $prefix ) {
		return $pattern;
	}

	// Nothing to work with.
	if ( empty( $args ) ) {
		return '';
	}

	/**
	 * Filters the translated delimiters used by wp_sprintf_l().
	 * Placeholders (%s) are included to assist translators and then
	 * removed before the array of strings reaches the filter.
	 *
	 * Please note: Ampersands and entities should be avoided here.
	 *
	 * @since 2.5.0
	 *
	 * @param array $delimiters An array of translated delimiters.
	 */
	$delimiters = apply_filters(
		'wp_sprintf_l',
		array(
			/* translators: Used to join items in a list with more than 2 items. */
			'between'          => sprintf( __( '%1$s, %2$s' ), '', '' ),
			/* translators: Used to join last two items in a list with more than 2 times. */
			'between_last_two' => sprintf( __( '%1$s, and %2$s' ), '', '' ),
			/* translators: Used to join items in a list with only 2 items. */
			'between_only_two' => sprintf( __( '%1$s and %2$s' ), '', '' ),
		)
	);

	$items  = (array) $args;
	$joined = array_shift( $items );

	// Exactly two items overall: join them with the dedicated two-item delimiter.
	if ( 1 === count( $items ) ) {
		$joined .= $delimiters['between_only_two'] . array_shift( $items );
	}

	// Three or more items overall: the final one gets the "last two" delimiter,
	// every other one the regular delimiter.
	$final_position = count( $items ) - 1;
	foreach ( array_values( $items ) as $position => $item ) {
		$glue    = ( $position === $final_position ) ? $delimiters['between_last_two'] : $delimiters['between'];
		$joined .= $glue . $item;
	}

	// Re-attach whatever followed the '%l' marker.
	return $joined . substr( $pattern, 2 );
}

/**
 * Safely extracts not more than the first $count characters from HTML string.
*
 * UTF-8, tags and entities safe prefix extraction. Entities inside will *NOT*
 * be counted as one character. For example &amp; will be counted as 4, &lt; as
 * 3, etc.
 *
 * The text is stripped of tags with wp_strip_all_tags() before being cut, and a
 * partial entity left dangling at the cut point is removed. $more is appended
 * only when the resulting excerpt differs from the stripped text.
 *
 * @since 2.5.0
 *
 * @param string $str   String to get the excerpt from.
 * @param int    $count Maximum number of characters to take.
 * @param string $more  Optional. What to append if $str needs to be trimmed. Defaults to empty string.
 * @return string The excerpt.
 */
function wp_html_excerpt( $str, $count, $more = null ) {
	if ( null === $more ) {
		$more = '';
	}

	$str     = wp_strip_all_tags( $str, true );
	$excerpt = mb_substr( $str, 0, $count );

	// Remove part of an entity at the end.
	$excerpt = preg_replace( '/&[^;\s]{0,6}$/', '', $excerpt );

	/*
	 * Compare strictly. A loose `!=` compares numeric strings by value, so
	 * '1.0' and its truncation '1' would count as equal and $more would be
	 * silently dropped for numeric-looking text.
	 */
	if ( $str !== $excerpt ) {
		$excerpt = trim( $excerpt ) . $more;
	}

	return $excerpt;
}

/**
 * Adds a base URL to relative links in passed content.
 *
 * By default it supports the 'src' and 'href' attributes. However this can be
 * changed via the 3rd param.
 *
 * The base URL is handed to the `_links_add_base` callback through the
 * `$_links_add_base` global.
 *
 * @since 2.7.0
 *
 * @global string $_links_add_base
 *
 * @param string $content String to search for links in.
 * @param string $base    The base URL to prefix to links.
 * @param array  $attrs   The attributes which should be processed.
 * @return string The processed content.
 */
function links_add_base_url( $content, $base, $attrs = array( 'src', 'href' ) ) {
	global $_links_add_base;
	$_links_add_base = $base;

	// Attribute names become a regex alternation; they are inserted unescaped.
	$attrs = implode( '|', (array) $attrs );

	return preg_replace_callback( "!($attrs)=(['\"])(.+?)\\2!i", '_links_add_base', $content );
}

/**
 * Callback to add a base URL to relative links in passed content.
 *
 * @since 2.7.0
 * @access private
 *
 * @global string $_links_add_base
 *
 * @param string $m The matched link.
5301 * @return string The processed link. 5302 */ 5303 function _links_add_base( $m ) { 5304 global $_links_add_base; 5305 // 1 = attribute name 2 = quotation mark 3 = URL. 5306 return $m[1] . '=' . $m[2] . 5307 ( preg_match( '#^(\w{1,20}):#', $m[3], $protocol ) && in_array( $protocol[1], wp_allowed_protocols(), true ) ? 5308 $m[3] : 5309 WP_Http::make_absolute_url( $m[3], $_links_add_base ) 5310 ) 5311 . $m[2]; 5312 } 5313 5314 /** 5315 * Adds a Target attribute to all links in passed content. 5316 * 5317 * This function by default only applies to `<a>` tags, however this can be 5318 * modified by the 3rd param. 5319 * 5320 * *NOTE:* Any current target attributed will be stripped and replaced. 5321 * 5322 * @since 2.7.0 5323 * 5324 * @global string $_links_add_target 5325 * 5326 * @param string $content String to search for links in. 5327 * @param string $target The Target to add to the links. 5328 * @param string[] $tags An array of tags to apply to. 5329 * @return string The processed content. 5330 */ 5331 function links_add_target( $content, $target = '_blank', $tags = array( 'a' ) ) { 5332 global $_links_add_target; 5333 $_links_add_target = $target; 5334 $tags = implode( '|', (array) $tags ); 5335 return preg_replace_callback<