[ Index ]

PHP Cross Reference of GlotPress

title

Body

[close]

/gp-includes/ -> strings.php (source)

   1  <?php
/**
 * Helper functions that make working with strings easier.
 */
   5  
   6  function gp_startswith( $haystack, $needle ) {
   7      return 0 === strpos( $haystack, $needle );
   8  }
   9  
  10  function gp_endswith( $haystack, $needle ) {
  11      return $needle === substr( $haystack, -strlen( $needle ) );
  12  }
  13  
  14  function gp_in( $needle, $haystack ) {
  15      return false !== strpos( $haystack, $needle );
  16  }
  17  
  18  /**
  19   * Compat function to mimic mb_strtolower().
  20   *
  21   * Falls back to `strtolower()` if `mb_strtolower()` doesn't exists.
  22   *
  23   * @since 1.0.0
  24   *
  25   * @param string      $str      The string being lowercased.
  26   * @param string|null $encoding Optional. Character encoding to use. Default null.
  27   * @return int String length of `$str`.
  28   */
  29  function gp_strtolower( $str, $encoding = null ) {
  30      if ( function_exists( 'mb_strtolower' ) ) {
  31          if ( isset( $encoding ) ) {
  32              return mb_strtolower( $str, $encoding );
  33          } else {
  34              return mb_strtolower( $str ); // Uses mb_internal_encoding().
  35          }
  36      }
  37  
  38      return strtolower( $str );
  39  }
  40  
  41  /**
  42   * Compat function to mimic mb_strlen().
  43   *
  44   * Without a `function_exists()` check because WordPress includes
  45   * a compat function for `mb_strlen()`.
  46   *
  47   * @since 1.0.0
  48   *
  49   * @see _mb_strlen()
  50   *
  51   * @param string      $str      The string to retrieve the character length from.
  52   * @param string|null $encoding Optional. Character encoding to use. Default null.
  53   * @return int String length of `$str`.
  54   */
  55  function gp_strlen( $str, $encoding = null ) {
  56      if ( isset( $encoding ) ) {
  57          return mb_strlen( $str, $encoding );
  58      } else {
  59          return mb_strlen( $str ); // Uses mb_internal_encoding().
  60      }
  61  }
  62  
  63  /**
  64   * Compat function to mimic mb_stripos().
  65   *
  66   * Falls back to `stripos()` if `mb_stripos()` doesn't exists.
  67   *
  68   * @since 1.0.0
  69   *
  70   * @param string      $haystack The string from which to get the position of the first occurrence of needle.
  71   * @param string      $needle   The string to find in haystack.
  72   * @param int         $offset   The position in haystack to start searching.
  73   * @param string|null $encoding Optional. Character encoding to use. Default null.
  74   * @return int|false The numeric position of the first occurrence of needle in the haystack string,
  75   *                   or false if needle is not found.
  76   */
  77  function gp_stripos( $haystack, $needle, $offset = 0, $encoding = null ) {
  78      if ( function_exists( 'mb_stripos' ) ) {
  79          if ( isset( $encoding ) ) {
  80              return mb_stripos( $haystack, $needle, $offset, $encoding );
  81          } else {
  82              return mb_stripos( $haystack, $needle, $offset ); // Uses mb_internal_encoding().
  83          }
  84      }
  85  
  86      return stripos( $haystack, $needle, $offset );
  87  }
  88  
  89  /**
  90   * Compat function to mimic mb_substr().
  91   *
  92   * Without a `function_exists()` check because WordPress includes
  93   * a compat function for `mb_substr()`.
  94   *
  95   * @since 1.0.0
  96   *
  97   * @see _mb_substr()
  98   *
  99   * @param string      $str      The string to extract the substring from.
 100   * @param int         $start    Position to being extraction from in `$str`.
 101   * @param int|null    $length   Optional. Maximum number of characters to extract from `$str`.
 102   *                              Default null.
 103   * @param string|null $encoding Optional. Character encoding to use. Default null.
 104   * @return string Extracted substring.
 105   */
 106  function gp_substr( $str, $start, $length, $encoding = null ) {
 107      if ( isset( $encoding ) ) {
 108          return mb_substr( $str, $start, $length, $encoding );
 109      } else {
 110          return mb_substr( $str, $start, $length ); // Uses mb_internal_encoding().
 111      }
 112  }
 113  
 114  /**
 115   * Escaping for HTML attributes.
 116   *
 117   * Similar to esc_attr(), but double encode entities.
 118   *
 119   * @since 1.0.0
 120   *
 121   * @param string $text The text prior to being escaped.
 122   * @return string The text after it has been escaped.
 123   */
 124  function gp_esc_attr_with_entities( $text ) {
 125      $safe_text = wp_check_invalid_utf8( $text );
 126      $safe_text = htmlspecialchars( $safe_text, ENT_QUOTES, false, true );
 127  
 128      /**
 129       * Filter a string cleaned and escaped for output in an HTML attribute.
 130       *
 131       * Text passed to gp_esc_attr_with_entities() is stripped of invalid or
 132       * special characters before output. Unlike esc_attr() it double encodes
 133       * entities.
 134       *
 135       * @since 1.0.0
 136       *
 137       * @param string $safe_text The text after it has been escaped.
 138       * @param string $text      The text prior to being escaped.
 139       */
 140      return apply_filters( 'gp_attribute_escape', $safe_text, $text );
 141  }
 142  
 143  /**
 144   * Escapes translations for HTML blocks.
 145   *
 146   * Similar to esc_html(), but double encode entities.
 147   *
 148   * @since 1.0.0
 149   *
 150   * @param string $text The text prior to being escaped.
 151   * @return string The text after it has been escaped.
 152   */
 153  function esc_translation( $text ) {
 154      $safe_text = wp_check_invalid_utf8( $text );
 155      return htmlspecialchars( $safe_text, ENT_NOQUOTES, false, true );
 156  }
 157  
 158  function gp_string_similarity( $str1, $str2 ) {
 159  
 160      $length1 = gp_strlen( $str1 );
 161      $length2 = gp_strlen( $str2 );
 162  
 163      $len = min( $length1, $length2 );
 164      if ( $len > 5000 ) {
 165          //Arbitrary limit on character length for speed purpose.
 166          $distance = $len;
 167      } else {
 168          $distance = gp_levenshtein( $str1, $str2, $length1, $length2 );
 169      }
 170  
 171      $similarity = 1 - ( $distance * 0.9 / $len );
 172  
 173      return $similarity;
 174  }
 175  
 176  /*
 177      PHP native implementation of levensthein is limited to 255 bytes, so let's extend that
 178      Source: https://github.com/wikimedia/mediawiki-extensions-Translate/blob/master/ttmserver/TTMServer.php#L90
 179  
 180  */
 181  function gp_levenshtein( $str1, $str2, $length1, $length2 ) {
 182  
 183      if ( $length1 == 0 ) {
 184          return $length2;
 185      }
 186  
 187      if ( $length2 == 0 ) {
 188          return $length1;
 189      }
 190  
 191      if ( $str1 === $str2 ) {
 192          return 0;
 193      }
 194  
 195      $bytelength1 = strlen( $str1 );
 196      $bytelength2 = strlen( $str2 );
 197  
 198      if ( $bytelength1 === $length1 && $bytelength1 <= 255
 199           && $bytelength2 === $length2 && $bytelength2 <= 255 ) {
 200          return levenshtein( $str1, $str2 );
 201      }
 202  
 203      $prevRow = range( 0, $length2 );
 204      for ( $i = 0; $i < $length1; $i++ ) {
 205          $currentRow = array();
 206          $currentRow[0] = $i + 1;
 207          $c1 = gp_substr( $str1, $i, 1 );
 208          for ( $j = 0; $j < $length2; $j++ ) {
 209              $c2 = gp_substr( $str2, $j, 1 );
 210              $insertions = $prevRow[$j + 1] + 1;
 211              $deletions = $currentRow[$j] + 1;
 212              $substitutions = $prevRow[$j] + ( ( $c1 != $c2 ) ? 1 : 0 );
 213              $currentRow[] = min( $insertions, $deletions, $substitutions );
 214          }
 215          $prevRow = $currentRow;
 216      }
 217  
 218      return $prevRow[$length2];
 219  }
 220  
 221  /**
 222   * Sanitizes a string for use as a slug, replacing whitespace and a few other characters with dashes.
 223   *
 224   * Limits the output to alphanumeric characters, underscore (_), periods (.) and dash (-).
 225   * Whitespace becomes a dash.
 226   *
 227   * @since 2.1.0
 228   *
 229   * @param string $slug The string to be sanitized for use as a slug.
 230   *
 231   * @return string The sanitized title.
 232   */
 233  function gp_sanitize_slug( $slug ) {
 234      $slug = remove_accents( $slug );
 235  
 236      $slug = strip_tags( $slug );
 237  
 238      // Preserve escaped octets.
 239      $slug = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $slug );
 240  
 241      // Remove percent signs that are not part of an octet.
 242      $slug = str_replace( '%', '', $slug );
 243  
 244      // Restore octets.
 245      $slug = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $slug );
 246  
 247      $slug = gp_strtolower( $slug, 'UTF-8' );
 248  
 249      if ( seems_utf8( $slug ) ) {
 250          $slug = utf8_uri_encode( $slug, 200 );
 251      }
 252  
 253      // Convert nbsp, ndash and mdash to hyphens.
 254      $slug = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $slug );
 255  
 256      // Convert nbsp, ndash and mdash HTML entities to hyphens.
 257      $slug = str_replace( array( '&nbsp;', '&#160;', '&ndash;', '&#8211;', '&mdash;', '&#8212;' ), '-', $slug );
 258  
 259      // Strip these characters entirely.
 260      $slug = str_replace( array(
 261          // Iexcl and iquest.
 262          '%c2%a1',
 263          '%c2%bf',
 264          // Angle quotes.
 265          '%c2%ab',
 266          '%c2%bb',
 267          '%e2%80%b9',
 268          '%e2%80%ba',
 269          // Curly quotes.
 270          '%e2%80%98',
 271          '%e2%80%99',
 272          '%e2%80%9c',
 273          '%e2%80%9d',
 274          '%e2%80%9a',
 275          '%e2%80%9b',
 276          '%e2%80%9e',
 277          '%e2%80%9f',
 278          // Copy, reg, deg, hellip and trade.
 279          '%c2%a9',
 280          '%c2%ae',
 281          '%c2%b0',
 282          '%e2%80%a6',
 283          '%e2%84%a2',
 284          // Acute accents.
 285          '%c2%b4',
 286          '%cb%8a',
 287          '%cc%81',
 288          '%cd%81',
 289          // Grave accent, macron, caron.
 290          '%cc%80',
 291          '%cc%84',
 292          '%cc%8c',
 293      ), '', $slug );
 294  
 295      // Convert times to x.
 296      $slug = str_replace( '%c3%97', 'x', $slug );
 297  
 298      // Kill entities.
 299      $slug = preg_replace( '/&.+?;/', '', $slug );
 300  
 301      $slug = preg_replace( '/[^%a-z\.0-9 _-]/', '', $slug );
 302      $slug = preg_replace( '/\s+/', '-', $slug );
 303      $slug = preg_replace( '|-+|', '-', $slug );
 304      $slug = trim( $slug, '-' );
 305  
 306      return $slug;
 307  }


Generated: Wed Jun 26 01:02:08 2019 Cross-referenced by PHPXref 0.7.1