[ Index ]

PHP Cross Reference of GlotPress

title

Body

[close]

/gp-includes/ -> strings.php (source)

   1  <?php
   2  /**
   3   * Functions, which make work with strings easier
   4   */
   5  
   6  function gp_startswith( $haystack, $needle ) {
   7      return 0 === strpos( $haystack, $needle );
   8  }
   9  
  10  function gp_endswith( $haystack, $needle ) {
  11      return substr( $haystack, -strlen( $needle ) ) === $needle;
  12  }
  13  
  14  function gp_in( $needle, $haystack ) {
  15      return false !== strpos( $haystack, $needle );
  16  }
  17  
  18  /**
  19   * Escaping for HTML attributes.
  20   *
  21   * Similar to esc_attr(), but double encode entities.
  22   *
  23   * @since 1.0.0
  24   *
  25   * @param string $text The text prior to being escaped.
  26   * @return string The text after it has been escaped.
  27   */
  28  function gp_esc_attr_with_entities( $text ) {
  29      $safe_text = wp_check_invalid_utf8( $text );
  30      $safe_text = htmlspecialchars( $safe_text, ENT_QUOTES, false, true );
  31  
  32      /**
  33       * Filter a string cleaned and escaped for output in an HTML attribute.
  34       *
  35       * Text passed to gp_esc_attr_with_entities() is stripped of invalid or
  36       * special characters before output. Unlike esc_attr() it double encodes
  37       * entities.
  38       *
  39       * @since 1.0.0
  40       *
  41       * @param string $safe_text The text after it has been escaped.
  42       * @param string $text      The text prior to being escaped.
  43       */
  44      return apply_filters( 'gp_attribute_escape', $safe_text, $text );
  45  }
  46  
  47  /**
  48   * Escapes translations for HTML blocks.
  49   *
  50   * Similar to esc_html(), but double encode entities.
  51   *
  52   * @since 1.0.0
  53   *
  54   * @param string $text The text prior to being escaped.
  55   * @return string The text after it has been escaped.
  56   */
  57  function esc_translation( $text ) {
  58      $safe_text = wp_check_invalid_utf8( $text );
  59      return htmlspecialchars( $safe_text, ENT_NOQUOTES, false, true );
  60  }
  61  
  62  function gp_string_similarity( $str1, $str2 ) {
  63  
  64      $length1 = mb_strlen( $str1 );
  65      $length2 = mb_strlen( $str2 );
  66  
  67      $len = min( $length1, $length2 );
  68      if ( $len > 5000 ) {
  69          // Arbitrary limit on character length for speed purpose.
  70          $distance = $len;
  71      } else {
  72          $distance = gp_levenshtein( $str1, $str2, $length1, $length2 );
  73      }
  74  
  75      $similarity = 1 - ( $distance * 0.9 / $len );
  76  
  77      return $similarity;
  78  }
  79  
  80  /*
  81      PHP native implementation of levensthein is limited to 255 bytes, so let's extend that
  82      Source: https://github.com/wikimedia/mediawiki-extensions-Translate/blob/master/ttmserver/TTMServer.php#L90
  83  
  84  */
  85  function gp_levenshtein( $str1, $str2, $length1, $length2 ) {
  86  
  87      if ( 0 == $length1 ) {
  88          return $length2;
  89      }
  90  
  91      if ( 0 == $length2 ) {
  92          return $length1;
  93      }
  94  
  95      if ( $str1 === $str2 ) {
  96          return 0;
  97      }
  98  
  99      if ( $length1 <= 255 && $length2 <= 255 ) {
 100          $bytelength1 = strlen( $str1 );
 101          $bytelength2 = strlen( $str2 );
 102  
 103          if ( $bytelength1 === $length1 && $bytelength2 === $length2 ) {
 104              return levenshtein( $str1, $str2 );
 105          }
 106      }
 107  
 108      $chars1 = mb_str_split( $str1 );
 109      $chars2 = mb_str_split( $str2 );
 110  
 111      $prevRow = range( 0, $length2 );
 112      foreach ( $chars1 as $i => $c1 ) {
 113          $currentRow    = array();
 114          $currentRow[0] = $i + 1;
 115          foreach ( $chars2 as $j => $c2 ) {
 116              $insertions    = $prevRow[ $j + 1 ] + 1;
 117              $deletions     = $currentRow[ $j ] + 1;
 118              $substitutions = $prevRow[ $j ] + ( ( $c1 != $c2 ) ? 1 : 0 );
 119              $currentRow[]  = min( $insertions, $deletions, $substitutions );
 120          }
 121          $prevRow = $currentRow;
 122      }
 123  
 124      return $prevRow[ $length2 ];
 125  }
 126  
 127  /**
 128   * Sanitizes a string for use as a slug, replacing whitespace and a few other characters with dashes.
 129   *
 130   * Limits the output to alphanumeric characters, underscore (_), periods (.) and dash (-).
 131   * Whitespace becomes a dash.
 132   *
 133   * @since 2.1.0
 134   *
 135   * @param string $slug The string to be sanitized for use as a slug.
 136   *
 137   * @return string The sanitized title.
 138   */
 139  function gp_sanitize_slug( $slug ) {
 140      $slug = remove_accents( $slug );
 141  
 142      $slug = strip_tags( $slug );
 143  
 144      // Preserve escaped octets.
 145      $slug = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $slug );
 146  
 147      // Remove percent signs that are not part of an octet.
 148      $slug = str_replace( '%', '', $slug );
 149  
 150      // Restore octets.
 151      $slug = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $slug );
 152  
 153      if ( seems_utf8( $slug ) ) {
 154          if ( function_exists( 'mb_strtolower' ) ) {
 155              $slug = mb_strtolower( $slug, 'UTF-8' );
 156          }
 157          $slug = utf8_uri_encode( $slug, 200 );
 158      }
 159  
 160      $slug = strtolower( $slug );
 161  
 162      // Convert nbsp, ndash and mdash to hyphens.
 163      $slug = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $slug );
 164  
 165      // Convert nbsp, ndash and mdash HTML entities to hyphens.
 166      $slug = str_replace( array( '&nbsp;', '&#160;', '&ndash;', '&#8211;', '&mdash;', '&#8212;' ), '-', $slug );
 167  
 168      // Strip these characters entirely.
 169      $slug = str_replace(
 170          array(
 171              // Iexcl and iquest.
 172              '%c2%a1',
 173              '%c2%bf',
 174              // Angle quotes.
 175              '%c2%ab',
 176              '%c2%bb',
 177              '%e2%80%b9',
 178              '%e2%80%ba',
 179              // Curly quotes.
 180              '%e2%80%98',
 181              '%e2%80%99',
 182              '%e2%80%9c',
 183              '%e2%80%9d',
 184              '%e2%80%9a',
 185              '%e2%80%9b',
 186              '%e2%80%9e',
 187              '%e2%80%9f',
 188              // Copy, reg, deg, hellip and trade.
 189              '%c2%a9',
 190              '%c2%ae',
 191              '%c2%b0',
 192              '%e2%80%a6',
 193              '%e2%84%a2',
 194              // Acute accents.
 195              '%c2%b4',
 196              '%cb%8a',
 197              '%cc%81',
 198              '%cd%81',
 199              // Grave accent, macron, caron.
 200              '%cc%80',
 201              '%cc%84',
 202              '%cc%8c',
 203          ),
 204          '',
 205          $slug
 206      );
 207  
 208      // Convert times to x.
 209      $slug = str_replace( '%c3%97', 'x', $slug );
 210  
 211      // Kill entities.
 212      $slug = preg_replace( '/&.+?;/', '', $slug );
 213  
 214      $slug = preg_replace( '/[^%a-z\.0-9 _-]/', '', $slug );
 215      $slug = preg_replace( '/\s+/', '-', $slug );
 216      $slug = preg_replace( '|-+|', '-', $slug );
 217      $slug = trim( $slug, '-' );
 218  
 219      return $slug;
 220  }


Generated: Wed Sep 18 01:01:03 2024 Cross-referenced by PHPXref 0.7.1