[ Index ]

PHP Cross Reference of GlotPress

title

Body

[close]

/gp-includes/ -> strings.php (source)

   1  <?php
   2  /**
   3   * Helper functions that make working with strings easier.
   4   */
   5  
   6  function gp_startswith( $haystack, $needle ) {
   7      return 0 === strpos( $haystack, $needle );
   8  }
   9  
  10  function gp_endswith( $haystack, $needle ) {
  11      return substr( $haystack, -strlen( $needle ) ) === $needle;
  12  }
  13  
  14  function gp_in( $needle, $haystack ) {
  15      return false !== strpos( $haystack, $needle );
  16  }
  17  
  18  /**
  19   * Escaping for HTML attributes.
  20   *
  21   * Similar to esc_attr(), but double encode entities.
  22   *
  23   * @since 1.0.0
  24   *
  25   * @param string $text The text prior to being escaped.
  26   * @return string The text after it has been escaped.
  27   */
  28  function gp_esc_attr_with_entities( $text ) {
  29      $safe_text = wp_check_invalid_utf8( $text );
  30      $safe_text = htmlspecialchars( $safe_text, ENT_QUOTES, false, true );
  31  
  32      /**
  33       * Filter a string cleaned and escaped for output in an HTML attribute.
  34       *
  35       * Text passed to gp_esc_attr_with_entities() is stripped of invalid or
  36       * special characters before output. Unlike esc_attr() it double encodes
  37       * entities.
  38       *
  39       * @since 1.0.0
  40       *
  41       * @param string $safe_text The text after it has been escaped.
  42       * @param string $text      The text prior to being escaped.
  43       */
  44      return apply_filters( 'gp_attribute_escape', $safe_text, $text );
  45  }
  46  
  47  /**
  48   * Escapes translations for HTML blocks.
  49   *
  50   * Similar to esc_html(), but double encode entities.
  51   *
  52   * @since 1.0.0
  53   *
  54   * @param string $text The text prior to being escaped.
  55   * @return string The text after it has been escaped.
  56   */
  57  function esc_translation( $text ) {
  58      $safe_text = wp_check_invalid_utf8( $text );
  59      return htmlspecialchars( $safe_text, ENT_NOQUOTES, false, true );
  60  }
  61  
  62  function gp_string_similarity( $str1, $str2 ) {
  63  
  64      $length1 = mb_strlen( $str1 );
  65      $length2 = mb_strlen( $str2 );
  66  
  67      $len = min( $length1, $length2 );
  68      if ( $len > 5000 ) {
  69          // Arbitrary limit on character length for speed purpose.
  70          $distance = $len;
  71      } else {
  72          $distance = gp_levenshtein( $str1, $str2, $length1, $length2 );
  73      }
  74  
  75      $similarity = 1 - ( $distance * 0.9 / $len );
  76  
  77      return $similarity;
  78  }
  79  
  80  /*
  81      PHP native implementation of levensthein is limited to 255 bytes, so let's extend that
  82      Source: https://github.com/wikimedia/mediawiki-extensions-Translate/blob/master/ttmserver/TTMServer.php#L90
  83  
  84  */
  85  function gp_levenshtein( $str1, $str2, $length1, $length2 ) {
  86  
  87      if ( 0 == $length1 ) {
  88          return $length2;
  89      }
  90  
  91      if ( 0 == $length2 ) {
  92          return $length1;
  93      }
  94  
  95      if ( $str1 === $str2 ) {
  96          return 0;
  97      }
  98  
  99      $bytelength1 = strlen( $str1 );
 100      $bytelength2 = strlen( $str2 );
 101  
 102      if ( $bytelength1 === $length1 && $bytelength1 <= 255
 103           && $bytelength2 === $length2 && $bytelength2 <= 255 ) {
 104          return levenshtein( $str1, $str2 );
 105      }
 106  
 107      $prevRow = range( 0, $length2 );
 108      for ( $i = 0; $i < $length1; $i++ ) {
 109          $currentRow    = array();
 110          $currentRow[0] = $i + 1;
 111          $c1            = mb_substr( $str1, $i, 1 );
 112          for ( $j = 0; $j < $length2; $j++ ) {
 113              $c2            = mb_substr( $str2, $j, 1 );
 114              $insertions    = $prevRow[ $j + 1 ] + 1;
 115              $deletions     = $currentRow[ $j ] + 1;
 116              $substitutions = $prevRow[ $j ] + ( ( $c1 != $c2 ) ? 1 : 0 );
 117              $currentRow[]  = min( $insertions, $deletions, $substitutions );
 118          }
 119          $prevRow = $currentRow;
 120      }
 121  
 122      return $prevRow[ $length2 ];
 123  }
 124  
 125  /**
 126   * Sanitizes a string for use as a slug, replacing whitespace and a few other characters with dashes.
 127   *
 128   * Limits the output to alphanumeric characters, underscore (_), periods (.) and dash (-).
 129   * Whitespace becomes a dash.
 130   *
 131   * @since 2.1.0
 132   *
 133   * @param string $slug The string to be sanitized for use as a slug.
 134   *
 135   * @return string The sanitized title.
 136   */
 137  function gp_sanitize_slug( $slug ) {
 138      $slug = remove_accents( $slug );
 139  
 140      $slug = strip_tags( $slug );
 141  
 142      // Preserve escaped octets.
 143      $slug = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $slug );
 144  
 145      // Remove percent signs that are not part of an octet.
 146      $slug = str_replace( '%', '', $slug );
 147  
 148      // Restore octets.
 149      $slug = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $slug );
 150  
 151      if ( seems_utf8( $slug ) ) {
 152          if ( function_exists( 'mb_strtolower' ) ) {
 153              $slug = mb_strtolower( $slug, 'UTF-8' );
 154          }
 155          $slug = utf8_uri_encode( $slug, 200 );
 156      }
 157  
 158      $slug = strtolower( $slug );
 159  
 160      // Convert nbsp, ndash and mdash to hyphens.
 161      $slug = str_replace( array( '%c2%a0', '%e2%80%93', '%e2%80%94' ), '-', $slug );
 162  
 163      // Convert nbsp, ndash and mdash HTML entities to hyphens.
 164      $slug = str_replace( array( '&nbsp;', '&#160;', '&ndash;', '&#8211;', '&mdash;', '&#8212;' ), '-', $slug );
 165  
 166      // Strip these characters entirely.
 167      $slug = str_replace(
 168          array(
 169              // Iexcl and iquest.
 170              '%c2%a1',
 171              '%c2%bf',
 172              // Angle quotes.
 173              '%c2%ab',
 174              '%c2%bb',
 175              '%e2%80%b9',
 176              '%e2%80%ba',
 177              // Curly quotes.
 178              '%e2%80%98',
 179              '%e2%80%99',
 180              '%e2%80%9c',
 181              '%e2%80%9d',
 182              '%e2%80%9a',
 183              '%e2%80%9b',
 184              '%e2%80%9e',
 185              '%e2%80%9f',
 186              // Copy, reg, deg, hellip and trade.
 187              '%c2%a9',
 188              '%c2%ae',
 189              '%c2%b0',
 190              '%e2%80%a6',
 191              '%e2%84%a2',
 192              // Acute accents.
 193              '%c2%b4',
 194              '%cb%8a',
 195              '%cc%81',
 196              '%cd%81',
 197              // Grave accent, macron, caron.
 198              '%cc%80',
 199              '%cc%84',
 200              '%cc%8c',
 201          ),
 202          '',
 203          $slug
 204      );
 205  
 206      // Convert times to x.
 207      $slug = str_replace( '%c3%97', 'x', $slug );
 208  
 209      // Kill entities.
 210      $slug = preg_replace( '/&.+?;/', '', $slug );
 211  
 212      $slug = preg_replace( '/[^%a-z\.0-9 _-]/', '', $slug );
 213      $slug = preg_replace( '/\s+/', '-', $slug );
 214      $slug = preg_replace( '|-+|', '-', $slug );
 215      $slug = trim( $slug, '-' );
 216  
 217      return $slug;
 218  }


Generated: Tue Dec 1 01:01:56 2020 Cross-referenced by PHPXref 0.7.1