[ Index ]

PHP Cross Reference of GlotPress

title

Body

[close]

/gp-includes/formats/ -> format-properties.php (source)

   1  <?php
   2  
   3  class GP_Format_Properties extends GP_Format {
   4  
   5      public $name = 'Java Properties File (.properties)';
   6      public $extension = 'properties';
   7      public $filename_pattern = '%s_%s';
   8  
   9      public $exported = '';
  10  
  11      /**
  12       * Generates a string the contains the $entries to export in the Properties file format.
  13       *
  14       * @since 2.0.0
  15       *
  16       * @param GP_Project         $project         The project the strings are being exported for, not used
  17       *                                            in this format but part of the scaffold of the parent object.
  18       * @param GP_Locale          $locale          The locale object the strings are being exported for. not used
  19       *                                            in this format but part of the scaffold of the parent object.
  20       * @param GP_Translation_Set $translation_set The locale object the strings are being
  21       *                                            exported for. not used in this format but part
  22       *                                            of the scaffold of the parent object.
  23       * @param GP_Translation     $entries         The entries to export.
  24       *
  25       * @return string
  26       */
  27  	public function print_exported_file( $project, $locale, $translation_set, $entries ) {
  28          $result = '';
  29  
  30          $result .= '# Translation-Revision-Date: ' . GP::$translation->last_modified( $translation_set ) . "+0000\n";
  31          $result .= "# Plural-Forms: nplurals={$locale->nplurals}; plural={$locale->plural_expression};\n";
  32          $result .= '# Generator: GlotPress/' . GP_VERSION . "\n";
  33  
  34          $language_code = $this->get_language_code( $locale );
  35          if ( false !== $language_code ) {
  36              $result .= '# Language: ' . $language_code . "\n";
  37          }
  38  
  39          $result .= "\n";
  40  
  41          $sorted_entries = $entries;
  42          usort( $sorted_entries, array( $this, 'sort_entries' ) );
  43  
  44          foreach ( $sorted_entries as $entry ) {
  45              $entry->context = $this->escape( $entry->context );
  46              if ( empty( $entry->translations ) ) {
  47                  $translation = $entry->context;
  48              } else {
  49                  $translation = $entry->translations[0];
  50              }
  51  
  52              $translation = str_replace( "\n", "\\n", $translation );
  53              $translation = $this->utf8_uni_encode( $translation );
  54  
  55              if ( empty( $entry->context ) ) {
  56                  $original = $entry->singular;
  57              } else {
  58                  $original = $entry->context;
  59              }
  60  
  61              $original = str_replace( "\n", "\\n", $original );
  62  
  63              $comment = preg_replace( "/(^\s+)|(\s+$)/us", "", $entry->extracted_comments );
  64  
  65              if ( $comment == "" ) {
  66                  $comment = "No comment provided.";
  67              }
  68  
  69              $comment_lines = explode( "\n", $comment );
  70  
  71              foreach ( $comment_lines as $line ) {
  72                  $result .= "# $line\n";
  73              }
  74  
  75              $result .= $this->escape_key( $original ) . " = $translation\n\n";
  76          }
  77  
  78          return $result;
  79      }
  80  
  81      /**
  82       * Encodes a PHP string in UTF8 format to a unicode escaped string (multi-byte characters are encoded in the \uXXXX format).
  83       *
  84       * @since 2.0.0
  85       *
  86       * @param $string string The string to encode.
  87       *
  88       * @return string
  89       */
  90  	private function utf8_uni_encode( $string ) {
  91          $result = '';
  92          $offset = 0;
  93  
  94          while ( $offset >= 0 ) {
  95              $val = $this->ordutf8( $string, $offset );
  96  
  97              if( false === $val ) {
  98                  break;
  99              } else if ( $val > 127 ) {
 100                  $result .= sprintf( '\u%04x', $val );
 101              } else {
 102                  $result .= chr( $val );
 103              }
 104          }
 105  
 106          return $result;
 107      }
 108  
 109      /**
 110       * Encodes a PHP string in ascii format to a unicode escaped string (multi-byte characters are encoded in the \uXXXX format).
 111       *
 112       * @since 2.0.0
 113       *
 114       * @param string $string The string to encode.
 115       *
 116       * @return string
 117       */
 118  	private function ascii_uni_encode( $string ) {
 119          $result = '';
 120  
 121          for ( $i = 0; $i < strlen( $string ); $i++ ) {
 122              $val = ord( $string[ $i ] );
 123  
 124              if( $val > 127 ) {
 125                  $result .= sprintf( '\u%04x', $val );
 126              } else {
 127                  $result .= $string[ $i ] ;
 128              }
 129          }
 130  
 131          return $result;
 132      }
 133  
 134      /**
 135       * Decodes a unicode escaped string to a PHP string.
 136       *
 137       * @param string $string The string to decode.
 138       *
 139       * @since 2.0.0
 140       *
 141       * @return string
 142       */
 143  	private function uni_decode( $string ) {
 144          return preg_replace_callback( "/\\\\u([a-fA-F0-9]{4})/", array( $this, "uni_decode_callback" ), $string );
 145      }
 146  
 147      /**
 148       * Part of uni_decode(), this is the call back function that does the heavy lifting of converting a \uXXXX
 149       * value to a UTF-8 encoded character sequence.
 150       *
 151       * @since 2.0.0
 152       *
 153       * @param array $matches The array of matches from preg_replace_callback().
 154       *
 155       * @return string
 156       */
 157  	private function uni_decode_callback( $matches ) {
 158          $binary = decbin( hexdec( $matches[1] ) );
 159          $bin_length = strlen( $binary );
 160  
 161          $byte = array();
 162  
 163          // UTF-8 encoding is a little complex, see https://en.wikipedia.org/wiki/UTF-8#Description for details of the below values.
 164          if ( $bin_length > 16 ) {        // > 16 bits, need 4 unicode bytes to encode.
 165              $byte[0] = chr( bindec( '11110' . sprintf( '%03s', substr( $binary, 0, $bin_length - 18 ) ) ) );
 166              $byte[1] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 3 ), 6 ) ) ) );
 167              $byte[2] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 2 ), 6 ) ) ) );
 168              $byte[3] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 1 ), 6) ) ) );
 169          } else if ( $bin_length > 11 ) {    // > 11 bits, need 3 unicode bytes to encode.
 170              $byte[0] = chr( bindec( '1110' . sprintf( '%04s', substr( $binary, 0, $bin_length - 12 ) ) ) );
 171              $byte[1] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 2 ), 6 ) ) ) );
 172              $byte[2] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 1 ), 6) ) ) );
 173          } else if ( $bin_length > 7 ) {  // > 7 bites, need 2 unicode bytes to encode.
 174              $byte[0] = chr( bindec( '110' . sprintf( '%05s', substr( $binary, 0, $bin_length - 6 ) ) ) );
 175              $byte[1] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 1 ), 6 ) ) ) );
 176          } else {                        // < 8 bites, need 1 unicode bytes to encode.
 177              $byte[0] = chr( bindec( '0' . sprintf(  '%07s', $binary ) ) );
 178          }
 179  
 180          /* This is an alternate way to encode the character but it needs the iconv functions available:
 181           *
 182           *        iconv( 'UCS-4LE', 'UTF-8', pack( 'V', hexdec( $matches[ 1 ] ) ) );
 183           *
 184           */
 185  
 186          return implode( $byte );
 187      }
 188  
 189      /**
 190       * Part of utf8_uni_encode(), this returns the character value of a UTF-8 encoded string.
 191       *
 192       * From http://php.net/manual/en/function.ord.php#109812
 193       *
 194       * @since 2.0.0
 195       *
 196       * @param string $string The UTF-8 string to process.
 197       * @param int    $offset The offset of the string to return the character value of.
 198       *
 199       * @return int|bool
 200       */
 201  	private function ordutf8( $string, &$offset ) {
 202          $character = substr( $string, $offset, 1 );
 203  
 204          // If substr returned false, we are past the end of line so no need to process it.
 205          if( false === $character ) {
 206              // Set the offset back to -1 to indicate we're done.
 207              $offset = -1;
 208              return false;
 209          }
 210  
 211          $code = ord( $character );
 212          $bytesnumber = 1;
 213  
 214          if ( $code >= 128 ) {             //otherwise 0xxxxxxx
 215              $codetemp = $code - 192;
 216  
 217              if ( $code < 224 ) {
 218                  $bytesnumber = 2;        //110xxxxx
 219              } else if ($code < 240) {
 220                  $bytesnumber = 3;        //1110xxxx
 221                  $codetemp -= 32;
 222              } else if ( $code < 248 ) {
 223                  $bytesnumber = 4;        //11110xxx
 224                  $codetemp -= ( 32 + 16 );
 225              }
 226  
 227              for ( $i = 2; $i <= $bytesnumber; $i++ ) {
 228                  $offset ++;
 229                  $code2 = ord( substr( $string, $offset, 1 ) ) - 128;        //10xxxxxx
 230                  $codetemp = ( $codetemp * 64 ) + $code2;
 231              }
 232  
 233              $code = $codetemp;
 234          }
 235  
 236          $offset += 1;
 237  
 238          if ( $offset >= strlen( $string ) ) {
 239              $offset = -1;
 240          }
 241  
 242          return $code;
 243      }
 244  
 245      /**
 246       * Splits a properties file line on the = or : character.
 247       *
 248       * Skips escaped values (\= or \:) in the key and matches the first unescaped instance.
 249       *
 250       * @since 2.0.0
 251       *
 252       * @param string $line  The line to split.
 253       * @param string $key   The key part of the properties file string if found.
 254       * @param string $value The value part of the properties file string if found.
 255       *
 256       * @return bool Returns true if the line was split successfully, false otherwise.
 257       */
 258  	private function split_properties_line( $line, &$key, &$value ) {
 259          // Make sure to reset the key/value before continuing.
 260          $key = '';
 261          $value = '';
 262  
 263          // Split the string on any = or :, get back where the string was split.
 264          $matches = preg_split( '/[=|:]/', $line, null, PREG_SPLIT_OFFSET_CAPTURE );
 265  
 266          // Check the number of matches.
 267          $num_matches = sizeof( $matches );
 268  
 269          // There's always one match (the entire line) so if we matched more than one, let's see if we can split the line.
 270          if ( $num_matches > 1 ) {
 271              // Loop through the matches, starting at the second one.
 272              for( $i = 1; $i < $num_matches; $i ++ ) {
 273                  // Get the location of the current match.
 274                  $location = $matches[ $i ][1];
 275  
 276                  // If the location of the separator is the first character of the string it's an invalid location so skip it.
 277                  if ( $location < 2 ) {
 278                      continue;
 279                  }
 280  
 281                  // If the character before it (-2 as the separator character is still part of the match)
 282                  // is an escape, we don't have a match yet.
 283                  if ( '\\' != $line[ $location - 2 ] ) {
 284                      // Set the return values for the key and value.
 285                      $key = substr( $line, 0, $location - 1 );
 286                      $value = substr( $line, $location );
 287  
 288                      // Handle the special case where the separator is actually " = " or " : ".
 289                      if ( gp_endswith( $key, ' ' ) && gp_startswith( $value, ' ' ) ) {
 290                          $key = substr( $key, 0, -1 );
 291                          $value = substr( $value, 1 );
 292                      }
 293  
 294                      return true;
 295                  }
 296              }
 297          }
 298  
 299          // Return false since we didn't find a valid line to split.
 300          return false;
 301      }
 302  
 303      /**
 304       * Reads a set of translations from a properties file.
 305       *
 306       * @since 2.0.0
 307       *
 308       * @param string     $file_name The filename of the uploaded properties file.
 309       * @param GP_Project $project   The project object to read the translations in to.
 310       *
 311       * @return Translations|bool
 312       */
 313  	public function read_translations_from_file( $file_name, $project = null ) {
 314          if ( is_null( $project ) ) {
 315              return false;
 316          }
 317  
 318          $translations = $this->read_originals_from_file( $file_name );
 319  
 320          if ( ! $translations ) {
 321              return false;
 322          }
 323  
 324          $originals        = GP::$original->by_project_id( $project->id );
 325          $new_translations = new Translations;
 326  
 327          foreach ( $translations->entries as $key => $entry ) {
 328              // we have been using read_originals_from_file to parse the file
 329              // so we need to swap singular and translation
 330              $entry->translations = array( $entry->singular );
 331              $entry->singular = null;
 332  
 333              foreach ( $originals as $original ) {
 334                  if ( $original->context == $entry->context ) {
 335                      $entry->singular = $original->singular;
 336                      $entry->context = $original->context;
 337                      break;
 338                  }
 339              }
 340  
 341              if ( ! $entry->singular ) {
 342                  error_log( sprintf( __( 'Missing context %s in project #%d', 'glotpress' ), $entry->context, $project->id ) );
 343                  continue;
 344              }
 345  
 346              $new_translations->add_entry( $entry );
 347          }
 348  
 349          return $new_translations;
 350      }
 351  
 352      /**
 353       * Reads a set of original strings from a properties file.
 354       *
 355       * @since 2.0.0
 356       *
 357       * @param string $file_name The filename of the uploaded properties file.
 358       *
 359       * @return Translations|bool
 360       */
 361  	public function read_originals_from_file( $file_name ) {
 362          $entries = new Translations;
 363          $file = file_get_contents( $file_name );
 364  
 365          if ( false === $file ) {
 366              return false;
 367          }
 368  
 369          $entry = $comment = null;
 370          $inline = false;
 371          $lines = explode( "\n", $file );
 372          $key = '';
 373          $value = '';
 374  
 375          foreach ( $lines as $line ) {
 376              if ( preg_match( '/^(#|!)\s*(.*)\s*$/', $line, $matches ) ) {
 377                  // If we have been processing a multi-line entry, save it now.
 378                  if ( true === $inline ) {
 379                      $entries->add_entry( $entry );
 380                      $inline = false;
 381                  }
 382  
 383                  $matches[1] = trim( $matches[1] );
 384  
 385                  if ( $matches[1] !== "No comment provided." ) {
 386                      if ( null !== $comment ) {
 387                          $comment = $comment . "\n" . $matches[2];
 388                      } else {
 389                          $comment = $matches[2];
 390                      }
 391                  } else {
 392                      $comment = null;
 393                  }
 394              } else if ( false === $inline && $this->split_properties_line( $line, $key, $value ) ) {
 395                  // Check to see if this line continues on to the next
 396                  if ( gp_endswith( $line, '\\' ) ) {
 397                      $inline = true;
 398                      $value = trim( $value, '\\' );
 399                  }
 400  
 401                  $entry = new Translation_Entry();
 402                  $entry->context = rtrim( $this->unescape( $key ) );
 403  
 404                  /* So the following line looks a little weird, why encode just to decode?
 405                   *
 406                   * The reason is simple, properties files are in ISO-8859-1 aka Latin-1 format
 407                   * and can have extended characters above 127 but below 256 represented by a
 408                   * single byte.  That will break things later as PHP/MySQL will not accept
 409                   * a mixed encoding string with these high single byte characters in them.
 410                   *
 411                   * So let's convert everything to escaped unicode first and then decode
 412                   * the whole kit and kaboodle to UTF-8.
 413                   */
 414                  $entry->singular = $this->uni_decode( $this->ascii_uni_encode( $value ) );
 415  
 416                  if ( ! is_null( $comment )) {
 417                      $entry->extracted_comments = $comment;
 418                      $comment = null;
 419                  }
 420  
 421                  $entry->translations = array();
 422  
 423                  // Only save this entry if we're not in a multi line translation.
 424                  if ( false === $inline ) {
 425                      $entries->add_entry( $entry );
 426                  }
 427              } else {
 428                  // If we're processing a multi-line entry, add the line to the translation.
 429                  if ( true === $inline ) {
 430                      // Check to make sure we're not a blank line.
 431                      if ( '' != trim( $line ) ) {
 432                          // If there's still more lines to add, trim off the trailing slash.
 433                          if ( gp_endswith( $line, '\\' ) ) {
 434                              $line = rtrim( $line, '\\' );
 435                          }
 436  
 437                          // Strip off leading spaces.
 438                          $line = ltrim( $line );
 439  
 440                          // Decode the translation and add it to the current entry.
 441                          $entry->singular = $entry->singular . $this->uni_decode( $line );
 442                      } else {
 443                          // Any blank line signals end of the entry.
 444                          $entries->add_entry( $entry );
 445                          $inline = false;
 446                      }
 447                  } else {
 448                      // If we hit a blank line and are not processing a multi-line entry, reset the comment.
 449                      $comment = null;
 450                  }
 451              }
 452          }
 453  
 454          // Make sure we save the last entry if it is a multi-line entry.
 455          if ( true === $inline ) {
 456              $entries->add_entry( $entry );
 457          }
 458  
 459          return $entries;
 460      }
 461  
 462      /**
 463       * The callback to sort the entries by, used above in print_exported_file().
 464       *
 465       * @since 2.0.0
 466       *
 467       * @param Translations $a The first translation to compare.
 468       * @param Translations $b The second translation to compare.
 469       *
 470       * @return int
 471       */
 472  	private function sort_entries( $a, $b ) {
 473          if ( $a->context == $b->context ) {
 474              return 0;
 475          }
 476  
 477          return ( $a->context > $b->context ) ? +1 : -1;
 478      }
 479  
 480      /**
 481       * Unescape a string to be used as a value in the properties file.
 482       *
 483       * @since 2.0.0
 484       *
 485       * @param string $string The string to unescape.
 486       *
 487       * @return string
 488       */
 489  	private function unescape( $string ) {
 490          return stripcslashes( $string );
 491      }
 492  
 493      /**
 494       * Escape a string to be used as a value in the properties file.
 495       *
 496       * @since 2.0.0
 497       *
 498       * @param string $string The string to escape.
 499       *
 500       * @return string
 501       */
 502  	private function escape( $string ) {
 503          return addcslashes( $string, '"\\/' );
 504      }
 505  
 506      /**
 507       * Escape a string to be used as a key name in the properties file.
 508       *
 509       * @since 2.0.0
 510       *
 511       * @param string $string The string to escape.
 512       *
 513       * @return string
 514       */
 515  	private function escape_key( $string ) {
 516          return addcslashes( $string, '=: ' );
 517      }
 518  
 519  }
 520  
// Store an instance of this format in GP::$formats under the 'properties' key.
GP::$formats['properties'] = new GP_Format_Properties;


Generated: Mon Nov 18 01:01:56 2019 Cross-referenced by PHPXref 0.7.1