PHPXRef 0.7.1 : GlotPress : /gp-includes/formats/format-properties.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  
   3  class GP_Format_Properties extends GP_Format {
   4  
   5      public $name             = 'Java Properties File (.properties)';
   6      public $extension        = 'properties';
   7      public $filename_pattern = '%s_%s';
   8  
   9      public $exported = '';
  10  
  11      /**
  12       * Generates a string the contains the $entries to export in the Properties file format.
  13       *
  14       * @since 2.0.0
  15       *
  16       * @param GP_Project         $project         The project the strings are being exported for, not used
  17       *                                            in this format but part of the scaffold of the parent object.
  18       * @param GP_Locale          $locale          The locale object the strings are being exported for. not used
  19       *                                            in this format but part of the scaffold of the parent object.
  20       * @param GP_Translation_Set $translation_set The locale object the strings are being
  21       *                                            exported for. not used in this format but part
  22       *                                            of the scaffold of the parent object.
  23       * @param GP_Translation     $entries         The entries to export.
  24       *
  25       * @return string
  26       */
  27  	public function print_exported_file( $project, $locale, $translation_set, $entries ) {
  28          $result = '';
  29  
  30          $result .= '# Translation-Revision-Date: ' . GP::$translation->last_modified( $translation_set ) . "+0000\n";
  31          $result .= "# Plural-Forms: nplurals={$locale->nplurals}; plural={$locale->plural_expression};\n";
  32          $result .= '# Generator: GlotPress/' . GP_VERSION . "\n";
  33  
  34          $language_code = $this->get_language_code( $locale );
  35          if ( false !== $language_code ) {
  36              $result .= '# Language: ' . $language_code . "\n";
  37          }
  38  
  39          $result .= "\n";
  40  
  41          $sorted_entries = $entries;
  42          usort( $sorted_entries, array( $this, 'sort_entries' ) );
  43  
  44          foreach ( $sorted_entries as $entry ) {
  45              $entry->context = $this->escape( $entry->context );
  46              if ( empty( $entry->translations ) ) {
  47                  $translation = $entry->context;
  48              } else {
  49                  $translation = $entry->translations[0];
  50              }
  51  
  52              $translation = str_replace( "\n", "\\n", $translation );
  53              $translation = $this->utf8_uni_encode( $translation );
  54  
  55              if ( empty( $entry->context ) ) {
  56                  $original = $entry->singular;
  57              } else {
  58                  $original = $entry->context;
  59              }
  60  
  61              $original = str_replace( "\n", "\\n", $original );
  62  
  63              $comment = preg_replace( '/(^\s+)|(\s+$)/us', '', $entry->extracted_comments );
  64  
  65              if ( '' == $comment ) {
  66                  $comment = 'No comment provided.';
  67              }
  68  
  69              $comment_lines = explode( "\n", $comment );
  70  
  71              foreach ( $comment_lines as $line ) {
  72                  $result .= "# $line\n";
  73              }
  74  
  75              $result .= $this->escape_key( $original ) . " = $translation\n\n";
  76          }
  77  
  78          return $result;
  79      }
  80  
  81      /**
  82       * Encodes a PHP string in UTF8 format to a unicode escaped string (multi-byte characters are encoded in the \uXXXX format).
  83       *
  84       * @since 2.0.0
  85       *
  86       * @param $string string The string to encode.
  87       *
  88       * @return string
  89       */
  90  	private function utf8_uni_encode( $string ) {
  91          $result = '';
  92          $offset = 0;
  93  
  94          while ( $offset >= 0 ) {
  95              $val = $this->ordutf8( $string, $offset );
  96  
  97              if ( false === $val ) {
  98                  break;
  99              } elseif ( $val > 127 ) {
 100                  $result .= sprintf( '\u%04x', $val );
 101              } else {
 102                  $result .= chr( $val );
 103              }
 104          }
 105  
 106          return $result;
 107      }
 108  
 109      /**
 110       * Encodes a PHP string in ascii format to a unicode escaped string (multi-byte characters are encoded in the \uXXXX format).
 111       *
 112       * @since 2.0.0
 113       *
 114       * @param string $string The string to encode.
 115       *
 116       * @return string
 117       */
 118  	private function ascii_uni_encode( $string ) {
 119          $result = '';
 120  
 121          $string_length = strlen( $string );
 122  
 123          for ( $i = 0; $i < $string_length; $i++ ) {
 124              $val = ord( $string[ $i ] );
 125  
 126              if ( $val > 127 ) {
 127                  $result .= sprintf( '\u%04x', $val );
 128              } else {
 129                  $result .= $string[ $i ];
 130              }
 131          }
 132  
 133          return $result;
 134      }
 135  
 136      /**
 137       * Decodes a unicode escaped string to a PHP string.
 138       *
 139       * @param string $string The string to decode.
 140       *
 141       * @since 2.0.0
 142       *
 143       * @return string
 144       */
 145  	private function uni_decode( $string ) {
 146          return preg_replace_callback( "/\\\\u([a-fA-F0-9]{4})/", array( $this, 'uni_decode_callback' ), $string );
 147      }
 148  
 149      /**
 150       * Part of uni_decode(), this is the call back function that does the heavy lifting of converting a \uXXXX
 151       * value to a UTF-8 encoded character sequence.
 152       *
 153       * @since 2.0.0
 154       *
 155       * @param array $matches The array of matches from preg_replace_callback().
 156       *
 157       * @return string
 158       */
 159  	private function uni_decode_callback( $matches ) {
 160          $binary     = decbin( hexdec( $matches[1] ) );
 161          $bin_length = strlen( $binary );
 162  
 163          $byte = array();
 164  
 165          // UTF-8 encoding is a little complex, see https://en.wikipedia.org/wiki/UTF-8#Description for details of the below values.
 166          if ( $bin_length > 16 ) {        // > 16 bits, need 4 unicode bytes to encode.
 167              $byte[0] = chr( bindec( '11110' . sprintf( '%03s', substr( $binary, 0, $bin_length - 18 ) ) ) );
 168              $byte[1] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 3 ), 6 ) ) ) );
 169              $byte[2] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 2 ), 6 ) ) ) );
 170              $byte[3] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 1 ), 6 ) ) ) );
 171          } elseif ( $bin_length > 11 ) {    // > 11 bits, need 3 unicode bytes to encode.
 172              $byte[0] = chr( bindec( '1110' . sprintf( '%04s', substr( $binary, 0, $bin_length - 12 ) ) ) );
 173              $byte[1] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 2 ), 6 ) ) ) );
 174              $byte[2] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 1 ), 6 ) ) ) );
 175          } elseif ( $bin_length > 7 ) {  // > 7 bites, need 2 unicode bytes to encode.
 176              $byte[0] = chr( bindec( '110' . sprintf( '%05s', substr( $binary, 0, $bin_length - 6 ) ) ) );
 177              $byte[1] = chr( bindec( '10' . sprintf( '%06s', substr( $binary, -( 6 * 1 ), 6 ) ) ) );
 178          } else {                        // < 8 bites, need 1 unicode bytes to encode.
 179              $byte[0] = chr( bindec( '0' . sprintf( '%07s', $binary ) ) );
 180          }
 181  
 182          /*
 183           * This is an alternate way to encode the character but it needs the iconv functions available:
 184           *
 185           *        iconv( 'UCS-4LE', 'UTF-8', pack( 'V', hexdec( $matches[ 1 ] ) ) );
 186           *
 187           */
 188  
 189          return implode( $byte );
 190      }
 191  
 192      /**
 193       * Part of utf8_uni_encode(), this returns the character value of a UTF-8 encoded string.
 194       *
 195       * From http://php.net/manual/en/function.ord.php#109812
 196       *
 197       * @since 2.0.0
 198       *
 199       * @param string $string The UTF-8 string to process.
 200       * @param int    $offset The offset of the string to return the character value of.
 201       *
 202       * @return int|bool
 203       */
 204  	private function ordutf8( $string, &$offset ) {
 205          $character = substr( $string, $offset, 1 );
 206  
 207          // If substr returned false, we are past the end of line so no need to process it.
 208          if ( false === $character ) {
 209              // Set the offset back to -1 to indicate we're done.
 210              $offset = -1;
 211              return false;
 212          }
 213  
 214          $code        = ord( $character );
 215          $bytesnumber = 1;
 216  
 217          if ( $code >= 128 ) {  // Otherwise 0xxxxxxx
 218              $codetemp = $code - 192;
 219  
 220              if ( $code < 224 ) {
 221                  $bytesnumber = 2;  // 110xxxxx
 222              } elseif ( $code < 240 ) {
 223                  $bytesnumber = 3;  // 1110xxxx
 224                  $codetemp   -= 32;
 225              } elseif ( $code < 248 ) {
 226                  $bytesnumber = 4;  // 11110xxx
 227                  $codetemp   -= ( 32 + 16 );
 228              }
 229  
 230              for ( $i = 2; $i <= $bytesnumber; $i++ ) {
 231                  $offset ++;
 232                  $code2    = ord( substr( $string, $offset, 1 ) ) - 128;  // 10xxxxxx
 233                  $codetemp = ( $codetemp * 64 ) + $code2;
 234              }
 235  
 236              $code = $codetemp;
 237          }
 238  
 239          $offset += 1;
 240  
 241          if ( $offset >= strlen( $string ) ) {
 242              $offset = -1;
 243          }
 244  
 245          return $code;
 246      }
 247  
 248      /**
 249       * Splits a properties file line on the = or : character.
 250       *
 251       * Skips escaped values (\= or \:) in the key and matches the first unescaped instance.
 252       *
 253       * @since 2.0.0
 254       *
 255       * @param string $line  The line to split.
 256       * @param string $key   The key part of the properties file string if found.
 257       * @param string $value The value part of the properties file string if found.
 258       *
 259       * @return bool Returns true if the line was split successfully, false otherwise.
 260       */
 261  	private function split_properties_line( $line, &$key, &$value ) {
 262          // Make sure to reset the key/value before continuing.
 263          $key   = '';
 264          $value = '';
 265  
 266          // Split the string on any = or :, get back where the string was split.
 267          $matches = preg_split( '/[=|:]/', $line, null, PREG_SPLIT_OFFSET_CAPTURE );
 268  
 269          // Check the number of matches.
 270          $num_matches = count( $matches );
 271  
 272          // There's always one match (the entire line) so if we matched more than one, let's see if we can split the line.
 273          if ( $num_matches > 1 ) {
 274              // Loop through the matches, starting at the second one.
 275              for ( $i = 1; $i < $num_matches; $i ++ ) {
 276                  // Get the location of the current match.
 277                  $location = $matches[ $i ][1];
 278  
 279                  // If the location of the separator is the first character of the string it's an invalid location so skip it.
 280                  if ( $location < 2 ) {
 281                      continue;
 282                  }
 283  
 284                  // If the character before it (-2 as the separator character is still part of the match)
 285                  // is an escape, we don't have a match yet.
 286                  if ( '\\' != $line[ $location - 2 ] ) {
 287                      // Set the return values for the key and value.
 288                      $key   = substr( $line, 0, $location - 1 );
 289                      $value = substr( $line, $location );
 290  
 291                      // Handle the special case where the separator is actually " = " or " : ".
 292                      if ( gp_endswith( $key, ' ' ) && gp_startswith( $value, ' ' ) ) {
 293                          $key   = substr( $key, 0, -1 );
 294                          $value = substr( $value, 1 );
 295                      }
 296  
 297                      return true;
 298                  }
 299              }
 300          }
 301  
 302          // Return false since we didn't find a valid line to split.
 303          return false;
 304      }
 305  
 306      /**
 307       * Reads a set of translations from a properties file.
 308       *
 309       * @since 2.0.0
 310       *
 311       * @param string     $file_name The filename of the uploaded properties file.
 312       * @param GP_Project $project   The project object to read the translations in to.
 313       *
 314       * @return Translations|bool
 315       */
 316  	public function read_translations_from_file( $file_name, $project = null ) {
 317          if ( is_null( $project ) ) {
 318              return false;
 319          }
 320  
 321          $translations = $this->read_originals_from_file( $file_name );
 322  
 323          if ( ! $translations ) {
 324              return false;
 325          }
 326  
 327          $originals        = GP::$original->by_project_id( $project->id );
 328          $new_translations = new Translations();
 329  
 330          foreach ( $translations->entries as $key => $entry ) {
 331              // we have been using read_originals_from_file to parse the file
 332              // so we need to swap singular and translation
 333              $entry->translations = array( $entry->singular );
 334              $entry->singular     = null;
 335  
 336              foreach ( $originals as $original ) {
 337                  if ( $original->context == $entry->context ) {
 338                      $entry->singular = $original->singular;
 339                      $entry->context  = $original->context;
 340                      break;
 341                  }
 342              }
 343  
 344              if ( ! $entry->singular ) {
 345                  error_log(
 346                      sprintf(
 347                          /* translators: 1: Context. 2: Project ID. */
 348                          __( 'Missing context %1$s in project #%2$d', 'glotpress' ),
 349                          $entry->context,
 350                          $project->id
 351                      )
 352                  );
 353                  continue;
 354              }
 355  
 356              $new_translations->add_entry( $entry );
 357          }
 358  
 359          return $new_translations;
 360      }
 361  
 362      /**
 363       * Reads a set of original strings from a properties file.
 364       *
 365       * @since 2.0.0
 366       *
 367       * @param string $file_name The filename of the uploaded properties file.
 368       *
 369       * @return Translations|bool
 370       */
 371  	public function read_originals_from_file( $file_name ) {
 372          $entries = new Translations();
 373          $file    = file_get_contents( $file_name );
 374  
 375          if ( false === $file ) {
 376              return false;
 377          }
 378  
 379          $entry  = $comment = null;
 380          $inline = false;
 381          $lines  = explode( "\n", $file );
 382          $key    = '';
 383          $value  = '';
 384  
 385          foreach ( $lines as $line ) {
 386              if ( preg_match( '/^(#|!)\s*(.*)\s*$/', $line, $matches ) ) {
 387                  // If we have been processing a multi-line entry, save it now.
 388                  if ( true === $inline ) {
 389                      $entries->add_entry( $entry );
 390                      $inline = false;
 391                  }
 392  
 393                  $matches[1] = trim( $matches[1] );
 394  
 395                  if ( 'No comment provided.' !== $matches[1] ) {
 396                      if ( null !== $comment ) {
 397                          $comment = $comment . "\n" . $matches[2];
 398                      } else {
 399                          $comment = $matches[2];
 400                      }
 401                  } else {
 402                      $comment = null;
 403                  }
 404              } elseif ( false === $inline && $this->split_properties_line( $line, $key, $value ) ) {
 405                  // Check to see if this line continues on to the next
 406                  if ( gp_endswith( $line, '\\' ) ) {
 407                      $inline = true;
 408                      $value  = trim( $value, '\\' );
 409                  }
 410  
 411                  $entry          = new Translation_Entry();
 412                  $entry->context = rtrim( $this->unescape( $key ) );
 413  
 414                  /*
 415                   * So the following line looks a little weird, why encode just to decode?
 416                   *
 417                   * The reason is simple, properties files are in ISO-8859-1 aka Latin-1 format
 418                   * and can have extended characters above 127 but below 256 represented by a
 419                   * single byte.  That will break things later as PHP/MySQL will not accept
 420                   * a mixed encoding string with these high single byte characters in them.
 421                   *
 422                   * So let's convert everything to escaped unicode first and then decode
 423                   * the whole kit and kaboodle to UTF-8.
 424                   */
 425                  $entry->singular = $this->uni_decode( $this->ascii_uni_encode( $value ) );
 426  
 427                  if ( ! is_null( $comment ) ) {
 428                      $entry->extracted_comments = $comment;
 429                      $comment                   = null;
 430                  }
 431  
 432                  $entry->translations = array();
 433  
 434                  // Only save this entry if we're not in a multi line translation.
 435                  if ( false === $inline ) {
 436                      $entries->add_entry( $entry );
 437                  }
 438              } else {
 439                  // If we're processing a multi-line entry, add the line to the translation.
 440                  if ( true === $inline ) {
 441                      // Check to make sure we're not a blank line.
 442                      if ( '' != trim( $line ) ) {
 443                          // If there's still more lines to add, trim off the trailing slash.
 444                          if ( gp_endswith( $line, '\\' ) ) {
 445                              $line = rtrim( $line, '\\' );
 446                          }
 447  
 448                          // Strip off leading spaces.
 449                          $line = ltrim( $line );
 450  
 451                          // Decode the translation and add it to the current entry.
 452                          $entry->singular = $entry->singular . $this->uni_decode( $line );
 453                      } else {
 454                          // Any blank line signals end of the entry.
 455                          $entries->add_entry( $entry );
 456                          $inline = false;
 457                      }
 458                  } else {
 459                      // If we hit a blank line and are not processing a multi-line entry, reset the comment.
 460                      $comment = null;
 461                  }
 462              }
 463          }
 464  
 465          // Make sure we save the last entry if it is a multi-line entry.
 466          if ( true === $inline ) {
 467              $entries->add_entry( $entry );
 468          }
 469  
 470          return $entries;
 471      }
 472  
 473      /**
 474       * The callback to sort the entries by, used above in print_exported_file().
 475       *
 476       * @since 2.0.0
 477       *
 478       * @param Translations $a The first translation to compare.
 479       * @param Translations $b The second translation to compare.
 480       *
 481       * @return int
 482       */
 483  	private function sort_entries( $a, $b ) {
 484          if ( $a->context == $b->context ) {
 485              return 0;
 486          }
 487  
 488          return ( $a->context > $b->context ) ? +1 : -1;
 489      }
 490  
 491      /**
 492       * Unescape a string to be used as a value in the properties file.
 493       *
 494       * @since 2.0.0
 495       *
 496       * @param string $string The string to unescape.
 497       *
 498       * @return string
 499       */
 500  	private function unescape( $string ) {
 501          return stripcslashes( $string );
 502      }
 503  
 504      /**
 505       * Escape a string to be used as a value in the properties file.
 506       *
 507       * @since 2.0.0
 508       *
 509       * @param string $string The string to escape.
 510       *
 511       * @return string
 512       */
 513  	private function escape( $string ) {
 514          return addcslashes( $string, '"\\/' );
 515      }
 516  
 517      /**
 518       * Escape a string to be used as a key name in the properties file.
 519       *
 520       * @since 2.0.0
 521       *
 522       * @param string $string The string to escape.
 523       *
 524       * @return string
 525       */
 526  	private function escape_key( $string ) {
 527          return addcslashes( $string, '=: ' );
 528      }
 529  
 530  }
 531  
 532  GP::$formats['properties'] = new GP_Format_Properties(); // Register an instance of this format under the 'properties' key.
PHP Cross Reference of GlotPress

/gp-includes/formats/ -> format-properties.php (source)