callbacks[ $id ] = $callback; } /** * Removes an existing callback for a warning. * * @since 1.0.0 * @access public * * @param string $id Unique ID of the callback. */ public function remove( $id ) { unset( $this->callbacks[ $id ] ); } /** * Checks whether a callback exists for an ID. * * @since 1.0.0 * @access public * * @param string $id Unique ID of the callback. * @return bool True if exists, false if not. */ public function has( $id ) { return isset( $this->callbacks[ $id ] ); } /** * Checks translations for any issues/warnings. * * @since 1.0.0 * @access public * * @param string $singular The singular form of an original string. * @param string $plural The plural form of an original string. * @param string[] $translations An array of translations for an original. * @param GP_Locale $locale The locale of the translations. * @return array|null Null if no issues have been found, otherwise an array * with warnings. */ public function check( $singular, $plural, $translations, $locale ) { $problems = array(); foreach ( $translations as $translation_index => $translation ) { if ( ! $translation ) { continue; } $skip = array( 'singular' => false, 'plural' => false, ); if ( null !== $plural ) { $numbers_for_index = $locale->numbers_for_index( $translation_index ); if ( 1 === $locale->nplurals ) { $skip['singular'] = true; } elseif ( in_array( 1, $numbers_for_index, true ) ) { $skip['plural'] = true; } else { $skip['singular'] = true; } } foreach ( $this->callbacks as $callback_id => $callback ) { if ( ! $skip['singular'] ) { $singular_test = $callback( $singular, $translation, $locale ); if ( true !== $singular_test ) { $problems[ $translation_index ][ $callback_id ] = $singular_test; } } if ( null !== $plural && ! $skip['plural'] ) { $plural_test = $callback( $plural, $translation, $locale ); if ( true !== $plural_test ) { $problems[ $translation_index ][ $callback_id ] = $plural_test; } } } } return empty( $problems ) ? null : $problems; } } /** * Class used to register built-in translation warnings. * * @since 1.0.0 */ class GP_Builtin_Translation_Warnings { /** * Lower bound for length checks. * * @since 1.0.0 * @access public * * @var float */ public $length_lower_bound = 0.2; /** * Upper bound for length checks. * * @since 1.0.0 * @access public * * @var float */ public $length_upper_bound = 5.0; /** * List of locales which are excluded from length checks. * * @since 1.0.0 * @access public * * @var array */ public $length_exclude_languages = array( 'art-xemoji', 'ja', 'ko', 'zh', 'zh-hk', 'zh-cn', 'zh-sg', 'zh-tw' ); /** * List of domains with allowed changes to their own subdomains * * @since 3.0.0 * @access public * * @var array */ public $allowed_domain_changes = array( // Allow links to wordpress.org to be changed to a subdomain. 'wordpress.org' => '[^.]+\.wordpress\.org', // Allow links to wordpress.com to be changed to a subdomain. 'wordpress.com' => '[^.]+\.wordpress\.com', // Allow links to gravatar.org to be changed to a subdomain. 'en.gravatar.com' => '[^.]+\.gravatar\.com', // Allow links to wikipedia.org to be changed to a subdomain. 'en.wikipedia.org' => '[^.]+\.wikipedia\.org', ); /** * List of languages without italics * * @since 3.0.0 * @access public * * @var array */ public $languages_without_italics = array( 'ja', 'ko', 'zh', 'zh-hk', 'zh-cn', 'zh-sg', 'zh-tw', ); /** * Checks whether lengths of source and translation differ too much. * * @since 1.0.0 * @access public * * @param string $original The source string. * @param string $translation The translation. * @param GP_Locale $locale The locale of the translation. * @return string|true True if check is OK, otherwise warning message. */ public function warning_length( $original, $translation, $locale ) { if ( in_array( $locale->slug, $this->length_exclude_languages, true ) ) { return true; } if ( gp_startswith( $original, 'number_format_' ) ) { return true; } $len_src = mb_strlen( $original ); $len_trans = mb_strlen( $translation ); if ( ! ( $this->length_lower_bound * $len_src < $len_trans && $len_trans < $this->length_upper_bound * $len_src ) && ( ! gp_in( '_abbreviation', $original ) && ! gp_in( '_initial', $original ) ) ) { return __( 'Lengths of source and translation differ too much.', 'glotpress' ); } return true; } /** * Checks whether HTML tags are missing or have been added. * * @todo Validate if the HTML is in the same order in function of the language. Validate nesting of HTML is same. * * @since 1.0.0 * @access public * * @param string $original The source string. * @param string $translation The translation. * @param GP_Locale $locale The locale of the translation. * @return string|true True if check is OK, otherwise warning message. */ public function warning_tags( $original, $translation, $locale ) { $tag_pattern = '(<[^>]*>)'; $tag_re = "/$tag_pattern/Us"; $original_parts = array(); $translation_parts = array(); if ( preg_match_all( $tag_re, $original, $m ) ) { $original_parts = $m[1]; } if ( preg_match_all( $tag_re, $translation, $m ) ) { $translation_parts = $m[1]; } // East asian languages can remove emphasis/italic tags. if ( count( $original_parts ) > count( $translation_parts ) ) { // Remove Italic requirements. if ( in_array( $locale->slug, $this->languages_without_italics, true ) ) { $original_parts = array_diff( $original_parts, array( '', '', '', '' ) ); } } if ( count( $original_parts ) > count( $translation_parts ) ) { return sprintf( /* translators: %s: HTML tags. */ __( 'Missing tags from translation. Expected: %s', 'glotpress' ), implode( ' ', array_diff( $original_parts, $translation_parts ) ) ); } if ( count( $original_parts ) < count( $translation_parts ) ) { return sprintf( /* translators: %s: HTML tags. */ __( 'Too many tags in translation. Found: %s', 'glotpress' ), implode( ' ', array_diff( $translation_parts, $original_parts ) ) ); } // Check if the translation tags are in correct order. $valid_html_warning = $this->check_valid_html( $original_parts, $translation_parts ); if ( true !== $valid_html_warning ) { return trim( $valid_html_warning ); } // Sort the tags, from this point out as long as all the tags are present is okay. rsort( $original_parts ); rsort( $translation_parts ); $changeable_attributes = array( // We allow certain attributes to be different in translations. 'title', 'aria-label', // src and href will be checked separately. 'src', 'href', ); $attribute_regex = '/(\s*(?P%s))=([\'"])(?P.+)\\3(\s*)/i'; $attribute_replace = '$1=$3...$3$5'; $changeable_attr_regex = sprintf( $attribute_regex, implode( '|', $changeable_attributes ) ); // Items are sorted, so if all is well, will match up. $parts_tags = array_combine( $original_parts, $translation_parts ); $warnings = array(); foreach ( $parts_tags as $original_tag => $translation_tag ) { if ( $original_tag === $translation_tag ) { continue; } // Remove any attributes that can be expected to differ. $original_filtered_tag = preg_replace( $changeable_attr_regex, $attribute_replace, $original_tag ); $translation_filtered_tag = preg_replace( $changeable_attr_regex, $attribute_replace, $translation_tag ); if ( $original_filtered_tag !== $translation_filtered_tag ) { $warnings[] = sprintf( /* translators: 1: Original HTML tag. 2: Translated HTML tag. */ __( 'Expected %1$s, got %2$s.', 'glotpress' ), $original_tag, $translation_tag ); } } // Now check that the URLs mentioned within href & src tags match. $original_links = ''; $translation_links = ''; $original_links = implode( "\n", $this->get_values_from_href_src( $original_parts ) ); $translation_links = implode( "\n", $this->get_values_from_href_src( $translation_parts ) ); // Validate the URLs if present. if ( $original_links || $translation_links ) { $url_warnings = $this->links_without_url_and_placeholders_are_equal( $original_links, $translation_links ); if ( true !== $url_warnings ) { $warnings = array_merge( $warnings, $url_warnings ); } $url_warnings = $this->warning_mismatching_urls( $original_links, $translation_links ); if ( true !== $url_warnings ) { $warnings[] = $url_warnings; } } if ( empty( $warnings ) ) { return true; } return implode( "\n", $warnings ); } /** * Checks whether PHP placeholders are missing or have been added. * * The default regular expression: * bcdefgosuxEFGX are standard printf placeholders. * % is included to allow/expect %%. * l is included for wp_sprintf_l()'s custom %l format. * @ is included for Swift (as used for iOS mobile app) %@ string format. * * @since 1.0.0 * @access public * * @param string $original The source string. * @param string $translation The translation. * @param GP_Locale $locale The locale of the translation. * @return string|true True if check is OK, otherwise warning message. */ public function warning_placeholders( $original, $translation, $locale ) { /** * Filter the regular expression that is used to match placeholders in translations. * * @since 1.0.0 * * @param string $placeholders_re Regular expression pattern without leading or trailing slashes. */ $placeholders_re = apply_filters( 'gp_warning_placeholders_re', '(?_placeholders_counts( $original, $placeholders_re ); $translation_counts = $this->_placeholders_counts( $translation, $placeholders_re ); $all_placeholders = array_unique( array_merge( array_keys( $original_counts ), array_keys( $translation_counts ) ) ); foreach ( $all_placeholders as $placeholder ) { $original_count = gp_array_get( $original_counts, $placeholder, 0 ); $translation_count = gp_array_get( $translation_counts, $placeholder, 0 ); if ( $original_count > $translation_count ) { return sprintf( /* translators: %s: Placeholder. */ __( 'Missing %s placeholder in translation.', 'glotpress' ), $placeholder ); } if ( $original_count < $translation_count ) { return sprintf( /* translators: %s: Placeholder. */ __( 'Extra %s placeholder in translation.', 'glotpress' ), $placeholder ); } } return true; } /** * Counts the placeholders in a string. * * @since 1.0.0 * @access private * * @param string $string The string to search. * @param string $re Regular expressions to match placeholders. * @return array An array with counts per placeholder. */ private function _placeholders_counts( $string, $re ) { $counts = array(); preg_match_all( "/$re/", $string, $matches ); foreach ( $matches[0] as $match ) { $counts[ $match ] = gp_array_get( $counts, $match, 0 ) + 1; } return $counts; } /** * Checks whether a translation does begin on newline. * * @since 1.0.0 * @access public * * @param string $original The source string. * @param string $translation The translation. * @param GP_Locale $locale The locale of the translation. * @return string|true True if check is OK, otherwise warning message. */ public function warning_should_begin_on_newline( $original, $translation, $locale ) { if ( gp_startswith( $original, "\n" ) && ! gp_startswith( $translation, "\n" ) ) { return __( 'Original and translation should both begin on newline.', 'glotpress' ); } return true; } /** * Checks whether a translation doesn't begin on newline. * * @since 1.0.0 * @access public * * @param string $original The source string. * @param string $translation The translation. * @param GP_Locale $locale The locale of the translation. * @return string|true True if check is OK, otherwise warning message. */ public function warning_should_not_begin_on_newline( $original, $translation, $locale ) { if ( ! gp_startswith( $original, "\n" ) && gp_startswith( $translation, "\n" ) ) { return __( 'Translation should not begin on newline.', 'glotpress' ); } return true; } /** * Checks whether a translation does end on newline. * * @since 1.0.0 * @access public * * @param string $original The source string. * @param string $translation The translation. * @param GP_Locale $locale The locale of the translation. * @return string|true True if check is OK, otherwise warning message. */ public function warning_should_end_on_newline( $original, $translation, $locale ) { if ( gp_endswith( $original, "\n" ) && ! gp_endswith( $translation, "\n" ) ) { return __( 'Original and translation should both end on newline.', 'glotpress' ); } return true; } /** * Checks whether a translation doesn't end on newline. * * @since 1.0.0 * @access public * * @param string $original The source string. * @param string $translation The translation. * @param GP_Locale $locale The locale of the translation. * @return string|true True if check is OK, otherwise warning message. */ public function warning_should_not_end_on_newline( $original, $translation, $locale ) { if ( ! gp_endswith( $original, "\n" ) && gp_endswith( $translation, "\n" ) ) { return __( 'Translation should not end on newline.', 'glotpress' ); } return true; } /** * Adds a warning for changing plain-text URLs. * * This allows for the scheme to change, and for some domains to change to a subdomain. * * @since 3.0.0 * @access public * * @param string $original The original string. * @param string $translation The translated string. * @return string|true True if check is OK, otherwise warning message. */ public function warning_mismatching_urls( $original, $translation ) { // Any http/https/schemeless URLs which are not encased in quotation marks // nor contain whitespace and end with a valid URL ending char. $urls_regex = '@(? http) or a trailing slash was changed, discard if so. foreach ( $missing_urls as $key => $missing_url ) { $scheme = parse_url( $missing_url, PHP_URL_SCHEME ); $alternate_scheme = ( 'http' == $scheme ? 'https' : 'http' ); $alternate_scheme_url = preg_replace( "@^$scheme(?=:)@", $alternate_scheme, $missing_url ); $alt_urls = array( // Scheme changes. $alternate_scheme_url, // Slashed/unslashed changes. ( '/' === substr( $missing_url, -1 ) ? rtrim( $missing_url, '/' ) : "$missing_url/" ), // Scheme & Slash changes. ( '/' === substr( $alternate_scheme_url, -1 ) ? rtrim( $alternate_scheme_url, '/' ) : "$alternate_scheme_url/" ), ); foreach ( $alt_urls as $alt_url ) { $alternate_index = array_search( $alt_url, $added_urls ); if ( false !== $alternate_index ) { unset( $missing_urls[ $key ], $added_urls[ $alternate_index ] ); } } } // Check if just the domain was changed, and if so, if it's to a whitelisted domain. foreach ( $missing_urls as $key => $missing_url ) { $host = parse_url( $missing_url, PHP_URL_HOST ); if ( ! isset( $this->allowed_domain_changes[ $host ] ) ) { continue; } $allowed_host_regex = $this->allowed_domain_changes[ $host ]; list( , $missing_url_path ) = explode( $host, $missing_url, 2 ); $alternate_host_regex = '!^https?://' . $allowed_host_regex . preg_quote( $missing_url_path, '!' ) . '$!i'; foreach ( $added_urls as $added_index => $added_url ) { if ( preg_match( $alternate_host_regex, $added_url, $match ) ) { unset( $missing_urls[ $key ], $added_urls[ $added_index ] ); } } } if ( ! $missing_urls && ! $added_urls ) { return true; } $error = ''; if ( $missing_urls ) { $error .= sprintf( /* translators: %s: URLs. */ __( 'The translation appears to be missing the following URLs: %s', 'glotpress' ), implode( ', ', $missing_urls ) . "\n" ); } if ( $added_urls ) { $error .= sprintf( /* translators: %s: URLs. */ __( 'The translation contains the following unexpected URLs: %s', 'glotpress' ), implode( ', ', $added_urls ) . "\n" ); } return trim( $error ); } /** * Adds a warning for adding unexpected percent signs in a sprintf-like string. * * This is to catch translations for originals like this: * - Original: `100 percent` * - Submitted translation: `100%` * - Proper translation: `100%%` * * @since 3.0.0 * @access public * * @param string $original The original string. * @param string $translation The translated string. * @return bool|string */ public function warning_unexpected_sprintf_token( $original, $translation ) { $unexpected_tokens = array(); $is_sprintf = preg_match( '!%((\d+\$(?:\d+)?)?[bcdefgosuxl])\b!i', $original ); // Find any percents that are not valid or escaped. if ( $is_sprintf ) { // Negative/Positive lookahead not used to allow the warning to include the context around the % sign. preg_match_all( '/(?P[^\s%]*)%((\d+\$(?:\d+)?)?(?P.))/i', $translation, $m ); foreach ( $m['char'] as $i => $char ) { // % is included for escaped %%. if ( false === strpos( 'bcdefgosux%l.', $char ) ) { $unexpected_tokens[] = $m[0][ $i ]; } } } if ( $unexpected_tokens ) { return sprintf( /* translators: %s: Placeholders. */ __( 'The translation contains the following unexpected placeholders: %s', 'glotpress' ), implode( ', ', $unexpected_tokens ) ); } return true; } /** * Registers all methods starting with `warning_` as built-in warnings. * * @param GP_Translation_Warnings $translation_warnings Instance of GP_Translation_Warnings. */ public function add_all( $translation_warnings ) { $warnings = array_filter( get_class_methods( get_class( $this ) ), function ( $key ) { return gp_startswith( $key, 'warning_' ); } ); $warnings = array_fill_keys( $warnings, $this ); foreach ( $warnings as $warning => $class ) { $translation_warnings->add( str_replace( 'warning_', '', $warning ), array( $class, $warning ) ); } } /** * Adds a warning for changing placeholders. * * This only supports placeholders in the format of '###[A-Za-z_-]+###'. * * @todo Check that the number of each type of placeholders are the same in the original and in the translation * * @since 3.0.0 * @access public * * @param string $original The original string. * @param string $translation The translated string. * @return string|true */ public function warning_named_placeholders( string $original, string $translation ) { $placeholder_regex = '@(###[A-Za-z_-]+###)@'; preg_match_all( $placeholder_regex, $original, $original_placeholders ); $original_placeholders = array_unique( $original_placeholders[0] ); preg_match_all( $placeholder_regex, $translation, $translation_placeholders ); $translation_placeholders = array_unique( $translation_placeholders[0] ); $missing_placeholders = array_diff( $original_placeholders, $translation_placeholders ); $added_placeholders = array_diff( $translation_placeholders, $original_placeholders ); if ( ! $missing_placeholders && ! $added_placeholders ) { return true; } $error = ''; if ( $missing_placeholders ) { $error .= sprintf( /* translators: %s: Placeholders. */ __( 'The translation appears to be missing the following placeholders: %s', 'glotpress' ), implode( ', ', $missing_placeholders ) . "\n" ); } if ( $added_placeholders ) { $error .= sprintf( /* translators: %s: Placeholders. */ __( 'The translation contains the following unexpected placeholders: %s', 'glotpress' ), implode( ', ', $added_placeholders ) ); } return trim( $error ); } /** * Returns the values from the href and the src * * @since 3.0.0 * @access private * * @param array $content The original array. * @return array */ private function get_values_from_href_src( array $content ): array { preg_match_all( '/]+href=([\'"])(?.+?)\1[^>]*>/i', implode( ' ', $content ), $href_values ); preg_match_all( '/<[^>]+src=([\'"])(?.+?)\1[^>]*>/i', implode( ' ', $content ), $src_values ); return array_merge( $href_values['href'], $src_values['src'] ); } /** * Checks if the HTML tags are in correct order * * Warns about HTML tags translations in incorrect order. For example: * - Original: * - Translation: * * @param array $original_parts The original HTML tags. * @param array $translation_parts The translation HTML tags. * @return string|true True if check is OK, otherwise warning message. */ private function check_valid_html( array $original_parts, array $translation_parts ) { if ( empty( $original_parts ) ) { return true; } if ( $original_parts === $translation_parts ) { return true; } libxml_clear_errors(); libxml_use_internal_errors( true ); $original = new DOMDocument(); $original->loadHTML( implode( '', $original_parts ) ); // If the original parts are not well-formed, don't continue the translation check. $errors = libxml_get_errors(); if ( ! empty( $errors ) ) { return true; } $translation = new DOMDocument(); $translation->loadHTML( implode( '', $translation_parts ) ); $errors = libxml_get_errors(); if ( ! empty( $errors ) ) { $message = array(); foreach ( $errors as $error ) { $message[] = trim( $error->message ); } return sprintf( /* translators: %s: HTML tags. */ __( 'The translation contains incorrect HTML tags: %s', 'glotpress' ), implode( ', ', $message ) ); } return true; } /** * Checks whether links that are not URL or placeholders are equal or not * * @since 3.0.0 * @access private * * @param string $original_links The original links. * @param string $translation_links The translated links. * @return array|true True if check is OK, otherwise warning message. */ private function links_without_url_and_placeholders_are_equal( string $original_links, string $translation_links ) { $urls_regex = '@(?