PHPXRef 0.7.1 : BuddyPress : /src/bp-core/classes/class-bp-media-extractor.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  /**
   3   * Core component classes.
   4   *
   5   * @package BuddyPress
   6   * @subpackage Core
   7   * @since 2.3.0
   8   */
   9  
  10  // Exit if accessed directly.
  11  defined( 'ABSPATH' ) || exit;
  12  
  13  /**
  14   * Extracts media from text. Use {@link extract()}.
  15   *
  16   * @since 2.3.0
  17   *
  18   * The supported types are links, mentions, images, shortcodes, embeds, audio, video, and "all".
  19   * This is what each type extracts:
  20   *
  21   * Links:      <a href="http://example.com">
  22   * Mentions:   @name
  23   *             If the Activity component is enabled, we use it to parse out any at-names. A consequence
  24   *             to note is that the "name" mentioned must match a real user account. If it's a made-up
  25   *             at-name, then it isn't extracted.
  26   *             If the Activity component is disabled, any at-name is extracted (both those matching
  27   *             real accounts, and those made-up).
  28   * Images:     <img src="image.gif">, [gallery], [gallery ids="2,3"], featured images (Post thumbnails).
  29   *             If an extracted image is in the Media Library, then its resolution will be included.
  30   * Shortcodes: Extract information about any (registered) shortcodes.
  31   *             This includes any shortcodes indirectly covered by any of the other media extraction types.
  32   *             For example, [gallery].
  33   * Embeds:     Extract any URL matching a registered oEmbed handler.
  34   * Audio:      <a href="*.mp3">, [audio]
  35   *             See wp_get_audio_extensions() for supported audio formats.
  36   * Video:      [video]
  37   *             See wp_get_video_extensions() for supported video formats.
  38   *
  39   * @see BP_Media_Extractor::extract() Use this to extract media.
  40   */
  41  class BP_Media_Extractor {
  42      /**
  43       * Media type.
  44       *
  45       * @since 2.3.0
  46       * @var int
  47       */
  48      const ALL        = 255;
  49      const LINKS      = 1;
  50      const MENTIONS   = 2;
  51      const IMAGES     = 4;
  52      const SHORTCODES = 8;
  53      const EMBEDS     = 16;
  54      const AUDIO      = 32;
  55      const VIDEOS     = 64;
  56  
  57  
  58      /**
  59       * Extract media from text.
  60       *
  61       * @since 2.3.0
  62       *
  63       * @param string|WP_Post $richtext        Content to parse.
  64       * @param int            $what_to_extract Media type to extract (optional).
  65       * @param array          $extra_args      Bespoke data for a particular extractor (optional).
  66       * @return array {
  67       *     @type array $has Extracted media counts. {
  68       *         @type int $audio
  69       *         @type int $embeds
  70       *         @type int $images
  71       *         @type int $links
  72       *         @type int $mentions
  73       *         @type int $shortcodes
  74       *         @type int $video
  75       *     }
  76       *     @type array $audio Extracted audio. {
  77       *         Array of extracted media.
  78       *
  79       *         @type string $source Media source. Either "html" or "shortcodes".
  80       *         @type string $url    Link to audio.
  81       *     }
  82       *     @type array $embeds Extracted oEmbeds. {
  83       *         Array of extracted media.
  84       *
  85       *         @type string $url oEmbed link.
  86       *     }
  87       *     @type array $images Extracted images. {
  88       *         Array of extracted media.
  89       *
  90       *         @type int    $gallery_id Gallery ID. Optional, not always set.
  91       *         @type int    $height     Width of image. If unknown, set to 0.
  92       *         @type string $source     Media source. Either "html" or "galleries".
  93       *         @type string $url        Link to image.
  94       *         @type int    $width      Width of image. If unknown, set to 0.
  95       *     }
  96       *     @type array $links Extracted URLs. {
  97       *         Array of extracted media.
  98       *
  99       *         @type string $url Link.
 100       *     }
 101       *     @type array $mentions Extracted mentions. {
 102       *         Array of extracted media.
 103       *
 104       *         @type string $name    @mention.
 105       *         @type string $user_id User ID. Optional, only set if Activity component enabled.
 106       *     }
 107       *     @type array $shortcodes Extracted shortcodes. {
 108       *         Array of extracted media.
 109       *
 110       *         @type array  $attributes Key/value pairs of the shortcodes attributes (if any).
 111       *         @type string $content    Text wrapped by the shortcode.
 112       *         @type string $type       Shortcode type.
 113       *         @type string $original   The entire shortcode.
 114       *     }
 115       *     @type array $videos Extracted video. {
 116       *         Array of extracted media.
 117       *
 118       *         @type string $source Media source. Currently only "shortcodes".
 119       *         @type string $url    Link to audio.
 120       *     }
 121       * }
 122       */
 123  	public function extract( $richtext, $what_to_extract = self::ALL, $extra_args = array() ) {
 124          $media = array();
 125  
 126          // Support passing a WordPress Post for the $richtext parameter.
 127          if ( is_a( $richtext, 'WP_Post' ) ) {
 128              $extra_args['post'] = $richtext;
 129              $richtext           = $extra_args['post']->post_content;
 130          }
 131  
 132          $plaintext = $this->strip_markup( $richtext );
 133  
 134  
 135          // Extract links.
 136          if ( self::LINKS & $what_to_extract ) {
 137              $media = array_merge_recursive( $media, $this->extract_links( $richtext, $plaintext, $extra_args ) );
 138          }
 139  
 140          // Extract mentions.
 141          if ( self::MENTIONS & $what_to_extract ) {
 142              $media = array_merge_recursive( $media, $this->extract_mentions( $richtext, $plaintext, $extra_args ) );
 143          }
 144  
 145          // Extract images.
 146          if ( self::IMAGES & $what_to_extract ) {
 147              $media = array_merge_recursive( $media, $this->extract_images( $richtext, $plaintext, $extra_args ) );
 148          }
 149  
 150          // Extract shortcodes.
 151          if ( self::SHORTCODES & $what_to_extract ) {
 152              $media = array_merge_recursive( $media, $this->extract_shortcodes( $richtext, $plaintext, $extra_args ) );
 153          }
 154  
 155          // Extract oEmbeds.
 156          if ( self::EMBEDS & $what_to_extract ) {
 157              $media = array_merge_recursive( $media, $this->extract_embeds( $richtext, $plaintext, $extra_args ) );
 158          }
 159  
 160          // Extract audio.
 161          if ( self::AUDIO & $what_to_extract ) {
 162              $media = array_merge_recursive( $media, $this->extract_audio( $richtext, $plaintext, $extra_args ) );
 163          }
 164  
 165          // Extract video.
 166          if ( self::VIDEOS & $what_to_extract ) {
 167              $media = array_merge_recursive( $media, $this->extract_video( $richtext, $plaintext, $extra_args ) );
 168          }
 169  
 170          /**
 171           * Filters media extracted from text.
 172           *
 173           * @since 2.3.0
 174           *
 175           * @param array  $media           Extracted media. See {@link BP_Media_Extractor::extract()} for format.
 176           * @param string $richtext        Content to parse.
 177           * @param int    $what_to_extract Media type to extract.
 178           * @param array  $extra_args      Bespoke data for a particular extractor.
 179           * @param string $plaintext       Copy of $richtext without any markup.
 180           */
 181          return apply_filters( 'bp_media_extractor_extract', $media, $richtext, $what_to_extract, $extra_args, $plaintext );
 182      }
 183  
 184  
 185      /**
 186       * Content type specific extraction methods.
 187       *
 188       * You shouldn't need to use these directly; just use {@link BP_Media_Extractor::extract()}.
 189       */
 190  
 191      /**
 192       * Extract `<a href>` tags from text.
 193       *
 194       * @since 2.3.0
 195       *
 196       * @param string $richtext   Content to parse.
 197       * @param string $plaintext  Sanitized version of the content.
 198       * @param array  $extra_args Bespoke data for a particular extractor (optional).
 199       * @return array {
 200       *     @type array $has Extracted media counts. {
 201       *         @type int $links
 202       *     }
 203       *     @type array $links Extracted URLs. {
 204       *         Array of extracted media.
 205       *
 206       *         @type string $url Link.
 207       *     }
 208       * }
 209       */
 210  	protected function extract_links( $richtext, $plaintext, $extra_args = array() ) {
 211          $data = array( 'has' => array( 'links' => 0 ), 'links' => array() );
 212  
 213          // Matches: href="text" and href='text'.
 214          if ( stripos( $richtext, 'href=' ) !== false ) {
 215              preg_match_all( '#href=(["\'])([^"\']+)\1#i', $richtext, $matches );
 216  
 217              if ( ! empty( $matches[2] ) ) {
 218                  $matches[2] = array_unique( $matches[2] );
 219  
 220                  foreach ( $matches[2] as $link_src ) {
 221                      $link_src = esc_url_raw( $link_src );
 222  
 223                      if ( $link_src ) {
 224                          $data['links'][] = array( 'url' => $link_src );
 225                      }
 226                  }
 227              }
 228          }
 229  
 230          $data['has']['links'] = count( $data['links'] );
 231  
 232          /**
 233           * Filters links extracted from text.
 234           *
 235           * @since 2.3.0
 236           *
 237           * @param array  $data       Extracted links. See {@link BP_Media_Extractor::extract_links()} for format.
 238           * @param string $richtext   Content to parse.
 239           * @param string $plaintext  Copy of $richtext without any markup.
 240           * @param array  $extra_args Bespoke data for a particular extractor.
 241           */
 242          return apply_filters( 'bp_media_extractor_links', $data, $richtext, $plaintext, $extra_args );
 243      }
 244  
 245      /**
 246       * Extract @mentions tags from text.
 247       *
 248       * If the Activity component is enabled, it is used to parse @mentions.
 249       * The mentioned "name" must match a user account, otherwise it is discarded.
 250       *
 251       * If the Activity component is disabled, any @mentions are extracted.
 252       *
 253       * @since 2.3.0
 254       *
 255       * @param string $richtext   Content to parse.
 256       * @param string $plaintext  Sanitized version of the content.
 257       * @param array  $extra_args Bespoke data for a particular extractor.
 258       * @return array {
 259       *     @type array $has Extracted media counts. {
 260       *         @type int $mentions
 261       *     }
 262       *     @type array $mentions Extracted mentions. {
 263       *         Array of extracted media.
 264       *
 265       *         @type string $name    @mention.
 266       *         @type string $user_id User ID. Optional, only set if Activity component enabled.
 267       *     }
 268       * }
 269       */
 270  	protected function extract_mentions( $richtext, $plaintext, $extra_args = array() ) {
 271          $data     = array( 'has' => array( 'mentions' => 0 ), 'mentions' => array() );
 272          $mentions = array();
 273  
 274          // If the Activity component is active, use it to parse @mentions.
 275          if ( bp_is_active( 'activity' ) ) {
 276              $mentions = bp_activity_find_mentions( $plaintext );
 277              if ( ! $mentions ) {
 278                  $mentions = array();
 279              }
 280  
 281          // If the Activity component is disabled, instead do a basic parse.
 282          } else {
 283              if ( strpos( $plaintext, '@' ) !== false ) {
 284                  preg_match_all( '/[@]+([A-Za-z0-9-_\.@]+)\b/', $plaintext, $matches );
 285  
 286                  if ( ! empty( $matches[1] ) ) {
 287                      $mentions = array_unique( array_map( 'strtolower', $matches[1] ) );
 288                  }
 289              }
 290          }
 291  
 292          // Build results.
 293          foreach ( $mentions as $user_id => $mention_name ) {
 294              $mention = array( 'name' => strtolower( $mention_name ) );
 295  
 296              // If the Activity component is active, store the User ID, too.
 297              if ( bp_is_active( 'activity' ) ) {
 298                  $mention['user_id'] = (int) $user_id;
 299              }
 300  
 301              $data['mentions'][] = $mention;
 302          }
 303  
 304          $data['has']['mentions'] = count( $data['mentions'] );
 305  
 306          /**
 307           * Filters @mentions extracted from text.
 308           *
 309           * @since 2.3.0
 310           *
 311           * @param array  $data       Extracted @mentions. See {@link BP_Media_Extractor::extract_mentions()} for format.
 312           * @param string $richtext   Content to parse.
 313           * @param string $plaintext  Copy of $richtext without any markup.
 314           * @param array  $extra_args Bespoke data for a particular extractor (optional).
 315           */
 316          return apply_filters( 'bp_media_extractor_mentions', $data, $richtext, $plaintext, $extra_args );
 317      }
 318  
 319      /**
 320       * Extract images from `<img src>` tags, [galleries], and featured images from a Post.
 321       *
 322       * If an image is in the Media Library, then its resolution is included in the results.
 323       *
 324       * @since 2.3.0
 325       *
 326       * @param string $richtext   Content to parse.
 327       * @param string $plaintext  Sanitized version of the content.
 328       * @param array  $extra_args Bespoke data for a particular extractor (optional).
 329       * @return array {
 330       *     @type array $has Extracted media counts. {
 331       *         @type int $images
 332       *     }
 333       *     @type array $images Extracted images. {
 334       *         Array of extracted media.
 335       *
 336       *         @type int    $gallery_id Gallery ID. Optional, not always set.
 337       *         @type int    $height     Width of image. If unknown, set to 0.
 338       *         @type string $source     Media source. Either "html" or "galleries".
 339       *         @type string $url        Link to image.
 340       *         @type int    $width      Width of image. If unknown, set to 0.
 341       *     }
 342       * }
 343       */
 344  	protected function extract_images( $richtext, $plaintext, $extra_args = array() ) {
 345          $media = array( 'has' => array( 'images' => 0 ), 'images' => array() );
 346  
 347          $featured_image = $this->extract_images_from_featured_images( $richtext, $plaintext, $extra_args );
 348          $galleries      = $this->extract_images_from_galleries( $richtext, $plaintext, $extra_args );
 349  
 350  
 351          // `<img src>` tags.
 352          if ( stripos( $richtext, 'src=' ) !== false ) {
 353              preg_match_all( '#src=(["\'])([^"\']+)\1#i', $richtext, $img_srcs );  // Matches src="text" and src='text'.
 354  
 355              // <img>.
 356              if ( ! empty( $img_srcs[2] ) ) {
 357                  $img_srcs[2] = array_unique( $img_srcs[2] );
 358  
 359                  foreach ( $img_srcs[2] as $image_src ) {
 360                      // Skip data URIs.
 361                      if ( strtolower( substr( $image_src, 0, 5 ) ) === 'data:' ) {
 362                          continue;
 363                      }
 364  
 365                      $image_src = esc_url_raw( $image_src );
 366                      if ( ! $image_src ) {
 367                          continue;
 368                      }
 369  
 370                      $media['images'][] = array(
 371                          'source' => 'html',
 372                          'url'    => $image_src,
 373  
 374                          // The image resolution isn't available, but we need to set the keys anyway.
 375                          'height' => 0,
 376                          'width'  => 0,
 377                      );
 378                  }
 379              }
 380          }
 381  
 382          // Galleries.
 383          if ( ! empty( $galleries ) ) {
 384              foreach ( $galleries as $gallery ) {
 385                  foreach ( $gallery as $image ) {
 386                      $image_url = esc_url_raw( $image['url'] );
 387                      if ( ! $image_url ) {
 388                          continue;
 389                      }
 390  
 391                      $media['images'][] = array(
 392                          'gallery_id' => $image['gallery_id'],
 393                          'source'     => 'galleries',
 394                          'url'        => $image_url,
 395                          'width'      => $image['width'],
 396                          'height'     => $image['height'],
 397                      );
 398                  }
 399              }
 400  
 401              $media['has']['galleries'] = count( $galleries );
 402          }
 403  
 404          // Featured images (aka thumbnails).
 405          if ( ! empty( $featured_image ) ) {
 406              $image_url = esc_url_raw( $featured_image[0] );
 407  
 408              if ( $image_url ) {
 409                  $media['images'][] = array(
 410                      'source' => 'featured_images',
 411                      'url'    => $image_url,
 412                      'width'  => $featured_image[1],
 413                      'height' => $featured_image[2],
 414                  );
 415  
 416                  $media['has']['featured_images'] = 1;
 417              }
 418          }
 419  
 420          // Update image count.
 421          $media['has']['images'] = count( $media['images'] );
 422  
 423  
 424          /**
 425           * Filters images extracted from text.
 426           *
 427           * @since 2.3.0
 428           *
 429           * @param array  $media      Extracted images. See {@link BP_Media_Extractor::extract_images()} for format.
 430           * @param string $richtext   Content to parse.
 431           * @param string $plaintext  Copy of $richtext without any markup.
 432           * @param array  $extra_args Bespoke data for a particular extractor.
 433           */
 434          return apply_filters( 'bp_media_extractor_images', $media, $richtext, $plaintext, $extra_args );
 435      }
 436  
 437      /**
 438       * Extract shortcodes from text.
 439       *
 440       * This includes any shortcodes indirectly used by other media extraction types.
 441       * For example, [gallery] and [audio].
 442       *
 443       * @since 2.3.0
 444       *
 445       * @param string $richtext   Content to parse.
 446       * @param string $plaintext  Sanitized version of the content.
 447       * @param array  $extra_args Bespoke data for a particular extractor (optional).
 448       * @return array {
 449       *     @type array $has Extracted media counts. {
 450       *         @type int $shortcodes
 451       *     }
 452       *     @type array $shortcodes Extracted shortcodes. {
 453       *         Array of extracted media.
 454       *
 455       *         @type array  $attributes Key/value pairs of the shortcodes attributes (if any).
 456       *         @type string $content    Text wrapped by the shortcode.
 457       *         @type string $type       Shortcode type.
 458       *         @type string $original   The entire shortcode.
 459       *     }
 460       * }
 461       */
 462  	protected function extract_shortcodes( $richtext, $plaintext, $extra_args = array() ) {
 463          $data = array( 'has' => array( 'shortcodes' => 0 ), 'shortcodes' => array() );
 464  
 465          // Match any registered WordPress shortcodes.
 466          if ( strpos( $richtext, '[' ) !== false ) {
 467              preg_match_all( '/' . get_shortcode_regex() . '/s', $richtext, $matches );
 468  
 469              if ( ! empty( $matches[2] ) ) {
 470                  foreach ( $matches[2] as $i => $shortcode_name ) {
 471                      $attrs = shortcode_parse_atts( $matches[3][ $i ] );
 472                      $attrs = ( ! $attrs ) ? array() : (array) $attrs;
 473  
 474                      $shortcode               = array();
 475                      $shortcode['attributes'] = $attrs;             // Attributes.
 476                      $shortcode['content']    = $matches[5][ $i ];  // Content.
 477                      $shortcode['type']       = $shortcode_name;    // Shortcode.
 478                      $shortcode['original']   = $matches[0][ $i ];  // Entire shortcode.
 479  
 480                      $data['shortcodes'][] = $shortcode;
 481                  }
 482              }
 483          }
 484  
 485          $data['has']['shortcodes'] = count( $data['shortcodes'] );
 486  
 487          /**
 488           * Filters shortcodes extracted from text.
 489           *
 490           * @since 2.3.0
 491           *
 492           * @param array  $data       Extracted shortcodes.
 493           *                           See {@link BP_Media_Extractor::extract_shortcodes()} for format.
 494           * @param string $richtext   Content to parse.
 495           * @param string $plaintext  Copy of $richtext without any markup.
 496           * @param array  $extra_args Bespoke data for a particular extractor.
 497           */
 498          return apply_filters( 'bp_media_extractor_shortcodes', $data, $richtext, $plaintext, $extra_args );
 499      }
 500  
 501      /**
 502       * Extract any URL, matching a registered oEmbed endpoint, from text.
 503       *
 504       * @since 2.3.0
 505       *
 506       * @param string $richtext   Content to parse.
 507       * @param string $plaintext  Sanitized version of the content.
 508       * @param array  $extra_args Bespoke data for a particular extractor (optional).
 509       * @return array {
 510       *     @type array $has Extracted media counts. {
 511       *         @type int $embeds
 512       *     }
 513       *     @type array $embeds Extracted oEmbeds. {
 514       *         Array of extracted media.
 515       *
 516       *         @type string $url oEmbed link.
 517       *     }
 518       * }
 519       */
 520  	protected function extract_embeds( $richtext, $plaintext, $extra_args = array() ) {
 521          $data   = array( 'has' => array( 'embeds' => 0 ), 'embeds' => array() );
 522          $embeds = array();
 523  
 524          if ( ! function_exists( '_wp_oembed_get_object' ) ) {
 525              require( ABSPATH . WPINC . '/class-oembed.php' );
 526          }
 527  
 528  
 529          // Matches any links on their own lines. They may be oEmbeds.
 530          if ( stripos( $richtext, 'http' ) !== false ) {
 531              preg_match_all( '#^\s*(https?://[^\s"]+)\s*$#im', $richtext, $matches );
 532  
 533              if ( ! empty( $matches[1] ) ) {
 534                  $matches[1] = array_unique( $matches[1] );
 535                  $oembed     = _wp_oembed_get_object();
 536  
 537                  foreach ( $matches[1] as $link ) {
 538                      // Skip data URIs.
 539                      if ( strtolower( substr( $link, 0, 5 ) ) === 'data:' ) {
 540                          continue;
 541                      }
 542  
 543                      foreach ( $oembed->providers as $matchmask => $oembed_data ) {
 544                          list( , $is_regex ) = $oembed_data;
 545  
 546                          // Turn asterisk-type provider URLs into regexs.
 547                          if ( ! $is_regex ) {
 548                              $matchmask = '#' . str_replace( '___wildcard___', '(.+)', preg_quote( str_replace( '*', '___wildcard___', $matchmask ), '#' ) ) . '#i';
 549                              $matchmask = preg_replace( '|^#http\\\://|', '#https?\://', $matchmask );
 550                          }
 551  
 552                          // Check whether this "link" is really an oEmbed.
 553                          if ( preg_match( $matchmask, $link ) ) {
 554                              $data['embeds'][] = array( 'url' => $link );
 555  
 556                              break;
 557                          }
 558                      }
 559                  }
 560              }
 561          }
 562  
 563          $data['has']['embeds'] = count( $data['embeds'] );
 564  
 565          /**
 566           * Filters embeds extracted from text.
 567           *
 568           * @since 2.3.0
 569           *
 570           * @param array  $data       Extracted embeds. See {@link BP_Media_Extractor::extract_embeds()} for format.
 571           * @param string $richtext   Content to parse.
 572           * @param string $plaintext  Copy of $richtext without any markup.
 573           * @param array  $extra_args Bespoke data for a particular extractor.
 574           */
 575          return apply_filters( 'bp_media_extractor_embeds', $data, $richtext, $plaintext, $extra_args );
 576      }
 577  
 578      /**
 579       * Extract [audio] shortcodes and `<a href="*.mp3">` tags, from text.
 580       *
 581       * @since 2.3.0
 582       *
 583       * @see wp_get_audio_extensions() for supported audio formats.
 584       *
 585       * @param string $richtext   Content to parse.
 586       * @param string $plaintext  Sanitized version of the content.
 587       * @param array  $extra_args Bespoke data for a particular extractor (optional).
 588       * @return array {
 589       *     @type array $has Extracted media counts. {
 590       *         @type int $audio
 591       *     }
 592       *     @type array $audio Extracted audio. {
 593       *         Array of extracted media.
 594       *
 595       *         @type string $original The entire shortcode.
 596       *         @type string $source   Media source. Either "html" or "shortcodes".
 597       *         @type string $url      Link to audio.
 598       *     }
 599       * }
 600       */
 601  	protected function extract_audio( $richtext, $plaintext, $extra_args = array() ) {
 602          $data   = array( 'has' => array( 'audio' => 0 ), 'audio' => array() );
 603          $audios = $this->extract_shortcodes( $richtext, $plaintext, $extra_args );
 604          $links  = $this->extract_links( $richtext, $plaintext, $extra_args );
 605  
 606          $audio_types = wp_get_audio_extensions();
 607  
 608  
 609          // [audio]
 610          $audios = wp_list_filter( $audios['shortcodes'], array( 'type' => 'audio' ) );
 611          foreach ( $audios as $audio ) {
 612  
 613              // Media URL can appear as the first parameter inside the shortcode brackets.
 614              if ( isset( $audio['attributes']['src'] ) ) {
 615                  $src_param = 'src';
 616              } elseif ( isset( $audio['attributes'][0] ) ) {
 617                  $src_param = 0;
 618              } else {
 619                  continue;
 620              }
 621  
 622              $path = untrailingslashit( parse_url( $audio['attributes'][ $src_param ], PHP_URL_PATH ) );
 623  
 624              foreach ( $audio_types as $extension ) {
 625                  $extension = '.' . $extension;
 626  
 627                  // Check this URL's file extension matches that of an accepted audio format.
 628                  if ( ! $path || substr( $path, -4 ) !== $extension ) {
 629                      continue;
 630                  }
 631  
 632                  $data['audio'][] = array(
 633                      'original' => '[audio src="' . esc_url_raw( $audio['attributes'][ $src_param ] ) . '"]',
 634                      'source'   => 'shortcodes',
 635                      'url'      => esc_url_raw( $audio['attributes'][ $src_param ] ),
 636                  );
 637              }
 638          }
 639  
 640          // <a href="*.mp3"> tags.
 641          foreach ( $audio_types as $extension ) {
 642              $extension = '.' . $extension;
 643  
 644              foreach ( $links['links'] as $link ) {
 645                  $path = untrailingslashit( parse_url( $link['url'], PHP_URL_PATH ) );
 646  
 647                  // Check this URL's file extension matches that of an accepted audio format.
 648                  if ( ! $path || substr( $path, -4 ) !== $extension ) {
 649                      continue;
 650                  }
 651  
 652                  $data['audio'][] = array(
 653                      'original' => '[audio src="' . esc_url_raw( $link['url'] ) . '"]',  // Build an audio shortcode.
 654                      'source'   => 'html',
 655                      'url'      => esc_url_raw( $link['url'] ),
 656                  );
 657              }
 658          }
 659  
 660          $data['has']['audio'] = count( $data['audio'] );
 661  
 662          /**
 663           * Filters audio extracted from text.
 664           *
 665           * @since 2.3.0
 666           *
 667           * @param array  $data       Extracted audio. See {@link BP_Media_Extractor::extract_audio()} for format.
 668           * @param string $richtext   Content to parse.
 669           * @param string $plaintext  Copy of $richtext without any markup.
 670           * @param array  $extra_args Bespoke data for a particular extractor.
 671           */
 672          return apply_filters( 'bp_media_extractor_audio', $data, $richtext, $plaintext, $extra_args );
 673      }
 674  
 675      /**
 676       * Extract [video] shortcodes from text.
 677       *
 678       * @since 2.3.0
 679       *
 680       * @see wp_get_video_extensions() for supported video formats.
 681       *
 682       * @param string $richtext   Content to parse.
 683       * @param string $plaintext  Sanitized version of the content.
 684       * @param array  $extra_args Bespoke data for a particular extractor (optional).
 685       * @return array {
 686       *     @type array $has Extracted media counts. {
 687       *         @type int $video
 688       *     }
 689       *     @type array $videos Extracted video. {
 690       *         Array of extracted media.
 691       *
 692       *         @type string $source Media source. Currently only "shortcodes".
 693       *         @type string $url    Link to audio.
 694       *     }
 695       * }
 696       */
	protected function extract_video( $richtext, $plaintext, $extra_args = array() ) {
        $data       = array( 'has' => array( 'videos' => 0 ), 'videos' => array() );
        $shortcodes = $this->extract_shortcodes( $richtext, $plaintext, $extra_args );

        // Accepted file extensions, without the leading dot.
        $video_types = wp_get_video_extensions();

        // [video]
        $videos = array();
        if ( isset( $shortcodes['shortcodes'] ) && is_array( $shortcodes['shortcodes'] ) ) {
            $videos = wp_list_filter( $shortcodes['shortcodes'], array( 'type' => 'video' ) );
        }

        foreach ( $videos as $video ) {

            // Media URL can appear as the first parameter inside the shortcode brackets.
            if ( isset( $video['attributes']['src'] ) ) {
                $src_param = 'src';
            } elseif ( isset( $video['attributes'][0] ) ) {
                $src_param = 0;
            } else {
                continue;
            }

            $url = $video['attributes'][ $src_param ];

            // parse_url() gives false for a seriously malformed URL and null when the URL has no path;
            // neither can name a video file, and passing them on would trip string functions.
            $path = parse_url( $url, PHP_URL_PATH );
            if ( ! is_string( $path ) ) {
                continue;
            }

            // Checked once per shortcode rather than once per extension.
            $path = untrailingslashit( $path );
            if ( ! $path ) {
                continue;
            }

            foreach ( $video_types as $extension ) {
                $extension = '.' . $extension;

                // Compare the path's suffix against the extension's own length, so extensions
                // of any length (not just ".mp4"- or ".webm"-sized ones) are recognised.
                if ( substr( $path, -strlen( $extension ) ) !== $extension ) {
                    continue;
                }

                $data['videos'][] = array(
                    'original' => $video['original'],  // Entire shortcode.
                    'source'   => 'shortcodes',
                    'url'      => esc_url_raw( $url ),
                );

                // One match is enough; this stops a duplicated extension adding the video twice.
                break;
            }
        }

        $data['has']['videos'] = count( $data['videos'] );

        /**
         * Filters videos extracted from text.
         *
         * @since 2.3.0
         *
         * @param array  $data       Extracted videos. See {@link BP_Media_Extractor::extract_video()} for format.
         * @param string $richtext   Content to parse.
         * @param string $plaintext  Copy of $richtext without any markup.
         * @param array  $extra_args Bespoke data for a particular extractor.
         */
        return apply_filters( 'bp_media_extractor_videos', $data, $richtext, $plaintext, $extra_args );
    }
 749  
 750  
 751      /**
 752       * Helpers and utility methods.
 753       */
 754  
 755      /**
 756       * Extract images in [gallery] shortcodes from text.
 757       *
 758       * @since 2.3.0
 759       *
 760       * @param string $richtext   Content to parse.
 761       * @param string $plaintext  Sanitized version of the content.
 762       * @param array  $extra_args Bespoke data for a particular extractor (optional).
 763       * @return array
 764       */
	protected function extract_images_from_galleries( $richtext, $plaintext, $extra_args = array() ) {
        // get_post_galleries() needs a post; wrap the raw content in a throwaway one if none was supplied.
        if ( ! isset( $extra_args['post'] ) || ! is_a( $extra_args['post'], 'WP_Post' ) ) {
            $post = new WP_Post( (object) array( 'post_content' => $richtext ) );
        } else {
            $post = $extra_args['post'];
        }

        // Parent ID for the "attached images" gallery variant. Only read it when the supplied
        // value really is an object exposing an ID, instead of dereferencing it blindly.
        $parent_id = null;
        if ( isset( $extra_args['post'] ) && is_object( $extra_args['post'] ) && isset( $extra_args['post']->ID ) ) {
            $parent_id = $extra_args['post']->ID;
        }

        // We're not using get_post_galleries_images() because it returns thumbnails; we want the original image.
        $galleries      = get_post_galleries( $post, false );
        $galleries_data = array();

        if ( ! empty( $galleries ) ) {
            // Validate the size of the images requested.
            if ( isset( $extra_args['width'] ) ) {
                $width = $extra_args['width'];

                // ctype_digit() is only reliable on strings: an integer between -128 and 255 is treated
                // as an ASCII code, so integer widths are recognised separately.
                $width_is_numeric = ( is_int( $width ) && $width >= 0 ) || ( is_string( $width ) && ctype_digit( $width ) );

                if ( $width_is_numeric ) {
                    // A width was specified but not a height, so calculate it assuming a 4:3 ratio.
                    if ( ! isset( $extra_args['height'] ) ) {
                        $extra_args['height'] = (int) round( ( $width / 4 ) * 3 );
                    }

                    $image_size = array( $width, $extra_args['height'] );
                } else {
                    $image_size = $width;  // E.g. "thumb", "medium".
                }

            } else {
                $image_size = 'full';
            }

            /**
             * There are two variants of gallery shortcode.
             *
             * One kind specifies the image (post) IDs via an `ids` parameter.
             * The other gets the image IDs from post_type=attachment and post_parent=get_the_ID().
             */

            foreach ( $galleries as $gallery_id => $gallery ) {
                $gallery_images = array();
                $image_ids      = array();

                // Gallery ids= variant.
                if ( isset( $gallery['ids'] ) ) {
                    $image_ids = wp_parse_id_list( $gallery['ids'] );

                // Gallery post_parent variant.
                } elseif ( null !== $parent_id ) {
                    $image_ids = wp_parse_id_list(
                        get_children( array(
                            'fields'         => 'ids',
                            'order'          => 'ASC',
                            'orderby'        => 'menu_order ID',
                            'post_mime_type' => 'image',
                            'post_parent'    => $parent_id,
                            'post_status'    => 'inherit',
                            'post_type'      => 'attachment',
                        ) )
                    );
                }

                // Extract the data we need from each image in this gallery.
                foreach ( $image_ids as $image_id ) {
                    // Array of (url, width, height) on success; the isset() checks below
                    // fall back to empty strings when a value is missing.
                    $image = wp_get_attachment_image_src( $image_id, $image_size );

                    $gallery_images[] = array(
                        'url'    => isset( $image[0] ) ? $image[0] : '',
                        'width'  => isset( $image[1] ) ? $image[1] : '',
                        'height' => isset( $image[2] ) ? $image[2] : '',

                        // Offset by one from the key get_post_galleries() gave this gallery.
                        'gallery_id' => 1 + $gallery_id,
                    );
                }

                $galleries_data[] = $gallery_images;
            }
        }

        /**
         * Filters image galleries extracted from text.
         *
         * @since 2.3.0
         *
         * @param array  $galleries_data Galleries. See {@link BP_Media_Extractor::extract_images_from_galleries()}.
         * @param string $richtext       Content to parse.
         * @param string $plaintext      Copy of $richtext without any markup.
         * @param array  $extra_args     Bespoke data for a particular extractor.
         */
        return apply_filters( 'bp_media_extractor_galleries', $galleries_data, $richtext, $plaintext, $extra_args );
    }
 858  
 859      /**
 860       * Extract the featured image from a Post.
 861       *
 862       * @since 2.3.0
 863       *
 864       * @param string $richtext   Content to parse.
 865       * @param string $plaintext  Sanitized version of the content.
 866       * @param array  $extra_args Contains data that an implementation might need beyond the defaults.
 867       * @return array
 868       */
	protected function extract_images_from_featured_images( $richtext, $plaintext, $extra_args ) {
        $image = array();
        $thumb = 0;

        // Only look up a thumbnail when the supplied post really is an object exposing an ID.
        if ( isset( $extra_args['post'] ) && is_object( $extra_args['post'] ) && isset( $extra_args['post']->ID ) ) {
            $thumb = (int) get_post_thumbnail_id( $extra_args['post']->ID );
        }

        if ( $thumb ) {
            // Validate the size of the images requested.
            if ( isset( $extra_args['width'] ) ) {
                $width = $extra_args['width'];

                // ctype_digit() is only reliable on strings: an integer between -128 and 255 is treated
                // as an ASCII code, so integer widths are recognised separately.
                $width_is_numeric = ( is_int( $width ) && $width >= 0 ) || ( is_string( $width ) && ctype_digit( $width ) );

                if ( $width_is_numeric ) {
                    // A width was specified but not a height, so calculate it assuming a 4:3 ratio.
                    if ( ! isset( $extra_args['height'] ) ) {
                        $extra_args['height'] = (int) round( ( $width / 4 ) * 3 );
                    }

                    $image_size = array( $width, $extra_args['height'] );
                } else {
                    $image_size = $width;  // E.g. "thumb", "medium".
                }
            } else {
                $image_size = 'full';
            }

            $image = wp_get_attachment_image_src( $thumb, $image_size );

            // wp_get_attachment_image_src() returns false when no image is available;
            // normalise that so this method and its filter always deal in arrays.
            if ( ! is_array( $image ) ) {
                $image = array();
            }
        }

        /**
         * Filters featured images extracted from a WordPress Post.
         *
         * @since 2.3.0
         *
         * @param array  $image      Featured image data as returned by wp_get_attachment_image_src(),
         *                           or an empty array if there is no featured image.
         * @param string $richtext   Content to parse.
         * @param string $plaintext  Copy of $richtext without any markup.
         * @param array  $extra_args Bespoke data for a particular extractor.
         */
        return apply_filters( 'bp_media_extractor_featured_images', $image, $richtext, $plaintext, $extra_args );
    }
 909  
 910      /**
 911       * Sanitize and format raw content to prepare for content extraction.
 912       *
 913       * HTML tags and shortcodes are removed, and HTML entities are decoded.
 914       *
 915       * @since 2.3.0
 916       *
 917       * @param string $richtext Content to sanitize.
 918       * @return string
 919       */
	protected function strip_markup( $richtext ) {
        // Three passes, in this order: drop HTML tags, decode HTML entities, remove shortcodes.
        $without_tags = strip_tags( $richtext );
        $decoded      = html_entity_decode( $without_tags );
        $plaintext    = strip_shortcodes( $decoded );

        /**
         * Filters the generated plain text version of the content passed to the extractor.
         *
         * @since 2.3.0
         *
         * @param string $plaintext Generated plain text.
         * @param string $richtext  Original content.
         */
        return apply_filters( 'bp_media_extractor_strip_markup', $plaintext, $richtext );
    }
 933  }
PHP Cross Reference of BuddyPress

/src/bp-core/classes/ -> class-bp-media-extractor.php (source)