[ Index ]

PHP Cross Reference of WordPress

title

Body

[close]

/wp-includes/ -> class-wp-block-parser.php (source)

   1  <?php
   2  /**
   3   * Block Serialization Parser
   4   *
   5   * @package WordPress
   6   */
   7  
   8  /**
   9   * Class WP_Block_Parser_Block
  10   *
  11   * Holds the block structure in memory
  12   *
  13   * @since 5.0.0
  14   */
  15  class WP_Block_Parser_Block {
  16      /**
  17       * Name of block
  18       *
  19       * @example "core/paragraph"
  20       *
  21       * @since 5.0.0
  22       * @var string
  23       */
  24      public $blockName;
  25  
  26      /**
  27       * Optional set of attributes from block comment delimiters
  28       *
  29       * @example null
  30       * @example array( 'columns' => 3 )
  31       *
  32       * @since 5.0.0
  33       * @var array|null
  34       */
  35      public $attrs;
  36  
  37      /**
  38       * List of inner blocks (of this same class)
  39       *
  40       * @since 5.0.0
  41       * @var WP_Block_Parser_Block[]
  42       */
  43      public $innerBlocks;
  44  
  45      /**
  46       * Resultant HTML from inside block comment delimiters
  47       * after removing inner blocks
  48       *
  49       * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
  50       *
  51       * @since 5.0.0
  52       * @var string
  53       */
  54      public $innerHTML;
  55  
  56      /**
  57       * List of string fragments and null markers where inner blocks were found
  58       *
  59       * @example array(
  60       *   'innerHTML'    => 'BeforeInnerAfter',
  61       *   'innerBlocks'  => array( block, block ),
  62       *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
  63       * )
  64       *
  65       * @since 4.2.0
  66       * @var array
  67       */
  68      public $innerContent;
  69  
  70      /**
  71       * Constructor.
  72       *
  73       * Will populate object properties from the provided arguments.
  74       *
  75       * @since 5.0.0
  76       *
  77       * @param string $name         Name of block.
  78       * @param array  $attrs        Optional set of attributes from block comment delimiters.
  79       * @param array  $innerBlocks  List of inner blocks (of this same class).
  80       * @param string $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
  81       * @param array  $innerContent List of string fragments and null markers where inner blocks were found.
  82       */
  83  	function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
  84          $this->blockName    = $name;
  85          $this->attrs        = $attrs;
  86          $this->innerBlocks  = $innerBlocks;
  87          $this->innerHTML    = $innerHTML;
  88          $this->innerContent = $innerContent;
  89      }
  90  }
  91  
  92  /**
  93   * Class WP_Block_Parser_Frame
  94   *
  95   * Holds partial blocks in memory while parsing
  96   *
  97   * @internal
  98   * @since 5.0.0
  99   */
 100  class WP_Block_Parser_Frame {
 101      /**
 102       * Full or partial block
 103       *
 104       * @since 5.0.0
 105       * @var WP_Block_Parser_Block
 106       */
 107      public $block;
 108  
 109      /**
 110       * Byte offset into document for start of parse token
 111       *
 112       * @since 5.0.0
 113       * @var int
 114       */
 115      public $token_start;
 116  
 117      /**
 118       * Byte length of entire parse token string
 119       *
 120       * @since 5.0.0
 121       * @var int
 122       */
 123      public $token_length;
 124  
 125      /**
 126       * Byte offset into document for after parse token ends
 127       * (used during reconstruction of stack into parse production)
 128       *
 129       * @since 5.0.0
 130       * @var int
 131       */
 132      public $prev_offset;
 133  
 134      /**
 135       * Byte offset into document where leading HTML before token starts
 136       *
 137       * @since 5.0.0
 138       * @var int
 139       */
 140      public $leading_html_start;
 141  
 142      /**
 143       * Constructor
 144       *
 145       * Will populate object properties from the provided arguments.
 146       *
 147       * @since 5.0.0
 148       *
 149       * @param WP_Block_Parser_Block $block              Full or partial block.
 150       * @param int                   $token_start        Byte offset into document for start of parse token.
 151       * @param int                   $token_length       Byte length of entire parse token string.
 152       * @param int                   $prev_offset        Byte offset into document for after parse token ends.
 153       * @param int                   $leading_html_start Byte offset into document where leading HTML before token starts.
 154       */
 155  	function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
 156          $this->block              = $block;
 157          $this->token_start        = $token_start;
 158          $this->token_length       = $token_length;
 159          $this->prev_offset        = isset( $prev_offset ) ? $prev_offset : $token_start + $token_length;
 160          $this->leading_html_start = $leading_html_start;
 161      }
 162  }
 163  
 164  /**
 165   * Class WP_Block_Parser
 166   *
 167   * Parses a document and constructs a list of parsed block objects
 168   *
 169   * @since 5.0.0
 170   * @since 4.0.0 returns arrays not objects, all attributes are arrays
 171   */
 172  class WP_Block_Parser {
 173      /**
 174       * Input document being parsed
 175       *
 176       * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
 177       *
 178       * @since 5.0.0
 179       * @var string
 180       */
 181      public $document;
 182  
 183      /**
 184       * Tracks parsing progress through document
 185       *
 186       * @since 5.0.0
 187       * @var int
 188       */
 189      public $offset;
 190  
 191      /**
 192       * List of parsed blocks
 193       *
 194       * @since 5.0.0
 195       * @var WP_Block_Parser_Block[]
 196       */
 197      public $output;
 198  
 199      /**
 200       * Stack of partially-parsed structures in memory during parse
 201       *
 202       * @since 5.0.0
 203       * @var WP_Block_Parser_Frame[]
 204       */
 205      public $stack;
 206  
 207      /**
 208       * Empty associative array, here due to PHP quirks
 209       *
 210       * @since 4.4.0
 211       * @var array empty associative array
 212       */
 213      public $empty_attrs;
 214  
 215      /**
 216       * Parses a document and returns a list of block structures
 217       *
 218       * When encountering an invalid parse will return a best-effort
 219       * parse. In contrast to the specification parser this does not
 220       * return an error on invalid inputs.
 221       *
 222       * @since 5.0.0
 223       *
 224       * @param string $document Input document being parsed.
 225       * @return WP_Block_Parser_Block[]
 226       */
 227  	function parse( $document ) {
 228          $this->document    = $document;
 229          $this->offset      = 0;
 230          $this->output      = array();
 231          $this->stack       = array();
 232          $this->empty_attrs = json_decode( '{}', true );
 233  
 234          do {
 235              // twiddle our thumbs.
 236          } while ( $this->proceed() );
 237  
 238          return $this->output;
 239      }
 240  
 241      /**
 242       * Processes the next token from the input document
 243       * and returns whether to proceed eating more tokens
 244       *
 245       * This is the "next step" function that essentially
 246       * takes a token as its input and decides what to do
 247       * with that token before descending deeper into a
 248       * nested block tree or continuing along the document
 249       * or breaking out of a level of nesting.
 250       *
 251       * @internal
 252       * @since 5.0.0
 253       * @return bool
 254       */
 255  	function proceed() {
 256          $next_token = $this->next_token();
 257          list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
 258          $stack_depth = count( $this->stack );
 259  
 260          // we may have some HTML soup before the next block.
 261          $leading_html_start = $start_offset > $this->offset ? $this->offset : null;
 262  
 263          switch ( $token_type ) {
 264              case 'no-more-tokens':
 265                  // if not in a block then flush output.
 266                  if ( 0 === $stack_depth ) {
 267                      $this->add_freeform();
 268                      return false;
 269                  }
 270  
 271                  /*
 272                   * Otherwise we have a problem
 273                   * This is an error
 274                   *
 275                   * we have options
 276                   * - treat it all as freeform text
 277                   * - assume an implicit closer (easiest when not nesting)
 278                   */
 279  
 280                  // for the easy case we'll assume an implicit closer.
 281                  if ( 1 === $stack_depth ) {
 282                      $this->add_block_from_stack();
 283                      return false;
 284                  }
 285  
 286                  /*
 287                   * for the nested case where it's more difficult we'll
 288                   * have to assume that multiple closers are missing
 289                   * and so we'll collapse the whole stack piecewise
 290                   */
 291                  while ( 0 < count( $this->stack ) ) {
 292                      $this->add_block_from_stack();
 293                  }
 294                  return false;
 295  
 296              case 'void-block':
 297                  /*
 298                   * easy case is if we stumbled upon a void block
 299                   * in the top-level of the document
 300                   */
 301                  if ( 0 === $stack_depth ) {
 302                      if ( isset( $leading_html_start ) ) {
 303                          $this->output[] = (array) $this->freeform(
 304                              substr(
 305                                  $this->document,
 306                                  $leading_html_start,
 307                                  $start_offset - $leading_html_start
 308                              )
 309                          );
 310                      }
 311  
 312                      $this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
 313                      $this->offset   = $start_offset + $token_length;
 314                      return true;
 315                  }
 316  
 317                  // otherwise we found an inner block.
 318                  $this->add_inner_block(
 319                      new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
 320                      $start_offset,
 321                      $token_length
 322                  );
 323                  $this->offset = $start_offset + $token_length;
 324                  return true;
 325  
 326              case 'block-opener':
 327                  // track all newly-opened blocks on the stack.
 328                  array_push(
 329                      $this->stack,
 330                      new WP_Block_Parser_Frame(
 331                          new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
 332                          $start_offset,
 333                          $token_length,
 334                          $start_offset + $token_length,
 335                          $leading_html_start
 336                      )
 337                  );
 338                  $this->offset = $start_offset + $token_length;
 339                  return true;
 340  
 341              case 'block-closer':
 342                  /*
 343                   * if we're missing an opener we're in trouble
 344                   * This is an error
 345                   */
 346                  if ( 0 === $stack_depth ) {
 347                      /*
 348                       * we have options
 349                       * - assume an implicit opener
 350                       * - assume _this_ is the opener
 351                       * - give up and close out the document
 352                       */
 353                      $this->add_freeform();
 354                      return false;
 355                  }
 356  
 357                  // if we're not nesting then this is easy - close the block.
 358                  if ( 1 === $stack_depth ) {
 359                      $this->add_block_from_stack( $start_offset );
 360                      $this->offset = $start_offset + $token_length;
 361                      return true;
 362                  }
 363  
 364                  /*
 365                   * otherwise we're nested and we have to close out the current
 366                   * block and add it as a new innerBlock to the parent
 367                   */
 368                  $stack_top                        = array_pop( $this->stack );
 369                  $html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
 370                  $stack_top->block->innerHTML     .= $html;
 371                  $stack_top->block->innerContent[] = $html;
 372                  $stack_top->prev_offset           = $start_offset + $token_length;
 373  
 374                  $this->add_inner_block(
 375                      $stack_top->block,
 376                      $stack_top->token_start,
 377                      $stack_top->token_length,
 378                      $start_offset + $token_length
 379                  );
 380                  $this->offset = $start_offset + $token_length;
 381                  return true;
 382  
 383              default:
 384                  // This is an error.
 385                  $this->add_freeform();
 386                  return false;
 387          }
 388      }
 389  
 390      /**
 391       * Scans the document from where we last left off
 392       * and finds the next valid token to parse if it exists
 393       *
 394       * Returns the type of the find: kind of find, block information, attributes
 395       *
 396       * @internal
 397       * @since 5.0.0
 398       * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
 399       * @return array
 400       */
 401  	function next_token() {
 402          $matches = null;
 403  
 404          /*
 405           * aye the magic
 406           * we're using a single RegExp to tokenize the block comment delimiters
 407           * we're also using a trick here because the only difference between a
 408           * block opener and a block closer is the leading `/` before `wp:` (and
 409           * a closer has no attributes). we can trap them both and process the
 410           * match back in PHP to see which one it was.
 411           */
 412          $has_match = preg_match(
 413              '/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
 414              $this->document,
 415              $matches,
 416              PREG_OFFSET_CAPTURE,
 417              $this->offset
 418          );
 419  
 420          // if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE.
 421          if ( false === $has_match ) {
 422              return array( 'no-more-tokens', null, null, null, null );
 423          }
 424  
 425          // we have no more tokens.
 426          if ( 0 === $has_match ) {
 427              return array( 'no-more-tokens', null, null, null, null );
 428          }
 429  
 430          list( $match, $started_at ) = $matches[0];
 431  
 432          $length    = strlen( $match );
 433          $is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
 434          $is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
 435          $namespace = $matches['namespace'];
 436          $namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
 437          $name      = $namespace . $matches['name'][0];
 438          $has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];
 439  
 440          /*
 441           * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
 442           * are associative arrays. If we use `array()` we get a JSON `[]`
 443           */
 444          $attrs = $has_attrs
 445              ? json_decode( $matches['attrs'][0], /* as-associative */ true )
 446              : $this->empty_attrs;
 447  
 448          /*
 449           * This state isn't allowed
 450           * This is an error
 451           */
 452          if ( $is_closer && ( $is_void || $has_attrs ) ) {
 453              // we can ignore them since they don't hurt anything.
 454          }
 455  
 456          if ( $is_void ) {
 457              return array( 'void-block', $name, $attrs, $started_at, $length );
 458          }
 459  
 460          if ( $is_closer ) {
 461              return array( 'block-closer', $name, null, $started_at, $length );
 462          }
 463  
 464          return array( 'block-opener', $name, $attrs, $started_at, $length );
 465      }
 466  
 467      /**
 468       * Returns a new block object for freeform HTML
 469       *
 470       * @internal
 471       * @since 3.9.0
 472       *
 473       * @param string $innerHTML HTML content of block.
 474       * @return WP_Block_Parser_Block freeform block object.
 475       */
 476  	function freeform( $innerHTML ) {
 477          return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
 478      }
 479  
 480      /**
 481       * Pushes a length of text from the input document
 482       * to the output list as a freeform block.
 483       *
 484       * @internal
 485       * @since 5.0.0
 486       * @param null $length how many bytes of document text to output.
 487       */
 488  	function add_freeform( $length = null ) {
 489          $length = $length ? $length : strlen( $this->document ) - $this->offset;
 490  
 491          if ( 0 === $length ) {
 492              return;
 493          }
 494  
 495          $this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
 496      }
 497  
 498      /**
 499       * Given a block structure from memory pushes
 500       * a new block to the output list.
 501       *
 502       * @internal
 503       * @since 5.0.0
 504       * @param WP_Block_Parser_Block $block        The block to add to the output.
 505       * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
 506       * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
 507       * @param int|null              $last_offset  Last byte offset into document if continuing form earlier output.
 508       */
 509  	function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
 510          $parent                       = $this->stack[ count( $this->stack ) - 1 ];
 511          $parent->block->innerBlocks[] = (array) $block;
 512          $html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );
 513  
 514          if ( ! empty( $html ) ) {
 515              $parent->block->innerHTML     .= $html;
 516              $parent->block->innerContent[] = $html;
 517          }
 518  
 519          $parent->block->innerContent[] = null;
 520          $parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
 521      }
 522  
 523      /**
 524       * Pushes the top block from the parsing stack to the output list.
 525       *
 526       * @internal
 527       * @since 5.0.0
 528       * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
 529       */
 530  	function add_block_from_stack( $end_offset = null ) {
 531          $stack_top   = array_pop( $this->stack );
 532          $prev_offset = $stack_top->prev_offset;
 533  
 534          $html = isset( $end_offset )
 535              ? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
 536              : substr( $this->document, $prev_offset );
 537  
 538          if ( ! empty( $html ) ) {
 539              $stack_top->block->innerHTML     .= $html;
 540              $stack_top->block->innerContent[] = $html;
 541          }
 542  
 543          if ( isset( $stack_top->leading_html_start ) ) {
 544              $this->output[] = (array) $this->freeform(
 545                  substr(
 546                      $this->document,
 547                      $stack_top->leading_html_start,
 548                      $stack_top->token_start - $stack_top->leading_html_start
 549                  )
 550              );
 551          }
 552  
 553          $this->output[] = (array) $stack_top->block;
 554      }
 555  }


Generated: Thu Nov 21 01:00:03 2024 Cross-referenced by PHPXref 0.7.1