[ Index ] |
PHP Cross Reference of WordPress |
[Summary view] [Print] [Text view]
<?php
/**
 * Block Serialization Parser
 *
 * @package WordPress
 */

/**
 * Class WP_Block_Parser_Block
 *
 * Holds the block structure in memory
 *
 * @since 5.0.0
 */
class WP_Block_Parser_Block {
	/**
	 * Name of block
	 *
	 * @example "core/paragraph"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $blockName;

	/**
	 * Optional set of attributes from block comment delimiters
	 *
	 * @example null
	 * @example array( 'columns' => 3 )
	 *
	 * @since 5.0.0
	 * @var array|null
	 */
	public $attrs;

	/**
	 * List of inner blocks (of this same class)
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block[]
	 */
	public $innerBlocks;

	/**
	 * Resultant HTML from inside block comment delimiters
	 * after removing inner blocks
	 *
	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $innerHTML;

	/**
	 * List of string fragments and null markers where inner blocks were found
	 *
	 * @example array(
	 *   'innerHTML'    => 'BeforeInnerAfter',
	 *   'innerBlocks'  => array( block, block ),
	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
	 * )
	 *
	 * @since 4.2.0
	 * @var array
	 */
	public $innerContent;

	/**
	 * Constructor.
	 *
	 * Will populate object properties from the provided arguments.
	 *
	 * @since 5.0.0
	 *
	 * @param string $name         Name of block.
	 * @param array  $attrs        Optional set of attributes from block comment delimiters.
	 * @param array  $innerBlocks  List of inner blocks (of this same class).
	 * @param string $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
	 * @param array  $innerContent List of string fragments and null markers where inner blocks were found.
	 */
	public function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
		$this->blockName    = $name;
		$this->attrs        = $attrs;
		$this->innerBlocks  = $innerBlocks;
		$this->innerHTML    = $innerHTML;
		$this->innerContent = $innerContent;
	}
}

/**
 * Class WP_Block_Parser_Frame
 *
 * Holds partial blocks in memory while parsing
 *
 * @internal
 * @since 5.0.0
 */
class WP_Block_Parser_Frame {
	/**
	 * Full or partial block
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block
	 */
	public $block;

	/**
	 * Byte offset into document for start of parse token
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_start;

	/**
	 * Byte length of entire parse token string
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $token_length;

	/**
	 * Byte offset into document for after parse token ends
	 * (used during reconstruction of stack into parse production)
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $prev_offset;

	/**
	 * Byte offset into document where leading HTML before token starts
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $leading_html_start;

	/**
	 * Constructor
	 *
	 * Will populate object properties from the provided arguments.
	 * When no `$prev_offset` is given it defaults to the byte just past
	 * the token, i.e. `$token_start + $token_length`.
	 *
	 * @since 5.0.0
	 *
	 * @param WP_Block_Parser_Block $block              Full or partial block.
	 * @param int                   $token_start        Byte offset into document for start of parse token.
	 * @param int                   $token_length       Byte length of entire parse token string.
	 * @param int                   $prev_offset        Byte offset into document for after parse token ends.
	 * @param int                   $leading_html_start Byte offset into document where leading HTML before token starts.
	 */
	public function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
		$this->block              = $block;
		$this->token_start        = $token_start;
		$this->token_length       = $token_length;
		$this->prev_offset        = isset( $prev_offset ) ? $prev_offset : $token_start + $token_length;
		$this->leading_html_start = $leading_html_start;
	}
}

/**
 * Class WP_Block_Parser
 *
 * Parses a document and constructs a list of parsed block objects
 *
 * @since 5.0.0
 * @since 4.0.0 returns arrays not objects, all attributes are arrays
 */
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks, each one a WP_Block_Parser_Block cast to an array
	 *
	 * @since 5.0.0
	 * @var array[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Empty associative array, here due to PHP quirks
	 *
	 * @since 4.4.0
	 * @var array empty associative array
	 */
	public $empty_attrs;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * Every entry of the returned list is a WP_Block_Parser_Block that has
	 * been cast to an array (keys: blockName, attrs, innerBlocks, innerHTML,
	 * innerContent).
	 *
	 * @since 5.0.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[]
	 */
	public function parse( $document ) {
		$this->document    = $document;
		$this->offset      = 0;
		$this->output      = array();
		$this->stack       = array();
		$this->empty_attrs = json_decode( '{}', true );

		do {
			// twiddle our thumbs.
		} while ( $this->proceed() );

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 5.0.0
	 * @return bool
	 */
	public function proceed() {
		$next_token = $this->next_token();
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
		$stack_depth = count( $this->stack );

		// we may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// if not in a block then flush output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise we have a problem
				 * This is an error
				 *
				 * we have options
				 * - treat it all as freeform text
				 * - assume an implicit closer (easiest when not nesting)
				 */

				// for the easy case we'll assume an implicit closer.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack();
					return false;
				}

				/*
				 * for the nested case where it's more difficult we'll
				 * have to assume that multiple closers are missing
				 * and so we'll collapse the whole stack piecewise
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * easy case is if we stumbled upon a void block
				 * in the top-level of the document
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// track all newly-opened blocks on the stack.
				array_push(
					$this->stack,
					new WP_Block_Parser_Frame(
						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
						$start_offset,
						$token_length,
						$start_offset + $token_length,
						$leading_html_start
					)
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * if we're missing an opener we're in trouble
				 * This is an error
				 */
				if ( 0 === $stack_depth ) {
					/*
					 * we have options
					 * - assume an implicit opener
					 * - assume _this_ is the opener
					 * - give up and close out the document
					 */
					$this->add_freeform();
					return false;
				}

				// if we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent
				 */
				$stack_top                        = array_pop( $this->stack );
				$html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * The returned array is always five items long:
	 * token type, block name, attributes, start offset, token length.
	 * For 'no-more-tokens' every item after the type is null.
	 *
	 * @internal
	 * @since 5.0.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array
	 */
	public function next_token() {
		$matches = null;

		/*
		 * aye the magic
		 * we're using a single RegExp to tokenize the block comment delimiters
		 * we're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). we can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		// if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE.
		if ( false === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		// we have no more tokens.
		if ( 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		// With PREG_OFFSET_CAPTURE an unmatched group reports an offset of -1.
		$length    = strlen( $match );
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: $this->empty_attrs;

		/*
		 * This state isn't allowed
		 * This is an error
		 */
		if ( $is_closer && ( $is_void || $has_attrs ) ) {
			// we can ignore them since they don't hurt anything.
		}

		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * The block has a null name, empty attributes, no inner blocks, and the
	 * given HTML as both its innerHTML and its only innerContent fragment.
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $innerHTML HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $innerHTML ) {
		return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * Text is taken starting at the current parse offset. Nothing is
	 * pushed when there is no text left to output.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $length how many bytes of document text to output;
	 *                         a null (or zero) value means "the rest of the document".
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory appends it as an inner block
	 * of the block currently on top of the parsing stack.
	 *
	 * Any document text between the parent's previous offset and the start
	 * of the inner block is appended to the parent's innerHTML/innerContent,
	 * followed by a null marker in innerContent for the inner block itself.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing form earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Strict comparison on purpose: `empty()` treats the string "0" as
		 * empty, which would silently drop inner HTML consisting of just "0".
		 * `false` is what older PHP versions return from substr() when there
		 * is nothing to extract.
		 */
		if ( false !== $html && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Remaining document text belonging to the block (up to `$end_offset`,
	 * or to the end of the document when none is given) is appended to the
	 * block first. If the frame recorded leading HTML before its opening
	 * token, that HTML is emitted as a freeform block ahead of the block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Strict comparison on purpose: `empty()` treats the string "0" as
		 * empty, which would silently drop inner HTML consisting of just "0".
		 * `false` is what older PHP versions return from substr() when there
		 * is nothing to extract.
		 */
		if ( false !== $html && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Sun Dec 22 01:00:02 2024 | Cross-referenced by PHPXref 0.7.1 |