[ Index ]

PHP Cross Reference of WordPress

title

Body

[close]

/wp-includes/SimplePie/ -> Sanitize.php (source)

   1  <?php
   2  /**
   3   * SimplePie
   4   *
   5   * A PHP-Based RSS and Atom Feed Framework.
   6   * Takes the hard work out of managing a complete RSS/Atom solution.
   7   *
   8   * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
   9   * All rights reserved.
  10   *
  11   * Redistribution and use in source and binary forms, with or without modification, are
  12   * permitted provided that the following conditions are met:
  13   *
  14   *     * Redistributions of source code must retain the above copyright notice, this list of
  15   *       conditions and the following disclaimer.
  16   *
  17   *     * Redistributions in binary form must reproduce the above copyright notice, this list
  18   *       of conditions and the following disclaimer in the documentation and/or other materials
  19   *       provided with the distribution.
  20   *
  21   *     * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22   *       to endorse or promote products derived from this software without specific prior
  23   *       written permission.
  24   *
  25   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26   * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27   * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28   * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32   * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33   * POSSIBILITY OF SUCH DAMAGE.
  34   *
  35   * @package SimplePie
  36   * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
  37   * @author Ryan Parman
  38   * @author Sam Sneddon
  39   * @author Ryan McCue
  40   * @link http://simplepie.org/ SimplePie
  41   * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42   */
  43  
  44  /**
  45   * Used for data cleanup and post-processing
  46   *
  47   *
  48   * This class can be overloaded with {@see SimplePie::set_sanitize_class()}
  49   *
  50   * @package SimplePie
  51   * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
  52   */
  53  class SimplePie_Sanitize
  54  {
  55      // Private vars
  56      var $base;
  57  
  58      // Options
  59      var $remove_div = true;
  60      var $image_handler = '';
  61      var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
  62      var $encode_instead_of_strip = false;
  63      var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
  64      var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));
  65      var $strip_comments = false;
  66      var $output_encoding = 'UTF-8';
  67      var $enable_cache = true;
  68      var $cache_location = './cache';
  69      var $cache_name_function = 'md5';
  70      var $timeout = 10;
  71      var $useragent = '';
  72      var $force_fsockopen = false;
  73      var $replace_url_attributes = null;
  74  
  75  	public function __construct()
  76      {
  77          // Set defaults
  78          $this->set_url_replacements(null);
  79      }
  80  
  81  	public function remove_div($enable = true)
  82      {
  83          $this->remove_div = (bool) $enable;
  84      }
  85  
  86  	public function set_image_handler($page = false)
  87      {
  88          if ($page)
  89          {
  90              $this->image_handler = (string) $page;
  91          }
  92          else
  93          {
  94              $this->image_handler = false;
  95          }
  96      }
  97  
  98  	public function set_registry(SimplePie_Registry $registry)
  99      {
 100          $this->registry = $registry;
 101      }
 102  
 103  	public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
 104      {
 105          if (isset($enable_cache))
 106          {
 107              $this->enable_cache = (bool) $enable_cache;
 108          }
 109  
 110          if ($cache_location)
 111          {
 112              $this->cache_location = (string) $cache_location;
 113          }
 114  
 115          if ($cache_name_function)
 116          {
 117              $this->cache_name_function = (string) $cache_name_function;
 118          }
 119      }
 120  
 121  	public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
 122      {
 123          if ($timeout)
 124          {
 125              $this->timeout = (string) $timeout;
 126          }
 127  
 128          if ($useragent)
 129          {
 130              $this->useragent = (string) $useragent;
 131          }
 132  
 133          if ($force_fsockopen)
 134          {
 135              $this->force_fsockopen = (string) $force_fsockopen;
 136          }
 137      }
 138  
 139  	public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
 140      {
 141          if ($tags)
 142          {
 143              if (is_array($tags))
 144              {
 145                  $this->strip_htmltags = $tags;
 146              }
 147              else
 148              {
 149                  $this->strip_htmltags = explode(',', $tags);
 150              }
 151          }
 152          else
 153          {
 154              $this->strip_htmltags = false;
 155          }
 156      }
 157  
 158  	public function encode_instead_of_strip($encode = false)
 159      {
 160          $this->encode_instead_of_strip = (bool) $encode;
 161      }
 162  
 163  	public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
 164      {
 165          if ($attribs)
 166          {
 167              if (is_array($attribs))
 168              {
 169                  $this->strip_attributes = $attribs;
 170              }
 171              else
 172              {
 173                  $this->strip_attributes = explode(',', $attribs);
 174              }
 175          }
 176          else
 177          {
 178              $this->strip_attributes = false;
 179          }
 180      }
 181  
 182  	public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')))
 183      {
 184          if ($attribs)
 185          {
 186              if (is_array($attribs))
 187              {
 188                  $this->add_attributes = $attribs;
 189              }
 190              else
 191              {
 192                  $this->add_attributes = explode(',', $attribs);
 193              }
 194          }
 195          else
 196          {
 197              $this->add_attributes = false;
 198          }
 199      }
 200  
 201  	public function strip_comments($strip = false)
 202      {
 203          $this->strip_comments = (bool) $strip;
 204      }
 205  
 206  	public function set_output_encoding($encoding = 'UTF-8')
 207      {
 208          $this->output_encoding = (string) $encoding;
 209      }
 210  
 211      /**
 212       * Set element/attribute key/value pairs of HTML attributes
 213       * containing URLs that need to be resolved relative to the feed
 214       *
 215       * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
 216       * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
 217       * |q|@cite
 218       *
 219       * @since 1.0
 220       * @param array|null $element_attribute Element/attribute key/value pairs, null for default
 221       */
 222  	public function set_url_replacements($element_attribute = null)
 223      {
 224          if ($element_attribute === null)
 225          {
 226              $element_attribute = array(
 227                  'a' => 'href',
 228                  'area' => 'href',
 229                  'blockquote' => 'cite',
 230                  'del' => 'cite',
 231                  'form' => 'action',
 232                  'img' => array(
 233                      'longdesc',
 234                      'src'
 235                  ),
 236                  'input' => 'src',
 237                  'ins' => 'cite',
 238                  'q' => 'cite'
 239              );
 240          }
 241          $this->replace_url_attributes = (array) $element_attribute;
 242      }
 243  
 244  	public function sanitize($data, $type, $base = '')
 245      {
 246          $data = trim($data);
 247          if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
 248          {
 249              if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
 250              {
 251                  if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
 252                  {
 253                      $type |= SIMPLEPIE_CONSTRUCT_HTML;
 254                  }
 255                  else
 256                  {
 257                      $type |= SIMPLEPIE_CONSTRUCT_TEXT;
 258                  }
 259              }
 260  
 261              if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
 262              {
 263                  $data = base64_decode($data);
 264              }
 265  
 266              if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
 267              {
 268  
 269                  if (!class_exists('DOMDocument'))
 270                  {
 271                      throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
 272                  }
 273                  $document = new DOMDocument();
 274                  $document->encoding = 'UTF-8';
 275  
 276                  $data = $this->preprocess($data, $type);
 277  
 278                  set_error_handler(array('SimplePie_Misc', 'silence_errors'));
 279                  $document->loadHTML($data);
 280                  restore_error_handler();
 281  
 282                  $xpath = new DOMXPath($document);
 283  
 284                  // Strip comments
 285                  if ($this->strip_comments)
 286                  {
 287                      $comments = $xpath->query('//comment()');
 288  
 289                      foreach ($comments as $comment)
 290                      {
 291                          $comment->parentNode->removeChild($comment);
 292                      }
 293                  }
 294  
 295                  // Strip out HTML tags and attributes that might cause various security problems.
 296                  // Based on recommendations by Mark Pilgrim at:
 297                  // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
 298                  if ($this->strip_htmltags)
 299                  {
 300                      foreach ($this->strip_htmltags as $tag)
 301                      {
 302                          $this->strip_tag($tag, $document, $xpath, $type);
 303                      }
 304                  }
 305  
 306                  if ($this->strip_attributes)
 307                  {
 308                      foreach ($this->strip_attributes as $attrib)
 309                      {
 310                          $this->strip_attr($attrib, $xpath);
 311                      }
 312                  }
 313  
 314                  if ($this->add_attributes)
 315                  {
 316                      foreach ($this->add_attributes as $tag => $valuePairs)
 317                      {
 318                          $this->add_attr($tag, $valuePairs, $document);
 319                      }
 320                  }
 321  
 322                  // Replace relative URLs
 323                  $this->base = $base;
 324                  foreach ($this->replace_url_attributes as $element => $attributes)
 325                  {
 326                      $this->replace_urls($document, $element, $attributes);
 327                  }
 328  
 329                  // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
 330                  if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
 331                  {
 332                      $images = $document->getElementsByTagName('img');
 333                      foreach ($images as $img)
 334                      {
 335                          if ($img->hasAttribute('src'))
 336                          {
 337                              $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
 338                              $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));
 339  
 340                              if ($cache->load())
 341                              {
 342                                  $img->setAttribute('src', $this->image_handler . $image_url);
 343                              }
 344                              else
 345                              {
 346                                  $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));
 347                                  $headers = $file->headers;
 348  
 349                                  if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
 350                                  {
 351                                      if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
 352                                      {
 353                                          $img->setAttribute('src', $this->image_handler . $image_url);
 354                                      }
 355                                      else
 356                                      {
 357                                          trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
 358                                      }
 359                                  }
 360                              }
 361                          }
 362                      }
 363                  }
 364  
 365                  // Get content node
 366                  $div = $document->getElementsByTagName('body')->item(0)->firstChild;
 367                  // Finally, convert to a HTML string
 368                  $data = trim($document->saveHTML($div));
 369  
 370                  if ($this->remove_div)
 371                  {
 372                      $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
 373                      $data = preg_replace('/<\/div>$/', '', $data);
 374                  }
 375                  else
 376                  {
 377                      $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
 378                  }
 379              }
 380  
 381              if ($type & SIMPLEPIE_CONSTRUCT_IRI)
 382              {
 383                  $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
 384                  if ($absolute !== false)
 385                  {
 386                      $data = $absolute;
 387                  }
 388              }
 389  
 390              if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
 391              {
 392                  $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
 393              }
 394  
 395              if ($this->output_encoding !== 'UTF-8')
 396              {
 397                  $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
 398              }
 399          }
 400          return $data;
 401      }
 402  
 403  	protected function preprocess($html, $type)
 404      {
 405          $ret = '';
 406          $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
 407          if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
 408          {
 409              // Atom XHTML constructs are wrapped with a div by default
 410              // Note: No protection if $html contains a stray </div>!
 411              $html = '<div>' . $html . '</div>';
 412              $ret .= '<!DOCTYPE html>';
 413              $content_type = 'text/html';
 414          }
 415          else
 416          {
 417              $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
 418              $content_type = 'application/xhtml+xml';
 419          }
 420  
 421          $ret .= '<html><head>';
 422          $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
 423          $ret .= '</head><body>' . $html . '</body></html>';
 424          return $ret;
 425      }
 426  
 427  	public function replace_urls($document, $tag, $attributes)
 428      {
 429          if (!is_array($attributes))
 430          {
 431              $attributes = array($attributes);
 432          }
 433  
 434          if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags))
 435          {
 436              $elements = $document->getElementsByTagName($tag);
 437              foreach ($elements as $element)
 438              {
 439                  foreach ($attributes as $attribute)
 440                  {
 441                      if ($element->hasAttribute($attribute))
 442                      {
 443                          $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
 444                          if ($value !== false)
 445                          {
 446                              $element->setAttribute($attribute, $value);
 447                          }
 448                      }
 449                  }
 450              }
 451          }
 452      }
 453  
 454  	public function do_strip_htmltags($match)
 455      {
 456          if ($this->encode_instead_of_strip)
 457          {
 458              if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
 459              {
 460                  $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
 461                  $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
 462                  return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
 463              }
 464              else
 465              {
 466                  return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
 467              }
 468          }
 469          elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
 470          {
 471              return $match[4];
 472          }
 473          else
 474          {
 475              return '';
 476          }
 477      }
 478  
 479  	protected function strip_tag($tag, $document, $xpath, $type)
 480      {
 481          $elements = $xpath->query('body//' . $tag);
 482          if ($this->encode_instead_of_strip)
 483          {
 484              foreach ($elements as $element)
 485              {
 486                  $fragment = $document->createDocumentFragment();
 487  
 488                  // For elements which aren't script or style, include the tag itself
 489                  if (!in_array($tag, array('script', 'style')))
 490                  {
 491                      $text = '<' . $tag;
 492                      if ($element->hasAttributes())
 493                      {
 494                          $attrs = array();
 495                          foreach ($element->attributes as $name => $attr)
 496                          {
 497                              $value = $attr->value;
 498  
 499                              // In XHTML, empty values should never exist, so we repeat the value
 500                              if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML))
 501                              {
 502                                  $value = $name;
 503                              }
 504                              // For HTML, empty is fine
 505                              elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML))
 506                              {
 507                                  $attrs[] = $name;
 508                                  continue;
 509                              }
 510  
 511                              // Standard attribute text
 512                              $attrs[] = $name . '="' . $attr->value . '"';
 513                          }
 514                          $text .= ' ' . implode(' ', $attrs);
 515                      }
 516                      $text .= '>';
 517                      $fragment->appendChild(new DOMText($text));
 518                  }
 519  
 520                  $number = $element->childNodes->length;
 521                  for ($i = $number; $i > 0; $i--)
 522                  {
 523                      $child = $element->childNodes->item(0);
 524                      $fragment->appendChild($child);
 525                  }
 526  
 527                  if (!in_array($tag, array('script', 'style')))
 528                  {
 529                      $fragment->appendChild(new DOMText('</' . $tag . '>'));
 530                  }
 531  
 532                  $element->parentNode->replaceChild($fragment, $element);
 533              }
 534  
 535              return;
 536          }
 537          elseif (in_array($tag, array('script', 'style')))
 538          {
 539              foreach ($elements as $element)
 540              {
 541                  $element->parentNode->removeChild($element);
 542              }
 543  
 544              return;
 545          }
 546          else
 547          {
 548              foreach ($elements as $element)
 549              {
 550                  $fragment = $document->createDocumentFragment();
 551                  $number = $element->childNodes->length;
 552                  for ($i = $number; $i > 0; $i--)
 553                  {
 554                      $child = $element->childNodes->item(0);
 555                      $fragment->appendChild($child);
 556                  }
 557  
 558                  $element->parentNode->replaceChild($fragment, $element);
 559              }
 560          }
 561      }
 562  
 563  	protected function strip_attr($attrib, $xpath)
 564      {
 565          $elements = $xpath->query('//*[@' . $attrib . ']');
 566  
 567          foreach ($elements as $element)
 568          {
 569              $element->removeAttribute($attrib);
 570          }
 571      }
 572  
 573      protected function add_attr($tag, $valuePairs, $document)
 574      {
 575          $elements = $document->getElementsByTagName($tag);
 576          foreach ($elements as $element)
 577          {
 578              foreach ($valuePairs as $attrib => $value)
 579              {
 580                  $element->setAttribute($attrib, $value);
 581              }
 582          }
 583      }
 584  }


Generated: Wed Aug 12 01:00:03 2020 Cross-referenced by PHPXref 0.7.1