[ Index ]

PHP Cross Reference of WordPress

title

Body

[close]

/wp-includes/SimplePie/ -> Sanitize.php (source)

   1  <?php
   2  /**
   3   * SimplePie
   4   *
   5   * A PHP-Based RSS and Atom Feed Framework.
   6   * Takes the hard work out of managing a complete RSS/Atom solution.
   7   *
   8   * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
   9   * All rights reserved.
  10   *
  11   * Redistribution and use in source and binary forms, with or without modification, are
  12   * permitted provided that the following conditions are met:
  13   *
  14   *     * Redistributions of source code must retain the above copyright notice, this list of
  15   *       conditions and the following disclaimer.
  16   *
  17   *     * Redistributions in binary form must reproduce the above copyright notice, this list
  18   *       of conditions and the following disclaimer in the documentation and/or other materials
  19   *       provided with the distribution.
  20   *
  21   *     * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22   *       to endorse or promote products derived from this software without specific prior
  23   *       written permission.
  24   *
  25   * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26   * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27   * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28   * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29   * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30   * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32   * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33   * POSSIBILITY OF SUCH DAMAGE.
  34   *
  35   * @package SimplePie
  36   * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
  37   * @author Ryan Parman
  38   * @author Sam Sneddon
  39   * @author Ryan McCue
  40   * @link http://simplepie.org/ SimplePie
  41   * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42   */
  43  
  44  /**
  45   * Used for data cleanup and post-processing
  46   *
  47   *
  48   * This class can be overloaded with {@see SimplePie::set_sanitize_class()}
  49   *
  50   * @package SimplePie
  51   * @todo Move to using an actual HTML parser (this will allow tags to be properly stripped, and to switch between HTML and XHTML), this will also make it easier to shorten a string while preserving HTML tags
  52   */
  53  class SimplePie_Sanitize
  54  {
  55      // Private vars
  56      var $base;
  57  
  58      // Options
  59      var $remove_div = true;
  60      var $image_handler = '';
  61      var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
  62      var $encode_instead_of_strip = false;
  63      var $strip_attributes = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
  64      var $add_attributes = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none'));
  65      var $strip_comments = false;
  66      var $output_encoding = 'UTF-8';
  67      var $enable_cache = true;
  68      var $cache_location = './cache';
  69      var $cache_name_function = 'md5';
  70      var $timeout = 10;
  71      var $useragent = '';
  72      var $force_fsockopen = false;
  73      var $replace_url_attributes = null;
  74      var $registry;
  75  
  76      /**
  77       * List of domains for which to force HTTPS.
  78       * @see SimplePie_Sanitize::set_https_domains()
  79       * Array is a tree split at DNS levels. Example:
  80       * array('biz' => true, 'com' => array('example' => true), 'net' => array('example' => array('www' => true)))
  81       */
  82      var $https_domains = array();
  83  
  84  	public function __construct()
  85      {
  86          // Set defaults
  87          $this->set_url_replacements(null);
  88      }
  89  
  90  	public function remove_div($enable = true)
  91      {
  92          $this->remove_div = (bool) $enable;
  93      }
  94  
  95  	public function set_image_handler($page = false)
  96      {
  97          if ($page)
  98          {
  99              $this->image_handler = (string) $page;
 100          }
 101          else
 102          {
 103              $this->image_handler = false;
 104          }
 105      }
 106  
 107  	public function set_registry(SimplePie_Registry $registry)
 108      {
 109          $this->registry = $registry;
 110      }
 111  
 112  	public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
 113      {
 114          if (isset($enable_cache))
 115          {
 116              $this->enable_cache = (bool) $enable_cache;
 117          }
 118  
 119          if ($cache_location)
 120          {
 121              $this->cache_location = (string) $cache_location;
 122          }
 123  
 124          if ($cache_name_function)
 125          {
 126              $this->cache_name_function = (string) $cache_name_function;
 127          }
 128      }
 129  
 130  	public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
 131      {
 132          if ($timeout)
 133          {
 134              $this->timeout = (string) $timeout;
 135          }
 136  
 137          if ($useragent)
 138          {
 139              $this->useragent = (string) $useragent;
 140          }
 141  
 142          if ($force_fsockopen)
 143          {
 144              $this->force_fsockopen = (string) $force_fsockopen;
 145          }
 146      }
 147  
 148  	public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
 149      {
 150          if ($tags)
 151          {
 152              if (is_array($tags))
 153              {
 154                  $this->strip_htmltags = $tags;
 155              }
 156              else
 157              {
 158                  $this->strip_htmltags = explode(',', $tags);
 159              }
 160          }
 161          else
 162          {
 163              $this->strip_htmltags = false;
 164          }
 165      }
 166  
 167  	public function encode_instead_of_strip($encode = false)
 168      {
 169          $this->encode_instead_of_strip = (bool) $encode;
 170      }
 171  
 172  	public function strip_attributes($attribs = array('bgsound', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
 173      {
 174          if ($attribs)
 175          {
 176              if (is_array($attribs))
 177              {
 178                  $this->strip_attributes = $attribs;
 179              }
 180              else
 181              {
 182                  $this->strip_attributes = explode(',', $attribs);
 183              }
 184          }
 185          else
 186          {
 187              $this->strip_attributes = false;
 188          }
 189      }
 190  
 191  	public function add_attributes($attribs = array('audio' => array('preload' => 'none'), 'iframe' => array('sandbox' => 'allow-scripts allow-same-origin'), 'video' => array('preload' => 'none')))
 192      {
 193          if ($attribs)
 194          {
 195              if (is_array($attribs))
 196              {
 197                  $this->add_attributes = $attribs;
 198              }
 199              else
 200              {
 201                  $this->add_attributes = explode(',', $attribs);
 202              }
 203          }
 204          else
 205          {
 206              $this->add_attributes = false;
 207          }
 208      }
 209  
 210  	public function strip_comments($strip = false)
 211      {
 212          $this->strip_comments = (bool) $strip;
 213      }
 214  
 215  	public function set_output_encoding($encoding = 'UTF-8')
 216      {
 217          $this->output_encoding = (string) $encoding;
 218      }
 219  
 220      /**
 221       * Set element/attribute key/value pairs of HTML attributes
 222       * containing URLs that need to be resolved relative to the feed
 223       *
 224       * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
 225       * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
 226       * |q|@cite
 227       *
 228       * @since 1.0
 229       * @param array|null $element_attribute Element/attribute key/value pairs, null for default
 230       */
 231  	public function set_url_replacements($element_attribute = null)
 232      {
 233          if ($element_attribute === null)
 234          {
 235              $element_attribute = array(
 236                  'a' => 'href',
 237                  'area' => 'href',
 238                  'blockquote' => 'cite',
 239                  'del' => 'cite',
 240                  'form' => 'action',
 241                  'img' => array(
 242                      'longdesc',
 243                      'src'
 244                  ),
 245                  'input' => 'src',
 246                  'ins' => 'cite',
 247                  'q' => 'cite'
 248              );
 249          }
 250          $this->replace_url_attributes = (array) $element_attribute;
 251      }
 252  
 253      /**
 254       * Set the list of domains for which to force HTTPS.
 255       * @see SimplePie_Misc::https_url()
 256       * Example array('biz', 'example.com', 'example.org', 'www.example.net');
 257       */
 258  	public function set_https_domains($domains)
 259      {
 260          $this->https_domains = array();
 261          foreach ($domains as $domain)
 262          {
 263              $domain = trim($domain, ". \t\n\r\0\x0B");
 264              $segments = array_reverse(explode('.', $domain));
 265              $node =& $this->https_domains;
 266              foreach ($segments as $segment)
 267              {//Build a tree
 268                  if ($node === true)
 269                  {
 270                      break;
 271                  }
 272                  if (!isset($node[$segment]))
 273                  {
 274                      $node[$segment] = array();
 275                  }
 276                  $node =& $node[$segment];
 277              }
 278              $node = true;
 279          }
 280      }
 281  
 282      /**
 283       * Check if the domain is in the list of forced HTTPS.
 284       */
 285  	protected function is_https_domain($domain)
 286      {
 287          $domain = trim($domain, '. ');
 288          $segments = array_reverse(explode('.', $domain));
 289          $node =& $this->https_domains;
 290          foreach ($segments as $segment)
 291          {//Explore the tree
 292              if (isset($node[$segment]))
 293              {
 294                  $node =& $node[$segment];
 295              }
 296              else
 297              {
 298                  break;
 299              }
 300          }
 301          return $node === true;
 302      }
 303  
 304      /**
 305       * Force HTTPS for selected Web sites.
 306       */
 307  	public function https_url($url)
 308      {
 309          return (strtolower(substr($url, 0, 7)) === 'http://') &&
 310              $this->is_https_domain(parse_url($url, PHP_URL_HOST)) ?
 311              substr_replace($url, 's', 4, 0) :    //Add the 's' to HTTPS
 312              $url;
 313      }
 314  
 315  	public function sanitize($data, $type, $base = '')
 316      {
 317          $data = trim($data);
 318          if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI)
 319          {
 320              if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML)
 321              {
 322                  if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data))
 323                  {
 324                      $type |= SIMPLEPIE_CONSTRUCT_HTML;
 325                  }
 326                  else
 327                  {
 328                      $type |= SIMPLEPIE_CONSTRUCT_TEXT;
 329                  }
 330              }
 331  
 332              if ($type & SIMPLEPIE_CONSTRUCT_BASE64)
 333              {
 334                  $data = base64_decode($data);
 335              }
 336  
 337              if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML))
 338              {
 339  
 340                  if (!class_exists('DOMDocument'))
 341                  {
 342                      throw new SimplePie_Exception('DOMDocument not found, unable to use sanitizer');
 343                  }
 344                  $document = new DOMDocument();
 345                  $document->encoding = 'UTF-8';
 346  
 347                  $data = $this->preprocess($data, $type);
 348  
 349                  set_error_handler(array('SimplePie_Misc', 'silence_errors'));
 350                  $document->loadHTML($data);
 351                  restore_error_handler();
 352  
 353                  $xpath = new DOMXPath($document);
 354  
 355                  // Strip comments
 356                  if ($this->strip_comments)
 357                  {
 358                      $comments = $xpath->query('//comment()');
 359  
 360                      foreach ($comments as $comment)
 361                      {
 362                          $comment->parentNode->removeChild($comment);
 363                      }
 364                  }
 365  
 366                  // Strip out HTML tags and attributes that might cause various security problems.
 367                  // Based on recommendations by Mark Pilgrim at:
 368                  // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
 369                  if ($this->strip_htmltags)
 370                  {
 371                      foreach ($this->strip_htmltags as $tag)
 372                      {
 373                          $this->strip_tag($tag, $document, $xpath, $type);
 374                      }
 375                  }
 376  
 377                  if ($this->strip_attributes)
 378                  {
 379                      foreach ($this->strip_attributes as $attrib)
 380                      {
 381                          $this->strip_attr($attrib, $xpath);
 382                      }
 383                  }
 384  
 385                  if ($this->add_attributes)
 386                  {
 387                      foreach ($this->add_attributes as $tag => $valuePairs)
 388                      {
 389                          $this->add_attr($tag, $valuePairs, $document);
 390                      }
 391                  }
 392  
 393                  // Replace relative URLs
 394                  $this->base = $base;
 395                  foreach ($this->replace_url_attributes as $element => $attributes)
 396                  {
 397                      $this->replace_urls($document, $element, $attributes);
 398                  }
 399  
 400                  // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
 401                  if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache)
 402                  {
 403                      $images = $document->getElementsByTagName('img');
 404                      foreach ($images as $img)
 405                      {
 406                          if ($img->hasAttribute('src'))
 407                          {
 408                              $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
 409                              $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));
 410  
 411                              if ($cache->load())
 412                              {
 413                                  $img->setAttribute('src', $this->image_handler . $image_url);
 414                              }
 415                              else
 416                              {
 417                                  $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen));
 418                                  $headers = $file->headers;
 419  
 420                                  if ($file->success && ($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300)))
 421                                  {
 422                                      if ($cache->save(array('headers' => $file->headers, 'body' => $file->body)))
 423                                      {
 424                                          $img->setAttribute('src', $this->image_handler . $image_url);
 425                                      }
 426                                      else
 427                                      {
 428                                          trigger_error("$this->cache_location is not writable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
 429                                      }
 430                                  }
 431                              }
 432                          }
 433                      }
 434                  }
 435  
 436                  // Get content node
 437                  $div = $document->getElementsByTagName('body')->item(0)->firstChild;
 438                  // Finally, convert to a HTML string
 439                  $data = trim($document->saveHTML($div));
 440  
 441                  if ($this->remove_div)
 442                  {
 443                      $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
 444                      $data = preg_replace('/<\/div>$/', '', $data);
 445                  }
 446                  else
 447                  {
 448                      $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
 449                  }
 450              }
 451  
 452              if ($type & SIMPLEPIE_CONSTRUCT_IRI)
 453              {
 454                  $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
 455                  if ($absolute !== false)
 456                  {
 457                      $data = $absolute;
 458                  }
 459              }
 460  
 461              if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI))
 462              {
 463                  $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
 464              }
 465  
 466              if ($this->output_encoding !== 'UTF-8')
 467              {
 468                  $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
 469              }
 470          }
 471          return $data;
 472      }
 473  
 474  	protected function preprocess($html, $type)
 475      {
 476          $ret = '';
 477          $html = preg_replace('%</?(?:html|body)[^>]*?'.'>%is', '', $html);
 478          if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML)
 479          {
 480              // Atom XHTML constructs are wrapped with a div by default
 481              // Note: No protection if $html contains a stray </div>!
 482              $html = '<div>' . $html . '</div>';
 483              $ret .= '<!DOCTYPE html>';
 484              $content_type = 'text/html';
 485          }
 486          else
 487          {
 488              $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
 489              $content_type = 'application/xhtml+xml';
 490          }
 491  
 492          $ret .= '<html><head>';
 493          $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
 494          $ret .= '</head><body>' . $html . '</body></html>';
 495          return $ret;
 496      }
 497  
 498  	public function replace_urls($document, $tag, $attributes)
 499      {
 500          if (!is_array($attributes))
 501          {
 502              $attributes = array($attributes);
 503          }
 504  
 505          if (!is_array($this->strip_htmltags) || !in_array($tag, $this->strip_htmltags))
 506          {
 507              $elements = $document->getElementsByTagName($tag);
 508              foreach ($elements as $element)
 509              {
 510                  foreach ($attributes as $attribute)
 511                  {
 512                      if ($element->hasAttribute($attribute))
 513                      {
 514                          $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
 515                          if ($value !== false)
 516                          {
 517                              $value = $this->https_url($value);
 518                              $element->setAttribute($attribute, $value);
 519                          }
 520                      }
 521                  }
 522              }
 523          }
 524      }
 525  
 526  	public function do_strip_htmltags($match)
 527      {
 528          if ($this->encode_instead_of_strip)
 529          {
 530              if (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
 531              {
 532                  $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
 533                  $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
 534                  return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
 535              }
 536              else
 537              {
 538                  return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
 539              }
 540          }
 541          elseif (isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style')))
 542          {
 543              return $match[4];
 544          }
 545          else
 546          {
 547              return '';
 548          }
 549      }
 550  
 551  	protected function strip_tag($tag, $document, $xpath, $type)
 552      {
 553          $elements = $xpath->query('body//' . $tag);
 554          if ($this->encode_instead_of_strip)
 555          {
 556              foreach ($elements as $element)
 557              {
 558                  $fragment = $document->createDocumentFragment();
 559  
 560                  // For elements which aren't script or style, include the tag itself
 561                  if (!in_array($tag, array('script', 'style')))
 562                  {
 563                      $text = '<' . $tag;
 564                      if ($element->hasAttributes())
 565                      {
 566                          $attrs = array();
 567                          foreach ($element->attributes as $name => $attr)
 568                          {
 569                              $value = $attr->value;
 570  
 571                              // In XHTML, empty values should never exist, so we repeat the value
 572                              if (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_XHTML))
 573                              {
 574                                  $value = $name;
 575                              }
 576                              // For HTML, empty is fine
 577                              elseif (empty($value) && ($type & SIMPLEPIE_CONSTRUCT_HTML))
 578                              {
 579                                  $attrs[] = $name;
 580                                  continue;
 581                              }
 582  
 583                              // Standard attribute text
 584                              $attrs[] = $name . '="' . $attr->value . '"';
 585                          }
 586                          $text .= ' ' . implode(' ', $attrs);
 587                      }
 588                      $text .= '>';
 589                      $fragment->appendChild(new DOMText($text));
 590                  }
 591  
 592                  $number = $element->childNodes->length;
 593                  for ($i = $number; $i > 0; $i--)
 594                  {
 595                      $child = $element->childNodes->item(0);
 596                      $fragment->appendChild($child);
 597                  }
 598  
 599                  if (!in_array($tag, array('script', 'style')))
 600                  {
 601                      $fragment->appendChild(new DOMText('</' . $tag . '>'));
 602                  }
 603  
 604                  $element->parentNode->replaceChild($fragment, $element);
 605              }
 606  
 607              return;
 608          }
 609          elseif (in_array($tag, array('script', 'style')))
 610          {
 611              foreach ($elements as $element)
 612              {
 613                  $element->parentNode->removeChild($element);
 614              }
 615  
 616              return;
 617          }
 618          else
 619          {
 620              foreach ($elements as $element)
 621              {
 622                  $fragment = $document->createDocumentFragment();
 623                  $number = $element->childNodes->length;
 624                  for ($i = $number; $i > 0; $i--)
 625                  {
 626                      $child = $element->childNodes->item(0);
 627                      $fragment->appendChild($child);
 628                  }
 629  
 630                  $element->parentNode->replaceChild($fragment, $element);
 631              }
 632          }
 633      }
 634  
 635  	protected function strip_attr($attrib, $xpath)
 636      {
 637          $elements = $xpath->query('//*[@' . $attrib . ']');
 638  
 639          foreach ($elements as $element)
 640          {
 641              $element->removeAttribute($attrib);
 642          }
 643      }
 644  
 645      protected function add_attr($tag, $valuePairs, $document)
 646      {
 647          $elements = $document->getElementsByTagName($tag);
 648          foreach ($elements as $element)
 649          {
 650              foreach ($valuePairs as $attrib => $value)
 651              {
 652                  $element->setAttribute($attrib, $value);
 653              }
 654          }
 655      }
 656  }


Generated: Sat Nov 23 01:00:02 2024 Cross-referenced by PHPXref 0.7.1