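<?php
/*
 * NOTE: the following lines are residue from the file-manager page this source
 * was captured from. They are not part of HTMLDocument.php and are retained
 * here only as a comment so that nothing precedes the namespace declaration.
 *
 * Uname: Linux premium264.web-hosting.com 4.18.0-553.lve.el8.x86_64 #1 SMP Mon May 27 15:27:34 UTC 2024 x86_64
 * Software: LiteSpeed
 * PHP version: 8.3.22 [ PHP INFO ] PHP os: Linux
 * Server Ip: 69.57.162.13
 * Your Ip: 216.73.216.219
 * User: workvvfb (1129) | Group: workvvfb (1084)
 * Safe Mode: OFF
 * Disable Function:
 * NONE
 *
 * name : HTMLDocument.php
 */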

namespace WPDaddy\Dom;

use DOMDocument;

/**
 * Provides access to special properties and methods not present by default
 * on a regular document.
 *
 * @property-read HTMLCollection $anchors List of all of the anchors
 *  in the document. Anchors are <a> Elements with the `name` attribute.
 * @property-read Element        $body    The <body> element. Returns new Element if there
 *  was no body in the source HTML.
 * @property-read HTMLCollection $forms   List of all <form> elements.
 * @property-read Element        $head    The <head> element. Returns new Element if there
 *  was no head in the source HTML.
 * @property-read HTMLCollection $images  List of all <img> elements.
 * @property-read HTMLCollection $links   List of all links in the document.
 *  Links are <a> Elements with the `href` attribute.
 * @property-read HTMLCollection $scripts List of all <script> elements.
 * @property string              $title   The title of the document, defined using <title>.
 */
class HTMLDocument extends Document {
	use LiveProperty, ParentNode;

	/**
	 * An option passed to loadHTML() and loadHTMLFile() to disable duplicate element IDs exception.
	 */
	const ALLOW_DUPLICATE_IDS = 67108864;

	/**
	 * A modification (passed to modify()) that removes all but the last title elements.
	 */
	const FIX_MULTIPLE_TITLES = 2;

	/**
	 * A modification (passed to modify()) that removes all but the last metatags with matching name or property attributes.
	 */
	const FIX_DUPLICATE_METATAGS = 4;

	/**
	 * A modification (passed to modify()) that merges multiple head elements.
	 */
	const FIX_MULTIPLE_HEADS = 8;

	/**
	 * A modification (passed to modify()) that merges multiple body elements.
	 */
	const FIX_MULTIPLE_BODIES = 16;

	/**
	 * A modification (passed to modify()) that moves charset metatag and title elements first.
	 */
	const OPTIMIZE_HEAD = 32;

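	/**
	 * Prototype objects keyed by name. Each entry is created on first use and
	 * cloned afterwards instead of being constructed again.
	 *
	 * @var array
	 */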
	static private $newObjectsCache = [];

	/**
	 * Indicates whether an HTML code is loaded.
	 *
	 * @var boolean
	 */
	private $loaded = false;

	/**
	 * Creates the document and, unless an existing DOMDocument was supplied,
	 * populates it from the given HTML markup.
	 *
	 * @param DOMDocument|string $document Either an existing document (handed to
	 *  the parent constructor unchanged and not parsed here) or HTML markup.
	 *  Any value for which empty() is true yields a skeleton document that only
	 *  contains <head> and <body>.
	 */
	public function __construct($document = ""){
		parent::__construct($document);

		if($document instanceof DOMDocument) {
			return;
		}

		if(empty($document)) {
			$this->fillEmptyDocumentElement();
			return;
		}

		if(function_exists('mb_encode_numericentity')) {
			// Escape every non-ASCII code point as a numeric character reference so
			// the markup reaches the parser as plain ASCII. This replaces
			// mb_convert_encoding($document, "HTML-ENTITIES", "UTF-8"), whose
			// "HTML-ENTITIES" pseudo-encoding is deprecated as of PHP 8.2.
			$document = mb_encode_numericentity(
				$document,
				[0x80, 0x10FFFF, 0, 0x1FFFFF],
				"UTF-8"
			);
			$this->loadHTML($document);
		} else {
			// Without mbstring, declare the charset through a leading meta tag instead.
			$this->loadHTML('<meta http-equiv="content-type" content="text/html; charset=utf-8">'.$document);
		}
	}

	/**
	 * Looks up elements by class name, delegating the search to the
	 * document element.
	 *
	 * @param mixed $names Passed through unchanged to the document element's
	 *  getElementsByClassName().
	 * @return mixed Whatever the document element's getElementsByClassName() returns.
	 */
	public function getElementsByClassName($names){
		$root = $this->documentElement;

		return $root->getElementsByClassName($names);
	}

	/**
	 * Getter behind the `head` property (presumably dispatched by the
	 * LiveProperty trait - confirm there).
	 *
	 * @return mixed The <head> element; one is created and appended to the
	 *  document element when none is found.
	 */
	protected function prop_get_head(){
		$head = $this->getOrCreateElement("head");

		return $head;
	}

	/**
	 * Getter behind the `body` property (presumably dispatched by the
	 * LiveProperty trait - confirm there).
	 *
	 * @return mixed The <body> element; one is created and appended to the
	 *  document element when none is found.
	 */
	protected function prop_get_body(){
		$body = $this->getOrCreateElement("body");

		return $body;
	}

	/**
	 * Getter behind the `forms` property.
	 *
	 * @return mixed Result of getElementsByTagName() for <form> elements.
	 */
	protected function prop_get_forms(){
		$forms = $this->getElementsByTagName("form");

		return $forms;
	}

	/**
	 * Getter behind the `anchors` property.
	 *
	 * @return mixed Result of querySelectorAll() for <a> elements carrying a
	 *  `name` attribute.
	 */
	protected function prop_get_anchors(){
		$anchors = $this->querySelectorAll("a[name]");

		return $anchors;
	}

	/**
	 * Getter behind the `images` property.
	 *
	 * @return mixed Result of getElementsByTagName() for <img> elements.
	 */
	protected function prop_get_images(){
		$images = $this->getElementsByTagName("img");

		return $images;
	}

	/**
	 * Getter behind the `links` property.
	 *
	 * @return mixed Result of querySelectorAll() for <a> elements carrying an
	 *  `href` attribute.
	 */
	protected function prop_get_links(){
		$links = $this->querySelectorAll("a[href]");

		return $links;
	}

	/**
	 * Getter behind the `title` property.
	 *
	 * @return string Text content of the first <title> inside the head, or an
	 *  empty string when the head has no <title>.
	 */
	protected function prop_get_title(){
		$titleElement = $this->head->getElementsByTagName("title")->item(0);

		return $titleElement === null
			? ""
			: $titleElement->textContent;
	}

	/**
	 * Setter behind the `title` property.
	 *
	 * Writes the value into the first <title> of the head, creating and
	 * appending that element first when the head has none.
	 *
	 * @param string $value New document title.
	 */
	protected function prop_set_title($value){
		$titleElement = $this->head->getElementsByTagName("title")->item(0);

		if($titleElement === null) {
			// No <title> yet: create one and attach it to the head.
			$titleElement = $this->createElement("title");
			$this->head->appendChild($titleElement);
		}

		$titleElement->textContent = $value;
	}

	/**
	 * Returns the first element matching the given tag name, creating it as
	 * the last child of the document element when there is no match.
	 *
	 * @param string $tagName Tag name, also used verbatim as the selector.
	 * @return mixed The existing or newly created element.
	 */
	private function getOrCreateElement($tagName){
		$existing = $this->querySelector($tagName);
		if(!is_null($existing)) {
			return $existing;
		}

		$created = $this->createElement($tagName);
		$this->documentElement->appendChild($created);

		return $created;
	}

	/**
	 * Parses HTML markup into this document, working around several
	 * limitations of the underlying parser.
	 *
	 * Before parsing, the source is rewritten: script contents are wrapped in
	 * marked CDATA sections, a <body> and a DOCTYPE are added when missing, a
	 * temporary charset <meta> is injected, and named/decimal entities are
	 * replaced with placeholder tokens. saveHTML() reverses these markers.
	 * After parsing, the temporary nodes are removed again.
	 *
	 * @param string $source  HTML markup to load.
	 * @param int    $options Bitmask of libxml options. self::ALLOW_DUPLICATE_IDS
	 *  is always switched on by this implementation, so the duplicate-ID check
	 *  below never runs.
	 * @return boolean TRUE on success, FALSE when the parent loader fails.
	 * @throws \Exception If duplicate IDs are found while the check is active.
	 */
	// The parent loader may declare a (tentative) return type in newer PHP
	// versions; the attribute keeps this untyped override free of deprecation
	// notices and is read as a plain comment by PHP < 8.
	#[\ReturnTypeWillChange]
	public function loadHTML($source, $options = 0)
	{
		$options = $options | self::ALLOW_DUPLICATE_IDS;
		// Enables libxml errors handling
		$internalErrorsOptionValue = libxml_use_internal_errors();
		if ($internalErrorsOptionValue === false) {
			libxml_use_internal_errors(true);
		}

		$source = trim($source);

		// Add CDATA around script tags content
		$matches = null;
		preg_match_all('/<script(.*?)>/', $source, $matches);
		if (isset($matches[0])) {
			$matches[0] = array_unique($matches[0]);
			foreach ($matches[0] as $match) {
				if (substr($match, -2, 1) !== '/') { // check if ends with />
					$source = str_replace($match, $match . '<![CDATA[html5-dom-document-internal-cdata', $source);
				}
			}
		}
		$source = str_replace('</script>', 'html5-dom-document-internal-cdata]]></script>', $source);
		$source = str_replace('<![CDATA[html5-dom-document-internal-cdatahtml5-dom-document-internal-cdata]]>', '', $source); // clean empty script tags
		$matches = null;
		preg_match_all('/\<!\[CDATA\[html5-dom-document-internal-cdata.*?html5-dom-document-internal-cdata\]\]>/s', $source, $matches);
		if (isset($matches[0])) {
			$matches[0] = array_unique($matches[0]);
			foreach ($matches[0] as $match) {
				if (strpos($match, '</') !== false) { // check if contains </
					// Mask closing tags inside script content with a marker that saveHTML() strips.
					$source = str_replace($match, str_replace('</', '<html5-dom-document-internal-cdata-endtagfix/', $match), $source);
				}
			}
		}

		$autoAddHtmlAndBodyTags = !defined('LIBXML_HTML_NOIMPLIED') || ($options & LIBXML_HTML_NOIMPLIED) === 0;
		$autoAddDoctype = !defined('LIBXML_HTML_NODEFDTD') || ($options & LIBXML_HTML_NODEFDTD) === 0;

		$allowDuplicateIDs = ($options & self::ALLOW_DUPLICATE_IDS) !== 0;

		// Add body tag if missing
		if ($autoAddHtmlAndBodyTags && $source !== '' && preg_match('/\<!DOCTYPE.*?\>/', $source) === 0 && preg_match('/\<html.*?\>/', $source) === 0 && preg_match('/\<body.*?\>/', $source) === 0 && preg_match('/\<head.*?\>/', $source) === 0) {
			$source = '<body>' . $source . '</body>';
		}

		// Add DOCTYPE if missing
		if ($autoAddDoctype && strtoupper(substr($source, 0, 9)) !== '<!DOCTYPE') {
			$source = "<!DOCTYPE html>\n" . $source;
		}

		// Adds temporary head tag
		$charsetTag = '<meta data-html5-dom-document-internal-attribute="charset-meta" http-equiv="content-type" content="text/html; charset=utf-8" />';
		$matches = [];
		preg_match('/\<head.*?\>/', $source, $matches);
		$removeHeadTag = false;
		$removeHtmlTag = false;
		if (isset($matches[0])) { // has head tag
			$insertPosition = strpos($source, $matches[0]) + strlen($matches[0]);
			$source = substr($source, 0, $insertPosition) . $charsetTag . substr($source, $insertPosition);
		} else {
			$matches = [];
			preg_match('/\<html.*?\>/', $source, $matches);
			if (isset($matches[0])) { // has html tag
				$source = str_replace($matches[0], $matches[0] . '<head>' . $charsetTag . '</head>', $source);
			} else {
				$source = '<head>' . $charsetTag . '</head>' . $source;
				$removeHtmlTag = true;
			}
			$removeHeadTag = true;
		}

		// Preserve html entities
		$source = preg_replace('/&([a-zA-Z]*);/', 'html5-dom-document-internal-entity1-$1-end', $source);
		$source = preg_replace('/&#([0-9]*);/', 'html5-dom-document-internal-entity2-$1-end', $source);

		// ALLOW_DUPLICATE_IDS is private to this class and is not a libxml option,
		// so it is masked out instead of being leaked into the parser's option bits.
		$result = parent::loadHTML('<?xml encoding="utf-8" ?>' . $source, $options & ~self::ALLOW_DUPLICATE_IDS);
		if ($internalErrorsOptionValue === false) {
			libxml_use_internal_errors(false);
		}
		if ($result === false) {
			return false;
		}
		$this->encoding = 'utf-8';
		// Drop the processing instruction produced by the injected XML encoding prefix.
		foreach ($this->childNodes as $item) {
			if ($item->nodeType === XML_PI_NODE) {
				$this->removeChild($item);
				break;
			}
		}
		// Remove the temporary charset meta, plus the head/html wrappers that were
		// only added to carry it (when they ended up empty).
		$metaTagElement = $this->getElementsByTagName('meta')->item(0);
		if ($metaTagElement !== null) {
			if ($metaTagElement->getAttribute('data-html5-dom-document-internal-attribute') === 'charset-meta') {
				$headElement = $metaTagElement->parentNode;
				$htmlElement = $headElement->parentNode;
				$metaTagElement->parentNode->removeChild($metaTagElement);
				if ($removeHeadTag && $headElement !== null && $headElement->parentNode !== null && ($headElement->firstChild === null || ($headElement->childNodes->length === 1 && $headElement->firstChild instanceof \DOMText))) {
					$headElement->parentNode->removeChild($headElement);
				}
				if ($removeHtmlTag && $htmlElement !== null && $htmlElement->parentNode !== null && $htmlElement->firstChild === null) {
					$htmlElement->parentNode->removeChild($htmlElement);
				}
			}
		}

		if (!$allowDuplicateIDs) {
			$matches = [];
			// Cheap textual pre-check: only walk the tree when the source repeats an id value.
			preg_match_all('/\sid[\s]*=[\s]*(["\'])(.*?)\1/', $source, $matches);
			if (!empty($matches[2]) && max(array_count_values($matches[2])) > 1) {
				$elementIDs = [];
				$walkChildren = function ($element) use (&$walkChildren, &$elementIDs) {
					foreach ($element->childNodes as $child) {
						if ($child instanceof \DOMElement) {
							if ($child->attributes->length > 0) { // Performance optimization
								$id = $child->getAttribute('id');
								if ($id !== '') {
									if (isset($elementIDs[$id])) {
										throw new \Exception('A DOM node with an ID value "' . $id . '" already exists!');
									} else {
										$elementIDs[$id] = true;
									}
								}
							}
							$walkChildren($child);
						}
					}
				};
				$walkChildren($this);
			}
		}

		$this->loaded = true;
		return true;
	}

	/**
	 * Serializes the whole document, or a single node, to an HTML string.
	 *
	 * Whole-document mode temporarily inserts a charset <meta> (and a head/html
	 * wrapper when the document has none), serializes through the parent
	 * implementation, then strips every internal marker introduced by
	 * loadHTML() and removes the temporary nodes again, leaving the DOM as it
	 * was before the call.
	 *
	 * Node mode imports a copy of the node into a scratch document and trims
	 * the surrounding scaffolding from that document's output.
	 *
	 * @param \DOMNode|null $node Node to serialize. NULL or a DOMDocument
	 *  serializes the whole document.
	 * @return string The HTML markup. '<!DOCTYPE html>' when nothing has been
	 *  loaded through loadHTML() yet.
	 */
	// The parent serializer may declare a (tentative) return type in newer PHP
	// versions; the attribute keeps this untyped override free of deprecation
	// notices and is read as a plain comment by PHP < 8.
	#[\ReturnTypeWillChange]
	public function saveHTML(\DOMNode $node = null)
	{
		if (!$this->loaded) {
			return '<!DOCTYPE html>';
		}

		$nodeMode = $node !== null;
		if ($nodeMode && $node instanceof \DOMDocument) {
			$nodeMode = false;
		}

		if ($nodeMode) {
			if (!isset(self::$newObjectsCache['html5domdocument'])) {
				self::$newObjectsCache['html5domdocument'] = new HTMLDocument();
			}
			$tempDomDocument = clone (self::$newObjectsCache['html5domdocument']);
			if ($node->nodeName === 'html') {
				$tempDomDocument->loadHTML('<!DOCTYPE html>');
				$tempDomDocument->appendChild($tempDomDocument->importNode(clone ($node), true));
				$html = $tempDomDocument->saveHTML();
				$html = substr($html, 16); // remove the DOCTYPE + the new line after
			} elseif ($node->nodeName === 'head' || $node->nodeName === 'body') {
				$tempDomDocument->loadHTML("<!DOCTYPE html>\n<html></html>");
				$tempDomDocument->childNodes[1]->appendChild($tempDomDocument->importNode(clone ($node), true));
				$html = $tempDomDocument->saveHTML();
				$html = substr($html, 22, -7); // remove the DOCTYPE + the new line after + html tag
			} else {
				// Walk up (bounded to 1000 levels) to learn whether the node lives
				// in the head or the body, so it is serialized in the same context.
				$isInHead = false;
				$parentNode = $node;
				for ($i = 0; $i < 1000; $i++) {
					$parentNode = $parentNode->parentNode;
					if ($parentNode === null) {
						break;
					}
					if ($parentNode->nodeName === 'body') {
						break;
					} elseif ($parentNode->nodeName === 'head') {
						$isInHead = true;
						break;
					}
				}
				$tempDomDocument->loadHTML("<!DOCTYPE html>\n<html>" . ($isInHead ? '<head></head>' : '<body></body>') . '</html>');
				$tempDomDocument->childNodes[1]->childNodes[0]->appendChild($tempDomDocument->importNode(clone ($node), true));
				$html = $tempDomDocument->saveHTML();
				$html = substr($html, 28, -14); // remove the DOCTYPE + the new line + html + body or head tags
			}
			$html = trim($html);
		} else {
			$removeHtmlElement = false;
			$removeHeadElement = false;
			$headElement = $this->getElementsByTagName('head')->item(0);
			if ($headElement === null) {
				if ($this->addHtmlElementIfMissing()) {
					$removeHtmlElement = true;
				}
				if ($this->addHeadElementIfMissing()) {
					$removeHeadElement = true;
				}
				$headElement = $this->getElementsByTagName('head')->item(0);
			}
			// Temporary charset meta, placed first in the head for the serializer.
			$meta = $this->createElement('meta');
			$meta->setAttribute('data-html5-dom-document-internal-attribute', 'charset-meta');
			$meta->setAttribute('http-equiv', 'content-type');
			$meta->setAttribute('content', 'text/html; charset=utf-8');
			if ($headElement->firstChild !== null) {
				$headElement->insertBefore($meta, $headElement->firstChild);
			} else {
				$headElement->appendChild($meta);
			}
			$html = parent::saveHTML();
			$html = rtrim($html, "\n");

			if ($removeHeadElement) {
				// The head was added only for serialization; removing it takes the meta along.
				$temporaryParent = $headElement->parentNode;
				$temporaryParent->removeChild($headElement);
				// Likewise detach the <html> element added above. Leaving it behind
				// would make a later saveHTML() call emit "<html></html>".
				if ($removeHtmlElement && $temporaryParent->parentNode !== null) {
					$temporaryParent->parentNode->removeChild($temporaryParent);
				}
			} else {
				$meta->parentNode->removeChild($meta);
			}

			// Turn the entity placeholders written by loadHTML() back into entities.
			if (strpos($html, 'html5-dom-document-internal-entity') !== false) {
				$html = preg_replace('/html5-dom-document-internal-entity1-(.*?)-end/', '&$1;', $html);
				$html = preg_replace('/html5-dom-document-internal-entity2-(.*?)-end/', '&#$1;', $html);
			}

			// Internal markers, the serialized charset meta and closing tags of
			// void elements are stripped from the output.
			$codeToRemove = [
				'html5-dom-document-internal-content',
				'<meta data-html5-dom-document-internal-attribute="charset-meta" http-equiv="content-type" content="text/html; charset=utf-8">',
				'</area>', '</base>', '</br>', '</col>', '</command>', '</embed>', '</hr>', '</img>', '</input>', '</keygen>', '</link>', '</meta>', '</param>', '</source>', '</track>', '</wbr>',
				'<![CDATA[html5-dom-document-internal-cdata', 'html5-dom-document-internal-cdata]]>', 'html5-dom-document-internal-cdata-endtagfix'
			];
			if ($removeHeadElement) {
				$codeToRemove[] = '<head></head>';
			}
			if ($removeHtmlElement) {
				$codeToRemove[] = '<html></html>';
			}

			$html = str_replace($codeToRemove, '', $html);
		}
		return $html;
	}

	/**
	 * Appends an <html> element to the document when it has none.
	 *
	 * @return boolean TRUE if an element was appended, FALSE if the document
	 *  already contained an <html> element.
	 */
	private function addHtmlElementIfMissing()
	{
		$existingCount = $this->getElementsByTagName('html')->length;
		if ($existingCount !== 0) {
			return false;
		}

		// Build the prototype once; every use works on a clone of it.
		if (!isset(self::$newObjectsCache['htmlelement'])) {
			self::$newObjectsCache['htmlelement'] = new \DOMElement('html');
		}
		$htmlElement = clone (self::$newObjectsCache['htmlelement']);
		$this->appendChild($htmlElement);

		return true;
	}

	/**
	 * Inserts a <head> element as the first child of the first <html> element
	 * when the document has no <head>.
	 *
	 * The <html> element is dereferenced without a null check, so it must
	 * already exist when this is called.
	 *
	 * @return boolean TRUE if an element was inserted, FALSE if the document
	 *  already contained a <head> element.
	 */
	private function addHeadElementIfMissing()
	{
		if ($this->getElementsByTagName('head')->length !== 0) {
			return false;
		}

		$htmlElement = $this->getElementsByTagName('html')->item(0);

		// Build the prototype once; every use works on a clone of it.
		if (!isset(self::$newObjectsCache['headelement'])) {
			self::$newObjectsCache['headelement'] = new \DOMElement('head');
		}
		$newHead = clone (self::$newObjectsCache['headelement']);

		$firstChild = $htmlElement->firstChild;
		if ($firstChild !== null) {
			$htmlElement->insertBefore($newHead, $firstChild);
		} else {
			$htmlElement->appendChild($newHead);
		}

		return true;
	}

	/**
	 * Appends a <body> element to the first <html> element when the document
	 * has no <body>.
	 *
	 * The <html> element is dereferenced without a null check, so it must
	 * already exist when this is called.
	 *
	 * @return boolean TRUE if an element was appended, FALSE if the document
	 *  already contained a <body> element.
	 */
	private function addBodyElementIfMissing()
	{
		if ($this->getElementsByTagName('body')->length !== 0) {
			return false;
		}

		// Build the prototype once; every use works on a clone of it.
		if (!isset(self::$newObjectsCache['bodyelement'])) {
			self::$newObjectsCache['bodyelement'] = new \DOMElement('body');
		}

		$htmlElement = $this->getElementsByTagName('html')->item(0);
		$htmlElement->appendChild(clone (self::$newObjectsCache['bodyelement']));

		return true;
	}



	/**
	 * Initializes an empty document: loads a bare doctype + <html> skeleton and
	 * appends a <head> followed by a <body> to the document element.
	 */
	private function fillEmptyDocumentElement(){
		$this->loadHTML("<!doctype html><html></html>");

		foreach([ "head", "body" ] as $tagName) {
			$this->documentElement->appendChild(
				$this->createElement($tagName)
			);
		}
	}
}
// © 2025 GrazzMean