79 $this->xmlDocument->preserveWhiteSpace =
false;
80 $this->xmlDocument->strictErrorChecking =
false;
81 $this->xmlDocument->formatOutput =
true;
84 if($this->minifyXML) {
85 $this->xmlDocument->formatOutput =
false;
106 $this->allowedTags = $allowedTags::getTags();
126 $this->allowedAttrs = $allowedAttrs::getAttributes();
153 $dirty = preg_replace(
'/<\?(=|php)(.+?)\?>/i',
'', $dirty);
157 $loaded = $this->xmlDocument->loadXML($dirty);
168 $allElements = $this->xmlDocument->getElementsByTagName(
"*");
174 $clean = $this->xmlDocument->saveXML($this->xmlDocument->documentElement, LIBXML_NOEMPTYTAG);
179 if($this->minifyXML) {
180 $clean = preg_replace(
'/\s+/',
' ', $clean);
193 $this->xmlLoaderValue = libxml_disable_entity_loader(
true);
196 libxml_use_internal_errors(
true);
208 libxml_disable_entity_loader($this->xmlLoaderValue);
217 foreach ($this->xmlDocument->childNodes as $child) {
218 if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
219 $child->parentNode->removeChild($child);
234 for ($i = $elements->length - 1; $i >= 0; $i--) {
235 $currentElement = $elements->item($i);
238 if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
239 $currentElement->parentNode->removeChild($currentElement);
258 for (
$x = $element->attributes->length - 1;
$x >= 0;
$x--) {
260 $attrName = $element->attributes->item(
$x)->name;
263 if (!in_array(strtolower($attrName), $this->allowedAttrs)) {
264 $element->removeAttribute($attrName);
270 if (isset($element->attributes->item(
$x)->value) && $this->
hasRemoteReference($element->attributes->item(
$x)->value)) {
271 $element->removeAttribute($attrName);
284 $xlinks = $element->getAttributeNS(
'http://www.w3.org/1999/xlink',
'href');
285 if (preg_match(self::SCRIPT_REGEX, $xlinks) === 1) {
286 $element->removeAttributeNS(
'http://www.w3.org/1999/xlink',
'href');
297 $href = $element->getAttribute(
'href');
298 if (preg_match(self::SCRIPT_REGEX, $href) === 1) {
299 $element->removeAttribute(
'href');
311 if (preg_match(self::REMOTE_REFERENCE_REGEX, $value) === 1) {
323 public function minify($shouldMinify =
false)
325 $this->minifyXML = (bool) $shouldMinify;
getAllowedAttrs()
Get the array of allowed attributes.
hasRemoteReference($value)
Does this attribute value have a remote reference?
resetInternal()
Set up the DOMDocument.
cleanAttributesOnWhitelist(\DOMElement $element)
Only allow attributes that are on the whitelist.
setAllowedAttrs(AttributeInterface $allowedAttrs)
Set custom allowed attributes.
setUpBefore()
Set up libXML before we start.
startClean(\DOMNodeList $elements)
Start the cleaning with tags, then we move on to attributes and hrefs later.
resetAfter()
Reset the class after use.
setAllowedTags(TagInterface $allowedTags)
Set custom allowed tags.
sanitize($dirty)
Sanitize the passed string.
cleanXlinkHrefs(\DOMElement &$element)
Clean the xlink:hrefs of script and data embeds.
minify($shouldMinify=false)
Should we minify the output?
const SCRIPT_REGEX
Regex to catch script and data values in attributes.
removeDoctype()
Remove the XML Doctype. It may be caught later on output, but that seems to be buggy, so we need to make sure it's gone.
const REMOTE_REFERENCE_REGEX
Regex to test for remote URLs in linked assets.
static getAttributes()
Returns an array of attributes.
cleanHrefs(\DOMElement &$element)
Clean the hrefs of script and data embeds.
getAllowedTags()
Get the array of allowed tags.
removeRemoteReferences($removeRemoteRefs=false)
Should we remove references to remote files?