78 $this->xmlDocument =
new DOMDocument();
79 $this->xmlDocument->preserveWhiteSpace =
false;
80 $this->xmlDocument->strictErrorChecking =
false;
81 $this->xmlDocument->formatOutput =
true;
84 if($this->minifyXML) {
85 $this->xmlDocument->formatOutput =
false;
106 $this->allowedTags = $allowedTags::getTags();
126 $this->allowedAttrs = $allowedAttrs::getAttributes();
153 $dirty = preg_replace(
'/<\?(=|php)(.+?)\?>/i',
'', $dirty);
157 $loaded = $this->xmlDocument->loadXML($dirty);
168 $allElements = $this->xmlDocument->getElementsByTagName(
"*");
174 $clean = $this->xmlDocument->saveXML($this->xmlDocument->documentElement, LIBXML_NOEMPTYTAG);
179 if($this->minifyXML) {
180 $clean = preg_replace(
'/\s+/',
' ', $clean);
193 $this->xmlLoaderValue = libxml_disable_entity_loader(
true);
196 libxml_use_internal_errors(
true);
208 libxml_disable_entity_loader($this->xmlLoaderValue);
217 foreach ($this->xmlDocument->childNodes as $child) {
218 if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) {
219 $child->parentNode->removeChild($child);
234 for ($i = $elements->length - 1; $i >= 0; $i--) {
235 $currentElement = $elements->item($i);
238 if (!in_array(strtolower($currentElement->tagName), $this->allowedTags)) {
239 $currentElement->parentNode->removeChild($currentElement);
258 for (
$x = $element->attributes->length - 1;
$x >= 0;
$x--) {
260 $attrName = $element->attributes->item(
$x)->name;
263 if (!in_array(strtolower($attrName), $this->allowedAttrs)) {
264 $element->removeAttribute($attrName);
270 if (isset($element->attributes->item(
$x)->value) && $this->hasRemoteReference($element->attributes->item(
$x)->value)) {
271 $element->removeAttribute($attrName);
284 $xlinks = $element->getAttributeNS(
'http://www.w3.org/1999/xlink',
'href');
285 if (preg_match(self::SCRIPT_REGEX, $xlinks) === 1) {
286 $element->removeAttributeNS(
'http://www.w3.org/1999/xlink',
'href');
297 $href = $element->getAttribute(
'href');
298 if (preg_match(self::SCRIPT_REGEX, $href) === 1) {
299 $element->removeAttribute(
'href');
311 if (preg_match(self::REMOTE_REFERENCE_REGEX, $value) === 1) {
323 public function minify($shouldMinify =
false)
325 $this->minifyXML = (bool) $shouldMinify;
const SCRIPT_REGEX
Regex to catch script and data values in attributes.
getAllowedAttrs()
Get the array of allowed attributes.
resetInternal()
Set up the DOMDocument.
setAllowedTags(TagInterface $allowedTags)
Set custom allowed tags.
cleanAttributesOnWhitelist(\DOMElement $element)
Only allow attributes that are on the whitelist.
setUpBefore()
Set up libXML before we start.
hasRemoteReference($value)
Does this attribute value have a remote reference?
resetAfter()
Reset the class after use.
cleanHrefs(\DOMElement &$element)
Clean the hrefs of script and data embeds.
cleanXlinkHrefs(\DOMElement &$element)
Clean the xlink:hrefs of script and data embeds.
removeDoctype()
Remove the XML Doctype. It may be caught later on output, but that seems to be buggy,...
setAllowedAttrs(AttributeInterface $allowedAttrs)
Set custom allowed attributes.
sanitize($dirty)
Sanitize the passed string.
const REMOTE_REFERENCE_REGEX
Regex to test for remote URLs in linked assets.
startClean(\DOMNodeList $elements)
Start the cleaning with tags, then move on to attributes and hrefs later.
minify($shouldMinify=false)
Should we minify the output?
removeRemoteReferences($removeRemoteRefs=false)
Should we remove references to remote files?
getAllowedTags()
Get the array of allowed tags.
static getAttributes()
Returns an array of attributes.