` is dropped by libxml and collapses lines in PDF output; normalize to `
`.
$html = str_replace('', '
', $html);
$allowedTags = '
| |
|---|
';
$html = strip_tags($html, $allowedTags);
$previous = libxml_use_internal_errors(true);
$doc = new DOMDocument;
$wrapped = ''.$html.'
';
$doc->loadHTML($wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
libxml_clear_errors();
libxml_use_internal_errors($previous);
$xpath = new DOMXPath($doc);
$root = $xpath->query('//*[@id="__pdf_notes"]')->item(0);
if (! $root instanceof DOMElement) {
return $html;
}
foreach ($xpath->query('.//*', $root) as $element) {
if (! $element instanceof DOMElement) {
continue;
}
$toRemove = [];
foreach ($element->attributes as $attr) {
if (self::shouldRemoveAttribute($attr->name)) {
$toRemove[] = $attr->name;
}
}
foreach ($toRemove as $name) {
$element->removeAttribute($name);
}
}
$result = '';
foreach ($root->childNodes as $child) {
$result .= $doc->saveHTML($child);
}
return $result;
}
/**
 * Decide whether an attribute has to be stripped from a sanitized element.
 *
 * The name is lowercased before matching, so the check is case-insensitive.
 * An attribute is rejected when its name begins with "on" (which covers the
 * inline event-handler family such as onclick/onload) or when it exactly
 * matches one of the blocked names listed below.
 *
 * @param string $name Attribute name as reported by the DOM node.
 *
 * @return bool True when the attribute should be removed, false when it may stay.
 */
private static function shouldRemoveAttribute(string $name): bool
{
    $normalized = strtolower($name);

    // Exact-match blocklist; compared strictly against the lowercased name.
    $blockedNames = [
        'style',
        'src',
        'href',
        'srcset',
        'srcdoc',
        'poster',
        'formaction',
        'xlink:href',
    ];

    return str_starts_with($normalized, 'on')
        || in_array($normalized, $blockedNames, true);
}
}