diff --git a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java
index 31eafc23..08d247c4 100644
--- a/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java
+++ b/src/main/java/org/owasp/validator/html/scan/AbstractAntiSamyScanner.java
@@ -118,6 +118,18 @@ protected org.apache.xml.serialize.HTMLSerializer getHTMLSerializer(
     return new ASHTMLSerializer(w, format, policy);
   }
 
+  /**
+   * Returns a new {@link HtmlSerializer} configured for the current policy. This is the preferred
+   * serializer that does not depend on the deprecated Xerces
+   * {@code org.apache.xml.serialize.HTMLSerializer}.
+   *
+   * @param w the writer to serialize into
+   * @return a fully configured {@link HtmlSerializer}
+   */
+  protected HtmlSerializer getHtmlSerializer(Writer w) {
+    return new HtmlSerializer(w, policy);
+  }
+
   protected String trim(String original, String cleaned) {
     if (cleaned.endsWith("\n")) {
       if (!original.endsWith("\n")) {
diff --git a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java
index ca754867..4716a122 100644
--- a/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java
+++ b/src/main/java/org/owasp/validator/html/scan/AntiSamyDOMScanner.java
@@ -185,11 +185,7 @@ public CleanResults scan(String html) throws ScanException {
 
       StringWriter out = new StringWriter();
 
-      @SuppressWarnings("deprecation")
-      org.apache.xml.serialize.OutputFormat format = getOutputFormat();
-
-      //noinspection deprecation
-      org.apache.xml.serialize.HTMLSerializer serializer = getHTMLSerializer(out, format);
+      HtmlSerializer serializer = getHtmlSerializer(out);
       serializer.serialize(dom);
 
       /*
diff --git a/src/main/java/org/owasp/validator/html/scan/HtmlSerializer.java b/src/main/java/org/owasp/validator/html/scan/HtmlSerializer.java
new file mode 100644
index 00000000..3b50d6df
--- /dev/null
+++ b/src/main/java/org/owasp/validator/html/scan/HtmlSerializer.java
@@ -0,0 +1,925 @@
+/*
+ * Copyright (c) 2007-2024, Arshan Dabirsiaghi, Jason Li
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list of conditions
+ * and the following disclaimer. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. Neither the name of OWASP nor the names of its
+ * contributors may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+ * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.owasp.validator.html.scan;
+
+import java.io.IOException;
+import java.io.Writer;
+import java.util.ArrayDeque;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import org.owasp.validator.html.InternalPolicy;
+import org.owasp.validator.html.TagMatcher;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.w3c.dom.Attr;
+import org.w3c.dom.Comment;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.Text;
+
+/**
+ * A pure-Java HTML serializer that replaces the deprecated Xerces {@code HTMLSerializer}
+ * (org.apache.xml.serialize.HTMLSerializer) used by the old {@link ASHTMLSerializer}. This class
+ * serializes a DOM {@link DocumentFragment} to an HTML string without any Apache Xerces dependency.
+ *
+ * <p>Behaviour is modelled on the Xerces {@code HTMLSerializer} / {@code BaseMarkupSerializer}
+ * stack and the AntiSamy-specific {@code ASHTMLSerializer} overrides, so that existing output is
+ * preserved exactly.
+ *
+ * @see ASHTMLSerializer
+ */
+public class HtmlSerializer {
+
+  private static final Logger logger = LoggerFactory.getLogger(HtmlSerializer.class);
+
+  // -----------------------------------------------------------------------
+  // Static HTML metadata tables (replicate Xerces HTMLdtd behaviour)
+  // -----------------------------------------------------------------------
+
+  /**
+   * Elements that Xerces HTMLdtd considers "empty" (isEmptyTag returns true). These are elements
+   * whose flags have the ONLY_OPENING (0x01) or EMPTY (0x10) bits set. When such an element has no
+   * child nodes it is serialized in the "else" (void) branch of serializeElement.
+   */
+  private static final Set<String> EMPTY_ELEMENTS;
+
+  /**
+   * True void/self-closing HTML elements (have both EMPTY + ONLY_OPENING flags in Xerces, i.e. no
+   * closing tag should be printed). These never reach {@link #endElementIO} in practice because
+   * they are handled directly in the void branch.
+   */
+  private static final Set<String> VOID_ELEMENTS;
+
+  /** Elements whose content should be treated as preserve-space (no indentation inside). */
+  private static final Set<String> PRESERVE_SPACE_ELEMENTS;
+
+  /**
+   * Attribute names that carry URI values and must be escaped via {@link #printEscaped}. Matches
+   * Xerces {@code HTMLdtd.isURI} which checks {@code href} and {@code src} case-insensitively.
+   */
+  private static final Set<String> URI_ATTRS;
+
+  /**
+   * Boolean attributes keyed by lower-case element name. Matches the Xerces {@code HTMLdtd}
+   * defineBoolean table. Boolean attributes are written as just the attribute name with no {@code
+   * ="value"} suffix.
+   */
+  private static final Map<String, Set<String>> BOOLEAN_ATTRS_BY_ELEMENT;
+
+  /**
+   * HTML named character-entity map: Unicode code point → entity name. Built from the same
+   * HTMLEntities.res data that Xerces uses, so entity encoding is identical.
+   */
+  private static final Map<Integer, String> HTML_ENTITIES;
+
+  static {
+    // Elements treated as "empty" by Xerces isEmptyTag (ONLY_OPENING or EMPTY flag bits)
+    EMPTY_ELEMENTS =
+        Collections.unmodifiableSet(
+            new HashSet<>(
+                Arrays.asList(
+                    "area", "base", "basefont", "br", "col",
+                    "dd", "dt", "frame", "hr", "img",
+                    "input", "isindex", "li", "link", "meta",
+                    "option", "param")));
+
+    // Pure void HTML elements – no closing tag, ever
+    VOID_ELEMENTS =
+        Collections.unmodifiableSet(
+            new HashSet<>(
+                Arrays.asList(
+                    "area", "base", "basefont", "br", "col",
+                    "frame", "hr", "img", "input", "isindex",
+                    "link", "meta", "param")));
+
+    // preserve-space elements (PRE, SCRIPT, STYLE and TEXTAREA): no indentation inside them
+    PRESERVE_SPACE_ELEMENTS =
+        Collections.unmodifiableSet(
+            new HashSet<>(Arrays.asList("pre", "script", "style", "textarea")));
+
+    URI_ATTRS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList("href", "src")));
+
+    Map<String, Set<String>> boolMap = new HashMap<>();
+    boolMap.put("area", new HashSet<>(Collections.singletonList("href")));
+    boolMap.put("button", new HashSet<>(Collections.singletonList("disabled")));
+    boolMap.put("dir", new HashSet<>(Collections.singletonList("compact")));
+    boolMap.put("dl", new HashSet<>(Collections.singletonList("compact")));
+    boolMap.put("frame", new HashSet<>(Collections.singletonList("noresize")));
+    boolMap.put("hr", new HashSet<>(Collections.singletonList("noshade")));
+    boolMap.put("image", new HashSet<>(Collections.singletonList("ismap")));
+    boolMap.put(
+        "input",
+        new HashSet<>(
+            Arrays.asList("defaultchecked", "checked", "readonly", "disabled")));
+    boolMap.put("link", new HashSet<>(Collections.singletonList("link")));
+    boolMap.put("menu", new HashSet<>(Collections.singletonList("compact")));
+    boolMap.put("object", new HashSet<>(Collections.singletonList("declare")));
+    boolMap.put("ol", new HashSet<>(Collections.singletonList("compact")));
+    boolMap.put("optgroup", new HashSet<>(Collections.singletonList("disabled")));
+    boolMap.put(
+        "option",
+        new HashSet<>(Arrays.asList("default-selected", "selected", "disabled")));
+    boolMap.put("script", new HashSet<>(Collections.singletonList("defer")));
+    boolMap.put("select", new HashSet<>(Arrays.asList("multiple", "disabled")));
+    boolMap.put("style", new HashSet<>(Collections.singletonList("disabled")));
+    boolMap.put("td", new HashSet<>(Collections.singletonList("nowrap")));
+    boolMap.put("th", new HashSet<>(Collections.singletonList("nowrap")));
+    boolMap.put(
+        "textarea", new HashSet<>(Arrays.asList("disabled", "readonly")));
+    boolMap.put("ul", new HashSet<>(Collections.singletonList("compact")));
+    BOOLEAN_ATTRS_BY_ELEMENT = Collections.unmodifiableMap(boolMap);
+
+    // HTML named character entities (from Xerces HTMLEntities.res)
+    Map<Integer, String> ent = new HashMap<>();
+    // markup-significant
+    ent.put(34, "quot");
+    ent.put(38, "amp");
+    ent.put(60, "lt");
+    ent.put(62, "gt");
+    // ISO 8859-1
+    ent.put(160, "nbsp");
+    ent.put(161, "iexcl");
+    ent.put(162, "cent");
+    ent.put(163, "pound");
+    ent.put(164, "curren");
+    ent.put(165, "yen");
+    ent.put(166, "brvbar");
+    ent.put(167, "sect");
+    ent.put(168, "uml");
+    ent.put(169, "copy");
+    ent.put(170, "ordf");
+    ent.put(171, "laquo");
+    ent.put(172, "not");
+    ent.put(173, "shy");
+    ent.put(174, "reg");
+    ent.put(175, "macr");
+    ent.put(176, "deg");
+    ent.put(177, "plusmn");
+    ent.put(178, "sup2");
+    ent.put(179, "sup3");
+    ent.put(180, "acute");
+    ent.put(181, "micro");
+    ent.put(182, "para");
+    ent.put(183, "middot");
+    ent.put(184, "cedil");
+    ent.put(185, "sup1");
+    ent.put(186, "ordm");
+    ent.put(187, "raquo");
+    ent.put(188, "frac14");
+    ent.put(189, "frac12");
+    ent.put(190, "frac34");
+    ent.put(191, "iquest");
+    ent.put(192, "Agrave");
+    ent.put(193, "Aacute");
+    ent.put(194, "Acirc");
+    ent.put(195, "Atilde");
+    ent.put(196, "Auml");
+    ent.put(197, "Aring");
+    ent.put(198, "AElig");
+    ent.put(199, "Ccedil");
+    ent.put(200, "Egrave");
+    ent.put(201, "Eacute");
+    ent.put(202, "Ecirc");
+    ent.put(203, "Euml");
+    ent.put(204, "Igrave");
+    ent.put(205, "Iacute");
+    ent.put(206, "Icirc");
+    ent.put(207, "Iuml");
+    ent.put(208, "ETH");
+    ent.put(209, "Ntilde");
+    ent.put(210, "Ograve");
+    ent.put(211, "Oacute");
+    ent.put(212, "Ocirc");
+    ent.put(213, "Otilde");
+    ent.put(214, "Ouml");
+    ent.put(215, "times");
+    ent.put(216, "Oslash");
+    ent.put(217, "Ugrave");
+    ent.put(218, "Uacute");
+    ent.put(219, "Ucirc");
+    ent.put(220, "Uuml");
+    ent.put(221, "Yacute");
+    ent.put(222, "THORN");
+    ent.put(223, "szlig");
+    ent.put(224, "agrave");
+    ent.put(225, "aacute");
+    ent.put(226, "acirc");
+    ent.put(227, "atilde");
+    ent.put(228, "auml");
+    ent.put(229, "aring");
+    ent.put(230, "aelig");
+    ent.put(231, "ccedil");
+    ent.put(232, "egrave");
+    ent.put(233, "eacute");
+    ent.put(234, "ecirc");
+    ent.put(235, "euml");
+    ent.put(236, "igrave");
+    ent.put(237, "iacute");
+    ent.put(238, "icirc");
+    ent.put(239, "iuml");
+    ent.put(240, "eth");
+    ent.put(241, "ntilde");
+    ent.put(242, "ograve");
+    ent.put(243, "oacute");
+    ent.put(244, "ocirc");
+    ent.put(245, "otilde");
+    ent.put(246, "ouml");
+    ent.put(247, "divide");
+    ent.put(248, "oslash");
+    ent.put(249, "ugrave");
+    ent.put(250, "uacute");
+    ent.put(251, "ucirc");
+    ent.put(252, "uuml");
+    ent.put(253, "yacute");
+    ent.put(254, "thorn");
+    ent.put(255, "yuml");
+    // Symbols / Math / Greek
+    ent.put(402, "fnof");
+    ent.put(913, "Alpha");
+    ent.put(914, "Beta");
+    ent.put(915, "Gamma");
+    ent.put(916, "Delta");
+    ent.put(917, "Epsilon");
+    ent.put(918, "Zeta");
+    ent.put(919, "Eta");
+    ent.put(920, "Theta");
+    ent.put(921, "Iota");
+    ent.put(922, "Kappa");
+    ent.put(923, "Lambda");
+    ent.put(924, "Mu");
+    ent.put(925, "Nu");
+    ent.put(926, "Xi");
+    ent.put(927, "Omicron");
+    ent.put(928, "Pi");
+    ent.put(929, "Rho");
+    ent.put(931, "Sigma");
+    ent.put(932, "Tau");
+    ent.put(933, "Upsilon");
+    ent.put(934, "Phi");
+    ent.put(935, "Chi");
+    ent.put(936, "Psi");
+    ent.put(937, "Omega");
+    ent.put(945, "alpha");
+    ent.put(946, "beta");
+    ent.put(947, "gamma");
+    ent.put(948, "delta");
+    ent.put(949, "epsilon");
+    ent.put(950, "zeta");
+    ent.put(951, "eta");
+    ent.put(952, "theta");
+    ent.put(953, "iota");
+    ent.put(954, "kappa");
+    ent.put(955, "lambda");
+    ent.put(956, "mu");
+    ent.put(957, "nu");
+    ent.put(958, "xi");
+    ent.put(959, "omicron");
+    ent.put(960, "pi");
+    ent.put(961, "rho");
+    ent.put(962, "sigmaf");
+    ent.put(963, "sigma");
+    ent.put(964, "tau");
+    ent.put(965, "upsilon");
+    ent.put(966, "phi");
+    ent.put(967, "chi");
+    ent.put(968, "psi");
+    ent.put(969, "omega");
+    ent.put(977, "thetasym");
+    ent.put(978, "upsih");
+    ent.put(982, "piv");
+    // General Punctuation
+    ent.put(8226, "bull");
+    ent.put(8230, "hellip");
+    ent.put(8242, "prime");
+    ent.put(8243, "Prime");
+    ent.put(8254, "oline");
+    ent.put(8260, "frasl");
+    // Letterlike Symbols
+    ent.put(8472, "weierp");
+    ent.put(8465, "image");
+    ent.put(8476, "real");
+    ent.put(8482, "trade");
+    ent.put(8501, "alefsym");
+    // Arrows
+    ent.put(8592, "larr");
+    ent.put(8593, "uarr");
+    ent.put(8594, "rarr");
+    ent.put(8595, "darr");
+    ent.put(8596, "harr");
+    ent.put(8629, "crarr");
+    ent.put(8656, "lArr");
+    ent.put(8657, "uArr");
+    ent.put(8658, "rArr");
+    ent.put(8659, "dArr");
+    ent.put(8660, "hArr");
+    // Mathematical Operators
+    ent.put(8704, "forall");
+    ent.put(8706, "part");
+    ent.put(8707, "exist");
+    ent.put(8709, "empty");
+    ent.put(8711, "nabla");
+    ent.put(8712, "isin");
+    ent.put(8713, "notin");
+    ent.put(8715, "ni");
+    ent.put(8719, "prod");
+    ent.put(8721, "sum");
+    ent.put(8722, "minus");
+    ent.put(8727, "lowast");
+    ent.put(8730, "radic");
+    ent.put(8733, "prop");
+    ent.put(8734, "infin");
+    ent.put(8736, "ang");
+    ent.put(8743, "and");
+    ent.put(8744, "or");
+    ent.put(8745, "cap");
+    ent.put(8746, "cup");
+    ent.put(8747, "int");
+    ent.put(8756, "there4");
+    ent.put(8764, "sim");
+    ent.put(8773, "cong");
+    ent.put(8776, "asymp");
+    ent.put(8800, "ne");
+    ent.put(8801, "equiv");
+    ent.put(8804, "le");
+    ent.put(8805, "ge");
+    ent.put(8834, "sub");
+    ent.put(8835, "sup");
+    ent.put(8836, "nsub");
+    ent.put(8838, "sube");
+    ent.put(8839, "supe");
+    ent.put(8853, "oplus");
+    ent.put(8855, "otimes");
+    ent.put(8869, "perp");
+    ent.put(8901, "sdot");
+    // Miscellaneous Technical
+    ent.put(8968, "lceil");
+    ent.put(8969, "rceil");
+    ent.put(8970, "lfloor");
+    ent.put(8971, "rfloor");
+    ent.put(9001, "lang");
+    ent.put(9002, "rang");
+    // Geometric Shapes
+    ent.put(9674, "loz");
+    // Miscellaneous Symbols
+    ent.put(9824, "spades");
+    ent.put(9827, "clubs");
+    ent.put(9829, "hearts");
+    ent.put(9830, "diams");
+    // Internationalisation
+    ent.put(338, "OElig");
+    ent.put(339, "oelig");
+    ent.put(376, "Yuml");
+    ent.put(710, "circ");
+    ent.put(732, "tilde");
+    ent.put(8194, "ensp");
+    ent.put(8195, "emsp");
+    ent.put(8201, "thinsp");
+    ent.put(8204, "zwnj");
+    ent.put(8205, "zwj");
+    ent.put(8206, "lrm");
+    ent.put(8207, "rlm");
+    ent.put(8211, "ndash");
+    ent.put(8212, "mdash");
+    ent.put(8216, "lsquo");
+    ent.put(8217, "rsquo");
+    ent.put(8218, "sbquo");
+    ent.put(8220, "ldquo");
+    ent.put(8221, "rdquo");
+    ent.put(8222, "bdquo");
+    ent.put(8224, "dagger");
+    ent.put(8225, "Dagger");
+    ent.put(8240, "permil");
+    ent.put(8249, "lsaquo");
+    ent.put(8250, "rsaquo");
+    ent.put(8364, "euro");
+    HTML_ENTITIES = Collections.unmodifiableMap(ent);
+  }
+
+  // -----------------------------------------------------------------------
+  // Instance state
+  // -----------------------------------------------------------------------
+
+  private final Writer writer;
+  private final boolean encodeAllPossibleEntities;
+  private final TagMatcher allowedEmptyTags;
+  private final TagMatcher requiresClosingTags;
+  private final boolean omitXmlDeclaration;
+  private final boolean omitDoctypeDeclaration;
+  private final boolean indenting;
+  private final int indentSize;
+  private final boolean globalPreserveSpace;
+
+  // Tracking state across serialization
+  private boolean started = false;
+  private int currentIndent = 0;
+  private final Deque<ElementState> stateStack = new ArrayDeque<>();
+
+  // -----------------------------------------------------------------------
+  // Per-element state (mirrors Xerces ElementState)
+  // -----------------------------------------------------------------------
+
+  private static class ElementState {
+    String rawName;
+    boolean preserveSpace;
+    /** True while the element's opening {@code >} has not yet been written. */
+    boolean empty = true;
+    /** True when the last serialized sibling was an element (used for indenting). */
+    boolean afterElement = false;
+    /** True for SCRIPT/STYLE – content is not HTML-escaped. */
+    boolean unescaped = false;
+  }
+
+  // -----------------------------------------------------------------------
+  // Constructor
+  // -----------------------------------------------------------------------
+
+  public HtmlSerializer(Writer w, InternalPolicy policy) {
+    this.writer = w;
+    this.encodeAllPossibleEntities = policy.isEntityEncodeIntlCharacters();
+    this.allowedEmptyTags = policy.getAllowedEmptyTags();
+    this.requiresClosingTags = policy.getRequiresClosingTags();
+    this.omitXmlDeclaration = policy.isOmitXmlDeclaration();
+    this.omitDoctypeDeclaration = policy.isOmitDoctypeDeclaration();
+    this.indenting = policy.isFormatOutput();
+    this.indentSize = 2;
+    this.globalPreserveSpace = policy.isPreserveSpace();
+  }
+
+  // -----------------------------------------------------------------------
+  // Public API
+  // -----------------------------------------------------------------------
+
+  /**
+   * Writes every top-level child of {@code fragment}, in document order, to the {@link Writer}
+   * supplied at construction time and then flushes that writer.
+   *
+   * @param fragment the fragment to serialize
+   * @throws IOException if writing to the underlying writer fails
+   */
+  public void serialize(DocumentFragment fragment) throws IOException {
+    // Walk the sibling chain; the next sibling is read only after the current node is written.
+    for (Node node = fragment.getFirstChild(); node != null; node = node.getNextSibling()) {
+      serializeNode(node);
+    }
+    writer.flush();
+  }
+
+  // -----------------------------------------------------------------------
+  // Internal serialization helpers
+  // -----------------------------------------------------------------------
+
+  private void serializeNode(Node node) throws IOException {
+    switch (node.getNodeType()) {
+      case Node.ELEMENT_NODE:
+        serializeElement((Element) node);
+        break;
+      case Node.TEXT_NODE:
+      case Node.CDATA_SECTION_NODE:
+        serializeText((Text) node);
+        break;
+      case Node.COMMENT_NODE:
+        serializeComment((Comment) node);
+        break;
+      default:
+        // Processing instructions and other nodes have already been removed
+        // by the AntiSamy DOM scanner before serialization is called.
+        break;
+    }
+  }
+
+  /**
+   * Emit an optional XML declaration / DOCTYPE, mirroring
+   * BaseMarkupSerializer.startDocument(). In AntiSamy both flags are normally {@code true} so
+   * nothing is output.
+   */
+  private void startDocument(String rootTagName) throws IOException {
+    StringBuilder sb = new StringBuilder();
+    if (!omitXmlDeclaration) {
+      sb.append("<?xml version=\"1.0\"?>");
+    }
+    if (!omitDoctypeDeclaration) {
+      sb.append("<!DOCTYPE html>");
+    }
+    if (sb.length() > 0) {
+      writer.write(sb.toString());
+      writer.write('\n');
+    }
+    started = true;
+  }
+
+  /**
+   * Serializes a single DOM {@link Element}, replicating the logic in
+   * {@code ASHTMLSerializer.serializeElement} and {@code BaseMarkupSerializer}.
+   */
+  private void serializeElement(Element elem) throws IOException {
+    String tagName = elem.getTagName();
+    boolean isRootLevel = isDocumentState();
+    ElementState parentState = peekState();
+
+    // --- Document-level bookkeeping (mirrors HTMLSerializer.serializeElement) ---
+    if (isRootLevel) {
+      if (!started) {
+        startDocument(tagName);
+      }
+    } else {
+      // Close the parent element's opening ">" if it hasn't been printed yet.
+      // Capture the value first so we can use it in the breakLine condition below.
+      boolean wasEmpty = parentState.empty;
+      if (wasEmpty) {
+        writer.write('>');
+        parentState.empty = false;
+      }
+      // Line-break before this element when indenting.
+      if (indenting && !parentState.preserveSpace && (wasEmpty || parentState.afterElement)) {
+        breakLine();
+      }
+    }
+
+    // Inherit preserve-space from the parent.
+    boolean preserveSpace =
+        (parentState != null) ? parentState.preserveSpace : globalPreserveSpace;
+
+    // --- Opening tag ---
+    writer.write('<');
+    writer.write(tagName);
+    currentIndent++;
+
+    // --- Attributes ---
+    NamedNodeMap attrMap = elem.getAttributes();
+    if (attrMap != null) {
+      for (int i = 0; i < attrMap.getLength(); i++) {
+        Attr attr = (Attr) attrMap.item(i);
+        if (!attr.getSpecified()) {
+          continue;
+        }
+        String name = attr.getName().toLowerCase(Locale.ENGLISH);
+        String value = attr.getValue();
+        if (value == null) {
+          value = "";
+        }
+        writer.write(' ');
+
+        if (isUriAttr(name)) {
+          // URI attribute: name="<printEscaped(value)>"
+          // escapeURI in ASHTMLSerializer calls printEscaped directly and returns "".
+          writer.write(name);
+          writer.write("=\"");
+          printEscaped(value);
+          writer.write('"');
+        } else if (isBooleanAttr(tagName, name)) {
+          // Boolean attribute: print only the name
+          writer.write(name);
+        } else {
+          writer.write(name);
+          writer.write("=\"");
+          printEscaped(value);
+          writer.write('"');
+        }
+      }
+    }
+
+    if (isPreserveSpaceElement(tagName)) {
+      preserveSpace = true;
+    }
+
+    // --- Decide: element with content vs. void element ---
+    if (elem.hasChildNodes() || !isEmptyElement(tagName)) {
+      // Push a new element state and serialize children.
+      ElementState state = pushState(tagName, preserveSpace);
+
+      // A and TD: close the opening ">" immediately (no line breaks inside).
+      if ("a".equalsIgnoreCase(tagName) || "td".equalsIgnoreCase(tagName)) {
+        state.empty = false;
+        writer.write('>');
+      }
+
+      // SCRIPT and STYLE: content is not HTML-escaped.
+      if ("script".equalsIgnoreCase(tagName) || "style".equalsIgnoreCase(tagName)) {
+        state.unescaped = true;
+      }
+
+      Node child = elem.getFirstChild();
+      while (child != null) {
+        serializeNode(child);
+        child = child.getNextSibling();
+      }
+
+      endElementIO(tagName);
+
+    } else {
+      // Void / empty element branch (mirrors ASHTMLSerializer.serializeElement else-branch).
+      currentIndent--;
+      if (isAllowedEmptyTag(tagName) && !requiresClosingTag(tagName)) {
+        writer.write("/>");
+      } else {
+        writer.write('>');
+      }
+      if (!isRootLevel) {
+        parentState.afterElement = true;
+        parentState.empty = false;
+      }
+      if (isRootLevel) {
+        if (indenting) {
+          writer.write('\n');
+        }
+        writer.flush();
+      }
+    }
+  }
+
+  /**
+   * Closes an element that was opened via {@link #pushState}, mirroring
+   * {@code ASHTMLSerializer.endElementIO}.
+   */
+  private void endElementIO(String rawName) throws IOException {
+    currentIndent--;
+    ElementState state = peekState();
+
+    if (state.empty && isAllowedEmptyTag(rawName) && !requiresClosingTag(rawName)) {
+      // Element had no children and is allowed to self-close.
+      writer.write("/>");
+    } else {
+      // Close the opening tag if not already done.
+      if (state.empty) {
+        writer.write('>');
+      }
+      // All elements reaching endElementIO get a closing tag.
+      // (Void elements are handled in the else-branch of serializeElement and never
+      // reach this method.)
+      if (indenting && !state.preserveSpace && state.afterElement) {
+        breakLine();
+      }
+      writer.write("</");
+      writer.write(state.rawName);
+      writer.write('>');
+    }
+
+    popState();
+    ElementState parentState = peekState();
+    // A and TD elements don't trigger afterElement indentation in their parent.
+    // rawName will always be non-null here (it's the tag name we just serialized).
+    if (!"a".equalsIgnoreCase(rawName) && !"td".equalsIgnoreCase(rawName)) {
+      if (parentState != null) {
+        parentState.afterElement = true;
+        parentState.empty = false;
+      }
+    }
+    if (isDocumentState()) {
+      if (indenting) {
+        writer.write('\n');
+      }
+      writer.flush();
+    }
+  }
+
+  /**
+   * Serializes a text (or CDATA) node, replicating {@code BaseMarkupSerializer.content()} +
+   * {@code characters(String)}. Text is escaped unless the enclosing state is marked unescaped.
+   */
+  private void serializeText(Text node) throws IOException {
+    String text = node.getNodeValue();
+    if (text == null || text.isEmpty()) {
+      return;
+    }
+
+    ElementState state = peekState();
+    boolean inPreserveSpace = (state != null) ? state.preserveSpace : globalPreserveSpace;
+
+    // When formatting output, whitespace-only text outside preserve-space context is dropped:
+    // the line breaks and indentation written by breakLine() take its place. Preserve-space
+    // context is inherited from the enclosing element (PRE, SCRIPT, STYLE, TEXTAREA and their
+    // descendants) or, for text directly under the fragment, taken from the policy-wide setting.
+    if (indenting && !inPreserveSpace && isWhitespaceOnly(text)) {
+      return;
+    }
+
+    // content() equivalent: close the opening tag and clear afterElement.
+    if (!isDocumentState() && state != null) {
+      if (state.empty) {
+        writer.write('>');
+        state.empty = false;
+      }
+      state.afterElement = false;
+    }
+
+    if (state != null && state.unescaped) {
+      // SCRIPT / STYLE content is written verbatim.
+      writer.write(text);
+    } else {
+      printEscaped(text);
+    }
+  }
+
+  /**
+   * Serializes an HTML comment, replicating {@code BaseMarkupSerializer.comment(String)}. The
+   * data is cut at the first {@code -->} so it can never terminate the comment early.
+   */
+  private void serializeComment(Comment comment) throws IOException {
+    ElementState state = peekState();
+    // content() equivalent: close the opening tag.
+    if (!isDocumentState() && state != null && state.empty) {
+      writer.write('>');
+      state.empty = false;
+    }
+    if (indenting && !isDocumentState() && state != null && !state.preserveSpace) {
+      breakLine();
+    }
+    String text = comment.getNodeValue() != null ? comment.getNodeValue() : "";
+    int terminator = text.indexOf("-->");
+    writer.write("<!--" + (terminator >= 0 ? text.substring(0, terminator) : text) + "-->");
+    // A comment does not count as a preceding element when deciding on later line breaks.
+    if (!isDocumentState() && state != null) {
+      state.afterElement = false;
+    }
+  }
+
+  // -----------------------------------------------------------------------
+  // Entity / character escaping
+  // -----------------------------------------------------------------------
+
+  /**
+   * HTML-escapes {@code text} and writes the result directly to the writer, replicating {@code
+   * BaseMarkupSerializer.printEscaped}. Surrogate pairs are encoded as numeric character
+   * references.
+   */
+  private void printEscaped(String text) throws IOException {
+    int length = text.length();
+    for (int i = 0; i < length; ) {
+      char c = text.charAt(i);
+
+      // Handle surrogate pairs (supplementary characters >= U+10000).
+      if (Character.isHighSurrogate(c) && i + 1 < length) {
+        char low = text.charAt(i + 1);
+        if (Character.isLowSurrogate(low)) {
+          int codePoint = Character.toCodePoint(c, low);
+          writer.write("&#x");
+          writer.write(Integer.toHexString(codePoint));
+          writer.write(';');
+          i += 2;
+          continue;
+        }
+      }
+
+      int ch = c;
+      String entity = getEntityRef(ch);
+      if (entity != null) {
+        writer.write('&');
+        writer.write(entity);
+        writer.write(';');
+      } else if (ch == '\n' || ch == '\r' || ch == '\t' || ch >= ' ') {
+        writer.write(ch);
+      } else {
+        // Non-printable control character – numeric reference.
+        writer.write("&#x");
+        writer.write(Integer.toHexString(ch));
+        writer.write(';');
+      }
+      i++;
+    }
+  }
+
+  /**
+   * Returns the HTML named entity for {@code ch}, or {@code null} if none should be used. The
+   * entity table is consulted only when {@code encodeAllPossibleEntities} is {@code true} or
+   * {@code ch} is in {@code Constants.big5CharsToEncodeSet}; code points without a table entry
+   * (the table has none for the apostrophe, 39) yield {@code null} and are not entity-encoded.
+   */
+  private String getEntityRef(int ch) {
+    if (encodeAllPossibleEntities || Constants.big5CharsToEncodeSet.contains(ch)) {
+      return HTML_ENTITIES.get(ch);
+    }
+    return null;
+  }
+
+  // -----------------------------------------------------------------------
+  // HTML metadata helpers
+  // -----------------------------------------------------------------------
+
+  /**
+   * Returns true when the element is treated as "empty" by Xerces {@code HTMLdtd.isEmptyTag}
+   * (elements with the {@code ONLY_OPENING} or {@code EMPTY} flag). When such an element has no
+   * child nodes it is serialized in the void branch without calling {@link #endElementIO}.
+   */
+  private boolean isEmptyElement(String tagName) {
+    return EMPTY_ELEMENTS.contains(tagName.toLowerCase(Locale.ENGLISH));
+  }
+
+  /**
+   * Returns true for space-preserving elements (PRE, SCRIPT, STYLE, TEXTAREA): inside them
+   * indentation line-breaks are suppressed.
+   */
+  private boolean isPreserveSpaceElement(String tagName) {
+    return PRESERVE_SPACE_ELEMENTS.contains(tagName.toLowerCase(Locale.ENGLISH));
+  }
+
+  /** Returns true when the attribute name is a URI attribute (href or src). */
+  private boolean isUriAttr(String attrName) {
+    return URI_ATTRS.contains(attrName.toLowerCase(Locale.ENGLISH));
+  }
+
+  /**
+   * Returns true when the attribute is a boolean attribute for the given element (matching the
+   * Xerces {@code HTMLdtd} defineBoolean table). Boolean attributes are printed without a value.
+   */
+  private boolean isBooleanAttr(String tagName, String attrName) {
+    Set<String> boolAttrs =
+        BOOLEAN_ATTRS_BY_ELEMENT.get(tagName.toLowerCase(Locale.ENGLISH));
+    return boolAttrs != null
+        && boolAttrs.contains(attrName.toLowerCase(Locale.ENGLISH));
+  }
+
+  /**
+   * Returns true when the tag is allowed to be written as a self-closing empty tag by the policy.
+   * Mirrors {@code ASHTMLSerializer.isAllowedEmptyTag}.
+   */
+  private boolean isAllowedEmptyTag(String tagName) {
+    String lower = tagName.toLowerCase(Locale.ENGLISH);
+    return "head".equals(lower) || allowedEmptyTags.matches(tagName);
+  }
+
+  /**
+   * Returns true when the tag must have an explicit closing tag even when empty (e.g. {@code
+   * <script></script>}).
+   */
+  private boolean requiresClosingTag(String tagName) {
+    return requiresClosingTags.matches(tagName);
+  }
+
+  /** Reports whether {@code text} contains nothing but {@link Character#isWhitespace} chars. */
+  private static boolean isWhitespaceOnly(String text) {
+    for (char c : text.toCharArray()) {
+      if (!Character.isWhitespace(c)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // -----------------------------------------------------------------------
+  // Element-state stack
+  // -----------------------------------------------------------------------
+
+  private boolean isDocumentState() {
+    return stateStack.isEmpty();
+  }
+
+  private ElementState peekState() {
+    return stateStack.peekFirst(); // null when the stack is empty, i.e. at document level
+  }
+
+  private ElementState pushState(String rawName, boolean preserveSpace) {
+    ElementState state = new ElementState();
+    state.rawName = rawName;
+    state.preserveSpace = preserveSpace;
+    stateStack.push(state);
+    return state;
+  }
+
+  private void popState() {
+    // pollFirst() removes the top state just like pop(), but returns null instead of
+    // throwing when the stack is already empty.
+    stateStack.pollFirst();
+  }
+
+  // -----------------------------------------------------------------------
+  // Indentation
+  // -----------------------------------------------------------------------
+
+  private void breakLine() throws IOException {
+    writer.write('\n');
+    for (int pending = currentIndent * indentSize; pending > 0; pending--) {
+      writer.write(' ');
+    }
+  }
+}
diff --git a/src/test/java/org/owasp/validator/html/test/HtmlSerializerTest.java b/src/test/java/org/owasp/validator/html/test/HtmlSerializerTest.java
new file mode 100644
index 00000000..0f1a518d
--- /dev/null
+++ b/src/test/java/org/owasp/validator/html/test/HtmlSerializerTest.java
@@ -0,0 +1,1456 @@
+/*
+ * Copyright (c) 2007-2024, Arshan Dabirsiaghi, Jason Li
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification, are permitted
+ * provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list of conditions
+ * and the following disclaimer. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the documentation and/or other
+ * materials provided with the distribution. Neither the name of OWASP nor the names of its
+ * contributors may be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.owasp.validator.html.test;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.hamcrest.CoreMatchers.not;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.StringWriter;
+import java.net.URL;
+import java.util.Arrays;
+import java.util.Collection;
+import org.htmlunit.cyberneko.parsers.DOMFragmentParser;
+import org.htmlunit.cyberneko.xerces.dom.DocumentImpl;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.owasp.validator.html.AntiSamy;
+import org.owasp.validator.html.CleanResults;
+import org.owasp.validator.html.InternalPolicy;
+import org.owasp.validator.html.Policy;
+import org.owasp.validator.html.scan.HtmlSerializer;
+import org.w3c.dom.Document;
+import org.w3c.dom.DocumentFragment;
+import org.w3c.dom.Element;
+import org.w3c.dom.Text;
+import org.xml.sax.InputSource;
+
+import java.io.StringReader;
+
+/**
+ * Comprehensive test suite for {@link HtmlSerializer} — the pure-Java replacement for the
+ * Xerces {@code HTMLSerializer} dependency.
+ *
+ * <p>Tests are organised into:
+ * <ol>
+ *   <li>Unit tests that build DOM fragments programmatically and assert serialized output.</li>
+ *   <li>Data-driven round-trip tests that run {@code AntiSamy.scan()} in DOM mode and compare
+ *       against expected substrings or exact values.</li>
+ * </ol>
+ */
+public class HtmlSerializerTest {
+
+  // ---------------------------------------------------------------------------
+  // Infrastructure
+  // ---------------------------------------------------------------------------
+
+  private TestPolicy policy;
+  private AntiSamy antiSamy;
+  private Document document;
+  /** Policy with formatOutput=false (for unit tests that check exact serialized output). */
+  private InternalPolicy noFormatPolicy;
+  /** Policy with entityEncodeIntlChars=true (for tests checking entity-encoded output). */
+  private InternalPolicy encodeIntlPolicy;
+
+  @Before
+  public void setUp() throws Exception {
+    URL url = getClass().getResource("/antisamy.xml");
+    policy = TestPolicy.getInstance(url);
+    antiSamy = new AntiSamy();
+    document = new DocumentImpl();
+    noFormatPolicy = (InternalPolicy) policy.cloneWithDirective("formatOutput", "false");
+    encodeIntlPolicy = (InternalPolicy) policy.cloneWithDirective("entityEncodeIntlChars", "true");
+  }
+
+  // ---------------------------------------------------------------------------
+  // Helpers
+  // ---------------------------------------------------------------------------
+
+  /** Serialize a {@link DocumentFragment} using default policy settings. */
+  private String serialize(DocumentFragment frag) throws Exception {
+    return serialize(frag, (InternalPolicy) policy);
+  }
+
+  private String serialize(DocumentFragment frag, InternalPolicy pol) throws Exception {
+    final StringWriter sink = new StringWriter();
+    // The serializer writes straight into the in-memory sink, whose text is then returned.
+    new HtmlSerializer(sink, pol).serialize(frag);
+    return sink.toString();
+  }
+
+  /**
+   * Parse {@code html} into a {@link DocumentFragment} using the same cyberneko parser
+   * configuration as the DOM scanner, then serialize with {@link HtmlSerializer} using the given
+   * policy.
+   */
+  private String roundTrip(String html, InternalPolicy pol) throws Exception {
+    DOMFragmentParser parser = new DOMFragmentParser();
+    parser.setProperty("http://cyberneko.org/html/properties/names/elems", "lower");
+    parser.setFeature(
+        "http://cyberneko.org/html/features/scanner/style/strip-cdata-delims", false);
+    parser.setFeature("http://cyberneko.org/html/features/scanner/cdata-sections", true);
+    parser.setFeature("http://cyberneko.org/html/features/parse-noscript-content", false);
+    Document doc = new DocumentImpl();
+    DocumentFragment frag = doc.createDocumentFragment();
+    parser.parse(new InputSource(new StringReader(html)), frag);
+    return serialize(frag, pol);
+  }
+
+  /** Convenience: round-trip with default policy. */
+  private String roundTrip(String html) throws Exception {
+    return roundTrip(html, (InternalPolicy) policy);
+  }
+
+  /** Build a DocumentFragment containing a single element (no children). */
+  private DocumentFragment fragmentWithElement(String tagName) {
+    DocumentFragment frag = document.createDocumentFragment();
+    frag.appendChild(document.createElement(tagName));
+    return frag;
+  }
+
+  /** Creates a fragment whose only child is a {@code tagName} element wrapping {@code text}. */
+  private DocumentFragment fragmentWithText(String tagName, String text) {
+    final Element host = document.createElement(tagName);
+    host.appendChild(document.createTextNode(text));
+    final DocumentFragment holder = document.createDocumentFragment();
+    holder.appendChild(host);
+    return holder;
+  }
+
+  /** Scan via AntiSamy DOM scanner and return clean HTML. */
+  private String domScan(String html) throws Exception {
+    return antiSamy.scan(html, policy, AntiSamy.DOM).getCleanHTML();
+  }
+
+  /** Scan via AntiSamy DOM scanner with a specific policy. */
+  private String domScan(String html, InternalPolicy pol) throws Exception {
+    return antiSamy.scan(html, pol, AntiSamy.DOM).getCleanHTML();
+  }
+
+  // ===========================================================================
+  // 1. Basic element serialization
+  // ===========================================================================
+
+  @Test
+  public void simpleElementWithText() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "Hello");
+    assertEquals("<p>Hello</p>", serialize(frag, noFormatPolicy));
+  }
+
+  @Test
+  public void simpleElementNoChildren() throws Exception {
+    // A childless non-void element: either an explicit </div> or a self-closed form is accepted
+    DocumentFragment frag = fragmentWithElement("div");
+    String out = serialize(frag);
+    assertTrue("Expected <div> opening", out.startsWith("<div"));
+    assertTrue("Expected </div> or />", out.contains("</div>") || out.contains("/>"));
+  }
+
+  @Test
+  public void nestedElements() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element outer = document.createElement("div");
+    Element inner = document.createElement("p");
+    inner.appendChild(document.createTextNode("nested"));
+    outer.appendChild(inner);
+    frag.appendChild(outer);
+    String out = serialize(frag);
+    assertThat(out, containsString("<p>nested</p>"));
+    assertThat(out, containsString("<div"));
+    assertThat(out, containsString("</div>"));
+  }
+
+  @Test
+  public void siblingElements() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element p1 = document.createElement("p");
+    p1.appendChild(document.createTextNode("first"));
+    Element p2 = document.createElement("p");
+    p2.appendChild(document.createTextNode("second"));
+    frag.appendChild(p1);
+    frag.appendChild(p2);
+    String out = serialize(frag);
+    assertThat(out, containsString("<p>first</p>"));
+    assertThat(out, containsString("<p>second</p>"));
+  }
+
+  @Test
+  public void mixedContentElementAndText() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element p = document.createElement("p");
+    p.appendChild(document.createTextNode("before "));
+    Element em = document.createElement("em");
+    em.appendChild(document.createTextNode("emphasis"));
+    p.appendChild(em);
+    p.appendChild(document.createTextNode(" after"));
+    frag.appendChild(p);
+    String out = serialize(frag, noFormatPolicy);
+    assertEquals("<p>before <em>emphasis</em> after</p>", out);
+  }
+
+  // ===========================================================================
+  // 2. Void / self-closing elements
+  // ===========================================================================
+
+  @Test
+  public void brElementSelfCloses() throws Exception {
+    DocumentFragment frag = fragmentWithElement("br");
+    String out = serialize(frag, noFormatPolicy);
+    assertEquals("<br/>", out);
+  }
+
+  @Test
+  public void hrElementSelfCloses() throws Exception {
+    DocumentFragment frag = fragmentWithElement("hr");
+    String out = serialize(frag, noFormatPolicy);
+    assertEquals("<hr/>", out);
+  }
+
+  @Test
+  public void imgElementSelfCloses() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element img = document.createElement("img");
+    img.setAttribute("src", "test.png");
+    img.setAttribute("alt", "test");
+    frag.appendChild(img);
+    String out = serialize(frag);
+    assertThat(out, containsString("<img"));
+    assertThat(out, containsString("/>"));
+    assertThat(out, not(containsString("</img>")));
+  }
+
+  @Test
+  public void inputElementSelfCloses() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element inp = document.createElement("input");
+    inp.setAttribute("type", "text");
+    frag.appendChild(inp);
+    String out = serialize(frag);
+    assertThat(out, containsString("<input"));
+    assertThat(out, containsString("/>"));
+    assertThat(out, not(containsString("</input>")));
+  }
+
+  @Test
+  public void metaElementSelfCloses() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element meta = document.createElement("meta");
+    meta.setAttribute("charset", "UTF-8");
+    frag.appendChild(meta);
+    String out = serialize(frag);
+    assertThat(out, containsString("<meta"));
+    assertThat(out, containsString("/>"));
+    assertThat(out, not(containsString("</meta>")));
+  }
+
+  @Test
+  public void linkElementSelfCloses() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element link = document.createElement("link");
+    link.setAttribute("rel", "stylesheet");
+    link.setAttribute("href", "style.css");
+    frag.appendChild(link);
+    String out = serialize(frag, noFormatPolicy);
+    assertThat(out, containsString("<link"));
+    // Despite the test name: 'link' is in the default requiresClosingTags list => '>' not '/>'
+    assertThat(out, not(containsString("</link>")));
+  }
+
+  @Test
+  public void colElementSelfCloses() throws Exception {
+    DocumentFragment frag = fragmentWithElement("col");
+    String out = serialize(frag);
+    assertThat(out, containsString("<col"));
+    assertThat(out, containsString("/>"));
+  }
+
+  @Test
+  public void paramElementSelfCloses() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element param = document.createElement("param");
+    param.setAttribute("name", "movie");
+    frag.appendChild(param);
+    String out = serialize(frag);
+    assertThat(out, containsString("<param"));
+    assertThat(out, containsString("/>"));
+  }
+
+  // ===========================================================================
+  // 3. Attribute serialization
+  // ===========================================================================
+
+  @Test
+  public void regularAttribute() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element a = document.createElement("a");
+    a.setAttribute("href", "http://example.com");
+    a.appendChild(document.createTextNode("link"));
+    frag.appendChild(a);
+    String out = serialize(frag);
+    assertThat(out, containsString("href=\"http://example.com\""));
+  }
+
+  @Test
+  public void multipleAttributes() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element img = document.createElement("img");
+    img.setAttribute("src", "pic.jpg");
+    img.setAttribute("alt", "picture");
+    img.setAttribute("width", "100");
+    frag.appendChild(img);
+    String out = serialize(frag);
+    assertThat(out, containsString("src=\"pic.jpg\""));
+    assertThat(out, containsString("alt=\"picture\""));
+    assertThat(out, containsString("width=\"100\""));
+  }
+
+  @Test
+  public void attributeWithSpecialCharsAreEscaped() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element el = document.createElement("p");
+    el.setAttribute("title", "a<b>&c\"d");
+    el.appendChild(document.createTextNode("text"));
+    frag.appendChild(el);
+    String out = serialize(frag);
+    // Attribute value should have < and & and " escaped
+    assertThat(out, containsString("&lt;"));
+    assertThat(out, containsString("&amp;"));
+    assertThat(out, containsString("&quot;"));
+  }
+
+  @Test
+  public void booleanAttributeSelected() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element option = document.createElement("option");
+    option.setAttribute("selected", "selected");
+    option.setAttribute("value", "1");
+    option.appendChild(document.createTextNode("One"));
+    frag.appendChild(option);
+    String out = serialize(frag);
+    assertThat(out, containsString("selected"));
+    assertThat(out, not(containsString("selected=\"selected\""))); // boolean attr: no value
+    assertThat(out, containsString("value=\"1\""));
+  }
+
+  @Test
+  public void booleanAttributeChecked() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element inp = document.createElement("input");
+    inp.setAttribute("type", "checkbox");
+    inp.setAttribute("checked", "checked");
+    frag.appendChild(inp);
+    String out = serialize(frag);
+    assertThat(out, containsString("checked"));
+    // Not: checked="checked"
+    assertThat(out, not(containsString("checked=\"checked\"")));
+  }
+
+  @Test
+  public void booleanAttributeDisabled() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element btn = document.createElement("button");
+    btn.setAttribute("disabled", "disabled");
+    btn.appendChild(document.createTextNode("Click"));
+    frag.appendChild(btn);
+    String out = serialize(frag);
+    assertThat(out, containsString("disabled"));
+    assertThat(out, not(containsString("disabled=\"disabled\"")));
+  }
+
+  @Test
+  public void booleanAttributeMultiple() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element sel = document.createElement("select");
+    sel.setAttribute("multiple", "multiple");
+    frag.appendChild(sel);
+    String out = serialize(frag);
+    assertThat(out, containsString("multiple"));
+    assertThat(out, not(containsString("multiple=\"multiple\"")));
+  }
+
+  @Test
+  public void booleanAttributeReadonly() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element inp = document.createElement("input");
+    inp.setAttribute("readonly", "readonly");
+    frag.appendChild(inp);
+    String out = serialize(frag);
+    assertThat(out, containsString("readonly"));
+    assertThat(out, not(containsString("readonly=\"readonly\"")));
+  }
+
+  @Test
+  public void booleanAttributeNowrap() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element td = document.createElement("td");
+    td.setAttribute("nowrap", "nowrap");
+    td.appendChild(document.createTextNode("cell"));
+    frag.appendChild(td);
+    String out = serialize(frag);
+    assertThat(out, containsString("nowrap"));
+    assertThat(out, not(containsString("nowrap=\"nowrap\"")));
+  }
+
+  @Test
+  public void emptyAttributeValue() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element p = document.createElement("p");
+    p.setAttribute("class", "");
+    p.appendChild(document.createTextNode("text"));
+    frag.appendChild(p);
+    String out = serialize(frag);
+    assertThat(out, containsString("class=\"\""));
+  }
+
+  @Test
+  public void hrefAttributeEscapedAsUri() throws Exception {
+    // href and src are URI attributes in Xerces HTMLdtd → special char handling
+    DocumentFragment frag = document.createDocumentFragment();
+    Element a = document.createElement("a");
+    a.setAttribute("href", "http://example.com/path?a=1&b=2");
+    a.appendChild(document.createTextNode("link"));
+    frag.appendChild(a);
+    String out = serialize(frag);
+    assertThat(out, containsString("href="));
+    // & in URI gets encoded to &amp;
+    assertThat(out, containsString("&amp;"));
+  }
+
+  // ===========================================================================
+  // 4. Text content and entity encoding
+  // ===========================================================================
+
+  @Test
+  public void textWithLessThanIsEscaped() throws Exception {
+    assertEquals("<p>&lt;script&gt;</p>", serialize(fragmentWithText("p", "<script>"), noFormatPolicy));
+  }
+
+  @Test
+  public void textWithAmpIsEscaped() throws Exception {
+    assertEquals("<p>A &amp; B</p>", serialize(fragmentWithText("p", "A & B"), noFormatPolicy));
+  }
+
+  @Test
+  public void textWithGreaterThanIsEscaped() throws Exception {
+    String out = serialize(fragmentWithText("p", "a > b"), noFormatPolicy);
+    assertThat(out, containsString("&gt;"));
+  }
+
+  @Test
+  public void textWithDoubleQuoteIsEscaped() throws Exception {
+    String out = serialize(fragmentWithText("p", "say \"hello\""), noFormatPolicy);
+    assertThat(out, containsString("&quot;"));
+  }
+
+  @Test
+  public void textWithNonBreakingSpace() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "\u00A0");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&nbsp;"));
+  }
+
+  @Test
+  public void textWithCopyright() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "\u00A9");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&copy;"));
+  }
+
+  @Test
+  public void textWithRegisteredTrademark() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "\u00AE");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&reg;"));
+  }
+
+  @Test
+  public void textWithEuroSign() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "\u20AC");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&euro;"));
+  }
+
+  @Test
+  public void textWithNdash() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "\u2013");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&ndash;"));
+  }
+
+  @Test
+  public void textWithMdash() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "\u2014");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&mdash;"));
+  }
+
+  @Test
+  public void textWithLeftDoubleQuote() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "\u201C");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&ldquo;"));
+  }
+
+  @Test
+  public void textWithRightDoubleQuote() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "\u201D");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&rdquo;"));
+  }
+
+  @Test
+  public void textWithAccentedCharLatin1() throws Exception {
+    // é = U+00E9 = &eacute;
+    DocumentFragment frag = fragmentWithText("p", "\u00E9");
+    String out = serialize(frag, encodeIntlPolicy);
+    assertThat(out, containsString("&eacute;"));
+  }
+
+  @Test
+  public void textWithMultipleEntities() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "<b>&amp;</b>");
+    String out = serialize(frag, noFormatPolicy);
+    assertThat(out, containsString("&lt;b&gt;&amp;amp;&lt;/b&gt;"));
+  }
+
+  @Test
+  public void plainAsciiTextIsNotEscaped() throws Exception {
+    DocumentFragment frag = fragmentWithText("p", "Hello World 123");
+    String out = serialize(frag, noFormatPolicy);
+    assertEquals("<p>Hello World 123</p>", out);
+  }
+
+  // ===========================================================================
+  // 5. Script / style elements (unescaped content)
+  // ===========================================================================
+
+  @Test
+  public void styleContentNotEscaped() throws Exception {
+    DocumentFragment frag = fragmentWithText("style", "p { color: red; }");
+    String out = serialize(frag, noFormatPolicy);
+    assertEquals("<style>p { color: red; }</style>", out);
+  }
+
+  @Test
+  public void styleContentWithLtNotEscaped() throws Exception {
+    // Angle brackets inside style should NOT be HTML-escaped
+    DocumentFragment frag = fragmentWithText("style", "a < b");
+    String out = serialize(frag);
+    assertThat(out, containsString("a < b"));
+    assertThat(out, not(containsString("&lt;")));
+  }
+
+  @Test
+  public void scriptContentNotEscaped() throws Exception {
+    DocumentFragment frag = fragmentWithText("script", "var x = 1 < 2;");
+    String out = serialize(frag);
+    assertThat(out, containsString("var x = 1 < 2;"));
+    assertThat(out, not(containsString("&lt;")));
+  }
+
+  @Test
+  public void styleWithMultilineCss() throws Exception {
+    String css = "P {\n\tmargin-bottom: 0.08in;\n}\n";
+    DocumentFragment frag = fragmentWithText("style", css);
+    String out = serialize(frag);
+    assertThat(out, containsString(css));
+    assertThat(out, not(containsString("&lt;")));
+    assertThat(out, not(containsString("&amp;")));
+  }
+
+  // ===========================================================================
+  // 6. Comments
+  // ===========================================================================
+
+  @Test
+  public void commentPreservedWhenPolicyAllows() throws Exception {
+    InternalPolicy withComments =
+        (InternalPolicy) policy.cloneWithDirective("preserveComments", "true");
+    String result = domScan("<!-- a comment -->text", withComments);
+    assertThat(result, containsString("<!-- a comment -->"));
+  }
+
+  @Test
+  public void commentStrippedWhenPolicyDisallows() throws Exception {
+    InternalPolicy noComments =
+        (InternalPolicy) policy.cloneWithDirective("preserveComments", "false");
+    String result = domScan("<!-- secret -->visible", noComments);
+    assertThat(result, not(containsString("<!--")));
+    assertThat(result, containsString("visible"));
+  }
+
+  @Test
+  public void commentWithSpecialCharsPreserved() throws Exception {
+    InternalPolicy withComments =
+        (InternalPolicy) policy.cloneWithDirective("preserveComments", "true");
+    String result = domScan("<!-- <b>bold</b> -->text", withComments);
+    // Plain HTML comments survive (conditional directives are stripped); only "<!--" is checked
+    assertThat(result, containsString("<!--"));
+  }
+
+  // ===========================================================================
+  // 7. Indentation / formatting output
+  // ===========================================================================
+
+  @Test
+  public void formattingOffProducesNoExtraWhitespace() throws Exception {
+    // Uses the shared formatOutput=false policy built in setUp() rather than cloning it again;
+    // the expected output therefore has no newline or indentation between the nested tags.
+    DocumentFragment frag = document.createDocumentFragment();
+    Element outer = document.createElement("div");
+    Element inner = document.createElement("p");
+    inner.appendChild(document.createTextNode("text"));
+    outer.appendChild(inner);
+    frag.appendChild(outer);
+    String out = serialize(frag, noFormatPolicy);
+    assertEquals("<div><p>text</p></div>", out);
+  }
+
+  @Test
+  public void formattingOnAddsNewlineAfterRootElement() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    DocumentFragment frag = fragmentWithText("p", "text");
+    String out = serialize(frag, withFormat);
+    assertTrue("Expected trailing newline when formatting", out.endsWith("\n"));
+  }
+
+  @Test
+  public void formattingOnAddsNewlinesBetweenRootSiblings() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    DocumentFragment frag = document.createDocumentFragment();
+    Element p1 = document.createElement("p");
+    p1.appendChild(document.createTextNode("first"));
+    Element p2 = document.createElement("p");
+    p2.appendChild(document.createTextNode("second"));
+    frag.appendChild(p1);
+    frag.appendChild(p2);
+    String out = serialize(frag, withFormat);
+    assertThat(out, containsString("</p>\n<p>"));
+  }
+
+  @Test
+  public void formattingOnIndentsNestedElements() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    DocumentFragment frag = document.createDocumentFragment();
+    Element div = document.createElement("div");
+    Element p = document.createElement("p");
+    p.appendChild(document.createTextNode("content"));
+    div.appendChild(p);
+    frag.appendChild(div);
+    String out = serialize(frag, withFormat);
+    // Should have indentation before <p>
+    assertThat(out, containsString("\n"));
+    assertThat(out, containsString("  ")); // indent size=2
+  }
+
+  @Test
+  public void formattingOnVoidElementFollowedByNewline() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    DocumentFragment frag = fragmentWithElement("br");
+    String out = serialize(frag, withFormat);
+    assertEquals("<br/>\n", out);
+  }
+
+  @Test
+  public void whitespaceOnlyTextNodesSkippedWhenFormatting() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    // When formatting, whitespace text nodes between block elements should not appear
+    String out = roundTrip("<div>\n  <p>text</p>\n</div>", withFormat);
+    // The newlines between div and p are whitespace-only and should be skipped
+    // but the actual text content must be preserved
+    assertThat(out, containsString("<p>text</p>"));
+  }
+
+  @Test
+  public void stylePreservesWhitespaceWhenFormatting() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    String css = "a {\n  color: red;\n}\n";
+    DocumentFragment frag = fragmentWithText("style", css);
+    String out = serialize(frag, withFormat);
+    assertThat(out, containsString(css));
+  }
+
+  @Test
+  public void nestedFormattingDepth3() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    DocumentFragment frag = document.createDocumentFragment();
+    Element ul = document.createElement("ul");
+    Element li1 = document.createElement("li");
+    li1.appendChild(document.createTextNode("item1"));
+    Element li2 = document.createElement("li");
+    li2.appendChild(document.createTextNode("item2"));
+    ul.appendChild(li1);
+    ul.appendChild(li2);
+    frag.appendChild(ul);
+    String out = serialize(frag, withFormat);
+    // ul must contain li elements on separate lines
+    assertThat(out, containsString("<ul>\n"));
+    assertThat(out, containsString("\n</ul>"));
+  }
+
+  // ===========================================================================
+  // 8. Preserve-space elements (style, script, textarea, pre)
+  // ===========================================================================
+
+  @Test
+  public void textareaPreservesWhitespace() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    String content = "  line1\n  line2\n";
+    DocumentFragment frag = fragmentWithText("textarea", content);
+    String out = serialize(frag, withFormat);
+    assertThat(out, containsString(content));
+  }
+
+  @Test
+  public void prePreservesWhitespace() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    String content = "  code\n  more\n";
+    DocumentFragment frag = fragmentWithText("pre", content);
+    String out = serialize(frag, withFormat);
+    assertThat(out, containsString(content));
+  }
+
+  @Test
+  public void whiteSpaceNotSkippedInsidePreWhenFormatting() throws Exception {
+    InternalPolicy withFormat =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+    // A whitespace-only text node inside <pre> must NOT be dropped
+    DocumentFragment frag = document.createDocumentFragment();
+    Element pre = document.createElement("pre");
+    pre.appendChild(document.createTextNode("\n"));
+    frag.appendChild(pre);
+    String out = serialize(frag, withFormat);
+    // The \n text must follow the start tag directly (pre is preserve-space). A bare "\n" check
+    // would pass even if the node were dropped, because formatted output ends with a newline.
+    assertThat(out, containsString("<pre>\n"));
+  }
+
+  // ===========================================================================
+  // 9. Round-trip tests via AntiSamy DOM scanner
+  // ===========================================================================
+
+  @Test
+  public void roundTripSimpleParagraph() throws Exception {
+    String result = domScan("<p>Hello World</p>");
+    assertThat(result, containsString("<p>Hello World</p>"));
+  }
+
+  @Test
+  public void roundTripBoldAndItalic() throws Exception {
+    String result = domScan("<p><b>bold</b> and <i>italic</i></p>");
+    assertThat(result, containsString("<b>bold</b>"));
+    assertThat(result, containsString("<i>italic</i>"));
+  }
+
+  @Test
+  public void roundTripLink() throws Exception {
+    String result = domScan("<a href=\"http://example.com\">click here</a>");
+    assertThat(result, containsString("<a"));
+    assertThat(result, containsString("href="));
+    assertThat(result, containsString("click here</a>"));
+  }
+
+  /** An image must keep its tag and {@code src} attribute and never gain a closing tag. */
+  @Test
+  public void roundTripImage() throws Exception {
+    final String image = "<img src=\"image.png\" alt=\"img\"/>";
+    final String cleaned = domScan(image);
+    assertThat(cleaned, containsString("<img"));
+    assertThat(cleaned, containsString("src="));
+    assertThat(cleaned, not(containsString("</img>")));
+  }
+
+  /** A single-row, two-cell table keeps its outer tags and the text of both cells. */
+  @Test
+  public void roundTripTable() throws Exception {
+    final String cleaned = domScan("<table><tr><td>cell1</td><td>cell2</td></tr></table>");
+    for (String expected : new String[] {"<table>", "cell1", "cell2", "</table>"}) {
+      assertThat(cleaned, containsString(expected));
+    }
+  }
+
+  /** Ordered list tags and the first two list items are expected to be preserved verbatim. */
+  @Test
+  public void roundTripOrderedList() throws Exception {
+    final String cleaned = domScan("<ol><li>one</li><li>two</li><li>three</li></ol>");
+    assertThat(cleaned, containsString("<ol>"));
+    assertThat(cleaned, containsString("<li>one</li>"));
+    assertThat(cleaned, containsString("<li>two</li>"));
+    assertThat(cleaned, containsString("</ol>"));
+  }
+
+  /** Unordered list tags and the first list item are expected to be preserved verbatim. */
+  @Test
+  public void roundTripUnorderedList() throws Exception {
+    final String cleaned = domScan("<ul><li>a</li><li>b</li></ul>");
+    for (String expected : new String[] {"<ul>", "<li>a</li>", "</ul>"}) {
+      assertThat(cleaned, containsString(expected));
+    }
+  }
+
+  /** A {@code style} element holding multi-line CSS keeps both tags and its declaration. */
+  @Test
+  public void roundTripStyleTag() throws Exception {
+    final String styleSheet = "P {\n\tmargin-bottom: 0.08in;\n}\n";
+    final String cleaned = domScan("<style>" + styleSheet + "</style>");
+    assertThat(cleaned, containsString("<style>"));
+    assertThat(cleaned, containsString("margin-bottom"));
+    assertThat(cleaned, containsString("</style>"));
+  }
+
+  /**
+   * The child combinator in a CSS selector must be emitted literally: neither {@code &gt;} nor
+   * {@code &lt;} may show up in the scanned result.
+   */
+  @Test
+  public void roundTripStyleTagCssNotEscaped() throws Exception {
+    final String cleaned = domScan("<style>a[href] > span { color: red; }</style>");
+    for (String entity : new String[] {"&gt;", "&lt;"}) {
+      assertThat(cleaned, not(containsString(entity)));
+    }
+  }
+
+  /** Ampersand and angle-bracket entities in text must still be entity-encoded after a scan. */
+  @Test
+  public void roundTripSpecialEntities() throws Exception {
+    final String cleaned = domScan("<p>&amp; &lt; &gt; &quot; &nbsp;</p>");
+    for (String entity : new String[] {"&amp;", "&lt;", "&gt;"}) {
+      assertThat(cleaned, containsString(entity));
+    }
+  }
+
+  /** Scanning with {@code encodeIntlPolicy} is expected to turn the euro sign into an entity. */
+  @Test
+  public void roundTripEuroSymbol() throws Exception {
+    final String euroParagraph = "<p>\u20AC</p>";
+    assertThat(domScan(euroParagraph, encodeIntlPolicy), containsString("&euro;"));
+  }
+
+  @Test
+  public void roundTripHeadingElements() throws Exception {
+    for (int i = 1; i <= 6; i++) {
+      String html = "<h" + i + ">heading " + i + "</h" + i + ">";
+      String result = domScan(html);
+      assertThat(result, containsString("<h" + i + ">heading " + i + "</h" + i + ">"));
+    }
+  }
+
+  /**
+   * A {@code select} with two options (the second flagged {@code selected} without a value) keeps
+   * its own tags and the text of both options.
+   */
+  @Test
+  public void roundTripSelectWithOptions() throws Exception {
+    final StringBuilder markup = new StringBuilder("<select name=\"x\">");
+    markup.append("<option value=\"1\">One</option>");
+    markup.append("<option value=\"2\" selected>Two</option>");
+    markup.append("</select>");
+
+    final String cleaned = domScan(markup.toString());
+    assertThat(cleaned, containsString("<select"));
+    assertThat(cleaned, containsString("One</option>"));
+    assertThat(cleaned, containsString("Two</option>"));
+    assertThat(cleaned, containsString("</select>"));
+  }
+
+  /** For each listed input type the tag must survive and never be given a closing tag. */
+  @Test
+  public void roundTripInputTypes() throws Exception {
+    final String[] inputTypes = {"text", "password", "checkbox", "radio", "submit", "hidden"};
+    for (int i = 0; i < inputTypes.length; i++) {
+      // The label doubles as the assertion message so a failure names the offending type.
+      final String label = "input[type=" + inputTypes[i] + "]";
+      final String cleaned = domScan("<input type=\"" + inputTypes[i] + "\" name=\"x\"/>");
+      assertThat(label, cleaned, containsString("<input"));
+      assertThat(label + " closed", cleaned, not(containsString("</input>")));
+    }
+  }
+
+  /** A {@code span} carrying a {@code class} attribute keeps the tag, attribute and text. */
+  @Test
+  public void roundTripSpanWithClass() throws Exception {
+    final String cleaned = domScan("<span class=\"highlight\">text</span>");
+    for (String piece : new String[] {"<span", "class=\"highlight\"", "text</span>"}) {
+      assertThat(cleaned, containsString(piece));
+    }
+  }
+
+  /** A {@code div} carrying an {@code id} attribute keeps the tag, attribute and text. */
+  @Test
+  public void roundTripDivWithId() throws Exception {
+    final String source = "<div id=\"main\">content</div>";
+    final String cleaned = domScan(source);
+    assertThat(cleaned, containsString("<div"));
+    assertThat(cleaned, containsString("id=\"main\""));
+    assertThat(cleaned, containsString("content</div>"));
+  }
+
+  /**
+   * A form with a text input and a submit input keeps its own tags and the {@code type}
+   * attribute of both inputs.
+   */
+  @Test
+  public void roundTripFormElements() throws Exception {
+    final String form =
+        String.join(
+            "",
+            "<form action=\"/submit\" method=\"post\">",
+            "<input type=\"text\" name=\"user\"/>",
+            "<input type=\"submit\" value=\"Go\"/>",
+            "</form>");
+    final String cleaned = domScan(form);
+    assertThat(cleaned, containsString("<form"));
+    assertThat(cleaned, containsString("</form>"));
+    assertThat(cleaned, containsString("type=\"text\""));
+    assertThat(cleaned, containsString("type=\"submit\""));
+  }
+
+  /** Nested block content: a paragraph inside a {@code blockquote} keeps all of its tags. */
+  @Test
+  public void roundTripBlockquote() throws Exception {
+    final String cleaned = domScan("<blockquote><p>quote</p></blockquote>");
+    for (String piece : new String[] {"<blockquote>", "<p>quote</p>", "</blockquote>"}) {
+      assertThat(cleaned, containsString(piece));
+    }
+  }
+
+  /** Inline {@code sub} and {@code sup} elements inside a paragraph must both be preserved. */
+  @Test
+  public void roundTripSuperscriptSubscript() throws Exception {
+    final String formula = "<p>H<sub>2</sub>O and E=mc<sup>2</sup></p>";
+    final String cleaned = domScan(formula);
+    assertThat(cleaned, containsString("<sub>2</sub>"));
+    assertThat(cleaned, containsString("<sup>2</sup>"));
+  }
+
+  /** A horizontal rule between paragraphs is emitted self-closed, without a closing tag. */
+  @Test
+  public void roundTripHorizontalRule() throws Exception {
+    final String cleaned = domScan("<p>before</p><hr/><p>after</p>");
+    assertThat(cleaned, containsString("<hr/>"));
+    assertThat(cleaned, not(containsString("</hr>")));
+  }
+
+  /** A line break inside a paragraph is emitted self-closed, without a closing tag. */
+  @Test
+  public void roundTripBreakElement() throws Exception {
+    final String cleaned = domScan("<p>line1<br/>line2</p>");
+    assertThat(cleaned, containsString("<br/>"));
+    assertThat(cleaned, not(containsString("</br>")));
+  }
+
+  /** Neither the {@code script} tag nor its payload may be left in the scanned result. */
+  @Test
+  public void roundTripXssScriptTagStripped() throws Exception {
+    final String cleaned = domScan("<script>alert('xss')</script>");
+    for (String forbidden : new String[] {"<script>", "alert"}) {
+      assertThat(cleaned, not(containsString(forbidden)));
+    }
+  }
+
+  /** An {@code onclick} handler must be removed while the paragraph text is kept. */
+  @Test
+  public void roundTripXssOnclickStripped() throws Exception {
+    final String attack = "<p onclick=\"alert(1)\">text</p>";
+    final String cleaned = domScan(attack);
+    assertThat(cleaned, not(containsString("onclick")));
+    assertThat(cleaned, containsString("text"));
+  }
+
+  /** A {@code javascript:} URL in an anchor's {@code href} must not reach the output. */
+  @Test
+  public void roundTripXssInHref() throws Exception {
+    assertThat(domScan("<a href=\"javascript:alert(1)\">click</a>"), not(containsString("javascript:")));
+  }
+
+  // ===========================================================================
+  // 10. Formatting output round-trip tests
+  // ===========================================================================
+
+  /**
+   * Regression for GitHub issue #484 (newlines between root-level elements): once line breaks are
+   * removed and whitespace runs are collapsed, both paragraphs and the {@code <br/>} between them
+   * must be present.
+   */
+  @Test
+  public void issueGithub484FormattingBrBetweenParagraphs() throws Exception {
+    final String scanned = domScan("<p>this is para data</p><br/><p>this is para data 2</p>");
+    // Normalise formatting so the assertions do not depend on where line breaks were inserted.
+    final String withoutBreaks = scanned.replaceAll("\r?\n", "");
+    final String collapsed = withoutBreaks.replaceAll("\\s\\s+", " ");
+    assertThat(collapsed, containsString("<p>this is para data</p>"));
+    assertThat(collapsed, containsString("<br/>"));
+    assertThat(collapsed, containsString("<p>this is para data 2</p>"));
+  }
+
+  /**
+   * Scans a {@code style} element that is never closed in the input and checks that the CSS
+   * declaration is kept and that a closing {@code </style>} tag appears in the result.
+   *
+   * <p>This is the issue #30 scenario even though the method name mentions #484. No assertion is
+   * made about a trailing newline.
+   */
+  @Test
+  public void issueGithub484StyleTagEndsWithNewline() throws Exception {
+    final String styleSheet = "P {\n\tmargin-bottom: 0.08in;\n}\n";
+    final String cleaned = domScan("<style>" + styleSheet);
+    for (String expected : new String[] {"margin-bottom", "</style>"}) {
+      assertThat(cleaned, containsString(expected));
+    }
+  }
+
+  /**
+   * Regression for GitHub issue #453: after line breaks are removed and whitespace runs are
+   * collapsed, the nested body, table, select and first option must be separated by one space.
+   */
+  @Test
+  public void issueGithub453SelectAndOptionIndented() throws Exception {
+    final String html =
+        String.join(
+            "\n",
+            "<html lang=\"en\">",
+            "<head>",
+            "</head>",
+            "<table>",
+            "<SELECT NAME=\"Lang\">",
+            "<OPTION VALUE=\"da\">Dansk</OPTION>",
+            "<OPTION VALUE=\"en\" selected=selected>English</OPTION>",
+            "</SELECT>",
+            "</table>",
+            "</html>");
+    final String expected =
+        "<body> <table> <select name=\"Lang\"> <option value=\"da\">Dansk</option> ";
+
+    final String collapsed = domScan(html).replaceAll("\r?\n", "").replaceAll("\\s\\s+", " ");
+    assertThat(collapsed, containsString(expected));
+  }
+
+  /**
+   * Serializes a table, row and cell hierarchy with {@code formatOutput} enabled and checks that
+   * the output contains a run of two spaces as well as the cell text.
+   */
+  @Test
+  public void formattedOutputHasProperIndentForDeepNesting() throws Exception {
+    InternalPolicy formatting =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+
+    // Build the tree from the innermost node outwards.
+    Element cell = document.createElement("td");
+    cell.appendChild(document.createTextNode("cell"));
+    Element row = document.createElement("tr");
+    row.appendChild(cell);
+    Element tableElement = document.createElement("table");
+    tableElement.appendChild(row);
+    DocumentFragment fragment = document.createDocumentFragment();
+    fragment.appendChild(tableElement);
+
+    String serialized = serialize(fragment, formatting);
+    // Two consecutive spaces are taken as evidence of indentation.
+    assertThat(serialized, containsString("  "));
+    assertThat(serialized, containsString("cell"));
+  }
+
+  // ===========================================================================
+  // 11. Policy directive interactions
+  // ===========================================================================
+
+  /**
+   * With the {@code entityEncodeIntlChars} directive set to {@code true}, an e-acute in text is
+   * expected to be written as the named entity {@code &eacute;}.
+   */
+  @Test
+  public void encodeIntlCharactersTrue() throws Exception {
+    final InternalPolicy encoding =
+        (InternalPolicy) policy.cloneWithDirective("entityEncodeIntlChars", "true");
+    final String serialized = serialize(fragmentWithText("p", "\u00E9"), encoding);
+    assertThat(serialized, containsString("&eacute;"));
+  }
+
+  /**
+   * With the {@code entityEncodeIntlChars} directive set to {@code false}, an e-acute in text may
+   * be written either literally or as {@code &eacute;}; both forms are accepted.
+   */
+  @Test
+  public void encodeIntlCharactersFalse() throws Exception {
+    final InternalPolicy plain =
+        (InternalPolicy) policy.cloneWithDirective("entityEncodeIntlChars", "false");
+    final String serialized = serialize(fragmentWithText("p", "\u00E9"), plain);
+    final boolean asEntity = serialized.contains("&eacute;");
+    final boolean asLiteral = serialized.contains("\u00E9");
+    assertTrue("Should contain é", asEntity || asLiteral);
+  }
+
+  // ===========================================================================
+  // 12. Data-driven tests
+  // ===========================================================================
+
+  /**
+   * Parameterized data-driven test: each row is {@code [description, inputHtml,
+   * expectedSubstring]}. Every row is scanned with {@code AntiSamy.DOM} and the cleaned HTML must
+   * contain the expected substring.
+   *
+   * <p>NOTE(review): this is a nested class with its own runner; confirm that the build's test
+   * runner configuration actually picks it up.
+   */
+  @RunWith(Parameterized.class)
+  public static class DataDrivenRoundTripTest {
+
+    // Row label; used as the parameterized test name and as part of the failure message.
+    private final String description;
+    // HTML handed to the scanner.
+    private final String inputHtml;
+    // Substring that must appear in the cleaned HTML.
+    private final String expectedSubstring;
+    // Both are re-created in setUp() before every test.
+    private AntiSamy antiSamy;
+    private TestPolicy policy;
+
+    /**
+     * Receives one row of {@link #data()}.
+     *
+     * @param description label of the row
+     * @param inputHtml HTML to scan
+     * @param expected substring expected in the cleaned HTML
+     */
+    public DataDrivenRoundTripTest(String description, String inputHtml, String expected) {
+      this.description = description;
+      this.inputHtml = inputHtml;
+      this.expectedSubstring = expected;
+    }
+
+    /** Loads the policy from the {@code /antisamy.xml} resource and creates a new scanner. */
+    @Before
+    public void setUp() throws Exception {
+      URL url = getClass().getResource("/antisamy.xml");
+      policy = TestPolicy.getInstance(url);
+      antiSamy = new AntiSamy();
+    }
+
+    /**
+     * Supplies the test rows; the first column doubles as the test name.
+     *
+     * @return rows of {@code [description, inputHtml, expectedSubstring]}
+     */
+    @Parameterized.Parameters(name = "{0}")
+    public static Collection<Object[]> data() {
+      return Arrays.asList(
+          new Object[][] {
+            // Basic text formatting
+            {"bold text", "<b>bold</b>", "<b>bold</b>"},
+            {"italic text", "<i>italic</i>", "<i>italic</i>"},
+            {"underline text", "<u>underline</u>", "<u>underline</u>"},
+            {"strikethrough", "<s>strike</s>", "strike"},
+            {"strong text", "<strong>strong</strong>", "<strong>strong</strong>"},
+            {"em text", "<em>emphasis</em>", "<em>emphasis</em>"},
+            {"code inline", "<code>x=1</code>", "<code>x=1</code>"},
+            {"kbd element", "<kbd>Ctrl+C</kbd>", "Ctrl"},
+            {"samp element", "<samp>output</samp>", "output"},
+            // Headings
+            {"h1", "<h1>Heading 1</h1>", "Heading 1"},
+            {"h2", "<h2>Heading 2</h2>", "Heading 2"},
+            {"h3", "<h3>Heading 3</h3>", "Heading 3"},
+            {"h4", "<h4>Heading 4</h4>", "Heading 4"},
+            {"h5", "<h5>Heading 5</h5>", "Heading 5"},
+            {"h6", "<h6>Heading 6</h6>", "Heading 6"},
+            // Lists
+            {"ol li", "<ol><li>item</li></ol>", "<li>item</li>"},
+            {"ul li", "<ul><li>item</li></ul>", "<li>item</li>"},
+            {"dl dt dd", "<dl><dt>term</dt><dd>def</dd></dl>", "term"},
+            // Tables
+            {"table basic", "<table><tr><td>data</td></tr></table>", "data"},
+            {"table with header", "<table><tr><th>head</th></tr></table>", "head"},
+            {"table colspan", "<table><tr><td colspan=\"2\">span</td></tr></table>", "span"},
+            {"table border", "<table border=\"1\"><tr><td>x</td></tr></table>", "border"},
+            // Links
+            {"anchor", "<a href=\"http://example.com\">link</a>", "link"},
+            {"anchor no href", "<a>text</a>", "text"},
+            // Images
+            {"img", "<img src=\"x.png\" alt=\"\"/>", "<img"},
+            // Form elements
+            {"input text", "<input type=\"text\" name=\"x\"/>", "type=\"text\""},
+            {"input hidden", "<input type=\"hidden\" name=\"x\" value=\"v\"/>", "type=\"hidden\""},
+            {"select option", "<select><option>a</option></select>", "option"},
+            {"textarea", "<textarea>content</textarea>", "content"},
+            // Structure
+            {"div", "<div>content</div>", "content"},
+            {"span", "<span>content</span>", "content"},
+            {"p", "<p>content</p>", "content"},
+            {"pre", "<pre>  code  </pre>", "  code  "},
+            {"blockquote", "<blockquote>quote</blockquote>", "quote"},
+            {"abbr", "<abbr title=\"HyperText\">HTML</abbr>", "HTML"},
+            {"address", "<address>123 Main St</address>", "123 Main St"},
+            {"article", "<article>story</article>", "story"},
+            {"aside", "<aside>sidebar</aside>", "sidebar"},
+            {"caption", "<table><caption>title</caption><tr><td>d</td></tr></table>", "title"},
+            {"cite", "<cite>Author</cite>", "Author"},
+            {"del", "<del>removed</del>", "removed"},
+            {"dfn", "<dfn>term</dfn>", "term"},
+            {"figcaption", "<figure><figcaption>cap</figcaption></figure>", "cap"},
+            {"footer", "<footer>foot</footer>", "foot"},
+            {"header", "<header>top</header>", "top"},
+            {"mark", "<mark>highlighted</mark>", "highlighted"},
+            {"nav", "<nav>menu</nav>", "menu"},
+            {"section", "<section>sec</section>", "sec"},
+            {"small", "<small>fine print</small>", "fine print"},
+            // Void elements produce self-closing output
+            {"br self-close", "<p>a<br/>b</p>", "<br/>"},
+            {"hr self-close", "<p>a</p><hr/><p>b</p>", "<hr/>"},
+            // Encoding
+            {"amp in text", "<p>a &amp; b</p>", "&amp;"},
+            {"lt in text", "<p>a &lt; b</p>", "&lt;"},
+            {"gt in text", "<p>a &gt; b</p>", "&gt;"},
+            {"nbsp", "<p>&nbsp;</p>", "&nbsp;"},
+            {"copy", "<p>&copy;</p>", "&copy;"},
+            {"reg", "<p>&reg;</p>", "&reg;"},
+            {"trade", "<p>&trade;</p>", "&trade;"},
+            // Nested mixed content
+            {"p with inline", "<p><b>b</b> and <i>i</i></p>", "<b>b</b>"},
+            {"nested lists",
+             "<ul><li>a<ul><li>b</li></ul></li></ul>",
+             "b"},
+            {"blockquote with p",
+             "<blockquote><p>inner</p></blockquote>",
+             "inner"},
+            // Attributes
+            {"class attr", "<p class=\"foo\">x</p>", "class=\"foo\""},
+            {"id attr", "<div id=\"myid\">x</div>", "id=\"myid\""},
+            {"style attr", "<span style=\"color:red\">x</span>", "color:red"},
+            {"lang attr", "<p lang=\"en\">x</p>", "lang=\"en\""},
+            {"dir attr", "<p dir=\"ltr\">x</p>", "x"},
+            // target attribute (may or may not be allowed; only checks that the link text is output)
+            {"target attr", "<a href=\"/\" target=\"_blank\">x</a>", "x"},
+          });
+    }
+
+    /** Scans the row's input with the DOM scanner and checks for the expected substring. */
+    @Test
+    public void roundTripContainsExpected() throws Exception {
+      CleanResults cr = antiSamy.scan(inputHtml, policy, AntiSamy.DOM);
+      String result = cr.getCleanHTML();
+      // The reason string carries both the row label and the actual output for easier diagnosis.
+      assertThat(description + " → " + result, result, containsString(expectedSubstring));
+    }
+  }
+
+  // ===========================================================================
+  // 13. Edge cases and boundary conditions
+  // ===========================================================================
+
+  /** Serializing a fragment without children must yield the empty string. */
+  @Test
+  public void emptyFragmentProducesEmptyOutput() throws Exception {
+    final String serialized = serialize(document.createDocumentFragment());
+    assertEquals("", serialized);
+  }
+
+  /** A fragment holding only an empty text node must serialize to nothing but whitespace. */
+  @Test
+  public void emptyTextNodeProducesNoOutput() throws Exception {
+    DocumentFragment holder = document.createDocumentFragment();
+    holder.appendChild(document.createTextNode(""));
+    final boolean blank = serialize(holder).trim().isEmpty();
+    assertTrue("Empty text node should produce empty or whitespace output", blank);
+  }
+
+  /**
+   * Builds a {@code div} with nine descendants nested one inside the other, alternating between
+   * {@code p} and {@code span}, and checks that the innermost text is still serialized.
+   */
+  @Test
+  public void deeplyNestedElements() throws Exception {
+    final String[] tagCycle = {"p", "span"};
+    DocumentFragment fragment = document.createDocumentFragment();
+    Element innermost = document.createElement("div");
+    fragment.appendChild(innermost);
+    for (int depth = 0; depth < 9; depth++) {
+      // Even depths get a paragraph, odd depths a span.
+      Element next = document.createElement(tagCycle[depth % 2]);
+      innermost.appendChild(next);
+      innermost = next;
+    }
+    innermost.appendChild(document.createTextNode("deep"));
+    assertThat(serialize(fragment), containsString("deep"));
+  }
+
+  @Test
+  public void largeTextContent() throws Exception {
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < 1000; i++) {
+      sb.append("word").append(i).append(" ");
+    }
+    String text = sb.toString().trim();
+    DocumentFragment frag = fragmentWithText("p", text);
+    String out = serialize(frag);
+    assertThat(out, containsString("word0 "));
+    assertThat(out, containsString("word999"));
+  }
+
+  /**
+   * Serializes a paragraph holding two CJK characters and accepts either the literal first
+   * character or a numeric character reference in the output.
+   */
+  @Test
+  public void unicodeContentOutsideLatin1() throws Exception {
+    final String serialized = serialize(fragmentWithText("p", "\u4E2D\u6587"));
+    assertThat(serialized, containsString("<p>"));
+    final boolean literal = serialized.contains("\u4E2D");
+    final boolean numericReference = serialized.contains("&#");
+    assertTrue("Chinese chars", literal || numericReference);
+  }
+
+  @Test
+  public void multipleRootElementsAreAllSerialized() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    for (int i = 0; i < 5; i++) {
+      Element p = document.createElement("p");
+      p.appendChild(document.createTextNode("para" + i));
+      frag.appendChild(p);
+    }
+    String out = serialize(frag);
+    for (int i = 0; i < 5; i++) {
+      assertThat(out, containsString("para" + i));
+    }
+  }
+
+  /** An upper-case tag in the input is expected to come back in lower case after the scan. */
+  @Test
+  public void tagNameLowercaseAfterCybernekoParsing() throws Exception {
+    final String cleaned = domScan("<P>Hello</P>");
+    assertThat(cleaned, containsString("<p>Hello</p>"));
+    assertThat(cleaned, not(containsString("<P>")));
+  }
+
+  /** An upper-case attribute name is expected to come back in lower case after the scan. */
+  @Test
+  public void attributeNameLowercaseAfterCybernekoParsing() throws Exception {
+    final String cleaned = domScan("<P CLASS=\"x\">Hello</P>");
+    assertThat(cleaned, containsString("class=\"x\""));
+    assertThat(cleaned, not(containsString("CLASS")));
+  }
+
+  /** Entity-encoded script markup in text must stay encoded and never become a real tag. */
+  @Test
+  public void textXssAngleBracketsInTextContent() throws Exception {
+    final String encodedAttack = "<p>&lt;script&gt;alert(1)&lt;/script&gt;</p>";
+    final String cleaned = domScan(encodedAttack);
+    assertThat(cleaned, containsString("&lt;script&gt;"));
+    assertThat(cleaned, not(containsString("<script>")));
+  }
+
+  /** Double-encoded markup in text must not be decoded into a real {@code b} tag. */
+  @Test
+  public void textXssDoubleEncoded() throws Exception {
+    assertThat(domScan("<p>&amp;lt;b&amp;gt;</p>"), not(containsString("<b>")));
+  }
+
+  /** An anchor with three attributes must have all three attribute names in its output. */
+  @Test
+  public void multipleAttributesOnSameElement() throws Exception {
+    Element anchor = document.createElement("a");
+    anchor.setAttribute("href", "http://example.com");
+    anchor.setAttribute("title", "Example");
+    anchor.setAttribute("class", "link");
+    anchor.appendChild(document.createTextNode("go"));
+    DocumentFragment fragment = document.createDocumentFragment();
+    fragment.appendChild(anchor);
+
+    final String serialized = serialize(fragment);
+    for (String attribute : new String[] {"href=", "title=", "class="}) {
+      assertThat(serialized, containsString(attribute));
+    }
+  }
+
+  /**
+   * With {@code formatOutput} enabled, text that follows an anchor inside a paragraph must be
+   * written directly after the closing {@code </a>} tag, with no line break in between.
+   */
+  @Test
+  public void aElementDoesNotAffectChildIndentAfterElement() throws Exception {
+    InternalPolicy formatting =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+
+    Element anchor = document.createElement("a");
+    anchor.setAttribute("href", "#");
+    anchor.appendChild(document.createTextNode("link"));
+    Element paragraph = document.createElement("p");
+    paragraph.appendChild(anchor);
+    paragraph.appendChild(document.createTextNode(" text"));
+    DocumentFragment fragment = document.createDocumentFragment();
+    fragment.appendChild(paragraph);
+
+    // The closing tag and the following text must be adjacent in the output.
+    assertThat(serialize(fragment, formatting), containsString("link</a> text"));
+  }
+
+  /**
+   * Serializes a one-row table with two cells while {@code formatOutput} is enabled. Only the
+   * presence of both cell texts is asserted; the exact layout is not checked.
+   */
+  @Test
+  public void tdElementDoesNotAffectChildIndentAfterElement() throws Exception {
+    InternalPolicy formatting =
+        (InternalPolicy) policy.cloneWithDirective("formatOutput", "true");
+
+    Element row = document.createElement("tr");
+    for (String cellText : new String[] {"c1", "c2"}) {
+      Element cell = document.createElement("td");
+      cell.appendChild(document.createTextNode(cellText));
+      row.appendChild(cell);
+    }
+    Element tableElement = document.createElement("table");
+    tableElement.appendChild(row);
+    DocumentFragment fragment = document.createDocumentFragment();
+    fragment.appendChild(tableElement);
+
+    final String serialized = serialize(fragment, formatting);
+    assertThat(serialized, containsString("c1"));
+    assertThat(serialized, containsString("c2"));
+  }
+
+  /**
+   * A childless {@code br} serialized with {@code noFormatPolicy} must come out as exactly
+   * {@code <br/>}.
+   *
+   * <p>NOTE(review): presumably this relies on {@code br} being listed in allowedEmptyTags and
+   * not in requiresClosingTags; confirm against the policy file.
+   */
+  @Test
+  public void selfClosingAllowedForEmptyAllowedElement() throws Exception {
+    DocumentFragment fragment = document.createDocumentFragment();
+    fragment.appendChild(document.createElement("br"));
+    assertEquals("<br/>", serialize(fragment, noFormatPolicy));
+  }
+
+  /** A {@code div} with text content must get separate open and close tags, never self-close. */
+  @Test
+  public void nonVoidElementWithChildrenNotSelfClosed() throws Exception {
+    final String serialized = serialize(fragmentWithText("div", "text"), noFormatPolicy);
+    for (String tag : new String[] {"<div>", "</div>"}) {
+      assertThat(serialized, containsString(tag));
+    }
+    assertThat(serialized, not(containsString("<div/>")));
+  }
+
+  // ===========================================================================
+  // 14. Specific regression tests (GitHub issues)
+  // ===========================================================================
+
+  /**
+   * Issue #30: an unclosed {@code style} element keeps its opening tag and CSS declaration, gains
+   * a closing tag, and its content is not entity-escaped.
+   */
+  @Test
+  public void issue30StyleTagPreservesContent() throws Exception {
+    final String styleSheet = "P {\n\tmargin-bottom: 0.08in;\n}\n";
+    final String cleaned = antiSamy.scan("<style>" + styleSheet, policy, AntiSamy.DOM).getCleanHTML();
+    for (String expected : new String[] {"<style>", "margin-bottom", "</style>"}) {
+      assertThat(cleaned, containsString(expected));
+    }
+    // Neither angle-bracket entity may show up in the result.
+    for (String entity : new String[] {"&lt;", "&gt;"}) {
+      assertThat(cleaned, not(containsString(entity)));
+    }
+  }
+
+  /**
+   * Issue #30: scans an unclosed {@code style} element whose CSS ends with a newline. Only the
+   * presence of the CSS value is asserted; the trailing newline itself is not checked.
+   */
+  @Test
+  public void issue30StyleTagTrailingNewlineWhenFormatted() throws Exception {
+    final String styleSheet = "P {\n\tmargin-bottom: 0.08in;\n}\n";
+    final CleanResults scanned = antiSamy.scan("<style>" + styleSheet, policy, AntiSamy.DOM);
+    assertThat(scanned.getCleanHTML(), containsString("0.08in"));
+  }
+
+  /** Issue #484: two paragraphs separated by a {@code br} keep their text and the break. */
+  @Test
+  public void issue484ParagraphsWithBrElement() throws Exception {
+    final String source = "<p>this is para data</p><br/><p>this is para data 2</p>";
+    final String cleaned = antiSamy.scan(source, policy, AntiSamy.DOM).getCleanHTML();
+    assertThat(cleaned, containsString("this is para data"));
+    assertThat(cleaned, containsString("<br/>"));
+    assertThat(cleaned, containsString("this is para data 2"));
+  }
+
+  /**
+   * Issue #453: scans a document whose table wraps an upper-case {@code SELECT}. After line
+   * breaks are removed and whitespace runs are collapsed, the body, table, select and first
+   * option must appear in lower case, separated by single spaces.
+   */
+  @Test
+  public void issue453HtmlStructureWithSelect() throws Exception {
+    final StringBuilder source = new StringBuilder();
+    source.append("<html lang=\"en\">\n<head>\n</head>\n<table>\n");
+    source.append("<SELECT NAME=\"Lang\">\n");
+    source.append("<OPTION VALUE=\"da\">Dansk</OPTION>\n");
+    source.append("<OPTION VALUE=\"en\" selected=selected>English</OPTION>\n");
+    source.append("</SELECT>\n</table>\n</html>");
+
+    final CleanResults scanned = antiSamy.scan(source.toString(), policy, AntiSamy.DOM);
+    final String withoutBreaks = scanned.getCleanHTML().replaceAll("\r?\n", "");
+    final String collapsed = withoutBreaks.replaceAll("\\s\\s+", " ");
+    final String expected =
+        "<body> <table> <select name=\"Lang\"> <option value=\"da\">Dansk</option> ";
+    assertThat(collapsed, containsString(expected));
+  }
+
+  /** A declaration nested inside a media query must still be present after the scan. */
+  @Test
+  public void cssWithMediaQuery() throws Exception {
+    final String mediaQuery = "@media screen { body { font-size: 14px; } }";
+    assertThat(domScan("<style>" + mediaQuery + "</style>"), containsString("font-size"));
+  }
+
+  /** A full document with both head and body must still yield the body's paragraph text. */
+  @Test
+  public void htmlWithBothHeadAndBody() throws Exception {
+    final String page = "<html><head><title>T</title></head><body><p>text</p></body></html>";
+    assertThat(domScan(page), containsString("text"));
+  }
+
+  @Test
+  public void longAttributeValue() throws Exception {
+    String longVal = "x".repeat(500);
+    DocumentFragment frag = document.createDocumentFragment();
+    Element el = document.createElement("p");
+    el.setAttribute("class", longVal);
+    el.appendChild(document.createTextNode("text"));
+    frag.appendChild(el);
+    String out = serialize(frag);
+    assertThat(out, containsString("class=\"" + longVal + "\""));
+  }
+
+  @Test
+  public void manyChildElements() throws Exception {
+    DocumentFragment frag = document.createDocumentFragment();
+    Element ul = document.createElement("ul");
+    frag.appendChild(ul);
+    for (int i = 0; i < 50; i++) {
+      Element li = document.createElement("li");
+      li.appendChild(document.createTextNode("item" + i));
+      ul.appendChild(li);
+    }
+    String out = serialize(frag);
+    assertThat(out, containsString("item0"));
+    assertThat(out, containsString("item49"));
+  }
+}