fixes and refactor

2025-08-14 19:16:28 -05:00
parent 5111079149
commit bfc04a6909
61 changed files with 5689 additions and 767 deletions
--- a/tests/test_highlight_parity.py
+++ b/tests/test_highlight_parity.py
@@ -0,0 +1,82 @@
+import json
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+
+from app.api.search_highlight import build_query_tokens, highlight_text
+
+
+def _run_node_highlight(value: str, query: str):
+    """Run the client highlight.js under Node; return its tokens and html.
+
+    Uses a stub ``window.htmlSanitizer.escape`` meant to mirror server escaping.
+    Returns the parsed dict, or None if Node/highlight.js is missing or Node fails.
+    """
+    node_path = shutil.which("node")
+    if not node_path:
+        return None
+
+    repo_root = Path(__file__).resolve().parents[1]
+    highlight_js_path = repo_root / "static/js/highlight.js"
+    if not highlight_js_path.exists():
+        return None
+
+    # The module path rides in the JSON payload rather than being spliced into
+    # the JS source, so quotes or backslashes in the path cannot break it.
+    module_path = highlight_js_path.as_posix()
+    payload = json.dumps({"value": value, "query": query, "path": module_path})
+    script = """
+      // Under `node -e` no script path occupies argv[1], so extra args do not
+      // start at argv[2]; the payload is always the last argv element.
+      const input = JSON.parse(process.argv[process.argv.length - 1]);
+      global.window = {};
+      // Provide escape that matches server: replace &, <, >, ", '
+      window.htmlSanitizer = {
+        escape: function(text) {
+          const str = String(text == null ? '' : text);
+          return str
+            .replace(/&/g, '&amp;')
+            .replace(/</g, '&lt;')
+            .replace(/>/g, '&gt;')
+            .replace(/"/g, '&quot;')
+            .replace(/'/g, '&#39;');
+        }
+      };
+      require(input.path);
+      const tokens = window.highlightUtils.buildTokens(input.query);
+      const html = window.highlightUtils.highlight(input.value, tokens);
+      process.stdout.write(JSON.stringify({ tokens, html }));
+    """
+    cmd = [node_path, "-e", script, payload]
+    res = subprocess.run(cmd, cwd=str(repo_root), capture_output=True, text=True)
+    if res.returncode != 0:
+        return None
+    return json.loads(res.stdout)
+
+
+def test_highlight_parity_with_client_when_node_available():
+    """Check that server and client highlighters agree on tokens and HTML.
+
+    Skipped only if the client run is unavailable for the very first sample; a
+    later failure means Node works but the client broke, so it fails the test.
+    """
+    import pytest  # function-local; the module has no top-level pytest import
+    samples = [
+        ("Hello John Smith", "john smith"),
+        ("<b>A&B</b> and C", "a b"),
+        ("Anna and Ann went", "ann anna"),
+        ("He said \"Hello\" & it's fine", "hello"),
+        ("Case 12345", "case 123"),
+    ]
+    for index, (value, query) in enumerate(samples):
+        client = _run_node_highlight(value, query)
+        if client is None and index == 0:
+            pytest.skip("Node or client highlight not available")
+        assert client is not None, f"client highlight failed for {value!r}"
+        server_tokens = build_query_tokens(query)
+        server_html = highlight_text(value, server_tokens)
+        assert client["tokens"] == server_tokens, f"tokens differ: {query!r}"
+        assert client["html"] == server_html, f"html differs: {value!r}"
+
+