fixes and refactor

This commit is contained in:
HotSwapp
2025-08-14 19:16:28 -05:00
parent 5111079149
commit bfc04a6909
61 changed files with 5689 additions and 767 deletions

View File

@@ -2,8 +2,10 @@
Server-side highlight utilities for search results.
These functions generate HTML snippets with <strong> around matched tokens,
preserving the original casing of the source text. The output is intended to be
sanitized on the client before insertion into the DOM.
preserving the original casing of the source text. All non-HTML segments are
HTML-escaped server-side to prevent injection. Only the <strong> tags added by
this module are emitted as HTML; any pre-existing HTML in source text is
escaped.
"""
from typing import List, Tuple, Any
import re
@@ -42,18 +44,40 @@ def _merge_ranges(ranges: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
def highlight_text(value: str, tokens: List[str]) -> str:
"""Return `value` with case-insensitive matches of `tokens` wrapped in <strong>, preserving original casing."""
"""Return `value` with case-insensitive matches of `tokens` wrapped in <strong>, preserving original casing.
Non-highlighted segments and the highlighted text content are HTML-escaped.
Only the surrounding <strong> wrappers are emitted as markup.
"""
if value is None:
return ""
def _escape_html(text: str) -> str:
# Minimal, safe HTML escaping
if text is None:
return ""
# Replace ampersand first to avoid double-escaping
text = str(text)
text = text.replace("&", "&amp;")
text = text.replace("<", "&lt;")
text = text.replace(">", "&gt;")
text = text.replace('"', "&quot;")
text = text.replace("'", "&#39;")
return text
source = str(value)
if not source or not tokens:
return source
return _escape_html(source)
haystack = source.lower()
ranges: List[Tuple[int, int]] = []
# Deduplicate tokens case-insensitively to avoid redundant scans (parity with client)
unique_needles = []
seen_needles = set()
for t in tokens:
needle = str(t or "").lower()
if not needle:
continue
if needle and needle not in seen_needles:
unique_needles.append(needle)
seen_needles.add(needle)
for needle in unique_needles:
start = 0
last_possible = max(0, len(haystack) - len(needle))
while start <= last_possible and len(needle) > 0:
@@ -63,17 +87,17 @@ def highlight_text(value: str, tokens: List[str]) -> str:
ranges.append((idx, idx + len(needle)))
start = idx + 1
if not ranges:
return source
return _escape_html(source)
parts: List[str] = []
merged = _merge_ranges(ranges)
pos = 0
for s, e in merged:
if pos < s:
parts.append(source[pos:s])
parts.append("<strong>" + source[s:e] + "</strong>")
parts.append(_escape_html(source[pos:s]))
parts.append("<strong>" + _escape_html(source[s:e]) + "</strong>")
pos = e
if pos < len(source):
parts.append(source[pos:])
parts.append(_escape_html(source[pos:]))
return "".join(parts)