fixes and refactor
This commit is contained in:
@@ -2,8 +2,10 @@
|
||||
Server-side highlight utilities for search results.
|
||||
|
||||
These functions generate HTML snippets with <strong> around matched tokens,
|
||||
preserving the original casing of the source text. The output is intended to be
|
||||
sanitized on the client before insertion into the DOM.
|
||||
preserving the original casing of the source text. All non-HTML segments are
|
||||
HTML-escaped server-side to prevent injection. Only the <strong> tags added by
|
||||
this module are emitted as HTML; any pre-existing HTML in source text is
|
||||
escaped.
|
||||
"""
|
||||
from typing import List, Tuple, Any
|
||||
import re
|
||||
@@ -42,18 +44,40 @@ def _merge_ranges(ranges: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
|
||||
|
||||
|
||||
def highlight_text(value: str, tokens: List[str]) -> str:
|
||||
"""Return `value` with case-insensitive matches of `tokens` wrapped in <strong>, preserving original casing."""
|
||||
"""Return `value` with case-insensitive matches of `tokens` wrapped in <strong>, preserving original casing.
|
||||
|
||||
Non-highlighted segments and the highlighted text content are HTML-escaped.
|
||||
Only the surrounding <strong> wrappers are emitted as markup.
|
||||
"""
|
||||
if value is None:
|
||||
return ""
|
||||
|
||||
def _escape_html(text: str) -> str:
|
||||
# Minimal, safe HTML escaping
|
||||
if text is None:
|
||||
return ""
|
||||
# Replace ampersand first to avoid double-escaping
|
||||
text = str(text)
|
||||
text = text.replace("&", "&")
|
||||
text = text.replace("<", "<")
|
||||
text = text.replace(">", ">")
|
||||
text = text.replace('"', """)
|
||||
text = text.replace("'", "'")
|
||||
return text
|
||||
source = str(value)
|
||||
if not source or not tokens:
|
||||
return source
|
||||
return _escape_html(source)
|
||||
haystack = source.lower()
|
||||
ranges: List[Tuple[int, int]] = []
|
||||
# Deduplicate tokens case-insensitively to avoid redundant scans (parity with client)
|
||||
unique_needles = []
|
||||
seen_needles = set()
|
||||
for t in tokens:
|
||||
needle = str(t or "").lower()
|
||||
if not needle:
|
||||
continue
|
||||
if needle and needle not in seen_needles:
|
||||
unique_needles.append(needle)
|
||||
seen_needles.add(needle)
|
||||
for needle in unique_needles:
|
||||
start = 0
|
||||
last_possible = max(0, len(haystack) - len(needle))
|
||||
while start <= last_possible and len(needle) > 0:
|
||||
@@ -63,17 +87,17 @@ def highlight_text(value: str, tokens: List[str]) -> str:
|
||||
ranges.append((idx, idx + len(needle)))
|
||||
start = idx + 1
|
||||
if not ranges:
|
||||
return source
|
||||
return _escape_html(source)
|
||||
parts: List[str] = []
|
||||
merged = _merge_ranges(ranges)
|
||||
pos = 0
|
||||
for s, e in merged:
|
||||
if pos < s:
|
||||
parts.append(source[pos:s])
|
||||
parts.append("<strong>" + source[s:e] + "</strong>")
|
||||
parts.append(_escape_html(source[pos:s]))
|
||||
parts.append("<strong>" + _escape_html(source[s:e]) + "</strong>")
|
||||
pos = e
|
||||
if pos < len(source):
|
||||
parts.append(source[pos:])
|
||||
parts.append(_escape_html(source[pos:]))
|
||||
return "".join(parts)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user