166 lines
5.6 KiB
Python
166 lines
5.6 KiB
Python
"""
|
|
Server-side highlight utilities for search results.
|
|
|
|
These functions generate HTML snippets with <strong> around matched tokens,
|
|
preserving the original casing of the source text. All non-HTML segments are
|
|
HTML-escaped server-side to prevent injection. Only the <strong> tags added by
|
|
this module are emitted as HTML; any pre-existing HTML in source text is
|
|
escaped.
|
|
"""
|
|
import html
import re
from typing import List, Tuple, Any
|
|
|
|
|
|
def build_query_tokens(query: str) -> List[str]:
    """Split a search query into unique, punctuation-trimmed tokens.

    The query is split on whitespace and on runs of the separators
    ``, _ ; :``. Leading and trailing characters that are not letters or
    digits are stripped from each piece, empty pieces are dropped, and
    duplicates that differ only by case are removed (the first spelling
    wins and keeps its position).

    "Letter or digit" is Unicode-aware, so accented or non-Latin characters
    at a token edge are kept: ``"José"`` stays ``"José"`` instead of being
    cut down to ``"Jos"``.

    Args:
        query: Raw user query. Falsy values yield an empty list; any other
            non-string value is converted with ``str()``.

    Returns:
        Tokens in first-seen order, in their original casing.
    """
    if not query:
        return []

    tokens: List[str] = []
    seen = set()
    for piece in re.sub(r"[,_;:]+", " ", str(query)).split():
        # [\W_] means "not a Unicode letter or digit". An ASCII-only class
        # here would eat accented letters at the edges of a token.
        token = re.sub(r"^[\W_]+|[\W_]+$", "", piece)
        key = token.lower()
        if token and key not in seen:
            seen.add(key)
            tokens.append(token)
    return tokens
|
|
|
|
|
|
def _merge_ranges(ranges: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
|
|
if not ranges:
|
|
return []
|
|
ranges.sort(key=lambda x: (x[0], x[1]))
|
|
merged: List[Tuple[int, int]] = []
|
|
cur_s, cur_e = ranges[0]
|
|
for s, e in ranges[1:]:
|
|
if s <= cur_e:
|
|
cur_e = max(cur_e, e)
|
|
else:
|
|
merged.append((cur_s, cur_e))
|
|
cur_s, cur_e = s, e
|
|
merged.append((cur_s, cur_e))
|
|
return merged
|
|
|
|
|
|
def highlight_text(value: str, tokens: List[str]) -> str:
    """Return ``value`` as HTML with token matches wrapped in ``<strong>``.

    Matching is case-insensitive and the original casing of ``value`` is
    preserved in the output. Overlapping or adjacent matches are merged
    into a single ``<strong>`` span. All text taken from ``value``, whether
    highlighted or not, is HTML-escaped; the ``<strong>`` wrappers are the
    only markup emitted.

    Args:
        value: Source text. ``None`` yields ``""``; other non-string values
            are converted with ``str()``.
        tokens: Strings to highlight. Empty or ``None`` entries are ignored
            and entries that differ only by case are scanned once.

    Returns:
        An HTML-safe string. Escaping uses ``html.escape`` with quotes
        enabled, so ``&``, ``<``, ``>``, ``"`` and ``'`` are all replaced
        (``'`` becomes ``&#x27;``).
    """
    if value is None:
        return ""

    source = str(value)
    if not source or not tokens:
        return html.escape(source)

    # Collapse tokens that differ only by case so each is scanned once.
    needles: List[str] = []
    seen = set()
    for token in tokens:
        needle = str(token or "")
        key = needle.lower()
        if needle and key not in seen:
            seen.add(key)
            needles.append(needle)

    spans: List[Tuple[int, int]] = []
    for needle in needles:
        # Search `source` itself rather than a lowered copy: str.lower() can
        # change the length of the text (e.g. "İ" becomes two code points),
        # which would shift every offset that follows. The lookahead wrapper
        # makes finditer report overlapping occurrences too.
        pattern = re.compile("(?=(" + re.escape(needle) + "))", re.IGNORECASE)
        spans.extend(match.span(1) for match in pattern.finditer(source))

    if not spans:
        return html.escape(source)

    pieces: List[str] = []
    cursor = 0
    for start, end in _merge_ranges(spans):
        if cursor < start:
            pieces.append(html.escape(source[cursor:start]))
        pieces.append("<strong>" + html.escape(source[start:end]) + "</strong>")
        cursor = end
    if cursor < len(source):
        pieces.append(html.escape(source[cursor:]))
    return "".join(pieces)
|
|
|
|
|
|
def create_customer_highlight(customer: Any, query: str) -> str:
    """Build a labelled highlight snippet for a customer search hit.

    The name (``first`` and ``last`` joined by a space), ``email`` and
    ``city`` attributes are checked in that order. The first one that
    contains the whole query, case-insensitively, is returned with a
    ``"Name: "``, ``"Email: "`` or ``"City: "`` prefix and the query's
    tokens highlighted.

    Args:
        customer: Object read via ``getattr``; missing or empty attributes
            are skipped.
        query: Raw search query.

    Returns:
        The labelled HTML snippet, or ``""`` when the query is empty or no
        field contains it.
    """
    if not query:
        return ""

    # Both name parts fall back to "" so a missing last name never shows up
    # as the literal text "None" (which would also match queries like "none").
    first = getattr(customer, "first", "") or ""
    last = getattr(customer, "last", "") or ""
    candidates = (
        ("Name: ", f"{first} {last}".strip()),
        ("Email: ", getattr(customer, "email", None)),
        ("City: ", getattr(customer, "city", None)),
    )

    needle = query.lower()
    for label, field in candidates:
        if not field:
            continue
        text = str(field)
        if needle in text.lower():
            # Tokenize only once a field is known to match; most records
            # that reach this function for a non-matching field need no work.
            return label + highlight_text(text, build_query_tokens(query))
    return ""
|
|
|
|
|
|
def create_file_highlight(file_obj: Any, query: str) -> str:
    """Build a labelled highlight snippet for a file search hit.

    The ``regarding`` and ``file_type`` attributes are checked in that
    order. The first one that contains the whole query, case-insensitively,
    is returned with a ``"Matter: "`` or ``"Type: "`` prefix and the
    query's tokens highlighted.

    Args:
        file_obj: Object read via ``getattr``; missing or falsy attributes
            are skipped.
        query: Raw search query.

    Returns:
        The labelled HTML snippet, or ``""`` when the query is empty or no
        field contains it.
    """
    if not query:
        return ""

    tokens = build_query_tokens(query)
    needle = query.lower()
    labelled_fields = (
        ("Matter: ", getattr(file_obj, "regarding", None)),
        ("Type: ", getattr(file_obj, "file_type", None)),
    )
    for label, field in labelled_fields:
        if field and needle in str(field).lower():
            return label + highlight_text(str(field), tokens)
    return ""
|
|
|
|
|
|
def create_ledger_highlight(ledger: Any, query: str) -> str:
    """Build a ``"Note: "`` highlight snippet for a ledger search hit.

    When the ``note`` attribute contains the whole query,
    case-insensitively, its first 160 characters are returned with the
    query's tokens highlighted, followed by ``"..."`` if the note was
    longer than that.

    Args:
        ledger: Object read via ``getattr``; a missing or falsy ``note``
            yields ``""``.
        query: Raw search query.

    Returns:
        The labelled HTML snippet, or ``""`` when the query is empty or the
        note does not contain it.
    """
    if not query:
        return ""

    tokens = build_query_tokens(query)
    note = getattr(ledger, "note", None)
    if not note or query.lower() not in str(note).lower():
        return ""

    # NOTE(review): the query is tested against the full note, so a hit
    # located past the preview window produces a snippet with no highlight.
    preview_limit = 160
    full_text = str(note)
    trailer = "..." if len(full_text) > preview_limit else ""
    return "Note: " + highlight_text(full_text[:preview_limit], tokens) + trailer
|
|
|
|
|
|
def create_qdro_highlight(qdro: Any, query: str) -> str:
    """Build a labelled highlight snippet for a QDRO search hit.

    The ``form_name``, ``pet`` and ``case_number`` attributes are checked
    in that order. The first one that contains the whole query,
    case-insensitively, is returned with a ``"Form: "``, ``"Petitioner: "``
    or ``"Case: "`` prefix and the query's tokens highlighted.

    Args:
        qdro: Object read via ``getattr``; missing or falsy attributes are
            skipped.
        query: Raw search query.

    Returns:
        The labelled HTML snippet, or ``""`` when the query is empty or no
        field contains it.
    """
    if not query:
        return ""

    tokens = build_query_tokens(query)
    needle = query.lower()
    labelled_fields = (
        ("Form: ", getattr(qdro, "form_name", None)),
        ("Petitioner: ", getattr(qdro, "pet", None)),
        ("Case: ", getattr(qdro, "case_number", None)),
    )
    for label, field in labelled_fields:
        if field and needle in str(field).lower():
            return label + highlight_text(str(field), tokens)
    return ""
|
|
|
|
|