"""
Server-side highlight utilities for search results.

These functions generate HTML snippets with <mark> around matched tokens,
preserving the original casing of the source text. All non-HTML segments are
HTML-escaped server-side to prevent injection. Only the <mark> tags added by
this module are emitted as HTML; any pre-existing HTML in source text is
escaped.
"""
import html
import re
from typing import Any, Iterable, List, Optional, Tuple

# Characters treated as token separators in a raw query string.
_QUERY_SEPARATORS_RE = re.compile(r"[,_;:]+")
# Leading/trailing runs of non-alphanumeric (ASCII) characters on a token.
_TOKEN_EDGE_RE = re.compile(r"^[^A-Za-z0-9]+|[^A-Za-z0-9]+$")
# Maximum number of note characters shown in a ledger highlight.
_NOTE_PREVIEW_CHARS = 160


def build_query_tokens(query: str) -> List[str]:
    """Split a query into tokens for highlighting.

    Commas, underscores, semicolons and colons are treated like whitespace.
    Each resulting part has leading and trailing non-alphanumeric (ASCII)
    characters trimmed. Tokens are de-duplicated case-insensitively; the
    first spelling seen is kept and the original order is preserved.

    Args:
        query: Raw search string. Falsy values yield an empty list.

    Returns:
        The cleaned tokens, in order of first appearance.
    """
    if not query:
        return []

    parts = _QUERY_SEPARATORS_RE.sub(" ", str(query).strip()).split()
    tokens: List[str] = []
    seen = set()
    for part in parts:
        token = _TOKEN_EDGE_RE.sub("", part)
        key = token.lower()
        if token and key not in seen:
            tokens.append(token)
            seen.add(key)
    return tokens


def _merge_ranges(ranges: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
    """Merge overlapping or touching half-open ``(start, end)`` ranges.

    The input list is not modified. The result is sorted by start offset and
    contains no two ranges that overlap or touch.
    """
    if not ranges:
        return []

    # sorted() rather than list.sort() so the caller's list is left intact.
    ordered = sorted(ranges)
    merged: List[Tuple[int, int]] = []
    cur_start, cur_end = ordered[0]
    for start, end in ordered[1:]:
        if start <= cur_end:
            cur_end = max(cur_end, end)
        else:
            merged.append((cur_start, cur_end))
            cur_start, cur_end = start, end
    merged.append((cur_start, cur_end))
    return merged


def _escape_html(text: Optional[str]) -> str:
    """HTML-escape ``text`` (``&``, ``<``, ``>`` and both quote characters).

    ``None`` is rendered as an empty string.
    """
    if text is None:
        return ""
    return html.escape(str(text), quote=True)


def _lower_keep_length(text: str) -> str:
    """Lowercase ``text`` while keeping a 1:1 index mapping with the input.

    ``str.lower()`` can lengthen a string (for example U+0130 lowercases to
    two code points), which would make offsets found in the lowered string
    point at the wrong characters of the original. When that happens, any
    character whose lowercase form is not exactly one code point is left
    unchanged.
    """
    lowered = text.lower()
    if len(lowered) == len(text):
        return lowered

    chars = []
    for ch in text:
        low = ch.lower()
        chars.append(low if len(low) == 1 else ch)
    return "".join(chars)


def highlight_text(value: str, tokens: List[str]) -> str:
    """Return ``value`` with matches of ``tokens`` wrapped in <mark>.

    Matching is case-insensitive and the original casing of ``value`` is
    preserved in the output. Overlapping or adjacent matches are merged into
    a single <mark> element. Both the highlighted and the non-highlighted
    text are HTML-escaped; only the surrounding <mark> wrappers are emitted
    as markup.

    Args:
        value: Source text. ``None`` yields an empty string; other values are
            converted with ``str()``.
        tokens: Tokens to highlight. Falsy entries are ignored and duplicates
            (case-insensitive) are scanned only once.

    Returns:
        An HTML-safe string.
    """
    if value is None:
        return ""

    source = str(value)
    if not source or not tokens:
        return _escape_html(source)

    # Offsets are found in the lowered copy and applied to ``source``, so the
    # two strings must stay index-aligned.
    haystack = _lower_keep_length(source)

    # Case-insensitive de-duplication, keeping first-seen order.
    needles = dict.fromkeys(str(t or "").lower() for t in tokens)

    ranges: List[Tuple[int, int]] = []
    for needle in needles:
        if not needle:
            continue
        idx = haystack.find(needle)
        while idx != -1:
            ranges.append((idx, idx + len(needle)))
            # Advance by one (not len(needle)) so overlapping occurrences
            # are found too; they are merged below.
            idx = haystack.find(needle, idx + 1)

    if not ranges:
        return _escape_html(source)

    parts: List[str] = []
    pos = 0
    for start, end in _merge_ranges(ranges):
        if pos < start:
            parts.append(_escape_html(source[pos:start]))
        parts.append("<mark>" + _escape_html(source[start:end]) + "</mark>")
        pos = end
    if pos < len(source):
        parts.append(_escape_html(source[pos:]))
    return "".join(parts)


def _first_field_highlight(
    query: str, candidates: Iterable[Tuple[str, Any]]
) -> str:
    """Return ``"<label>: <highlighted text>"`` for the first matching field.

    A field matches when it is truthy and its string form contains the whole
    lowercased ``query`` as a substring. Returns an empty string when no
    field matches.
    """
    tokens = build_query_tokens(query)
    needle = query.lower()
    for label, raw in candidates:
        if not raw:
            continue
        text = str(raw)
        if needle in text.lower():
            return f"{label}: {highlight_text(text, tokens)}"
    return ""


def create_customer_highlight(customer: Any, query: str) -> str:
    """Build a highlight snippet for a customer search hit.

    Checks, in order, the full name (``first`` + ``last``), ``email`` and
    ``city`` attributes of ``customer`` and returns a snippet for the first
    one containing ``query``. Returns an empty string when ``query`` is empty
    or nothing matches.
    """
    if not query:
        return ""

    first = getattr(customer, 'first', '') or ''
    # ``or ''`` keeps a missing or None surname from rendering as "None".
    last = getattr(customer, 'last', '') or ''
    full_name = f"{first} {last}".strip()
    return _first_field_highlight(
        query,
        (
            ("Name", full_name),
            ("Email", getattr(customer, 'email', None)),
            ("City", getattr(customer, 'city', None)),
        ),
    )


def create_file_highlight(file_obj: Any, query: str) -> str:
    """Build a highlight snippet for a file search hit.

    Checks ``regarding`` then ``file_type`` on ``file_obj`` and returns a
    snippet for the first one containing ``query``. Returns an empty string
    when ``query`` is empty or nothing matches.
    """
    if not query:
        return ""

    return _first_field_highlight(
        query,
        (
            ("Matter", getattr(file_obj, 'regarding', None)),
            ("Type", getattr(file_obj, 'file_type', None)),
        ),
    )


def create_ledger_highlight(ledger: Any, query: str) -> str:
    """Build a highlight snippet for a ledger search hit.

    If the ``note`` attribute of ``ledger`` contains ``query``, returns the
    first 160 characters of the note with tokens highlighted, followed by
    ``...`` when the note was truncated. Returns an empty string when
    ``query`` is empty or the note does not match.
    """
    if not query:
        return ""

    tokens = build_query_tokens(query)
    note = getattr(ledger, 'note', None)
    if not note:
        return ""

    text = str(note)
    if query.lower() not in text.lower():
        return ""

    # The match is tested against the whole note, but only the preview is
    # rendered, so a hit past the preview produces no <mark>.
    preview = text[:_NOTE_PREVIEW_CHARS]
    suffix = "..." if len(text) > _NOTE_PREVIEW_CHARS else ""
    return f"Note: {highlight_text(preview, tokens)}{suffix}"


def create_qdro_highlight(qdro: Any, query: str) -> str:
    """Build a highlight snippet for a QDRO search hit.

    Checks, in order, ``form_name``, ``pet`` and ``case_number`` on ``qdro``
    and returns a snippet for the first one containing ``query``. Returns an
    empty string when ``query`` is empty or nothing matches.
    """
    if not query:
        return ""

    return _first_field_highlight(
        query,
        (
            ("Form", getattr(qdro, 'form_name', None)),
            ("Petitioner", getattr(qdro, 'pet', None)),
            ("Case", getattr(qdro, 'case_number', None)),
        ),
    )