"""Tests for the search-highlight helpers in ``app.api.search_highlight``.

NOTE(review): this module was recovered from a copy in which HTML tags had
been stripped and entities decoded, so the exact wrapper markup emitted by
``highlight_text`` is not visible here.  Expectations that involve highlight
wrappers are therefore written tag-agnostically (see the two helpers below);
tighten them to the literal markup once it has been confirmed against the
implementation.
"""
import re

from app.api.search_highlight import (
    build_query_tokens,
    highlight_text,
    create_customer_highlight,
    create_file_highlight,
    create_ledger_highlight,
    create_qdro_highlight,
)

# Matches any literal opening/closing element tag.  Escaped markup such as
# ``&lt;b&gt;`` contains no raw angle brackets and is left untouched.
_TAG_RE = re.compile(r"</?[A-Za-z][^<>]*>")

# Matches one wrapped segment ``<tag ...>inner</tag>`` and captures the inner text.
_WRAPPED_RE = re.compile(
    r"<([A-Za-z][A-Za-z0-9]*)(?:\s[^<>]*)?>(.*?)</\1>", re.DOTALL
)


def _strip_highlight_markup(text):
    """Return *text* with every literal element tag removed."""
    return _TAG_RE.sub("", text)


def _highlighted_segments(text):
    """Return the inner text of each wrapped segment in *text*, in order."""
    return [inner for _tag, inner in _WRAPPED_RE.findall(text)]


def _make_stub(class_name, attrs):
    """Build a bare instance of a throwaway class and set *attrs* on it."""
    obj = type(class_name, (), {})()
    for key, value in attrs.items():
        setattr(obj, key, value)
    return obj


# ---------------------------------------------------------------------------
# build_query_tokens / highlight_text
# ---------------------------------------------------------------------------


def test_build_query_tokens_dedup_and_trim():
    """Punctuation is trimmed and repeated tokens are reported once."""
    tokens = build_query_tokens(' John, Smith; "Smith" (J.) ')
    assert tokens == ['John', 'Smith', 'J']


def test_highlight_text_case_insensitive_preserves_original():
    """Matching ignores case but the source casing is what gets emitted."""
    out = highlight_text('John Smith', ['joHN', 'smiTH'])
    assert _strip_highlight_markup(out) == 'John Smith'
    assert _highlighted_segments(out) == ['John', 'Smith']


def test_highlight_text_overlapping_tokens():
    """Overlapping tokens are merged rather than nested."""
    out = highlight_text('Anna and Ann went', ['ann', 'anna'])
    # Should highlight both; merged ranges will encompass 'Anna' first, then 'Ann'
    assert _highlighted_segments(out) == ['Anna', 'Ann']
    assert ' and Ann went' in _strip_highlight_markup(out)


def test_highlight_text_multiple_occurrences():
    """Every occurrence of a token is highlighted."""
    out = highlight_text('alpha beta alpha', ['alpha'])
    assert out.count('alpha') == 2
    assert _highlighted_segments(out) == ['alpha', 'alpha']


def test_highlight_text_returns_original_when_token_absent():
    """Text without any match is returned unchanged."""
    out = highlight_text('Hello World', ['zzz'])
    assert out == 'Hello World'


def test_highlight_text_merges_overlapping_tokens_single_range():
    """'ana' and 'nan' overlap inside 'banana' and collapse into one range."""
    out = highlight_text('banana', ['ana', 'nan'])
    assert _strip_highlight_markup(out) == 'banana'
    assert _highlighted_segments(out) == ['anana']


def test_build_query_tokens_mixed_case_dedup_order_preserving():
    """Dedup is case-insensitive and keeps the first spelling seen."""
    tokens = build_query_tokens('ALPHA alpha Beta beta BETA')
    assert tokens == ['ALPHA', 'Beta']


def test_build_query_tokens_trims_wrapping_punctuation_and_ignores_empties():
    """Wrapping punctuation is removed and empty fragments are dropped."""
    tokens = build_query_tokens('...Alpha!!!, __Alpha__, (Beta); "beta";; gamma---')
    assert tokens == ['Alpha', 'Beta', 'gamma']


def test_build_query_tokens_empty_input():
    """Whitespace-only input yields no tokens."""
    assert build_query_tokens(' ') == []


# ---------------------------------------------------------------------------
# create_customer_highlight
# ---------------------------------------------------------------------------


def _make_customer(**attrs):
    """Return a customer stub carrying *attrs* as attributes."""
    return _make_stub("CustomerStub", attrs)


def test_create_customer_highlight_prefers_name_over_other_fields():
    """A name match wins even when email and city also match."""
    customer = _make_customer(
        first='John', last='Smith', email='john@example.com', city='Johnstown'
    )
    out = create_customer_highlight(customer, 'john')
    assert out.startswith('Name: ')
    assert 'Email:' not in out and 'City:' not in out


def test_create_customer_highlight_uses_email_when_name_not_matching():
    """Email is used when the name does not match."""
    customer = _make_customer(
        first='Alice', last='Wonder', email='johnson@example.com', city='Paris'
    )
    out = create_customer_highlight(customer, 'john')
    assert out.startswith('Email: ')


def test_create_customer_highlight_uses_city_when_only_city_matches():
    """City is used when it is the only matching field."""
    customer = _make_customer(
        first='Alice', last='Wonder', email='awonder@example.com', city='Ann Arbor'
    )
    out = create_customer_highlight(customer, 'arbor')
    assert out.startswith('City: ')


def test_create_customer_highlight_requires_full_query_in_single_field():
    """A query split across fields produces no highlight."""
    customer = _make_customer(
        first='John', last='Smith', email='js@example.com', city='Boston'
    )
    # 'john boston' does not occur in any single attribute; should return empty string
    out = create_customer_highlight(customer, 'john boston')
    assert out == ''


def test_create_customer_highlight_highlights_both_tokens_in_full_name():
    """A full-name query yields the labelled full name."""
    customer = _make_customer(
        first='John', last='Smith', email='js@example.com', city='Boston'
    )
    out = create_customer_highlight(customer, 'John Smith')
    # NOTE(review): the literal wrapper markup is not visible in the recovered
    # source, so only the text content is pinned here.
    assert _strip_highlight_markup(out) == 'Name: John Smith'


# ---------------------------------------------------------------------------
# create_file_highlight
# ---------------------------------------------------------------------------


def _make_file(**attrs):
    """Return a file stub carrying *attrs* as attributes."""
    return _make_stub("FileStub", attrs)


def test_create_file_highlight_prefers_matter_over_type():
    """The 'regarding' text wins over the file type."""
    file_obj = _make_file(regarding='Divorce Matter - John Doe', file_type='QDRO')
    out = create_file_highlight(file_obj, 'divorce')
    assert out.startswith('Matter: ')
    assert 'Divorce' in out


def test_create_file_highlight_uses_type_when_matter_not_matching():
    """File type is used when 'regarding' does not match."""
    file_obj = _make_file(regarding='Miscellaneous', file_type='Income Tax')
    out = create_file_highlight(file_obj, 'tax')
    assert out.startswith('Type: ')
    # Preserve original casing from the source
    assert 'Tax' in out


def test_create_file_highlight_returns_empty_when_no_match():
    """No matching field yields an empty string."""
    file_obj = _make_file(regarding='Misc', file_type='General')
    out = create_file_highlight(file_obj, 'unrelated')
    assert out == ''


# ---------------------------------------------------------------------------
# create_ledger_highlight
# ---------------------------------------------------------------------------


def _make_ledger(**attrs):
    """Return a ledger stub carrying *attrs* as attributes."""
    return _make_stub("LedgerStub", attrs)


def test_create_ledger_highlight_truncates_to_160_with_suffix_and_highlights():
    """Long notes are cut to a preview that still contains the match."""
    prefix = 'x' * 50
    match = 'AlphaBeta'
    filler = 'y' * 200
    marker_after = 'ZZZ_AFTER'
    note_text = prefix + match + filler + marker_after
    ledger = _make_ledger(note=note_text)
    out = create_ledger_highlight(ledger, 'alpha')
    assert out.startswith('Note: ')
    # Should include highlight within the preview (compared without wrapper
    # markup, which would otherwise split 'Alpha' from 'Beta')
    assert 'AlphaBeta' in _strip_highlight_markup(out)
    # Should be truncated with suffix because original length > 160
    assert out.endswith('...')
    # Ensure content after 160 chars (marker_after) is not present
    assert 'ZZZ_AFTER' not in out


# ---------------------------------------------------------------------------
# create_qdro_highlight
# ---------------------------------------------------------------------------


def _make_qdro(**attrs):
    """Return a QDRO stub carrying *attrs* as attributes."""
    return _make_stub("QdroStub", attrs)


def test_create_qdro_highlight_prefers_form_name_over_pet_and_case():
    """Form name wins over petitioner and case number."""
    qdro = _make_qdro(
        form_name='Domestic Relations Form - QDRO', pet='Jane Doe', case_number='2024-XYZ'
    )
    out = create_qdro_highlight(qdro, 'qdro')
    assert out.startswith('Form: ')
    assert 'QDRO' in out


def test_create_qdro_highlight_uses_pet_when_form_not_matching():
    """Petitioner is used when the form name does not match."""
    qdro = _make_qdro(form_name='Child Support', pet='John Johnson', case_number='A-1')
    out = create_qdro_highlight(qdro, 'john')
    assert out.startswith('Petitioner: ')


def test_create_qdro_highlight_uses_case_when_only_case_matches():
    """Case number is used when it is the only matching field."""
    qdro = _make_qdro(form_name='Child Support', pet='Mary Jane', case_number='Case 12345')
    out = create_qdro_highlight(qdro, 'case 123')
    assert out.startswith('Case: ')
    assert 'Case' in out and '123' in out


def test_create_qdro_highlight_none_or_empty_fields_return_empty():
    """All-None fields, or an empty query, yield an empty string."""
    qdro = _make_qdro(form_name=None, pet=None, case_number=None)
    assert create_qdro_highlight(qdro, 'anything') == ''
    populated = _make_qdro(form_name='Form A', pet='Pet B', case_number='C-1')
    assert create_qdro_highlight(populated, '') == ''


def test_create_qdro_highlight_requires_full_query_in_single_field():
    """A query split across fields produces no highlight."""
    # Tokens present across fields but not as a contiguous substring in any single field
    qdro = _make_qdro(form_name='QDRO Plan', pet='Alpha', case_number='123')
    out = create_qdro_highlight(qdro, 'plan 123')
    assert out == ''


# ---------------------------------------------------------------------------
# HTML escaping
# ---------------------------------------------------------------------------


def test_highlight_text_escapes_html_in_source_and_tokens():
    """HTML in the source text is escaped; only highlight wrappers are raw."""
    # Source contains HTML, should be escaped, not interpreted
    out = highlight_text('<script>alert(1)</script> Alpha & Beta', ['alpha', 'beta'])
    # Tags are escaped; only wrappers exist
    assert '&lt;script&gt;alert(1)&lt;/script&gt;' in out
    assert _highlighted_segments(out) == ['Alpha', 'Beta']
    assert '<script>' not in out


def test_highlight_text_handles_quotes_and_apostrophes_safely():
    """Quotes, apostrophes and ampersands come back as entities."""
    out = highlight_text('He said "Hello" & it\'s fine', ['hello'])
    plain = _strip_highlight_markup(out)
    # Quotes and ampersand should be escaped.  Both the named/hex and the
    # decimal entity spellings are accepted because the escaping routine used
    # by the implementation is not visible from this file.
    assert '&quot;Hello&quot;' in plain or '&#34;Hello&#34;' in plain
    assert '&#x27;s' in out or '&#39;s' in out
    assert '&amp;' in out


def test_highlight_text_no_tokens_returns_escaped_source():
    """With no tokens the source is escaped and nothing is wrapped."""
    out = highlight_text('<b>bold</b>', [])
    assert out == '&lt;b&gt;bold&lt;/b&gt;'