# Source listing metadata (not part of the module's code):
#   delphi-database/tests/test_search_api.py
#   2025-08-14 19:16:28 -05:00
#   467 lines, 16 KiB, Python

import os
import sys
from pathlib import Path
import uuid
from datetime import date
import pytest
from fastapi.testclient import TestClient
# Ensure required env vars for app import/config. ``setdefault`` only fills in
# a value when the variable is not already set, so a real environment wins.
os.environ.setdefault("SECRET_KEY", "x" * 32)
os.environ.setdefault("DATABASE_URL", "sqlite:////tmp/delphi_test.sqlite")
# Ensure repository root on sys.path for direct test runs. ``parents[1]`` is
# the parent of the directory that contains this file; it is inserted at the
# front of sys.path (once) before the ``app``/``tests`` imports below.
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
from app.main import app # noqa: E402
from app.auth.security import get_current_user # noqa: E402
from tests.helpers import assert_validation_error # noqa: E402
from app.api.financial import LedgerCreate # noqa: E402
from app.database.base import SessionLocal # noqa: E402
from app.models.qdro import QDRO # noqa: E402
from app.config import settings # noqa: E402
@pytest.fixture(scope="module")
def client():
    """Yield a ``TestClient`` with authentication stubbed and caching disabled.

    For the lifetime of the module the ``get_current_user`` dependency is
    replaced by a factory returning a stub admin user, and
    ``settings.cache_enabled`` is forced to ``False``. Both changes are undone
    on teardown so that other test modules see the original configuration.
    """

    class _User:
        """Stand-in for the authenticated user returned by the auth dependency."""

        def __init__(self):
            self.id = "test"
            self.username = "tester"
            self.is_admin = True
            self.is_active = True

    # Remember the current cache flag before touching any global state so it
    # can be restored afterwards instead of leaking into later test modules.
    previous_cache_enabled = settings.cache_enabled

    # Override auth to bypass JWT for these tests
    app.dependency_overrides[get_current_user] = lambda: _User()
    try:
        # Disable cache for search API tests unless explicitly testing caching
        settings.cache_enabled = False
        yield TestClient(app)
    finally:
        settings.cache_enabled = previous_cache_enabled
        app.dependency_overrides.pop(get_current_user, None)
def _create_customer(client: TestClient, last_suffix: str) -> str:
    """Create a customer via ``POST /api/customers/`` and return its id.

    The id is ``SRCH-CUST-`` followed by eight random hex characters;
    ``last_suffix`` is appended to the ``Search-`` last-name prefix. The
    request is asserted to answer with HTTP 200.
    """
    new_id = f"SRCH-CUST-{uuid.uuid4().hex[:8]}"
    response = client.post(
        "/api/customers/",
        json=dict(
            id=new_id,
            last=f"Search-{last_suffix}",
            first="Unit",
            email=f"{new_id.lower()}@example.com",
            city="Austin",
            abrev="TX",
        ),
    )
    assert response.status_code == 200
    return new_id
def _create_file(client: TestClient, owner_id: str, regarding_token: str) -> str:
    """Create a file via ``POST /api/files/`` and return its file number.

    The file number is ``SRCH-F-`` followed by six random hex characters. The
    file is attached to ``owner_id`` and ``regarding_token`` is embedded in the
    ``regarding`` text. The request is asserted to answer with HTTP 200.
    """
    new_file_no = f"SRCH-F-{uuid.uuid4().hex[:6]}"
    response = client.post(
        "/api/files/",
        json=dict(
            file_no=new_file_no,
            id=owner_id,
            regarding=f"Search Matter {regarding_token}",
            empl_num="E01",
            file_type="CIVIL",
            opened=date.today().isoformat(),
            status="ACTIVE",
            rate_per_hour=150.0,
            memo="Created by search tests",
        ),
    )
    assert response.status_code == 200
    return new_file_no
def test_search_customers_min_length_and_limit_validation(client: TestClient):
    """Customer search must reject a too-short ``q`` and an out-of-range ``limit``."""
    # Each case: (query parameters, field expected in the validation error).
    rejected_cases = [
        # q must be at least 2 chars
        ({"q": "a"}, "q"),
        # limit must be between 1 and 100
        ({"q": "ab", "limit": 0}, "limit"),
        ({"q": "ab", "limit": 101}, "limit"),
    ]
    for query_params, bad_field in rejected_cases:
        response = client.get("/api/search/customers", params=query_params)
        assert_validation_error(response, bad_field)
def test_search_files_min_length_and_limit_validation(client: TestClient):
    """File search must report validation errors for a one-char ``q`` and for ``limit`` 0 / 101."""
    # Each case: (query parameters, field expected in the validation error).
    rejected_cases = [
        ({"q": "a"}, "q"),
        ({"q": "ab", "limit": 0}, "limit"),
        ({"q": "ab", "limit": 101}, "limit"),
    ]
    for query_params, bad_field in rejected_cases:
        response = client.get("/api/search/files", params=query_params)
        assert_validation_error(response, bad_field)
def test_search_customers_results_and_filtering(client: TestClient):
    """Two customers sharing a random token are both found; ``limit=1`` returns one hit."""
    shared = f"TOK-{uuid.uuid4().hex[:6]}"
    created_ids = [
        _create_customer(client, f"{shared}-{label}") for label in ("Alpha", "Beta")
    ]

    # Search by shared token
    wide = client.get("/api/search/customers", params={"q": shared, "limit": 50})
    assert wide.status_code == 200
    hits = wide.json()
    assert isinstance(hits, list)
    assert all(hit.get("type") == "customer" for hit in hits)
    found_ids = {hit.get("id") for hit in hits}
    assert set(created_ids) <= found_ids

    # Limit parameter should restrict result count
    narrow = client.get("/api/search/customers", params={"q": shared, "limit": 1})
    assert narrow.status_code == 200
    limited = narrow.json()
    assert isinstance(limited, list)
    assert len(limited) == 1
def test_search_files_results_and_filtering(client: TestClient):
    """Two files sharing a random token in ``regarding`` are both found; ``limit=1`` returns one hit."""
    shared = f"FTOK-{uuid.uuid4().hex[:6]}"
    owner_id = _create_customer(client, f"Owner-{shared}")
    created_files = [
        _create_file(client, owner_id, regarding_token=f"{shared}-{label}")
        for label in ("Alpha", "Beta")
    ]

    # Search by token in regarding
    wide = client.get("/api/search/files", params={"q": shared, "limit": 50})
    assert wide.status_code == 200
    hits = wide.json()
    assert isinstance(hits, list)
    assert all(hit.get("type") == "file" for hit in hits)
    found_file_nos = {hit.get("id") for hit in hits}
    assert set(created_files) <= found_file_nos

    # Limit restricts results
    narrow = client.get("/api/search/files", params={"q": shared, "limit": 1})
    assert narrow.status_code == 200
    limited = narrow.json()
    assert isinstance(limited, list)
    assert len(limited) == 1
def test_search_case_insensitive_matching_and_highlight_preserves_casing(client: TestClient):
    """A lower-cased query must find customers and files stored with upper/mixed case.

    When the customer hit carries a ``highlight`` snippet, it must contain a
    ``<strong>`` marker and one of the ``Name:`` / ``City:`` / ``Email:``
    labels. A ledger entry with a mixed-case note is also created and only its
    creation status is checked here.
    """
    marker = f"MC-{uuid.uuid4().hex[:6]}"
    lowered = marker.lower()

    # Create customers with specific casing
    upper_id = _create_customer(client, f"{marker}-SMITH")
    mixed_id = _create_customer(client, f"{marker}-Smithson")

    # The lower-cased query should match both via case-insensitive search
    customer_resp = client.get("/api/search/customers", params={"q": lowered})
    assert customer_resp.status_code == 200
    customer_hits = customer_resp.json()
    customer_ids = {hit.get("id") for hit in customer_hits}
    assert upper_id in customer_ids and mixed_id in customer_ids

    # Now search files with mixed-case regarding
    file_no = _create_file(client, upper_id, regarding_token=f"{marker}-DoE")

    # Query should be case-insensitive
    file_resp = client.get("/api/search/files", params={"q": lowered})
    assert file_resp.status_code == 200
    found_file_nos = {hit.get("id") for hit in file_resp.json()}
    assert file_no in found_file_nos

    # Ensure highlight preserves original casing in snippet when server supplies text.
    # For customers highlight may include Name/Email/City with original case.
    upper_hit = next(hit for hit in customer_hits if hit.get("id") == upper_id)
    snippet = upper_hit.get("highlight")
    if snippet:
        # Server should return a snippet with <strong> around matches
        assert "<strong>" in snippet
        # At least one of the field labels should be present in the snippet
        assert any(label in snippet for label in ["Name:", "City:", "Email:"])

    # Also create a ledger entry with mixed-case note
    ledger_entry = LedgerCreate(
        file_no=file_no,
        date=date.today().isoformat(),
        t_code="NOTE",
        t_type="2",
        empl_num="E01",
        quantity=0.0,
        rate=0.0,
        amount=0.0,
        billed="N",
        note=f"MixedCase DoE note {marker}",
    )
    ledger_resp = client.post(
        "/api/financial/ledger/",
        json=ledger_entry.model_dump(mode="json"),
    )
    assert ledger_resp.status_code == 200
    # Ledger search via global endpoints isn't exposed directly here, but query through
    # legacy ledger search when available. We can at least ensure files search returns
    # highlight on regarding; ledger highlight is already unit-tested.
def _create_qdro_with_form_name(file_no: str, form_name: str) -> int:
    """Insert a ``DRAFT`` QDRO row through a ``SessionLocal`` session and return its id.

    The session is always closed, whether or not the insert succeeds.
    """
    session = SessionLocal()
    try:
        record = QDRO(file_no=file_no, form_name=form_name, status="DRAFT")
        session.add(record)
        session.commit()
        # Re-read the committed row before picking up its id.
        session.refresh(record)
        new_id = record.id
    finally:
        session.close()
    return new_id
def test_advanced_search_highlights_mixed_case_for_customer_file_qdro(client: TestClient):
    """Advanced search with a lower-cased query must highlight the stored mixed-case text.

    A customer, a file and a QDRO are seeded with the same mixed-case token;
    each corresponding result must wrap the token, in its original casing, in
    ``<strong>`` tags.
    """
    mixed = f"MiXeD{uuid.uuid4().hex[:6]}"

    # Customer with mixed-case in name, file with mixed-case in regarding,
    # QDRO seeded directly with mixed-case in form_name.
    customer_id = _create_customer(client, last_suffix=mixed)
    file_no = _create_file(client, customer_id, regarding_token=mixed)
    qdro_id = _create_qdro_with_form_name(file_no, form_name=f"Form {mixed} Plan")

    # Advanced search across types
    response = client.post(
        "/api/search/advanced",
        json={
            "query": mixed.lower(),
            "search_types": ["customer", "file", "qdro"],
            "limit": 50,
        },
    )
    assert response.status_code == 200
    body = response.json()
    assert body.get("total_results", 0) >= 3

    # Index by (type, id)
    indexed = {(item["type"], item["id"]): item for item in body["results"]}
    expected_markup = f"<strong>{mixed}</strong>"
    for key in (("customer", customer_id), ("file", file_no), ("qdro", qdro_id)):
        hit = indexed.get(key)
        assert hit is not None and isinstance(hit.get("highlight"), str)
        assert "<strong>" in hit["highlight"]
        assert expected_markup in hit["highlight"]
def test_global_search_highlights_mixed_case_for_customer_file_qdro(client: TestClient):
    """Global search with a lower-cased query must highlight the stored mixed-case text.

    The response is read as a mapping with ``customers``, ``files`` and
    ``qdros`` lists; the seeded record in each list must wrap the token, in its
    original casing, in ``<strong>`` tags.
    """
    mixed = f"MiXeD{uuid.uuid4().hex[:6]}"

    # Seed data
    customer_id = _create_customer(client, last_suffix=mixed)
    file_no = _create_file(client, customer_id, regarding_token=mixed)
    qdro_id = _create_qdro_with_form_name(file_no, form_name=f"QDRO {mixed} Case")

    # Global search
    response = client.get("/api/search/global", params={"q": mixed.lower(), "limit": 50})
    assert response.status_code == 200
    body = response.json()

    expected_markup = f"<strong>{mixed}</strong>"
    seeded = (("customers", customer_id), ("files", file_no), ("qdros", qdro_id))
    for section, wanted_id in seeded:
        hit = next(
            (item for item in body.get(section, []) if item.get("id") == wanted_id),
            None,
        )
        assert hit is not None and isinstance(hit.get("highlight"), str)
        assert "<strong>" in hit["highlight"]
        assert expected_markup in hit["highlight"]
def test_file_search_whole_words_and_exact_phrase(client: TestClient):
    """Check whole-word, exact-phrase and default substring matching for files.

    Two files are created whose ``regarding`` text differs only by a trailing
    "s" on one word. The word carries a per-run random suffix so that the
    queries below can only match the rows created here; with a fixed word such
    as "pie", rows left behind by earlier runs would also match and could push
    the fresh rows outside the ``limit`` window.
    """
    suffix = uuid.uuid4().hex[:6]
    token = f"FW-{suffix}"
    owner_id = _create_customer(client, f"Owner-{token}")

    # Alphanumeric only, so the unique word is still a single "word".
    word = f"pie{suffix}"
    f_exact = _create_file(
        client, owner_id, regarding_token=f"The apple {word} is fresh"
    )
    f_plural = _create_file(
        client, owner_id, regarding_token=f"The apple {word}s are fresh"
    )

    def matching_ids(query, **flags):
        """Run an advanced file search and return the set of result ids."""
        payload = {"query": query, "search_types": ["file"], "limit": 50, **flags}
        resp = client.post("/api/search/advanced", json=payload)
        assert resp.status_code == 200
        return {r["id"] for r in resp.json()["results"]}

    # whole_words=True should match the singular word but not the plural
    ids = matching_ids(word, whole_words=True)
    assert f_exact in ids
    assert f_plural not in ids

    # exact_phrase should match the exact wording only
    ids = matching_ids(f"apple {word}", exact_phrase=True)
    assert f_exact in ids
    assert f_plural not in ids

    # default (substring) matching should include both
    ids = matching_ids(word)
    assert f_exact in ids and f_plural in ids
def test_ledger_search_whole_words(client: TestClient):
    """Whole-word ledger search for 'retainer' must not return a 'retained' note."""
    marker = f"LW-{uuid.uuid4().hex[:6]}"

    # Create a file for ledger linkage
    owner_id = _create_customer(client, f"Owner-{marker}")
    file_no = _create_file(client, owner_id, regarding_token=marker)

    # Ledger entries: 'retainer' vs 'retained'
    for note_text in ("retainer fee approved", "retained amount on file"):
        entry = LedgerCreate(
            file_no=file_no,
            date=date.today().isoformat(),
            t_code="NOTE",
            t_type="2",
            empl_num="E01",
            quantity=0.0,
            rate=0.0,
            amount=0.0,
            billed="N",
            note=note_text,
        )
        created = client.post(
            "/api/financial/ledger/",
            json=entry.model_dump(mode="json"),
        )
        assert created.status_code == 200

    search = client.post(
        "/api/search/advanced",
        json={
            "query": "retainer",
            "search_types": ["ledger"],
            "whole_words": True,
            "limit": 50,
        },
    )
    assert search.status_code == 200

    # Should contain the entry with 'retainer fee approved' and exclude
    # 'retained amount on file'
    descriptions = [hit.get("description", "") for hit in search.json()["results"]]
    assert any("retainer fee approved" in text for text in descriptions)
    assert all("retained amount on file" not in text for text in descriptions)
def test_qdro_search_whole_words_and_exact_phrase(client: TestClient):
    """Check whole-word and exact-phrase matching on QDRO ``form_name``.

    Two QDROs are seeded whose form names contain a word and that same word
    with an "ing" ending. The word carries a per-run random suffix so the
    queries can only match the rows created here; with a fixed word such as
    "benefit", rows left behind by earlier runs would also match and could
    push the fresh rows outside the ``limit`` window.
    """
    suffix = uuid.uuid4().hex[:6]
    token = f"QW-{suffix}"
    owner_id = _create_customer(client, f"Owner-{token}")
    file_no = _create_file(client, owner_id, regarding_token=token)

    # Alphanumeric only, so the unique word is still a single "word".
    word = f"benefit{suffix}"
    q1 = _create_qdro_with_form_name(
        file_no, form_name=f"Order for {word} under plan"
    )
    q2 = _create_qdro_with_form_name(
        file_no, form_name=f"Order {word}ing alternate payee"
    )

    def matching_ids(query, **flags):
        """Run an advanced QDRO search and return the set of result ids."""
        payload = {"query": query, "search_types": ["qdro"], "limit": 50, **flags}
        resp = client.post("/api/search/advanced", json=payload)
        assert resp.status_code == 200
        return {r["id"] for r in resp.json()["results"]}

    # whole_words=True should match the bare word but not the "-ing" form
    ids = matching_ids(word, whole_words=True)
    assert q1 in ids
    assert q2 not in ids

    # exact_phrase should only match the precise phrase
    ids = matching_ids(f"Order for {word}", exact_phrase=True)
    assert q1 in ids
    assert q2 not in ids
def test_advanced_facets_include_state_and_transaction_type(client: TestClient):
    """Advanced search facets must expose ``state`` and ``transaction_type`` buckets.

    A customer (created with ``abrev`` "TX" by the helper) and a ledger entry
    with ``t_type`` "2" are seeded first so both facets have something to count.
    """
    marker = f"FAC-{uuid.uuid4().hex[:6]}"

    # Ensure at least one TX customer
    _create_customer(client, f"Facet-{marker}")

    # Ensure at least one ledger with t_type '2'
    owner_id = _create_customer(client, f"Owner-{marker}")
    file_no = _create_file(client, owner_id, regarding_token=marker)
    entry = LedgerCreate(
        file_no=file_no,
        date=date.today().isoformat(),
        t_code="NOTE",
        t_type="2",
        empl_num="E01",
        quantity=0.0,
        rate=0.0,
        amount=0.0,
        billed="N",
        note="Fee for facets token",
    )
    created = client.post(
        "/api/financial/ledger/",
        json=entry.model_dump(mode="json"),
    )
    assert created.status_code == 200

    # Query can be empty; we'll aggregate facets across returned results
    search = client.post(
        "/api/search/advanced",
        json={"search_types": ["customer", "ledger"], "limit": 200},
    )
    assert search.status_code == 200
    facets = search.json().get("facets", {})

    assert "state" in facets
    assert isinstance(facets["state"], dict)
    assert any(key in ("TX", "Tx", "tx") for key in facets["state"])

    assert "transaction_type" in facets
    assert isinstance(facets["transaction_type"], dict)
    # The bucket key may be the string "2" or the integer 2
    assert "2" in facets["transaction_type"] or 2 in facets["transaction_type"]