HotSwapp
2025-08-18 20:20:04 -05:00
parent 89b2bc0aa2
commit bac8cc4bd5
114 changed files with 30258 additions and 1341 deletions


@@ -7,9 +7,12 @@ from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import or_, func, and_, desc, asc, text
from datetime import date, datetime, timezone
import io
import zipfile
import os
import uuid
import shutil
from pathlib import Path
from app.database.base import get_db
from app.api.search_highlight import build_query_tokens
@@ -21,9 +24,17 @@ from app.models.lookups import FormIndex, FormList, Footer, Employee
from app.models.user import User
from app.auth.security import get_current_user
from app.models.additional import Document
from app.models.document_workflows import EventLog
from app.core.logging import get_logger
from app.services.audit import audit_service
from app.services.cache import invalidate_search_cache
from app.models.templates import DocumentTemplate, DocumentTemplateVersion
from app.models.jobs import JobRecord
from app.services.storage import get_default_storage
from app.services.template_merge import extract_tokens_from_bytes, build_context, resolve_tokens, render_docx
from app.services.document_notifications import notify_processing, notify_completed, notify_failed, topic_for_file, ADMIN_DOCUMENTS_TOPIC, get_last_status
from app.middleware.websocket_middleware import get_websocket_manager, WebSocketMessage
from fastapi import WebSocket
router = APIRouter()
@@ -118,6 +129,87 @@ class PaginatedQDROResponse(BaseModel):
total: int
class CurrentStatusResponse(BaseModel):
file_no: str
status: str # processing | completed | failed | unknown
timestamp: Optional[str] = None
data: Optional[Dict[str, Any]] = None
history: Optional[list] = None
@router.get("/current-status/{file_no}", response_model=CurrentStatusResponse)
async def get_current_document_status(
file_no: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
"""
Return last-known document generation status for a file.
Priority:
1) In-memory last broadcast state (processing/completed/failed)
2) If no memory record, check for any uploaded/generated documents and report 'completed'
3) Fallback to 'unknown'
"""
# Build recent history from EventLog (last N events)
history_items = []
try:
recent = (
db.query(EventLog)
.filter(EventLog.file_no == file_no, EventLog.event_type.in_(["document_processing", "document_completed", "document_failed"]))
.order_by(EventLog.occurred_at.desc())
.limit(10)
.all()
)
for ev in recent:
history_items.append({
"type": ev.event_type,
"timestamp": ev.occurred_at.isoformat() if getattr(ev, "occurred_at", None) else None,
"data": ev.event_data or {},
})
except Exception:
history_items = []
# Try in-memory record for current status
last = get_last_status(file_no)
if last:
ts = last.get("timestamp")
iso = ts.isoformat() if hasattr(ts, "isoformat") else None
status_val = str(last.get("status") or "unknown")
# Treat stale 'processing' as unknown if older than 10 minutes
try:
if status_val == "processing" and isinstance(ts, datetime):
age = datetime.now(timezone.utc) - ts
if age.total_seconds() > 600:
status_val = "unknown"
except Exception:
pass
return CurrentStatusResponse(
file_no=file_no,
status=status_val,
timestamp=iso,
data=(last.get("data") or None),
history=history_items,
)
# Fallback: any existing documents imply last status completed
any_doc = db.query(Document).filter(Document.file_no == file_no).order_by(Document.id.desc()).first()
if any_doc:
return CurrentStatusResponse(
file_no=file_no,
status="completed",
timestamp=getattr(any_doc, "upload_date", None).isoformat() if getattr(any_doc, "upload_date", None) else None,
data={
"document_id": any_doc.id,
"filename": any_doc.filename,
"size": any_doc.size,
},
history=history_items,
)
return CurrentStatusResponse(file_no=file_no, status="unknown", history=history_items)
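# Illustrative client sketch (not part of this commit): poll the current-status
# endpoint until generation settles. The base URL, router prefix, and bearer token
# are assumptions made for the example only.
def _example_poll_current_status(file_no: str, token: str) -> dict:
    import time
    import httpx  # assumed to be available in the calling environment
    url = f"http://localhost:8000/api/documents/current-status/{file_no}"
    headers = {"Authorization": f"Bearer {token}"}
    while True:
        payload = httpx.get(url, headers=headers).json()
        # Stop polling once the backend reports a terminal or unknown state
        if payload.get("status") in ("completed", "failed", "unknown"):
            return payload
        time.sleep(2)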
@router.get("/qdros/", response_model=Union[List[QDROResponse], PaginatedQDROResponse])
async def list_qdros(
skip: int = Query(0, ge=0),
@@ -814,6 +906,371 @@ def _merge_template_variables(content: str, variables: Dict[str, Any]) -> str:
return merged
# --- Batch Document Generation (MVP synchronous) ---
class BatchGenerateRequest(BaseModel):
"""Batch generation request using DocumentTemplate system."""
template_id: int
version_id: Optional[int] = None
file_nos: List[str]
output_format: str = "DOCX" # DOCX (default), PDF (not yet supported), HTML (not yet supported)
context: Optional[Dict[str, Any]] = None # additional global context
bundle_zip: bool = False # when true, also create a ZIP bundle of generated outputs
class BatchGenerateItemResult(BaseModel):
file_no: str
status: str # "success" | "error"
document_id: Optional[int] = None
filename: Optional[str] = None
path: Optional[str] = None
url: Optional[str] = None
size: Optional[int] = None
unresolved: Optional[List[str]] = None
error: Optional[str] = None
class BatchGenerateResponse(BaseModel):
job_id: str
template_id: int
version_id: int
total_requested: int
total_success: int
total_failed: int
results: List[BatchGenerateItemResult]
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
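# Minimal request sketch for the endpoint below; the template id and file numbers
# are hypothetical, and field names mirror BatchGenerateRequest as defined above.
_example_batch_payload = BatchGenerateRequest(
    template_id=12,                          # hypothetical template id
    file_nos=["2024-0001", "2024-0002"],     # hypothetical file numbers
    output_format="DOCX",                    # only DOCX rendering is implemented today
    context={"FIRM_NAME": "Example Firm"},   # optional extra global tokens (assumption)
    bundle_zip=True,                         # also produce a ZIP bundle of outputs
)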
@router.post("/generate-batch", response_model=BatchGenerateResponse)
async def generate_batch_documents(
payload: BatchGenerateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
"""Synchronously generate documents for multiple files from a template version.
Notes:
- Currently supports DOCX output. PDF/HTML conversion is not yet implemented.
- Saves generated bytes to default storage under uploads/generated/{file_no}/.
- Persists a `Document` record per successful file.
- Returns per-item status with unresolved tokens for transparency.
"""
tpl = db.query(DocumentTemplate).filter(DocumentTemplate.id == payload.template_id).first()
if not tpl:
raise HTTPException(status_code=404, detail="Template not found")
resolved_version_id = payload.version_id or tpl.current_version_id
if not resolved_version_id:
raise HTTPException(status_code=400, detail="Template has no approved/current version")
ver = (
db.query(DocumentTemplateVersion)
.filter(
DocumentTemplateVersion.id == resolved_version_id,
DocumentTemplateVersion.template_id == tpl.id,
)
.first()
)
if not ver:
raise HTTPException(status_code=404, detail="Template version not found")
storage = get_default_storage()
try:
template_bytes = storage.open_bytes(ver.storage_path)
except Exception:
raise HTTPException(status_code=404, detail="Stored template file not found")
tokens = extract_tokens_from_bytes(template_bytes)
results: List[BatchGenerateItemResult] = []
# Pre-normalize file numbers (strip spaces, ignore empties)
requested_files: List[str] = [fn.strip() for fn in (payload.file_nos or []) if fn and str(fn).strip()]
if not requested_files:
raise HTTPException(status_code=400, detail="No file numbers provided")
# Fetch all files in one query
files_map: Dict[str, FileModel] = {
f.file_no: f
for f in db.query(FileModel).options(joinedload(FileModel.owner)).filter(FileModel.file_no.in_(requested_files)).all()
}
generated_items: List[Dict[str, Any]] = []  # capture generated output paths for optional ZIP bundling
# Create the job id before the per-file loop so notifications can reference it
job_id = str(uuid.uuid4())
for file_no in requested_files:
# Notify processing started for this file
try:
await notify_processing(
file_no=file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"job_id": job_id
}
)
except Exception:
# Don't fail generation if notification fails
pass
file_obj = files_map.get(file_no)
if not file_obj:
# Notify failure
try:
await notify_failed(
file_no=file_no,
user_id=current_user.id,
data={"error": "File not found", "template_id": tpl.id}
)
except Exception:
pass
results.append(
BatchGenerateItemResult(
file_no=file_no,
status="error",
error="File not found",
)
)
continue
# Build per-file context
file_context: Dict[str, Any] = {
"FILE_NO": file_obj.file_no,
"CLIENT_FIRST": getattr(getattr(file_obj, "owner", None), "first", "") or "",
"CLIENT_LAST": getattr(getattr(file_obj, "owner", None), "last", "") or "",
"CLIENT_FULL": (
f"{getattr(getattr(file_obj, 'owner', None), 'first', '') or ''} "
f"{getattr(getattr(file_obj, 'owner', None), 'last', '') or ''}"
).strip(),
"MATTER": file_obj.regarding or "",
"OPENED": file_obj.opened.strftime("%B %d, %Y") if getattr(file_obj, "opened", None) else "",
"ATTORNEY": getattr(file_obj, "empl_num", "") or "",
}
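# These keys are candidate token values; resolve_tokens below matches them against
# the tokens extracted from the stored template bytes.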
# Merge global context (per-file values take precedence over any payload.context keys)
merged_context = build_context({**(payload.context or {}), **file_context}, "file", file_obj.file_no)
resolved_vars, unresolved_tokens = resolve_tokens(db, tokens, merged_context)
try:
if ver.mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
output_bytes = render_docx(template_bytes, resolved_vars)
output_mime = ver.mime_type
extension = ".docx"
else:
# For non-DOCX templates (e.g., PDF), pass-through content
output_bytes = template_bytes
output_mime = ver.mime_type
extension = ".bin"
# Name and save
safe_name = f"{tpl.name}_{file_obj.file_no}{extension}"
subdir = f"generated/{file_obj.file_no}"
storage_path = storage.save_bytes(content=output_bytes, filename_hint=safe_name, subdir=subdir, content_type=output_mime)
# Persist Document record
abs_or_rel_path = os.path.join("uploads", storage_path).replace("\\", "/")
doc = Document(
file_no=file_obj.file_no,
filename=safe_name,
path=abs_or_rel_path,
description=f"Generated from template '{tpl.name}'",
type=output_mime,
size=len(output_bytes),
uploaded_by=getattr(current_user, "username", None),
)
db.add(doc)
db.commit()
db.refresh(doc)
# Notify successful completion
try:
await notify_completed(
file_no=file_obj.file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"document_id": doc.id,
"filename": doc.filename,
"size": doc.size,
"unresolved_tokens": unresolved_tokens or []
}
)
except Exception:
# Don't fail generation if notification fails
pass
results.append(
BatchGenerateItemResult(
file_no=file_obj.file_no,
status="success",
document_id=doc.id,
filename=doc.filename,
path=doc.path,
url=storage.public_url(storage_path),
size=doc.size,
unresolved=unresolved_tokens or [],
)
)
# Keep for bundling
generated_items.append({
"filename": doc.filename,
"storage_path": storage_path,
})
except Exception as e:
# Notify failure
try:
await notify_failed(
file_no=file_obj.file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"error": str(e),
"unresolved_tokens": unresolved_tokens or []
}
)
except Exception:
pass
# Best-effort rollback of partial doc add
try:
db.rollback()
except Exception:
pass
results.append(
BatchGenerateItemResult(
file_no=file_obj.file_no,
status="error",
error=str(e),
unresolved=unresolved_tokens or [],
)
)
total_success = sum(1 for r in results if r.status == "success")
total_failed = sum(1 for r in results if r.status == "error")
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
# Optionally create a ZIP bundle of generated outputs
bundle_storage_path: Optional[str] = None
if payload.bundle_zip and total_success > 0:
# Stream zip to memory then save via storage adapter
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
for item in generated_items:
try:
file_bytes = storage.open_bytes(item["storage_path"]) # relative path under uploads
# Use clean filename inside zip
zf.writestr(item["filename"], file_bytes)
except Exception:
# Skip missing/unreadable files from bundle; keep job successful
continue
zip_bytes = zip_buffer.getvalue()
safe_zip_name = f"documents_batch_{job_id}.zip"
bundle_storage_path = storage.save_bytes(content=zip_bytes, filename_hint=safe_zip_name, subdir="bundles", content_type="application/zip")
bundle_url = storage.public_url(bundle_storage_path)
bundle_size = len(zip_bytes)
# Persist simple job record
try:
job = JobRecord(
job_id=job_id,
job_type="documents_batch",
status="completed",
requested_by_username=getattr(current_user, "username", None),
started_at=datetime.now(timezone.utc),
completed_at=datetime.now(timezone.utc),
total_requested=len(requested_files),
total_success=total_success,
total_failed=total_failed,
result_storage_path=bundle_storage_path,
result_mime_type=("application/zip" if bundle_storage_path else None),
result_size=bundle_size,
details={
"template_id": tpl.id,
"version_id": ver.id,
"file_nos": requested_files,
},
)
db.add(job)
db.commit()
except Exception:
try:
db.rollback()
except Exception:
pass
return BatchGenerateResponse(
job_id=job_id,
template_id=tpl.id,
version_id=ver.id,
total_requested=len(requested_files),
total_success=total_success,
total_failed=total_failed,
results=results,
bundle_url=bundle_url,
bundle_size=bundle_size,
)
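# Illustrative usage sketch (assumptions: localhost base URL, /api/documents mount
# prefix, bearer auth): run a synchronous batch, then fetch the ZIP bundle through
# the job-result route defined further below.
def _example_run_batch_and_download(token: str) -> None:
    import httpx  # assumed client library for the example
    base = "http://localhost:8000/api/documents"
    headers = {"Authorization": f"Bearer {token}"}
    resp = httpx.post(
        f"{base}/generate-batch",
        headers=headers,
        json={"template_id": 12, "file_nos": ["2024-0001"], "bundle_zip": True},
    )
    resp.raise_for_status()
    job = resp.json()
    if job.get("bundle_url"):
        bundle = httpx.get(f"{base}/jobs/{job['job_id']}/result", headers=headers)
        with open(f"documents_batch_{job['job_id']}.zip", "wb") as fh:
            fh.write(bundle.content)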
from fastapi.responses import StreamingResponse
class JobStatusResponse(BaseModel):
job_id: str
job_type: str
status: str
total_requested: int
total_success: int
total_failed: int
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
bundle_available: bool = False
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
@router.get("/jobs/{job_id}", response_model=JobStatusResponse)
async def get_job_status(
job_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
job = db.query(JobRecord).filter(JobRecord.job_id == job_id).first()
if not job:
raise HTTPException(status_code=404, detail="Job not found")
return JobStatusResponse(
job_id=job.job_id,
job_type=job.job_type,
status=job.status,
total_requested=job.total_requested or 0,
total_success=job.total_success or 0,
total_failed=job.total_failed or 0,
started_at=getattr(job, "started_at", None),
completed_at=getattr(job, "completed_at", None),
bundle_available=bool(job.result_storage_path),
bundle_url=(get_default_storage().public_url(job.result_storage_path) if job.result_storage_path else None),
bundle_size=job.result_size,
)
@router.get("/jobs/{job_id}/result")
async def download_job_result(
job_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
job = db.query(JobRecord).filter(JobRecord.job_id == job_id).first()
if not job or not job.result_storage_path:
raise HTTPException(status_code=404, detail="Result not available for this job")
storage = get_default_storage()
try:
content = storage.open_bytes(job.result_storage_path)
except Exception:
raise HTTPException(status_code=404, detail="Stored bundle not found")
# Derive filename
base = os.path.basename(job.result_storage_path)
headers = {
"Content-Disposition": f"attachment; filename=\"{base}\"",
}
return StreamingResponse(iter([content]), media_type=(job.result_mime_type or "application/zip"), headers=headers)
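# Small local sketch for inspecting a downloaded bundle; standard-library zipfile
# only, no project APIs involved.
def _example_list_bundle_contents(zip_path: str) -> list:
    import zipfile
    with zipfile.ZipFile(zip_path) as zf:
        # namelist() returns the per-file document names written by the batch job
        return zf.namelist()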
# --- Client Error Logging (for Documents page) ---
class ClientErrorLog(BaseModel):
"""Payload for client-side error logging"""
@@ -894,54 +1351,118 @@ async def upload_document(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Upload a document to a file"""
"""Upload a document to a file with comprehensive security validation and async operations"""
from app.utils.file_security import file_validator, create_upload_directory
from app.services.async_file_operations import async_file_ops, validate_large_upload
from app.services.async_storage import async_storage
file_obj = db.query(FileModel).filter(FileModel.file_no == file_no).first()
if not file_obj:
raise HTTPException(status_code=404, detail="File not found")
if not file.filename:
raise HTTPException(status_code=400, detail="No file uploaded")
# Determine if this is a large file that needs streaming
file_size_estimate = getattr(file, 'size', 0) or 0
use_streaming = file_size_estimate > 10 * 1024 * 1024 # 10MB threshold
allowed_types = [
"application/pdf",
"application/msword",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"image/jpeg",
"image/png"
]
if file.content_type not in allowed_types:
raise HTTPException(status_code=400, detail="Invalid file type")
if use_streaming:
# Use streaming validation for large files
# Enforce the same 10MB limit used for non-streaming uploads
is_valid, error_msg, metadata = await validate_large_upload(
file, category='document', max_size=10 * 1024 * 1024
)
if not is_valid:
raise HTTPException(status_code=400, detail=error_msg)
safe_filename = file_validator.sanitize_filename(file.filename)
file_ext = Path(safe_filename).suffix
mime_type = metadata.get('content_type', 'application/octet-stream')
# Stream upload for large files
subdir = f"documents/{file_no}"
final_path, actual_size, _checksum = await async_file_ops.stream_upload_file(
file,
f"{subdir}/{uuid.uuid4()}{file_ext}",
progress_callback=None # Could add WebSocket progress here
)
# Get absolute path for database storage
absolute_path = str(final_path)
# For downstream DB fields that expect a relative path, also keep a relative copy for consistency
relative_path = str(Path(final_path).relative_to(async_file_ops.base_upload_dir))
else:
# Use traditional validation for smaller files
content, safe_filename, file_ext, mime_type = await file_validator.validate_upload_file(
file, category='document'
)
max_size = 10 * 1024 * 1024 # 10MB
# Treat zero-byte payloads as no file uploaded to provide a clearer client error
if len(content) == 0:
raise HTTPException(status_code=400, detail="No file uploaded")
if len(content) > max_size:
raise HTTPException(status_code=400, detail="File too large")
# Create secure upload directory
upload_dir = f"uploads/{file_no}"
create_upload_directory(upload_dir)
upload_dir = f"uploads/{file_no}"
os.makedirs(upload_dir, exist_ok=True)
# Generate secure file path with UUID to prevent conflicts
unique_name = f"{uuid.uuid4()}{file_ext}"
path = file_validator.generate_secure_path(upload_dir, unique_name)
ext = file.filename.split(".")[-1]
unique_name = f"{uuid.uuid4()}.{ext}"
path = f"{upload_dir}/{unique_name}"
with open(path, "wb") as f:
f.write(content)
# Write file using async storage for consistency
try:
relative_path = await async_storage.save_bytes_async(
content,
safe_filename,
subdir=f"documents/{file_no}"
)
absolute_path = str(async_storage.base_dir / relative_path)
actual_size = len(content)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Could not save file: {str(e)}")
doc = Document(
file_no=file_no,
filename=safe_filename, # Use sanitized filename
path=absolute_path,
description=description,
type=mime_type, # Use validated MIME type
size=actual_size,
uploaded_by=current_user.username
)
db.add(doc)
db.commit()
db.refresh(doc)
# Send real-time notification for document upload
try:
await notify_completed(
file_no=file_no,
user_id=current_user.id,
data={
"action": "upload",
"document_id": doc.id,
"filename": safe_filename,
"size": actual_size,
"type": mime_type,
"description": description
}
)
except Exception as e:
# Don't fail the operation if notification fails
get_logger("documents").warning(f"Failed to send document upload notification: {str(e)}")
# Log workflow event for document upload
try:
from app.services.workflow_integration import log_document_uploaded_sync
log_document_uploaded_sync(
db=db,
file_no=file_no,
document_id=doc.id,
filename=safe_filename,
document_type=mime_type,
user_id=current_user.id
)
except Exception as e:
# Don't fail the operation if workflow logging fails
get_logger("documents").warning(f"Failed to log workflow event for document upload: {str(e)}")
return doc
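# Hedged client sketch for the upload route above. The mount prefix and exact route
# path are not shown in this hunk, so the URL here is a placeholder; the multipart
# field name "file" matches the UploadFile parameter.
def _example_upload_document(file_no: str, local_path: str, token: str) -> dict:
    import httpx  # assumed client library for the example
    with open(local_path, "rb") as fh:
        resp = httpx.post(
            f"http://localhost:8000/api/documents/{file_no}/upload",  # placeholder path
            headers={"Authorization": f"Bearer {token}"},
            files={"file": (local_path, fh, "application/pdf")},
            data={"description": "Signed QDRO"},
        )
    resp.raise_for_status()
    return resp.json()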
@router.get("/{file_no}/uploaded")
@@ -987,4 +1508,125 @@ async def update_document(
doc.description = description
db.commit()
db.refresh(doc)
return doc
# WebSocket endpoints for real-time document status notifications
@router.websocket("/ws/status/{file_no}")
async def ws_document_status(websocket: WebSocket, file_no: str):
"""
Subscribe to real-time document processing status updates for a specific file.
Users can connect to this endpoint to receive notifications about:
- Document generation started (processing)
- Document generation completed
- Document generation failed
- Document uploads
Authentication required via token query parameter.
"""
websocket_manager = get_websocket_manager()
topic = topic_for_file(file_no)
# Custom message handler for document status updates
async def handle_document_message(connection_id: str, message: WebSocketMessage):
"""Handle custom messages for document status"""
get_logger("documents").debug("Received document status message",
connection_id=connection_id,
file_no=file_no,
message_type=message.type)
# Use the WebSocket manager to handle the connection
connection_id = await websocket_manager.handle_connection(
websocket=websocket,
topics={topic},
require_auth=True,
metadata={"file_no": file_no, "endpoint": "document_status"},
message_handler=handle_document_message
)
if connection_id:
# Send initial welcome message with subscription confirmation
try:
pool = websocket_manager.pool
welcome_message = WebSocketMessage(
type="subscription_confirmed",
topic=topic,
data={
"file_no": file_no,
"message": f"Subscribed to document status updates for file {file_no}"
}
)
await pool._send_to_connection(connection_id, welcome_message)
get_logger("documents").info("Document status subscription confirmed",
connection_id=connection_id,
file_no=file_no)
except Exception as e:
get_logger("documents").error("Failed to send subscription confirmation",
connection_id=connection_id,
file_no=file_no,
error=str(e))
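# Illustrative subscriber sketch using the third-party `websockets` package (an
# assumption; any WebSocket client works). Host, port, mount path, and the token
# query parameter name are placeholders for the example.
async def _example_listen_for_status(file_no: str, token: str) -> None:
    import json
    import websockets  # assumed dependency for the example only
    uri = f"ws://localhost:8000/api/documents/ws/status/{file_no}?token={token}"
    async with websockets.connect(uri) as ws:
        async for raw in ws:
            event = json.loads(raw)
            # Expect a subscription_confirmed message first, then document_* events
            print(event.get("type"), event.get("data"))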
# Test endpoint for document notification system
@router.post("/test-notification/{file_no}")
async def test_document_notification(
file_no: str,
status: str = Query(..., description="Notification status: processing, completed, or failed"),
message: Optional[str] = Query(None, description="Optional message"),
current_user: User = Depends(get_current_user)
):
"""
Test endpoint to simulate document processing notifications.
This endpoint allows testing the WebSocket notification system by sending
simulated document status updates. Useful for development and debugging.
"""
if status not in ["processing", "completed", "failed"]:
raise HTTPException(
status_code=400,
detail="Status must be one of: processing, completed, failed"
)
# Prepare test data
test_data = {
"test": True,
"triggered_by": current_user.username,
"message": message or f"Test {status} notification for file {file_no}",
"timestamp": datetime.now(timezone.utc).isoformat()
}
# Send notification based on status
try:
if status == "processing":
sent_count = await notify_processing(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
elif status == "completed":
sent_count = await notify_completed(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
else: # failed
sent_count = await notify_failed(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
return {
"message": f"Test notification sent for file {file_no}",
"status": status,
"sent_to_connections": sent_count,
"data": test_data
}
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to send test notification: {str(e)}"
)
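# Example of exercising the notification pipeline end to end (assumed prefix and
# auth): trigger a simulated "completed" event while a WebSocket client such as the
# sketch above is subscribed to the same file_no.
def _example_trigger_test_notification(file_no: str, token: str) -> dict:
    import httpx  # assumed client library for the example
    resp = httpx.post(
        f"http://localhost:8000/api/documents/test-notification/{file_no}",
        headers={"Authorization": f"Bearer {token}"},
        params={"status": "completed", "message": "smoke test"},
    )
    resp.raise_for_status()
    return resp.json()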