""" Document Management API endpoints - QDROs, Templates, and General Documents """ from __future__ import annotations from typing import List, Optional, Dict, Any, Union from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile, File, Form, Request from sqlalchemy.orm import Session, joinedload from sqlalchemy import or_, func, and_, desc, asc, text from datetime import date, datetime, timezone import io import zipfile import os import uuid import shutil from pathlib import Path from app.database.base import get_db from app.api.search_highlight import build_query_tokens from app.services.query_utils import tokenized_ilike_filter, apply_pagination, apply_sorting, paginate_with_total from app.models.qdro import QDRO from app.models.files import File as FileModel from app.models.rolodex import Rolodex from app.models.lookups import FormIndex, FormList, Footer, Employee from app.models.user import User from app.auth.security import get_current_user from app.models.additional import Document from app.models.document_workflows import EventLog from app.core.logging import get_logger from app.services.audit import audit_service from app.services.cache import invalidate_search_cache from app.models.templates import DocumentTemplate, DocumentTemplateVersion from app.models.jobs import JobRecord from app.services.storage import get_default_storage from app.services.template_merge import extract_tokens_from_bytes, build_context, resolve_tokens, render_docx from app.services.document_notifications import notify_processing, notify_completed, notify_failed, topic_for_file, ADMIN_DOCUMENTS_TOPIC, get_last_status from app.middleware.websocket_middleware import get_websocket_manager, WebSocketMessage from fastapi import WebSocket router = APIRouter() # Pydantic schemas from pydantic import BaseModel, ConfigDict class QDROBase(BaseModel): file_no: str version: str = "01" title: Optional[str] = None form_name: Optional[str] = None content: Optional[str] = None status: str = "DRAFT" created_date: Optional[date] = None approved_date: Optional[date] = None filed_date: Optional[date] = None participant_name: Optional[str] = None spouse_name: Optional[str] = None plan_name: Optional[str] = None plan_administrator: Optional[str] = None notes: Optional[str] = None class QDROCreate(QDROBase): pass class QDROUpdate(BaseModel): version: Optional[str] = None title: Optional[str] = None form_name: Optional[str] = None content: Optional[str] = None status: Optional[str] = None created_date: Optional[date] = None approved_date: Optional[date] = None filed_date: Optional[date] = None participant_name: Optional[str] = None spouse_name: Optional[str] = None plan_name: Optional[str] = None plan_administrator: Optional[str] = None notes: Optional[str] = None class QDROResponse(QDROBase): id: int model_config = ConfigDict(from_attributes=True) class PaginatedQDROResponse(BaseModel): items: List[QDROResponse] total: int @router.get("/qdros/{file_no}", response_model=Union[List[QDROResponse], PaginatedQDROResponse]) async def get_file_qdros( file_no: str, skip: int = Query(0, ge=0, description="Offset for pagination"), limit: int = Query(100, ge=1, le=1000, description="Page size"), sort_by: Optional[str] = Query("updated", description="Sort by: updated, created, version, status"), sort_dir: Optional[str] = Query("desc", description="Sort direction: asc or desc"), include_total: bool = Query(False, description="When true, returns {items, total} instead of a plain list"), db: Session = Depends(get_db), 
    current_user: User = Depends(get_current_user),
):
    """Get QDROs for a specific file with optional sorting/pagination"""
    query = db.query(QDRO).filter(QDRO.file_no == file_no)

    # Sorting (whitelisted)
    query = apply_sorting(
        query,
        sort_by,
        sort_dir,
        allowed={
            "updated": [QDRO.updated_at, QDRO.id],
            "created": [QDRO.created_at, QDRO.id],
            "version": [QDRO.version],
            "status": [QDRO.status],
        },
    )

    qdros, total = paginate_with_total(query, skip, limit, include_total)
    if include_total:
        return {"items": qdros, "total": total or 0}
    return qdros


class CurrentStatusResponse(BaseModel):
    file_no: str
    status: str  # processing | completed | failed | unknown
    timestamp: Optional[str] = None
    data: Optional[Dict[str, Any]] = None
    history: Optional[list] = None


@router.get("/current-status/{file_no}", response_model=CurrentStatusResponse)
async def get_current_document_status(
    file_no: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """
    Return the last-known document generation status for a file.

    Priority:
    1) In-memory last broadcast state (processing/completed/failed)
    2) If there is no in-memory record, check for any uploaded/generated documents and report 'completed'
    3) Fall back to 'unknown'
    """
    # Build recent history from EventLog (last N events)
    history_items = []
    try:
        recent = (
            db.query(EventLog)
            .filter(
                EventLog.file_no == file_no,
                EventLog.event_type.in_(["document_processing", "document_completed", "document_failed"]),
            )
            .order_by(EventLog.occurred_at.desc())
            .limit(10)
            .all()
        )
        for ev in recent:
            history_items.append({
                "type": ev.event_type,
                "timestamp": ev.occurred_at.isoformat() if getattr(ev, "occurred_at", None) else None,
                "data": ev.event_data or {},
            })
    except Exception:
        history_items = []

    # Try the in-memory record for the current status
    last = get_last_status(file_no)
    if last:
        ts = last.get("timestamp")
        iso = ts.isoformat() if hasattr(ts, "isoformat") else None
        status_val = str(last.get("status") or "unknown")
        # Treat a stale 'processing' as unknown if older than 10 minutes
        try:
            if status_val == "processing" and isinstance(ts, datetime):
                age = datetime.now(timezone.utc) - ts
                if age.total_seconds() > 600:
                    status_val = "unknown"
        except Exception:
            pass
        return CurrentStatusResponse(
            file_no=file_no,
            status=status_val,
            timestamp=iso,
            data=(last.get("data") or None),
            history=history_items,
        )

    # Fallback: any existing documents imply the last status was 'completed'
    any_doc = db.query(Document).filter(Document.file_no == file_no).order_by(Document.id.desc()).first()
    if any_doc:
        return CurrentStatusResponse(
            file_no=file_no,
            status="completed",
            timestamp=any_doc.upload_date.isoformat() if getattr(any_doc, "upload_date", None) else None,
            data={
                "document_id": any_doc.id,
                "filename": any_doc.filename,
                "size": any_doc.size,
            },
            history=history_items,
        )

    return CurrentStatusResponse(file_no=file_no, status="unknown", history=history_items)


@router.get("/qdros/", response_model=Union[List[QDROResponse], PaginatedQDROResponse])
async def list_qdros(
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    status_filter: Optional[str] = Query(None),
    search: Optional[str] = Query(None),
    sort_by: Optional[str] = Query(None, description="Sort by: file_no, version, status, created, updated"),
    sort_dir: Optional[str] = Query("asc", description="Sort direction: asc or desc"),
    include_total: bool = Query(False, description="When true, returns {items, total} instead of a plain list"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """List all QDROs with filtering"""
    query = db.query(QDRO)

    if status_filter:
        query = query.filter(QDRO.status == status_filter)

    if search:
        # DRY: tokenize and apply case-insensitive search across common QDRO fields
        tokens = build_query_tokens(search)
        filter_expr = tokenized_ilike_filter(tokens, [
            QDRO.file_no,
            QDRO.form_name,
            QDRO.pet,
            QDRO.res,
            QDRO.case_number,
            QDRO.notes,
            QDRO.status,
        ])
        if filter_expr is not None:
            query = query.filter(filter_expr)

    # Sorting (whitelisted)
    query = apply_sorting(
        query,
        sort_by,
        sort_dir,
        allowed={
            "file_no": [QDRO.file_no],
            "version": [QDRO.version],
            "status": [QDRO.status],
            "created": [QDRO.created_at],
            "updated": [QDRO.updated_at],
        },
    )

    qdros, total = paginate_with_total(query, skip, limit, include_total)
    if include_total:
        return {"items": qdros, "total": total or 0}
    return qdros


@router.post("/qdros/", response_model=QDROResponse)
async def create_qdro(
    qdro_data: QDROCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Create new QDRO"""
    # Only accept fields that exist on the model and exclude None values
    allowed_fields = {c.name for c in QDRO.__table__.columns}
    payload = {
        k: v
        for k, v in qdro_data.model_dump(exclude_unset=True).items()
        if v is not None and k in allowed_fields
    }
    qdro = QDRO(**payload)
    # Backfill created_date if the model supports it; otherwise rely on created_at
    if hasattr(qdro, "created_date") and not getattr(qdro, "created_date"):
        setattr(qdro, "created_date", date.today())
    db.add(qdro)
    db.commit()
    db.refresh(qdro)
    try:
        await invalidate_search_cache()
    except Exception:
        pass
    return qdro


@router.get("/qdros/{file_no}/{qdro_id}", response_model=QDROResponse)
async def get_qdro(
    file_no: str,
    qdro_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Get specific QDRO"""
    qdro = db.query(QDRO).filter(
        QDRO.id == qdro_id,
        QDRO.file_no == file_no,
    ).first()
    if not qdro:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="QDRO not found",
        )
    return qdro


@router.put("/qdros/{file_no}/{qdro_id}", response_model=QDROResponse)
async def update_qdro(
    file_no: str,
    qdro_id: int,
    qdro_data: QDROUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Update QDRO"""
    qdro = db.query(QDRO).filter(
        QDRO.id == qdro_id,
        QDRO.file_no == file_no,
    ).first()
    if not qdro:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="QDRO not found",
        )

    # Update only the fields present on the model
    allowed_fields = {c.name for c in QDRO.__table__.columns}
    for field, value in qdro_data.model_dump(exclude_unset=True).items():
        if field in allowed_fields:
            setattr(qdro, field, value)

    db.commit()
    db.refresh(qdro)
    try:
        await invalidate_search_cache()
    except Exception:
        pass
    return qdro


@router.delete("/qdros/{file_no}/{qdro_id}")
async def delete_qdro(
    file_no: str,
    qdro_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Delete QDRO"""
    qdro = db.query(QDRO).filter(
        QDRO.id == qdro_id,
        QDRO.file_no == file_no,
    ).first()
    if not qdro:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="QDRO not found",
        )
    db.delete(qdro)
    db.commit()
    try:
        await invalidate_search_cache()
    except Exception:
        pass
    return {"message": "QDRO deleted successfully"}


# Enhanced Document Management Endpoints

# Template Management Schemas
class TemplateBase(BaseModel):
    """Base template schema"""
    form_id: str
    form_name: str
    category: str = "GENERAL"
    content: str = ""
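    # Illustrative note: `variables` maps template token names to human-readable
    # descriptions, e.g. {"CLIENT_FULL": "Client's full name", "FILE_NO": "Matter
    # file number"} (example values, not from the data model).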
    variables: Optional[Dict[str, str]] = None


class TemplateCreate(TemplateBase):
    pass


class TemplateUpdate(BaseModel):
    form_name: Optional[str] = None
    category: Optional[str] = None
    content: Optional[str] = None
    variables: Optional[Dict[str, str]] = None


class TemplateResponse(TemplateBase):
    active: bool = True
    created_at: Optional[datetime] = None

    model_config = ConfigDict(from_attributes=True)


# Document Generation Schema
class DocumentGenerateRequest(BaseModel):
    """Request to generate a document from a template"""
    template_id: str
    file_no: str
    output_format: str = "PDF"  # PDF, DOCX, HTML
    variables: Optional[Dict[str, Any]] = None


class DocumentResponse(BaseModel):
    """Generated document response"""
    document_id: str
    file_name: str
    file_path: str
    size: int
    created_at: datetime


# Document Statistics
class DocumentStats(BaseModel):
    """Document system statistics"""
    total_templates: int
    total_qdros: int
    templates_by_category: Dict[str, int]
    recent_activity: List[Dict[str, Any]]


class PaginatedTemplatesResponse(BaseModel):
    items: List[TemplateResponse]
    total: int


@router.get("/templates/", response_model=Union[List[TemplateResponse], PaginatedTemplatesResponse])
async def list_templates(
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=200),
    category: Optional[str] = Query(None),
    search: Optional[str] = Query(None),
    active_only: bool = Query(True),
    sort_by: Optional[str] = Query(None, description="Sort by: form_id, form_name, category, created, updated"),
    sort_dir: Optional[str] = Query("asc", description="Sort direction: asc or desc"),
    include_total: bool = Query(False, description="When true, returns {items, total} instead of a plain list"),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """List available document templates"""
    query = db.query(FormIndex)

    if active_only:
        query = query.filter(FormIndex.active == True)
    if category:
        query = query.filter(FormIndex.category == category)
    if search:
        # DRY: tokenize and apply case-insensitive search for templates
        tokens = build_query_tokens(search)
        filter_expr = tokenized_ilike_filter(tokens, [
            FormIndex.form_name,
            FormIndex.form_id,
            FormIndex.category,
        ])
        if filter_expr is not None:
            query = query.filter(filter_expr)

    # Sorting (whitelisted)
    query = apply_sorting(
        query,
        sort_by,
        sort_dir,
        allowed={
            "form_id": [FormIndex.form_id],
            "form_name": [FormIndex.form_name],
            "category": [FormIndex.category],
            "created": [FormIndex.created_at],
            "updated": [FormIndex.updated_at],
        },
    )

    templates, total = paginate_with_total(query, skip, limit, include_total)

    # Enhanced response including template content
    results = []
    for template in templates:
        template_lines = db.query(FormList).filter(
            FormList.form_id == template.form_id
        ).order_by(FormList.line_number).all()
        content = "\n".join([line.content or "" for line in template_lines])
        results.append({
            "form_id": template.form_id,
            "form_name": template.form_name,
            "category": template.category,
            "content": content,
            "active": template.active,
            "created_at": template.created_at,
            "variables": _extract_variables_from_content(content),
        })

    if include_total:
        return {"items": results, "total": total or 0}
    return results


@router.post("/templates/", response_model=TemplateResponse)
async def create_template(
    template_data: TemplateCreate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Create new document template"""
    # Check whether a template with this ID already exists
    existing = db.query(FormIndex).filter(FormIndex.form_id == template_data.form_id).first()
    if existing:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Template with this ID already exists",
        )

    # Create the form index entry
    form_index = FormIndex(
        form_id=template_data.form_id,
        form_name=template_data.form_name,
        category=template_data.category,
        active=True,
    )
    db.add(form_index)

    # Create the form content lines
    content_lines = template_data.content.split('\n')
    for i, line in enumerate(content_lines, 1):
        form_line = FormList(
            form_id=template_data.form_id,
            line_number=i,
            content=line,
        )
        db.add(form_line)

    db.commit()
    db.refresh(form_index)
    try:
        await invalidate_search_cache()
    except Exception:
        pass

    return {
        "form_id": form_index.form_id,
        "form_name": form_index.form_name,
        "category": form_index.category,
        "content": template_data.content,
        "active": form_index.active,
        "created_at": form_index.created_at,
        "variables": template_data.variables or {},
    }


@router.get("/templates/{template_id}", response_model=TemplateResponse)
async def get_template(
    template_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Get specific template with content"""
    template = db.query(FormIndex).filter(FormIndex.form_id == template_id).first()
    if not template:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Template not found",
        )

    # Get the template content
    template_lines = db.query(FormList).filter(
        FormList.form_id == template_id
    ).order_by(FormList.line_number).all()
    content = "\n".join([line.content or "" for line in template_lines])

    return {
        "form_id": template.form_id,
        "form_name": template.form_name,
        "category": template.category,
        "content": content,
        "active": template.active,
        "created_at": template.created_at,
        "variables": _extract_variables_from_content(content),
    }


@router.put("/templates/{template_id}", response_model=TemplateResponse)
async def update_template(
    template_id: str,
    template_data: TemplateUpdate,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Update document template"""
    template = db.query(FormIndex).filter(FormIndex.form_id == template_id).first()
    if not template:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Template not found",
        )

    # Update the form index
    if template_data.form_name:
        template.form_name = template_data.form_name
    if template_data.category:
        template.category = template_data.category

    # Update content if provided
    if template_data.content is not None:
        # Delete existing content lines
        db.query(FormList).filter(FormList.form_id == template_id).delete()
        # Add new content lines
        content_lines = template_data.content.split('\n')
        for i, line in enumerate(content_lines, 1):
            form_line = FormList(
                form_id=template_id,
                line_number=i,
                content=line,
            )
            db.add(form_line)

    db.commit()
    db.refresh(template)
    try:
        await invalidate_search_cache()
    except Exception:
        pass

    # Get the updated content
    template_lines = db.query(FormList).filter(
        FormList.form_id == template_id
    ).order_by(FormList.line_number).all()
    content = "\n".join([line.content or "" for line in template_lines])

    return {
        "form_id": template.form_id,
        "form_name": template.form_name,
        "category": template.category,
        "content": content,
        "active": template.active,
        "created_at": template.created_at,
        "variables": _extract_variables_from_content(content),
    }


@router.delete("/templates/{template_id}")
async def delete_template(
    template_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Delete document template"""
    template = db.query(FormIndex).filter(FormIndex.form_id == template_id).first()
    if not template:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Template not found",
        )

    # Delete content lines
    db.query(FormList).filter(FormList.form_id == template_id).delete()
    # Delete the template
    db.delete(template)
    db.commit()
    try:
        await invalidate_search_cache()
    except Exception:
        pass
    return {"message": "Template deleted successfully"}


@router.post("/generate/{template_id}")
async def generate_document(
    template_id: str,
    request: DocumentGenerateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Generate document from template"""
    # Get the template
    template = db.query(FormIndex).filter(FormIndex.form_id == template_id).first()
    if not template:
        raise HTTPException(status_code=404, detail="Template not found")

    # Get the file information
    file_obj = db.query(FileModel).options(
        joinedload(FileModel.owner)
    ).filter(FileModel.file_no == request.file_no).first()
    if not file_obj:
        raise HTTPException(status_code=404, detail="File not found")

    # Get the template content
    template_lines = db.query(FormList).filter(
        FormList.form_id == template_id
    ).order_by(FormList.line_number).all()
    template_content = "\n".join([line.content or "" for line in template_lines])

    # Prepare merge variables
    merge_vars = {
        "FILE_NO": file_obj.file_no,
        "CLIENT_FIRST": file_obj.owner.first if file_obj.owner else "",
        "CLIENT_LAST": file_obj.owner.last if file_obj.owner else "",
        "CLIENT_FULL": f"{file_obj.owner.first or ''} {file_obj.owner.last}".strip() if file_obj.owner else "",
        "MATTER": file_obj.regarding or "",
        "OPENED": file_obj.opened.strftime("%B %d, %Y") if file_obj.opened else "",
        "ATTORNEY": file_obj.empl_num or "",
        "TODAY": date.today().strftime("%B %d, %Y"),
    }

    # Add any custom variables from the request
    if request.variables:
        merge_vars.update(request.variables)

    # Perform variable substitution
    merged_content = _merge_template_variables(template_content, merge_vars)

    # Generate the document file
    document_id = str(uuid.uuid4())
    file_name = f"{template.form_name}_{file_obj.file_no}_{date.today().isoformat()}"
    exports_dir = "/app/exports"
    try:
        os.makedirs(exports_dir, exist_ok=True)
    except Exception:
        try:
            os.makedirs("exports", exist_ok=True)
            exports_dir = "exports"
        except Exception:
            exports_dir = "."

    if request.output_format.upper() == "PDF":
        # PDF generation is not yet implemented; write the merged text as a placeholder
        file_name += ".pdf"
        file_path = f"{exports_dir}/{document_id}.txt"
        with open(file_path, "w") as f:
            f.write(merged_content)
    elif request.output_format.upper() == "DOCX":
        # DOCX generation is not yet implemented; write the merged text as a placeholder
        file_name += ".docx"
        file_path = f"{exports_dir}/{document_id}.txt"
        with open(file_path, "w") as f:
            f.write(merged_content)
    else:  # HTML
        file_path = f"{exports_dir}/{document_id}.html"
        file_name += ".html"
        html_content = (
            "<html><body><pre>"
            f"{merged_content}"
" with open(file_path, "w") as f: f.write(html_content) file_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0 return { "document_id": document_id, "file_name": file_name, "file_path": file_path, "size": file_size, "created_at": datetime.now(timezone.utc) } @router.get("/categories/") async def get_template_categories( db: Session = Depends(get_db), current_user: User = Depends(get_current_user) ): """Get available template categories""" categories = db.query(FormIndex.category).distinct().all() return [cat[0] for cat in categories if cat[0]] @router.get("/stats/summary") async def get_document_stats( db: Session = Depends(get_db), current_user: User = Depends(get_current_user) ): """Get document system statistics""" # Template statistics total_templates = db.query(FormIndex).filter(FormIndex.active == True).count() total_qdros = db.query(QDRO).count() # Templates by category category_stats = db.query( FormIndex.category, func.count(FormIndex.form_id) ).filter(FormIndex.active == True).group_by(FormIndex.category).all() categories_dict = {cat[0] or "Uncategorized": cat[1] for cat in category_stats} # Recent QDRO activity recent_qdros = db.query(QDRO).order_by(desc(QDRO.updated_at)).limit(5).all() recent_activity = [ { "type": "QDRO", "file_no": qdro.file_no, "status": qdro.status, "updated_at": qdro.updated_at.isoformat() if qdro.updated_at else None } for qdro in recent_qdros ] return { "total_templates": total_templates, "total_qdros": total_qdros, "templates_by_category": categories_dict, "recent_activity": recent_activity } @router.get("/file/{file_no}/documents") async def get_file_documents( file_no: str, sort_by: Optional[str] = Query("updated", description="Sort by: updated, created"), sort_dir: Optional[str] = Query("desc", description="Sort direction: asc or desc"), skip: int = Query(0, ge=0), limit: int = Query(100, ge=1, le=1000), include_total: bool = Query(False, description="When true, returns {items, total} instead of a plain list"), db: Session = Depends(get_db), current_user: User = Depends(get_current_user) ): """Get all documents associated with a specific file, with optional sorting/pagination""" # Base query for QDROs tied to the file query = db.query(QDRO).filter(QDRO.file_no == file_no) # Apply sorting using shared helper (map friendly names to columns) query = apply_sorting( query, sort_by, sort_dir, allowed={ "updated": [QDRO.updated_at, QDRO.id], "created": [QDRO.created_at, QDRO.id], }, ) qdros, total = paginate_with_total(query, skip, limit, include_total) items = [ { "id": qdro.id, "type": "QDRO", "title": f"QDRO v{qdro.version}", "status": qdro.status, "created_date": qdro.created_date.isoformat() if getattr(qdro, "created_date", None) else None, "updated_at": qdro.updated_at.isoformat() if getattr(qdro, "updated_at", None) else None, "file_no": qdro.file_no, } for qdro in qdros ] payload = {"file_no": file_no, "documents": items, "total_count": (total if include_total else None)} # Maintain previous shape by omitting total_count when include_total is False? The prior code always returned total_count. 
    # total_count is kept for backward compatibility: it is the full match count
    # when include_total is requested, otherwise the size of the returned page.
    payload = {
        "file_no": file_no,
        "documents": items,
        "total_count": total if include_total else len(items),
    }
    return payload


def _extract_variables_from_content(content: str) -> Dict[str, str]:
    """Extract variable placeholders from template content"""
    import re

    variables = {}

    # Find variables in the format {{VARIABLE_NAME}}
    matches = re.findall(r'\{\{([^}]+)\}\}', content)
    for match in matches:
        var_name = match.strip()
        variables[var_name] = f"Placeholder for {var_name}"

    # Find variables in the format ^VARIABLE
    matches = re.findall(r'\^([A-Z_]+)', content)
    for match in matches:
        variables[match] = f"Placeholder for {match}"

    return variables


def _merge_template_variables(content: str, variables: Dict[str, Any]) -> str:
    """Replace template variables with actual values"""
    merged = content
    # Replace both the {{VARIABLE}} and ^VARIABLE formats
    for var_name, value in variables.items():
        merged = merged.replace(f"{{{{{var_name}}}}}", str(value or ""))
        merged = merged.replace(f"^{var_name}", str(value or ""))
    return merged


# --- Batch Document Generation (MVP synchronous) ---

class BatchGenerateRequest(BaseModel):
    """Batch generation request using the DocumentTemplate system."""
    template_id: int
    version_id: Optional[int] = None
    file_nos: List[str]
    output_format: str = "DOCX"  # DOCX (default); PDF and HTML are not yet supported
    context: Optional[Dict[str, Any]] = None  # additional global context
    bundle_zip: bool = False  # when true, also create a ZIP bundle of generated outputs


class BatchGenerateItemResult(BaseModel):
    file_no: str
    status: str  # "success" | "error"
    document_id: Optional[int] = None
    filename: Optional[str] = None
    path: Optional[str] = None
    url: Optional[str] = None
    size: Optional[int] = None
    unresolved: Optional[List[str]] = None
    error: Optional[str] = None


class BatchGenerateResponse(BaseModel):
    job_id: str
    template_id: int
    version_id: int
    total_requested: int
    total_success: int
    total_failed: int
    results: List[BatchGenerateItemResult]
    bundle_url: Optional[str] = None
    bundle_size: Optional[int] = None


@router.post("/generate-batch", response_model=BatchGenerateResponse)
async def generate_batch_documents(
    payload: BatchGenerateRequest,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Synchronously generate documents for multiple files from a template version.

    Notes:
    - Currently supports DOCX output. PDF/HTML conversion is not yet implemented.
    - Saves generated bytes to default storage under uploads/generated/{file_no}/.
    - Persists a `Document` record per successful file.
    - Returns per-item status with unresolved tokens for transparency.
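
    Example request body (illustrative values only):

        {
            "template_id": 12,
            "file_nos": ["2024-001", "2024-002"],
            "output_format": "DOCX",
            "bundle_zip": true
        }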
""" tpl = db.query(DocumentTemplate).filter(DocumentTemplate.id == payload.template_id).first() if not tpl: raise HTTPException(status_code=404, detail="Template not found") resolved_version_id = payload.version_id or tpl.current_version_id if not resolved_version_id: raise HTTPException(status_code=400, detail="Template has no approved/current version") ver = ( db.query(DocumentTemplateVersion) .filter( DocumentTemplateVersion.id == resolved_version_id, DocumentTemplateVersion.template_id == tpl.id, ) .first() ) if not ver: raise HTTPException(status_code=404, detail="Template version not found") storage = get_default_storage() try: template_bytes = storage.open_bytes(ver.storage_path) except Exception: raise HTTPException(status_code=404, detail="Stored template file not found") tokens = extract_tokens_from_bytes(template_bytes) results: List[BatchGenerateItemResult] = [] # Pre-normalize file numbers (strip spaces, ignore empties) requested_files: List[str] = [fn.strip() for fn in (payload.file_nos or []) if fn and str(fn).strip()] if not requested_files: raise HTTPException(status_code=400, detail="No file numbers provided") # Fetch all files in one query files_map: Dict[str, FileModel] = { f.file_no: f for f in db.query(FileModel).options(joinedload(FileModel.owner)).filter(FileModel.file_no.in_(requested_files)).all() } generated_items: List[Dict[str, Any]] = [] # capture bytes for optional ZIP for file_no in requested_files: # Notify processing started for this file try: await notify_processing( file_no=file_no, user_id=current_user.id, data={ "template_id": tpl.id, "template_name": tpl.name, "job_id": job_id } ) except Exception: # Don't fail generation if notification fails pass file_obj = files_map.get(file_no) if not file_obj: # Notify failure try: await notify_failed( file_no=file_no, user_id=current_user.id, data={"error": "File not found", "template_id": tpl.id} ) except Exception: pass results.append( BatchGenerateItemResult( file_no=file_no, status="error", error="File not found", ) ) continue # Build per-file context file_context: Dict[str, Any] = { "FILE_NO": file_obj.file_no, "CLIENT_FIRST": getattr(getattr(file_obj, "owner", None), "first", "") or "", "CLIENT_LAST": getattr(getattr(file_obj, "owner", None), "last", "") or "", "CLIENT_FULL": ( f"{getattr(getattr(file_obj, 'owner', None), 'first', '') or ''} " f"{getattr(getattr(file_obj, 'owner', None), 'last', '') or ''}" ).strip(), "MATTER": file_obj.regarding or "", "OPENED": file_obj.opened.strftime("%B %d, %Y") if getattr(file_obj, "opened", None) else "", "ATTORNEY": getattr(file_obj, "empl_num", "") or "", } # Merge global context merged_context = build_context({**(payload.context or {}), **file_context}, "file", file_obj.file_no) resolved_vars, unresolved_tokens = resolve_tokens(db, tokens, merged_context) try: if ver.mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document": output_bytes = render_docx(template_bytes, resolved_vars) output_mime = ver.mime_type extension = ".docx" else: # For non-DOCX templates (e.g., PDF), pass-through content output_bytes = template_bytes output_mime = ver.mime_type extension = ".bin" # Name and save safe_name = f"{tpl.name}_{file_obj.file_no}{extension}" subdir = f"generated/{file_obj.file_no}" storage_path = storage.save_bytes(content=output_bytes, filename_hint=safe_name, subdir=subdir, content_type=output_mime) # Persist Document record abs_or_rel_path = os.path.join("uploads", storage_path).replace("\\", "/") doc = Document( 
            doc = Document(
                file_no=file_obj.file_no,
                filename=safe_name,
                path=abs_or_rel_path,
                description=f"Generated from template '{tpl.name}'",
                type=output_mime,
                size=len(output_bytes),
                uploaded_by=getattr(current_user, "username", None),
            )
            db.add(doc)
            db.commit()
            db.refresh(doc)

            # Notify successful completion
            try:
                await notify_completed(
                    file_no=file_obj.file_no,
                    user_id=current_user.id,
                    data={
                        "template_id": tpl.id,
                        "template_name": tpl.name,
                        "document_id": doc.id,
                        "filename": doc.filename,
                        "size": doc.size,
                        "unresolved_tokens": unresolved_tokens or [],
                    },
                )
            except Exception:
                # Don't fail generation if notification fails
                pass

            results.append(
                BatchGenerateItemResult(
                    file_no=file_obj.file_no,
                    status="success",
                    document_id=doc.id,
                    filename=doc.filename,
                    path=doc.path,
                    url=storage.public_url(storage_path),
                    size=doc.size,
                    unresolved=unresolved_tokens or [],
                )
            )
            # Keep for bundling
            generated_items.append({
                "filename": doc.filename,
                "storage_path": storage_path,
            })
        except Exception as e:
            # Notify failure
            try:
                await notify_failed(
                    file_no=file_obj.file_no,
                    user_id=current_user.id,
                    data={
                        "template_id": tpl.id,
                        "template_name": tpl.name,
                        "error": str(e),
                        "unresolved_tokens": unresolved_tokens or [],
                    },
                )
            except Exception:
                pass
            # Best-effort rollback of the partial doc add
            try:
                db.rollback()
            except Exception:
                pass
            results.append(
                BatchGenerateItemResult(
                    file_no=file_obj.file_no,
                    status="error",
                    error=str(e),
                    unresolved=unresolved_tokens or [],
                )
            )

    total_success = sum(1 for r in results if r.status == "success")
    total_failed = sum(1 for r in results if r.status == "error")

    bundle_url: Optional[str] = None
    bundle_size: Optional[int] = None
    bundle_storage_path: Optional[str] = None

    # Optionally create a ZIP bundle of the generated outputs
    if payload.bundle_zip and total_success > 0:
        # Build the zip in memory, then save it via the storage adapter
        zip_buffer = io.BytesIO()
        with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
            for item in generated_items:
                try:
                    file_bytes = storage.open_bytes(item["storage_path"])  # relative path under uploads
                    # Use the clean filename inside the zip
                    zf.writestr(item["filename"], file_bytes)
                except Exception:
                    # Skip missing/unreadable files in the bundle; keep the job successful
                    continue
        zip_bytes = zip_buffer.getvalue()
        safe_zip_name = f"documents_batch_{job_id}.zip"
        bundle_storage_path = storage.save_bytes(content=zip_bytes, filename_hint=safe_zip_name, subdir="bundles", content_type="application/zip")
        bundle_url = storage.public_url(bundle_storage_path)
        bundle_size = len(zip_bytes)

    # Persist a simple job record
    try:
        job = JobRecord(
            job_id=job_id,
            job_type="documents_batch",
            status="completed",
            requested_by_username=getattr(current_user, "username", None),
            started_at=datetime.now(timezone.utc),
            completed_at=datetime.now(timezone.utc),
            total_requested=len(requested_files),
            total_success=total_success,
            total_failed=total_failed,
            result_storage_path=bundle_storage_path,
            result_mime_type=("application/zip" if bundle_storage_path else None),
            result_size=bundle_size,
            details={
                "template_id": tpl.id,
                "version_id": ver.id,
                "file_nos": requested_files,
            },
        )
        db.add(job)
        db.commit()
    except Exception:
        try:
            db.rollback()
        except Exception:
            pass

    return BatchGenerateResponse(
        job_id=job_id,
        template_id=tpl.id,
        version_id=ver.id,
        total_requested=len(requested_files),
        total_success=total_success,
        total_failed=total_failed,
        results=results,
        bundle_url=bundle_url,
        bundle_size=bundle_size,
    )


from fastapi.responses import StreamingResponse
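
# Typical client flow for batch jobs (sketch; the /api/documents prefix is an
# assumption about where this router is mounted):
#   1. POST /api/documents/generate-batch           -> returns job_id (+ bundle_url
#      when bundle_zip was requested)
#   2. GET  /api/documents/jobs/{job_id}            -> job status and totals
#   3. GET  /api/documents/jobs/{job_id}/result     -> downloads the ZIP bundle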
class JobStatusResponse(BaseModel):
    job_id: str
    job_type: str
    status: str
    total_requested: int
    total_success: int
    total_failed: int
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None
    bundle_available: bool = False
    bundle_url: Optional[str] = None
    bundle_size: Optional[int] = None


@router.get("/jobs/{job_id}", response_model=JobStatusResponse)
async def get_job_status(
    job_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    job = db.query(JobRecord).filter(JobRecord.job_id == job_id).first()
    if not job:
        raise HTTPException(status_code=404, detail="Job not found")
    return JobStatusResponse(
        job_id=job.job_id,
        job_type=job.job_type,
        status=job.status,
        total_requested=job.total_requested or 0,
        total_success=job.total_success or 0,
        total_failed=job.total_failed or 0,
        started_at=getattr(job, "started_at", None),
        completed_at=getattr(job, "completed_at", None),
        bundle_available=bool(job.result_storage_path),
        bundle_url=(get_default_storage().public_url(job.result_storage_path) if job.result_storage_path else None),
        bundle_size=job.result_size,
    )


@router.get("/jobs/{job_id}/result")
async def download_job_result(
    job_id: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    job = db.query(JobRecord).filter(JobRecord.job_id == job_id).first()
    if not job or not job.result_storage_path:
        raise HTTPException(status_code=404, detail="Result not available for this job")

    storage = get_default_storage()
    try:
        content = storage.open_bytes(job.result_storage_path)
    except Exception:
        raise HTTPException(status_code=404, detail="Stored bundle not found")

    # Derive the download filename from the stored path
    base = os.path.basename(job.result_storage_path)
    headers = {
        "Content-Disposition": f"attachment; filename=\"{base}\"",
    }
    return StreamingResponse(iter([content]), media_type=(job.result_mime_type or "application/zip"), headers=headers)


# --- Client Error Logging (for the Documents page) ---

class ClientErrorLog(BaseModel):
    """Payload for client-side error logging"""
    message: str
    action: Optional[str] = None
    stack: Optional[str] = None
    url: Optional[str] = None
    line: Optional[int] = None
    column: Optional[int] = None
    user_agent: Optional[str] = None
    extra: Optional[Dict[str, Any]] = None


@router.post("/client-error")
async def log_client_error(
    payload: ClientErrorLog,
    request: Request,
    db: Session = Depends(get_db),
    current_user: Optional[User] = Depends(lambda: None),
):
    """Accept client-side error logs from the Documents page.

    This endpoint is lightweight and safe to call; it records the error to the
    application logs and, best-effort, to the audit log without interrupting the UI.
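
    Example payload (illustrative values only):

        {
            "message": "TypeError: cannot read properties of undefined",
            "action": "generate-batch",
            "url": "/documents",
            "line": 42
        }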
""" logger = get_logger("client.documents") client_ip = request.headers.get("x-forwarded-for") if client_ip: client_ip = client_ip.split(",")[0].strip() else: client_ip = request.client.host if request.client else None logger.error( "Client error reported", action=payload.action, message=payload.message, stack=payload.stack, page="/documents", url=payload.url or str(request.url), line=payload.line, column=payload.column, user=getattr(current_user, "username", None), user_id=getattr(current_user, "id", None), user_agent=payload.user_agent or request.headers.get("user-agent"), client_ip=client_ip, extra=payload.extra, ) # Best-effort audit log; do not raise on failure try: audit_service.log_action( db=db, action="CLIENT_ERROR", resource_type="DOCUMENTS", user=current_user, resource_id=None, details={ "action": payload.action, "message": payload.message, "url": payload.url or str(request.url), "line": payload.line, "column": payload.column, "extra": payload.extra, }, request=request, ) except Exception: pass return {"status": "logged"} @router.post("/upload/{file_no}") async def upload_document( file_no: str, file: UploadFile = File(...), description: Optional[str] = Form(None), db: Session = Depends(get_db), current_user: User = Depends(get_current_user) ): """Upload a document to a file with comprehensive security validation and async operations""" from app.utils.file_security import file_validator, create_upload_directory from app.services.async_file_operations import async_file_ops, validate_large_upload from app.services.async_storage import async_storage file_obj = db.query(FileModel).filter(FileModel.file_no == file_no).first() if not file_obj: raise HTTPException(status_code=404, detail="File not found") # Determine if this is a large file that needs streaming file_size_estimate = getattr(file, 'size', 0) or 0 use_streaming = file_size_estimate > 10 * 1024 * 1024 # 10MB threshold if use_streaming: # Use streaming validation for large files # Enforce the same 10MB limit used for non-streaming uploads is_valid, error_msg, metadata = await validate_large_upload( file, category='document', max_size=10 * 1024 * 1024 ) if not is_valid: raise HTTPException(status_code=400, detail=error_msg) safe_filename = file_validator.sanitize_filename(file.filename) file_ext = Path(safe_filename).suffix mime_type = metadata.get('content_type', 'application/octet-stream') # Stream upload for large files subdir = f"documents/{file_no}" final_path, actual_size, _checksum = await async_file_ops.stream_upload_file( file, f"{subdir}/{uuid.uuid4()}{file_ext}", progress_callback=None # Could add WebSocket progress here ) # Get absolute path for database storage absolute_path = str(final_path) # For downstream DB fields that expect a relative path, also keep a relative for consistency relative_path = str(Path(final_path).relative_to(async_file_ops.base_upload_dir)) else: # Use traditional validation for smaller files content, safe_filename, file_ext, mime_type = await file_validator.validate_upload_file( file, category='document' ) # Create secure upload directory upload_dir = f"uploads/{file_no}" create_upload_directory(upload_dir) # Generate secure file path with UUID to prevent conflicts unique_name = f"{uuid.uuid4()}{file_ext}" path = file_validator.generate_secure_path(upload_dir, unique_name) # Write file using async storage for consistency try: relative_path = await async_storage.save_bytes_async( content, safe_filename, subdir=f"documents/{file_no}" ) absolute_path = str(async_storage.base_dir / 
            actual_size = len(content)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Could not save file: {str(e)}")

    doc = Document(
        file_no=file_no,
        filename=safe_filename,  # sanitized filename
        path=absolute_path,
        description=description,
        type=mime_type,  # validated MIME type
        size=actual_size,
        uploaded_by=current_user.username,
    )
    db.add(doc)
    db.commit()
    db.refresh(doc)

    # Send a real-time notification for the document upload
    try:
        await notify_completed(
            file_no=file_no,
            user_id=current_user.id,
            data={
                "action": "upload",
                "document_id": doc.id,
                "filename": safe_filename,
                "size": actual_size,
                "type": mime_type,
                "description": description,
            },
        )
    except Exception as e:
        # Don't fail the operation if notification fails
        get_logger("documents").warning(f"Failed to send document upload notification: {str(e)}")

    # Log a workflow event for the document upload
    try:
        from app.services.workflow_integration import log_document_uploaded_sync
        log_document_uploaded_sync(
            db=db,
            file_no=file_no,
            document_id=doc.id,
            filename=safe_filename,
            document_type=mime_type,
            user_id=current_user.id,
        )
    except Exception as e:
        # Don't fail the operation if workflow logging fails
        get_logger("documents").warning(f"Failed to log workflow event for document upload: {str(e)}")

    return doc


@router.get("/{file_no}/uploaded")
async def list_uploaded_documents(
    file_no: str,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """List uploaded documents for a file"""
    docs = db.query(Document).filter(Document.file_no == file_no).all()
    return docs


@router.delete("/uploaded/{doc_id}")
async def delete_document(
    doc_id: int,
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Delete an uploaded document"""
    doc = db.query(Document).filter(Document.id == doc_id).first()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")
    if os.path.exists(doc.path):
        os.remove(doc.path)
    db.delete(doc)
    db.commit()
    return {"message": "Document deleted successfully"}


@router.put("/uploaded/{doc_id}")
async def update_document(
    doc_id: int,
    description: str = Form(...),
    db: Session = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Update document description"""
    doc = db.query(Document).filter(Document.id == doc_id).first()
    if not doc:
        raise HTTPException(status_code=404, detail="Document not found")
    doc.description = description
    db.commit()
    db.refresh(doc)
    return doc


# WebSocket endpoints for real-time document status notifications

@router.websocket("/ws/status/{file_no}")
async def ws_document_status(websocket: WebSocket, file_no: str):
    """
    Subscribe to real-time document processing status updates for a specific file.

    Users can connect to this endpoint to receive notifications about:
    - Document generation started (processing)
    - Document generation completed
    - Document generation failed
    - Document uploads

    Authentication is required via a token query parameter.
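
    Example client connection (sketch; the host and /api/documents mount path
    are assumptions):

        ws://localhost:8000/api/documents/ws/status/2024-001?token=<JWT>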
""" websocket_manager = get_websocket_manager() topic = topic_for_file(file_no) # Custom message handler for document status updates async def handle_document_message(connection_id: str, message: WebSocketMessage): """Handle custom messages for document status""" get_logger("documents").debug("Received document status message", connection_id=connection_id, file_no=file_no, message_type=message.type) # Use the WebSocket manager to handle the connection connection_id = await websocket_manager.handle_connection( websocket=websocket, topics={topic}, require_auth=True, metadata={"file_no": file_no, "endpoint": "document_status"}, message_handler=handle_document_message ) if connection_id: # Send initial welcome message with subscription confirmation try: pool = websocket_manager.pool welcome_message = WebSocketMessage( type="subscription_confirmed", topic=topic, data={ "file_no": file_no, "message": f"Subscribed to document status updates for file {file_no}" } ) await pool._send_to_connection(connection_id, welcome_message) get_logger("documents").info("Document status subscription confirmed", connection_id=connection_id, file_no=file_no) except Exception as e: get_logger("documents").error("Failed to send subscription confirmation", connection_id=connection_id, file_no=file_no, error=str(e)) # Test endpoint for document notification system @router.post("/test-notification/{file_no}") async def test_document_notification( file_no: str, status: str = Query(..., description="Notification status: processing, completed, or failed"), message: Optional[str] = Query(None, description="Optional message"), current_user: User = Depends(get_current_user) ): """ Test endpoint to simulate document processing notifications. This endpoint allows testing the WebSocket notification system by sending simulated document status updates. Useful for development and debugging. """ if status not in ["processing", "completed", "failed"]: raise HTTPException( status_code=400, detail="Status must be one of: processing, completed, failed" ) # Prepare test data test_data = { "test": True, "triggered_by": current_user.username, "message": message or f"Test {status} notification for file {file_no}", "timestamp": datetime.now(timezone.utc).isoformat() } # Send notification based on status try: if status == "processing": sent_count = await notify_processing( file_no=file_no, user_id=current_user.id, data=test_data ) elif status == "completed": sent_count = await notify_completed( file_no=file_no, user_id=current_user.id, data=test_data ) else: # failed sent_count = await notify_failed( file_no=file_no, user_id=current_user.id, data=test_data ) return { "message": f"Test notification sent for file {file_no}", "status": status, "sent_to_connections": sent_count, "data": test_data } except Exception as e: raise HTTPException( status_code=500, detail=f"Failed to send test notification: {str(e)}" )