"""
Document Management API endpoints - QDROs, Templates, and General Documents
"""
from __future__ import annotations
from typing import List, Optional, Dict, Any, Union
from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile, File, Form, Request
from fastapi.responses import StreamingResponse
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import or_, func, and_, desc, asc, text
from datetime import date, datetime, timezone
import io
import zipfile
import os
import uuid
import shutil
from pathlib import Path
from app.database.base import get_db
from app.api.search_highlight import build_query_tokens
from app.services.query_utils import tokenized_ilike_filter, apply_pagination, apply_sorting, paginate_with_total
from app.models.qdro import QDRO
from app.models.files import File as FileModel
from app.models.rolodex import Rolodex
from app.models.lookups import FormIndex, FormList, Footer, Employee
from app.models.user import User
from app.auth.security import get_current_user
from app.models.additional import Document
from app.models.document_workflows import EventLog
from app.core.logging import get_logger
from app.services.audit import audit_service
from app.services.cache import invalidate_search_cache
from app.models.templates import DocumentTemplate, DocumentTemplateVersion
from app.models.jobs import JobRecord
from app.services.storage import get_default_storage
from app.services.template_merge import extract_tokens_from_bytes, build_context, resolve_tokens, render_docx
from app.services.document_notifications import notify_processing, notify_completed, notify_failed, topic_for_file, ADMIN_DOCUMENTS_TOPIC, get_last_status
from app.middleware.websocket_middleware import get_websocket_manager, WebSocketMessage
from fastapi import WebSocket
router = APIRouter()
# Pydantic schemas
from pydantic import BaseModel, ConfigDict
class QDROBase(BaseModel):
file_no: str
version: str = "01"
title: Optional[str] = None
form_name: Optional[str] = None
content: Optional[str] = None
status: str = "DRAFT"
created_date: Optional[date] = None
approved_date: Optional[date] = None
filed_date: Optional[date] = None
participant_name: Optional[str] = None
spouse_name: Optional[str] = None
plan_name: Optional[str] = None
plan_administrator: Optional[str] = None
notes: Optional[str] = None
class QDROCreate(QDROBase):
pass
class QDROUpdate(BaseModel):
version: Optional[str] = None
title: Optional[str] = None
form_name: Optional[str] = None
content: Optional[str] = None
status: Optional[str] = None
created_date: Optional[date] = None
approved_date: Optional[date] = None
filed_date: Optional[date] = None
participant_name: Optional[str] = None
spouse_name: Optional[str] = None
plan_name: Optional[str] = None
plan_administrator: Optional[str] = None
notes: Optional[str] = None
class QDROResponse(QDROBase):
id: int
model_config = ConfigDict(from_attributes=True)
class PaginatedQDROResponse(BaseModel):
items: List[QDROResponse]
total: int
@router.get("/qdros/{file_no}", response_model=Union[List[QDROResponse], PaginatedQDROResponse])
async def get_file_qdros(
file_no: str,
skip: int = Query(0, ge=0, description="Offset for pagination"),
limit: int = Query(100, ge=1, le=1000, description="Page size"),
sort_by: Optional[str] = Query("updated", description="Sort by: updated, created, version, status"),
sort_dir: Optional[str] = Query("desc", description="Sort direction: asc or desc"),
include_total: bool = Query(False, description="When true, returns {items, total} instead of a plain list"),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Get QDROs for a specific file with optional sorting/pagination"""
query = db.query(QDRO).filter(QDRO.file_no == file_no)
# Sorting (whitelisted)
query = apply_sorting(
query,
sort_by,
sort_dir,
allowed={
"updated": [QDRO.updated_at, QDRO.id],
"created": [QDRO.created_at, QDRO.id],
"version": [QDRO.version],
"status": [QDRO.status],
},
)
qdros, total = paginate_with_total(query, skip, limit, include_total)
if include_total:
return {"items": qdros, "total": total or 0}
return qdros
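# Example (hedged; the path prefix depends on how this router is mounted):
#   GET .../qdros/2024-001?include_total=true&limit=25
#   -> {"items": [...], "total": 137}
# Without include_total the same call returns a bare JSON array of QDROs.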
class CurrentStatusResponse(BaseModel):
file_no: str
status: str # processing | completed | failed | unknown
timestamp: Optional[str] = None
data: Optional[Dict[str, Any]] = None
    history: Optional[List[Dict[str, Any]]] = None
@router.get("/current-status/{file_no}", response_model=CurrentStatusResponse)
async def get_current_document_status(
file_no: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
"""
Return last-known document generation status for a file.
Priority:
1) In-memory last broadcast state (processing/completed/failed)
2) If no memory record, check for any uploaded/generated documents and report 'completed'
3) Fallback to 'unknown'
"""
# Build recent history from EventLog (last N events)
history_items = []
try:
recent = (
db.query(EventLog)
.filter(EventLog.file_no == file_no, EventLog.event_type.in_(["document_processing", "document_completed", "document_failed"]))
.order_by(EventLog.occurred_at.desc())
.limit(10)
.all()
)
for ev in recent:
history_items.append({
"type": ev.event_type,
"timestamp": ev.occurred_at.isoformat() if getattr(ev, "occurred_at", None) else None,
"data": ev.event_data or {},
})
except Exception:
history_items = []
# Try in-memory record for current status
last = get_last_status(file_no)
if last:
ts = last.get("timestamp")
iso = ts.isoformat() if hasattr(ts, "isoformat") else None
status_val = str(last.get("status") or "unknown")
# Treat stale 'processing' as unknown if older than 10 minutes
try:
if status_val == "processing" and isinstance(ts, datetime):
age = datetime.now(timezone.utc) - ts
if age.total_seconds() > 600:
status_val = "unknown"
except Exception:
pass
return CurrentStatusResponse(
file_no=file_no,
status=status_val,
timestamp=iso,
data=(last.get("data") or None),
history=history_items,
)
# Fallback: any existing documents imply last status completed
    any_doc = db.query(Document).filter(Document.file_no == file_no).order_by(Document.id.desc()).first()
    if any_doc:
        upload_ts = getattr(any_doc, "upload_date", None)
        return CurrentStatusResponse(
            file_no=file_no,
            status="completed",
            timestamp=upload_ts.isoformat() if upload_ts else None,
data={
"document_id": any_doc.id,
"filename": any_doc.filename,
"size": any_doc.size,
},
history=history_items,
)
return CurrentStatusResponse(file_no=file_no, status="unknown", history=history_items)
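# Example (hedged) response when a generated document exists but no in-memory
# status record survives (values are illustrative):
#   {"file_no": "2024-001", "status": "completed",
#    "timestamp": "2025-08-18T20:20:04", "data": {"document_id": 42,
#    "filename": "order.docx", "size": 18231}, "history": [...]}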
@router.get("/qdros/", response_model=Union[List[QDROResponse], PaginatedQDROResponse])
async def list_qdros(
skip: int = Query(0, ge=0),
limit: int = Query(50, ge=1, le=200),
status_filter: Optional[str] = Query(None),
search: Optional[str] = Query(None),
sort_by: Optional[str] = Query(None, description="Sort by: file_no, version, status, created, updated"),
sort_dir: Optional[str] = Query("asc", description="Sort direction: asc or desc"),
include_total: bool = Query(False, description="When true, returns {items, total} instead of a plain list"),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""List all QDROs with filtering"""
query = db.query(QDRO)
if status_filter:
query = query.filter(QDRO.status == status_filter)
if search:
# DRY: tokenize and apply case-insensitive search across common QDRO fields
tokens = build_query_tokens(search)
filter_expr = tokenized_ilike_filter(tokens, [
QDRO.file_no,
QDRO.form_name,
QDRO.pet,
QDRO.res,
QDRO.case_number,
QDRO.notes,
QDRO.status,
])
if filter_expr is not None:
query = query.filter(filter_expr)
# Sorting (whitelisted)
query = apply_sorting(
query,
sort_by,
sort_dir,
allowed={
"file_no": [QDRO.file_no],
"version": [QDRO.version],
"status": [QDRO.status],
"created": [QDRO.created_at],
"updated": [QDRO.updated_at],
},
)
qdros, total = paginate_with_total(query, skip, limit, include_total)
if include_total:
return {"items": qdros, "total": total or 0}
return qdros
@router.post("/qdros/", response_model=QDROResponse)
async def create_qdro(
qdro_data: QDROCreate,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Create new QDRO"""
# Only accept fields that exist on the model and exclude None values
allowed_fields = {c.name for c in QDRO.__table__.columns}
payload = {
k: v
for k, v in qdro_data.model_dump(exclude_unset=True).items()
if v is not None and k in allowed_fields
}
qdro = QDRO(**payload)
# Backfill created_date if model supports it; otherwise rely on created_at
if hasattr(qdro, "created_date") and not getattr(qdro, "created_date"):
setattr(qdro, "created_date", date.today())
db.add(qdro)
db.commit()
db.refresh(qdro)
try:
await invalidate_search_cache()
except Exception:
pass
return qdro
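# Example (hedged) request body for POST .../qdros/ (fields from QDROBase;
# values are illustrative):
#   {"file_no": "2024-001", "version": "01", "status": "DRAFT",
#    "participant_name": "Jane Smith", "plan_name": "Acme 401(k)"}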
@router.get("/qdros/{file_no}/{qdro_id}", response_model=QDROResponse)
async def get_qdro(
file_no: str,
qdro_id: int,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Get specific QDRO"""
qdro = db.query(QDRO).filter(
QDRO.id == qdro_id,
QDRO.file_no == file_no
).first()
if not qdro:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="QDRO not found"
)
return qdro
@router.put("/qdros/{file_no}/{qdro_id}", response_model=QDROResponse)
async def update_qdro(
file_no: str,
qdro_id: int,
qdro_data: QDROUpdate,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Update QDRO"""
qdro = db.query(QDRO).filter(
QDRO.id == qdro_id,
QDRO.file_no == file_no
).first()
if not qdro:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="QDRO not found"
)
# Update fields present on the model only
allowed_fields = {c.name for c in QDRO.__table__.columns}
for field, value in qdro_data.model_dump(exclude_unset=True).items():
if field in allowed_fields:
setattr(qdro, field, value)
db.commit()
db.refresh(qdro)
try:
await invalidate_search_cache()
except Exception:
pass
return qdro
@router.delete("/qdros/{file_no}/{qdro_id}")
async def delete_qdro(
file_no: str,
qdro_id: int,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Delete QDRO"""
qdro = db.query(QDRO).filter(
QDRO.id == qdro_id,
QDRO.file_no == file_no
).first()
if not qdro:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="QDRO not found"
)
db.delete(qdro)
db.commit()
try:
await invalidate_search_cache()
except Exception:
pass
return {"message": "QDRO deleted successfully"}
# Enhanced Document Management Endpoints
# Template Management Schemas
class TemplateBase(BaseModel):
"""Base template schema"""
form_id: str
form_name: str
category: str = "GENERAL"
content: str = ""
variables: Optional[Dict[str, str]] = None
class TemplateCreate(TemplateBase):
pass
class TemplateUpdate(BaseModel):
form_name: Optional[str] = None
category: Optional[str] = None
content: Optional[str] = None
variables: Optional[Dict[str, str]] = None
class TemplateResponse(TemplateBase):
active: bool = True
created_at: Optional[datetime] = None
model_config = ConfigDict(from_attributes=True)
# Document Generation Schema
class DocumentGenerateRequest(BaseModel):
"""Request to generate document from template"""
template_id: str
file_no: str
output_format: str = "PDF" # PDF, DOCX, HTML
variables: Optional[Dict[str, Any]] = None
class DocumentResponse(BaseModel):
"""Generated document response"""
document_id: str
file_name: str
file_path: str
size: int
created_at: datetime
# Document Statistics
class DocumentStats(BaseModel):
"""Document system statistics"""
total_templates: int
total_qdros: int
templates_by_category: Dict[str, int]
recent_activity: List[Dict[str, Any]]
class PaginatedTemplatesResponse(BaseModel):
items: List[TemplateResponse]
total: int
@router.get("/templates/", response_model=Union[List[TemplateResponse], PaginatedTemplatesResponse])
async def list_templates(
skip: int = Query(0, ge=0),
limit: int = Query(50, ge=1, le=200),
category: Optional[str] = Query(None),
search: Optional[str] = Query(None),
active_only: bool = Query(True),
sort_by: Optional[str] = Query(None, description="Sort by: form_id, form_name, category, created, updated"),
sort_dir: Optional[str] = Query("asc", description="Sort direction: asc or desc"),
include_total: bool = Query(False, description="When true, returns {items, total} instead of a plain list"),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""List available document templates"""
query = db.query(FormIndex)
if active_only:
query = query.filter(FormIndex.active == True)
if category:
query = query.filter(FormIndex.category == category)
if search:
# DRY: tokenize and apply case-insensitive search for templates
tokens = build_query_tokens(search)
filter_expr = tokenized_ilike_filter(tokens, [
FormIndex.form_name,
FormIndex.form_id,
FormIndex.category,
])
if filter_expr is not None:
query = query.filter(filter_expr)
# Sorting (whitelisted)
query = apply_sorting(
query,
sort_by,
sort_dir,
allowed={
"form_id": [FormIndex.form_id],
"form_name": [FormIndex.form_name],
"category": [FormIndex.category],
"created": [FormIndex.created_at],
"updated": [FormIndex.updated_at],
},
)
templates, total = paginate_with_total(query, skip, limit, include_total)
# Enhanced response with template content
results = []
for template in templates:
template_lines = db.query(FormList).filter(
FormList.form_id == template.form_id
).order_by(FormList.line_number).all()
content = "\n".join([line.content or "" for line in template_lines])
results.append({
"form_id": template.form_id,
"form_name": template.form_name,
"category": template.category,
"content": content,
"active": template.active,
"created_at": template.created_at,
"variables": _extract_variables_from_content(content)
})
if include_total:
return {"items": results, "total": total or 0}
return results
@router.post("/templates/", response_model=TemplateResponse)
async def create_template(
template_data: TemplateCreate,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Create new document template"""
# Check if template already exists
existing = db.query(FormIndex).filter(FormIndex.form_id == template_data.form_id).first()
if existing:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Template with this ID already exists"
)
# Create form index entry
form_index = FormIndex(
form_id=template_data.form_id,
form_name=template_data.form_name,
category=template_data.category,
active=True
)
db.add(form_index)
# Create form content lines
content_lines = template_data.content.split('\n')
for i, line in enumerate(content_lines, 1):
form_line = FormList(
form_id=template_data.form_id,
line_number=i,
content=line
)
db.add(form_line)
db.commit()
db.refresh(form_index)
try:
await invalidate_search_cache()
except Exception:
pass
return {
"form_id": form_index.form_id,
"form_name": form_index.form_name,
"category": form_index.category,
"content": template_data.content,
"active": form_index.active,
"created_at": form_index.created_at,
"variables": template_data.variables or {}
}
@router.get("/templates/{template_id}", response_model=TemplateResponse)
async def get_template(
template_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Get specific template with content"""
template = db.query(FormIndex).filter(FormIndex.form_id == template_id).first()
if not template:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Template not found"
)
# Get template content
template_lines = db.query(FormList).filter(
FormList.form_id == template_id
).order_by(FormList.line_number).all()
content = "\n".join([line.content or "" for line in template_lines])
return {
"form_id": template.form_id,
"form_name": template.form_name,
"category": template.category,
"content": content,
"active": template.active,
"created_at": template.created_at,
"variables": _extract_variables_from_content(content)
}
@router.put("/templates/{template_id}", response_model=TemplateResponse)
async def update_template(
template_id: str,
template_data: TemplateUpdate,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Update document template"""
template = db.query(FormIndex).filter(FormIndex.form_id == template_id).first()
if not template:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Template not found"
)
# Update form index
if template_data.form_name:
template.form_name = template_data.form_name
if template_data.category:
template.category = template_data.category
# Update content if provided
if template_data.content is not None:
# Delete existing content lines
db.query(FormList).filter(FormList.form_id == template_id).delete()
# Add new content lines
content_lines = template_data.content.split('\n')
for i, line in enumerate(content_lines, 1):
form_line = FormList(
form_id=template_id,
line_number=i,
content=line
)
db.add(form_line)
db.commit()
db.refresh(template)
try:
await invalidate_search_cache()
except Exception:
pass
# Get updated content
template_lines = db.query(FormList).filter(
FormList.form_id == template_id
).order_by(FormList.line_number).all()
content = "\n".join([line.content or "" for line in template_lines])
return {
"form_id": template.form_id,
"form_name": template.form_name,
"category": template.category,
"content": content,
"active": template.active,
"created_at": template.created_at,
"variables": _extract_variables_from_content(content)
}
@router.delete("/templates/{template_id}")
async def delete_template(
template_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Delete document template"""
template = db.query(FormIndex).filter(FormIndex.form_id == template_id).first()
if not template:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="Template not found"
)
# Delete content lines
db.query(FormList).filter(FormList.form_id == template_id).delete()
# Delete template
db.delete(template)
db.commit()
try:
await invalidate_search_cache()
except Exception:
pass
return {"message": "Template deleted successfully"}
@router.post("/generate/{template_id}")
async def generate_document(
template_id: str,
request: DocumentGenerateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Generate document from template"""
# Get template
template = db.query(FormIndex).filter(FormIndex.form_id == template_id).first()
if not template:
raise HTTPException(status_code=404, detail="Template not found")
# Get file information
file_obj = db.query(FileModel).options(
joinedload(FileModel.owner)
).filter(FileModel.file_no == request.file_no).first()
if not file_obj:
raise HTTPException(status_code=404, detail="File not found")
# Get template content
template_lines = db.query(FormList).filter(
FormList.form_id == template_id
).order_by(FormList.line_number).all()
template_content = "\n".join([line.content or "" for line in template_lines])
# Prepare merge variables
merge_vars = {
"FILE_NO": file_obj.file_no,
"CLIENT_FIRST": file_obj.owner.first if file_obj.owner else "",
"CLIENT_LAST": file_obj.owner.last if file_obj.owner else "",
"CLIENT_FULL": f"{file_obj.owner.first or ''} {file_obj.owner.last}".strip() if file_obj.owner else "",
"MATTER": file_obj.regarding or "",
"OPENED": file_obj.opened.strftime("%B %d, %Y") if file_obj.opened else "",
"ATTORNEY": file_obj.empl_num or "",
"TODAY": date.today().strftime("%B %d, %Y")
}
# Add any custom variables from the request
if request.variables:
merge_vars.update(request.variables)
# Perform variable substitution
merged_content = _merge_template_variables(template_content, merge_vars)
# Generate document file
document_id = str(uuid.uuid4())
file_name = f"{template.form_name}_{file_obj.file_no}_{date.today().isoformat()}"
exports_dir = "/app/exports"
try:
os.makedirs(exports_dir, exist_ok=True)
except Exception:
try:
os.makedirs("exports", exist_ok=True)
exports_dir = "exports"
except Exception:
exports_dir = "."
if request.output_format.upper() == "PDF":
file_path = f"{exports_dir}/{document_id}.pdf"
file_name += ".pdf"
# Here you would implement PDF generation
# For now, create a simple text file
with open(f"{exports_dir}/{document_id}.txt", "w") as f:
f.write(merged_content)
file_path = f"{exports_dir}/{document_id}.txt"
elif request.output_format.upper() == "DOCX":
file_path = f"{exports_dir}/{document_id}.docx"
file_name += ".docx"
# Implement DOCX generation
with open(f"{exports_dir}/{document_id}.txt", "w") as f:
f.write(merged_content)
file_path = f"{exports_dir}/{document_id}.txt"
else: # HTML
file_path = f"{exports_dir}/{document_id}.html"
file_name += ".html"
html_content = f"<html><body><pre>{merged_content}</pre></body></html>"
with open(file_path, "w") as f:
f.write(html_content)
file_size = os.path.getsize(file_path) if os.path.exists(file_path) else 0
return {
"document_id": document_id,
"file_name": file_name,
"file_path": file_path,
"size": file_size,
"created_at": datetime.now(timezone.utc)
}
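# A minimal sketch of real PDF output for the placeholder branch above,
# assuming the third-party 'reportlab' package (not currently a dependency):
#
#   from reportlab.lib.pagesizes import letter
#   from reportlab.pdfgen import canvas
#
#   def _write_pdf(path: str, text: str) -> None:
#       pdf = canvas.Canvas(path, pagesize=letter)
#       y = 750
#       for line in text.splitlines():
#           if y < 72:          # start a new page at the bottom margin
#               pdf.showPage()
#               y = 750
#           pdf.drawString(72, y, line)
#           y -= 14
#       pdf.save()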
@router.get("/categories/")
async def get_template_categories(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Get available template categories"""
categories = db.query(FormIndex.category).distinct().all()
return [cat[0] for cat in categories if cat[0]]
@router.get("/stats/summary")
async def get_document_stats(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Get document system statistics"""
# Template statistics
total_templates = db.query(FormIndex).filter(FormIndex.active == True).count()
total_qdros = db.query(QDRO).count()
# Templates by category
category_stats = db.query(
FormIndex.category,
func.count(FormIndex.form_id)
).filter(FormIndex.active == True).group_by(FormIndex.category).all()
categories_dict = {cat[0] or "Uncategorized": cat[1] for cat in category_stats}
# Recent QDRO activity
recent_qdros = db.query(QDRO).order_by(desc(QDRO.updated_at)).limit(5).all()
recent_activity = [
{
"type": "QDRO",
"file_no": qdro.file_no,
"status": qdro.status,
"updated_at": qdro.updated_at.isoformat() if qdro.updated_at else None
}
for qdro in recent_qdros
]
return {
"total_templates": total_templates,
"total_qdros": total_qdros,
"templates_by_category": categories_dict,
"recent_activity": recent_activity
}
@router.get("/file/{file_no}/documents")
async def get_file_documents(
file_no: str,
sort_by: Optional[str] = Query("updated", description="Sort by: updated, created"),
sort_dir: Optional[str] = Query("desc", description="Sort direction: asc or desc"),
skip: int = Query(0, ge=0),
limit: int = Query(100, ge=1, le=1000),
include_total: bool = Query(False, description="When true, returns {items, total} instead of a plain list"),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Get all documents associated with a specific file, with optional sorting/pagination"""
# Base query for QDROs tied to the file
query = db.query(QDRO).filter(QDRO.file_no == file_no)
# Apply sorting using shared helper (map friendly names to columns)
query = apply_sorting(
query,
sort_by,
sort_dir,
allowed={
"updated": [QDRO.updated_at, QDRO.id],
"created": [QDRO.created_at, QDRO.id],
},
)
qdros, total = paginate_with_total(query, skip, limit, include_total)
items = [
{
"id": qdro.id,
"type": "QDRO",
"title": f"QDRO v{qdro.version}",
"status": qdro.status,
"created_date": qdro.created_date.isoformat() if getattr(qdro, "created_date", None) else None,
"updated_at": qdro.updated_at.isoformat() if getattr(qdro, "updated_at", None) else None,
"file_no": qdro.file_no,
}
for qdro in qdros
]
payload = {"file_no": file_no, "documents": items, "total_count": (total if include_total else None)}
# Maintain previous shape by omitting total_count when include_total is False? The prior code always returned total_count.
# Keep total_count for backward compatibility but set to actual total when include_total else len(items)
payload["total_count"] = (total if include_total else len(items))
return payload
def _extract_variables_from_content(content: str) -> Dict[str, str]:
"""Extract variable placeholders from template content"""
import re
variables = {}
# Find variables in format {{VARIABLE_NAME}}
matches = re.findall(r'\{\{([^}]+)\}\}', content)
for match in matches:
var_name = match.strip()
variables[var_name] = f"Placeholder for {var_name}"
# Find variables in format ^VARIABLE
matches = re.findall(r'\^([A-Z_]+)', content)
for match in matches:
variables[match] = f"Placeholder for {match}"
return variables
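# Example (hedged): for content "Dear {{CLIENT_FULL}}, re: ^MATTER" the helper
# above returns {"CLIENT_FULL": "Placeholder for CLIENT_FULL",
#                "MATTER": "Placeholder for MATTER"}.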
def _merge_template_variables(content: str, variables: Dict[str, Any]) -> str:
"""Replace template variables with actual values"""
merged = content
# Replace {{VARIABLE}} format
for var_name, value in variables.items():
merged = merged.replace(f"{{{{{var_name}}}}}", str(value or ""))
merged = merged.replace(f"^{var_name}", str(value or ""))
return merged
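# Example (hedged):
#   _merge_template_variables("Re: {{MATTER}} for ^CLIENT_FULL",
#                             {"MATTER": "Smith v. Smith", "CLIENT_FULL": "Jane Smith"})
#   -> "Re: Smith v. Smith for Jane Smith"
# Note the ^NAME form is a plain substring replace, so a ^CLIENT key would also
# match the prefix of ^CLIENT_FULL if both were present.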
# --- Batch Document Generation (MVP synchronous) ---
class BatchGenerateRequest(BaseModel):
"""Batch generation request using DocumentTemplate system."""
template_id: int
version_id: Optional[int] = None
file_nos: List[str]
output_format: str = "DOCX" # DOCX (default), PDF (not yet supported), HTML (not yet supported)
context: Optional[Dict[str, Any]] = None # additional global context
bundle_zip: bool = False # when true, also create a ZIP bundle of generated outputs
class BatchGenerateItemResult(BaseModel):
file_no: str
status: str # "success" | "error"
document_id: Optional[int] = None
filename: Optional[str] = None
path: Optional[str] = None
url: Optional[str] = None
size: Optional[int] = None
unresolved: Optional[List[str]] = None
error: Optional[str] = None
class BatchGenerateResponse(BaseModel):
job_id: str
template_id: int
version_id: int
total_requested: int
total_success: int
total_failed: int
results: List[BatchGenerateItemResult]
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
@router.post("/generate-batch", response_model=BatchGenerateResponse)
async def generate_batch_documents(
payload: BatchGenerateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
"""Synchronously generate documents for multiple files from a template version.
Notes:
- Currently supports DOCX output. PDF/HTML conversion is not yet implemented.
- Saves generated bytes to default storage under uploads/generated/{file_no}/.
- Persists a `Document` record per successful file.
- Returns per-item status with unresolved tokens for transparency.
"""
tpl = db.query(DocumentTemplate).filter(DocumentTemplate.id == payload.template_id).first()
if not tpl:
raise HTTPException(status_code=404, detail="Template not found")
resolved_version_id = payload.version_id or tpl.current_version_id
if not resolved_version_id:
raise HTTPException(status_code=400, detail="Template has no approved/current version")
ver = (
db.query(DocumentTemplateVersion)
.filter(
DocumentTemplateVersion.id == resolved_version_id,
DocumentTemplateVersion.template_id == tpl.id,
)
.first()
)
if not ver:
raise HTTPException(status_code=404, detail="Template version not found")
storage = get_default_storage()
try:
template_bytes = storage.open_bytes(ver.storage_path)
except Exception:
raise HTTPException(status_code=404, detail="Stored template file not found")
tokens = extract_tokens_from_bytes(template_bytes)
results: List[BatchGenerateItemResult] = []
# Pre-normalize file numbers (strip spaces, ignore empties)
requested_files: List[str] = [fn.strip() for fn in (payload.file_nos or []) if fn and str(fn).strip()]
if not requested_files:
raise HTTPException(status_code=400, detail="No file numbers provided")
# Fetch all files in one query
files_map: Dict[str, FileModel] = {
f.file_no: f
for f in db.query(FileModel).options(joinedload(FileModel.owner)).filter(FileModel.file_no.in_(requested_files)).all()
}
    generated_items: List[Dict[str, Any]] = []  # capture paths for optional ZIP
    job_id = str(uuid.uuid4())  # created up front so per-file notifications can reference it
for file_no in requested_files:
# Notify processing started for this file
try:
await notify_processing(
file_no=file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"job_id": job_id
}
)
except Exception:
# Don't fail generation if notification fails
pass
file_obj = files_map.get(file_no)
if not file_obj:
# Notify failure
try:
await notify_failed(
file_no=file_no,
user_id=current_user.id,
data={"error": "File not found", "template_id": tpl.id}
)
except Exception:
pass
results.append(
BatchGenerateItemResult(
file_no=file_no,
status="error",
error="File not found",
)
)
continue
# Build per-file context
file_context: Dict[str, Any] = {
"FILE_NO": file_obj.file_no,
"CLIENT_FIRST": getattr(getattr(file_obj, "owner", None), "first", "") or "",
"CLIENT_LAST": getattr(getattr(file_obj, "owner", None), "last", "") or "",
"CLIENT_FULL": (
f"{getattr(getattr(file_obj, 'owner', None), 'first', '') or ''} "
f"{getattr(getattr(file_obj, 'owner', None), 'last', '') or ''}"
).strip(),
"MATTER": file_obj.regarding or "",
"OPENED": file_obj.opened.strftime("%B %d, %Y") if getattr(file_obj, "opened", None) else "",
"ATTORNEY": getattr(file_obj, "empl_num", "") or "",
}
# Merge global context
merged_context = build_context({**(payload.context or {}), **file_context}, "file", file_obj.file_no)
resolved_vars, unresolved_tokens = resolve_tokens(db, tokens, merged_context)
try:
if ver.mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
output_bytes = render_docx(template_bytes, resolved_vars)
output_mime = ver.mime_type
extension = ".docx"
else:
                # For non-DOCX templates (e.g., PDF), pass the stored content through unchanged
output_bytes = template_bytes
output_mime = ver.mime_type
extension = ".bin"
# Name and save
safe_name = f"{tpl.name}_{file_obj.file_no}{extension}"
subdir = f"generated/{file_obj.file_no}"
storage_path = storage.save_bytes(content=output_bytes, filename_hint=safe_name, subdir=subdir, content_type=output_mime)
# Persist Document record
abs_or_rel_path = os.path.join("uploads", storage_path).replace("\\", "/")
doc = Document(
file_no=file_obj.file_no,
filename=safe_name,
path=abs_or_rel_path,
description=f"Generated from template '{tpl.name}'",
type=output_mime,
size=len(output_bytes),
uploaded_by=getattr(current_user, "username", None),
)
db.add(doc)
db.commit()
db.refresh(doc)
# Notify successful completion
try:
await notify_completed(
file_no=file_obj.file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"document_id": doc.id,
"filename": doc.filename,
"size": doc.size,
"unresolved_tokens": unresolved_tokens or []
}
)
except Exception:
# Don't fail generation if notification fails
pass
results.append(
BatchGenerateItemResult(
file_no=file_obj.file_no,
status="success",
document_id=doc.id,
filename=doc.filename,
path=doc.path,
url=storage.public_url(storage_path),
size=doc.size,
unresolved=unresolved_tokens or [],
)
)
# Keep for bundling
generated_items.append({
"filename": doc.filename,
"storage_path": storage_path,
})
except Exception as e:
# Notify failure
try:
await notify_failed(
file_no=file_obj.file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"error": str(e),
"unresolved_tokens": unresolved_tokens or []
}
)
except Exception:
pass
# Best-effort rollback of partial doc add
try:
db.rollback()
except Exception:
pass
results.append(
BatchGenerateItemResult(
file_no=file_obj.file_no,
status="error",
error=str(e),
unresolved=unresolved_tokens or [],
)
)
total_success = sum(1 for r in results if r.status == "success")
total_failed = sum(1 for r in results if r.status == "error")
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
# Optionally create a ZIP bundle of generated outputs
bundle_storage_path: Optional[str] = None
if payload.bundle_zip and total_success > 0:
# Stream zip to memory then save via storage adapter
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
for item in generated_items:
try:
file_bytes = storage.open_bytes(item["storage_path"]) # relative path under uploads
# Use clean filename inside zip
zf.writestr(item["filename"], file_bytes)
except Exception:
# Skip missing/unreadable files from bundle; keep job successful
continue
zip_bytes = zip_buffer.getvalue()
safe_zip_name = f"documents_batch_{job_id}.zip"
bundle_storage_path = storage.save_bytes(content=zip_bytes, filename_hint=safe_zip_name, subdir="bundles", content_type="application/zip")
bundle_url = storage.public_url(bundle_storage_path)
bundle_size = len(zip_bytes)
# Persist simple job record
try:
job = JobRecord(
job_id=job_id,
job_type="documents_batch",
status="completed",
requested_by_username=getattr(current_user, "username", None),
started_at=datetime.now(timezone.utc),
completed_at=datetime.now(timezone.utc),
total_requested=len(requested_files),
total_success=total_success,
total_failed=total_failed,
result_storage_path=bundle_storage_path,
result_mime_type=("application/zip" if bundle_storage_path else None),
result_size=bundle_size,
details={
"template_id": tpl.id,
"version_id": ver.id,
"file_nos": requested_files,
},
)
db.add(job)
db.commit()
except Exception:
try:
db.rollback()
except Exception:
pass
return BatchGenerateResponse(
job_id=job_id,
template_id=tpl.id,
version_id=ver.id,
total_requested=len(requested_files),
total_success=total_success,
total_failed=total_failed,
results=results,
bundle_url=bundle_url,
bundle_size=bundle_size,
)
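# Example (hedged) request body for the endpoint above:
#   {
#     "template_id": 12,
#     "file_nos": ["2024-001", "2024-002"],
#     "output_format": "DOCX",
#     "bundle_zip": true
#   }
# A bundle_url is returned only when bundle_zip is true and at least one
# document generated successfully.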
class JobStatusResponse(BaseModel):
job_id: str
job_type: str
status: str
total_requested: int
total_success: int
total_failed: int
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
bundle_available: bool = False
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
@router.get("/jobs/{job_id}", response_model=JobStatusResponse)
async def get_job_status(
job_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
job = db.query(JobRecord).filter(JobRecord.job_id == job_id).first()
if not job:
raise HTTPException(status_code=404, detail="Job not found")
return JobStatusResponse(
job_id=job.job_id,
job_type=job.job_type,
status=job.status,
total_requested=job.total_requested or 0,
total_success=job.total_success or 0,
total_failed=job.total_failed or 0,
started_at=getattr(job, "started_at", None),
completed_at=getattr(job, "completed_at", None),
bundle_available=bool(job.result_storage_path),
bundle_url=(get_default_storage().public_url(job.result_storage_path) if job.result_storage_path else None),
bundle_size=job.result_size,
)
@router.get("/jobs/{job_id}/result")
async def download_job_result(
job_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
job = db.query(JobRecord).filter(JobRecord.job_id == job_id).first()
if not job or not job.result_storage_path:
raise HTTPException(status_code=404, detail="Result not available for this job")
storage = get_default_storage()
try:
content = storage.open_bytes(job.result_storage_path)
except Exception:
raise HTTPException(status_code=404, detail="Stored bundle not found")
# Derive filename
base = os.path.basename(job.result_storage_path)
headers = {
"Content-Disposition": f"attachment; filename=\"{base}\"",
}
return StreamingResponse(iter([content]), media_type=(job.result_mime_type or "application/zip"), headers=headers)
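# Example (hedged; path prefix depends on router mounting):
#   curl -H "Authorization: Bearer <token>" -o bundle.zip \
#        "https://<host>/<prefix>/jobs/<job_id>/result"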
# --- Client Error Logging (for Documents page) ---
class ClientErrorLog(BaseModel):
"""Payload for client-side error logging"""
message: str
action: Optional[str] = None
stack: Optional[str] = None
url: Optional[str] = None
line: Optional[int] = None
column: Optional[int] = None
user_agent: Optional[str] = None
extra: Optional[Dict[str, Any]] = None
@router.post("/client-error")
async def log_client_error(
payload: ClientErrorLog,
request: Request,
db: Session = Depends(get_db),
    current_user: Optional[User] = Depends(lambda: None)  # no auth dependency: accept reports from unauthenticated sessions
):
"""Accept client-side error logs from the Documents page.
This endpoint is lightweight and safe to call; it records the error to the
application logs and best-effort to the audit log without interrupting the UI.
"""
logger = get_logger("client.documents")
client_ip = request.headers.get("x-forwarded-for")
if client_ip:
client_ip = client_ip.split(",")[0].strip()
else:
client_ip = request.client.host if request.client else None
logger.error(
"Client error reported",
action=payload.action,
message=payload.message,
stack=payload.stack,
page="/documents",
url=payload.url or str(request.url),
line=payload.line,
column=payload.column,
user=getattr(current_user, "username", None),
user_id=getattr(current_user, "id", None),
user_agent=payload.user_agent or request.headers.get("user-agent"),
client_ip=client_ip,
extra=payload.extra,
)
# Best-effort audit log; do not raise on failure
try:
audit_service.log_action(
db=db,
action="CLIENT_ERROR",
resource_type="DOCUMENTS",
user=current_user,
resource_id=None,
details={
"action": payload.action,
"message": payload.message,
"url": payload.url or str(request.url),
"line": payload.line,
"column": payload.column,
"extra": payload.extra,
},
request=request,
)
except Exception:
pass
return {"status": "logged"}
@router.post("/upload/{file_no}")
async def upload_document(
file_no: str,
file: UploadFile = File(...),
description: Optional[str] = Form(None),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Upload a document to a file with comprehensive security validation and async operations"""
from app.utils.file_security import file_validator, create_upload_directory
from app.services.async_file_operations import async_file_ops, validate_large_upload
from app.services.async_storage import async_storage
file_obj = db.query(FileModel).filter(FileModel.file_no == file_no).first()
if not file_obj:
raise HTTPException(status_code=404, detail="File not found")
# Determine if this is a large file that needs streaming
file_size_estimate = getattr(file, 'size', 0) or 0
use_streaming = file_size_estimate > 10 * 1024 * 1024 # 10MB threshold
if use_streaming:
# Use streaming validation for large files
# Enforce the same 10MB limit used for non-streaming uploads
is_valid, error_msg, metadata = await validate_large_upload(
file, category='document', max_size=10 * 1024 * 1024
)
if not is_valid:
raise HTTPException(status_code=400, detail=error_msg)
safe_filename = file_validator.sanitize_filename(file.filename)
file_ext = Path(safe_filename).suffix
mime_type = metadata.get('content_type', 'application/octet-stream')
# Stream upload for large files
subdir = f"documents/{file_no}"
final_path, actual_size, _checksum = await async_file_ops.stream_upload_file(
file,
f"{subdir}/{uuid.uuid4()}{file_ext}",
progress_callback=None # Could add WebSocket progress here
)
# Get absolute path for database storage
absolute_path = str(final_path)
        # Also keep a relative path for downstream DB fields that expect one
        relative_path = str(Path(final_path).relative_to(async_file_ops.base_upload_dir))
else:
# Use traditional validation for smaller files
content, safe_filename, file_ext, mime_type = await file_validator.validate_upload_file(
file, category='document'
)
        # Write via async storage, which manages the destination directory and
        # the final on-disk filename itself.
try:
relative_path = await async_storage.save_bytes_async(
content,
safe_filename,
subdir=f"documents/{file_no}"
)
absolute_path = str(async_storage.base_dir / relative_path)
actual_size = len(content)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Could not save file: {str(e)}")
doc = Document(
file_no=file_no,
filename=safe_filename, # Use sanitized filename
path=absolute_path,
description=description,
type=mime_type, # Use validated MIME type
size=actual_size,
uploaded_by=current_user.username
)
db.add(doc)
db.commit()
db.refresh(doc)
# Send real-time notification for document upload
try:
await notify_completed(
file_no=file_no,
user_id=current_user.id,
data={
"action": "upload",
"document_id": doc.id,
"filename": safe_filename,
"size": actual_size,
"type": mime_type,
"description": description
}
)
except Exception as e:
# Don't fail the operation if notification fails
get_logger("documents").warning(f"Failed to send document upload notification: {str(e)}")
# Log workflow event for document upload
try:
from app.services.workflow_integration import log_document_uploaded_sync
log_document_uploaded_sync(
db=db,
file_no=file_no,
document_id=doc.id,
filename=safe_filename,
document_type=mime_type,
user_id=current_user.id
)
except Exception as e:
# Don't fail the operation if workflow logging fails
get_logger("documents").warning(f"Failed to log workflow event for document upload: {str(e)}")
return doc
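# Example (hedged; prefix depends on router mounting) multipart upload:
#   curl -H "Authorization: Bearer <token>" \
#        -F "file=@motion.pdf" -F "description=Filed motion" \
#        "https://<host>/<prefix>/upload/2024-001"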
@router.get("/{file_no}/uploaded")
async def list_uploaded_documents(
file_no: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""List uploaded documents for a file"""
docs = db.query(Document).filter(Document.file_no == file_no).all()
return docs
@router.delete("/uploaded/{doc_id}")
async def delete_document(
doc_id: int,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Delete an uploaded document"""
doc = db.query(Document).filter(Document.id == doc_id).first()
if not doc:
raise HTTPException(status_code=404, detail="Document not found")
if os.path.exists(doc.path):
os.remove(doc.path)
db.delete(doc)
db.commit()
return {"message": "Document deleted successfully"}
@router.put("/uploaded/{doc_id}")
async def update_document(
doc_id: int,
description: str = Form(...),
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Update document description"""
doc = db.query(Document).filter(Document.id == doc_id).first()
if not doc:
raise HTTPException(status_code=404, detail="Document not found")
doc.description = description
db.commit()
db.refresh(doc)
return doc
# WebSocket endpoints for real-time document status notifications
@router.websocket("/ws/status/{file_no}")
async def ws_document_status(websocket: WebSocket, file_no: str):
"""
Subscribe to real-time document processing status updates for a specific file.
Users can connect to this endpoint to receive notifications about:
- Document generation started (processing)
- Document generation completed
- Document generation failed
- Document uploads
Authentication required via token query parameter.
"""
websocket_manager = get_websocket_manager()
topic = topic_for_file(file_no)
# Custom message handler for document status updates
async def handle_document_message(connection_id: str, message: WebSocketMessage):
"""Handle custom messages for document status"""
get_logger("documents").debug("Received document status message",
connection_id=connection_id,
file_no=file_no,
message_type=message.type)
# Use the WebSocket manager to handle the connection
connection_id = await websocket_manager.handle_connection(
websocket=websocket,
topics={topic},
require_auth=True,
metadata={"file_no": file_no, "endpoint": "document_status"},
message_handler=handle_document_message
)
if connection_id:
# Send initial welcome message with subscription confirmation
try:
pool = websocket_manager.pool
welcome_message = WebSocketMessage(
type="subscription_confirmed",
topic=topic,
data={
"file_no": file_no,
"message": f"Subscribed to document status updates for file {file_no}"
}
)
await pool._send_to_connection(connection_id, welcome_message)
get_logger("documents").info("Document status subscription confirmed",
connection_id=connection_id,
file_no=file_no)
except Exception as e:
get_logger("documents").error("Failed to send subscription confirmation",
connection_id=connection_id,
file_no=file_no,
error=str(e))
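# Example (hedged) Python client for the endpoint above, assuming the
# third-party 'websockets' package and the token query-parameter auth the
# docstring describes:
#
#   import asyncio, json, websockets
#
#   async def watch(file_no: str, token: str) -> None:
#       uri = f"wss://<host>/<prefix>/ws/status/{file_no}?token={token}"
#       async with websockets.connect(uri) as ws:
#           async for raw in ws:
#               print(json.loads(raw))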
# Test endpoint for document notification system
@router.post("/test-notification/{file_no}")
async def test_document_notification(
file_no: str,
status: str = Query(..., description="Notification status: processing, completed, or failed"),
message: Optional[str] = Query(None, description="Optional message"),
current_user: User = Depends(get_current_user)
):
"""
Test endpoint to simulate document processing notifications.
This endpoint allows testing the WebSocket notification system by sending
simulated document status updates. Useful for development and debugging.
"""
if status not in ["processing", "completed", "failed"]:
raise HTTPException(
status_code=400,
detail="Status must be one of: processing, completed, failed"
)
# Prepare test data
test_data = {
"test": True,
"triggered_by": current_user.username,
"message": message or f"Test {status} notification for file {file_no}",
"timestamp": datetime.now(timezone.utc).isoformat()
}
# Send notification based on status
try:
if status == "processing":
sent_count = await notify_processing(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
elif status == "completed":
sent_count = await notify_completed(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
else: # failed
sent_count = await notify_failed(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
return {
"message": f"Test notification sent for file {file_no}",
"status": status,
"sent_to_connections": sent_count,
"data": test_data
}
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to send test notification: {str(e)}"
)