HotSwapp
2025-08-18 20:20:04 -05:00
parent 89b2bc0aa2
commit bac8cc4bd5
114 changed files with 30258 additions and 1341 deletions


@@ -7,9 +7,12 @@ from fastapi import APIRouter, Depends, HTTPException, status, Query, UploadFile
from sqlalchemy.orm import Session, joinedload
from sqlalchemy import or_, func, and_, desc, asc, text
from datetime import date, datetime, timezone
import io
import zipfile
import os
import uuid
import shutil
from pathlib import Path
from app.database.base import get_db
from app.api.search_highlight import build_query_tokens
@@ -21,9 +24,17 @@ from app.models.lookups import FormIndex, FormList, Footer, Employee
from app.models.user import User
from app.auth.security import get_current_user
from app.models.additional import Document
from app.models.document_workflows import EventLog
from app.core.logging import get_logger
from app.services.audit import audit_service
from app.services.cache import invalidate_search_cache
from app.models.templates import DocumentTemplate, DocumentTemplateVersion
from app.models.jobs import JobRecord
from app.services.storage import get_default_storage
from app.services.template_merge import extract_tokens_from_bytes, build_context, resolve_tokens, render_docx
from app.services.document_notifications import notify_processing, notify_completed, notify_failed, topic_for_file, ADMIN_DOCUMENTS_TOPIC, get_last_status
from app.middleware.websocket_middleware import get_websocket_manager, WebSocketMessage
from fastapi import WebSocket
router = APIRouter()
@@ -118,6 +129,87 @@ class PaginatedQDROResponse(BaseModel):
total: int
class CurrentStatusResponse(BaseModel):
file_no: str
status: str # processing | completed | failed | unknown
timestamp: Optional[str] = None
data: Optional[Dict[str, Any]] = None
history: Optional[list] = None
@router.get("/current-status/{file_no}", response_model=CurrentStatusResponse)
async def get_current_document_status(
file_no: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
"""
Return last-known document generation status for a file.
Priority:
1) In-memory last broadcast state (processing/completed/failed)
2) If no memory record, check for any uploaded/generated documents and report 'completed'
3) Fallback to 'unknown'
"""
# Build recent history from EventLog (last N events)
history_items = []
try:
recent = (
db.query(EventLog)
.filter(EventLog.file_no == file_no, EventLog.event_type.in_(["document_processing", "document_completed", "document_failed"]))
.order_by(EventLog.occurred_at.desc())
.limit(10)
.all()
)
for ev in recent:
history_items.append({
"type": ev.event_type,
"timestamp": ev.occurred_at.isoformat() if getattr(ev, "occurred_at", None) else None,
"data": ev.event_data or {},
})
except Exception:
history_items = []
# Try in-memory record for current status
last = get_last_status(file_no)
if last:
ts = last.get("timestamp")
iso = ts.isoformat() if hasattr(ts, "isoformat") else None
status_val = str(last.get("status") or "unknown")
# Treat stale 'processing' as unknown if older than 10 minutes
try:
if status_val == "processing" and isinstance(ts, datetime):
age = datetime.now(timezone.utc) - ts
if age.total_seconds() > 600:
status_val = "unknown"
except Exception:
pass
return CurrentStatusResponse(
file_no=file_no,
status=status_val,
timestamp=iso,
data=(last.get("data") or None),
history=history_items,
)
# Fallback: any existing documents imply last status completed
any_doc = db.query(Document).filter(Document.file_no == file_no).order_by(Document.id.desc()).first()
if any_doc:
return CurrentStatusResponse(
file_no=file_no,
status="completed",
timestamp=getattr(any_doc, "upload_date", None).isoformat() if getattr(any_doc, "upload_date", None) else None,
data={
"document_id": any_doc.id,
"filename": any_doc.filename,
"size": any_doc.size,
},
history=history_items,
)
return CurrentStatusResponse(file_no=file_no, status="unknown", history=history_items)
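# Illustrative client sketch (not part of this commit): poll the current-status
# endpoint until generation settles. The base URL, router prefix, and bearer token
# are assumptions made for the example only.
def _example_poll_current_status(file_no: str, token: str) -> dict:
    import time
    import httpx  # assumed to be available in the calling environment
    url = f"http://localhost:8000/api/documents/current-status/{file_no}"
    headers = {"Authorization": f"Bearer {token}"}
    while True:
        payload = httpx.get(url, headers=headers).json()
        # Stop polling once the backend reports a terminal or unknown state
        if payload.get("status") in ("completed", "failed", "unknown"):
            return payload
        time.sleep(2)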
@router.get("/qdros/", response_model=Union[List[QDROResponse], PaginatedQDROResponse])
async def list_qdros(
skip: int = Query(0, ge=0),
@@ -814,6 +906,371 @@ def _merge_template_variables(content: str, variables: Dict[str, Any]) -> str:
return merged
# --- Batch Document Generation (MVP synchronous) ---
class BatchGenerateRequest(BaseModel):
"""Batch generation request using DocumentTemplate system."""
template_id: int
version_id: Optional[int] = None
file_nos: List[str]
output_format: str = "DOCX" # DOCX (default), PDF (not yet supported), HTML (not yet supported)
context: Optional[Dict[str, Any]] = None # additional global context
bundle_zip: bool = False # when true, also create a ZIP bundle of generated outputs
class BatchGenerateItemResult(BaseModel):
file_no: str
status: str # "success" | "error"
document_id: Optional[int] = None
filename: Optional[str] = None
path: Optional[str] = None
url: Optional[str] = None
size: Optional[int] = None
unresolved: Optional[List[str]] = None
error: Optional[str] = None
class BatchGenerateResponse(BaseModel):
job_id: str
template_id: int
version_id: int
total_requested: int
total_success: int
total_failed: int
results: List[BatchGenerateItemResult]
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
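# Minimal request sketch for the endpoint below; the template id and file numbers
# are hypothetical, and field names mirror BatchGenerateRequest as defined above.
_example_batch_payload = BatchGenerateRequest(
    template_id=12,                          # hypothetical template id
    file_nos=["2024-0001", "2024-0002"],     # hypothetical file numbers
    output_format="DOCX",                    # only DOCX rendering is implemented today
    context={"FIRM_NAME": "Example Firm"},   # optional extra global tokens (assumption)
    bundle_zip=True,                         # also produce a ZIP bundle of outputs
)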
@router.post("/generate-batch", response_model=BatchGenerateResponse)
async def generate_batch_documents(
payload: BatchGenerateRequest,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
"""Synchronously generate documents for multiple files from a template version.
Notes:
- Currently supports DOCX output. PDF/HTML conversion is not yet implemented.
- Saves generated bytes to default storage under uploads/generated/{file_no}/.
- Persists a `Document` record per successful file.
- Returns per-item status with unresolved tokens for transparency.
"""
tpl = db.query(DocumentTemplate).filter(DocumentTemplate.id == payload.template_id).first()
if not tpl:
raise HTTPException(status_code=404, detail="Template not found")
resolved_version_id = payload.version_id or tpl.current_version_id
if not resolved_version_id:
raise HTTPException(status_code=400, detail="Template has no approved/current version")
ver = (
db.query(DocumentTemplateVersion)
.filter(
DocumentTemplateVersion.id == resolved_version_id,
DocumentTemplateVersion.template_id == tpl.id,
)
.first()
)
if not ver:
raise HTTPException(status_code=404, detail="Template version not found")
storage = get_default_storage()
try:
template_bytes = storage.open_bytes(ver.storage_path)
except Exception:
raise HTTPException(status_code=404, detail="Stored template file not found")
tokens = extract_tokens_from_bytes(template_bytes)
results: List[BatchGenerateItemResult] = []
# Pre-normalize file numbers (strip spaces, ignore empties)
requested_files: List[str] = [fn.strip() for fn in (payload.file_nos or []) if fn and str(fn).strip()]
if not requested_files:
raise HTTPException(status_code=400, detail="No file numbers provided")
# Fetch all files in one query
files_map: Dict[str, FileModel] = {
f.file_no: f
for f in db.query(FileModel).options(joinedload(FileModel.owner)).filter(FileModel.file_no.in_(requested_files)).all()
}
generated_items: List[Dict[str, Any]] = []  # capture generated output paths for optional ZIP bundling
# Create the job id before the per-file loop so notifications can reference it
job_id = str(uuid.uuid4())
for file_no in requested_files:
# Notify processing started for this file
try:
await notify_processing(
file_no=file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"job_id": job_id
}
)
except Exception:
# Don't fail generation if notification fails
pass
file_obj = files_map.get(file_no)
if not file_obj:
# Notify failure
try:
await notify_failed(
file_no=file_no,
user_id=current_user.id,
data={"error": "File not found", "template_id": tpl.id}
)
except Exception:
pass
results.append(
BatchGenerateItemResult(
file_no=file_no,
status="error",
error="File not found",
)
)
continue
# Build per-file context
file_context: Dict[str, Any] = {
"FILE_NO": file_obj.file_no,
"CLIENT_FIRST": getattr(getattr(file_obj, "owner", None), "first", "") or "",
"CLIENT_LAST": getattr(getattr(file_obj, "owner", None), "last", "") or "",
"CLIENT_FULL": (
f"{getattr(getattr(file_obj, 'owner', None), 'first', '') or ''} "
f"{getattr(getattr(file_obj, 'owner', None), 'last', '') or ''}"
).strip(),
"MATTER": file_obj.regarding or "",
"OPENED": file_obj.opened.strftime("%B %d, %Y") if getattr(file_obj, "opened", None) else "",
"ATTORNEY": getattr(file_obj, "empl_num", "") or "",
}
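# These keys are candidate token values; resolve_tokens below matches them against
# the tokens extracted from the stored template bytes.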
# Merge global context (per-file values take precedence over any payload.context keys)
merged_context = build_context({**(payload.context or {}), **file_context}, "file", file_obj.file_no)
resolved_vars, unresolved_tokens = resolve_tokens(db, tokens, merged_context)
try:
if ver.mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
output_bytes = render_docx(template_bytes, resolved_vars)
output_mime = ver.mime_type
extension = ".docx"
else:
# For non-DOCX templates (e.g., PDF), pass-through content
output_bytes = template_bytes
output_mime = ver.mime_type
extension = ".bin"
# Name and save
safe_name = f"{tpl.name}_{file_obj.file_no}{extension}"
subdir = f"generated/{file_obj.file_no}"
storage_path = storage.save_bytes(content=output_bytes, filename_hint=safe_name, subdir=subdir, content_type=output_mime)
# Persist Document record
abs_or_rel_path = os.path.join("uploads", storage_path).replace("\\", "/")
doc = Document(
file_no=file_obj.file_no,
filename=safe_name,
path=abs_or_rel_path,
description=f"Generated from template '{tpl.name}'",
type=output_mime,
size=len(output_bytes),
uploaded_by=getattr(current_user, "username", None),
)
db.add(doc)
db.commit()
db.refresh(doc)
# Notify successful completion
try:
await notify_completed(
file_no=file_obj.file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"document_id": doc.id,
"filename": doc.filename,
"size": doc.size,
"unresolved_tokens": unresolved_tokens or []
}
)
except Exception:
# Don't fail generation if notification fails
pass
results.append(
BatchGenerateItemResult(
file_no=file_obj.file_no,
status="success",
document_id=doc.id,
filename=doc.filename,
path=doc.path,
url=storage.public_url(storage_path),
size=doc.size,
unresolved=unresolved_tokens or [],
)
)
# Keep for bundling
generated_items.append({
"filename": doc.filename,
"storage_path": storage_path,
})
except Exception as e:
# Notify failure
try:
await notify_failed(
file_no=file_obj.file_no,
user_id=current_user.id,
data={
"template_id": tpl.id,
"template_name": tpl.name,
"error": str(e),
"unresolved_tokens": unresolved_tokens or []
}
)
except Exception:
pass
# Best-effort rollback of partial doc add
try:
db.rollback()
except Exception:
pass
results.append(
BatchGenerateItemResult(
file_no=file_obj.file_no,
status="error",
error=str(e),
unresolved=unresolved_tokens or [],
)
)
total_success = sum(1 for r in results if r.status == "success")
total_failed = sum(1 for r in results if r.status == "error")
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
# Optionally create a ZIP bundle of generated outputs
bundle_storage_path: Optional[str] = None
if payload.bundle_zip and total_success > 0:
# Stream zip to memory then save via storage adapter
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED) as zf:
for item in generated_items:
try:
file_bytes = storage.open_bytes(item["storage_path"]) # relative path under uploads
# Use clean filename inside zip
zf.writestr(item["filename"], file_bytes)
except Exception:
# Skip missing/unreadable files from bundle; keep job successful
continue
zip_bytes = zip_buffer.getvalue()
safe_zip_name = f"documents_batch_{job_id}.zip"
bundle_storage_path = storage.save_bytes(content=zip_bytes, filename_hint=safe_zip_name, subdir="bundles", content_type="application/zip")
bundle_url = storage.public_url(bundle_storage_path)
bundle_size = len(zip_bytes)
# Persist simple job record
try:
job = JobRecord(
job_id=job_id,
job_type="documents_batch",
status="completed",
requested_by_username=getattr(current_user, "username", None),
started_at=datetime.now(timezone.utc),
completed_at=datetime.now(timezone.utc),
total_requested=len(requested_files),
total_success=total_success,
total_failed=total_failed,
result_storage_path=bundle_storage_path,
result_mime_type=("application/zip" if bundle_storage_path else None),
result_size=bundle_size,
details={
"template_id": tpl.id,
"version_id": ver.id,
"file_nos": requested_files,
},
)
db.add(job)
db.commit()
except Exception:
try:
db.rollback()
except Exception:
pass
return BatchGenerateResponse(
job_id=job_id,
template_id=tpl.id,
version_id=ver.id,
total_requested=len(requested_files),
total_success=total_success,
total_failed=total_failed,
results=results,
bundle_url=bundle_url,
bundle_size=bundle_size,
)
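# Illustrative usage sketch (assumptions: localhost base URL, /api/documents mount
# prefix, bearer auth): run a synchronous batch, then fetch the ZIP bundle through
# the job-result route defined further below.
def _example_run_batch_and_download(token: str) -> None:
    import httpx  # assumed client library for the example
    base = "http://localhost:8000/api/documents"
    headers = {"Authorization": f"Bearer {token}"}
    resp = httpx.post(
        f"{base}/generate-batch",
        headers=headers,
        json={"template_id": 12, "file_nos": ["2024-0001"], "bundle_zip": True},
    )
    resp.raise_for_status()
    job = resp.json()
    if job.get("bundle_url"):
        bundle = httpx.get(f"{base}/jobs/{job['job_id']}/result", headers=headers)
        with open(f"documents_batch_{job['job_id']}.zip", "wb") as fh:
            fh.write(bundle.content)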
from fastapi.responses import StreamingResponse
class JobStatusResponse(BaseModel):
job_id: str
job_type: str
status: str
total_requested: int
total_success: int
total_failed: int
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
bundle_available: bool = False
bundle_url: Optional[str] = None
bundle_size: Optional[int] = None
@router.get("/jobs/{job_id}", response_model=JobStatusResponse)
async def get_job_status(
job_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
job = db.query(JobRecord).filter(JobRecord.job_id == job_id).first()
if not job:
raise HTTPException(status_code=404, detail="Job not found")
return JobStatusResponse(
job_id=job.job_id,
job_type=job.job_type,
status=job.status,
total_requested=job.total_requested or 0,
total_success=job.total_success or 0,
total_failed=job.total_failed or 0,
started_at=getattr(job, "started_at", None),
completed_at=getattr(job, "completed_at", None),
bundle_available=bool(job.result_storage_path),
bundle_url=(get_default_storage().public_url(job.result_storage_path) if job.result_storage_path else None),
bundle_size=job.result_size,
)
@router.get("/jobs/{job_id}/result")
async def download_job_result(
job_id: str,
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user),
):
job = db.query(JobRecord).filter(JobRecord.job_id == job_id).first()
if not job or not job.result_storage_path:
raise HTTPException(status_code=404, detail="Result not available for this job")
storage = get_default_storage()
try:
content = storage.open_bytes(job.result_storage_path)
except Exception:
raise HTTPException(status_code=404, detail="Stored bundle not found")
# Derive filename
base = os.path.basename(job.result_storage_path)
headers = {
"Content-Disposition": f"attachment; filename=\"{base}\"",
}
return StreamingResponse(iter([content]), media_type=(job.result_mime_type or "application/zip"), headers=headers)
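# Small local sketch for inspecting a downloaded bundle; standard-library zipfile
# only, no project APIs involved.
def _example_list_bundle_contents(zip_path: str) -> list:
    import zipfile
    with zipfile.ZipFile(zip_path) as zf:
        # namelist() returns the per-file document names written by the batch job
        return zf.namelist()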
# --- Client Error Logging (for Documents page) ---
class ClientErrorLog(BaseModel):
"""Payload for client-side error logging"""
@@ -894,54 +1351,118 @@ async def upload_document(
db: Session = Depends(get_db),
current_user: User = Depends(get_current_user)
):
"""Upload a document to a file"""
"""Upload a document to a file with comprehensive security validation and async operations"""
from app.utils.file_security import file_validator, create_upload_directory
from app.services.async_file_operations import async_file_ops, validate_large_upload
from app.services.async_storage import async_storage
file_obj = db.query(FileModel).filter(FileModel.file_no == file_no).first()
if not file_obj:
raise HTTPException(status_code=404, detail="File not found")
if not file.filename:
raise HTTPException(status_code=400, detail="No file uploaded")
# Determine if this is a large file that needs streaming
file_size_estimate = getattr(file, 'size', 0) or 0
use_streaming = file_size_estimate > 10 * 1024 * 1024 # 10MB threshold
allowed_types = [
"application/pdf",
"application/msword",
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"image/jpeg",
"image/png"
]
if file.content_type not in allowed_types:
raise HTTPException(status_code=400, detail="Invalid file type")
if use_streaming:
# Use streaming validation for large files
# Enforce the same 10MB limit used for non-streaming uploads
is_valid, error_msg, metadata = await validate_large_upload(
file, category='document', max_size=10 * 1024 * 1024
)
if not is_valid:
raise HTTPException(status_code=400, detail=error_msg)
safe_filename = file_validator.sanitize_filename(file.filename)
file_ext = Path(safe_filename).suffix
mime_type = metadata.get('content_type', 'application/octet-stream')
# Stream upload for large files
subdir = f"documents/{file_no}"
final_path, actual_size, _checksum = await async_file_ops.stream_upload_file(
file,
f"{subdir}/{uuid.uuid4()}{file_ext}",
progress_callback=None # Could add WebSocket progress here
)
# Get absolute path for database storage
absolute_path = str(final_path)
# For downstream DB fields that expect a relative path, also keep a relative copy for consistency
relative_path = str(Path(final_path).relative_to(async_file_ops.base_upload_dir))
else:
# Use traditional validation for smaller files
content, safe_filename, file_ext, mime_type = await file_validator.validate_upload_file(
file, category='document'
)
max_size = 10 * 1024 * 1024 # 10MB
# Treat zero-byte payloads as no file uploaded to provide a clearer client error
if len(content) == 0:
raise HTTPException(status_code=400, detail="No file uploaded")
if len(content) > max_size:
raise HTTPException(status_code=400, detail="File too large")
# Create secure upload directory
upload_dir = f"uploads/{file_no}"
create_upload_directory(upload_dir)
upload_dir = f"uploads/{file_no}"
os.makedirs(upload_dir, exist_ok=True)
# Generate secure file path with UUID to prevent conflicts
unique_name = f"{uuid.uuid4()}{file_ext}"
path = file_validator.generate_secure_path(upload_dir, unique_name)
ext = file.filename.split(".")[-1]
unique_name = f"{uuid.uuid4()}.{ext}"
path = f"{upload_dir}/{unique_name}"
with open(path, "wb") as f:
f.write(content)
# Write file using async storage for consistency
try:
relative_path = await async_storage.save_bytes_async(
content,
safe_filename,
subdir=f"documents/{file_no}"
)
absolute_path = str(async_storage.base_dir / relative_path)
actual_size = len(content)
except Exception as e:
raise HTTPException(status_code=500, detail=f"Could not save file: {str(e)}")
doc = Document(
file_no=file_no,
filename=safe_filename, # Use sanitized filename
path=absolute_path,
description=description,
type=mime_type, # Use validated MIME type
size=actual_size,
uploaded_by=current_user.username
)
db.add(doc)
db.commit()
db.refresh(doc)
# Send real-time notification for document upload
try:
await notify_completed(
file_no=file_no,
user_id=current_user.id,
data={
"action": "upload",
"document_id": doc.id,
"filename": safe_filename,
"size": actual_size,
"type": mime_type,
"description": description
}
)
except Exception as e:
# Don't fail the operation if notification fails
get_logger("documents").warning(f"Failed to send document upload notification: {str(e)}")
# Log workflow event for document upload
try:
from app.services.workflow_integration import log_document_uploaded_sync
log_document_uploaded_sync(
db=db,
file_no=file_no,
document_id=doc.id,
filename=safe_filename,
document_type=mime_type,
user_id=current_user.id
)
except Exception as e:
# Don't fail the operation if workflow logging fails
get_logger("documents").warning(f"Failed to log workflow event for document upload: {str(e)}")
return doc
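# Hedged client sketch for the upload route above. The mount prefix and exact route
# path are not shown in this hunk, so the URL here is a placeholder; the multipart
# field name "file" matches the UploadFile parameter.
def _example_upload_document(file_no: str, local_path: str, token: str) -> dict:
    import httpx  # assumed client library for the example
    with open(local_path, "rb") as fh:
        resp = httpx.post(
            f"http://localhost:8000/api/documents/{file_no}/upload",  # placeholder path
            headers={"Authorization": f"Bearer {token}"},
            files={"file": (local_path, fh, "application/pdf")},
            data={"description": "Signed QDRO"},
        )
    resp.raise_for_status()
    return resp.json()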
@router.get("/{file_no}/uploaded")
@@ -987,4 +1508,125 @@ async def update_document(
doc.description = description
db.commit()
db.refresh(doc)
return doc
# WebSocket endpoints for real-time document status notifications
@router.websocket("/ws/status/{file_no}")
async def ws_document_status(websocket: WebSocket, file_no: str):
"""
Subscribe to real-time document processing status updates for a specific file.
Users can connect to this endpoint to receive notifications about:
- Document generation started (processing)
- Document generation completed
- Document generation failed
- Document uploads
Authentication required via token query parameter.
"""
websocket_manager = get_websocket_manager()
topic = topic_for_file(file_no)
# Custom message handler for document status updates
async def handle_document_message(connection_id: str, message: WebSocketMessage):
"""Handle custom messages for document status"""
get_logger("documents").debug("Received document status message",
connection_id=connection_id,
file_no=file_no,
message_type=message.type)
# Use the WebSocket manager to handle the connection
connection_id = await websocket_manager.handle_connection(
websocket=websocket,
topics={topic},
require_auth=True,
metadata={"file_no": file_no, "endpoint": "document_status"},
message_handler=handle_document_message
)
if connection_id:
# Send initial welcome message with subscription confirmation
try:
pool = websocket_manager.pool
welcome_message = WebSocketMessage(
type="subscription_confirmed",
topic=topic,
data={
"file_no": file_no,
"message": f"Subscribed to document status updates for file {file_no}"
}
)
await pool._send_to_connection(connection_id, welcome_message)
get_logger("documents").info("Document status subscription confirmed",
connection_id=connection_id,
file_no=file_no)
except Exception as e:
get_logger("documents").error("Failed to send subscription confirmation",
connection_id=connection_id,
file_no=file_no,
error=str(e))
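# Illustrative subscriber sketch using the third-party `websockets` package (an
# assumption; any WebSocket client works). Host, port, mount path, and the token
# query parameter name are placeholders for the example.
async def _example_listen_for_status(file_no: str, token: str) -> None:
    import json
    import websockets  # assumed dependency for the example only
    uri = f"ws://localhost:8000/api/documents/ws/status/{file_no}?token={token}"
    async with websockets.connect(uri) as ws:
        async for raw in ws:
            event = json.loads(raw)
            # Expect a subscription_confirmed message first, then document_* events
            print(event.get("type"), event.get("data"))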
# Test endpoint for document notification system
@router.post("/test-notification/{file_no}")
async def test_document_notification(
file_no: str,
status: str = Query(..., description="Notification status: processing, completed, or failed"),
message: Optional[str] = Query(None, description="Optional message"),
current_user: User = Depends(get_current_user)
):
"""
Test endpoint to simulate document processing notifications.
This endpoint allows testing the WebSocket notification system by sending
simulated document status updates. Useful for development and debugging.
"""
if status not in ["processing", "completed", "failed"]:
raise HTTPException(
status_code=400,
detail="Status must be one of: processing, completed, failed"
)
# Prepare test data
test_data = {
"test": True,
"triggered_by": current_user.username,
"message": message or f"Test {status} notification for file {file_no}",
"timestamp": datetime.now(timezone.utc).isoformat()
}
# Send notification based on status
try:
if status == "processing":
sent_count = await notify_processing(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
elif status == "completed":
sent_count = await notify_completed(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
else: # failed
sent_count = await notify_failed(
file_no=file_no,
user_id=current_user.id,
data=test_data
)
return {
"message": f"Test notification sent for file {file_no}",
"status": status,
"sent_to_connections": sent_count,
"data": test_data
}
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to send test notification: {str(e)}"
)
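# Example of exercising the notification pipeline end to end (assumed prefix and
# auth): trigger a simulated "completed" event while a WebSocket client such as the
# sketch above is subscribed to the same file_no.
def _example_trigger_test_notification(file_no: str, token: str) -> dict:
    import httpx  # assumed client library for the example
    resp = httpx.post(
        f"http://localhost:8000/api/documents/test-notification/{file_no}",
        headers={"Authorization": f"Bearer {token}"},
        params={"status": "completed", "message": "smoke test"},
    )
    resp.raise_for_status()
    return resp.json()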