from typing import Optional
import uuid
from pathlib import Path

from fastapi import APIRouter, BackgroundTasks
from pydantic import BaseModel

router = APIRouter(prefix="/api/v1", tags=["downloader"])

# In-memory store of task states, keyed by task_id.
tasks = {}


class CrawlRequest(BaseModel):
    url: str
    cookies: str
    timestamp: str


class TaskStatus(BaseModel):
    status: str  # 'running', 'completed', 'failed'
    result: Optional[dict] = None
    error: Optional[str] = None


@router.post("/start-crawl")
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
    task_id = str(uuid.uuid4())
    tasks[task_id] = {'status': 'running', 'result': None, 'error': None}
    # Run the crawler in the background so the request returns immediately.
    background_tasks.add_task(run_crawler, task_id, request)
    return {"task_id": task_id, "status": "started"}


@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str):
    task = tasks.get(task_id)
    if not task:
        return {"status": "not_found"}
    return task


async def run_crawler(task_id: str, request: CrawlRequest):
    try:
        # Run your crawler logic here; it can be long-running,
        # e.g. `await asyncio.sleep(300)` for 5 minutes (avoid a blocking
        # `time.sleep` here, since this coroutine runs on the event loop).

        # Make sure the downloads directory exists (extra safeguard).
        downloads_dir = Path("downloads")
        downloads_dir.mkdir(exist_ok=True)

        # Simulate downloading a file into the downloads directory.
        filename = f"download_{task_id}.txt"
        filepath = downloads_dir / filename
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(f"URL: {request.url}\n")
            f.write(f"Cookies: {request.cookies}\n")
            f.write(f"Timestamp: {request.timestamp}\n")
            f.write("Download completed successfully\n")

        # Update the task state once the crawler has finished.
        tasks[task_id] = {
            'status': 'completed',
            'result': {
                'message': 'Crawl finished',
                'data': 'Your crawl results',
                'download_path': str(filepath)
            },
            'error': None
        }
    except Exception as e:
        tasks[task_id] = {
            'status': 'failed',
            'result': None,
            'error': str(e)
        }
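
# Minimal usage sketch (an assumption about how this router is wired into an
# app; the module name `downloader` below is hypothetical):
#
#     from fastapi import FastAPI
#     from downloader import router
#
#     app = FastAPI()
#     app.include_router(router)
#
# A client then POSTs {"url": ..., "cookies": ..., "timestamp": ...} to
# /api/v1/start-crawl, receives a task_id, and polls
# GET /api/v1/task-status/{task_id} until the status is 'completed' or 'failed'.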