# eh-v2/downloader.py

import asyncio
import os
import uuid
from pathlib import Path
from typing import Optional

from fastapi import APIRouter, BackgroundTasks
from pydantic import BaseModel

# Router for the downloader endpoints; every route declared on it gets the
# "/api/v1" prefix and the "downloader" tag.
router = APIRouter(prefix="/api/v1", tags=["downloader"])

# In-memory task registry: maps a task_id string to a dict with the keys
# 'status', 'result' and 'error'. Written by start_crawl and run_crawler,
# read by get_task_status. Being a plain module-level dict, it lives only in
# this process's memory.
tasks = {}

class CrawlRequest(BaseModel):
    # Request body accepted by the /start-crawl endpoint. All three fields are
    # required strings (none declares a default).
    url: str  # written to the download file by run_crawler
    cookies: str  # cookie string; expected format is not visible here
    timestamp: str  # caller-supplied timestamp, kept as an opaque string

class TaskStatus(BaseModel):
    # Schema mirroring the per-task dicts stored in `tasks`.
    # `result` and `error` are explicitly Optional: both default to None and
    # are stored as None by the task code, so the annotation must admit None
    # (a bare `dict = None` / `str = None` rejects an explicit None under
    # pydantic v2 validation).
    status: str  # 'running', 'completed', 'failed'
    result: Optional[dict] = None  # populated only when status == 'completed'
    error: Optional[str] = None  # populated only when status == 'failed'

@router.post("/start-crawl")
async def start_crawl(request: CrawlRequest, background_tasks: BackgroundTasks):
    # Register a new crawl task and hand it to the background runner.
    # Returns immediately with the generated task id; the caller is expected
    # to poll /task-status/{task_id} for the outcome.
    new_id = str(uuid.uuid4())

    # Record the task as running before scheduling it, so a status query that
    # arrives right after this response already finds the entry.
    tasks[new_id] = dict(status='running', result=None, error=None)
    background_tasks.add_task(run_crawler, new_id, request)

    return {"task_id": new_id, "status": "started"}

@router.get("/task-status/{task_id}")
async def get_task_status(task_id: str):
    # Return the stored state dict for task_id, or a {"status": "not_found"}
    # marker when the id is unknown (or its entry is empty).
    return tasks.get(task_id) or {"status": "not_found"}

def _write_download_file(task_id: str, url: str, cookies: str, timestamp: str) -> Path:
    """Write the placeholder download file for *task_id* and return its path.

    Deliberately synchronous: it performs blocking file I/O and is meant to be
    executed in a worker thread by ``run_crawler``. This is also where the
    real (long-running) crawler logic should go.
    """
    # Make sure the downloads directory exists (path is relative to the CWD).
    downloads_dir = Path("downloads")
    downloads_dir.mkdir(parents=True, exist_ok=True)

    # Simulate downloading a file into the downloads directory.
    filepath = downloads_dir / f"download_{task_id}.txt"

    # NOTE(review): the cookie string is persisted to disk in plain text.
    # Cookies are usually credentials; confirm this is intended before
    # replacing the simulation with real crawler output.
    with open(filepath, 'w', encoding='utf-8') as f:
        f.write(f"URL: {url}\n")
        f.write(f"Cookies: {cookies}\n")
        f.write(f"Timestamp: {timestamp}\n")
        f.write("Download completed successfully\n")

    return filepath


async def run_crawler(task_id: str, request: CrawlRequest):
    """Run the crawl for *task_id* and record the outcome in ``tasks``.

    The blocking work is pushed to the event loop's default executor so that
    this coroutine, which is awaited on the event loop when scheduled as a
    background task, does not stall other requests while files are written
    or a long crawl is in progress.

    Args:
        task_id: Key of the entry in ``tasks`` to update.
        request: The crawl request; its ``url``, ``cookies`` and ``timestamp``
            attributes are read.

    Returns:
        None. On success ``tasks[task_id]`` becomes a 'completed' entry whose
        result carries the download path; on any exception it becomes a
        'failed' entry carrying the error text. Exceptions are not re-raised.
    """
    try:
        loop = asyncio.get_running_loop()
        filepath = await loop.run_in_executor(
            None,
            _write_download_file,
            task_id,
            request.url,
            request.cookies,
            request.timestamp,
        )
    except Exception as e:
        # Task boundary: any failure is reported through the task state rather
        # than propagated. Fall back to the exception class name so the error
        # field is never an empty string.
        tasks[task_id] = {
            'status': 'failed',
            'result': None,
            'error': str(e) or type(e).__name__
        }
    else:
        # Update the task state once the crawl has finished.
        tasks[task_id] = {
            'status': 'completed',
            'result': {
                'message': '爬虫完成',
                'data': '您的爬虫结果',
                'download_path': str(filepath)
            },
            'error': None
        }