alpha_tools/fetch-alphas/wqb-get-alphas.py

import httpx
from httpx import BasicAuth, Timeout
import pandas as pd
from tqdm import tqdm
from pathlib import Path
import logging
from typing import List, Optional
import time

class AlphaManager:
    def __init__(self, credentials_file='account.txt', base_path=None):
        """
        初始化Alpha管理器

        Args:
            credentials_file: 账号文件路径（备用）
            base_path: 输出文件保存路径
        """
        self.client = None
        self.base_path = Path(base_path) if base_path else Path.cwd()
        self.logger = self._setup_logger()

        # 登录
        self.login(credentials_file)

    def _setup_logger(self):
        """设置日志记录器"""
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        return logger

    def login(self, credentials_file=None):
        """登录WorldQuant Brain API"""
        try:
            # 从nacos获取账号密码
            with httpx.Client(timeout=10.0) as temp_client:
                nacos_resp = temp_client.get(
                    'http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify'
                )

                if nacos_resp.status_code != 200:
                    self.logger.error('获取账号密码失败')
                    return False

                config = nacos_resp.json()
                username = config.get('user_name')
                password = config.get('password')

                if not username or not password:
                    self.logger.error('账号密码不完整')
                    return False

            self.logger.info(f"正在登录账户: {username}")

            # 创建客户端并设置超时（关键修复）
            # 设置更长的超时时间：连接30秒，读取60秒
            timeout = Timeout(connect=30.0, read=60.0, write=30.0, pool=30.0)
            self.client = httpx.Client(
                auth=BasicAuth(username, password),
                timeout=timeout
            )

            # 发送登录请求
            response = self.client.post('https://api.worldquantbrain.com/authentication')

            if response.status_code == 201:
                self.logger.info("登录成功!")
                return True
            else:
                self.logger.error(f"登录失败: {response.status_code} - {response.text}")
                self.client.close()
                self.client = None
                return False

        except Exception as e:
            self.logger.error(f"登录异常: {e}")
            return False

    def update_alpha_color(self, alpha_id: str, color: str) -> bool:
        """标记Alpha颜色"""
        if not self.client:
            self.logger.error("客户端未登录")
            return False

        try:
            update_data = {"color": color}
            response = self.client.patch(
                f"https://api.worldquantbrain.com/alphas/{alpha_id}",
                json=update_data
            )
            return response.status_code == 200
        except Exception as e:
            self.logger.error(f"标记颜色失败: {e}")
            return False

    def wechat_check_corr_message(self, message: str):
        """微信通知（可选功能）"""
        self.logger.info(f"通知消息: {message}")

    def _make_request_with_retry(self, url: str, max_retries: int = 3, retry_delay: float = 2.0):
        """
        带重试机制的请求

        Args:
            url: 请求URL
            max_retries: 最大重试次数
            retry_delay: 重试延迟（秒）

        Returns:
            httpx.Response 或 None
        """
        for attempt in range(max_retries):
            try:
                response = self.client.get(url)
                return response
            except Exception as e:
                self.logger.warning(f"请求失败 (尝试 {attempt+1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(retry_delay)
                else:
                    self.logger.error(f"请求最终失败: {url}")
                    raise
        return None

    def get_alphas(
        self,
        total_alphas: int = 5,
        limit: int = 100,
        delay: int = 1,
        region: str = "USA",
        universe: str = "TOP3000",
        required_fields: Optional[List[str]] = None,
        match_mode: str = "all",
        min_sharpe: Optional[float] = None,
        min_fitness: Optional[float] = None,
        hidden: str = "false",
        submittable: bool = False,
        auto_color: bool = False,
        color: str = "GREEN",
        output_file_name: str = "alpha_search_list.csv",
        mode: str = "w",
        max_retries: int = 3,
    ):
        """
        搜索Alpha并筛选

        Args:
            total_alphas: 最多获取多少个Alpha
            limit: 每次API请求获取的数量
            delay: API请求的延迟设置
            region: 市场区域，如 "USA", "CHINA"
            universe: 股票池，如 "TOP3000"
            required_fields: 关键词列表，如 ['put', 'call']
            match_mode: 关键词匹配模式："all"(全匹配) 或 "any"(任一匹配)
            min_sharpe: 最小夏普比率阈值
            min_fitness: 最小适应度阈值
            hidden: 是否搜索已隐藏的Alpha ("true"/"false")
            submittable: 是否只筛选可提交（无FAIL检查项）的Alpha
            auto_color: 是否自动给符合条件的Alpha标记颜色
            color: 标记的颜色
            output_file_name: 输出的CSV文件名
            mode: 写入模式："w"(覆盖写入) 或 "a"(追加写入)
            max_retries: 请求失败时的最大重试次数

        Returns:
            list: 符合条件的Alpha列表
        """
        if not self.client:
            self.logger.error("客户端未登录，无法执行搜索")
            return []

        # 验证颜色参数
        valid_colors = [None, "GREEN", "YELLOW", "RED", "BLUE", "PURPLE", "ORANGE"]
        if color not in valid_colors:
            raise ValueError(f"颜色必须是以下之一: {valid_colors}")

        fetched_alphas = []
        offset = 0
        total_accessed = 0
        colored_count = 0

        # 先获取总数（带重试）
        count_url = (
            f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&hidden={hidden}"
            f"&limit=1&settings.delay={delay}&settings.region={region}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}"
        )

        total_available = 0
        for attempt in range(max_retries):
            try:
                count_response = self.client.get(count_url)
                total_available = count_response.json()["count"]
                break
            except Exception as e:
                self.logger.warning(f"获取Alpha总数失败 (尝试 {attempt+1}/{max_retries}): {e}")
                if attempt < max_retries - 1:
                    time.sleep(2.0)
                else:
                    self.logger.error("获取Alpha总数最终失败")
                    return []

        if total_available == 0:
            self.logger.warning("未找到任何Alpha")
            return []

        self.logger.info(f"共找到 {total_available} 个Alpha，开始筛选...")

        pbar = tqdm(total=min(total_available, 10000), desc="扫描Alpha", unit="条")

        while len(fetched_alphas) < total_alphas and offset < total_available:
            # 构建请求URL
            url = (
                f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&limit={limit}"
                f"&offset={offset}&settings.delay={delay}&settings.region={region}&hidden={hidden}&status=UNSUBMITTED%1FIS_PASS&settings.universe={universe}"
            )

            try:
                # 使用重试机制
                response = None
                for attempt in range(max_retries):
                    try:
                        response = self.client.get(url)
                        break
                    except Exception as e:
                        self.logger.warning(f"请求失败 (尝试 {attempt+1}/{max_retries}): {e}")
                        if attempt < max_retries - 1:
                            time.sleep(2.0)
                        else:
                            raise

                if response.status_code == 400:
                    self.logger.warning(f"遇到API限制 (offset={offset})，停止获取更多数据")
                    break

                response_data = response.json()

                if not isinstance(response_data, dict) or "results" not in response_data:
                    self.logger.error(f"API返回了意外的数据: {response_data}")
                    break

                alphas = response_data["results"]

                if not alphas:
                    break

                total_accessed += len(alphas)

                # 关键词筛选
                if required_fields:
                    if match_mode == "all":
                        filtered_alphas = [
                            alpha for alpha in alphas
                            if all(field in alpha["regular"]["code"] for field in required_fields)
                        ]
                    elif match_mode == "any":
                        filtered_alphas = [
                            alpha for alpha in alphas
                            if any(field in alpha["regular"]["code"] for field in required_fields)
                        ]
                    else:
                        raise ValueError("match_mode 必须是 'all' 或 'any'")
                else:
                    filtered_alphas = alphas

                # 进一步筛选夏普比率和fitness
                final_filtered = []
                for alpha in filtered_alphas:
                    sharpe = alpha.get("is", {}).get("sharpe", 0)
                    fitness = alpha.get("is", {}).get("fitness", 0)

                    sharpe_ok = (min_sharpe is None) or (sharpe is not None and abs(sharpe) >= min_sharpe)
                    fitness_ok = (min_fitness is None) or (fitness is not None and abs(fitness) >= min_fitness)

                    if sharpe_ok and fitness_ok:
                        if submittable:
                            checks = alpha.get("is", {}).get("checks", [])
                            fail_count = sum(1 for check in checks if check.get("result") == "FAIL")

                            if fail_count == 0 and auto_color:
                                alpha_id = alpha.get("id")
                                if alpha_id:
                                    success = self.update_alpha_color(alpha_id, color)
                                    if success:
                                        colored_count += 1

                            if fail_count == 0:
                                final_filtered.append(alpha)
                        else:
                            final_filtered.append(alpha)

                fetched_alphas.extend(final_filtered)

                # 更新进度条
                pbar.update(len(alphas))
                pbar.set_postfix({
                    "Region": region,
                    "已扫描": total_accessed,
                    "找到": len(fetched_alphas),
                    "本次": len(final_filtered),
                    "标记": colored_count,
                })

                if len(alphas) < limit:
                    break

                offset += limit

                # 添加延迟避免请求过快
                time.sleep(0.5)

            except Exception as e:
                self.logger.error(f"请求失败: {e}")
                break

        pbar.close()

        # 输出标记统计
        if auto_color and colored_count > 0:
            self.logger.info(f"共标记了 {colored_count} 个Alpha颜色为{color}")
            self.wechat_check_corr_message(f"共标记了 {colored_count} 个Alpha颜色为{color}")

        alpha_list = fetched_alphas[:total_alphas]

        if not alpha_list:
            self.logger.warning("未找到任何符合条件的Alpha！请检查筛选条件是否过于严格。")
        else:
            df = pd.DataFrame(alpha_list)
            df["temp_fitness"] = df.apply(
                lambda row: row["is"].get("fitness", 0) if isinstance(row.get("is"), dict) else 0,
                axis=1
            )
            df_sorted = df.sort_values(by="temp_fitness", ascending=False)
            df_sorted = df_sorted.drop("temp_fitness", axis=1)

            output_path = self.base_path / output_file_name

            if mode == "w":
                df_sorted.to_csv(output_path, index=False)
            elif mode == "a":
                df_sorted.to_csv(output_path, mode="a", index=False, header=False)

            self.logger.info(f"批量回测初步检测结果已经下载！共{len(alpha_list)}条记录！\n{output_path}文件名保存！")

            return alpha_list

    def close(self):
        """关闭客户端"""
        if self.client:
            self.client.close()


# 使用示例
if __name__ == "__main__":
    # 创建管理器实例（会自动登录）
    manager = AlphaManager()

    if manager.client:
        # 搜索包含"put"关键词的可提交Alpha
        results = manager.get_alphas(
            total_alphas=10,
            region="USA",
            universe="TOP3000",
            required_fields=['put'],
            match_mode="all",
            submittable=True,
            auto_color=True,
            color="GREEN",
            output_file_name="my_alpha_list.csv"
        )

        print(f"找到 {len(results) if results else 0} 个符合条件的Alpha")

        # 关闭客户端
        manager.close()