import httpx from httpx import BasicAuth, Timeout import pandas as pd from tqdm import tqdm from pathlib import Path import logging import time import random class SimpleAlphaFetcher: def __init__(self, base_path=None): """ 初始化 Alpha 获取器 Args: base_path: 输出文件保存路径 """ self.client = None self.base_path = Path(base_path) if base_path else Path.cwd() self.logger = self._setup_logger() # 登录 self.login() def _setup_logger(self): """设置日志记录器""" logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) if not logger.handlers: handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') handler.setFormatter(formatter) logger.addHandler(handler) return logger def login(self): """登录 WorldQuant Brain API""" try: # 从 nacos 获取账号密码 with httpx.Client(timeout=10.0) as temp_client: nacos_resp = temp_client.get( 'http://192.168.31.41:30848/nacos/v1/cs/configs?dataId=wq_account&group=quantify' ) if nacos_resp.status_code != 200: self.logger.error('获取账号密码失败') return False config = nacos_resp.json() username = config.get('user_name') password = config.get('password') if not username or not password: self.logger.error('账号密码不完整') return False self.logger.info(f"正在登录账户: {username}") # 创建客户端并设置超时 timeout = Timeout(connect=30.0, read=60.0, write=30.0, pool=30.0) self.client = httpx.Client( auth=BasicAuth(username, password), timeout=timeout ) # 发送登录请求 response = self.client.post('https://api.worldquantbrain.com/authentication') if response.status_code == 201: self.logger.info("登录成功!") return True else: self.logger.error(f"登录失败: {response.status_code} - {response.text}") self.client.close() self.client = None return False except Exception as e: self.logger.error(f"登录异常: {e}") return False def fetch_alphas( self, max_pages: int = 100, limit: int = 100, delay: int = 1, region: str = "USA", universe: str = "TOP3000", hidden: str = "false", output_file_name: str = "alpha_list.csv", mode: str = "w", max_retries: int = 3, ): """ 获取 Alpha 列表 Args: max_pages: 最大爬取页数 limit: 每页数量 delay: API 请求的延迟设置 region: 市场区域 universe: 股票池 hidden: 是否搜索已隐藏的 Alpha output_file_name: 输出的 CSV 文件名 mode: 写入模式:"w"(覆盖写入) 或 "a"(追加写入) max_retries: 请求失败时的最大重试次数 Returns: list: Alpha 列表 """ if not self.client: self.logger.error("客户端未登录,无法执行获取") return [] fetched_alphas = [] offset = 0 total_accessed = 0 pages_fetched = 0 # 先获取总数 count_url = ( f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&hidden={hidden}" f"&limit=1&settings.delay={delay}&settings.region={region}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}" ) total_available = 0 for attempt in range(max_retries): try: count_response = self.client.get(count_url) total_available = count_response.json()["count"] break except Exception as e: self.logger.warning(f"获取 Alpha 总数失败 (尝试 {attempt+1}/{max_retries}): {e}") if attempt < max_retries - 1: time.sleep(random.uniform(3, 5)) else: self.logger.error("获取 Alpha 总数最终失败") return [] if total_available == 0: self.logger.warning("未找到任何 Alpha") return [] # 计算实际最大页数 actual_max_pages = min(max_pages, (total_available + limit - 1) // limit) self.logger.info(f"共找到 {total_available} 个 Alpha,计划爬取 {actual_max_pages} 页...") pbar = tqdm(total=actual_max_pages, desc="爬取 Alpha 页面", unit="页") while pages_fetched < actual_max_pages: # 构建请求 URL url = ( f"https://api.worldquantbrain.com/users/self/alphas?stage=IS&limit={limit}" f"&offset={offset}&settings.delay={delay}&settings.region={region}&hidden={hidden}&status=UNSUBMITTED%1FIS_FAIL&settings.universe={universe}" ) try: # 使用重试机制 response = None for attempt in range(max_retries): try: response = self.client.get(url) break except Exception as e: self.logger.warning(f"请求失败 (尝试 {attempt+1}/{max_retries}): {e}") if attempt < max_retries - 1: time.sleep(random.uniform(3, 5)) else: raise if response.status_code == 400: self.logger.warning(f"遇到 API 限制 (offset={offset}),停止获取") break response_data = response.json() if not isinstance(response_data, dict) or "results" not in response_data: self.logger.error(f"API 返回了意外的数据: {response_data}") break alphas = response_data["results"] if not alphas: break fetched_alphas.extend(alphas) total_accessed += len(alphas) pages_fetched += 1 # 更新进度条 pbar.update(1) pbar.set_postfix({ "Region": region, "已获取": total_accessed, "本页": len(alphas), }) if len(alphas) < limit: self.logger.info(f"本页数据不足 {limit} 条,已到达末尾") break offset += limit # 添加延迟避免请求过快 time.sleep(0.5) except Exception as e: self.logger.error(f"请求失败: {e}") break pbar.close() if not fetched_alphas: self.logger.warning("未获取到任何 Alpha!") return [] # 按 fitness 排序 df = pd.DataFrame(fetched_alphas) df["temp_fitness"] = df.apply( lambda row: row["is"].get("fitness", 0) if isinstance(row.get("is"), dict) else 0, axis=1 ) df_sorted = df.sort_values(by="temp_fitness", ascending=False) df_sorted = df_sorted.drop("temp_fitness", axis=1) output_path = self.base_path / output_file_name if mode == "w": df_sorted.to_csv(output_path, index=False) elif mode == "a": df_sorted.to_csv(output_path, mode="a", index=False, header=False) self.logger.info(f"Alpha 列表已保存!共 {len(fetched_alphas)} 条记录!\n文件路径: {output_path}") return fetched_alphas def close(self): """关闭客户端""" if self.client: self.client.close() # 使用示例 if __name__ == "__main__": # 创建获取器实例(会自动登录) fetcher = SimpleAlphaFetcher() if fetcher.client: # 获取 Alpha 列表 results = fetcher.fetch_alphas( max_pages=100, # 最多爬取 100 页 limit=100, # 每页 100 条 region="USA", # 美股市场 universe="TOP3000", # TOP3000 股票池 output_file_name="alpha_list.csv" # 输出文件名 ) print(f"成功获取 {len(results) if results else 0} 个 Alpha") # 关闭客户端 fetcher.close()