commit f1440fe3ae95ccc1525a78489553969ac5527b38 Author: Jack Date: Thu Nov 13 23:29:47 2025 +0800 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c9d0dc3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,67 @@ +.DS_Store +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg +.idea/* +xml_files/ + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +other/split_clash_config/split_config +ai_news/save_data +daily/*.txt + +./result \ No newline at end of file diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..09e93a3 --- /dev/null +++ b/Readme.md @@ -0,0 +1,25 @@ + +### 依赖 +pip install httpx + +### 目录结构 + +```text +FactorSimulator/ +├── __init__.py # 包初始化文件,定义包级别的导入和元数据 +├── main.py # 程序主入口,负责启动批量模拟流程 +├── core/ # 核心业务逻辑模块 +│ ├── __init__.py # 核心模块初始化,定义模块级别的导入 +│ ├── api_client.py # WorldQuant Brain API客户端封装,处理HTTP请求和认证 +│ └── models.py # 数据模型定义,使用dataclass定义各种指标和结果的数据结构 +├── managers/ # 管理器模块,负责业务流程协调 +│ ├── __init__.py # 管理器模块初始化 +│ └── simulation_manager.py # 模拟管理器,负责批量模拟的调度、线程池管理和结果汇总 +├── utils/ # 工具函数模块 +│ ├── __init__.py # 工具模块初始化 +│ ├── file_utils.py # 文件操作工具,处理因子列表加载和结果保存 +│ └── time_utils.py # 时间格式化工具,将秒数转换为可读格式 +└── config/ # 配置模块 + ├── __init__.py # 配置模块初始化 + └── settings.py # 模拟参数配置,定义默认的模拟设置常量 +``` \ No newline at end of file diff --git a/__init__.py 
b/__init__.py new file mode 100644 index 0000000..aa27a57 --- /dev/null +++ b/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +""" +WorldQuant Brain 因子模拟器 +用于批量模拟Alpha因子的工具 +""" + +__version__ = "0.0.1" +__author__ = "Jack" + +from .core.api_client import WorldQuantBrainSimulate +from .managers.simulation_manager import AlphaSimulationManager + +__all__ = ['WorldQuantBrainSimulate', 'AlphaSimulationManager'] \ No newline at end of file diff --git a/account.txt b/account.txt new file mode 100644 index 0000000..5cfaa62 --- /dev/null +++ b/account.txt @@ -0,0 +1 @@ +['jack0210_@hotmail.com', '!QAZ2wsx+0913'] \ No newline at end of file diff --git a/alpha.txt b/alpha.txt new file mode 100644 index 0000000..86a5846 --- /dev/null +++ b/alpha.txt @@ -0,0 +1,4 @@ +ts_rank(ts_delta(close, 5), 20) +ts_corr(ts_delay(close, 10), ts_delay(volume, 10), 20) +-ts_rank(ts_std(close, 60), 20) +-(close - ts_mean(close, 30)) / ts_std(close, 30) \ No newline at end of file diff --git a/config/__init__.py b/config/__init__.py new file mode 100644 index 0000000..0494c40 --- /dev/null +++ b/config/__init__.py @@ -0,0 +1,8 @@ +# -*- coding: utf-8 -*- +""" +配置模块 - 包含配置常量 +""" + +from .settings import DEFAULT_SIMULATION_SETTINGS + +__all__ = ['DEFAULT_SIMULATION_SETTINGS'] \ No newline at end of file diff --git a/config/settings.py b/config/settings.py new file mode 100644 index 0000000..ab5471f --- /dev/null +++ b/config/settings.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +""" +模拟配置常量 +""" + +DEFAULT_SIMULATION_SETTINGS = { + 'instrumentType': 'EQUITY', + 'region': 'USA', + 'universe': 'TOP3000', + 'delay': 1, + 'decay': 0, + 'neutralization': 'INDUSTRY', + 'truncation': 0.08, + 'pasteurization': 'ON', + 'unitHandling': 'VERIFY', + 'nanHandling': 'OFF', + 'language': 'FASTEXPR', + 'visualization': False, +} \ No newline at end of file diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..a58446f --- /dev/null +++ b/core/__init__.py @@ -0,0 +1,9 @@ +# 
def load_credentials(self) -> Tuple[str, str]:
    """Read the account name and password from ``self.credentials_file``.

    The file must contain a Python literal sequence whose first two items
    are strings, e.g. ``['username', 'password']``.  If the file does not
    exist, an empty one is created and the process exits with status 1 so
    the user can fill it in.

    Returns:
        ``(username, password)``.

    Raises:
        SystemExit: The credentials file did not exist (exit code 1).
        ValueError: The file content is not a literal list/tuple holding at
            least two strings.
    """
    # Function-scope imports keep this block self-contained.
    import ast
    import sys

    if not os.path.exists(self.credentials_file):
        print("未找到 account.txt 文件")
        with open(self.credentials_file, 'w', encoding='utf-8') as f:
            f.write("")
        print("account.txt 文件已创建,请填写账号密码, 格式: ['username', 'password']")
        sys.exit(1)

    with open(self.credentials_file, encoding='utf-8') as f:
        raw = f.read()

    # SECURITY: the file is parsed as a literal only.  eval() would execute
    # any expression placed in the credentials file.
    try:
        credentials = ast.literal_eval(raw.strip())
    except (ValueError, SyntaxError) as err:
        raise ValueError(
            f"{self.credentials_file} 格式错误, 应为: ['username', 'password']"
        ) from err

    well_formed = (
        isinstance(credentials, (list, tuple))
        and len(credentials) >= 2
        and all(isinstance(item, str) for item in credentials[:2])
    )
    if not well_formed:
        raise ValueError(
            f"{self.credentials_file} 格式错误, 应为: ['username', 'password']"
        )

    return credentials[0], credentials[1]
def get_alpha_metrics(self, alpha_id: str) -> "AlphaMetrics":
    """Fetch ``/alphas/{alpha_id}`` and convert the payload to ``AlphaMetrics``.

    The lookup is best-effort: when the request fails, returns a non-2xx
    status, or the payload cannot be parsed, the problem is printed and an
    ``AlphaMetrics`` built from default-constructed parts is returned
    instead of raising.

    Args:
        alpha_id: Identifier of the alpha to look up.

    Returns:
        The parsed metrics, or a default-valued ``AlphaMetrics`` on failure.

    Raises:
        Exception: ``login()`` has not been called (``self.client`` is None).
    """
    if self.client is None:
        raise Exception("请先登录")

    try:
        alpha_resp = self.client.get(f'{self.brain_api_url}/alphas/{alpha_id}')
        if alpha_resp.status_code in (200, 201):
            return self._parse_alpha_metrics(alpha_resp.json())
        # This branch used to be silent, which made an HTTP failure
        # indistinguishable from an alpha that simply has no metrics.
        print(f"获取指标失败: HTTP {alpha_resp.status_code}")
    except Exception as e:
        # Deliberately broad: metrics are optional extras and must not turn
        # an otherwise successful simulation into a failure.
        print(f"获取指标时出错: {str(e)}")

    # Single fallback shared by the HTTP-error and exception paths.
    return AlphaMetrics(
        train_metrics=TrainMetrics(),
        is_metrics=TestMetrics(),
        test_metrics=TestMetrics(),
        alpha_info=AlphaInfo()
    )
def _parse_alpha_metrics(self, alpha_data: Dict[str, Any]) -> "AlphaMetrics":
    """Convert a raw alpha payload into an ``AlphaMetrics`` object.

    The ``train``, ``is`` and ``test`` sections are optional.  A section
    that is missing, ``None`` or empty produces metrics whose fields are
    all ``None``, the same as the default-constructed dataclass.

    Args:
        alpha_data: Decoded JSON body of the alpha resource.

    Returns:
        ``AlphaMetrics`` holding the train / in-sample / out-of-sample
        metrics, the alpha info (including per-check results keyed by the
        lower-cased check name) and the alpha id.
    """
    def section(name: str) -> Dict[str, Any]:
        # Missing keys and falsy values (None, {}) are treated alike.  This
        # also fixes the checks lookup below, which raised TypeError when
        # 'is' was present but None.
        return alpha_data.get(name) or {}

    def sample_metrics(data: Dict[str, Any]) -> "TestMetrics":
        # The in-sample and out-of-sample sections share one field layout.
        return TestMetrics(
            sharpe_ratio=data.get('sharpe'),
            annual_return=data.get('returns'),
            max_drawdown=data.get('drawdown'),
            turnover=data.get('turnover'),
            fitness=data.get('fitness'),
            pnl=data.get('pnl'),
        )

    train_data = section('train')
    is_data = section('is')

    train_metrics = TrainMetrics(
        sharpe_ratio=train_data.get('sharpe'),
        annual_return=train_data.get('returns'),
        max_drawdown=train_data.get('drawdown'),
        turnover=train_data.get('turnover'),
        fitness=train_data.get('fitness'),
        pnl=train_data.get('pnl'),
        book_size=train_data.get('bookSize'),
        long_count=train_data.get('longCount'),
        short_count=train_data.get('shortCount'),
        margin=train_data.get('margin'),
    )

    alpha_info = AlphaInfo(
        grade=alpha_data.get('grade'),
        stage=alpha_data.get('stage'),
        status=alpha_data.get('status'),
        date_created=alpha_data.get('dateCreated'),
    )

    # Checks live inside the in-sample section; leave checks unset when the
    # payload carries none.
    if 'checks' in is_data:
        alpha_info.checks = {
            check.get('name', '').lower(): {
                'result': check.get('result', ''),
                'value': check.get('value', None),
                'limit': check.get('limit', None),
            }
            for check in is_data['checks'] or []
        }

    return AlphaMetrics(
        train_metrics=train_metrics,
        is_metrics=sample_metrics(is_data),
        test_metrics=sample_metrics(section('test')),
        alpha_info=alpha_info,
        alpha_id=alpha_data.get('id')
    )
def main():
    """Program entry point.

    Loads the expressions listed in ``alpha.txt`` and simulates them in
    batches of three; prints a hint and returns when the list is empty.
    """
    expressions = load_alpha_list('alpha.txt')

    if expressions:
        # Build the manager and run every expression through it.
        AlphaSimulationManager().run_simulation(expressions, batch_size=3)
    else:
        print("未找到有效的因子表达式,请检查 alpha.txt 文件")
def simulate_alpha(self, expression, settings=None):
    """Submit one alpha expression for simulation and wait for the outcome.

    Args:
        expression: The alpha expression, sent as the ``regular`` field.
        settings: Optional dict of overrides merged over the built-in
            default simulation settings.

    Returns:
        ``{"status": "success", "alpha_id": ..., "metrics": ...}`` when the
        simulation produced an alpha, otherwise
        ``{"status": "error", "message": ...}``.

    Raises:
        Exception: ``login()`` has not been called (``self.client`` is None).
    """
    if self.client is None:
        raise Exception("请先登录")

    default_settings = {
        'instrumentType': 'EQUITY',
        'region': 'USA',
        'universe': 'TOP3000',
        'delay': 1,
        'decay': 0,
        'neutralization': 'INDUSTRY',
        'truncation': 0.08,
        'pasteurization': 'ON',
        'unitHandling': 'VERIFY',
        'nanHandling': 'OFF',
        'language': 'FASTEXPR',
        'visualization': False,
    }

    if settings:
        default_settings.update(settings)

    simulation_data = {
        'type': 'REGULAR',
        'settings': default_settings,
        'regular': expression
    }

    sim_resp = self.client.post(f'{self.brain_api_url}/simulations', json=simulation_data)
    print(f"模拟提交状态: {sim_resp.status_code}")

    # A rejected submission carries no Location header.  Report it as an
    # ordinary simulation error instead of failing with KeyError('location').
    sim_progress_url = sim_resp.headers.get('location')
    if not sim_progress_url:
        message = f"模拟提交失败 (HTTP {sim_resp.status_code}): {sim_resp.text}"
        print(message)
        return {"status": "error", "message": message}
    print(f"进度URL: {sim_progress_url}")

    # Poll until the server stops sending a Retry-After header.
    while True:
        sim_progress_resp = self.client.get(sim_progress_url)
        retry_after_sec = float(sim_progress_resp.headers.get("Retry-After", 0))

        if retry_after_sec == 0:
            break
        print(sim_progress_resp.json())
        print(f"等待 {retry_after_sec} 秒...")
        time.sleep(retry_after_sec)

    # Decode the final body once instead of re-parsing it for every field.
    progress = sim_progress_resp.json()

    if progress.get("status") == "ERROR":
        result = progress.get("message", str(progress))
        print(f"因子模拟失败: {result}")
        return {"status": "error", "message": result}

    alpha_id = progress.get("alpha")
    if alpha_id is None:
        # A finished simulation without an alpha id cannot be looked up.
        message = f"模拟结果缺少 alpha 字段: {progress}"
        print(message)
        return {"status": "error", "message": message}
    print(f"生成的Alpha ID: {alpha_id}")

    metrics = self.get_alpha_metrics(alpha_id)

    return {"status": "success", "alpha_id": alpha_id, "metrics": metrics}
Exception as e: + return {"error": f"获取指标时出错: {str(e)}"} + + """解析Alpha数据,提取关键指标""" + def _parse_alpha_metrics(self, alpha_data): + metrics = {} + + try: + # 从train字段获取指标数据 + if 'train' in alpha_data and alpha_data['train']: + train_data = alpha_data['train'] + metrics.update({ + 'sharpe_ratio': train_data.get('sharpe', None), + 'annual_return': train_data.get('returns', None), + 'max_drawdown': train_data.get('drawdown', None), + 'turnover': train_data.get('turnover', None), + 'fitness': train_data.get('fitness', None), + 'pnl': train_data.get('pnl', None), + 'book_size': train_data.get('bookSize', None), + 'long_count': train_data.get('longCount', None), + 'short_count': train_data.get('shortCount', None), + 'margin': train_data.get('margin', None), + 'start_date': train_data.get('startDate', None), + }) + + # 从is字段获取样本内测试数据 + if 'is' in alpha_data and alpha_data['is']: + is_data = alpha_data['is'] + metrics.update({ + 'is_sharpe': is_data.get('sharpe', None), + 'is_returns': is_data.get('returns', None), + 'is_drawdown': is_data.get('drawdown', None), + 'is_turnover': is_data.get('turnover', None), + 'is_fitness': is_data.get('fitness', None), + 'is_pnl': is_data.get('pnl', None), + }) + + # 从test字段获取样本外测试数据 + if 'test' in alpha_data and alpha_data['test']: + test_data = alpha_data['test'] + metrics.update({ + 'test_sharpe': test_data.get('sharpe', None), + 'test_returns': test_data.get('returns', None), + 'test_drawdown': test_data.get('drawdown', None), + 'test_turnover': test_data.get('turnover', None), + 'test_fitness': test_data.get('fitness', None), + 'test_pnl': test_data.get('pnl', None), + }) + + # 其他重要信息 + metrics.update({ + 'alpha_id': alpha_data.get('id', None), + 'grade': alpha_data.get('grade', None), + 'stage': alpha_data.get('stage', None), + 'status': alpha_data.get('status', None), + 'date_created': alpha_data.get('dateCreated', None), + }) + + # 解析检查结果 + if 'is' in alpha_data and 'checks' in alpha_data['is']: + checks = 
alpha_data['is']['checks'] + check_results = {} + for check in checks: + check_name = check.get('name', '') + result = check.get('result', '') + value = check.get('value', None) + check_results[check_name.lower()] = { + 'result': result, + 'value': value, + 'limit': check.get('limit', None) + } + metrics['checks'] = check_results + + except Exception as e: + metrics['error'] = f"解析指标时出错: {str(e)}" + + return metrics + + def close(self): + """关闭连接""" + if self.client: + self.client.close() + + +class AlphaSimulationManager: + def __init__(self, credentials_file='account.txt'): + self.credentials_file = credentials_file + self.results = [] + + """将秒数格式化为 xx分xx秒 格式""" + def format_time(self, seconds): + if seconds < 60: + return f"{seconds:.2f}秒" + else: + minutes = int(seconds // 60) + remaining_seconds = seconds % 60 + return f"{minutes}分{remaining_seconds:.2f}秒" + + """模拟单个Alpha因子(线程安全)""" + def simulate_single_alpha(self, api, expression, settings=None): + alpha_start_time = time.time() + + try: + # 模拟Alpha因子 + simulation_result = api.simulate_alpha(expression, settings) + alpha_end_time = time.time() + time_consuming = alpha_end_time - alpha_start_time + + # 根据模拟结果类型处理 + if simulation_result["status"] == "success": + # 模拟成功的结果 + result = { + "expression": expression, + "time_consuming": time_consuming, + "formatted_time": self.format_time(time_consuming), + "alpha_id": simulation_result["alpha_id"], + "status": "success", + "description": "/", + "simulation_timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), + # 训练集指标 + "train_metrics": { + "sharpe_ratio": simulation_result["metrics"].get('sharpe_ratio'), + "annual_return": simulation_result["metrics"].get('annual_return'), + "max_drawdown": simulation_result["metrics"].get('max_drawdown'), + "turnover": simulation_result["metrics"].get('turnover'), + "fitness": simulation_result["metrics"].get('fitness'), + "pnl": simulation_result["metrics"].get('pnl'), + "book_size": simulation_result["metrics"].get('book_size'), 
+ "long_count": simulation_result["metrics"].get('long_count'), + "short_count": simulation_result["metrics"].get('short_count'), + "margin": simulation_result["metrics"].get('margin'), + }, + # 样本内测试指标 + "is_metrics": { + "sharpe_ratio": simulation_result["metrics"].get('is_sharpe'), + "annual_return": simulation_result["metrics"].get('is_returns'), + "max_drawdown": simulation_result["metrics"].get('is_drawdown'), + "turnover": simulation_result["metrics"].get('is_turnover'), + "fitness": simulation_result["metrics"].get('is_fitness'), + "pnl": simulation_result["metrics"].get('is_pnl'), + }, + # 样本外测试指标 + "test_metrics": { + "sharpe_ratio": simulation_result["metrics"].get('test_sharpe'), + "annual_return": simulation_result["metrics"].get('test_returns'), + "max_drawdown": simulation_result["metrics"].get('test_drawdown'), + "turnover": simulation_result["metrics"].get('test_turnover'), + "fitness": simulation_result["metrics"].get('test_fitness'), + "pnl": simulation_result["metrics"].get('test_pnl'), + }, + # 其他信息 + "alpha_info": { + "grade": simulation_result["metrics"].get('grade'), + "stage": simulation_result["metrics"].get('stage'), + "status": simulation_result["metrics"].get('status'), + "date_created": simulation_result["metrics"].get('date_created'), + "checks": simulation_result["metrics"].get('checks', {}) + } + } + print(f"✓ 因子模拟成功: {expression}") + print(f" 耗时: {self.format_time(time_consuming)},Alpha ID: {simulation_result['alpha_id']}") + + # 打印关键指标 + self._print_success_metrics(simulation_result["metrics"]) + + else: + # 模拟失败的结果(API返回的错误) + result = { + "expression": expression, + "time_consuming": time_consuming, + "formatted_time": self.format_time(time_consuming), + "alpha_id": "/", + "status": "error", + "description": simulation_result["message"], + "simulation_timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), + "performance_metrics": {}, + "risk_metrics": {}, + "quantile_metrics": {}, + "other_metrics": {} + } + print(f"✗ 因子模拟失败: 
{expression}") + print(f" 耗时: {self.format_time(time_consuming)},错误: {simulation_result['message']}") + + except Exception as e: + # 其他异常情况 + alpha_end_time = time.time() + time_consuming = alpha_end_time - alpha_start_time + + result = { + "expression": expression, + "time_consuming": time_consuming, + "formatted_time": self.format_time(time_consuming), + "alpha_id": "/", + "status": "failed", + "description": str(e), + "simulation_timestamp": time.strftime("%Y-%m-%d %H:%M:%S"), + "performance_metrics": {}, + "risk_metrics": {}, + "quantile_metrics": {}, + "other_metrics": {} + } + print(f"✗ 因子模拟异常: {expression}") + print(f" 耗时: {self.format_time(time_consuming)},异常: {str(e)}") + + return result + + """打印成功因子的关键指标""" + + def _print_success_metrics(self, metrics): + if 'error' in metrics: + print(f" 指标获取错误: {metrics['error']}") + return + + print(" 关键指标 (训练集):") + key_metrics = [ + ('夏普比率', 'sharpe_ratio'), + ('年化收益', 'annual_return'), + ('最大回撤', 'max_drawdown'), + ('换手率', 'turnover'), + ('适应度', 'fitness'), + ('PNL', 'pnl'), + ] + + for chinese_name, metric_key in key_metrics: + value = metrics.get(metric_key) + if value is not None: + if isinstance(value, float): + value = f"{value:.4f}" + print(f" {chinese_name}: {value}") + + # 显示样本外测试的夏普比率(如果存在) + test_sharpe = metrics.get('test_sharpe') + if test_sharpe is not None: + print(f" 样本外夏普比率: {test_sharpe:.4f}") + + """模拟一批Alpha因子(3个一组)""" + def simulate_alpha_batch(self, alpha_batch, batch_number): + print(f"\n{'=' * 60}") + print(f"开始第 {batch_number} 批因子模拟 (共 {len(alpha_batch)} 个因子)") + print(f"因子列表: {alpha_batch}") + print(f"{'=' * 60}") + + batch_start_time = time.time() + batch_results = [] + + # 创建API客户端实例(每个线程独立的客户端) + api = WorldQuantBrainSimulate(self.credentials_file) + + try: + if api.login(): + # 使用线程池执行3个因子的模拟 + with ThreadPoolExecutor(max_workers=3) as executor: + # 提交所有任务 + future_to_alpha = {executor.submit(self.simulate_single_alpha, api, alpha): alpha for alpha in alpha_batch} + + # 等待所有任务完成 + for 
def run_simulation(self, alpha_list, batch_size=3):
    """Simulate every expression in ``alpha_list``, one batch at a time.

    The list is cut into consecutive groups of ``batch_size``.  Each group
    goes through ``simulate_alpha_batch``, with a random 3-5 second pause
    between groups.  Afterwards the summary is printed and the results are
    saved.

    Returns:
        The combined list of per-expression results, in batch order.
    """
    print("开始Alpha因子批量模拟...")
    started = time.time()

    # Consecutive slices of at most batch_size expressions.
    batches = []
    for offset in range(0, len(alpha_list), batch_size):
        batches.append(alpha_list[offset:offset + batch_size])

    collected = []
    last_number = len(batches)

    for number, group in enumerate(batches, 1):
        collected.extend(self.simulate_alpha_batch(group, number))

        # No pause after the final batch.
        if number == last_number:
            continue
        sleep_time = uniform(3, 5)
        print(f"\n等待 {sleep_time:.2f} 秒后开始下一批...")
        time.sleep(sleep_time)

    elapsed = time.time() - started

    self.print_summary(collected, elapsed)
    self.save_results(collected)

    return collected
def save_results(self, results):
    """Write the simulation results to ``result/simulation_results-<ts>.json``.

    ``time_consuming`` is rounded to 2 decimals and every float inside the
    metric sub-dicts is rounded to 6 decimals.  The input records and their
    nested dicts are left untouched.

    Args:
        results: List of result dicts as built by ``simulate_single_alpha``.
    """
    # Successful results store their numbers under train/is/test_metrics.
    # The original list named only the four placeholder categories, so real
    # metrics were never rounded.
    metric_categories = (
        'train_metrics', 'is_metrics', 'test_metrics',
        'performance_metrics', 'risk_metrics', 'quantile_metrics', 'other_metrics',
    )

    serializable_results = []
    for result in results:
        record = dict(result)
        record['time_consuming'] = round(record['time_consuming'], 2)

        for category in metric_categories:
            values = record.get(category)
            if isinstance(values, dict):
                # Build a new dict so the caller's nested data is not mutated
                # through the shallow copy.
                record[category] = {
                    key: round(value, 6) if isinstance(value, float) else value
                    for key, value in values.items()
                }

        serializable_results.append(record)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs('result', exist_ok=True)

    result_name = f"result/simulation_results-{str(int(time.time()))}.json"
    with open(result_name, 'w', encoding='utf-8') as f:
        json.dump(serializable_results, f, ensure_ascii=False, indent=2)
    print(f"结果已保存到 {result_name}")
def simulate_single_alpha(self, api: WorldQuantBrainSimulate, expression: str,
                          settings: Dict[str, Any] = None) -> SimulationResult:
    """Simulate one expression and wrap the outcome in a ``SimulationResult``.

    The result status is ``success`` when the API produced an alpha,
    ``error`` when the API reported a simulation error, and ``failed`` when
    any exception was raised while simulating or reporting.

    Args:
        api: Logged-in API client used to run the simulation.
        expression: The alpha expression to simulate.
        settings: Optional simulation settings passed through to the client.
    """
    started_at = time.time()

    def build(elapsed, alpha_id, status, description, **metric_parts):
        # Fields shared by all three outcomes; metric parts only on success.
        return SimulationResult(
            expression=expression,
            time_consuming=elapsed,
            formatted_time=format_time(elapsed),
            alpha_id=alpha_id,
            status=status,
            description=description,
            simulation_timestamp=time.strftime("%Y-%m-%d %H:%M:%S"),
            **metric_parts
        )

    try:
        outcome = api.simulate_alpha(expression, settings)
        elapsed = time.time() - started_at

        if outcome["status"] != "success":
            # The API answered, but reported a simulation error.
            result = build(elapsed, "/", "error", outcome["message"])
            print(f"✗ 因子模拟失败: {expression}")
            print(f" 耗时: {format_time(elapsed)},错误: {outcome['message']}")
        else:
            metrics = outcome["metrics"]
            result = build(
                elapsed, outcome["alpha_id"], "success", "/",
                train_metrics=metrics.train_metrics,
                is_metrics=metrics.is_metrics,
                test_metrics=metrics.test_metrics,
                alpha_info=metrics.alpha_info,
            )
            print(f"✓ 因子模拟成功: {expression}")
            print(f" 耗时: {format_time(elapsed)},Alpha ID: {outcome['alpha_id']}")

            # Headline metrics for the console.
            self._print_success_metrics(metrics)

    except Exception as e:
        # Anything else (network failure, unexpected payload, ...).
        elapsed = time.time() - started_at
        result = build(elapsed, "/", "failed", str(e))
        print(f"✗ 因子模拟异常: {expression}")
        print(f" 耗时: {format_time(elapsed)},异常: {str(e)}")

    return result
def print_summary(self, results: List[SimulationResult], total_time: float):
    """Print the overall tally followed by one entry per simulated expression.

    Args:
        results: All simulation results collected during the run.
        total_time: Wall-clock duration of the whole run, in seconds.
    """
    rule = '=' * 60

    def tally(status):
        # Number of results carrying exactly this status.
        return len([entry for entry in results if entry.status == status])

    print(f"\n{rule}")
    print("模拟结果汇总")
    print(f"{rule}")

    print(f"总模拟因子数: {len(results)}")
    print(f"成功: {tally('success')} 个")
    print(f"模拟错误: {tally('error')} 个")
    print(f"执行异常: {tally('failed')} 个")
    print(f"总耗时: {format_time(total_time)}")
    print(f"{rule}")

    for position, entry in enumerate(results, 1):
        succeeded = entry.status == 'success'
        icon = "✓" if succeeded else "✗"
        print(f"{position}. {icon} {entry.expression}")
        print(f" 状态: {entry.status}")
        print(f" 耗时: {entry.formatted_time}")
        print(f" Alpha ID: {entry.alpha_id}")
        if not succeeded:
            # Only unsuccessful entries carry a meaningful reason.
            print(f" 原因: {entry.description}")
        print()
1435, + 1424, + 0.3652, + 1.41, + 0.0579, + 0.0253, + 0.000317, + 0.56, + "TRAIN" + ], + [ + "2022", + 31117.0, + 20000000, + 1441, + 1434, + 0.3446, + 2.08, + 0.0648, + 0.004, + 0.000376, + 0.9, + "TRAIN" + ], + [ + "2022", + 443804.0, + 20000000, + 1415, + 1417, + 0.3623, + 1.19, + 0.0464, + 0.0349, + 0.000256, + 0.43, + "TEST" + ], + [ + "2023", + 68779.0, + 20000000, + 1405, + 1394, + 0.3554, + 6.22, + 0.1323, + 0.0019, + 0.000744, + 3.79, + "TEST" + ] + ] +} \ No newline at end of file diff --git a/reference/指标.json b/reference/指标.json new file mode 100644 index 0000000..c562c7a --- /dev/null +++ b/reference/指标.json @@ -0,0 +1,137 @@ +{ + "id": "KP0WWZ6l", + "type": "REGULAR", + "author": "YC93384", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 0, + "neutralization": "SUBINDUSTRY", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "OFF", + "language": "FASTEXPR", + "visualization": false, + "startDate": "2018-01-20", + "endDate": "2023-01-20", + "testPeriod": "P1Y" + }, + "regular": { + "code": "rank(ts_sum(vec_avg(nws12_afterhsz_sl), 60)) * 0.7 + rank(-ts_delta(close, 2)) * 0.3", + "description": null, + "operatorCount": 9 + }, + "dateCreated": "2025-11-13T09:22:47-05:00", + "dateSubmitted": null, + "dateModified": "2025-11-13T09:22:47-05:00", + "name": null, + "favorite": false, + "hidden": false, + "color": null, + "category": null, + "tags": [], + "classifications": [], + "grade": "INFERIOR", + "stage": "IS", + "status": "UNSUBMITTED", + "is": { + "pnl": 3219244, + "bookSize": 20000000, + "longCount": 1332, + "shortCount": 1328, + "turnover": 0.3657, + "returns": 0.0651, + "drawdown": 0.0353, + "margin": 0.000356, + "sharpe": 1.93, + "fitness": 0.81, + "startDate": "2018-01-20", + "checks": [ + { + "name": "LOW_SHARPE", + "result": "PASS", + "limit": 1.25, + "value": 1.93 + }, + { + "name": "LOW_FITNESS", + "result": "FAIL", 
+ "limit": 1.0, + "value": 0.81 + }, + { + "name": "LOW_TURNOVER", + "result": "PASS", + "limit": 0.01, + "value": 0.3657 + }, + { + "name": "HIGH_TURNOVER", + "result": "PASS", + "limit": 0.7, + "value": 0.3657 + }, + { + "name": "CONCENTRATED_WEIGHT", + "result": "PASS" + }, + { + "name": "LOW_SUB_UNIVERSE_SHARPE", + "result": "PASS", + "limit": 0.84, + "value": 1.7 + }, + { + "name": "SELF_CORRELATION", + "result": "PENDING" + }, + { + "name": "MATCHES_COMPETITION", + "result": "PASS", + "competitions": [ + { + "id": "challenge", + "name": "Challenge" + } + ] + } + ] + }, + "os": null, + "train": { + "pnl": 2718449, + "bookSize": 20000000, + "longCount": 1311, + "shortCount": 1306, + "turnover": 0.3665, + "returns": 0.0689, + "drawdown": 0.0353, + "margin": 0.000376, + "fitness": 0.92, + "sharpe": 2.13, + "startDate": "2018-01-20" + }, + "test": { + "pnl": 512583, + "bookSize": 20000000, + "longCount": 1415, + "shortCount": 1416, + "turnover": 0.362, + "returns": 0.0509, + "drawdown": 0.0349, + "margin": 0.000281, + "fitness": 0.5, + "sharpe": 1.33, + "startDate": "2022-01-20" + }, + "prod": null, + "competitions": null, + "themes": null, + "pyramids": null, + "pyramidThemes": null, + "team": null +} \ No newline at end of file diff --git a/reference/进度.json b/reference/进度.json new file mode 100644 index 0000000..e2f3cd1 --- /dev/null +++ b/reference/进度.json @@ -0,0 +1,22 @@ +{ + "id": "3q4OCMgw4MFa8k16tdsYLml", + "type": "REGULAR", + "settings": { + "instrumentType": "EQUITY", + "region": "USA", + "universe": "TOP3000", + "delay": 1, + "decay": 0, + "neutralization": "SUBINDUSTRY", + "truncation": 0.08, + "pasteurization": "ON", + "unitHandling": "VERIFY", + "nanHandling": "OFF", + "maxTrade": "OFF", + "language": "FASTEXPR", + "visualization": false + }, + "regular": "rank(ts_sum(vec_avg(nws12_afterhsz_sl), 60)) * 0.7 + rank(-ts_delta(close, 2)) * 0.3", + "status": "COMPLETE", + "alpha": "KP0WWZ6l" +} \ No newline at end of file diff --git a/utils/__init__.py 
def load_alpha_list(file_path: str) -> List[str]:
    """Load alpha expressions from a text file, one expression per line.

    If ``file_path`` does not exist, an empty file is created in its place,
    a hint asking the user to fill it in is printed, and an empty list is
    returned.  Otherwise every line is stripped of surrounding whitespace
    and blank lines are dropped.

    Args:
        file_path: path of the text file holding the expressions.

    Returns:
        The non-empty, stripped lines of the file in file order, or ``[]``
        when the file had to be created.
    """
    if not os.path.exists(file_path):
        print(f"{file_path} 文件不存在")
        # Leave an empty template behind so the user has a file to edit.
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write("")
        print(f"已创建 {file_path} 文件, 请添加因子后重新运行, 一行一个因子")
        return []

    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM.
    # With plain 'utf-8' a BOM written by some editors survives as '\ufeff'
    # glued to the first expression, because str.strip() does not remove it.
    with open(file_path, 'r', encoding='utf-8-sig') as file:
        stripped_lines = (line.strip() for line in file)
        return [expression for expression in stripped_lines if expression]
def format_time(seconds: float) -> str:
    """Format a duration in seconds as ``"<s>秒"`` or ``"<m>分<s>秒"``.

    Durations that round to less than one minute are shown as seconds with
    two decimals; longer ones are shown as whole minutes followed by the
    remaining seconds with two decimals.

    Args:
        seconds: duration in seconds.

    Returns:
        The formatted duration, e.g. ``"5.00秒"`` or ``"2分5.50秒"``.
    """
    # Round to the displayed precision *before* splitting into minutes and
    # seconds.  Splitting first let values just below a minute boundary
    # render as "60.00秒" or "1分60.00秒".
    seconds = round(seconds, 2)
    if seconds < 60:
        return f"{seconds:.2f}秒"

    minutes, remaining_seconds = divmod(seconds, 60)
    return f"{int(minutes)}分{remaining_seconds:.2f}秒"