"""Alpha factor expression generator - enhanced random version."""

# Reconstructed from the ``main.py`` hunk of patch 3508a1b4 ("update").
# The patch envelope and its ``.gitignore`` hunk are not Python and are
# not reproduced in this module.

import itertools
import random
from typing import Iterable, List, Optional


class AlphaGenerator:
    """Build randomized alpha factor expressions as plain strings.

    Expressions are assembled recursively from the operator and function
    names listed in the class attributes below. All randomness comes from
    the module-level ``random`` generator, so passing ``seed`` reseeds that
    shared generator.
    """

    # Operator and function vocabularies used when assembling expressions.
    OPERATORS = ['+', '-', '*', '/']
    UNARY_OPERATORS = ['-', 'abs', 'rank', 'log', 'power', 'sqrt']
    TIME_SERIES_FUNCTIONS = [
        'delay', 'ts_mean', 'ts_std_dev', 'ts_rank', 'ts_decay_linear',
        'ts_corr', 'ts_skewness', 'ts_kurtosis', 'ts_max', 'ts_min',
        'ts_sum', 'ts_product', 'ts_delta', 'ts_returns'
    ]
    GROUP_FUNCTIONS = [
        'group_rank', 'group_mean', 'group_std', 'group_neutralize',
        'group_zscore', 'group_percentile'
    ]
    CONDITIONAL_FUNCTIONS = [
        'if_else', 'trade_when', 'condition_filter', 'signal_when'
    ]

    # Field names that generate_strategies() always adds to the caller's.
    BASE_FIELDS = ['open', 'close', 'high', 'low', 'volume', 'returns',
                   'vwap', 'turnover', 'volatility', 'cap', 'market_cap']

    # Unary operators emitted with a single argument.
    _SINGLE_ARG_UNARY = ('-', 'abs')
    # Time-series functions emitted with two expression arguments.
    # NOTE(review): 'ts_covariance' is not in TIME_SERIES_FUNCTIONS, so that
    # entry can never be selected today.
    _TWO_INPUT_TS_FUNCTIONS = ('ts_corr', 'ts_covariance')

    def __init__(self, seed: Optional[int] = None):
        """Optionally seed the shared ``random`` generator.

        Args:
            seed: Seed for reproducible output. ``None`` leaves the RNG
                state untouched; every other value, including ``0``, is
                passed to ``random.seed``.
        """
        # Compare against None so that a seed of 0 is honoured.
        if seed is not None:
            random.seed(seed)

    @staticmethod
    def _split_count(total: int, parts: int) -> List[int]:
        """Split ``total`` into ``parts`` near-equal sizes summing to it."""
        base, extra = divmod(max(total, 0), parts)
        return [base + 1 if index < extra else base for index in range(parts)]

    def generate_random_expression(self, datafields, depth=0, max_depth=3):
        """Recursively build one random expression string.

        Args:
            datafields: Field names usable as leaves; when empty, leaves
                are always numeric constants.
            depth: Current recursion depth.
            max_depth: Depth at which a leaf is forced.

        Returns:
            The expression as a string.
        """
        if depth >= max_depth or random.random() < 0.3:
            # Leaf node: a field name (70% when fields exist) or a constant.
            if random.random() < 0.7 and datafields:
                return random.choice(datafields)
            return f"{random.uniform(0.1, 10.0):.2f}"

        def child():
            return self.generate_random_expression(
                datafields, depth + 1, max_depth)

        op_type = random.choice(
            ['binary', 'unary', 'ts_function', 'group_function', 'conditional'])

        if op_type == 'binary':
            left = child()
            right = child()
            op = random.choice(self.OPERATORS)
            return f"({left} {op} {right})"

        if op_type == 'unary':
            arg = child()
            op = random.choice(self.UNARY_OPERATORS)
            if op in self._SINGLE_ARG_UNARY:
                return f"{op}({arg})"
            # NOTE(review): rank/log/power/sqrt all receive an integer
            # second argument here (e.g. "sqrt(x, 7)"); confirm the target
            # expression language accepts that for every one of them.
            return f"{op}({arg}, {random.randint(1, 30)})"

        if op_type == 'ts_function':
            func = random.choice(self.TIME_SERIES_FUNCTIONS)
            arg1 = child()
            lookback = random.choice([5, 10, 20, 30, 60, 90, 252])
            if func in self._TWO_INPUT_TS_FUNCTIONS:
                arg2 = child()
                return f"{func}({arg1}, {arg2}, {lookback})"
            return f"{func}({arg1}, {lookback})"

        if op_type == 'group_function':
            func = random.choice(self.GROUP_FUNCTIONS)
            arg = child()
            group_by = random.choice(
                ['subindustry', 'industry', 'sector',
                 'bucket(cap, range="0.1,1,0.1")'])
            return f"{func}({arg}, {group_by})"

        # Conditional: the first sub-expression is compared against zero.
        func = random.choice(self.CONDITIONAL_FUNCTIONS)
        condition = child()
        true_val = child()
        false_val = child()
        return f"{func}({condition} > 0, {true_val}, {false_val})"

    def generate_basic_strategies(self, datafields, count=50):
        """Generate ``count`` expressions over 1-3 randomly sampled fields.

        Roughly 70% of the results are wrapped in ``group_rank`` by
        subindustry or industry.

        Args:
            datafields: Sequence of candidate field names.
            count: Number of expressions to produce.

        Returns:
            List of expression strings.
        """
        strategies = []
        for _ in range(count):
            num_fields = random.randint(1, 3)
            selected_fields = random.sample(
                datafields, min(num_fields, len(datafields)))

            expr = self.generate_random_expression(selected_fields)

            if random.random() < 0.7:
                group_by = random.choice(['subindustry', 'industry'])
                expr = f"group_rank({expr}, {group_by})"

            strategies.append(expr)
        return strategies

    def generate_multi_factor_strategies(self, datafields, count=50):
        """Generate ``count`` deeper expressions over 2-5 sampled fields.

        Roughly 60% of the results are wrapped in a neutralization call:
        ``group_neutralize`` by a grouping, or ``regression_neut`` /
        ``vector_neut`` against another field.

        Args:
            datafields: Non-empty sequence of candidate field names.
            count: Number of expressions to produce.

        Returns:
            List of expression strings.
        """
        strategies = []
        for _ in range(count):
            num_fields = random.randint(2, 5)
            selected_fields = random.sample(
                datafields, min(num_fields, len(datafields)))

            expr = self.generate_random_expression(selected_fields, max_depth=4)

            if random.random() < 0.6:
                neutral_type = random.choice(
                    ['group_neutralize', 'regression_neut', 'vector_neut'])
                if neutral_type == 'group_neutralize':
                    group_by = random.choice(
                        ['subindustry', 'industry', 'sector'])
                    expr = f"{neutral_type}({expr}, {group_by})"
                else:
                    # Prefer a field that the expression does not already
                    # use; fall back to the used ones if none remain.
                    used = set(selected_fields)
                    unused = [f for f in datafields if f not in used]
                    other_field = random.choice(unused or selected_fields)
                    expr = f"{neutral_type}({expr}, {other_field})"

            strategies.append(expr)
        return strategies

    def generate_advanced_strategies(self, datafields, count=30):
        """Generate templated mean-reversion, momentum and volatility factors.

        ``count`` is divided as evenly as possible between the three
        templates (earlier templates receive the remainder), so exactly
        ``count`` expressions are returned.

        Args:
            datafields: Non-empty sequence of candidate field names.
            count: Total number of expressions to produce.

        Returns:
            List of expression strings, grouped by template in the order
            mean reversion, momentum, volatility.
        """
        n_reversion, n_momentum, n_volatility = self._split_count(count, 3)
        strategies = []

        # Mean reversion: negated rank of the deviation from a rolling mean.
        for _ in range(n_reversion):
            field = random.choice(datafields)
            lookback = random.choice([5, 10, 20, 30])
            expr = f"-ts_rank({field} - ts_mean({field}, {lookback}), 20)"
            strategies.append(f"group_neutralize({expr}, subindustry)")

        # Momentum: short rolling mean minus long rolling mean.
        for _ in range(n_momentum):
            field = random.choice(datafields)
            short_lookback = random.choice([1, 2, 3, 5])
            long_lookback = random.choice([10, 20, 30, 60])
            expr = (f"ts_mean({field}, {short_lookback}) - "
                    f"ts_mean({field}, {long_lookback})")
            strategies.append(f"group_rank({expr}, subindustry)")

        # Volatility: field scaled by its rolling standard deviation.
        for _ in range(n_volatility):
            field = random.choice(datafields)
            vol_lookback = random.choice([10, 20, 30])
            expr = f"{field} / ts_std_dev({field}, {vol_lookback})"
            strategies.append(f"group_rank({expr}, subindustry)")

        return strategies

    def generate_strategies(self, datafields: Optional[Iterable[str]],
                            mode: int = 1, count: int = 100) -> List[str]:
        """Generate a list of factor expressions.

        The caller's fields are merged with ``BASE_FIELDS`` and
        de-duplicated in first-seen order. Keeping a fixed order (rather
        than going through a ``set``, whose string ordering changes between
        interpreter runs) is what makes a given seed reproducible.

        Args:
            datafields: Iterable of field names; ``None`` means no extra
                fields.
            mode: 1 = basic, 2 = multi-factor, 3 = advanced templates,
                4 = mixed (``count`` split across the three kinds). Any
                other value prints a warning and falls back to 4.
            count: Number of expressions to produce.

        Returns:
            List of ``count`` expression strings.
        """
        unique_fields = list(dict.fromkeys(
            itertools.chain(datafields or (), self.BASE_FIELDS)))

        if mode == 1:
            return self.generate_basic_strategies(unique_fields, count)
        if mode == 2:
            return self.generate_multi_factor_strategies(unique_fields, count)
        if mode == 3:
            return self.generate_advanced_strategies(unique_fields, count)
        if mode != 4:
            print("❌ 无效的生成模式,使用混合模式")

        # Mixed mode: spread the request over all three generators so the
        # total still equals ``count`` when it is not a multiple of three.
        n_basic, n_multi, n_advanced = self._split_count(count, 3)
        basic = self.generate_basic_strategies(unique_fields, n_basic)
        multi = self.generate_multi_factor_strategies(unique_fields, n_multi)
        advanced = self.generate_advanced_strategies(unique_fields, n_advanced)
        return basic + multi + advanced


def main() -> None:
    """Print sample output for the basic, multi-factor and mixed modes."""
    sample_fields = ['volume', 'close', 'open', 'high', 'low', 'returns',
                     'turnover', 'pe_ratio', 'pb_ratio']

    generator = AlphaGenerator(seed=42)  # fixed seed for reproducibility

    print("=== 基础策略因子 (20个示例) ===")
    basic_strategies = generator.generate_strategies(
        sample_fields, mode=1, count=20)
    for i, strategy in enumerate(basic_strategies[:10], 1):  # first 10 only
        print(f"{i}. {strategy}")

    print(f"\n共生成 {len(basic_strategies)} 个基础因子")

    print("\n=== 多因子组合策略 (20个示例) ===")
    multi_strategies = generator.generate_strategies(
        sample_fields, mode=2, count=20)
    for i, strategy in enumerate(multi_strategies[:10], 1):
        print(f"{i}. {strategy}")

    print(f"\n共生成 {len(multi_strategies)} 个多因子组合")

    print("\n=== 随机测试 - 每次运行生成不同的因子 ===")
    for run in range(3):
        print(f"\n--- 第{run+1}次运行 ---")
        random_strategies = generator.generate_strategies(
            sample_fields, mode=4, count=5)
        for i, strategy in enumerate(random_strategies, 1):
            print(f"{i}. {strategy}")


if __name__ == "__main__":
    main()