You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
220 lines
9.3 KiB
220 lines
9.3 KiB
"""Alpha 因子表达式生成器 - 增强随机版本"""
|
|
|
|
import random
|
|
import itertools
|
|
from typing import List
|
|
|
|
class AlphaGenerator:
    """Enhanced factor-expression generator producing randomised, varied alphas.

    Every public method returns expression *strings*; nothing is evaluated
    here. All randomness comes from the module-level ``random`` generator.
    """

    def __init__(self, seed=None):
        """Optionally seed the random number generator.

        Args:
            seed: Value handed to ``random.seed``. ``None`` (the default)
                leaves the generator state untouched. Any other value,
                including ``0``, is applied.

        Note:
            This seeds the *module-level* ``random`` generator, so it also
            affects every other user of ``random`` in the process.
        """
        # `is not None` rather than truthiness: a seed of 0 is a valid seed
        # and used to be silently ignored.
        if seed is not None:
            random.seed(seed)

    # Basic operators and function library
    OPERATORS = ['+', '-', '*', '/']
    UNARY_OPERATORS = ['-', 'abs', 'rank', 'log', 'power', 'sqrt']
    TIME_SERIES_FUNCTIONS = [
        'delay', 'ts_mean', 'ts_std_dev', 'ts_rank', 'ts_decay_linear',
        'ts_corr', 'ts_skewness', 'ts_kurtosis', 'ts_max', 'ts_min',
        'ts_sum', 'ts_product', 'ts_delta', 'ts_returns'
    ]
    GROUP_FUNCTIONS = [
        'group_rank', 'group_mean', 'group_std', 'group_neutralize',
        'group_zscore', 'group_percentile'
    ]
    CONDITIONAL_FUNCTIONS = [
        'if_else', 'trade_when', 'condition_filter', 'signal_when'
    ]

    # Base field templates (always merged into the caller's fields by
    # generate_strategies)
    BASE_FIELDS = ['open', 'close', 'high', 'low', 'volume', 'returns',
                   'vwap', 'turnover', 'volatility', 'cap', 'market_cap']

    @staticmethod
    def _split_evenly(total, parts):
        """Split ``total`` into ``parts`` integers that sum to ``total``.

        The remainder is spread over the leading parts, e.g. 5 -> [2, 2, 1].
        """
        base, extra = divmod(total, parts)
        return [base + (1 if i < extra else 0) for i in range(parts)]

    def generate_random_expression(self, datafields, depth: int = 0,
                                   max_depth: int = 3) -> str:
        """Recursively build one random expression string.

        Args:
            datafields: Sequence of field names usable as leaves. May be
                empty, in which case every leaf is a numeric constant.
            depth: Current recursion depth (callers normally leave it at 0).
            max_depth: Depth at which a leaf is forced.

        Returns:
            The expression as a string.
        """
        # Leaf: forced at max depth, otherwise taken with probability 0.3.
        # The short-circuit means no random number is drawn at max depth.
        if depth >= max_depth or random.random() < 0.3:
            # 70% field / 30% constant (always a constant when no fields)
            if random.random() < 0.7 and datafields:
                return random.choice(datafields)
            return f"{random.uniform(0.1, 10.0):.2f}"

        # Pick the kind of node to build
        op_type = random.choice(
            ['binary', 'unary', 'ts_function', 'group_function', 'conditional'])

        if op_type == 'binary':
            left = self.generate_random_expression(datafields, depth + 1, max_depth)
            right = self.generate_random_expression(datafields, depth + 1, max_depth)
            op = random.choice(self.OPERATORS)
            return f"({left} {op} {right})"

        if op_type == 'unary':
            arg = self.generate_random_expression(datafields, depth + 1, max_depth)
            op = random.choice(self.UNARY_OPERATORS)
            if op in ['-', 'abs']:
                return f"{op}({arg})"
            # NOTE(review): 'rank', 'log', 'power' and 'sqrt' all receive a
            # random integer second argument here; confirm the target
            # expression language accepts that for each of them.
            return f"{op}({arg}, {random.randint(1, 30)})"

        if op_type == 'ts_function':
            func = random.choice(self.TIME_SERIES_FUNCTIONS)
            arg1 = self.generate_random_expression(datafields, depth + 1, max_depth)
            lookback = random.choice([5, 10, 20, 30, 60, 90, 252])

            # Two-series functions take a second expression argument.
            # NOTE(review): 'ts_covariance' is checked here but is not listed
            # in TIME_SERIES_FUNCTIONS, so only 'ts_corr' can match today.
            if func in ['ts_corr', 'ts_covariance']:
                arg2 = self.generate_random_expression(datafields, depth + 1, max_depth)
                return f"{func}({arg1}, {arg2}, {lookback})"
            return f"{func}({arg1}, {lookback})"

        if op_type == 'group_function':
            func = random.choice(self.GROUP_FUNCTIONS)
            arg = self.generate_random_expression(datafields, depth + 1, max_depth)
            group_by = random.choice(
                ['subindustry', 'industry', 'sector', 'bucket(cap, range="0.1,1,0.1")'])
            return f"{func}({arg}, {group_by})"

        # conditional
        func = random.choice(self.CONDITIONAL_FUNCTIONS)
        condition = self.generate_random_expression(datafields, depth + 1, max_depth)
        true_val = self.generate_random_expression(datafields, depth + 1, max_depth)
        false_val = self.generate_random_expression(datafields, depth + 1, max_depth)
        return f"{func}({condition} > 0, {true_val}, {false_val})"

    def generate_basic_strategies(self, datafields, count: int = 50) -> List[str]:
        """Generate basic single-factor strategies.

        Args:
            datafields: Sequence of candidate field names.
            count: Number of expressions to produce.

        Returns:
            A list of ``count`` expression strings.
        """
        strategies = []

        for _ in range(count):
            # Randomly combine 1-3 fields (fewer if not enough are available)
            num_fields = random.randint(1, 3)
            selected_fields = random.sample(datafields, min(num_fields, len(datafields)))

            # Build a random expression over the chosen fields
            expr = self.generate_random_expression(selected_fields)

            # Wrap in a group rank 70% of the time
            if random.random() < 0.7:
                group_by = random.choice(['subindustry', 'industry'])
                expr = f"group_rank({expr}, {group_by})"

            strategies.append(expr)

        return strategies

    def generate_multi_factor_strategies(self, datafields, count: int = 50) -> List[str]:
        """Generate multi-factor combination strategies.

        Args:
            datafields: Sequence of candidate field names. Should be
                non-empty: with no fields, the neutralisation step can raise
                ``IndexError`` when it picks a second factor.
            count: Number of expressions to produce.

        Returns:
            A list of ``count`` expression strings.
        """
        strategies = []

        for _ in range(count):
            # Use more fields for a more complex combination
            num_fields = random.randint(2, 5)
            selected_fields = random.sample(datafields, min(num_fields, len(datafields)))

            # Deeper expression tree than the basic strategies
            expr = self.generate_random_expression(selected_fields, max_depth=4)

            # Add neutralisation / normalisation 60% of the time
            if random.random() < 0.6:
                neutral_type = random.choice(
                    ['group_neutralize', 'regression_neut', 'vector_neut'])
                if neutral_type == 'group_neutralize':
                    group_by = random.choice(['subindustry', 'industry', 'sector'])
                    expr = f"{neutral_type}({expr}, {group_by})"
                else:
                    # Neutralise against another factor: prefer a field not
                    # already used, fall back to the selected ones.
                    unused = [f for f in datafields if f not in selected_fields]
                    other_field = random.choice(unused or selected_fields)
                    expr = f"{neutral_type}({expr}, {other_field})"

            strategies.append(expr)

        return strategies

    def generate_advanced_strategies(self, datafields, count: int = 30) -> List[str]:
        """Generate advanced strategies (mean reversion, momentum, volatility).

        ``count`` is split as evenly as possible over the three families, with
        any remainder going to the earlier families, so exactly ``count``
        expressions are returned. Previously ``3 * (count // 3)`` were
        returned, i.e. nothing at all for ``count < 3``.

        Args:
            datafields: Sequence of candidate field names.
            count: Total number of expressions to produce.

        Returns:
            A list of expression strings ordered mean-reversion, momentum,
            volatility.

        Raises:
            IndexError: If ``datafields`` is empty and ``count`` is positive.
        """
        n_reversion, n_momentum, n_volatility = self._split_evenly(count, 3)
        strategies = []

        # Mean-reversion strategies
        for _ in range(n_reversion):
            field = random.choice(datafields)
            lookback = random.choice([5, 10, 20, 30])
            expr = f"-ts_rank({field} - ts_mean({field}, {lookback}), 20)"
            strategies.append(f"group_neutralize({expr}, subindustry)")

        # Momentum strategies
        for _ in range(n_momentum):
            field = random.choice(datafields)
            short_lookback = random.choice([1, 2, 3, 5])
            long_lookback = random.choice([10, 20, 30, 60])
            expr = f"ts_mean({field}, {short_lookback}) - ts_mean({field}, {long_lookback})"
            strategies.append(f"group_rank({expr}, subindustry)")

        # Volatility strategies
        for _ in range(n_volatility):
            field = random.choice(datafields)
            vol_lookback = random.choice([10, 20, 30])
            expr = f"{field} / ts_std_dev({field}, {vol_lookback})"
            strategies.append(f"group_rank({expr}, subindustry)")

        return strategies

    def generate_strategies(self, datafields, mode: int = 1, count: int = 100) -> List[str]:
        """Generate a list of factor expressions.

        Args:
            datafields: Sequence of data field names (list or tuple); it is
                merged with ``BASE_FIELDS`` and de-duplicated.
            mode: Generation mode (1 = basic strategies, 2 = multi-factor
                combinations, 3 = advanced strategies, 4 = mixed). Any other
                value prints a warning and falls back to mixed mode.
            count: Number of expressions to generate.

        Returns:
            A list of ``count`` factor-expression strings. In mixed mode the
            list is ordered basic, multi-factor, advanced.
        """
        # Order-preserving de-duplication. The previous list(set(...)) ordered
        # the strings by hash, which varies between interpreter runs, so a
        # fixed seed did not reproduce the same output. list() also lets
        # callers pass a tuple.
        unique_fields = list(dict.fromkeys(list(datafields) + self.BASE_FIELDS))

        if mode == 1:
            return self.generate_basic_strategies(unique_fields, count)
        if mode == 2:
            return self.generate_multi_factor_strategies(unique_fields, count)
        if mode == 3:
            return self.generate_advanced_strategies(unique_fields, count)
        if mode != 4:
            print("❌ 无效的生成模式,使用混合模式")

        # Mixed mode: split count over the three kinds so the total is count
        n_basic, n_multi, n_advanced = self._split_evenly(count, 3)
        basic = self.generate_basic_strategies(unique_fields, n_basic)
        multi = self.generate_multi_factor_strategies(unique_fields, n_multi)
        advanced = self.generate_advanced_strategies(unique_fields, n_advanced)
        return basic + multi + advanced
|
|
|
|
|
|
# Usage example
if __name__ == "__main__":
    # Sample data fields handed to the generator
    sample_fields = ['volume', 'close', 'open', 'high', 'low', 'returns', 'turnover', 'pe_ratio', 'pb_ratio']

    # A seed is passed so that the demo is meant to be reproducible
    generator = AlphaGenerator(seed=42)

    # --- mode 1: basic strategies ---
    print("=== 基础策略因子 (20个示例) ===")
    basic_strategies = generator.generate_strategies(sample_fields, mode=1, count=20)
    # Only the first 10 expressions are displayed
    for idx, expr in enumerate(basic_strategies[:10], start=1):
        print(f"{idx}. {expr}")

    print(f"\n共生成 {len(basic_strategies)} 个基础因子")

    # --- mode 2: multi-factor combinations ---
    print("\n=== 多因子组合策略 (20个示例) ===")
    multi_strategies = generator.generate_strategies(sample_fields, mode=2, count=20)
    for idx, expr in enumerate(multi_strategies[:10], start=1):
        print(f"{idx}. {expr}")

    print(f"\n共生成 {len(multi_strategies)} 个多因子组合")

    # --- mode 4: three consecutive mixed-mode runs on the same generator ---
    print("\n=== 随机测试 - 每次运行生成不同的因子 ===")
    for round_no in range(1, 4):
        print(f"\n--- 第{round_no}次运行 ---")
        random_strategies = generator.generate_strategies(sample_fields, mode=4, count=5)
        for idx, expr in enumerate(random_strategies, start=1):
            print(f"{idx}. {expr}")