commit
3508a1b496
@ -0,0 +1,65 @@ |
||||
.DS_Store |
||||
# Byte-compiled / optimized / DLL files |
||||
__pycache__/ |
||||
*.py[cod] |
||||
*$py.class |
||||
|
||||
# C extensions |
||||
*.so |
||||
|
||||
# Distribution / packaging |
||||
.Python |
||||
env/ |
||||
build/ |
||||
develop-eggs/ |
||||
dist/ |
||||
downloads/ |
||||
eggs/ |
||||
.eggs/ |
||||
lib/ |
||||
lib64/ |
||||
parts/ |
||||
sdist/ |
||||
var/ |
||||
*.egg-info/ |
||||
.installed.cfg |
||||
*.egg |
||||
.idea/* |
||||
xml_files/ |
||||
|
||||
# PyInstaller |
||||
# Usually these files are written by a python script from a template |
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it. |
||||
*.manifest |
||||
*.spec |
||||
|
||||
# Installer logs |
||||
pip-log.txt |
||||
pip-delete-this-directory.txt |
||||
|
||||
# Unit test / coverage reports |
||||
htmlcov/ |
||||
.tox/ |
||||
.coverage |
||||
.coverage.* |
||||
.cache |
||||
nosetests.xml |
||||
coverage.xml |
||||
*,cover |
||||
|
||||
# Translations |
||||
*.mo |
||||
*.pot |
||||
|
||||
# Django stuff: |
||||
*.log |
||||
|
||||
# Sphinx documentation |
||||
docs/_build/ |
||||
|
||||
# PyBuilder |
||||
target/ |
||||
|
||||
other/split_clash_config/split_config |
||||
ai_news/save_data |
||||
daily/*.txt |
||||
@ -0,0 +1,220 @@ |
||||
"""Alpha 因子表达式生成器 - 增强随机版本""" |
||||
|
||||
import random |
||||
import itertools |
||||
from typing import List |
||||
|
||||
class AlphaGenerator:
    """Random generator of alpha-factor expression strings.

    Expressions are assembled purely as text from the operator and function
    name tables below; nothing is evaluated here. All randomness comes from
    the module-level ``random`` generator, so passing ``seed`` reseeds that
    shared, process-wide state.
    """

    # Operator and function name tables used when building expressions.
    OPERATORS = ['+', '-', '*', '/']
    UNARY_OPERATORS = ['-', 'abs', 'rank', 'log', 'power', 'sqrt']
    TIME_SERIES_FUNCTIONS = [
        'delay', 'ts_mean', 'ts_std_dev', 'ts_rank', 'ts_decay_linear',
        'ts_corr', 'ts_skewness', 'ts_kurtosis', 'ts_max', 'ts_min',
        'ts_sum', 'ts_product', 'ts_delta', 'ts_returns'
    ]
    GROUP_FUNCTIONS = [
        'group_rank', 'group_mean', 'group_std', 'group_neutralize',
        'group_zscore', 'group_percentile'
    ]
    CONDITIONAL_FUNCTIONS = [
        'if_else', 'trade_when', 'condition_filter', 'signal_when'
    ]

    # Built-in field names that generate_strategies() always adds to the
    # caller-supplied fields.
    BASE_FIELDS = ['open', 'close', 'high', 'low', 'volume', 'returns',
                   'vwap', 'turnover', 'volatility', 'cap', 'market_cap']

    # Unary operators that need a numeric second argument (the exponent).
    # Every other entry of UNARY_OPERATORS is emitted with one operand.
    _UNARY_WITH_PARAMETER = ('power',)

    # Time-series functions that take two series before the lookback window.
    # 'ts_covariance' is not in TIME_SERIES_FUNCTIONS today; it is kept so
    # the branch is already correct if it gets added.
    _TWO_SERIES_FUNCTIONS = ('ts_corr', 'ts_covariance')

    def __init__(self, seed=None):
        """Optionally seed the global ``random`` generator.

        Args:
            seed: Value handed to ``random.seed``. ``None`` leaves the
                generator state untouched.
        """
        # Compare against None so that a seed of 0 is honoured as well;
        # a plain truthiness test would silently skip it.
        if seed is not None:
            random.seed(seed)

    def generate_random_expression(self, datafields, depth=0, max_depth=3):
        """Recursively build one random expression string.

        Args:
            datafields: Field names usable as leaves. May be empty, in
                which case every leaf is a numeric constant.
            depth: Current recursion depth (callers normally omit it).
            max_depth: Depth at which a leaf is always produced.

        Returns:
            The expression as a string.
        """
        if depth >= max_depth or random.random() < 0.3:
            # Leaf node: a field 70% of the time (when any are available),
            # otherwise a constant drawn from [0.1, 10.0] with 2 decimals.
            if random.random() < 0.7 and datafields:
                return random.choice(datafields)
            return f"{random.uniform(0.1, 10.0):.2f}"

        # Pick the kind of interior node to emit.
        op_type = random.choice(
            ['binary', 'unary', 'ts_function', 'group_function', 'conditional']
        )

        if op_type == 'binary':
            left = self.generate_random_expression(datafields, depth + 1, max_depth)
            right = self.generate_random_expression(datafields, depth + 1, max_depth)
            op = random.choice(self.OPERATORS)
            return f"({left} {op} {right})"

        if op_type == 'unary':
            arg = self.generate_random_expression(datafields, depth + 1, max_depth)
            op = random.choice(self.UNARY_OPERATORS)
            if op in self._UNARY_WITH_PARAMETER:
                return f"{op}({arg}, {random.randint(1, 30)})"
            # rank/log/sqrt/abs/negation take a single operand; emitting a
            # second integer argument for them produced malformed calls.
            return f"{op}({arg})"

        if op_type == 'ts_function':
            func = random.choice(self.TIME_SERIES_FUNCTIONS)
            arg1 = self.generate_random_expression(datafields, depth + 1, max_depth)
            lookback = random.choice([5, 10, 20, 30, 60, 90, 252])
            if func in self._TWO_SERIES_FUNCTIONS:
                arg2 = self.generate_random_expression(datafields, depth + 1, max_depth)
                return f"{func}({arg1}, {arg2}, {lookback})"
            return f"{func}({arg1}, {lookback})"

        if op_type == 'group_function':
            func = random.choice(self.GROUP_FUNCTIONS)
            arg = self.generate_random_expression(datafields, depth + 1, max_depth)
            group_by = random.choice(
                ['subindustry', 'industry', 'sector', 'bucket(cap, range="0.1,1,0.1")']
            )
            return f"{func}({arg}, {group_by})"

        # Conditional: the first sub-expression is turned into a "> 0" test.
        func = random.choice(self.CONDITIONAL_FUNCTIONS)
        condition = self.generate_random_expression(datafields, depth + 1, max_depth)
        true_val = self.generate_random_expression(datafields, depth + 1, max_depth)
        false_val = self.generate_random_expression(datafields, depth + 1, max_depth)
        return f"{func}({condition} > 0, {true_val}, {false_val})"

    def generate_basic_strategies(self, datafields, count=50):
        """Generate ``count`` expressions built from 1-3 randomly chosen fields.

        Args:
            datafields: List of candidate field names.
            count: Number of expressions to produce.

        Returns:
            List of expression strings; about 70% are wrapped in
            ``group_rank`` over ``subindustry`` or ``industry``.
        """
        strategies = []

        for _ in range(count):
            # Restrict this expression to a small random subset of fields.
            num_fields = random.randint(1, 3)
            selected_fields = random.sample(datafields, min(num_fields, len(datafields)))

            expr = self.generate_random_expression(selected_fields)

            # Optionally rank the result within a group.
            if random.random() < 0.7:
                group_by = random.choice(['subindustry', 'industry'])
                expr = f"group_rank({expr}, {group_by})"

            strategies.append(expr)

        return strategies

    def generate_multi_factor_strategies(self, datafields, count=50):
        """Generate ``count`` deeper expressions built from 2-5 fields.

        Args:
            datafields: List of candidate field names. Must be non-empty:
                the neutralization step draws a field with
                ``random.choice``, which raises ``IndexError`` on an empty
                list.
            count: Number of expressions to produce.

        Returns:
            List of expression strings; about 60% are wrapped in a
            neutralization call.
        """
        strategies = []

        for _ in range(count):
            # Use a larger field subset and one extra level of nesting.
            num_fields = random.randint(2, 5)
            selected_fields = random.sample(datafields, min(num_fields, len(datafields)))

            expr = self.generate_random_expression(selected_fields, max_depth=4)

            # Optionally neutralize against a group or against another field.
            if random.random() < 0.6:
                neutral_type = random.choice(
                    ['group_neutralize', 'regression_neut', 'vector_neut']
                )
                if neutral_type == 'group_neutralize':
                    group_by = random.choice(['subindustry', 'industry', 'sector'])
                    expr = f"{neutral_type}({expr}, {group_by})"
                else:
                    # Prefer a field that is not already in the expression;
                    # fall back to the selected ones when none is left.
                    unused = [f for f in datafields if f not in selected_fields]
                    other_field = random.choice(unused or selected_fields)
                    expr = f"{neutral_type}({expr}, {other_field})"

            strategies.append(expr)

        return strategies

    def generate_advanced_strategies(self, datafields, count=30):
        """Generate template-based mean-reversion, momentum and volatility factors.

        Args:
            datafields: List of candidate field names. Must be non-empty
                when ``count`` is positive (``random.choice`` raises
                ``IndexError`` on an empty list).
            count: Total number of expressions to produce.

        Returns:
            List of exactly ``count`` expression strings (empty for
            ``count <= 0``), ordered mean-reversion, momentum, volatility.
        """
        strategies = []

        # Split count across the three templates and hand the remainder to
        # the earlier ones, so the total is exactly count rather than
        # 3 * (count // 3).
        base, extra = divmod(max(count, 0), 3)
        n_reversion, n_momentum, n_volatility = (
            base + (1 if slot < extra else 0) for slot in range(3)
        )

        # Mean reversion: negated rank of the deviation from a rolling mean.
        for _ in range(n_reversion):
            field = random.choice(datafields)
            lookback = random.choice([5, 10, 20, 30])
            expr = f"-ts_rank({field} - ts_mean({field}, {lookback}), 20)"
            strategies.append(f"group_neutralize({expr}, subindustry)")

        # Momentum: short rolling mean minus long rolling mean.
        for _ in range(n_momentum):
            field = random.choice(datafields)
            short_lookback = random.choice([1, 2, 3, 5])
            long_lookback = random.choice([10, 20, 30, 60])
            expr = f"ts_mean({field}, {short_lookback}) - ts_mean({field}, {long_lookback})"
            strategies.append(f"group_rank({expr}, subindustry)")

        # Volatility: field scaled by its rolling standard deviation.
        for _ in range(n_volatility):
            field = random.choice(datafields)
            vol_lookback = random.choice([10, 20, 30])
            expr = f"{field} / ts_std_dev({field}, {vol_lookback})"
            strategies.append(f"group_rank({expr}, subindustry)")

        return strategies

    def generate_strategies(self, datafields, mode=1, count=100):
        """Generate a list of factor expressions.

        Args:
            datafields: Sequence of data field names; ``BASE_FIELDS`` are
                appended automatically.
            mode: 1 = basic, 2 = multi-factor, 3 = advanced templates,
                4 = mixed. Any other value prints a warning and falls
                back to mixed mode.
            count: Number of expressions to generate.

        Returns:
            List of expression strings. In mixed mode the list is ordered
            basic, multi-factor, advanced and holds ``count`` items.
        """
        # De-duplicate while keeping first-seen order. A set would make the
        # field order depend on per-process string hash randomization, which
        # defeats seeding. list() also accepts tuples and other iterables.
        all_fields = list(datafields) + self.BASE_FIELDS
        unique_fields: List[str] = list(dict.fromkeys(all_fields))

        if mode == 1:
            return self.generate_basic_strategies(unique_fields, count)
        if mode == 2:
            return self.generate_multi_factor_strategies(unique_fields, count)
        if mode == 3:
            return self.generate_advanced_strategies(unique_fields, count)
        if mode != 4:
            print("❌ 无效的生成模式,使用混合模式")

        # Mixed mode: split count across the three generators, giving the
        # remainder to the earlier ones so the total is exactly count.
        base, extra = divmod(max(count, 0), 3)
        basic = self.generate_basic_strategies(
            unique_fields, base + (1 if extra > 0 else 0)
        )
        multi = self.generate_multi_factor_strategies(
            unique_fields, base + (1 if extra > 1 else 0)
        )
        advanced = self.generate_advanced_strategies(unique_fields, base)
        return basic + multi + advanced
||||
|
||||
|
||||
# Usage example
if __name__ == "__main__":
    # Sample data fields for the demo.
    demo_fields = ['volume', 'close', 'open', 'high', 'low', 'returns', 'turnover', 'pe_ratio', 'pb_ratio']

    # Seed the generator so the demo output can be reproduced.
    alpha_gen = AlphaGenerator(seed=42)

    # (banner, generation mode, trailing label of the summary line)
    demo_sections = (
        ("=== 基础策略因子 (20个示例) ===", 1, "个基础因子"),
        ("\n=== 多因子组合策略 (20个示例) ===", 2, "个多因子组合"),
    )
    for banner, demo_mode, label in demo_sections:
        print(banner)
        generated = alpha_gen.generate_strategies(demo_fields, mode=demo_mode, count=20)
        # Only the first ten expressions are printed.
        for position, expression in enumerate(generated[:10], start=1):
            print(f"{position}. {expression}")
        print(f"\n共生成 {len(generated)} {label}")

    # Three consecutive mixed-mode batches drawn from the same generator.
    print("\n=== 随机测试 - 每次运行生成不同的因子 ===")
    for attempt in range(1, 4):
        print(f"\n--- 第{attempt}次运行 ---")
        batch = alpha_gen.generate_strategies(demo_fields, mode=4, count=5)
        for position, expression in enumerate(batch, start=1):
            print(f"{position}. {expression}")
||||
Loading…
Reference in new issue