增加 alpha 检测工具

main
Jack 4 weeks ago
parent d8c5c04581
commit 0ef8731055
  1. 760
      alpha_check/machine_lib.py
  2. 477
      alpha_check/main.go
  3. 223
      alpha_check/main.py

@ -0,0 +1,760 @@
import requests
from os import environ
from time import sleep
import time
import json
import pandas as pd
import random
import pickle
from urllib.parse import urljoin
from itertools import product
from itertools import combinations
from collections import defaultdict
import pickle
# Parameter-free cross-sectional operators applied to a single field.
basic_ops = ["reverse", "inverse", "rank", "zscore", "quantile", "normalize"]
# Time-series operators that additionally take a lookback window in days
# (windows are supplied by ts_factory).
ts_ops = ["ts_rank", "ts_zscore", "ts_delta", "ts_sum", "ts_delay",
"ts_std_dev", "ts_mean", "ts_arg_min", "ts_arg_max","ts_scale", "ts_quantile"]
# Default operator universe consumed by first_order_factory.
ops_set = basic_ops + ts_ops
def login():
    """Authenticate against the WorldQuant BRAIN API.

    Credentials are read from the BRAIN_USERNAME / BRAIN_PASSWORD
    environment variables, falling back to empty strings (the original
    hard-coded values) so existing behavior is unchanged when they are
    not set.

    Returns:
        requests.Session with basic-auth attached.
    """
    username = environ.get("BRAIN_USERNAME", "")
    password = environ.get("BRAIN_PASSWORD", "")
    # Create a session to persistently store the credentials/headers.
    s = requests.Session()
    s.auth = (username, password)
    # POST to /authentication establishes the session server-side.
    response = s.post('https://api.worldquantbrain.com/authentication')
    print(response.content)
    return s
def get_datasets(
    s,
    instrument_type: str = 'EQUITY',
    region: str = 'USA',
    delay: int = 1,
    universe: str = 'TOP3000'
):
    """Fetch the list of available datasets as a DataFrame.

    Args:
        s: authenticated requests.Session.
        instrument_type, region, delay, universe: API filter settings.
    """
    url = (
        "https://api.worldquantbrain.com/data-sets?"
        + f"instrumentType={instrument_type}&region={region}&delay={str(delay)}&universe={universe}"
    )
    response = s.get(url)
    return pd.DataFrame(response.json()['results'])
def get_datafields(
    s,
    instrument_type: str = 'EQUITY',
    region: str = 'USA',
    delay: int = 1,
    universe: str = 'TOP3000',
    dataset_id: str = '',
    search: str = ''
):
    """Fetch data fields, 50 per page, and return them as a DataFrame.

    With an empty `search`, the dataset is filtered by `dataset_id` and
    the exact field count is queried first.  With a search term the API
    provides no count, so a fixed two pages (100 fields) are probed.
    """
    base = "https://api.worldquantbrain.com/data-fields?"
    if search:
        url_template = (
            base
            + f"&instrumentType={instrument_type}"
            + f"&region={region}&delay={str(delay)}&universe={universe}&limit=50"
            + f"&search={search}"
            + "&offset={x}"
        )
        count = 100
    else:
        url_template = (
            base
            + f"&instrumentType={instrument_type}"
            + f"&region={region}&delay={str(delay)}&universe={universe}&dataset.id={dataset_id}&limit=50"
            + "&offset={x}"
        )
        count = s.get(url_template.format(x=0)).json()['count']
    all_fields = []
    for offset in range(0, count, 50):
        page = s.get(url_template.format(x=offset))
        all_fields.extend(page.json()['results'])
    return pd.DataFrame(all_fields)
def get_vec_fields(fields):
    """Expand each vector-typed field into vector-operator expressions.

    For every field f, emits "vec_avg(f)" followed by "vec_sum(f)",
    preserving the input field order.
    """
    # Add the vector operators you have been granted access to here.
    vec_ops = ["vec_avg", "vec_sum"]
    return ["%s(%s)" % (vec_op, field) for field in fields for vec_op in vec_ops]
def process_datafields(df):
    """Turn a data-fields DataFrame into backfilled/winsorized expressions.

    MATRIX fields are used as-is; VECTOR fields are first wrapped with
    vec_avg and vec_sum.  Every field is then winsorized after a
    120-day backfill.
    """
    matrix_ids = df[df['type'] == "MATRIX"]["id"].tolist()
    vector_ids = df[df['type'] == "VECTOR"]["id"].tolist()
    expanded = list(matrix_ids)
    for vid in vector_ids:
        for vec_op in ("vec_avg", "vec_sum"):
            expanded.append("%s(%s)" % (vec_op, vid))
    return ["winsorize(ts_backfill(%s, 120), std=4)" % field for field in expanded]
def ts_factory(op, field):
    """Apply a time-series operator over the standard lookback windows.

    Windows roughly correspond to 1 week, 1 month, 1 quarter, half a
    year and one year of trading days.
    """
    days = [5, 22, 66, 120, 240]
    return ["%s(%s, %d)" % (op, field, day) for day in days]
def first_order_factory(fields, ops_set):
    """Combine every field with every operator into first-order alphas.

    The bare field itself is always emitted first (the reverse operator
    covers negation, so "-field" variants are not generated).  Operators
    that take an extra named parameter are routed to ts_comp_factory via
    the dispatch table below; other ts_* operators get the standard
    window expansion.
    """
    # op -> (keyword name, parameter values) for parameterized ts ops.
    parameterized = {
        "ts_percentage": ("percentage", [0.5]),
        "ts_decay_exp_window": ("factor", [0.5]),
        "ts_moment": ("k", [2, 3, 4]),
        "ts_entropy": ("buckets", [10]),
    }
    alpha_set = []
    for field in fields:
        alpha_set.append(field)
        for op in ops_set:
            if op in parameterized:
                keyword, values = parameterized[op]
                alpha_set.extend(ts_comp_factory(op, field, keyword, values))
            elif op.startswith("ts_") or op == "inst_tvr":
                alpha_set.extend(ts_factory(op, field))
            elif op.startswith("vector"):
                alpha_set.extend(vector_factory(op, field))
            elif op == "signed_power":
                alpha_set.append("%s(%s, 2)" % (op, field))
            else:
                alpha_set.append("%s(%s)" % (op, field))
    return alpha_set
def load_task_pool(alpha_list, limit_of_children_simulations, limit_of_multi_simulations):
    """Chunk alphas into tasks, and tasks into pools.

    Args:
        alpha_list: list of (alpha, decay) tuples.
        limit_of_children_simulations: children per multi-simulation.
        limit_of_multi_simulations: simultaneous multi-simulations.

    Returns:
        pools: list of pools; each pool holds up to
        `limit_of_multi_simulations` tasks; each task holds up to
        `limit_of_children_simulations` (alpha, decay) tuples.
    """
    def _chunk(seq, size):
        # Consecutive slices of at most `size` items.
        return [seq[i:i + size] for i in range(0, len(seq), size)]

    tasks = _chunk(alpha_list, limit_of_children_simulations)
    return _chunk(tasks, limit_of_multi_simulations)
def multi_simulate(alpha_pools, neut, region, universe, start):
    """Post pools of multi-simulations and wait for each to finish.

    Args:
        alpha_pools: output of load_task_pool — list of pools, each a
            list of tasks, each a list of (alpha, decay) tuples.
        neut: neutralization setting (e.g. "SUBINDUSTRY").
        region, universe: simulation settings.
        start: pool index to resume from; earlier pools are skipped.
    """
    s = login()
    brain_api_url = 'https://api.worldquantbrain.com'
    for x, pool in enumerate(alpha_pools):
        if x < start: continue  # resume support
        progress_urls = []
        for y, task in enumerate(pool):
            # 10 tasks, 10 alphas in each task.
            sim_data_list = generate_sim_data(task, region, universe, neut)
            try:
                simulation_response = s.post('https://api.worldquantbrain.com/simulations', json=sim_data_list)
                simulation_progress_url = simulation_response.headers['Location']
                progress_urls.append(simulation_progress_url)
            except:
                # A missing Location header is treated as throttling or an
                # expired session: back off 10 minutes and re-login.
                # NOTE(review): if s.post itself raised, simulation_response
                # is unbound here and this print raises NameError.
                print("location key error: %s"%simulation_response.content)
                sleep(600)
                s = login()
            print("pool %d task %d post done"%(x,y))
        for j, progress in enumerate(progress_urls):
            try:
                # Poll the progress URL until the API stops sending Retry-After.
                while True:
                    simulation_progress = s.get(progress)
                    if simulation_progress.headers.get("Retry-After", 0) == 0:
                        break
                    sleep(float(simulation_progress.headers["Retry-After"]))
                status = simulation_progress.json().get("status", 0)
                if status != "COMPLETE":
                    print("Not complete : %s"%(progress))
                """
                #alpha_id = simulation_progress.json()["alpha"]
                children = simulation_progress.json().get("children", 0)
                children_list = []
                for child in children:
                    child_progress = s.get(brain_api_url + "/simulations/" + child)
                    alpha_id = child_progress.json()["alpha"]
                    set_alpha_properties(s,
                        alpha_id,
                        name = "%s"%name,
                        color = None,)
                """
            except KeyError:
                print("look into: %s"%progress)
            except Exception as e:
                print(f"other error: {e}")
            print("pool %d task %d simulate done"%(x, j))
    print("Simulate done")
def generate_sim_data(alpha_list, region, uni, neut):
    """Build the per-alpha simulation payloads for one task.

    Args:
        alpha_list: iterable of (alpha_expression, decay) tuples.
        region, uni, neut: region, universe and neutralization settings.

    Returns:
        list of REGULAR simulation request dicts.
    """
    def _payload(alpha, decay):
        return {
            'type': 'REGULAR',
            'settings': {
                'instrumentType': 'EQUITY',
                'region': region,
                'universe': uni,
                'delay': 1,
                'decay': decay,
                'neutralization': neut,
                'truncation': 0.08,
                'pasteurization': 'ON',
                'testPeriod': 'P0Y',
                'unitHandling': 'VERIFY',
                'nanHandling': 'ON',
                'language': 'FASTEXPR',
                'visualization': False,
            },
            'regular': alpha,
        }

    return [_payload(alpha, decay) for alpha, decay in alpha_list]
def set_alpha_properties(
    s,
    alpha_id,
    name: str = None,
    color: str = None,
    selection_desc: str = "None",
    combo_desc: str = "None",
    tags=None,
):
    """
    Change an alpha's description parameters via PATCH.

    Args:
        s: authenticated requests.Session.
        alpha_id: id of the alpha to update.
        name, color: optional display properties.
        selection_desc, combo_desc: description texts.
        tags: list of tag strings; defaults to ["ace_tag"].  The
            original used a mutable default argument (shared across
            calls in Python); replaced with a None sentinel.
    """
    if tags is None:
        tags = ["ace_tag"]
    params = {
        "color": color,
        "name": name,
        "tags": tags,
        "category": None,
        "regular": {"description": None},
        "combo": {"description": combo_desc},
        "selection": {"description": selection_desc},
    }
    # Fire-and-forget: the response was never inspected by callers.
    s.patch(
        "https://api.worldquantbrain.com/alphas/" + alpha_id, json=params
    )
def get_alphas(start_date, end_date, sharpe_th, fitness_th, region, alpha_num, usage):
    """Page through the user's unsubmitted alphas and collect candidates.

    Filters by creation-date window (month-day strings; the year is
    hard-coded to 2025 in the URLs below), sharpe/fitness thresholds and
    region.  When usage != "submit", strongly negative alphas are also
    collected and their expressions sign-flipped.  Each kept record gets
    a turnover-dependent suggested decay appended.

    Returns:
        (records, session) — records are
        [alpha_id, exp, sharpe, turnover, fitness, margin, dateCreated,
        decay(, suggested_decay)]; the possibly re-logged-in session is
        returned so callers can reuse it.
    """
    s = login()
    output = []
    # %3E / %3C are URL-encoded > / <; url_e selects strongly positive
    # alphas, url_c strongly negative ones.
    count = 0
    for i in range(0, alpha_num, 100):
        print(i)
        url_e = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d"%(i) \
            + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \
            + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \
            + "T00:00:00-04:00&is.fitness%3E" + str(fitness_th) + "&is.sharpe%3E" \
            + str(sharpe_th) + "&settings.region=" + region + "&order=-is.sharpe&hidden=false&type!=SUPER"
        url_c = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d"%(i) \
            + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \
            + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \
            + "T00:00:00-04:00&is.fitness%3C-" + str(fitness_th) + "&is.sharpe%3C-" \
            + str(sharpe_th) + "&settings.region=" + region + "&order=is.sharpe&hidden=false&type!=SUPER"
        urls = [url_e]
        if usage != "submit":
            urls.append(url_c)  # only mine negatives when not submitting
        for url in urls:
            response = s.get(url)
            try:
                alpha_list = response.json()["results"]
                for j in range(len(alpha_list)):
                    alpha_id = alpha_list[j]["id"]
                    name = alpha_list[j]["name"]
                    dateCreated = alpha_list[j]["dateCreated"]
                    sharpe = alpha_list[j]["is"]["sharpe"]
                    fitness = alpha_list[j]["is"]["fitness"]
                    turnover = alpha_list[j]["is"]["turnover"]
                    margin = alpha_list[j]["is"]["margin"]
                    longCount = alpha_list[j]["is"]["longCount"]
                    shortCount = alpha_list[j]["is"]["shortCount"]
                    decay = alpha_list[j]["settings"]["decay"]
                    exp = alpha_list[j]['regular']['code']
                    count += 1
                    # Require a minimally diversified book before keeping.
                    if (longCount + shortCount) > 100:
                        if sharpe < -sharpe_th:
                            # Negative alpha: flip the expression's sign.
                            exp = "-%s"%exp
                        rec = [alpha_id, exp, sharpe, turnover, fitness, margin, dateCreated, decay]
                        print(rec)
                        # Higher turnover -> larger suggested decay.
                        if turnover > 0.7:
                            rec.append(decay*4)
                        elif turnover > 0.6:
                            rec.append(decay*3+3)
                        elif turnover > 0.5:
                            rec.append(decay*3)
                        elif turnover > 0.4:
                            rec.append(decay*2)
                        elif turnover > 0.35:
                            rec.append(decay+4)
                        elif turnover > 0.3:
                            rec.append(decay+2)
                        output.append(rec)
            except:
                # Any parse failure is treated as an expired session.
                print("%d finished re-login"%i)
                s = login()
    print("count: %d"%count)
    return output, s  # also return the session so callers can reuse it
def prune(next_alpha_recs, prefix, keep_num):
    """Keep at most `keep_num` records per datafield.

    Args:
        next_alpha_recs: records as produced by get_alphas; the
            expression is at index 1, sharpe at index 2, decay last.
            Records are assumed ordered best-first, so the first
            `keep_num` per field win.
        prefix: the datafield prefix (fnd6, mdl175, ...) used to extract
            the field name from the expression.
        keep_num: how many top-sharpe alphas to keep per field.

    Returns:
        list of [expression, decay] pairs.  Negative-sharpe records are
        bucketed under a separate "-field" key.
    """
    kept_per_field = defaultdict(int)
    pruned = []
    for rec in next_alpha_recs:
        expression, sharpe, decay = rec[1], rec[2], rec[-1]
        field = expression.split(prefix)[-1].split(",")[0]
        if sharpe < 0:
            field = "-%s" % field
        if kept_per_field[field] < keep_num:
            kept_per_field[field] += 1
            pruned.append([expression, decay])
    return pruned
def get_group_second_order_factory(first_order, group_ops, region):
    """Cross every first-order alpha with every group operator.

    Preserves the original nesting order: first-order alphas outermost,
    group operators inner, then the expansions from group_factory.
    """
    return [
        alpha
        for fo in first_order
        for group_op in group_ops
        for alpha in group_factory(group_op, fo, region)
    ]
def group_factory(op, field, region):
    """Expand a group operator over every grouping known for `region`.

    Args:
        op: group operator name (group_rank, group_vector_*,
            group_percentage, ...).
        field: field expression to wrap.
        region: market region code (USA, CHN, EUR, ...); unknown regions
            fall back to the generic groupings only.

    Returns:
        list of alpha expression strings, one per group (times one per
        weighting vector for group_vector* operators).

    Fixes vs. the original: glb_group_13 was assigned twice, so the
    first assignment was dead code (removed); the unused usa_group_6
    local was removed.  Duplicate entries *within* the remaining lists
    are preserved deliberately to keep the output identical.
    """
    output = []
    # Weighting vectors used by group_vector* operators.
    vectors = ["cap"]
    # Region-specific cluster/sector groupings.
    chn_group_13 = ['pv13_h_min2_sector', 'pv13_di_6l', 'pv13_rcsed_6l', 'pv13_di_5l', 'pv13_di_4l',
                    'pv13_di_3l', 'pv13_di_2l', 'pv13_di_1l', 'pv13_parent', 'pv13_level']
    chn_group_1 = ['sta1_top3000c30','sta1_top3000c20','sta1_top3000c10','sta1_top3000c2','sta1_top3000c5']
    chn_group_2 = ['sta2_top3000_fact4_c10','sta2_top2000_fact4_c50','sta2_top3000_fact3_c20']
    hkg_group_13 = ['pv13_10_f3_g2_minvol_1m_sector', 'pv13_10_minvol_1m_sector', 'pv13_20_minvol_1m_sector',
                    'pv13_2_minvol_1m_sector', 'pv13_5_minvol_1m_sector', 'pv13_1l_scibr', 'pv13_3l_scibr',
                    'pv13_2l_scibr', 'pv13_4l_scibr', 'pv13_5l_scibr']
    hkg_group_1 = ['sta1_allc50','sta1_allc5','sta1_allxjp_513_c20','sta1_top2000xjp_513_c5']
    hkg_group_2 = ['sta2_all_xjp_513_all_fact4_c10','sta2_top2000_xjp_513_top2000_fact3_c10',
                   'sta2_allfactor_xjp_513_13','sta2_top2000_xjp_513_top2000_fact3_c20']
    twn_group_13 = ['pv13_2_minvol_1m_sector','pv13_20_minvol_1m_sector','pv13_10_minvol_1m_sector',
                    'pv13_5_minvol_1m_sector','pv13_10_f3_g2_minvol_1m_sector','pv13_5_f3_g2_minvol_1m_sector',
                    'pv13_2_f4_g3_minvol_1m_sector']
    twn_group_1 = ['sta1_allc50','sta1_allxjp_513_c50','sta1_allxjp_513_c20','sta1_allxjp_513_c2',
                   'sta1_allc20','sta1_allxjp_513_c5','sta1_allxjp_513_c10','sta1_allc2','sta1_allc5']
    twn_group_2 = ['sta2_allfactor_xjp_513_0','sta2_all_xjp_513_all_fact3_c20',
                   'sta2_all_xjp_513_all_fact4_c20','sta2_all_xjp_513_all_fact4_c50']
    usa_group_13 = ['pv13_h_min2_3000_sector','pv13_r2_min20_3000_sector','pv13_r2_min2_3000_sector',
                    'pv13_r2_min2_3000_sector', 'pv13_h_min2_focused_pureplay_3000_sector']
    usa_group_1 = ['sta1_top3000c50','sta1_allc20','sta1_allc10','sta1_top3000c20','sta1_allc5']
    usa_group_2 = ['sta2_top3000_fact3_c50','sta2_top3000_fact4_c20','sta2_top3000_fact4_c10']
    asi_group_13 = ['pv13_20_minvol_1m_sector', 'pv13_5_f3_g2_minvol_1m_sector', 'pv13_10_f3_g2_minvol_1m_sector',
                    'pv13_2_f4_g3_minvol_1m_sector', 'pv13_10_minvol_1m_sector', 'pv13_5_minvol_1m_sector']
    asi_group_1 = ['sta1_allc50', 'sta1_allc10', 'sta1_minvol1mc50','sta1_minvol1mc20',
                   'sta1_minvol1m_normc20', 'sta1_minvol1m_normc50']
    jpn_group_1 = ['sta1_alljpn_513_c5', 'sta1_alljpn_513_c50', 'sta1_alljpn_513_c2', 'sta1_alljpn_513_c20']
    jpn_group_2 = ['sta2_top2000_jpn_513_top2000_fact3_c20', 'sta2_all_jpn_513_all_fact1_c5',
                   'sta2_allfactor_jpn_513_9', 'sta2_all_jpn_513_all_fact1_c10']
    jpn_group_13 = ['pv13_2_minvol_1m_sector', 'pv13_2_f4_g3_minvol_1m_sector', 'pv13_10_minvol_1m_sector',
                    'pv13_10_f3_g2_minvol_1m_sector', 'pv13_all_delay_1_parent', 'pv13_all_delay_1_level']
    kor_group_13 = ['pv13_10_f3_g2_minvol_1m_sector', 'pv13_5_minvol_1m_sector', 'pv13_5_f3_g2_minvol_1m_sector',
                    'pv13_2_minvol_1m_sector', 'pv13_20_minvol_1m_sector', 'pv13_2_f4_g3_minvol_1m_sector']
    kor_group_1 = ['sta1_allc20','sta1_allc50','sta1_allc2','sta1_allc10','sta1_minvol1mc50',
                   'sta1_allxjp_513_c10', 'sta1_top2000xjp_513_c50']
    kor_group_2 = ['sta2_all_xjp_513_all_fact1_c50','sta2_top2000_xjp_513_top2000_fact2_c50',
                   'sta2_all_xjp_513_all_fact4_c50','sta2_all_xjp_513_all_fact4_c5']
    eur_group_13 = ['pv13_5_sector', 'pv13_2_sector', 'pv13_v3_3l_scibr', 'pv13_v3_2l_scibr', 'pv13_2l_scibr',
                    'pv13_52_sector', 'pv13_v3_6l_scibr', 'pv13_v3_4l_scibr', 'pv13_v3_1l_scibr']
    eur_group_1 = ['sta1_allc10', 'sta1_allc2', 'sta1_top1200c2', 'sta1_allc20', 'sta1_top1200c10']
    eur_group_2 = ['sta2_top1200_fact3_c50','sta2_top1200_fact3_c20','sta2_top1200_fact4_c50']
    glb_group_1 = ['sta1_allc20', 'sta1_allc10', 'sta1_allc50', 'sta1_allc5']
    glb_group_2 = ['sta2_all_fact4_c50', 'sta2_all_fact4_c20', 'sta2_all_fact3_c20', 'sta2_all_fact4_c10']
    glb_group_13 = ['pv13_2_sector', 'pv13_10_sector', 'pv13_3l_scibr', 'pv13_2l_scibr', 'pv13_1l_scibr',
                    'pv13_52_minvol_1m_all_delay_1_sector','pv13_52_minvol_1m_sector','pv13_52_minvol_1m_sector']
    amr_group_13 = ['pv13_4l_scibr', 'pv13_1l_scibr', 'pv13_hierarchy_min51_f1_sector',
                    'pv13_hierarchy_min2_600_sector', 'pv13_r2_min2_sector', 'pv13_h_min20_600_sector']
    # Generic bucketed groupings available in every region.
    cap_group = "bucket(rank(cap), range='0.1, 1, 0.1')"
    asset_group = "bucket(rank(assets),range='0.1, 1, 0.1')"
    sector_cap_group = "bucket(group_rank(cap, sector),range='0.1, 1, 0.1')"
    sector_asset_group = "bucket(group_rank(assets, sector),range='0.1, 1, 0.1')"
    vol_group = "bucket(rank(ts_std_dev(returns,20)),range = '0.1, 1, 0.1')"
    liquidity_group = "bucket(rank(close*volume),range = '0.1, 1, 0.1')"
    groups = ["market","sector", "industry", "subindustry",
              cap_group, asset_group, sector_cap_group, sector_asset_group, vol_group, liquidity_group]
    if region == "CHN":
        groups += chn_group_13 + chn_group_1 + chn_group_2
    if region == "TWN":
        groups += twn_group_13 + twn_group_1 + twn_group_2
    if region == "ASI":
        groups += asi_group_13 + asi_group_1
    if region == "USA":
        groups += usa_group_13 + usa_group_1 + usa_group_2
    if region == "HKG":
        groups += hkg_group_13 + hkg_group_1 + hkg_group_2
    if region == "KOR":
        groups += kor_group_13 + kor_group_1 + kor_group_2
    if region == "EUR":
        groups += eur_group_13 + eur_group_1 + eur_group_2
    if region == "GLB":
        groups += glb_group_13 + glb_group_1 + glb_group_2
    if region == "AMR":
        groups += amr_group_13
    if region == "JPN":
        groups += jpn_group_1 + jpn_group_2 + jpn_group_13
    for group in groups:
        if op.startswith("group_vector"):
            # Vectorized group ops take an extra weighting vector argument.
            for vector in vectors:
                output.append("%s(%s,%s,densify(%s))"%(op, field, vector, group))
        elif op.startswith("group_percentage"):
            output.append("%s(%s,densify(%s),percentage=0.5)"%(op, field, group))
        else:
            output.append("%s(%s,densify(%s))"%(op, field, group))
    return output
def trade_when_factory(op,field,region):
    """Wrap `field` in trade_when-style expressions.

    Emits one alpha per (open_event, exit_event) pair as
    "op(open_event, field, exit_event)".

    NOTE(review): the region-specific event lists below (usa_events,
    asi_events, eur_events, glb_events, chn_events, kor_events,
    twn_events) and the `region` parameter are currently unused — only
    open_events x exit_events feed the output.  Presumably kept for a
    future region-aware expansion; confirm before removing.
    """
    output = []
    # Entry conditions: volume/price momentum, correlation and
    # volatility regime triggers.
    open_events = ["ts_arg_max(volume, 5) == 0", "ts_corr(close, volume, 20) < 0",
                   "ts_corr(close, volume, 5) < 0", "ts_mean(volume,10)>ts_mean(volume,60)",
                   "group_rank(ts_std_dev(returns,60), sector) > 0.7", "ts_zscore(returns,60) > 2",
                   "ts_arg_min(volume, 5) > 3",
                   "ts_std_dev(returns, 5) > ts_std_dev(returns, 20)",
                   "ts_arg_max(close, 5) == 0", "ts_arg_max(close, 20) == 0",
                   "ts_corr(close, volume, 5) > 0", "ts_corr(close, volume, 5) > 0.3", "ts_corr(close, volume, 5) > 0.5",
                   "ts_corr(close, volume, 20) > 0", "ts_corr(close, volume, 20) > 0.3", "ts_corr(close, volume, 20) > 0.5",
                   "ts_regression(returns, %s, 5, lag = 0, rettype = 2) > 0"%field,
                   "ts_regression(returns, %s, 20, lag = 0, rettype = 2) > 0"%field,
                   "ts_regression(returns, ts_step(20), 20, lag = 0, rettype = 2) > 0",
                   "ts_regression(returns, ts_step(5), 5, lag = 0, rettype = 2) > 0"]
    # Exit conditions ("-1" means never exit).
    exit_events = ["abs(returns) > 0.1", "-1"]
    # Region-specific sentiment/news triggers (unused; see docstring).
    usa_events = ["rank(rp_css_business) > 0.8", "ts_rank(rp_css_business, 22) > 0.8", "rank(vec_avg(mws82_sentiment)) > 0.8",
                  "ts_rank(vec_avg(mws82_sentiment),22) > 0.8", "rank(vec_avg(nws48_ssc)) > 0.8",
                  "ts_rank(vec_avg(nws48_ssc),22) > 0.8", "rank(vec_avg(mws50_ssc)) > 0.8", "ts_rank(vec_avg(mws50_ssc),22) > 0.8",
                  "ts_rank(vec_sum(scl12_alltype_buzzvec),22) > 0.9", "pcr_oi_270 < 1", "pcr_oi_270 > 1",]
    asi_events = ["rank(vec_avg(mws38_score)) > 0.8", "ts_rank(vec_avg(mws38_score),22) > 0.8"]
    eur_events = ["rank(rp_css_business) > 0.8", "ts_rank(rp_css_business, 22) > 0.8",
                  "rank(vec_avg(oth429_research_reports_fundamental_keywords_4_method_2_pos)) > 0.8",
                  "ts_rank(vec_avg(oth429_research_reports_fundamental_keywords_4_method_2_pos),22) > 0.8",
                  "rank(vec_avg(mws84_sentiment)) > 0.8", "ts_rank(vec_avg(mws84_sentiment),22) > 0.8",
                  "rank(vec_avg(mws85_sentiment)) > 0.8", "ts_rank(vec_avg(mws85_sentiment),22) > 0.8",
                  "rank(mdl110_analyst_sentiment) > 0.8", "ts_rank(mdl110_analyst_sentiment, 22) > 0.8",
                  "rank(vec_avg(nws3_scores_posnormscr)) > 0.8",
                  "ts_rank(vec_avg(nws3_scores_posnormscr),22) > 0.8",
                  "rank(vec_avg(mws36_sentiment_words_positive)) > 0.8",
                  "ts_rank(vec_avg(mws36_sentiment_words_positive),22) > 0.8"]
    glb_events = ["rank(vec_avg(mdl109_news_sent_1m)) > 0.8",
                  "ts_rank(vec_avg(mdl109_news_sent_1m),22) > 0.8",
                  "rank(vec_avg(nws20_ssc)) > 0.8",
                  "ts_rank(vec_avg(nws20_ssc),22) > 0.8",
                  "vec_avg(nws20_ssc) > 0",
                  "rank(vec_avg(nws20_bee)) > 0.8",
                  "ts_rank(vec_avg(nws20_bee),22) > 0.8",
                  "rank(vec_avg(nws20_qmb)) > 0.8",
                  "ts_rank(vec_avg(nws20_qmb),22) > 0.8"]
    chn_events = ["rank(vec_avg(oth111_xueqiunaturaldaybasicdivisionstat_senti_conform)) > 0.8",
                  "ts_rank(vec_avg(oth111_xueqiunaturaldaybasicdivisionstat_senti_conform),22) > 0.8",
                  "rank(vec_avg(oth111_gubanaturaldaydevicedivisionstat_senti_conform)) > 0.8",
                  "ts_rank(vec_avg(oth111_gubanaturaldaydevicedivisionstat_senti_conform),22) > 0.8",
                  "rank(vec_avg(oth111_baragedivisionstat_regi_senti_conform)) > 0.8",
                  "ts_rank(vec_avg(oth111_baragedivisionstat_regi_senti_conform),22) > 0.8"]
    kor_events = ["rank(vec_avg(mdl110_analyst_sentiment)) > 0.8",
                  "ts_rank(vec_avg(mdl110_analyst_sentiment),22) > 0.8",
                  "rank(vec_avg(mws38_score)) > 0.8",
                  "ts_rank(vec_avg(mws38_score),22) > 0.8"]
    twn_events = ["rank(vec_avg(mdl109_news_sent_1m)) > 0.8",
                  "ts_rank(vec_avg(mdl109_news_sent_1m),22) > 0.8",
                  "rank(rp_ess_business) > 0.8",
                  "ts_rank(rp_ess_business,22) > 0.8"]
    for oe in open_events:
        for ee in exit_events:
            alpha = "%s(%s, %s, %s)"%(op, oe, field, ee)
            output.append(alpha)
    return output
def check_submission(alpha_bag, gold_bag, start):
    """Run the submission check for each alpha id and collect passers.

    Args:
        alpha_bag: list of alpha ids to check.  NOTE: ids that hit a
            transient failure are re-appended to this list while it is
            being iterated, which deliberately extends the loop so they
            get retried later.
        gold_bag: accumulator of (alpha_id, prod_correlation) passers;
            mutated in place and also returned.
        start: index to resume from.
    """
    depot = []
    s = login()
    for idx, g in enumerate(alpha_bag):
        if idx < start:
            continue
        if idx % 5 == 0:
            print(idx)
        if idx % 200 == 0:
            # Periodic re-login to keep the session fresh.
            s = login()
        pc = get_check_submission(s, g)
        if pc == "sleep":
            # Session looked logged-out: wait, re-login, retry later.
            sleep(100)
            s = login()
            alpha_bag.append(g)
        elif pc != pc:
            # pc is NaN (NaN != NaN): self-correlation check failed to
            # produce a value; retry later.
            print("check self-corrlation error")
            sleep(100)
            alpha_bag.append(g)
        elif pc == "fail":
            continue
        elif pc == "error":
            depot.append(g)
        else:
            print(g)
            gold_bag.append((g, pc))
    print(depot)
    return gold_bag
def get_check_submission(s, alpha_id):
    """Run the submission check for one alpha and summarize the result.

    Returns:
        float: PROD_CORRELATION value when every check passes;
        "fail": at least one check failed;
        "sleep": session appears to be logged out;
        "error": the response could not be parsed.
    """
    # Poll until the check result is ready (no more Retry-After header).
    while True:
        result = s.get("https://api.worldquantbrain.com/alphas/" + alpha_id + "/check")
        if "retry-after" not in result.headers:
            break
        time.sleep(float(result.headers["Retry-After"]))
    try:
        payload = result.json()
        if payload.get("is", 0) == 0:
            print("logged out")
            return "sleep"
        checks_df = pd.DataFrame(payload["is"]["checks"])
        pc = checks_df[checks_df.name == "PROD_CORRELATION"]["value"].values[0]
        return "fail" if any(checks_df["result"] == "FAIL") else pc
    except:
        print("catch: %s"%(alpha_id))
        return "error"
def view_alphas(gold_bag):
    """Print submit candidates sorted by sharpe, best first.

    Args:
        gold_bag: list of (alpha_id, prod_correlation) tuples from
            check_submission.
    """
    s = login()
    rows = []
    for gold, pc in gold_bag:
        t = locate_alpha(s, gold)
        # Reorder to: id, sharpe, turnover, fitness, margin, dateCreated,
        # expression, prod_correlation.
        rows.append([t[0], t[2], t[3], t[4], t[5], t[6], t[1], pc])
    rows.sort(reverse=True, key=lambda row: row[1])
    for row in rows:
        print(row)
def locate_alpha(s, alpha_id):
    """Fetch an alpha's metadata from the API.

    Returns:
        [alpha_id, code, sharpe, turnover, fitness, margin,
        dateCreated, decay]
    """
    # Poll until the resource is ready (no more Retry-After header).
    while True:
        alpha = s.get("https://api.worldquantbrain.com/alphas/" + alpha_id)
        if "retry-after" not in alpha.headers:
            break
        time.sleep(float(alpha.headers["Retry-After"]))
    metrics = json.loads(alpha.content.decode('utf-8'))
    is_stats = metrics["is"]
    return [
        alpha_id,
        metrics['regular']['code'],
        is_stats["sharpe"],
        is_stats["turnover"],
        is_stats["fitness"],
        is_stats["margin"],
        metrics["dateCreated"],
        metrics["settings"]["decay"],
    ]
# some factory for other operators
def vector_factory(op, field):
    """Apply a vector operator to `field` against each weighting vector."""
    vectors = ["cap"]
    return ["%s(%s, %s)" % (op, field, vector) for vector in vectors]
def ts_comp_factory(op, field, factor, paras):
    """Expand a parameterized ts operator over windows x parameter values.

    Args:
        op: operator name, e.g. "ts_percentage".
        field: field expression to wrap.
        factor: name of the operator's extra keyword parameter.
        paras: list of parameter values (ints or floats; floats are
            rendered with one decimal place).

    Returns:
        list of alpha expression strings.

    Fixes vs. the original: uses isinstance() instead of type() ==, and
    unsupported parameter types are skipped instead of leaving `alpha`
    unbound (which raised NameError / silently duplicated the previous
    expression).
    """
    output = []
    days = [5, 22, 66, 240]
    for day, para in product(days, paras):
        # bool is an int subclass; exclude it so True/False are skipped.
        if isinstance(para, float):
            output.append("%s(%s, %d, %s=%.1f)" % (op, field, day, factor, para))
        elif isinstance(para, int) and not isinstance(para, bool):
            output.append("%s(%s, %d, %s=%d)" % (op, field, day, factor, para))
    return output
def twin_field_factory(op, field, fields):
    """Pair `field` with every other field under a two-input ts operator.

    Args:
        op: two-field ts operator, e.g. "ts_corr".
        field: the primary field.
        fields: candidate counterpart fields; `field` itself is excluded.

    Returns:
        list of "op(field, counterpart, day)" expressions.

    Fix vs. the original: the original used set() subtraction, which
    randomizes counterpart order between runs (hash randomization);
    this deduplicates while preserving input order so the output is
    deterministic and reproducible.
    """
    days = [5, 22, 66, 240]
    counterparts = [f for f in dict.fromkeys(fields) if f != field]
    return [
        "%s(%s, %s, %d)" % (op, field, counterpart, day)
        for day in days
        for counterpart in counterparts
    ]
def login_hk():
    """Authenticate against the BRAIN API, handling the biometrics flow.

    Unlike login(), this variant detects the 401 + "persona"
    WWW-Authenticate response and walks the user through face-scan
    biometrics interactively before retrying.

    NOTE(review): credentials are hard-coded empty, same as login().

    Returns:
        requests.Session (authenticated on success).
    """
    username = ""
    password = ""
    # Create a session to persistently store the headers
    s = requests.Session()
    # Save credentials into session
    s.auth = (username, password)
    # Send a POST request to the /authentication API
    response = s.post('https://api.worldquantbrain.com/authentication')
    if response.status_code == requests.codes.unauthorized:
        # Check if biometrics is required
        if response.headers.get("WWW-Authenticate") == "persona":
            print(
                "Complete biometrics authentication by scanning your face. Follow the link: \n"
                + urljoin(response.url, response.headers["Location"]) + "\n"
            )
            input("Press any key after you complete the biometrics authentication.")
            # Retry the authentication after biometrics
            biometrics_response = s.post(urljoin(response.url, response.headers["Location"]))
            # 201 Created signals that biometrics completed successfully.
            while biometrics_response.status_code != 201:
                input("Biometrics authentication is not complete. Please try again and press any key when completed.")
                biometrics_response = s.post(urljoin(response.url, response.headers["Location"]))
            print("Biometrics authentication completed.")
        else:
            print("\nIncorrect username or password. Please check your credentials.\n")
    else:
        print("Logged in successfully.")
    return s

@ -0,0 +1,477 @@
package main
import (
"encoding/base64"
"encoding/json"
"fmt"
"math"
"strconv"
"strings"
"time"
"github.com/valyala/fasthttp"
)
const (
	// baseURL is the root of the WorldQuant BRAIN REST API.
	baseURL = "https://api.worldquantbrain.com"
	// zeroStreakThreshold: five trading years (252 days each) of
	// consecutive zero PnL marks an alpha as illegal.
	zeroStreakThreshold = 5 * 252
	// requiredDays is the minimum calendar-day span a PnL series must cover.
	requiredDays = 2920
)
// Client wraps a fasthttp client with HTTP basic-auth credentials for
// the BRAIN API.
type Client struct {
	client   *fasthttp.Client
	username string
	password string
}

// AlphaRecord is a flattened alpha summary.
// NOTE(review): declared but not referenced anywhere in this file —
// presumably kept for future use; confirm before removing.
type AlphaRecord struct {
	ID          string  `json:"id"`
	Name        string  `json:"name"`
	DateCreated string  `json:"dateCreated"`
	Sharpe      float64 `json:"sharpe"`
	Fitness     float64 `json:"fitness"`
	Turnover    float64 `json:"turnover"`
	Margin      float64 `json:"margin"`
	LongCount   float64 `json:"longCount"`
	ShortCount  float64 `json:"shortCount"`
	Decay       int     `json:"decay"`
	Code        string  `json:"code"`
}

// AlphaResponse mirrors the /users/self/alphas list payload; the "is"
// metrics arrive as a generic map and are extracted via getFloat.
type AlphaResponse struct {
	Results []struct {
		ID          string                 `json:"id"`
		Name        string                 `json:"name"`
		DateCreated string                 `json:"dateCreated"`
		Is          map[string]interface{} `json:"is"`
		Settings    struct {
			Decay int `json:"decay"`
		} `json:"settings"`
		Regular struct {
			Code string `json:"code"`
		} `json:"regular"`
	} `json:"results"`
}

// PnlResponse mirrors the recordsets/pnl payload: rows of
// [date, pnl1, pnl2, ...] with mixed types.
type PnlResponse struct {
	Records [][]interface{} `json:"records"`
}
// NewClient builds an API client around a fresh fasthttp.Client with
// the given basic-auth credentials.
func NewClient(username, password string) *Client {
	c := &Client{client: &fasthttp.Client{}}
	c.username = username
	c.password = password
	return c
}
// getAuthHeader returns the value for an HTTP Basic Authorization header.
func (c *Client) getAuthHeader() string {
	credentials := c.username + ":" + c.password
	encoded := base64.StdEncoding.EncodeToString([]byte(credentials))
	return "Basic " + encoded
}
// Login authenticates the session against the BRAIN API and prints the
// raw response body (mirrors the Python login()).
func (c *Client) Login() error {
	req := fasthttp.AcquireRequest()
	resp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseRequest(req)
	defer fasthttp.ReleaseResponse(resp)
	req.SetRequestURI(baseURL + "/authentication")
	req.Header.SetMethod("POST")
	req.Header.Set("Authorization", c.getAuthHeader())
	err := c.client.Do(req, resp)
	if err != nil {
		return err
	}
	fmt.Println(string(resp.Body()))
	return nil
}
// WaitGet performs an authenticated GET, honoring Retry-After headers
// and retrying HTTP-error (>=400) responses with exponential backoff,
// up to maxRetries attempts.  On success, ownership of the returned
// response transfers to the caller, who must release it with
// fasthttp.ReleaseResponse; the request object is always released here.
func (c *Client) WaitGet(url string, maxRetries int) (*fasthttp.Response, error) {
	retries := 0
	for retries < maxRetries {
		for {
			req := fasthttp.AcquireRequest()
			resp := fasthttp.AcquireResponse()
			req.SetRequestURI(url)
			req.Header.SetMethod("GET")
			req.Header.Set("Authorization", c.getAuthHeader())
			err := c.client.Do(req, resp)
			if err != nil {
				// Transport error: release both and bail out.
				fasthttp.ReleaseRequest(req)
				fasthttp.ReleaseResponse(resp)
				return nil, err
			}
			retryAfter := resp.Header.Peek("Retry-After")
			if len(retryAfter) == 0 {
				fasthttp.ReleaseRequest(req)
				if resp.StatusCode() < 400 {
					// Success: caller now owns resp.
					return resp, nil
				}
				// HTTP error: fall through to the backoff retry.
				fasthttp.ReleaseResponse(resp)
				break
			}
			// Server asked us to wait; sleep and poll again.
			sleepSec, _ := strconv.ParseFloat(string(retryAfter), 64)
			time.Sleep(time.Duration(sleepSec) * time.Second)
			fasthttp.ReleaseRequest(req)
			fasthttp.ReleaseResponse(resp)
		}
		// Exponential backoff between attempts: 1s, 2s, 4s, ...
		time.Sleep(time.Duration(math.Pow(2, float64(retries))) * time.Second)
		retries++
	}
	return nil, fmt.Errorf("max retries exceeded")
}
// Get performs a single authenticated GET request.  On success the
// caller owns the returned response and must release it with
// fasthttp.ReleaseResponse.
//
// Fix vs. the original: the acquired request object was never released
// on the success path (only on error), leaking it back to the pool;
// a deferred release now covers both paths.
func (c *Client) Get(url string) (*fasthttp.Response, error) {
	req := fasthttp.AcquireRequest()
	defer fasthttp.ReleaseRequest(req)
	resp := fasthttp.AcquireResponse()
	req.SetRequestURI(url)
	req.Header.SetMethod("GET")
	req.Header.Set("Authorization", c.getAuthHeader())
	if err := c.client.Do(req, resp); err != nil {
		fasthttp.ReleaseResponse(resp)
		return nil, err
	}
	return resp, nil
}
// Patch sends an authenticated PATCH request with a JSON body.  On
// success the caller owns the returned response and must release it.
func (c *Client) Patch(url string, data map[string]interface{}) (*fasthttp.Response, error) {
	req := fasthttp.AcquireRequest()
	defer fasthttp.ReleaseRequest(req)
	resp := fasthttp.AcquireResponse()

	req.SetRequestURI(url)
	req.Header.SetMethod("PATCH")
	req.Header.Set("Authorization", c.getAuthHeader())
	req.Header.SetContentType("application/json")

	// Marshal errors are ignored, matching the original behavior
	// (map[string]interface{} of JSON-safe values cannot fail here).
	body, _ := json.Marshal(data)
	req.SetBody(body)

	if err := c.client.Do(req, resp); err != nil {
		fasthttp.ReleaseResponse(resp)
		return nil, err
	}
	return resp, nil
}
// GetAlphas mirrors the Python get_alphas: it pages through the user's
// unsubmitted alphas (100 per request), filters by creation-date window
// (the year is hard-coded to 2025), sharpe/fitness thresholds and
// region, and returns the collected records plus the client.
//
// Records are [id, exp, sharpe, turnover, fitness, margin, dateCreated,
// decay(, suggestedDecay)]; strongly negative alphas (usage != "submit")
// get their expressions sign-flipped.
func GetAlphas(c *Client, startDate, endDate string, sharpeTh, fitnessTh float64, region string, alphaNum int, usage string) ([][]interface{}, *Client, error) {
	output := make([][]interface{}, 0)
	count := 0
	for i := 0; i < alphaNum; i += 100 {
		fmt.Println(i)
		// urlE selects strongly positive alphas, urlC strongly negative;
		// %1F / %3E / %3C are URL-encoded separators and comparators.
		urlE := fmt.Sprintf("%s/users/self/alphas?limit=100&offset=%d&status=UNSUBMITTED%%1FIS_FAIL&dateCreated%%3E=2025-%sT00:00:00-04:00&dateCreated%%3C2025-%sT00:00:00-04:00&is.fitness%%3E%f&is.sharpe%%3E%f&settings.region=%s&order=-is.sharpe&hidden=false&type!=SUPER",
			baseURL, i, startDate, endDate, fitnessTh, sharpeTh, region)
		urlC := fmt.Sprintf("%s/users/self/alphas?limit=100&offset=%d&status=UNSUBMITTED%%1FIS_FAIL&dateCreated%%3E=2025-%sT00:00:00-04:00&dateCreated%%3C2025-%sT00:00:00-04:00&is.fitness%%3C-%f&is.sharpe%%3C-%f&settings.region=%s&order=is.sharpe&hidden=false&type!=SUPER",
			baseURL, i, startDate, endDate, fitnessTh, sharpeTh, region)
		urls := []string{urlE}
		if usage != "submit" {
			urls = append(urls, urlC)
		}
		for _, url := range urls {
			resp, err := c.Get(url)
			if err != nil {
				// Treat any failure as an expired session and re-login.
				// NOTE(review): the Login() error is ignored here.
				fmt.Printf("%d finished re-login\n", i)
				c.Login()
				continue
			}
			var alphaResp AlphaResponse
			if err := json.Unmarshal(resp.Body(), &alphaResp); err != nil {
				fasthttp.ReleaseResponse(resp)
				fmt.Printf("%d finished re-login\n", i)
				c.Login()
				continue
			}
			fasthttp.ReleaseResponse(resp)
			for _, item := range alphaResp.Results {
				alphaID := item.ID
				name := item.Name
				dateCreated := item.DateCreated
				sharpe := getFloat(item.Is, "sharpe")
				fitness := getFloat(item.Is, "fitness")
				turnover := getFloat(item.Is, "turnover")
				margin := getFloat(item.Is, "margin")
				longCount := getFloat(item.Is, "longCount")
				shortCount := getFloat(item.Is, "shortCount")
				decay := item.Settings.Decay
				exp := item.Regular.Code
				count++
				// Require a minimally diversified book before keeping.
				if (longCount + shortCount) > 100 {
					if sharpe < -sharpeTh {
						// Negative alpha: flip the expression's sign.
						exp = "-" + exp
					}
					rec := []interface{}{alphaID, exp, sharpe, turnover, fitness, margin, dateCreated, decay}
					fmt.Println(rec)
					// Higher turnover -> larger suggested decay.
					if turnover > 0.7 {
						rec = append(rec, float64(decay)*4)
					} else if turnover > 0.6 {
						rec = append(rec, float64(decay)*3+3)
					} else if turnover > 0.5 {
						rec = append(rec, float64(decay)*3)
					} else if turnover > 0.4 {
						rec = append(rec, float64(decay)*2)
					} else if turnover > 0.35 {
						rec = append(rec, float64(decay)+4)
					} else if turnover > 0.3 {
						rec = append(rec, float64(decay)+2)
					}
					output = append(output, rec)
				}
				_ = name
			}
		}
	}
	fmt.Printf("count: %d\n", count)
	return output, c, nil
}
// getFloat extracts a numeric value from a generic JSON map, returning
// 0 when the key is absent or the value is not numeric.
func getFloat(m map[string]interface{}, key string) float64 {
	v, ok := m[key]
	if !ok {
		return 0
	}
	// encoding/json decodes numbers as float64; the int case is kept
	// for parity with the original implementation.
	switch n := v.(type) {
	case float64:
		return n
	case int:
		return float64(n)
	default:
		return 0
	}
}
// CheckConsecutiveNonZeroValues returns false (illegal) when either PnL
// column contains a run of requiredStreak *identical* consecutive
// non-zero values, or when a column is entirely zero.  Rows are
// expected as [date, col1, col2]; inputs shorter than requiredStreak
// pass trivially.
func CheckConsecutiveNonZeroValues(alphaID string, data [][]interface{}, requiredStreak int) bool {
	if len(data) < requiredStreak {
		return true
	}
	// checkColumn reports true when no identical-value streak of length
	// requiredStreak exists in the column.
	checkColumn := func(columnData []float64) bool {
		if len(columnData) < requiredStreak {
			return true
		}
		currentStreakCount := 0
		var currentStreakValue interface{}
		for _, value := range columnData {
			if value != 0 {
				if currentStreakValue != nil && value == currentStreakValue {
					currentStreakCount++
				} else {
					// New value starts a fresh streak.
					currentStreakValue = value
					currentStreakCount = 1
				}
			} else {
				// A zero breaks any running streak.
				currentStreakValue = nil
				currentStreakCount = 0
			}
			if currentStreakCount >= requiredStreak {
				return false
			}
		}
		return true
	}
	// Collect the two PnL columns, skipping malformed/short rows.
	var column1Values, column2Values []float64
	for _, row := range data {
		if len(row) >= 3 {
			if v, ok := row[1].(float64); ok {
				column1Values = append(column1Values, v)
			}
			if v, ok := row[2].(float64); ok {
				column2Values = append(column2Values, v)
			}
		}
	}
	// An entirely zero column is immediately illegal.
	if len(column1Values) > 0 && len(column2Values) > 0 {
		isCol1AllZeros := allZeros(column1Values)
		isCol2AllZeros := allZeros(column2Values)
		if isCol1AllZeros || isCol2AllZeros {
			fmt.Println(alphaID, "不合法")
			return false
		}
	}
	if !checkColumn(column1Values) {
		fmt.Println(alphaID, "不合法")
		return false
	}
	if !checkColumn(column2Values) {
		fmt.Println(alphaID, "不合法")
		return false
	}
	return true
}
// allZeros reports whether every element of arr is exactly zero
// (vacuously true for an empty slice).
func allZeros(arr []float64) bool {
	for i := range arr {
		if arr[i] != 0 {
			return false
		}
	}
	return true
}
// GetAlphaPnlLegal downloads an alpha's PnL recordset and validates it:
// the date range must span at least requiredDays calendar days, the
// first PnL column must not contain zeroStreakThreshold consecutive
// zeros, and CheckConsecutiveNonZeroValues must pass with a streak
// limit of 200 identical values.  Any fetch/parse failure counts as
// illegal (false).
func GetAlphaPnlLegal(c *Client, alphaID string) bool {
	// notLegalID is populated but never read beyond this function
	// (see the discard at the bottom); kept for parity.
	notLegalID := make([]string, 0)
	url := baseURL + "/alphas/" + alphaID + "/recordsets/pnl"
	resp, err := c.WaitGet(url, 10)
	if err != nil {
		return false
	}
	defer fasthttp.ReleaseResponse(resp)
	var pnlResp PnlResponse
	if err := json.Unmarshal(resp.Body(), &pnlResp); err != nil {
		return false
	}
	records := pnlResp.Records
	if len(records) == 0 {
		return false
	}
	// Parse the leading date column of every row.
	var dateList []time.Time
	for _, record := range records {
		if len(record) == 0 {
			continue
		}
		dateStr, ok := record[0].(string)
		if !ok {
			return false
		}
		dateObj, err := time.Parse("2006-01-02", dateStr)
		if err != nil {
			return false
		}
		dateList = append(dateList, dateObj)
	}
	if len(dateList) == 0 {
		return false
	}
	// Find the covered calendar span.
	minDate := dateList[0]
	maxDate := dateList[0]
	for _, d := range dateList {
		if d.Before(minDate) {
			minDate = d
		}
		if d.After(maxDate) {
			maxDate = d
		}
	}
	totalDays := int(maxDate.Sub(minDate).Hours() / 24)
	if totalDays < requiredDays {
		return false
	}
	// Mark zero entries of the first PnL column.
	col1Zeros := make([]bool, 0)
	for _, record := range records {
		if len(record) >= 2 {
			if v, ok := record[1].(float64); ok {
				col1Zeros = append(col1Zeros, v == 0)
			}
		}
	}
	col1MaxZeroStreak := maxConsecutiveZeros(col1Zeros)
	if col1MaxZeroStreak >= zeroStreakThreshold {
		fmt.Printf("%s 不合法:存在连续%d年零值\n", alphaID, zeroStreakThreshold/252)
		notLegalID = append(notLegalID, alphaID)
		return false
	}
	if !CheckConsecutiveNonZeroValues(alphaID, records, 200) {
		return false
	}
	_ = notLegalID
	return true
}
// maxConsecutiveZeros returns the length of the longest run of true
// values in arr (each true marks a zero-valued pnl record).
func maxConsecutiveZeros(arr []bool) int {
	longest, run := 0, 0
	for _, isZero := range arr {
		if !isZero {
			run = 0
			continue
		}
		run++
		if run > longest {
			longest = run
		}
	}
	return longest
}
// Mute hides the given alpha by PATCHing its "hidden" flag to true.
func Mute(c *Client, alphaID string) {
	payload := map[string]interface{}{"hidden": true}
	c.Patch(baseURL+"/alphas/"+alphaID, payload)
}
// main logs in, fetches the December USA alpha candidates, runs the pnl
// legality checks on each one, and hides every alpha that fails.
func main() {
	client := NewClient("", "")
	client.Login()

	foTracker, c, err := GetAlphas(client, "12-01", "12-31", 1, 0.5, "USA", 1000, "submit")
	if err != nil {
		fmt.Println("Error:", err)
		return
	}

	fmt.Printf("%d 个alpha 进行pnl合法检测,请耐心等待\n", len(foTracker))
	fmt.Println(len(foTracker))

	// Walk the tracker list from the end towards the front, printing a
	// progress marker every 25 alphas.
	count := 0
	for i := len(foTracker) - 1; i >= 0; i-- {
		if count%25 == 0 {
			fmt.Printf("=========== %d ===========\n", count)
		}
		count++

		alphaID, ok := foTracker[i][0].(string)
		if !ok {
			// Skip malformed rows whose first cell is not a string ID.
			continue
		}
		if !GetAlphaPnlLegal(c, alphaID) {
			fmt.Println(alphaID, "已经隐藏")
			Mute(c, alphaID)
		}
	}
}

@ -0,0 +1,223 @@
import datetime
import os
import time

import httpx
def login():
    """Authenticate against the WorldQuant BRAIN API and return the client.

    Credentials are read from the BRAIN_USERNAME / BRAIN_PASSWORD environment
    variables when set, falling back to the previous hard-coded values so
    existing runs keep working.
    NOTE(review): credentials should not live in source control — move them
    fully to the environment or a secrets store and drop the fallbacks.

    Returns:
        An authenticated ``httpx.Client`` with retries and pooling configured.
    """
    username = os.environ.get("BRAIN_USERNAME", "jack0210_@hotmail.com")
    password = os.environ.get("BRAIN_PASSWORD", "!QAZ2wsx+0913")
    timeout = httpx.Timeout(60.0, connect=10.0)
    limits = httpx.Limits(max_keepalive_connections=20, max_connections=100)
    transport = httpx.HTTPTransport(retries=3)
    s = httpx.Client(
        auth=(username, password),
        timeout=timeout,
        limits=limits,
        transport=transport
    )
    # The body is printed so a failed login is visible immediately in the log.
    response = s.post('https://api.worldquantbrain.com/authentication')
    print(response.content)
    return s
def wait_get(s, url: str, max_retries: int = 10):
    """GET *url*, honouring Retry-After headers and retrying failed requests.

    Fix: the original compared ``headers.get("Retry-After", 0)`` (a *string*
    when present) with the int ``0``, so a literal ``"0"`` header never
    matched and the inner loop would spin forever sleeping zero seconds.
    The header is now parsed as a float and any non-positive value stops
    the polling loop.

    Args:
        s: an httpx.Client-like object exposing ``get(url)``.
        url: the resource to poll.
        max_retries: failed (status >= 400) attempts before giving up;
            backs off exponentially (2**retries seconds) between attempts.

    Returns:
        The last response received (callers must check its status code).
    """
    retries = 0
    while retries < max_retries:
        # Poll until the server stops asking us to wait.
        while True:
            simulation_progress = s.get(url)
            retry_after = float(simulation_progress.headers.get("Retry-After", 0))
            if retry_after <= 0:
                break
            time.sleep(retry_after)
        if simulation_progress.status_code < 400:
            break
        time.sleep(2 ** retries)
        retries += 1
    return simulation_progress
def get_alphas(start_date, end_date, sharpe_th, fitness_th, region, alpha_num, usage):
    """Page through the account's unsubmitted alphas and collect candidates.

    Fix: the bare ``except:`` (which also swallowed KeyboardInterrupt and
    SystemExit) is narrowed to ``except Exception``; the unused ``name``
    local is dropped.

    Args:
        start_date, end_date: month-day strings ("12-01"); queried as 2025 dates.
        sharpe_th, fitness_th: absolute IS sharpe / fitness thresholds.
        region: settings.region filter, e.g. "USA".
        alpha_num: maximum number of alphas to page through, 100 per page.
        usage: when not "submit", the mirrored negative-metric list is
            fetched as well and those expressions are sign-flipped.

    Returns:
        (output, s): collected alpha records and the authenticated client
        (recreated via login() whenever a page fails to parse).
    """
    s = login()
    output = []
    count = 0
    for i in range(0, alpha_num, 100):
        print(i)
        # %1F, %3E and %3C are the URL-encoded separator, ">" and "<"
        # characters understood by the BRAIN alpha-listing API.
        url_e = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d" % (i) \
            + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \
            + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \
            + "T00:00:00-04:00&is.fitness%3E" + str(fitness_th) + "&is.sharpe%3E" \
            + str(sharpe_th) + "&settings.region=" + region + "&order=-is.sharpe&hidden=false&type!=SUPER"
        url_c = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d" % (i) \
            + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \
            + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \
            + "T00:00:00-04:00&is.fitness%3C-" + str(fitness_th) + "&is.sharpe%3C-" \
            + str(sharpe_th) + "&settings.region=" + region + "&order=is.sharpe&hidden=false&type!=SUPER"
        urls = [url_e]
        if usage != "submit":
            urls.append(url_c)
        for url in urls:
            response = s.get(url)
            try:
                alpha_list = response.json()["results"]
                for alpha in alpha_list:
                    alpha_id = alpha["id"]
                    dateCreated = alpha["dateCreated"]
                    sharpe = alpha["is"]["sharpe"]
                    fitness = alpha["is"]["fitness"]
                    turnover = alpha["is"]["turnover"]
                    margin = alpha["is"]["margin"]
                    longCount = alpha["is"]["longCount"]
                    shortCount = alpha["is"]["shortCount"]
                    decay = alpha["settings"]["decay"]
                    exp = alpha['regular']['code']
                    count += 1
                    # Keep only alphas that actually hold a reasonable
                    # number of names.
                    if (longCount + shortCount) > 100:
                        if sharpe < -sharpe_th:
                            # Negative-sharpe alphas are usable once the
                            # expression is sign-flipped.
                            exp = "-%s" % exp
                        rec = [alpha_id, exp, sharpe, turnover, fitness, margin, dateCreated, decay]
                        print(rec)
                        # Append a suggested decay, scaled up with turnover.
                        if turnover > 0.7:
                            rec.append(decay * 4)
                        elif turnover > 0.6:
                            rec.append(decay * 3 + 3)
                        elif turnover > 0.5:
                            rec.append(decay * 3)
                        elif turnover > 0.4:
                            rec.append(decay * 2)
                        elif turnover > 0.35:
                            rec.append(decay + 4)
                        elif turnover > 0.3:
                            rec.append(decay + 2)
                        output.append(rec)
            except Exception:
                # A page that fails to parse usually means the session
                # expired — log in again and continue with the next page.
                print("%d finished re-login" % i)
                s = login()
    print("count: %d" % count)
    return output, s
def check_consecutive_non_zero_values(alpha_id, data, required_streak=200):
    """Return False when either pnl column looks suspicious, True otherwise.

    A column is suspicious when it is entirely zero, or when it repeats one
    identical non-zero value for ``required_streak`` consecutive rows.
    Rows shorter than three cells are ignored; data shorter than the
    required streak is trivially legal.
    """
    if not data or len(data) < required_streak:
        return True

    def longest_repeat(values):
        """Length of the longest run of identical consecutive non-zero values."""
        best = run = 0
        prev = None
        for v in values:
            if v == 0:
                prev, run = None, 0
                continue
            run = run + 1 if v == prev else 1
            prev = v
            if run > best:
                best = run
        return best

    col1 = [row[1] for row in data if len(row) >= 3]
    col2 = [row[2] for row in data if len(row) >= 3]

    if col1 and col2:
        if all(v == 0 for v in col1) or all(v == 0 for v in col2):
            print(alpha_id, "不合法")
            return False

    for column in (col1, col2):
        if len(column) >= required_streak and longest_repeat(column) >= required_streak:
            print(alpha_id, "不合法")
            return False
    return True
def get_alpha_pnl_legal(s, alpha_id: str) -> bool:
    """Return True when the alpha's pnl record set passes all legality checks.

    An alpha is rejected when:
      * its pnl record set is empty or any record date fails to parse,
      * the history spans fewer than 2920 calendar days (~8 years),
      * the pnl column sits at zero for 5 * 252 consecutive records, or
      * check_consecutive_non_zero_values flags a flat non-zero stretch.

    Fix: the original accumulated IDs into a ``not_legal_id`` list that was
    never read; that dead code is removed.
    """
    pnl = wait_get(s, "https://api.worldquantbrain.com/alphas/" + alpha_id + "/recordsets/pnl").json()
    records = pnl["records"]
    if not records:
        return False

    date_list = []
    for record in records:
        try:
            # Each record is expected to start with an ISO "YYYY-MM-DD" date.
            date_obj = datetime.datetime.strptime(record[0], '%Y-%m-%d').date()
            date_list.append(date_obj)
        except Exception:
            # Any malformed record invalidates the alpha outright.
            return False

    total_days = (max(date_list) - min(date_list)).days
    if total_days < 2920:  # roughly eight calendar years of history
        return False

    zero_streak_threshold = 5 * 252  # five years of trading days
    # record[1] is the pnl value; presumably always numeric — TODO confirm.
    col1_zeros = [record[1] == 0 for record in records]

    def max_consecutive_zeros(arr):
        """Longest run of True values in arr."""
        max_streak = current_streak = 0
        for val in arr:
            current_streak = current_streak + 1 if val else 0
            max_streak = max(max_streak, current_streak)
        return max_streak

    if max_consecutive_zeros(col1_zeros) >= zero_streak_threshold:
        print(f"{alpha_id} 不合法:存在连续{zero_streak_threshold // 252}年零值")
        return False

    return check_consecutive_non_zero_values(alpha_id, records)
def mute(s, alpha_id):
    """Hide *alpha_id* on the platform by PATCHing its `hidden` flag to true."""
    endpoint = "https://api.worldquantbrain.com/alphas/" + alpha_id
    s.patch(endpoint, json={"hidden": True})
def main():
    """Fetch the December USA alpha candidates and hide every alpha whose
    pnl history fails the legality checks.

    Fixes: ``== False`` replaced with the idiomatic ``not ...``; the
    redundant ``fo_tracker[::-1][0:]`` copy-then-full-slice replaced with
    ``reversed()``.
    """
    fo_tracker, s = get_alphas('12-01', '12-31', 1, 0.5, 'USA', 1000, 'submit')
    f_num = len(fo_tracker)
    print(f_num, "个alpha 进行pnl合法检测,请耐心等待")
    count = 0
    print(len(fo_tracker))
    # Walk the tracker list from the end towards the front, printing a
    # progress marker every 25 alphas.
    for rec in reversed(fo_tracker):
        if count % 25 == 0:
            print('===========', count, '===========')
        count += 1
        alpha_id = rec[0]
        if not get_alpha_pnl_legal(s, alpha_id):
            print(alpha_id, '已经隐藏')
            mute(s, alpha_id)
# Script entry point: run the legality sweep only when executed directly.
if __name__ == "__main__":
    main()
Loading…
Cancel
Save