You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 
alpha_tools/alpha_check/machine_lib.py

760 lines
30 KiB

import requests
from os import environ
from time import sleep
import time
import json
import pandas as pd
import random
import pickle
from urllib.parse import urljoin
from itertools import product
from itertools import combinations
from collections import defaultdict
import pickle
# Single-field operators applied point-in-time to one datafield.
basic_ops = ["reverse", "inverse", "rank", "zscore", "quantile", "normalize"]

# Time-series operators; ts_factory expands each over several lookback windows.
ts_ops = ["ts_rank", "ts_zscore", "ts_delta", "ts_sum", "ts_delay",
          "ts_std_dev", "ts_mean", "ts_arg_min", "ts_arg_max", "ts_scale", "ts_quantile"]

# Full operator vocabulary consumed by first_order_factory.
ops_set = basic_ops + ts_ops
def login():
    """Authenticate against the WorldQuant BRAIN API and return the session.

    Credentials are intentionally blank in the checked-in file; fill them in
    before use.  The returned session carries the basic-auth credentials on
    every subsequent request.
    """
    username = ""
    password = ""
    session = requests.Session()
    # Persist credentials on the session so every API call is authenticated.
    session.auth = (username, password)
    reply = session.post('https://api.worldquantbrain.com/authentication')
    print(reply.content)
    return session
def get_datasets(
    s,
    instrument_type: str = 'EQUITY',
    region: str = 'USA',
    delay: int = 1,
    universe: str = 'TOP3000'
):
    """Fetch the available data sets for the given simulation settings.

    Args:
        s: authenticated requests session from login().
        instrument_type, region, delay, universe: API query filters.
    Returns:
        pandas DataFrame built from the 'results' array of the response.
    """
    url = (
        "https://api.worldquantbrain.com/data-sets?"
        f"instrumentType={instrument_type}&region={region}"
        f"&delay={delay}&universe={universe}"
    )
    payload = s.get(url).json()
    return pd.DataFrame(payload['results'])
def get_datafields(
    s,
    instrument_type: str = 'EQUITY',
    region: str = 'USA',
    delay: int = 1,
    universe: str = 'TOP3000',
    dataset_id: str = '',
    search: str = ''
):
    """Page through /data-fields and return all fields as a DataFrame.

    Args:
        s: authenticated requests session.
        dataset_id: restrict to one data set (ignored when `search` is given).
        search: free-text search; when set, a fixed window of 100 results is
            fetched because the search endpoint's 'count' is not relied upon.
    Returns:
        pandas DataFrame of all datafield records across pages (50 per page).
    """
    base = (
        "https://api.worldquantbrain.com/data-fields?"
        f"&instrumentType={instrument_type}"
        f"&region={region}&delay={delay}&universe={universe}"
    )
    if len(search) == 0:
        url_template = base + f"&dataset.id={dataset_id}&limit=50" + "&offset={x}"
        # Ask the first page for the total record count.
        count = s.get(url_template.format(x=0)).json()['count']
    else:
        url_template = base + "&limit=50" + f"&search={search}" + "&offset={x}"
        # Fixed window: fetch at most two pages of search results.
        count = 100
    records = []
    for offset in range(0, count, 50):
        page = s.get(url_template.format(x=offset))
        records.extend(page.json()['results'])
    return pd.DataFrame(records)
def get_vec_fields(fields):
    """Wrap each VECTOR-type field in every permitted vector-aggregation op.

    Output is field-major: all ops for one field precede the next field.
    """
    # Add any additional vector operators you have permission for here.
    vec_ops = ["vec_avg", "vec_sum"]
    wrapped = []
    for field in fields:
        for vec_op in vec_ops:
            if vec_op != "vec_choose":
                wrapped.append(f"{vec_op}({field})")
            else:
                # vec_choose needs an element index; emit both ends of the vector.
                wrapped.extend([f"{vec_op}({field}, nth=-1)",
                                f"{vec_op}({field}, nth=0)"])
    return wrapped
def process_datafields(df):
    """Turn a datafield DataFrame into backfilled, winsorized field expressions.

    MATRIX fields are used directly; VECTOR fields are first wrapped by
    get_vec_fields().  Every field is backfilled 120 days and winsorized
    at 4 standard deviations.
    """
    matrix_ids = df[df['type'] == "MATRIX"]["id"].tolist()
    vector_ids = get_vec_fields(df[df['type'] == "VECTOR"]["id"].tolist())
    template = "winsorize(ts_backfill(%s, 120), std=4)"
    return [template % field for field in matrix_ids + vector_ids]
def ts_factory(op, field):
    """Apply a time-series operator to `field` over the standard lookbacks.

    Lookbacks approximate one week, month, quarter, half-year, and year of
    trading days.
    """
    lookbacks = [5, 22, 66, 120, 240]
    return [f"{op}({field}, {day})" for day in lookbacks]
def first_order_factory(fields, ops_set):
    """Expand every field by every operator into a flat list of alpha strings.

    The raw field itself is always included (the reverse op covers negation).
    Operator dispatch order matters: parameterized ts_ ops are matched before
    the generic ts_ prefix.
    """
    # ts_ ops that require one extra named parameter: op -> (param name, values).
    special_ts = {
        "ts_percentage": ("percentage", [0.5]),
        "ts_decay_exp_window": ("factor", [0.5]),
        "ts_moment": ("k", [2, 3, 4]),
        "ts_entropy": ("buckets", [10]),
    }
    alpha_set = []
    for field in fields:
        alpha_set.append(field)
        for op in ops_set:
            if op in special_ts:
                param, values = special_ts[op]
                alpha_set += ts_comp_factory(op, field, param, values)
            elif op.startswith("ts_") or op == "inst_tvr":
                alpha_set += ts_factory(op, field)
            elif op.startswith("vector"):
                alpha_set += vector_factory(op, field)
            elif op == "signed_power":
                alpha_set.append(f"{op}({field}, 2)")
            else:
                alpha_set.append(f"{op}({field})")
    return alpha_set
def load_task_pool(alpha_list, limit_of_children_simulations, limit_of_multi_simulations):
    '''
    Chunk a flat alpha list into pools of multi-simulation tasks.

    Input:
        alpha_list : list of (alpha, decay) tuples
        limit_of_children_simulations : number of children simulations in one multi-simulation
        limit_of_multi_simulations : number of simultaneous multi-simulations
    Output:
        pools : list of pools; each pool holds up to `limit_of_multi_simulations`
                tasks, and each task is a list of up to
                `limit_of_children_simulations` (alpha, decay) tuples.
    '''
    # First split into tasks (the children of one multi-simulation)...
    tasks = [alpha_list[i:i + limit_of_children_simulations]
             for i in range(0, len(alpha_list), limit_of_children_simulations)]
    # ...then group tasks into pools that run concurrently.
    pools = [tasks[i:i + limit_of_multi_simulations]
             for i in range(0, len(tasks), limit_of_multi_simulations)]
    return pools
def multi_simulate(alpha_pools, neut, region, universe, start):
    """Submit pools of multi-simulations to BRAIN and wait for each to finish.

    Args:
        alpha_pools: nested list from load_task_pool(); each pool holds tasks,
            each task a list of (alpha, decay) tuples.
        neut: neutralization setting forwarded to generate_sim_data().
        region, universe: simulation settings.
        start: pool index to resume from; earlier pools are skipped.
    """
    s = login()
    brain_api_url = 'https://api.worldquantbrain.com'
    for x, pool in enumerate(alpha_pools):
        if x < start: continue  # resume support: skip already-simulated pools
        progress_urls = []
        for y, task in enumerate(pool):
            # 10 tasks, 10 alpha in each task (sizes chosen by load_task_pool)
            sim_data_list = generate_sim_data(task, region, universe, neut)
            try:
                simulation_response = s.post('https://api.worldquantbrain.com/simulations', json=sim_data_list)
                # The multi-simulation's progress endpoint comes back in the
                # Location response header.
                simulation_progress_url = simulation_response.headers['Location']
                progress_urls.append(simulation_progress_url)
            except:
                # NOTE(review): bare except — a missing Location header usually
                # means throttling or an expired session, so back off and
                # re-login.  If the POST itself raised, simulation_response is
                # unbound here and this print raises NameError — confirm.
                print("location key error: %s"%simulation_response.content)
                sleep(600)
                s = login()
            print("pool %d task %d post done"%(x,y))
        for j, progress in enumerate(progress_urls):
            try:
                # Poll until the server stops sending a Retry-After header.
                while True:
                    simulation_progress = s.get(progress)
                    if simulation_progress.headers.get("Retry-After", 0) == 0:
                        break
                    sleep(float(simulation_progress.headers["Retry-After"]))
                status = simulation_progress.json().get("status", 0)
                if status != "COMPLETE":
                    print("Not complete : %s"%(progress))
                # The string below is intentionally-disabled code for tagging
                # each child alpha once the multi-simulation completes.
                """
                #alpha_id = simulation_progress.json()["alpha"]
                children = simulation_progress.json().get("children", 0)
                children_list = []
                for child in children:
                    child_progress = s.get(brain_api_url + "/simulations/" + child)
                    alpha_id = child_progress.json()["alpha"]
                    set_alpha_properties(s,
                        alpha_id,
                        name = "%s"%name,
                        color = None,)
                """
            except KeyError:
                print("look into: %s"%progress)
            except Exception as e:
                print(f"other error: {e}")
            print("pool %d task %d simulate done"%(x, j))
    print("Simulate done")
def generate_sim_data(alpha_list, region, uni, neut):
    """Build the JSON payload list for one multi-simulation POST.

    Args:
        alpha_list: iterable of (expression, decay) tuples.
        region, uni, neut: simulation region, universe, and neutralization.
    Returns:
        list of simulation dicts, one per alpha, with fixed EQUITY/delay-1
        settings.
    """
    payload = []
    for expression, decay in alpha_list:
        settings = {
            'instrumentType': 'EQUITY',
            'region': region,
            'universe': uni,
            'delay': 1,
            'decay': decay,
            'neutralization': neut,
            'truncation': 0.08,
            'pasteurization': 'ON',
            'testPeriod': 'P0Y',
            'unitHandling': 'VERIFY',
            'nanHandling': 'ON',
            'language': 'FASTEXPR',
            'visualization': False,
        }
        payload.append({'type': 'REGULAR',
                        'settings': settings,
                        'regular': expression})
    return payload
def set_alpha_properties(
    s,
    alpha_id,
    name: str = None,
    color: str = None,
    selection_desc: str = "None",
    combo_desc: str = "None",
    tags: list = None,
):
    """
    Function changes alpha's description parameters.

    Args:
        s: authenticated requests session.
        alpha_id: id of the alpha to patch.
        name, color: new metadata (None leaves the server-side value cleared).
        selection_desc, combo_desc: description texts.
        tags: tag list; defaults to ["ace_tag"] when omitted.
    """
    # A literal ["ace_tag"] default would be a shared mutable default argument;
    # use the None sentinel instead.
    if tags is None:
        tags = ["ace_tag"]
    params = {
        "color": color,
        "name": name,
        "tags": tags,
        "category": None,
        "regular": {"description": None},
        "combo": {"description": combo_desc},
        "selection": {"description": selection_desc},
    }
    s.patch(
        "https://api.worldquantbrain.com/alphas/" + alpha_id, json=params
    )
def get_alphas(start_date, end_date, sharpe_th, fitness_th, region, alpha_num, usage):
    """Page through the user's unsubmitted alphas and collect promising records.

    Args:
        start_date, end_date: "MM-DD" strings; the year 2025 is hard-coded in
            the URL templates below.
        sharpe_th, fitness_th: absolute thresholds; url_e selects alphas above
            +threshold, url_c alphas below -threshold (whose expression is then
            sign-flipped so all records trade the same direction).
        region: settings.region filter.
        alpha_num: maximum number of alphas to page through (100 per page).
        usage: when "submit", only the positive-side query is used.
    Returns:
        (output, s): the collected records and the live session.  Each record
        is [alpha_id, exp, sharpe, turnover, fitness, margin, dateCreated,
        decay] plus, when turnover > 0.3, one extra element holding a boosted
        decay suggestion.  NOTE(review): records with turnover <= 0.3 get no
        extra element, so rec[-1] is then the original decay — confirm that
        downstream readers such as prune() expect this.
    """
    s = login()
    output = []
    # 3E large 3C less  (%3E / %3C are URL-encoded ">" / "<"; %1F is the
    # encoded unit separator between the two status values)
    count = 0
    for i in range(0, alpha_num, 100):
        print(i)
        # Positive side: sharpe/fitness above threshold, best sharpe first.
        url_e = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d"%(i) \
            + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \
            + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \
            + "T00:00:00-04:00&is.fitness%3E" + str(fitness_th) + "&is.sharpe%3E" \
            + str(sharpe_th) + "&settings.region=" + region + "&order=-is.sharpe&hidden=false&type!=SUPER"
        # Negative side: sharpe/fitness below -threshold, worst sharpe first.
        url_c = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d"%(i) \
            + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \
            + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \
            + "T00:00:00-04:00&is.fitness%3C-" + str(fitness_th) + "&is.sharpe%3C-" \
            + str(sharpe_th) + "&settings.region=" + region + "&order=is.sharpe&hidden=false&type!=SUPER"
        urls = [url_e]
        if usage != "submit":
            urls.append(url_c)
        for url in urls:
            response = s.get(url)
            try:
                alpha_list = response.json()["results"]
                for j in range(len(alpha_list)):
                    alpha_id = alpha_list[j]["id"]
                    name = alpha_list[j]["name"]
                    dateCreated = alpha_list[j]["dateCreated"]
                    sharpe = alpha_list[j]["is"]["sharpe"]
                    fitness = alpha_list[j]["is"]["fitness"]
                    turnover = alpha_list[j]["is"]["turnover"]
                    margin = alpha_list[j]["is"]["margin"]
                    longCount = alpha_list[j]["is"]["longCount"]
                    shortCount = alpha_list[j]["is"]["shortCount"]
                    decay = alpha_list[j]["settings"]["decay"]
                    exp = alpha_list[j]['regular']['code']
                    count += 1
                    # Require a reasonably broad book before keeping the alpha.
                    if (longCount + shortCount) > 100:
                        # Flip negative-sharpe expressions to trade them long.
                        if sharpe < -sharpe_th:
                            exp = "-%s"%exp
                        rec = [alpha_id, exp, sharpe, turnover, fitness, margin, dateCreated, decay]
                        print(rec)
                        # Higher turnover -> suggest a larger decay to damp it.
                        if turnover > 0.7:
                            rec.append(decay*4)
                        elif turnover > 0.6:
                            rec.append(decay*3+3)
                        elif turnover > 0.5:
                            rec.append(decay*3)
                        elif turnover > 0.4:
                            rec.append(decay*2)
                        elif turnover > 0.35:
                            rec.append(decay+4)
                        elif turnover > 0.3:
                            rec.append(decay+2)
                        output.append(rec)
            except:
                # NOTE(review): bare except — any parse failure is treated as a
                # session expiry and triggers a re-login; the page is skipped.
                print("%d finished re-login"%i)
                s = login()
    print("count: %d"%count)
    return output, s # also return the session so the caller can reuse it
def prune(next_alpha_recs, prefix, keep_num):
    """Keep at most `keep_num` records per datafield.

    `prefix` is the datafield prefix (fnd6, mdl175, ...); the field name is
    whatever follows the prefix up to the first comma in the expression.
    Records with negative sharpe are bucketed separately under a '-'-prefixed
    field.  Records are assumed pre-sorted best-first, so the first `keep_num`
    seen per field are the ones kept.

    Returns:
        list of [expression, decay] pairs.
    """
    seen = defaultdict(int)
    kept = []
    for rec in next_alpha_recs:
        expression, sharpe, decay = rec[1], rec[2], rec[-1]
        field = expression.split(prefix)[-1].split(",")[0]
        if sharpe < 0:
            field = "-%s" % field
        if seen[field] < keep_num:
            seen[field] += 1
            kept.append([expression, decay])
    return kept
def get_group_second_order_factory(first_order, group_ops, region):
    """Cross every first-order alpha with every group operator for `region`.

    Ordering is first-order-major: all group expansions of one alpha precede
    the next alpha.
    """
    return [alpha
            for fo in first_order
            for group_op in group_ops
            for alpha in group_factory(group_op, fo, region)]
def group_factory(op, field, region):
    """Expand a group operator over every group definition valid for `region`.

    Args:
        op: group operator name.  Operators starting with "group_vector" get a
            (field, vector, group) signature; "group_percentage" operators get
            an extra percentage=0.5 argument; everything else is op(field, group).
        field: alpha expression to be grouped.
        region: market region code (CHN, USA, GLB, ...).  Unknown regions get
            only the region-independent base groups.
    Returns:
        list of alpha expression strings, one per (group[, vector]) combination.
    """
    output = []
    # Vectors consumed by the group_vector* operators.
    vectors = ["cap"]
    chn_group_13 = ['pv13_h_min2_sector', 'pv13_di_6l', 'pv13_rcsed_6l', 'pv13_di_5l', 'pv13_di_4l',
                    'pv13_di_3l', 'pv13_di_2l', 'pv13_di_1l', 'pv13_parent', 'pv13_level']
    chn_group_1 = ['sta1_top3000c30','sta1_top3000c20','sta1_top3000c10','sta1_top3000c2','sta1_top3000c5']
    chn_group_2 = ['sta2_top3000_fact4_c10','sta2_top2000_fact4_c50','sta2_top3000_fact3_c20']
    hkg_group_13 = ['pv13_10_f3_g2_minvol_1m_sector', 'pv13_10_minvol_1m_sector', 'pv13_20_minvol_1m_sector',
                    'pv13_2_minvol_1m_sector', 'pv13_5_minvol_1m_sector', 'pv13_1l_scibr', 'pv13_3l_scibr',
                    'pv13_2l_scibr', 'pv13_4l_scibr', 'pv13_5l_scibr']
    hkg_group_1 = ['sta1_allc50','sta1_allc5','sta1_allxjp_513_c20','sta1_top2000xjp_513_c5']
    hkg_group_2 = ['sta2_all_xjp_513_all_fact4_c10','sta2_top2000_xjp_513_top2000_fact3_c10',
                   'sta2_allfactor_xjp_513_13','sta2_top2000_xjp_513_top2000_fact3_c20']
    twn_group_13 = ['pv13_2_minvol_1m_sector','pv13_20_minvol_1m_sector','pv13_10_minvol_1m_sector',
                    'pv13_5_minvol_1m_sector','pv13_10_f3_g2_minvol_1m_sector','pv13_5_f3_g2_minvol_1m_sector',
                    'pv13_2_f4_g3_minvol_1m_sector']
    twn_group_1 = ['sta1_allc50','sta1_allxjp_513_c50','sta1_allxjp_513_c20','sta1_allxjp_513_c2',
                   'sta1_allc20','sta1_allxjp_513_c5','sta1_allxjp_513_c10','sta1_allc2','sta1_allc5']
    twn_group_2 = ['sta2_allfactor_xjp_513_0','sta2_all_xjp_513_all_fact3_c20',
                   'sta2_all_xjp_513_all_fact4_c20','sta2_all_xjp_513_all_fact4_c50']
    usa_group_13 = ['pv13_h_min2_3000_sector','pv13_r2_min20_3000_sector','pv13_r2_min2_3000_sector',
                    'pv13_r2_min2_3000_sector', 'pv13_h_min2_focused_pureplay_3000_sector']
    usa_group_1 = ['sta1_top3000c50','sta1_allc20','sta1_allc10','sta1_top3000c20','sta1_allc5']
    usa_group_2 = ['sta2_top3000_fact3_c50','sta2_top3000_fact4_c20','sta2_top3000_fact4_c10']
    asi_group_13 = ['pv13_20_minvol_1m_sector', 'pv13_5_f3_g2_minvol_1m_sector', 'pv13_10_f3_g2_minvol_1m_sector',
                    'pv13_2_f4_g3_minvol_1m_sector', 'pv13_10_minvol_1m_sector', 'pv13_5_minvol_1m_sector']
    asi_group_1 = ['sta1_allc50', 'sta1_allc10', 'sta1_minvol1mc50','sta1_minvol1mc20',
                   'sta1_minvol1m_normc20', 'sta1_minvol1m_normc50']
    jpn_group_1 = ['sta1_alljpn_513_c5', 'sta1_alljpn_513_c50', 'sta1_alljpn_513_c2', 'sta1_alljpn_513_c20']
    jpn_group_2 = ['sta2_top2000_jpn_513_top2000_fact3_c20', 'sta2_all_jpn_513_all_fact1_c5',
                   'sta2_allfactor_jpn_513_9', 'sta2_all_jpn_513_all_fact1_c10']
    jpn_group_13 = ['pv13_2_minvol_1m_sector', 'pv13_2_f4_g3_minvol_1m_sector', 'pv13_10_minvol_1m_sector',
                    'pv13_10_f3_g2_minvol_1m_sector', 'pv13_all_delay_1_parent', 'pv13_all_delay_1_level']
    kor_group_13 = ['pv13_10_f3_g2_minvol_1m_sector', 'pv13_5_minvol_1m_sector', 'pv13_5_f3_g2_minvol_1m_sector',
                    'pv13_2_minvol_1m_sector', 'pv13_20_minvol_1m_sector', 'pv13_2_f4_g3_minvol_1m_sector']
    kor_group_1 = ['sta1_allc20','sta1_allc50','sta1_allc2','sta1_allc10','sta1_minvol1mc50',
                   'sta1_allxjp_513_c10', 'sta1_top2000xjp_513_c50']
    kor_group_2 = ['sta2_all_xjp_513_all_fact1_c50','sta2_top2000_xjp_513_top2000_fact2_c50',
                   'sta2_all_xjp_513_all_fact4_c50','sta2_all_xjp_513_all_fact4_c5']
    eur_group_13 = ['pv13_5_sector', 'pv13_2_sector', 'pv13_v3_3l_scibr', 'pv13_v3_2l_scibr', 'pv13_2l_scibr',
                    'pv13_52_sector', 'pv13_v3_6l_scibr', 'pv13_v3_4l_scibr', 'pv13_v3_1l_scibr']
    eur_group_1 = ['sta1_allc10', 'sta1_allc2', 'sta1_top1200c2', 'sta1_allc20', 'sta1_top1200c10']
    eur_group_2 = ['sta2_top1200_fact3_c50','sta2_top1200_fact3_c20','sta2_top1200_fact4_c50']
    glb_group_1 = ['sta1_allc20', 'sta1_allc10', 'sta1_allc50', 'sta1_allc5']
    glb_group_2 = ['sta2_all_fact4_c50', 'sta2_all_fact4_c20', 'sta2_all_fact3_c20', 'sta2_all_fact4_c10']
    # NOTE: a first glb_group_13 definition used to be shadowed by this one
    # (and an unused usa_group_6 existed); both dead definitions were removed.
    glb_group_13 = ['pv13_2_sector', 'pv13_10_sector', 'pv13_3l_scibr', 'pv13_2l_scibr', 'pv13_1l_scibr',
                    'pv13_52_minvol_1m_all_delay_1_sector','pv13_52_minvol_1m_sector','pv13_52_minvol_1m_sector']
    amr_group_13 = ['pv13_4l_scibr', 'pv13_1l_scibr', 'pv13_hierarchy_min51_f1_sector',
                    'pv13_hierarchy_min2_600_sector', 'pv13_r2_min2_sector', 'pv13_h_min20_600_sector']
    #bps_group = "bucket(rank(fnd28_value_05480), range='0.1, 1, 0.1')"
    #pb_group = "bucket(rank(close/fnd28_value_05480), range='0.1, 1, 0.1')"
    cap_group = "bucket(rank(cap), range='0.1, 1, 0.1')"
    asset_group = "bucket(rank(assets),range='0.1, 1, 0.1')"
    sector_cap_group = "bucket(group_rank(cap, sector),range='0.1, 1, 0.1')"
    sector_asset_group = "bucket(group_rank(assets, sector),range='0.1, 1, 0.1')"
    vol_group = "bucket(rank(ts_std_dev(returns,20)),range = '0.1, 1, 0.1')"
    liquidity_group = "bucket(rank(close*volume),range = '0.1, 1, 0.1')"
    # Region-independent base groups, always present.
    groups = ["market","sector", "industry", "subindustry",
              cap_group, asset_group, sector_cap_group, sector_asset_group, vol_group, liquidity_group]
    region_groups = {
        "CHN": chn_group_13 + chn_group_1 + chn_group_2,
        "TWN": twn_group_13 + twn_group_1 + twn_group_2,
        "ASI": asi_group_13 + asi_group_1,
        "USA": usa_group_13 + usa_group_1 + usa_group_2,
        "HKG": hkg_group_13 + hkg_group_1 + hkg_group_2,
        "KOR": kor_group_13 + kor_group_1 + kor_group_2,
        "EUR": eur_group_13 + eur_group_1 + eur_group_2,
        "GLB": glb_group_13 + glb_group_1 + glb_group_2,
        "AMR": amr_group_13,
        "JPN": jpn_group_1 + jpn_group_2 + jpn_group_13,
    }
    groups += region_groups.get(region, [])
    for group in groups:
        if op.startswith("group_vector"):
            for vector in vectors:
                output.append("%s(%s,%s,densify(%s))"%(op, field, vector, group))
        elif op.startswith("group_percentage"):
            output.append("%s(%s,densify(%s),percentage=0.5)"%(op, field, group))
        else:
            output.append("%s(%s,densify(%s))"%(op, field, group))
    return output
def trade_when_factory(op, field, region):
    """Wrap `field` in op(open_event, field, exit_event) for an event grid.

    Args:
        op: trade-when style operator name.
        field: alpha expression to gate.
        region: accepted for interface compatibility but currently unused —
            the event grid below is region-independent.  (Several unused
            region-specific event lists were removed from this function.)
    Returns:
        list of len(open_events) * len(exit_events) alpha expressions,
        open-event-major.
    """
    open_events = ["ts_arg_max(volume, 5) == 0", "ts_corr(close, volume, 20) < 0",
                   "ts_corr(close, volume, 5) < 0", "ts_mean(volume,10)>ts_mean(volume,60)",
                   "group_rank(ts_std_dev(returns,60), sector) > 0.7", "ts_zscore(returns,60) > 2",
                   "ts_arg_min(volume, 5) > 3",
                   "ts_std_dev(returns, 5) > ts_std_dev(returns, 20)",
                   "ts_arg_max(close, 5) == 0", "ts_arg_max(close, 20) == 0",
                   "ts_corr(close, volume, 5) > 0", "ts_corr(close, volume, 5) > 0.3", "ts_corr(close, volume, 5) > 0.5",
                   "ts_corr(close, volume, 20) > 0", "ts_corr(close, volume, 20) > 0.3", "ts_corr(close, volume, 20) > 0.5",
                   "ts_regression(returns, %s, 5, lag = 0, rettype = 2) > 0"%field,
                   "ts_regression(returns, %s, 20, lag = 0, rettype = 2) > 0"%field,
                   "ts_regression(returns, ts_step(20), 20, lag = 0, rettype = 2) > 0",
                   "ts_regression(returns, ts_step(5), 5, lag = 0, rettype = 2) > 0"]
    # "-1" means "never force an exit"; the other exit closes on a 10% move.
    exit_events = ["abs(returns) > 0.1", "-1"]
    return ["%s(%s, %s, %s)"%(op, oe, field, ee)
            for oe in open_events
            for ee in exit_events]
def check_submission(alpha_bag, gold_bag, start):
    """Run submission checks over alpha_bag, collecting passing alphas.

    Args:
        alpha_bag: list of alpha ids.  Ids that hit transient errors are
            re-appended to it, so the list grows while being iterated — this is
            a deliberate retry queue; do not convert to iteration over a copy.
        gold_bag: accumulator of (alpha_id, prod_correlation) tuples for
            alphas that pass all checks.
        start: index to resume from.
    Returns:
        gold_bag with any newly passing alphas appended.
    """
    depot = []  # ids whose check response could not be parsed ("error")
    s = login()
    for idx, g in enumerate(alpha_bag):
        if idx < start:
            continue
        if idx % 5 == 0:
            # lightweight progress indicator
            print(idx)
        if idx % 200 == 0:
            # periodic re-login to keep the session fresh on long runs
            s = login()
        pc = get_check_submission(s, g)
        if pc == "sleep":
            # throttled or logged out: back off, re-login, retry this id later
            sleep(100)
            s = login()
            alpha_bag.append(g)
        elif pc != pc:
            # pc is nan (NaN != NaN): correlation value not ready, retry later
            print("check self-corrlation error")
            sleep(100)
            alpha_bag.append(g)
        elif pc == "fail":
            # at least one submission check FAILed: drop this alpha
            continue
        elif pc == "error":
            depot.append(g)
        else:
            # pc is the prod-correlation value: the alpha passed all checks
            print(g)
            gold_bag.append((g, pc))
    print(depot)
    return gold_bag
def get_check_submission(s, alpha_id):
    """Run the server-side submission checks for one alpha.

    Args:
        s: authenticated requests session.
        alpha_id: id of the alpha to check.
    Returns:
        The PROD_CORRELATION value (may be NaN) when no check FAILs,
        "fail" when some check FAILs,
        "sleep" when the response lacks an "is" payload (session expired),
        "error" when the response cannot be parsed at all.
    """
    while True:
        result = s.get("https://api.worldquantbrain.com/alphas/" + alpha_id + "/check")
        # requests headers are case-insensitive: "retry-after" matches
        # the server's "Retry-After".
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    try:
        if result.json().get("is", 0) == 0:
            # Missing "is" payload usually means the session expired.
            print("logged out")
            return "sleep"
        checks_df = pd.DataFrame(
            result.json()["is"]["checks"]
        )
        pc = checks_df[checks_df.name == "PROD_CORRELATION"]["value"].values[0]
        if not any(checks_df["result"] == "FAIL"):
            return pc
        else:
            return "fail"
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit; narrow to Exception so Ctrl-C still works.
        print("catch: %s"%(alpha_id))
        return "error"
def view_alphas(gold_bag):
    """Print submission candidates sorted by sharpe, best first.

    Args:
        gold_bag: (alpha_id, prod_correlation) tuples from check_submission().
    Each printed row is [id, sharpe, turnover, fitness, margin, dateCreated,
    expression, prod_correlation].
    """
    s = login()
    rows = []
    for alpha_id, prod_corr in gold_bag:
        t = locate_alpha(s, alpha_id)
        # Reorder locate_alpha's record and append the prod correlation.
        rows.append([t[0], t[2], t[3], t[4], t[5], t[6], t[1], prod_corr])
    rows.sort(reverse=True, key=lambda row: row[1])
    for row in rows:
        print(row)
def locate_alpha(s, alpha_id):
    """Fetch one alpha's metadata, honoring Retry-After throttling.

    Returns:
        [alpha_id, code, sharpe, turnover, fitness, margin, dateCreated, decay]
    """
    while True:
        alpha = s.get("https://api.worldquantbrain.com/alphas/" + alpha_id)
        # requests headers are case-insensitive; keep polling while throttled.
        if "retry-after" not in alpha.headers:
            break
        time.sleep(float(alpha.headers["Retry-After"]))
    metrics = json.loads(alpha.content.decode('utf-8'))
    perf = metrics["is"]
    return [
        alpha_id,
        metrics['regular']['code'],
        perf["sharpe"],
        perf["turnover"],
        perf["fitness"],
        perf["margin"],
        metrics["dateCreated"],
        metrics["settings"]["decay"],
    ]
# Factories for additional operator families.
def vector_factory(op, field):
    """Apply a vector operator to `field` against each supported vector."""
    vectors = ["cap"]
    return [f"{op}({field}, {vector})" for vector in vectors]
def ts_comp_factory(op, field, factor, paras):
    """Expand a parameterized ts_ operator over lookbacks x parameter values.

    Args:
        op: operator name (e.g. "ts_percentage", "ts_moment").
        field: expression the operator is applied to.
        factor: name of the extra keyword parameter ("percentage", "k", ...).
        paras: parameter values; floats render as %.1f, ints as %d.
    Returns:
        list of alpha expression strings, lookback-major.
    """
    lookbacks = [5, 22, 66, 240]
    output = []
    for day, para in product(lookbacks, paras):
        if isinstance(para, float):
            alpha = "%s(%s, %d, %s=%.1f)"%(op, field, day, factor, para)
        elif isinstance(para, int) and not isinstance(para, bool):
            alpha = "%s(%s, %d, %s=%d)"%(op, field, day, factor, para)
        else:
            # Previously any other parameter type left `alpha` unbound,
            # raising NameError (or silently reusing the previous value);
            # render such values generically instead.
            alpha = "%s(%s, %d, %s=%s)"%(op, field, day, factor, para)
        output.append(alpha)
    return output
def twin_field_factory(op, field, fields):
    """Pair `field` with every other field under a two-input ts_ operator.

    Args:
        op: two-field operator name (e.g. "ts_corr").
        field: the anchor field (excluded from its own counterparts).
        fields: candidate counterpart fields; duplicates are dropped.
    Returns:
        list of alpha expression strings, lookback-major, counterparts in
        first-seen order.
    """
    lookbacks = [5, 22, 66, 240]
    # dict.fromkeys dedupes while preserving input order; the previous
    # set() difference gave a nondeterministic ordering across runs
    # (string hash randomization).
    counterparts = [f for f in dict.fromkeys(fields) if f != field]
    output = []
    for day in lookbacks:
        for counterpart in counterparts:
            output.append("%s(%s, %s, %d)"%(op, field, counterpart, day))
    return output
def login_hk():
    """Authenticate, handling the persona (biometrics) flow when required.

    Credentials are intentionally blank; fill them in before use.  Returns the
    session whether or not authentication ultimately succeeded.
    """
    username = ""
    password = ""
    session = requests.Session()
    # Persist credentials on the session for all subsequent requests.
    session.auth = (username, password)
    response = session.post('https://api.worldquantbrain.com/authentication')
    if response.status_code != requests.codes.unauthorized:
        print("Logged in successfully.")
        return session
    if response.headers.get("WWW-Authenticate") != "persona":
        # Unauthorized but not a biometrics challenge: bad credentials.
        print("\nIncorrect username or password. Please check your credentials.\n")
        return session
    # Biometrics challenge: the user must complete it in a browser first.
    print(
        "Complete biometrics authentication by scanning your face. Follow the link: \n"
        + urljoin(response.url, response.headers["Location"]) + "\n"
    )
    input("Press any key after you complete the biometrics authentication.")
    biometrics_response = session.post(urljoin(response.url, response.headers["Location"]))
    while biometrics_response.status_code != 201:
        input("Biometrics authentication is not complete. Please try again and press any key when completed.")
        biometrics_response = session.post(urljoin(response.url, response.headers["Location"]))
    print("Biometrics authentication completed.")
    return session