You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
760 lines
30 KiB
760 lines
30 KiB
import requests
|
|
from os import environ
|
|
from time import sleep
|
|
import time
|
|
import json
|
|
import pandas as pd
|
|
import random
|
|
import pickle
|
|
from urllib.parse import urljoin
|
|
from itertools import product
|
|
from itertools import combinations
|
|
from collections import defaultdict
|
|
import pickle
|
|
|
|
|
|
|
|
# Cross-sectional first-order operators: each takes a single field argument.
basic_ops = ["reverse", "inverse", "rank", "zscore", "quantile", "normalize"]

# Time-series operators: each is expanded over lookback windows by ts_factory.
ts_ops = ["ts_rank", "ts_zscore", "ts_delta", "ts_sum", "ts_delay",
          "ts_std_dev", "ts_mean", "ts_arg_min", "ts_arg_max","ts_scale", "ts_quantile"]

# Default operator set consumed by first_order_factory.
ops_set = basic_ops + ts_ops
|
|
|
|
def login():
    """Authenticate against the WorldQuant BRAIN API and return the session.

    Credentials are read from the BRAIN_USERNAME / BRAIN_PASSWORD environment
    variables (the module already imports `environ` for this purpose), falling
    back to the empty strings that were previously hard-coded here.

    Returns:
        requests.Session: session with basic auth attached; the POST to
        /authentication has already been issued on it.
    """
    # Prefer environment variables over hard-coding secrets in source.
    username = environ.get("BRAIN_USERNAME", "")
    password = environ.get("BRAIN_PASSWORD", "")

    # Create a session to persistently store the headers
    s = requests.Session()

    # Save credentials into session
    s.auth = (username, password)

    # Send a POST request to the /authentication API
    response = s.post('https://api.worldquantbrain.com/authentication')
    # Echo the response body so auth failures are visible in the console.
    print(response.content)
    return s
|
|
|
|
|
|
def get_datasets(
    s,
    instrument_type: str = 'EQUITY',
    region: str = 'USA',
    delay: int = 1,
    universe: str = 'TOP3000'
):
    """Fetch the available datasets for a region/universe as a DataFrame.

    Args:
        s: authenticated requests.Session from login().
        instrument_type: instrument class filter.
        region: market region code (USA, EUR, ...).
        delay: data delay setting.
        universe: universe filter (e.g. TOP3000).

    Returns:
        pandas.DataFrame built from the API's 'results' list.
    """
    # FIX: the query string previously contained the mojibake '®ion='
    # (an HTML-entity-decoded '&region='), which dropped the region filter.
    url = "https://api.worldquantbrain.com/data-sets?" +\
        f"instrumentType={instrument_type}&region={region}&delay={str(delay)}&universe={universe}"
    result = s.get(url)
    datasets_df = pd.DataFrame(result.json()['results'])
    return datasets_df
|
|
|
|
|
|
def get_datafields(
    s,
    instrument_type: str = 'EQUITY',
    region: str = 'USA',
    delay: int = 1,
    universe: str = 'TOP3000',
    dataset_id: str = '',
    search: str = ''
):
    """Page through the data-fields endpoint and return all fields as a DataFrame.

    Args:
        s: authenticated requests.Session.
        instrument_type/region/delay/universe: standard simulation filters.
        dataset_id: restrict to one dataset (ignored when `search` is given).
        search: free-text search; when non-empty the endpoint's count is not
            reliable, so a fixed 100 results (2 pages of 50) are fetched.

    Returns:
        pandas.DataFrame of the flattened per-page 'results' lists.
    """
    # FIX: both URL templates previously contained the mojibake '®ion='
    # (an HTML-entity-decoded '&region='), which dropped the region filter.
    if len(search) == 0:
        url_template = "https://api.worldquantbrain.com/data-fields?" +\
            f"&instrumentType={instrument_type}" +\
            f"&region={region}&delay={str(delay)}&universe={universe}&dataset.id={dataset_id}&limit=50" +\
            "&offset={x}"
        count = s.get(url_template.format(x=0)).json()['count']
    else:
        url_template = "https://api.worldquantbrain.com/data-fields?" +\
            f"&instrumentType={instrument_type}" +\
            f"&region={region}&delay={str(delay)}&universe={universe}&limit=50" +\
            f"&search={search}" +\
            "&offset={x}"
        # Search responses do not expose a usable total; page a fixed 100.
        count = 100

    datafields_list = []
    for x in range(0, count, 50):
        datafields = s.get(url_template.format(x=x))
        datafields_list.append(datafields.json()['results'])

    # Flatten the list of pages into one list of field records.
    datafields_list_flat = [item for sublist in datafields_list for item in sublist]

    datafields_df = pd.DataFrame(datafields_list_flat)
    return datafields_df
|
|
|
|
def get_vec_fields(fields):
    """Wrap each VECTOR-type field in every permitted vector operator.

    Args:
        fields: iterable of raw vector datafield ids.

    Returns:
        list of expression strings, one per (field, operator) combination;
        "vec_choose" (if enabled) emits both last- and first-element variants.
    """
    # Add additional vector operators here once access has been granted.
    vec_ops = ["vec_avg", "vec_sum"]

    vec_fields = []
    for field, vec_op in product(fields, vec_ops):
        if vec_op == "vec_choose":
            # vec_choose needs an element selector: emit last and first.
            vec_fields.append("%s(%s, nth=-1)"%(vec_op, field))
            vec_fields.append("%s(%s, nth=0)"%(vec_op, field))
        else:
            vec_fields.append("%s(%s)"%(vec_op, field))

    return vec_fields
|
|
|
|
def process_datafields(df):
    """Turn a datafields DataFrame into backfilled, winsorized expressions.

    MATRIX fields are used directly; VECTOR fields are first expanded by
    get_vec_fields.  Every result is wrapped in ts_backfill (120 days) and
    winsorize (clip at 4 standard deviations).
    """
    matrix_ids = df[df['type'] == "MATRIX"]["id"].tolist()
    vector_ids = df[df['type'] == "VECTOR"]["id"].tolist()
    all_fields = matrix_ids + get_vec_fields(vector_ids)
    template = "winsorize(ts_backfill(%s, 120), std=4)"
    return [template % field for field in all_fields]
|
|
|
|
def ts_factory(op, field):
    """Expand a time-series operator over the standard lookback windows.

    Args:
        op: time-series operator name (e.g. "ts_rank").
        field: field expression to wrap.

    Returns:
        list of "op(field, day)" strings, one per window.
    """
    #days = [3, 5, 10, 20, 60, 120, 240]
    days = [5, 22, 66, 120, 240]
    return ["%s(%s, %d)"%(op, field, day) for day in days]
|
|
|
|
def first_order_factory(fields, ops_set):
    """Build every first-order alpha expression for the given fields/operators.

    Each field is emitted raw, then combined with every operator; operators
    that need extra parameters are routed to their dedicated factories.

    Args:
        fields: list of field expressions.
        ops_set: list of operator names (shadows the module-level default).

    Returns:
        list of alpha expression strings.
    """
    alpha_set = []
    for field in fields:
        # The raw field itself; the reverse op covers the negated variant.
        alpha_set.append(field)
        #alpha_set.append("-%s"%field)
        for op in ops_set:
            if op == "ts_percentage":
                alpha_set += ts_comp_factory(op, field, "percentage", [0.5])
            elif op == "ts_decay_exp_window":
                alpha_set += ts_comp_factory(op, field, "factor", [0.5])
            elif op == "ts_moment":
                alpha_set += ts_comp_factory(op, field, "k", [2, 3, 4])
            elif op == "ts_entropy":
                alpha_set += ts_comp_factory(op, field, "buckets", [10])
            elif op.startswith("ts_") or op == "inst_tvr":
                alpha_set += ts_factory(op, field)
            elif op.startswith("vector"):
                alpha_set += vector_factory(op, field)
            elif op == "signed_power":
                alpha_set.append("%s(%s, 2)"%(op, field))
            else:
                alpha_set.append("%s(%s)"%(op, field))

    return alpha_set
|
|
|
|
|
|
def load_task_pool(alpha_list, limit_of_children_simulations, limit_of_multi_simulations):
    """Partition alphas into pools of multi-simulation tasks.

    Args:
        alpha_list: list of (alpha, decay) tuples.
        limit_of_children_simulations: children simulations per multi-simulation.
        limit_of_multi_simulations: simultaneous multi-simulations per pool.

    Returns:
        pools: list of pools; each pool holds up to
        `limit_of_multi_simulations` tasks, and each task holds up to
        `limit_of_children_simulations` (alpha, decay) tuples.
    """
    def _chunk(seq, size):
        # Slice seq into consecutive pieces of at most `size` elements.
        return [seq[i:i + size] for i in range(0, len(seq), size)]

    tasks = _chunk(alpha_list, limit_of_children_simulations)
    pools = _chunk(tasks, limit_of_multi_simulations)
    return pools
|
|
|
|
def multi_simulate(alpha_pools, neut, region, universe, start):
    """Post each pool of multi-simulations to BRAIN and wait for completion.

    Args:
        alpha_pools: output of load_task_pool -- pools of tasks of (alpha, decay).
        neut: neutralization setting forwarded to generate_sim_data.
        region: simulation region code.
        universe: simulation universe.
        start: pool index to resume from (earlier pools are skipped).
    """
    s = login()

    brain_api_url = 'https://api.worldquantbrain.com'

    for x, pool in enumerate(alpha_pools):
        if x < start: continue
        progress_urls = []
        for y, task in enumerate(pool):
            # 10 tasks, 10 alpha in each task
            sim_data_list = generate_sim_data(task, region, universe, neut)
            try:
                simulation_response = s.post('https://api.worldquantbrain.com/simulations', json=sim_data_list)
                simulation_progress_url = simulation_response.headers['Location']
                progress_urls.append(simulation_progress_url)
            except:
                # NOTE(review): bare except also reads simulation_response,
                # which is unbound if the POST itself raised -- confirm intent.
                # On failure: back off 10 minutes and refresh the session.
                print("location key error: %s"%simulation_response.content)
                sleep(600)
                s = login()

        # NOTE(review): y is the index of the LAST task posted in this pool.
        print("pool %d task %d post done"%(x,y))

        for j, progress in enumerate(progress_urls):
            try:
                # Poll until the server stops sending a Retry-After header
                # (absence / 0 means the multi-simulation has finished).
                while True:
                    simulation_progress = s.get(progress)
                    if simulation_progress.headers.get("Retry-After", 0) == 0:
                        break
                    #print("Sleeping for " + simulation_progress.headers["Retry-After"] + " seconds")
                    sleep(float(simulation_progress.headers["Retry-After"]))

                status = simulation_progress.json().get("status", 0)
                if status != "COMPLETE":
                    print("Not complete : %s"%(progress))

                """
                #alpha_id = simulation_progress.json()["alpha"]
                children = simulation_progress.json().get("children", 0)
                children_list = []
                for child in children:
                    child_progress = s.get(brain_api_url + "/simulations/" + child)
                    alpha_id = child_progress.json()["alpha"]

                    set_alpha_properties(s,
                        alpha_id,
                        name = "%s"%name,
                        color = None,)
                """
            except KeyError:
                print("look into: %s"%progress)
            except Exception as e:
                print(f"other error: {e}")

        # NOTE(review): j is the index of the last progress URL polled.
        print("pool %d task %d simulate done"%(x, j))

    print("Simulate done")
|
|
|
|
def generate_sim_data(alpha_list, region, uni, neut):
    """Build the JSON payload list for one multi-simulation POST.

    Args:
        alpha_list: iterable of (alpha_expression, decay) pairs.
        region: simulation region code.
        uni: universe name.
        neut: neutralization setting.

    Returns:
        list of REGULAR-simulation payload dicts, one per alpha.
    """
    def _payload(alpha, decay):
        # One child simulation: fixed settings plus the per-alpha decay.
        return {
            'type': 'REGULAR',
            'settings': {
                'instrumentType': 'EQUITY',
                'region': region,
                'universe': uni,
                'delay': 1,
                'decay': decay,
                'neutralization': neut,
                'truncation': 0.08,
                'pasteurization': 'ON',
                'testPeriod': 'P0Y',
                'unitHandling': 'VERIFY',
                'nanHandling': 'ON',
                'language': 'FASTEXPR',
                'visualization': False,
            },
            'regular': alpha,
        }

    return [_payload(alpha, decay) for alpha, decay in alpha_list]
|
|
|
|
def set_alpha_properties(
    s,
    alpha_id,
    name: str = None,
    color: str = None,
    selection_desc: str = "None",
    combo_desc: str = "None",
    tags: list = None,
):
    """
    Function changes alpha's description parameters.

    Args:
        s: authenticated requests.Session.
        alpha_id: id of the alpha to patch.
        name/color: optional display properties.
        selection_desc/combo_desc: description texts for the respective sections.
        tags: list of tag strings; defaults to ["ace_tag"].
    """
    # FIX: the default used to be the mutable literal ["ace_tag"], which is
    # shared across calls; resolve the default per call instead.
    if tags is None:
        tags = ["ace_tag"]

    params = {
        "color": color,
        "name": name,
        "tags": tags,
        "category": None,
        "regular": {"description": None},
        "combo": {"description": combo_desc},
        "selection": {"description": selection_desc},
    }
    response = s.patch(
        "https://api.worldquantbrain.com/alphas/" + alpha_id, json=params
    )
|
|
|
|
def get_alphas(start_date, end_date, sharpe_th, fitness_th, region, alpha_num, usage):
    """Page through the user's unsubmitted alphas and collect candidates.

    Fetches up to alpha_num alphas created in 2025 between start_date and
    end_date (both "MM-DD" strings) whose |sharpe| and |fitness| exceed the
    thresholds.  url_e selects the large-positive side, url_c the mirrored
    negative side (skipped when usage == "submit"); negative-sharpe alphas
    are flipped by negating the expression.  A turnover-dependent adjusted
    decay is appended to high-turnover records.

    Returns:
        (output, s): list of
        [alpha_id, exp, sharpe, turnover, fitness, margin, dateCreated,
         decay, (optional adjusted decay)] records, plus the live session.
    """
    s = login()
    output = []
    # 3E large 3C less
    count = 0
    for i in range(0, alpha_num, 100):
        print(i)
        # %3E / %3C are URL-encoded '>' / '<'; %1F separates status values.
        url_e = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d"%(i) \
            + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \
            + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \
            + "T00:00:00-04:00&is.fitness%3E" + str(fitness_th) + "&is.sharpe%3E" \
            + str(sharpe_th) + "&settings.region=" + region + "&order=-is.sharpe&hidden=false&type!=SUPER"
        url_c = "https://api.worldquantbrain.com/users/self/alphas?limit=100&offset=%d"%(i) \
            + "&status=UNSUBMITTED%1FIS_FAIL&dateCreated%3E=2025-" + start_date \
            + "T00:00:00-04:00&dateCreated%3C2025-" + end_date \
            + "T00:00:00-04:00&is.fitness%3C-" + str(fitness_th) + "&is.sharpe%3C-" \
            + str(sharpe_th) + "&settings.region=" + region + "&order=is.sharpe&hidden=false&type!=SUPER"
        urls = [url_e]
        if usage != "submit":
            urls.append(url_c)
        for url in urls:
            response = s.get(url)
            #print(response.json())
            try:
                alpha_list = response.json()["results"]
                #print(response.json())
                for j in range(len(alpha_list)):
                    alpha_id = alpha_list[j]["id"]
                    name = alpha_list[j]["name"]
                    dateCreated = alpha_list[j]["dateCreated"]
                    sharpe = alpha_list[j]["is"]["sharpe"]
                    fitness = alpha_list[j]["is"]["fitness"]
                    turnover = alpha_list[j]["is"]["turnover"]
                    margin = alpha_list[j]["is"]["margin"]
                    longCount = alpha_list[j]["is"]["longCount"]
                    shortCount = alpha_list[j]["is"]["shortCount"]
                    decay = alpha_list[j]["settings"]["decay"]
                    exp = alpha_list[j]['regular']['code']
                    count += 1
                    #if (sharpe > 1.2 and sharpe < 1.6) or (sharpe < -1.2 and sharpe > -1.6):
                    # Require a minimally diversified book before keeping.
                    if (longCount + shortCount) > 100:
                        if sharpe < -sharpe_th:
                            # Mirror strongly negative alphas by negation.
                            exp = "-%s"%exp
                        rec = [alpha_id, exp, sharpe, turnover, fitness, margin, dateCreated, decay]
                        print(rec)
                        # Higher turnover -> lengthen decay before re-simulation.
                        if turnover > 0.7:
                            rec.append(decay*4)
                        elif turnover > 0.6:
                            rec.append(decay*3+3)
                        elif turnover > 0.5:
                            rec.append(decay*3)
                        elif turnover > 0.4:
                            rec.append(decay*2)
                        elif turnover > 0.35:
                            rec.append(decay+4)
                        elif turnover > 0.3:
                            rec.append(decay+2)
                        output.append(rec)
            except:
                # NOTE(review): bare except doubles as the "no more results"
                # stop for this page; any parse error triggers a re-login.
                print("%d finished re-login"%i)
                s = login()

    print("count: %d"%count)
    return output, s  # also return the session s
|
|
|
|
def prune(next_alpha_recs, prefix, keep_num):
    """Keep at most `keep_num` records per datafield, sign-aware.

    Args:
        next_alpha_recs: records from get_alphas (expression at index 1,
            sharpe at index 2, adjusted decay in the last slot).
        prefix: datafield prefix (fnd6, mdl175, ...) used to extract the
            field name from the expression.
        keep_num: number of same-field alphas to keep; records arrive sorted
            by |sharpe|, so the strongest survive.

    Returns:
        list of [expression, decay] pairs.
    """
    kept = []
    per_field_count = defaultdict(int)
    for rec in next_alpha_recs:
        expression, sharpe, decay = rec[1], rec[2], rec[-1]
        # Field key: the text between the prefix and the first comma.
        key = expression.split(prefix)[-1].split(",")[0]
        if sharpe < 0:
            # Negative-sharpe alphas are bucketed separately.
            key = "-%s"%key
        if per_field_count[key] < keep_num:
            per_field_count[key] += 1
            kept.append([expression, decay])
    return kept
|
|
|
|
def get_group_second_order_factory(first_order, group_ops, region):
    """Cross every first-order alpha with every group operator for a region.

    Returns a flat list of all group_factory expansions, preserving the
    (first_order outer, group_op inner) order.
    """
    return [
        alpha
        for fo in first_order
        for group_op in group_ops
        for alpha in group_factory(group_op, fo, region)
    ]
|
|
|
|
|
|
def group_factory(op, field, region):
    """Expand a group operator over every grouping available in a region.

    Args:
        op: group operator name; "group_vector*" ops get an extra weighting
            vector argument, "group_percentage*" ops a percentage=0.5 kwarg.
        field: field expression to wrap.
        region: region code selecting which classifier datafields to add to
            the generic groupings.

    Returns:
        list of "op(field, ..., densify(group))" expression strings.
    """
    output = []
    # Weighting vectors available to group_vector* operators.
    vectors = ["cap"]

    # Region-specific classifier datafields (pv13_* hierarchies, sta1_*/sta2_*
    # statistical clusters), grouped by family suffix (13 / 1 / 2).
    chn_group_13 = ['pv13_h_min2_sector', 'pv13_di_6l', 'pv13_rcsed_6l', 'pv13_di_5l', 'pv13_di_4l',
                    'pv13_di_3l', 'pv13_di_2l', 'pv13_di_1l', 'pv13_parent', 'pv13_level']

    chn_group_1 = ['sta1_top3000c30','sta1_top3000c20','sta1_top3000c10','sta1_top3000c2','sta1_top3000c5']

    chn_group_2 = ['sta2_top3000_fact4_c10','sta2_top2000_fact4_c50','sta2_top3000_fact3_c20']

    hkg_group_13 = ['pv13_10_f3_g2_minvol_1m_sector', 'pv13_10_minvol_1m_sector', 'pv13_20_minvol_1m_sector',
                    'pv13_2_minvol_1m_sector', 'pv13_5_minvol_1m_sector', 'pv13_1l_scibr', 'pv13_3l_scibr',
                    'pv13_2l_scibr', 'pv13_4l_scibr', 'pv13_5l_scibr']

    hkg_group_1 = ['sta1_allc50','sta1_allc5','sta1_allxjp_513_c20','sta1_top2000xjp_513_c5']

    hkg_group_2 = ['sta2_all_xjp_513_all_fact4_c10','sta2_top2000_xjp_513_top2000_fact3_c10',
                   'sta2_allfactor_xjp_513_13','sta2_top2000_xjp_513_top2000_fact3_c20']

    twn_group_13 = ['pv13_2_minvol_1m_sector','pv13_20_minvol_1m_sector','pv13_10_minvol_1m_sector',
                    'pv13_5_minvol_1m_sector','pv13_10_f3_g2_minvol_1m_sector','pv13_5_f3_g2_minvol_1m_sector',
                    'pv13_2_f4_g3_minvol_1m_sector']

    twn_group_1 = ['sta1_allc50','sta1_allxjp_513_c50','sta1_allxjp_513_c20','sta1_allxjp_513_c2',
                   'sta1_allc20','sta1_allxjp_513_c5','sta1_allxjp_513_c10','sta1_allc2','sta1_allc5']

    twn_group_2 = ['sta2_allfactor_xjp_513_0','sta2_all_xjp_513_all_fact3_c20',
                   'sta2_all_xjp_513_all_fact4_c20','sta2_all_xjp_513_all_fact4_c50']

    # NOTE(review): 'pv13_r2_min2_3000_sector' appears twice in this list,
    # so its expressions are generated twice -- confirm whether intended.
    usa_group_13 = ['pv13_h_min2_3000_sector','pv13_r2_min20_3000_sector','pv13_r2_min2_3000_sector',
                    'pv13_r2_min2_3000_sector', 'pv13_h_min2_focused_pureplay_3000_sector']

    usa_group_1 = ['sta1_top3000c50','sta1_allc20','sta1_allc10','sta1_top3000c20','sta1_allc5']

    usa_group_2 = ['sta2_top3000_fact3_c50','sta2_top3000_fact4_c20','sta2_top3000_fact4_c10']

    # NOTE(review): usa_group_6 is defined but never added to `groups` below.
    usa_group_6 = ['mdl10_group_name']

    asi_group_13 = ['pv13_20_minvol_1m_sector', 'pv13_5_f3_g2_minvol_1m_sector', 'pv13_10_f3_g2_minvol_1m_sector',
                    'pv13_2_f4_g3_minvol_1m_sector', 'pv13_10_minvol_1m_sector', 'pv13_5_minvol_1m_sector']

    asi_group_1 = ['sta1_allc50', 'sta1_allc10', 'sta1_minvol1mc50','sta1_minvol1mc20',
                   'sta1_minvol1m_normc20', 'sta1_minvol1m_normc50']

    jpn_group_1 = ['sta1_alljpn_513_c5', 'sta1_alljpn_513_c50', 'sta1_alljpn_513_c2', 'sta1_alljpn_513_c20']

    jpn_group_2 = ['sta2_top2000_jpn_513_top2000_fact3_c20', 'sta2_all_jpn_513_all_fact1_c5',
                   'sta2_allfactor_jpn_513_9', 'sta2_all_jpn_513_all_fact1_c10']

    jpn_group_13 = ['pv13_2_minvol_1m_sector', 'pv13_2_f4_g3_minvol_1m_sector', 'pv13_10_minvol_1m_sector',
                    'pv13_10_f3_g2_minvol_1m_sector', 'pv13_all_delay_1_parent', 'pv13_all_delay_1_level']

    kor_group_13 = ['pv13_10_f3_g2_minvol_1m_sector', 'pv13_5_minvol_1m_sector', 'pv13_5_f3_g2_minvol_1m_sector',
                    'pv13_2_minvol_1m_sector', 'pv13_20_minvol_1m_sector', 'pv13_2_f4_g3_minvol_1m_sector']

    kor_group_1 = ['sta1_allc20','sta1_allc50','sta1_allc2','sta1_allc10','sta1_minvol1mc50',
                   'sta1_allxjp_513_c10', 'sta1_top2000xjp_513_c50']

    kor_group_2 =['sta2_all_xjp_513_all_fact1_c50','sta2_top2000_xjp_513_top2000_fact2_c50',
                  'sta2_all_xjp_513_all_fact4_c50','sta2_all_xjp_513_all_fact4_c5']

    eur_group_13 = ['pv13_5_sector', 'pv13_2_sector', 'pv13_v3_3l_scibr', 'pv13_v3_2l_scibr', 'pv13_2l_scibr',
                    'pv13_52_sector', 'pv13_v3_6l_scibr', 'pv13_v3_4l_scibr', 'pv13_v3_1l_scibr']

    eur_group_1 = ['sta1_allc10', 'sta1_allc2', 'sta1_top1200c2', 'sta1_allc20', 'sta1_top1200c10']

    eur_group_2 = ['sta2_top1200_fact3_c50','sta2_top1200_fact3_c20','sta2_top1200_fact4_c50']

    # NOTE(review): glb_group_13 is assigned twice; this first value is dead
    # and the second assignment below wins -- confirm which list is intended.
    glb_group_13 = ["pv13_10_f2_g3_sector", "pv13_2_f3_g2_sector", "pv13_2_sector", "pv13_52_all_delay_1_sector"]

    glb_group_1 = ['sta1_allc20', 'sta1_allc10', 'sta1_allc50', 'sta1_allc5']

    glb_group_2 = ['sta2_all_fact4_c50', 'sta2_all_fact4_c20', 'sta2_all_fact3_c20', 'sta2_all_fact4_c10']

    # Second (effective) definition of glb_group_13; it also repeats
    # 'pv13_52_minvol_1m_sector'.
    glb_group_13 = ['pv13_2_sector', 'pv13_10_sector', 'pv13_3l_scibr', 'pv13_2l_scibr', 'pv13_1l_scibr',
                    'pv13_52_minvol_1m_all_delay_1_sector','pv13_52_minvol_1m_sector','pv13_52_minvol_1m_sector']

    amr_group_13 = ['pv13_4l_scibr', 'pv13_1l_scibr', 'pv13_hierarchy_min51_f1_sector',
                    'pv13_hierarchy_min2_600_sector', 'pv13_r2_min2_sector', 'pv13_h_min20_600_sector']

    # Generic bucketed groupings built from price/volume/fundamental ranks.
    #bps_group = "bucket(rank(fnd28_value_05480), range='0.1, 1, 0.1')"
    #pb_group = "bucket(rank(close/fnd28_value_05480), range='0.1, 1, 0.1')"
    cap_group = "bucket(rank(cap), range='0.1, 1, 0.1')"
    asset_group = "bucket(rank(assets),range='0.1, 1, 0.1')"
    sector_cap_group = "bucket(group_rank(cap, sector),range='0.1, 1, 0.1')"
    sector_asset_group = "bucket(group_rank(assets, sector),range='0.1, 1, 0.1')"

    vol_group = "bucket(rank(ts_std_dev(returns,20)),range = '0.1, 1, 0.1')"

    liquidity_group = "bucket(rank(close*volume),range = '0.1, 1, 0.1')"

    # Groupings common to all regions.
    groups = ["market","sector", "industry", "subindustry",
              cap_group, asset_group, sector_cap_group, sector_asset_group, vol_group, liquidity_group]

    # Add the region-specific classifier fields.
    if region == "CHN":
        groups += chn_group_13 + chn_group_1 + chn_group_2
    if region == "TWN":
        groups += twn_group_13 + twn_group_1 + twn_group_2
    if region == "ASI":
        groups += asi_group_13 + asi_group_1
    if region == "USA":
        groups += usa_group_13 + usa_group_1 + usa_group_2
    if region == "HKG":
        groups += hkg_group_13 + hkg_group_1 + hkg_group_2
    if region == "KOR":
        groups += kor_group_13 + kor_group_1 + kor_group_2
    if region == "EUR":
        groups += eur_group_13 + eur_group_1 + eur_group_2
    if region == "GLB":
        groups += glb_group_13 + glb_group_1 + glb_group_2
    if region == "AMR":
        groups += amr_group_13
    if region == "JPN":
        groups += jpn_group_1 + jpn_group_2 + jpn_group_13

    # Emit one expression per group; densify compacts sparse group ids.
    for group in groups:
        if op.startswith("group_vector"):
            for vector in vectors:
                alpha = "%s(%s,%s,densify(%s))"%(op, field, vector, group)
                output.append(alpha)
        elif op.startswith("group_percentage"):
            alpha = "%s(%s,densify(%s),percentage=0.5)"%(op, field, group)
            output.append(alpha)
        else:
            alpha = "%s(%s,densify(%s))"%(op, field, group)
            output.append(alpha)

    return output
|
|
|
|
|
|
def trade_when_factory(op,field,region):
    """Combine entry/exit event conditions into trade_when-style expressions.

    Args:
        op: conditional trade operator (e.g. "trade_when").
        field: alpha expression to gate.
        region: region code.  NOTE(review): `region` and the regional event
            lists below (usa_events, asi_events, ...) are defined but never
            used -- only the generic open_events/exit_events are emitted.
            Confirm whether regional events were meant to be included.

    Returns:
        list of "op(open_event, field, exit_event)" expression strings.
    """
    output = []

    # Generic price/volume entry conditions, region-independent.
    open_events = ["ts_arg_max(volume, 5) == 0", "ts_corr(close, volume, 20) < 0",
                   "ts_corr(close, volume, 5) < 0", "ts_mean(volume,10)>ts_mean(volume,60)",
                   "group_rank(ts_std_dev(returns,60), sector) > 0.7", "ts_zscore(returns,60) > 2",
                   "ts_arg_min(volume, 5) > 3",
                   "ts_std_dev(returns, 5) > ts_std_dev(returns, 20)",
                   "ts_arg_max(close, 5) == 0", "ts_arg_max(close, 20) == 0",
                   "ts_corr(close, volume, 5) > 0", "ts_corr(close, volume, 5) > 0.3", "ts_corr(close, volume, 5) > 0.5",
                   "ts_corr(close, volume, 20) > 0", "ts_corr(close, volume, 20) > 0.3", "ts_corr(close, volume, 20) > 0.5",
                   "ts_regression(returns, %s, 5, lag = 0, rettype = 2) > 0"%field,
                   "ts_regression(returns, %s, 20, lag = 0, rettype = 2) > 0"%field,
                   "ts_regression(returns, ts_step(20), 20, lag = 0, rettype = 2) > 0",
                   "ts_regression(returns, ts_step(5), 5, lag = 0, rettype = 2) > 0"]

    # Exit on a large daily move, or never ("-1").
    exit_events = ["abs(returns) > 0.1", "-1"]

    # Region-specific sentiment-based events (currently unused -- see NOTE).
    usa_events = ["rank(rp_css_business) > 0.8", "ts_rank(rp_css_business, 22) > 0.8", "rank(vec_avg(mws82_sentiment)) > 0.8",
                  "ts_rank(vec_avg(mws82_sentiment),22) > 0.8", "rank(vec_avg(nws48_ssc)) > 0.8",
                  "ts_rank(vec_avg(nws48_ssc),22) > 0.8", "rank(vec_avg(mws50_ssc)) > 0.8", "ts_rank(vec_avg(mws50_ssc),22) > 0.8",
                  "ts_rank(vec_sum(scl12_alltype_buzzvec),22) > 0.9", "pcr_oi_270 < 1", "pcr_oi_270 > 1",]

    asi_events = ["rank(vec_avg(mws38_score)) > 0.8", "ts_rank(vec_avg(mws38_score),22) > 0.8"]

    eur_events = ["rank(rp_css_business) > 0.8", "ts_rank(rp_css_business, 22) > 0.8",
                  "rank(vec_avg(oth429_research_reports_fundamental_keywords_4_method_2_pos)) > 0.8",
                  "ts_rank(vec_avg(oth429_research_reports_fundamental_keywords_4_method_2_pos),22) > 0.8",
                  "rank(vec_avg(mws84_sentiment)) > 0.8", "ts_rank(vec_avg(mws84_sentiment),22) > 0.8",
                  "rank(vec_avg(mws85_sentiment)) > 0.8", "ts_rank(vec_avg(mws85_sentiment),22) > 0.8",
                  "rank(mdl110_analyst_sentiment) > 0.8", "ts_rank(mdl110_analyst_sentiment, 22) > 0.8",
                  "rank(vec_avg(nws3_scores_posnormscr)) > 0.8",
                  "ts_rank(vec_avg(nws3_scores_posnormscr),22) > 0.8",
                  "rank(vec_avg(mws36_sentiment_words_positive)) > 0.8",
                  "ts_rank(vec_avg(mws36_sentiment_words_positive),22) > 0.8"]

    glb_events = ["rank(vec_avg(mdl109_news_sent_1m)) > 0.8",
                  "ts_rank(vec_avg(mdl109_news_sent_1m),22) > 0.8",
                  "rank(vec_avg(nws20_ssc)) > 0.8",
                  "ts_rank(vec_avg(nws20_ssc),22) > 0.8",
                  "vec_avg(nws20_ssc) > 0",
                  "rank(vec_avg(nws20_bee)) > 0.8",
                  "ts_rank(vec_avg(nws20_bee),22) > 0.8",
                  "rank(vec_avg(nws20_qmb)) > 0.8",
                  "ts_rank(vec_avg(nws20_qmb),22) > 0.8"]

    chn_events = ["rank(vec_avg(oth111_xueqiunaturaldaybasicdivisionstat_senti_conform)) > 0.8",
                  "ts_rank(vec_avg(oth111_xueqiunaturaldaybasicdivisionstat_senti_conform),22) > 0.8",
                  "rank(vec_avg(oth111_gubanaturaldaydevicedivisionstat_senti_conform)) > 0.8",
                  "ts_rank(vec_avg(oth111_gubanaturaldaydevicedivisionstat_senti_conform),22) > 0.8",
                  "rank(vec_avg(oth111_baragedivisionstat_regi_senti_conform)) > 0.8",
                  "ts_rank(vec_avg(oth111_baragedivisionstat_regi_senti_conform),22) > 0.8"]

    kor_events = ["rank(vec_avg(mdl110_analyst_sentiment)) > 0.8",
                  "ts_rank(vec_avg(mdl110_analyst_sentiment),22) > 0.8",
                  "rank(vec_avg(mws38_score)) > 0.8",
                  "ts_rank(vec_avg(mws38_score),22) > 0.8"]

    twn_events = ["rank(vec_avg(mdl109_news_sent_1m)) > 0.8",
                  "ts_rank(vec_avg(mdl109_news_sent_1m),22) > 0.8",
                  "rank(rp_ess_business) > 0.8",
                  "ts_rank(rp_ess_business,22) > 0.8"]

    # Cartesian product of generic entry and exit conditions.
    for oe in open_events:
        for ee in exit_events:
            alpha = "%s(%s, %s, %s)"%(op, oe, field, ee)
            output.append(alpha)
    return output
|
|
|
|
|
|
def check_submission(alpha_bag, gold_bag, start):
    """Run the pre-submission check for every alpha id in alpha_bag.

    Alphas that could not be checked (logged-out session, NaN self-correlation)
    are re-appended to alpha_bag so the loop retries them later; "error" ids
    are collected in `depot` and printed at the end.

    Args:
        alpha_bag: list of alpha ids; MUTATED -- retried ids are appended,
            which intentionally extends the enumerate loop.
        gold_bag: list extended with (alpha_id, prod_correlation) winners.
        start: index to resume from.

    Returns:
        gold_bag with all passing alphas appended.
    """
    depot = []
    s = login()
    for idx, g in enumerate(alpha_bag):
        if idx < start:
            continue
        if idx % 5 == 0:
            # Progress heartbeat.
            print(idx)
        if idx % 200 == 0:
            # Refresh the session periodically to avoid auth expiry.
            s = login()
            #print(idx)
        pc = get_check_submission(s, g)
        if pc == "sleep":
            # Session looked logged out: back off, re-login, retry later.
            sleep(100)
            s = login()
            alpha_bag.append(g)
        elif pc != pc:
            # pc is nan
            print("check self-corrlation error")
            sleep(100)
            alpha_bag.append(g)
        elif pc == "fail":
            # A check FAILed: drop this alpha permanently.
            continue
        elif pc == "error":
            # Unparsable response: park the id for manual inspection.
            depot.append(g)
        else:
            # pc is the prod-correlation value; the alpha passed all checks.
            print(g)
            gold_bag.append((g, pc))
    print(depot)
    return gold_bag
|
|
|
|
def get_check_submission(s, alpha_id):
    """Run the submission check for one alpha and classify the outcome.

    Args:
        s: authenticated requests.Session.
        alpha_id: id of the alpha to check.

    Returns:
        The PROD_CORRELATION value (may be NaN) when all checks pass,
        "fail" when any check FAILs,
        "sleep" when the session appears logged out,
        "error" when the response cannot be parsed.
    """
    # Poll until the server stops asking us to retry.
    while True:
        result = s.get("https://api.worldquantbrain.com/alphas/" + alpha_id + "/check")
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    try:
        if result.json().get("is", 0) == 0:
            # A missing "is" section means the session was logged out.
            print("logged out")
            return "sleep"
        checks_df = pd.DataFrame(
            result.json()["is"]["checks"]
        )
        pc = checks_df[checks_df.name == "PROD_CORRELATION"]["value"].values[0]
        if not any(checks_df["result"] == "FAIL"):
            return pc
        else:
            return "fail"
    except Exception as e:
        # FIX: narrowed from a bare except (which also swallowed
        # KeyboardInterrupt/SystemExit); malformed check data lands here.
        print("catch: %s (%s)"%(alpha_id, e))
        return "error"
|
|
|
|
|
|
def view_alphas(gold_bag):
    """Print checked alphas sorted by sharpe, highest first.

    Args:
        gold_bag: list of (alpha_id, prod_correlation) pairs as produced by
            check_submission.
    """
    s = login()
    rows = []
    for gold, pc in gold_bag:
        triple = locate_alpha(s, gold)
        # Reorder to: id, sharpe, turnover, fitness, margin, dateCreated, exp.
        row = [triple[0], triple[2], triple[3], triple[4], triple[5], triple[6], triple[1]]
        row.append(pc)
        rows.append(row)

    # Sort by sharpe (second column), descending.
    rows.sort(reverse=True, key=lambda r: r[1])
    for row in rows:
        print(row)
|
|
|
|
def locate_alpha(s, alpha_id):
    """Fetch an alpha's metadata, honoring Retry-After throttling.

    Returns:
        [alpha_id, expression, sharpe, turnover, fitness, margin,
         dateCreated, decay]
    """
    # Poll until the server stops sending a Retry-After header.
    while True:
        alpha = s.get("https://api.worldquantbrain.com/alphas/" + alpha_id)
        if "retry-after" not in alpha.headers:
            break
        time.sleep(float(alpha.headers["Retry-After"]))

    metrics = json.loads(alpha.content.decode('utf-8'))
    #print(metrics["regular"]["code"])

    return [
        alpha_id,
        metrics['regular']['code'],
        metrics["is"]["sharpe"],
        metrics["is"]["turnover"],
        metrics["is"]["fitness"],
        metrics["is"]["margin"],
        metrics["dateCreated"],
        metrics["settings"]["decay"],
    ]
|
|
|
|
|
|
|
|
# some factory for other operators
|
|
def vector_factory(op, field):
    """Combine a vector operator with the field and each weighting vector.

    Returns:
        list of "op(field, vector)" strings, one per available vector.
    """
    # Weighting vectors currently available to vector operators.
    vectors = ["cap"]
    return ["%s(%s, %s)"%(op, field, vec) for vec in vectors]
|
|
|
|
|
|
def ts_comp_factory(op, field, factor, paras):
    """Expand op(field, day, factor=para) across windows and parameter values.

    Args:
        op: time-series operator needing an extra named parameter.
        field: field expression.
        factor: keyword name of the extra parameter (e.g. "percentage").
        paras: list of float or int values for that parameter; floats are
            rendered with one decimal place, ints as integers.

    Returns:
        list of expression strings in (day outer, para inner) order.
    """
    #l1, l2 = [3, 5, 10, 20, 60, 120, 240], paras
    days = [5, 22, 66, 240]
    output = []
    for day, para in product(days, paras):
        if type(para) == float:
            alpha = "%s(%s, %d, %s=%.1f)"%(op, field, day, factor, para)
        elif type(para) == int:
            alpha = "%s(%s, %d, %s=%d)"%(op, field, day, factor, para)
        output.append(alpha)
    return output
|
|
|
|
def twin_field_factory(op, field, fields):
    """Pair the field with every other candidate field across the windows.

    Args:
        op: two-field time-series operator (e.g. "ts_corr").
        field: the primary field expression.
        fields: pool of candidate fields; `field` itself is excluded.

    Returns:
        list of "op(field, counterpart, day)" strings.
    """
    #days = [3, 5, 10, 20, 60, 120, 240]
    days = [5, 22, 66, 240]
    # All counterpart fields except the primary one (set order is arbitrary).
    counterparts = list(set(fields) - set([field]))

    return [
        "%s(%s, %s, %d)"%(op, field, other, day)
        for day in days
        for other in counterparts
    ]
|
|
|
|
def login_hk():
    """Interactive BRAIN login that also handles biometrics ("persona") auth.

    NOTE(review): credentials are hard-coded empty strings -- fill them in
    (or load from the environment, as `environ` is already imported) before
    use.

    Returns:
        requests.Session; authenticated on success, otherwise returned with
        failed auth after printing an error message.
    """
    username = ""
    password = ""

    # Create a session to persistently store the headers
    s = requests.Session()

    # Save credentials into session
    s.auth = (username, password)

    # Send a POST request to the /authentication API
    response = s.post('https://api.worldquantbrain.com/authentication')

    if response.status_code == requests.codes.unauthorized:
        # Check if biometrics is required
        if response.headers.get("WWW-Authenticate") == "persona":
            print(
                "Complete biometrics authentication by scanning your face. Follow the link: \n"
                + urljoin(response.url, response.headers["Location"]) + "\n"
            )
            input("Press any key after you complete the biometrics authentication.")

            # Retry the authentication after biometrics
            biometrics_response = s.post(urljoin(response.url, response.headers["Location"]))

            # Keep prompting until the biometrics endpoint reports success (201).
            while biometrics_response.status_code != 201:
                input("Biometrics authentication is not complete. Please try again and press any key when completed.")
                biometrics_response = s.post(urljoin(response.url, response.headers["Location"]))

            print("Biometrics authentication completed.")
        else:
            print("\nIncorrect username or password. Please check your credentials.\n")
    else:
        print("Logged in successfully.")

    return s
|
|
|