import getpass
import json
import logging
import os
import threading
import time
from functools import partial
from multiprocessing.pool import ThreadPool
from pathlib import Path
from typing import Literal, Optional, Union
from urllib.parse import urljoin

import pandas as pd
import requests
import tqdm

from helpful_functions import (
    expand_dict_columns,
    save_pnl,
    save_simulation_result,
    save_yearly_stats,
)

DEV = False


class SingleSession(requests.Session):
    """Thread-safe singleton wrapper around requests.Session.

    All callers share one authenticated session; a separate lock is exposed
    via get_relogin_lock() so that only one thread re-authenticates at a time.
    """

    _instance = None
    _lock = threading.Lock()
    _relogin_lock = threading.Lock()
    _initialized = False

    def __new__(cls, *args, **kwargs):
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, *args, **kwargs):
        if not self._initialized:
            super().__init__(*args, **kwargs)
            self._initialized = True

    def get_relogin_lock(self):
        return self._relogin_lock


def setup_logger() -> logging.Logger:
    """
    Set up a logger that writes log messages to the console and to a file named 'ace.log'.

    Returns:
        logger (logging.Logger): The configured logger object.

    The logger's name is 'ace'. The level of the logger and the console handler is set to
    DEBUG if the global variable DEV is True, and INFO otherwise. The file handler always
    logs at DEBUG level. The format for the log messages is:
    'asctime' - 'name' - 'levelname' - 'message'.
    """
    logger = logging.getLogger("ace")
    level = logging.DEBUG if DEV else logging.INFO

    logger.setLevel(level)

    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)

    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    console_handler.setFormatter(formatter)

    logger.addHandler(console_handler)

    file_handler = logging.FileHandler("ace.log")
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    return logger


logger = setup_logger()


DEFAULT_CONFIG = {
    "get_pnl": False,
    "get_stats": False,
    "save_pnl_file": False,
    "save_stats_file": False,
    "save_result_file": False,
    "check_submission": False,
    "check_self_corr": False,
    "check_prod_corr": False,
}

brain_api_url = os.environ.get("BRAIN_API_URL", "https://api.worldquantbrain.com")
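
# A minimal sketch of overriding DEFAULT_CONFIG (illustrative only; the keys are the ones
# defined above, the chosen values are arbitrary):
#
#     my_config = {**DEFAULT_CONFIG, "get_stats": True, "check_self_corr": True}
#     # pass it as simulation_config=my_config to simulate_alpha_list() or
#     # simulate_alpha_list_multi() defined below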


def get_credentials() -> tuple[str, str]:
    """
    Retrieve or prompt for platform credentials.

    This function attempts to read credentials from a JSON file in the user's home directory
    ('~/secrets/platform-brain.json'). If the file doesn't exist or is empty, it falls back to
    the BRAIN_CREDENTIAL_EMAIL and BRAIN_CREDENTIAL_PASSWORD environment variables, prompts for
    email and password if those are not both set, and saves the result to the file.

    Returns:
        tuple: A tuple containing the email and password.

    Raises:
        json.JSONDecodeError: If the credentials file exists but contains invalid JSON.
    """

    credential_email = os.environ.get("BRAIN_CREDENTIAL_EMAIL")
    credential_password = os.environ.get("BRAIN_CREDENTIAL_PASSWORD")

    credentials_folder_path = os.path.join(os.path.expanduser("~"), "secrets")
    credentials_file_path = os.path.join(credentials_folder_path, "platform-brain.json")

    if Path(credentials_file_path).exists() and os.path.getsize(credentials_file_path) > 2:
        with open(credentials_file_path, encoding="utf-8") as file:
            data = json.loads(file.read())
    else:
        os.makedirs(credentials_folder_path, exist_ok=True)
        if credential_email and credential_password:
            email = credential_email
            password = credential_password
        else:
            email = input("Email:\n")
            password = getpass.getpass(prompt="Password:")
        data = {"email": email, "password": password}
        with open(credentials_file_path, "w", encoding="utf-8") as file:
            json.dump(data, file)
    return (data["email"], data["password"])
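
# Credential resolution sketch (illustrative; the path and keys are the ones used by
# get_credentials() above, the values are placeholders):
#
#     # either export the environment variables before the first run:
#     #   BRAIN_CREDENTIAL_EMAIL=you@example.com
#     #   BRAIN_CREDENTIAL_PASSWORD=...
#     # or let the function create ~/secrets/platform-brain.json with the content
#     #   {"email": "you@example.com", "password": "..."}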


def start_session() -> SingleSession:
    """
    Start a new session with the WorldQuant BRAIN platform.

    This function authenticates the user, handles biometric authentication if required,
    and creates a new session.

    Returns:
        SingleSession: An authenticated session object.

    Raises:
        requests.exceptions.RequestException: If there's an error during the authentication process.
    """

    s = SingleSession()
    s.auth = get_credentials()
    r = s.post(brain_api_url + "/authentication")
    logger.debug(f"New session created (ID: {id(s)}) with authentication response: {r.status_code}, {r.json()}")
    if r.status_code == requests.status_codes.codes.unauthorized:
        if r.headers["WWW-Authenticate"] == "persona":
            print(
                "Complete biometrics authentication and press Enter to continue: \n"
                + urljoin(r.url, r.headers["Location"])
                + "\n"
            )
            input()
            s.post(urljoin(r.url, r.headers["Location"]))

            while True:
                if s.post(urljoin(r.url, r.headers["Location"])).status_code != 201:
                    input(
                        "Biometrics authentication is not complete. Please try again and press Enter when completed \n"
                    )
                else:
                    break
        else:
            logger.error("\nIncorrect email or password\n")
            with open(
                os.path.join(os.path.expanduser("~"), "secrets/platform-brain.json"),
                "w",
            ) as file:
                json.dump({}, file)
            return start_session()
    return s
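
# Typical entry point (illustrative only; assumes valid BRAIN credentials are available to
# get_credentials()):
#
#     s = start_session()
#     print(check_session_timeout(s))   # seconds until the session token expires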


def check_session_timeout(s: SingleSession) -> int:
    """
    Check if the current session has timed out.

    Args:
        s (SingleSession): The current session object.

    Returns:
        int: The number of seconds until the session expires, or 0 if the session has expired or an error occurred.
    """

    authentication_url = brain_api_url + "/authentication"
    try:
        result = s.get(authentication_url).json()["token"]["expiry"]
        logger.debug(f"Session (ID: {id(s)}) timeout check result: {result}")
        return result
    except Exception:
        return 0


def generate_alpha(
    regular: Optional[str] = None,
    selection: Optional[str] = None,
    combo: Optional[str] = None,
    alpha_type: Literal["REGULAR", "SUPER"] = "REGULAR",
    region: str = "USA",
    universe: str = "TOP3000",
    delay: Literal[0, 1] = 1,
    decay: int = 0,
    neutralization: str = "INDUSTRY",
    truncation: float = 0.08,
    pasteurization: Literal["ON", "OFF"] = "ON",
    test_period: str = "P0Y0M0D",
    unit_handling: Literal["VERIFY"] = "VERIFY",
    nan_handling: Literal["ON", "OFF"] = "OFF",
    max_trade: Literal["ON", "OFF"] = "OFF",
    selection_handling: str = "POSITIVE",
    selection_limit: int = 100,
    visualization: bool = False,
) -> dict:
    """
    Generate an alpha dictionary for simulation. If alpha_type='REGULAR', the dictionary
    is built from the regular input. If alpha_type='SUPER', it is built from the
    selection and combo inputs.

    Args:
        regular (str, optional): The regular alpha expression.
        selection (str, optional): The selection expression for super alphas.
        combo (str, optional): The combo expression for super alphas.
        alpha_type (str, optional): The type of alpha ("REGULAR" or "SUPER"). Defaults to "REGULAR".
        region (str, optional): The region for the alpha. Defaults to "USA".
        universe (str, optional): The universe for the alpha. Defaults to "TOP3000".
        delay (int, optional): The delay for the alpha. Defaults to 1.
        decay (int, optional): The decay for the alpha. Defaults to 0.
        neutralization (str, optional): The neutralization method. Defaults to "INDUSTRY".
        truncation (float, optional): The truncation value. Defaults to 0.08.
        pasteurization (str, optional): The pasteurization setting. Defaults to "ON".
        test_period (str, optional): The test period. Defaults to "P0Y0M0D".
        unit_handling (str, optional): The unit handling method. Defaults to "VERIFY".
        nan_handling (str, optional): The NaN handling method. Defaults to "OFF".
        max_trade (str, optional): The max trade method. Defaults to "OFF".
        selection_handling (str, optional): The selection handling method for super alphas. Defaults to "POSITIVE".
        selection_limit (int, optional): The selection limit for super alphas. Defaults to 100.
        visualization (bool, optional): Whether to include visualization. Defaults to False.

    Returns:
        dict: A dictionary containing the alpha configuration for simulation.

    Note:
        If an invalid alpha_type is provided, an error is logged and an empty dict is returned.
    """

    settings = {
        "instrumentType": "EQUITY",
        "region": region,
        "universe": universe,
        "delay": delay,
        "decay": decay,
        "neutralization": neutralization,
        "truncation": truncation,
        "pasteurization": pasteurization,
        "testPeriod": test_period,
        "unitHandling": unit_handling,
        "nanHandling": nan_handling,
        "maxTrade": max_trade,
        "language": "FASTEXPR",
        "visualization": visualization,
    }
    if alpha_type == "REGULAR":
        simulation_data = {
            "type": alpha_type,
            "settings": settings,
            "regular": regular,
        }
    elif alpha_type == "SUPER":
        simulation_data = {
            "type": alpha_type,
            "settings": {
                **settings,
                "selectionHandling": selection_handling,
                "selectionLimit": selection_limit,
            },
            "combo": combo,
            "selection": selection,
        }
    else:
        logger.error("alpha_type should be REGULAR or SUPER")
        return {}
    return simulation_data
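
# A minimal sketch of building a simulation payload (illustrative; "rank(-returns)" is a
# placeholder FASTEXPR expression, not a recommendation):
#
#     alpha = generate_alpha(regular="rank(-returns)", region="USA", universe="TOP3000", delay=1)
#     # alpha can now be passed to simulate_single_alpha() or collected into a list for
#     # simulate_alpha_list()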


def check_session_and_relogin(s: SingleSession) -> SingleSession:
    """
    Check the session timeout and, if fewer than 2000 seconds remain,
    attempt to start a new session.

    Parameters:
        s (SingleSession): The current session object.

    Returns:
        s (SingleSession): The original session object if it hasn't timed out,
        otherwise a new session object.

    If the remaining session time is less than 2000 seconds, the function
    attempts to start a new session using the `start_session()` function.
    If `start_session()` fails on the first attempt, it waits for 100 seconds
    and then tries again. The function then returns the new session object.
    """
    with s.get_relogin_lock():
        if check_session_timeout(s) < 2000:
            logger.debug('Session expires in less than 2000 seconds')
            try:
                s = start_session()
            except Exception:
                logger.info('Re-login failed, waiting 100 seconds before retrying')
                time.sleep(100)
                s = start_session()
    logger.debug(f"Session (ID: {id(s)}) after check and relogin")
    return s


def start_simulation(s: SingleSession, simulate_data: Union[list[dict], dict]) -> requests.Response:
    """
    Start a simulation with the provided simulation data.

    Args:
        s (SingleSession): An authenticated session object.
        simulate_data (dict or list[dict]): The simulation parameters for a single simulation,
            or a list of them for a multi-simulation.

    Returns:
        requests.Response: The response object from the simulation start request.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """
    simulate_response = s.post(brain_api_url + "/simulations", json=simulate_data)
    return simulate_response


def simulation_progress(
    s: SingleSession,
    simulate_response: requests.Response,
) -> dict:
    """
    Monitor the progress of a simulation and return the result when complete.

    Args:
        s (SingleSession): An authenticated session object.
        simulate_response (requests.Response): The response from starting the simulation.

    Returns:
        dict: A dictionary containing the completion status and simulation result.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API requests.
    """
    if simulate_response.status_code // 100 != 2:
        logger.warning(f'Simulation failed. {simulate_response.text}, Status code: {simulate_response.status_code}')
        return {"completed": False, "result": {}}

    simulation_progress_url = simulate_response.headers["Location"]
    error_flag = False
    retry_count = 0
    while True:
        simulation_progress_response = s.get(simulation_progress_url)
        if simulation_progress_response.status_code // 100 != 2:
            logger.error(
                f'Simulation {simulation_progress_url}, Status code: {simulation_progress_response.status_code}, Retry'
            )
            time.sleep(30)
            retry_count += 1
            if retry_count <= 2:
                continue
            else:
                logger.error(
                    f'Simulation {simulation_progress_url} failed, Status code: {simulation_progress_response.status_code}'
                )
                error_flag = True
                break
        if simulation_progress_response.headers.get("Retry-After", 0) == 0:
            if simulation_progress_response.json().get("status", "ERROR") == "ERROR":
                error_flag = True
            break

        time.sleep(float(simulation_progress_response.headers["Retry-After"]))

    if error_flag:
        logger.error(f"Simulation failed. {simulation_progress_response.json()}")
        return {"completed": False, "result": {}}

    alpha = simulation_progress_response.json().get("alpha", 0)
    if alpha == 0:
        logger.warning(
            f'Simulation {simulation_progress_response.json().get("id")} failed. {simulation_progress_response.json()}'
        )
        return {"completed": False, "result": {}}
    simulation_result = get_simulation_result_json(s, alpha)
    if len(simulation_result) == 0:
        return {"completed": False, "result": {}}
    return {"completed": True, "result": simulation_result}


def get_simulation_result_json(s: SingleSession, alpha_id: str) -> dict:
    """
    Retrieve the full simulation result for a specific alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.

    Returns:
        dict: A dictionary containing the full simulation result.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """
    if alpha_id is None:
        return {}
    while True:
        result = s.get(brain_api_url + "/alphas/" + alpha_id)
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    try:
        return result.json()
    except Exception:
        logger.error(f"alpha_id {alpha_id}, {result.headers}, {result.text}, {result.status_code}")
        return {}


def multisimulation_progress(
    s: SingleSession,
    simulate_response: requests.Response,
) -> dict:
    """
    Monitor the progress of multiple simulations and return the results when complete.

    Args:
        s (SingleSession): An authenticated session object.
        simulate_response (requests.Response): The response from starting the simulations.

    Returns:
        dict: A dictionary containing the completion status and simulation results.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API requests.
    """
    if simulate_response.status_code // 100 != 2:
        logger.warning(f'Simulation failed. {simulate_response.text}, Status code: {simulate_response.status_code}')
        return {"completed": False, "result": {}}

    simulation_progress_url = simulate_response.headers["Location"]
    error_flag = False
    while True:
        simulation_progress_response = s.get(simulation_progress_url)
        if simulation_progress_response.status_code // 100 != 2:
            time.sleep(30)
        if simulation_progress_response.headers.get("Retry-After", 0) == 0:
            if simulation_progress_response.json().get("status", "ERROR") == "ERROR":
                error_flag = True
            break

        time.sleep(float(simulation_progress_response.headers["Retry-After"]))

    children = simulation_progress_response.json().get("children", 0)

    if error_flag:
        if children == 0:
            logger.error(f"Simulation failed. {simulation_progress_response.json()}")
            return {"completed": False, "result": {}}
        for child in children:
            child_progress = s.get(brain_api_url + "/simulations/" + child)
            logger.error(f"Child Simulation failed: {child_progress.json()}")
        return {"completed": False, "result": {}}

    if len(children) == 0:
        logger.warning(
            f'Multi-Simulation {simulation_progress_response.json().get("id")} failed. {simulation_progress_response.json()}'
        )
        return {"completed": False, "result": {}}
    children_list = []
    for child in children:
        child_progress = s.get(brain_api_url + "/simulations/" + child)
        alpha = child_progress.json().get("alpha", 0)
        if alpha == 0:
            logger.warning(f'Child-Simulation {child_progress.json().get("id")} failed. {child_progress.json()}')
            return {"completed": False, "result": {}}
        child_result = get_simulation_result_json(s, alpha)
        children_list.append(child_result)
    return {"completed": True, "result": children_list}


def get_prod_corr(s: SingleSession, alpha_id: str) -> pd.DataFrame:
    """
    Retrieve the production correlation data for a specific alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.

    Returns:
        pandas.DataFrame: A DataFrame containing the production correlation data.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """

    while True:
        result = s.get(brain_api_url + "/alphas/" + alpha_id + "/correlations/prod")
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    if result.json().get("records", 0) == 0:
        logger.warning(f"Failed to get production correlation for alpha_id {alpha_id}. {result.json()}")
        return pd.DataFrame()
    columns = [dct["name"] for dct in result.json()["schema"]["properties"]]
    prod_corr_df = pd.DataFrame(result.json()["records"], columns=columns).assign(alpha_id=alpha_id)
    prod_corr_df["alpha_max_prod_corr"] = result.json()["max"]
    prod_corr_df["alpha_min_prod_corr"] = result.json()["min"]

    return prod_corr_df


def check_prod_corr_test(s: SingleSession, alpha_id: str, threshold: float = 0.7) -> pd.DataFrame:
    """
    Check if the alpha's production correlation passes a specified threshold.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.
        threshold (float, optional): The correlation threshold. Defaults to 0.7.

    Returns:
        pandas.DataFrame: A DataFrame containing the test result.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """

    prod_corr_df = get_prod_corr(s, alpha_id)
    if prod_corr_df.empty:
        result = [
            {
                "test": "PROD_CORRELATION",
                "result": "NONE",
                "limit": threshold,
                "value": None,
                "alpha_id": alpha_id,
            }
        ]
    else:
        value = prod_corr_df[prod_corr_df.alphas > 0]["max"].max()
        result = [
            {
                "test": "PROD_CORRELATION",
                "result": "PASS" if value <= threshold else "FAIL",
                "limit": threshold,
                "value": value,
                "alpha_id": alpha_id,
            }
        ]
    return pd.DataFrame(result)


def get_self_corr(s: SingleSession, alpha_id: str) -> pd.DataFrame:
    """
    Retrieve the self-correlation data for a specific alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.

    Returns:
        pandas.DataFrame: A DataFrame containing the self-correlation data.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """

    while True:
        result = s.get(brain_api_url + "/alphas/" + alpha_id + "/correlations/self")
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    if result.json().get("records", 0) == 0:
        logger.warning(f"Failed to get self correlation for alpha_id {alpha_id}. {result.json()}")
        return pd.DataFrame()

    records_len = len(result.json()["records"])
    if records_len == 0:
        logger.warning(f"No self correlation for alpha_id {alpha_id}")
        return pd.DataFrame()

    columns = [dct["name"] for dct in result.json()["schema"]["properties"]]
    self_corr_df = pd.DataFrame(result.json()["records"], columns=columns).assign(alpha_id=alpha_id)
    self_corr_df["alpha_max_self_corr"] = result.json()["max"]
    self_corr_df["alpha_min_self_corr"] = result.json()["min"]

    return self_corr_df


def check_self_corr_test(s: SingleSession, alpha_id: str, threshold: float = 0.7) -> pd.DataFrame:
    """
    Check if the alpha's self-correlation passes a specified threshold.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.
        threshold (float, optional): The correlation threshold. Defaults to 0.7.

    Returns:
        pandas.DataFrame: A DataFrame containing the test result.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """

    self_corr_df = get_self_corr(s, alpha_id)
    if self_corr_df.empty:
        result = [
            {
                "test": "SELF_CORRELATION",
                "result": "PASS",
                "limit": threshold,
                "value": 0,
                "alpha_id": alpha_id,
            }
        ]
    else:
        value = self_corr_df["correlation"].max()
        result = [
            {
                "test": "SELF_CORRELATION",
                "result": "PASS" if value < threshold else "FAIL",
                "limit": threshold,
                "value": value,
                "alpha_id": alpha_id,
            }
        ]
    return pd.DataFrame(result)


def get_check_submission(s: SingleSession, alpha_id: str) -> pd.DataFrame:
    """
    Retrieve the submission check results for a specific alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.

    Returns:
        pandas.DataFrame: A DataFrame containing the submission check results.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """

    while True:
        result = s.get(brain_api_url + "/alphas/" + alpha_id + "/check")
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    if result.json().get("is", 0) == 0:
        logger.warning(f"Can't check submission for alpha_id {alpha_id}. {result.json()}")
        return pd.DataFrame()

    checks_df = pd.DataFrame(result.json()["is"]["checks"]).assign(alpha_id=alpha_id)

    return checks_df


def simulate_multi_alpha(
    s: SingleSession,
    simulate_data_list: list,
) -> list[dict]:
    """
    Simulate a list of alphas using multi-simulation.

    This function checks the session timeout, starts a new session if necessary,
    initiates the simulation, monitors its progress, and sets alpha properties
    upon completion.

    Args:
        s (SingleSession): An authenticated session object.
        simulate_data_list (list): A list of dictionaries, each containing the simulation parameters for an alpha.
            These should include all necessary information such as alpha type, settings, and expressions.

    Returns:
        list: A list of dictionaries, each containing:
            - 'alpha_id' (str): The ID of the simulated alpha if successful, None otherwise.
            - 'simulate_data' (dict): The original simulation data provided.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API requests.
    """

    s = check_session_and_relogin(s)
    if len(simulate_data_list) == 1:
        return [simulate_single_alpha(s, simulate_data_list[0])]
    simulate_response = start_simulation(s, simulate_data_list)
    simulation_result = multisimulation_progress(s, simulate_response)

    if not simulation_result["completed"]:
        return [{"alpha_id": None, "simulate_data": x} for x in simulate_data_list]
    result = [
        {
            "alpha_id": x["id"],
            "simulate_data": {
                "type": x["type"],
                "settings": x["settings"],
                "regular": x["regular"]["code"],
            },
        }
        for x in simulation_result["result"]
    ]
    _ = [set_alpha_properties(s, x["id"]) for x in simulation_result["result"]]
    return result


def get_specified_alpha_stats(
    s: SingleSession,
    alpha_id: Union[str, None],
    simulate_data: dict,
    get_pnl: bool = False,
    get_stats: bool = False,
    save_pnl_file: bool = False,
    save_stats_file: bool = False,
    save_result_file: bool = False,
    check_submission: bool = False,
    check_self_corr: bool = False,
    check_prod_corr: bool = False,
) -> dict:
    """
    Retrieve and process specified statistics for a given alpha.

    Args:
        s (SingleSession): The authenticated session object.
        alpha_id (str): The ID of the alpha to retrieve statistics for.
        simulate_data (dict): The original simulation data for the alpha.
        get_pnl (bool, optional): Whether to retrieve PnL data. Defaults to False.
        get_stats (bool, optional): Whether to retrieve yearly stats. Defaults to False.
        save_pnl_file (bool, optional): Whether to save PnL data to a file. Defaults to False.
        save_stats_file (bool, optional): Whether to save yearly stats to a file. Defaults to False.
        save_result_file (bool, optional): Whether to save the simulation result to a file. Defaults to False.
        check_submission (bool, optional): Whether to perform submission checks. Defaults to False.
        check_self_corr (bool, optional): Whether to check self-correlation. Defaults to False.
        check_prod_corr (bool, optional): Whether to check production correlation. Defaults to False.

    Returns:
        dict: A dictionary containing various statistics and information about the alpha.

    Raises:
        requests.exceptions.RequestException: If there's an error retrieving data from the API.
    """
    pnl = None
    stats = None
    s = check_session_and_relogin(s)
    logger.debug(f"Session (ID: {id(s)}) used in get_specified_alpha_stats for alpha_id: {alpha_id}")
    if alpha_id is None:
        return {
            "alpha_id": None,
            "simulate_data": simulate_data,
            "is_stats": None,
            "pnl": pnl,
            "stats": stats,
            "is_tests": None,
            "train": None,
            "test": None,
        }

    result = get_simulation_result_json(s, alpha_id)
    try:
        region = result["settings"]["region"]
        is_stats = pd.DataFrame([{key: value for key, value in result['is'].items() if key != 'checks'}]).assign(
            alpha_id=alpha_id
        )
    except Exception as e:
        logger.error(f"Failed to retrieve simulation result for alpha_id {alpha_id}: {result}, {e}")
    train = result["train"]
    test = result["test"]
    is_stats = pd.DataFrame([{key: value for key, value in result["is"].items() if key != "checks"}]).assign(
        alpha_id=alpha_id
    )

    if get_pnl:
        pnl = get_alpha_pnl(s, alpha_id)
        if save_pnl_file:
            save_pnl(pnl, alpha_id, region)

    if get_stats:
        stats = get_alpha_yearly_stats(s, alpha_id)
        if save_stats_file:
            save_yearly_stats(stats, alpha_id, region)

    if save_result_file:
        save_simulation_result(result)

    is_tests = pd.DataFrame(result["is"]["checks"]).assign(alpha_id=alpha_id)

    if check_submission:
        is_tests = get_check_submission(s, alpha_id)

        return {
            "alpha_id": alpha_id,
            "simulate_data": simulate_data,
            "is_stats": is_stats,
            "pnl": pnl,
            "stats": stats,
            "is_tests": is_tests,
            "train": train,
            "test": test,
        }

    if check_self_corr and not check_submission:
        self_corr_test = check_self_corr_test(s, alpha_id)
        is_tests = (
            pd.concat([is_tests, self_corr_test], ignore_index=True)
            .drop_duplicates(subset=["test"], keep="last")
            .reset_index(drop=True)
        )
    if check_prod_corr and not check_submission:
        prod_corr_test = check_prod_corr_test(s, alpha_id)
        is_tests = (
            pd.concat([is_tests, prod_corr_test], ignore_index=True)
            .drop_duplicates(subset=["test"], keep="last")
            .reset_index(drop=True)
        )

    return {
        "alpha_id": alpha_id,
        "simulate_data": simulate_data,
        "is_stats": is_stats,
        "pnl": pnl,
        "stats": stats,
        "is_tests": is_tests,
        "train": train,
        "test": test,
    }


def simulate_single_alpha(
    s: SingleSession,
    simulate_data: dict,
) -> dict:
    """
    Simulate a single alpha using the provided session and simulation data.

    This function checks the session timeout, starts a new session if necessary,
    initiates the simulation, monitors its progress, and sets alpha properties
    upon completion.

    Args:
        s (SingleSession): An authenticated session object.
        simulate_data (dict): A dictionary containing the simulation parameters for the alpha.
            This should include all necessary information such as alpha type, settings, and expressions.

    Returns:
        dict: A dictionary containing:
            - 'alpha_id' (str): The ID of the simulated alpha if successful, None otherwise.
            - 'simulate_data' (dict): The original simulation data provided.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API requests.
    """

    s = check_session_and_relogin(s)
    simulate_response = start_simulation(s, simulate_data)
    simulation_result = simulation_progress(s, simulate_response)

    if not simulation_result["completed"]:
        return {"alpha_id": None, "simulate_data": simulate_data}
    set_alpha_properties(s, simulation_result["result"]["id"])
    return {
        "alpha_id": simulation_result["result"]["id"],
        "simulate_data": simulate_data,
    }
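
# End-to-end sketch for one alpha (illustrative only; requires an authenticated session and
# reuses the placeholder expression from the generate_alpha() example):
#
#     s = start_session()
#     alpha = generate_alpha(regular="rank(-returns)")
#     res = simulate_single_alpha(s, alpha)
#     print(res["alpha_id"])            # None if the simulation failed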


def simulate_alpha_list(
    s: SingleSession,
    alpha_list: list,
    limit_of_concurrent_simulations: int = 3,
    simulation_config: dict = DEFAULT_CONFIG,
) -> list:
    """
    Simulate a list of alphas concurrently.

    Args:
        s (SingleSession): The authenticated session object.
        alpha_list (list): A list of alpha configurations to simulate.
        limit_of_concurrent_simulations (int, optional): The maximum number of concurrent simulations. Defaults to 3.
        simulation_config (dict, optional): Configuration for the simulation. Defaults to DEFAULT_CONFIG.

    Returns:
        list: A list of dictionaries containing simulation results for each alpha.

    Raises:
        requests.exceptions.RequestException: If there's an error during the simulation process.
    """
    if (limit_of_concurrent_simulations < 1) or (limit_of_concurrent_simulations > 8):
        logger.warning("Limit of concurrent simulation should be 1..8, will be set to 3")
        limit_of_concurrent_simulations = 3

    result_list = []

    with ThreadPool(limit_of_concurrent_simulations) as pool:
        with tqdm.tqdm(total=len(alpha_list)) as pbar:
            for result in pool.imap_unordered(partial(simulate_single_alpha, s), alpha_list):
                result_list.append(result)
                pbar.update()

    stats_list_result = []

    def func(x):
        return get_specified_alpha_stats(s, x["alpha_id"], x["simulate_data"], **simulation_config)

    with ThreadPool(3) as pool:
        for result in pool.map(func, result_list):
            stats_list_result.append(result)

    return _delete_duplicates_from_result(stats_list_result)


def simulate_alpha_list_multi(
    s: SingleSession,
    alpha_list: list,
    limit_of_concurrent_simulations: int = 3,
    limit_of_multi_simulations: int = 3,
    simulation_config: dict = DEFAULT_CONFIG,
) -> list:
    """
    Simulate a list of alphas using multi-simulation when possible.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_list (list): A list of alpha configurations to simulate.
        limit_of_concurrent_simulations (int, optional): The maximum number of concurrent simulation batches. Defaults to 3.
        limit_of_multi_simulations (int, optional): The maximum number of alphas in a multi-simulation. Defaults to 3.
        simulation_config (dict, optional): Configuration for the simulation. Defaults to DEFAULT_CONFIG.

    Returns:
        list: A list of dictionaries containing simulation results for each alpha.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API requests.
    """
    if (limit_of_multi_simulations < 2) or (limit_of_multi_simulations > 10):
        logger.warning("Limit of multi-simulation should be 2..10, will be set to 3")
        limit_of_multi_simulations = 3
    if (limit_of_concurrent_simulations < 1) or (limit_of_concurrent_simulations > 8):
        logger.warning("Limit of concurrent simulation should be 1..8, will be set to 3")
        limit_of_concurrent_simulations = 3
    if len(alpha_list) < 10:
        logger.warning(
            "List of alphas too short, single concurrent simulations will be used instead of multisimulations"
        )
        return simulate_alpha_list(
            s,
            alpha_list,
            limit_of_concurrent_simulations=limit_of_concurrent_simulations,
            simulation_config=simulation_config,
        )
    if any(d["type"] == "SUPER" for d in alpha_list):
        logger.warning("Multi-Simulation is not supported for SuperAlphas, single concurrent simulations will be used")
        return simulate_alpha_list(
            s,
            alpha_list,
            limit_of_concurrent_simulations=3,
            simulation_config=simulation_config,
        )

    tasks = [
        alpha_list[i : i + limit_of_multi_simulations] for i in range(0, len(alpha_list), limit_of_multi_simulations)
    ]
    result_list = []

    with ThreadPool(limit_of_concurrent_simulations) as pool:
        with tqdm.tqdm(total=len(tasks)) as pbar:
            for result in pool.imap_unordered(partial(simulate_multi_alpha, s), tasks):
                result_list.append(result)
                pbar.update()
    result_list_flat = [item for sublist in result_list for item in sublist]

    stats_list_result = []

    def func(x):
        return get_specified_alpha_stats(s, x["alpha_id"], x["simulate_data"], **simulation_config)

    with ThreadPool(3) as pool:
        for result in pool.map(func, result_list_flat):
            stats_list_result.append(result)

    return _delete_duplicates_from_result(stats_list_result)
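
# Batch sketch (illustrative only; the expressions are placeholders and the config override
# mirrors the DEFAULT_CONFIG example above):
#
#     s = start_session()
#     alphas = [generate_alpha(regular=expr) for expr in ["rank(-returns)", "rank(close - open)"]]
#     results = simulate_alpha_list_multi(s, alphas, simulation_config={**DEFAULT_CONFIG, "get_stats": True})
#     # each element has keys: alpha_id, simulate_data, is_stats, pnl, stats, is_tests, train, test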


def _delete_duplicates_from_result(result: list) -> list:
    """
    Remove duplicate alpha results from the simulation output.

    Args:
        result (list): A list of dictionaries containing simulation results.

    Returns:
        list: A deduplicated list of simulation results.
    """
    alpha_id_lst = []
    result_new = []
    for x in result:
        if x["alpha_id"] is not None:
            if x["alpha_id"] not in alpha_id_lst:
                result_new.append(x)
                alpha_id_lst.append(x["alpha_id"])
        else:
            result_new.append(x)
    return result_new


def set_alpha_properties(
    s: SingleSession,
    alpha_id: str,
    name: Optional[str] = None,
    color: Optional[str] = None,
    selection_desc: str = "None",
    combo_desc: str = "None",
    tags: list[str] = ["ace_tag"],
) -> requests.Response:
    """
    Update the properties of an alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha to update.
        name (str, optional): The new name for the alpha. Defaults to None.
        color (str, optional): The new color for the alpha. Defaults to None.
        selection_desc (str, optional): Description for the selection part of a super alpha. Defaults to "None".
        combo_desc (str, optional): Description for the combo part of a super alpha. Defaults to "None".
        tags (list, optional): List of tags to apply to the alpha. Defaults to ["ace_tag"].

    Returns:
        requests.Response: The response object from the API call.
    """

    params = {
        "color": color,
        "name": name,
        "tags": tags,
        "category": None,
        "regular": {"description": None},
        "combo": {"description": combo_desc},
        "selection": {"description": selection_desc},
    }
    response = s.patch(brain_api_url + "/alphas/" + alpha_id, json=params)

    return response


def _get_alpha_pnl(
    s: SingleSession,
    alpha_id: str,
    pnl_type: str = "pnl",
) -> pd.DataFrame:
    """
    Retrieve the PnL data for a specific alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.
        pnl_type (str): 'pnl' to get cumulative PnL, 'daily-pnl' to get daily PnL.

    Returns:
        pandas.DataFrame: A DataFrame containing the PnL data for the alpha.
    """

    while True:
        result = s.get(brain_api_url + "/alphas/" + alpha_id + f"/recordsets/{pnl_type}")
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    pnl = result.json()
    if pnl.get("records", 0) == 0:
        return pd.DataFrame()
    columns = [dct["name"] for dct in pnl["schema"]["properties"]]
    pnl_df = (
        pd.DataFrame(pnl["records"], columns=columns)
        .assign(alpha_id=alpha_id, date=lambda x: pd.to_datetime(x.date, format="%Y-%m-%d"))
        .set_index("date")
    )
    return pnl_df


def get_alpha_pnl(s: SingleSession, alpha_id: str) -> pd.DataFrame:
    """
    Retrieve the cumulative PnL data for a specific alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.

    Returns:
        pandas.DataFrame: A DataFrame containing the PnL data for the alpha.
    """

    return _get_alpha_pnl(s, alpha_id, "pnl")


def get_alpha_yearly_stats(s: SingleSession, alpha_id: str) -> pd.DataFrame:
    """
    Retrieve the yearly statistics for a specific alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.

    Returns:
        pandas.DataFrame: A DataFrame containing the yearly statistics for the alpha.
    """

    while True:
        result = s.get(brain_api_url + "/alphas/" + alpha_id + "/recordsets/yearly-stats")
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    stats = result.json()

    if stats.get("records", 0) == 0:
        return pd.DataFrame()
    columns = [dct["name"] for dct in stats["schema"]["properties"]]
    yearly_stats_df = pd.DataFrame(stats["records"], columns=columns).assign(alpha_id=alpha_id)
    return yearly_stats_df


def get_datasets(
    s: SingleSession,
    instrument_type: str = "EQUITY",
    region: str = "USA",
    delay: int = 1,
    universe: str = "TOP3000",
    theme: str = "ALL",
) -> pd.DataFrame:
    """
    Retrieve available datasets based on specified parameters.

    Args:
        s (SingleSession): An authenticated session object.
        instrument_type (str, optional): The type of instrument. Defaults to "EQUITY".
        region (str, optional): The region. Defaults to "USA".
        delay (int, optional): The delay. Defaults to 1.
        universe (str, optional): The universe. Defaults to "TOP3000".
        theme (str, optional): The theme. Defaults to "ALL".

    Returns:
        pandas.DataFrame: A DataFrame containing information about available datasets.
    """
    if theme == "ALL":
        # Fetch both theme=false and theme=true
        url_false = (
            brain_api_url
            + "/data-sets?"
            + f"instrumentType={instrument_type}&region={region}&delay={str(delay)}&universe={universe}&theme=false"
        )
        result_false = s.get(url_false)
        df_false = pd.DataFrame(result_false.json()["results"])

        url_true = (
            brain_api_url
            + "/data-sets?"
            + f"instrumentType={instrument_type}&region={region}&delay={str(delay)}&universe={universe}&theme=true"
        )
        result_true = s.get(url_true)
        df_true = pd.DataFrame(result_true.json()["results"])

        datasets_df = pd.concat([df_false, df_true], ignore_index=True)
    else:
        url = (
            brain_api_url
            + "/data-sets?"
            + f"instrumentType={instrument_type}&region={region}&delay={str(delay)}&universe={universe}&theme={theme}"
        )
        result = s.get(url)
        datasets_df = pd.DataFrame(result.json()["results"])

    datasets_df = expand_dict_columns(datasets_df)
    return datasets_df
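
# Dataset discovery sketch (illustrative only; parameter values are the function defaults):
#
#     s = start_session()
#     datasets = get_datasets(s, region="USA", delay=1, universe="TOP3000")
#     print(datasets.head())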


def get_datafields(
    s: SingleSession,
    instrument_type: str = "EQUITY",
    region: str = "USA",
    delay: int = 1,
    universe: str = "TOP3000",
    theme: str = "false",
    dataset_id: str = "",
    data_type: str = "MATRIX",
    search: str = "",
) -> pd.DataFrame:
    """
    Retrieve available datafields based on specified parameters.

    Args:
        s (SingleSession): An authenticated session object.
        instrument_type (str, optional): The type of instrument. Defaults to "EQUITY".
        region (str, optional): The region. Defaults to "USA".
        delay (int, optional): The delay. Defaults to 1.
        universe (str, optional): The universe. Defaults to "TOP3000".
        theme (str, optional): The theme. Defaults to "false".
        dataset_id (str, optional): The ID of a specific dataset. Defaults to "".
        data_type (str, optional): The type of data. Defaults to "MATRIX".
        search (str, optional): A search string to filter datafields. Defaults to "".

    Returns:
        pandas.DataFrame: A DataFrame containing information about available datafields.
    """
    type_param = f"&type={data_type}" if data_type != "ALL" else ""
    if len(search) == 0:
        url_template = (
            brain_api_url
            + "/data-fields?"
            + f"&instrumentType={instrument_type}"
            + f"&region={region}&delay={str(delay)}&universe={universe}{type_param}&dataset.id={dataset_id}&limit=50"
            + "&offset={x}"
        )
        count = s.get(url_template.format(x=0)).json()["count"]
        if count == 0:
            logger.warning(
                f"No fields found: region={region}, delay={str(delay)}, universe={universe}, "
                f"type={data_type}, dataset.id={dataset_id}"
            )
            return pd.DataFrame()

    else:
        url_template = (
            brain_api_url
            + "/data-fields?"
            + f"&instrumentType={instrument_type}"
            + f"&region={region}&delay={str(delay)}&universe={universe}{type_param}&limit=50"
            + f"&search={search}"
            + "&offset={x}"
        )
        count = 100

    max_try = 5
    datafields_list = []
    for x in range(0, count, 50):
        for _ in range(max_try):
            datafields = s.get(url_template.format(x=x))
            while datafields.status_code == 429:
                print("status_code 429, sleep 3 seconds")
                time.sleep(3)
                datafields = s.get(url_template.format(x=x))
            if "results" in datafields.json():
                break
            time.sleep(5)

        datafields_list.append(datafields.json()["results"])

    datafields_list_flat = [item for sublist in datafields_list for item in sublist]

    datafields_df = pd.DataFrame(datafields_list_flat)
    datafields_df = expand_dict_columns(datafields_df)
    return datafields_df
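
# Datafield lookup sketch (illustrative only; "fundamental6" is a hypothetical dataset id):
#
#     s = start_session()
#     fields = get_datafields(s, dataset_id="fundamental6", data_type="MATRIX")
#     print(len(fields))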


def get_operators(s: SingleSession) -> pd.DataFrame:
    """
    Fetches and processes the list of operators from the WorldQuant Brain API.

    This function retrieves the operators from the provided session `s`,
    explodes the 'scope' column (which contains lists) into separate rows,
    and returns the resulting DataFrame.

    Args:
        s (SingleSession): An authenticated session object.

    Returns:
        pd.DataFrame: A DataFrame containing the operators with each scope entry
        as a separate row.
    """
    df = pd.DataFrame(s.get(brain_api_url + "/operators").json())
    return df.explode('scope').reset_index(drop=True)


def get_instrument_type_region_delay(s: SingleSession) -> pd.DataFrame:
    """
    Retrieves and organizes instrument type, region, and delay data into a DataFrame.

    Parameters:
        s (SingleSession): The session object used for making the API call.

    Returns:
        df (pd.DataFrame): A DataFrame containing the instrument type, region, delay, universe, and neutralization data.

    The function fetches the settings options from the simulations endpoint and extracts the 'Instrument type',
    'Region', 'Universe', 'Delay', and 'Neutralization' data. It then organizes this data into a list of dictionaries,
    each containing the instrument type, region, delay, universe, and neutralization for a particular combination
    of instrument type, region, and delay. This list is then converted into a DataFrame and returned.
    """

    settings_options = s.options('https://api.worldquantbrain.com/simulations').json()['actions']['POST']['settings'][
        'children'
    ]
    data = [
        {settings_options[key]['label']: settings_options[key]['choices']}
        for key in settings_options.keys()
        if settings_options[key]['type'] == 'choice'
    ]

    instrument_type_data = {}
    region_data = {}
    universe_data = {}
    delay_data = {}
    neutralization_data = {}

    for item in data:
        if 'Instrument type' in item:
            instrument_type_data = item['Instrument type']
        elif 'Region' in item:
            region_data = item['Region']['instrumentType']
        elif 'Universe' in item:
            universe_data = item['Universe']['instrumentType']
        elif 'Delay' in item:
            delay_data = item['Delay']['instrumentType']
        elif 'Neutralization' in item:
            neutralization_data = item['Neutralization']['instrumentType']

    data_list = []

    for instrument_type in instrument_type_data:
        for region in region_data[instrument_type['value']]:
            for delay in delay_data[instrument_type['value']]['region'][region['value']]:
                row = {'InstrumentType': instrument_type['value'], 'Region': region['value'], 'Delay': delay['value']}
                row['Universe'] = [
                    item['value'] for item in universe_data[instrument_type['value']]['region'][region['value']]
                ]
                row['Neutralization'] = [
                    item['value'] for item in neutralization_data[instrument_type['value']]['region'][region['value']]
                ]
                data_list.append(row)

    df = pd.DataFrame(data_list).sort_values(
        by=['InstrumentType', 'Region', 'Delay'], ascending=False, ignore_index=True
    )
    return df


def performance_comparison(
    s: SingleSession, alpha_id: str, team_id: Optional[str] = None, competition: Optional[str] = None
) -> dict:
    """
    Retrieve performance comparison data for the merged performance check.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.
        team_id (str, optional): The ID of the team for comparison. Defaults to None.
        competition (str, optional): The ID of the competition for comparison. Defaults to None.

    Returns:
        dict: A dictionary containing the performance comparison data.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """
    if competition is not None:
        part_url = f"competitions/{competition}"
    elif team_id is not None:
        part_url = f"teams/{team_id}"
    else:
        part_url = "users/self"
    while True:
        result = s.get(brain_api_url + f"/{part_url}/alphas/" + alpha_id + "/before-and-after-performance")
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
        else:
            break
    if result.json().get("stats", 0) == 0:
        logger.warning(f"Can't get performance comparison for alpha_id {alpha_id}. {result.json()}")
        return {}
    if result.status_code != 200:
        logger.warning(f"Can't get performance comparison for alpha_id {alpha_id}. {result.json()}")
        return {}

    return result.json()


def construct_selection_expression(
    selection: str,
    instrument_type: Literal["EQUITY"] = "EQUITY",
    region: str = "USA",
    delay: Literal[0, 1] = 1,
    selection_limit: int = 1000,
    selection_handling: str = "POSITIVE",
) -> dict:
    """
    Construct a dictionary containing parameters for a selection expression.

    This function creates a dictionary with the necessary parameters to define
    a selection expression for use in super alpha simulations.

    Args:
        selection (str): The selection expression to be used.
        instrument_type (str, optional): Instrument type to use.
            Defaults to "EQUITY".
        region (str, optional): The geographic region for the selection.
            Defaults to "USA".
        delay (int, optional): The delay parameter for the selection.
            Defaults to 1.
        selection_limit (int, optional): The maximum number of instruments
            to be selected. Defaults to 1000.
        selection_handling (str, optional): The method for handling the
            selection. Defaults to "POSITIVE".

    Returns:
        dict: A dictionary containing the constructed selection expression
        parameters, ready to be used in API calls or other functions.
    """
    selection_data = {
        "settings.instrumentType": instrument_type,
        "settings.region": region,
        "settings.delay": delay,
        "selection": selection,
        "limit": 10,
        "selectionLimit": selection_limit,
        "selectionHandling": selection_handling,
    }
    return selection_data


def run_selection(s: SingleSession, selection_data: dict) -> dict:
    """
    Run a selection simulation using the provided selection data.

    Args:
        s (SingleSession): An authenticated session object.
        selection_data (dict): A dictionary containing the selection parameters.

    Returns:
        dict: A dictionary containing the count of selected alphas and any messages.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """
    selection_response = s.get(brain_api_url + "/simulations/super-selection", params=selection_data)
    r = selection_response.json()
    selected_alphas_count = r.get("count")
    message = r.get("message", "")
    time.sleep(2)
    return {"selected_alphas_count": selected_alphas_count, "message": message}
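
# Super-alpha selection sketch (illustrative only; the selection expression is a placeholder):
#
#     s = start_session()
#     selection_data = construct_selection_expression(selection="sharpe > 1", region="USA", delay=1)
#     print(run_selection(s, selection_data))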


def get_alpha_daily_pnl(s: SingleSession, alpha_id: str) -> pd.DataFrame:
    """
    Retrieve the daily PnL data for a specific alpha.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha.

    Returns:
        pandas.DataFrame: A DataFrame containing the PnL data for the alpha.
    """

    return _get_alpha_pnl(s, alpha_id, "daily-pnl")


def submit_alpha(s: SingleSession, alpha_id: str) -> bool:
    """
    Submit an alpha for evaluation.

    Args:
        s (SingleSession): An authenticated session object.
        alpha_id (str): The ID of the alpha to submit.

    Returns:
        bool: True if the submission was successful, False otherwise.

    Raises:
        requests.exceptions.RequestException: If there's an error in the API request.
    """
    result = s.post(brain_api_url + "/alphas/" + alpha_id + "/submit")
    while True:
        if "retry-after" in result.headers:
            time.sleep(float(result.headers["Retry-After"]))
            result = s.get(brain_api_url + "/alphas/" + alpha_id + "/submit")
        else:
            break
    return result.status_code == 200
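
# Submission sketch (illustrative only; "XXXXXXX" stands for a real alpha id returned by a
# simulation):
#
#     s = start_session()
#     ok = submit_alpha(s, "XXXXXXX")
#     print("submitted" if ok else "submission failed")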