# You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
# FieldDownloader/get_category.py
#
# 130 lines
# 4.1 KiB

# -*- coding: utf-8 -*-
import os
import json
import random
import time
import httpx
from httpx import BasicAuth
class CategoryDownloader:
    """Download data-set listings for one category from the WorldQuant BRAIN API.

    Constructing an instance immediately attempts to log in. ``client`` then
    holds the authenticated ``httpx.Client``, or ``None`` when login failed,
    so callers should check ``downloader.client`` before fetching.

    Credentials are read from the environment variables named by
    ``USERNAME_ENV`` and ``PASSWORD_ENV``; they must never be committed to
    source control.
    """

    # Names of the environment variables that supply the login credentials.
    USERNAME_ENV = 'WQB_USERNAME'
    PASSWORD_ENV = 'WQB_PASSWORD'
    # Page size sent as ``limit`` to the ``/data-sets`` endpoint.
    PAGE_LIMIT = 50
    # (min, max) seconds to wait between requests, to stay gentle on the API.
    REQUEST_DELAY_RANGE = (5, 8)

    def __init__(self):
        self.base_api_url = 'https://api.worldquantbrain.com'
        self.client = self.login()

    def login(self):
        """Log in and return the client instance.

        Returns:
            An ``httpx.Client`` configured with basic auth when the
            ``/authentication`` endpoint answers 200 or 201; ``None`` when the
            credentials are missing, the status is anything else, or the
            request raises.
        """
        username = os.environ.get(self.USERNAME_ENV)
        password = os.environ.get(self.PASSWORD_ENV)
        if not username or not password:
            print(f"登录失败: 请设置环境变量 {self.USERNAME_ENV} 和 {self.PASSWORD_ENV}")
            return None

        client = httpx.Client(auth=BasicAuth(username, password))
        try:
            response = client.post(f'{self.base_api_url}/authentication')
            print(f"登录状态: {response.status_code}")
            if response.status_code in [200, 201]:
                print("登录成功!")
                return client
            print(f"登录失败: {response.json()}")
        except Exception as e:
            # Best-effort boundary: any failure here just means "not logged in".
            print(f"登录过程中出现错误: {e}")
        # Every path that reaches this point failed; release the connection pool.
        client.close()
        return None

    def fetch_category_data(self, category, delay, instrumentType, region_list, universe_list):
        """Fetch the category's data sets and save them to a JSON file.

        One query is issued per (region, universe) pair, following pagination
        so that pairs with more than ``PAGE_LIMIT`` data sets are not truncated.

        Args:
            category: Value for the ``category`` query parameter.
            delay: Value for the ``delay`` query parameter.
            instrumentType: Value for the ``instrumentType`` query parameter.
            region_list: Iterable of ``region`` values to query.
            universe_list: Iterable of ``universe`` values to query.

        Returns:
            A list of ``{'id', 'region', 'universe'}`` dicts, one per data set
            returned by the API. The same list is written to
            ``category_files/<category>.json``. The list is empty (and the
            file is still written) when there is no logged-in client.
        """
        results = []

        # Create the category_files folder on first use.
        output_dir = "category_files"
        if not os.path.exists(output_dir):
            os.makedirs(output_dir, exist_ok=True)
            print(f"已创建文件夹: {output_dir}")

        if self.client:
            for region in region_list:
                for universe in universe_list:
                    results.extend(
                        self._fetch_all_pages(category, delay, instrumentType, region, universe)
                    )
                    self._pause()

        # Save to the JSON file.
        filename = os.path.join(output_dir, f"{category}.json")
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        print(f"数据已保存到: {filename}")
        print(f"总共找到 {len(results)} 条记录")
        return results

    def _fetch_all_pages(self, category, delay, instrumentType, region, universe):
        """Return every data set for one region/universe pair, page by page."""
        collected = []
        offset = 0
        while True:
            url = (
                f'{self.base_api_url}/data-sets?category={category}&delay={delay}'
                f'&instrumentType={instrumentType}&limit={self.PAGE_LIMIT}&offset={offset}'
                f'&region={region}&universe={universe}'
            )
            print(f"请求URL: {url}")
            page_size = 0
            total = 0
            try:
                response = self.client.get(url)
                if response.status_code != 200:
                    print(f"请求失败: {response.status_code}")
                    break
                data = response.json()
                # Assumes `count` is the total number of matches across all
                # pages -- TODO confirm against the API docs. If it is only the
                # page size, the loop simply stops after the first page.
                total = data.get('count', 0)
                page = data.get('results', []) if total > 0 else []
                for item in page:
                    collected.append({
                        'id': item.get('id', ''),
                        'region': item.get('region', ''),
                        'universe': item.get('universe', ''),
                    })
                page_size = len(page)
            except Exception as e:
                # Best effort: keep what was collected and move on to the next pair.
                print(f"请求过程中出现错误: {e}")
                break

            offset += page_size
            # An empty page guarantees termination even if `count` is wrong.
            if page_size == 0 or offset >= total:
                break
            self._pause()
        return collected

    def _pause(self):
        """Sleep for a random interval within ``REQUEST_DELAY_RANGE`` seconds."""
        low, high = self.REQUEST_DELAY_RANGE
        time.sleep(random.uniform(low, high))
if __name__ == "__main__":
    # Script entry point: log in, then download one category for every
    # region/universe combination listed below.
    #
    # Categories that can be selected (set `selected_category` to one of them):
    #   analyst, broker, earnings, fundamental, imbalance, insiders,
    #   institutions, macro, model, news, option, other, pv, risk,
    #   sentiment, shortinterest, socialmedia
    downloader = CategoryDownloader()

    # Nothing is fetched when login failed (client is None).
    if downloader.client:
        selected_category = 'socialmedia'
        regions = "USA GLB EUR ASI CHN KOR TWN IND".split()
        # 'MINVOL1M' is intentionally left out of the universes for now.
        universes = "TOP3000 TOP1000 TOP500 TOP200 TOPSP500 ILLIQUID_MINVOL1M".split()

        downloader.fetch_category_data(
            category=selected_category,
            delay='1',
            instrumentType='EQUITY',
            region_list=regions,
            universe_list=universes,
        )