You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
AlphaGenerator/manual_tools/translation_data_sets.py

132 lines
3.6 KiB

# -*- coding: utf-8 -*-
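'''
Translate the ``description`` column of the ``data_sets`` table into Chinese
with a local Ollama model and store the result in ``description_cn``.
'''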
import psycopg2
import httpx
import time
import sys
_OLLAMA_GENERATE_URL = "http://localhost:11434/api/generate"


def _build_translation_prompt(text):
    """Return the instruction prompt asking the model to translate *text*."""
    prompt_lines = (
        "你是一个金融数据专家,负责翻译期权交易领域的专业字段名。",
        "规则:",
        "1. 在此上下文中,“call” 指“看涨期权”,切勿翻译为“呼叫”。",
        # This rule previously read `"put""看跌期权"` with no verb between the
        # two quoted terms; it is now phrased like the "call" rule above.
        "2. “put” 指“看跌期权”。",
        "3. “breakeven” 统一翻译为“盈亏平衡点”。",
        "4. 整体翻译需简洁,符合数据库字段名的命名习惯。",
        "5. 输出仅返回翻译后的中文,不要任何解释。",
        f"请翻译:{text}",
    )
    return "\n".join(prompt_lines)


def translate_with_ollama(text, model="qwen2.5:7b", *,
                          url=_OLLAMA_GENERATE_URL, timeout=180.0):
    """Translate *text* into Chinese through an Ollama ``/api/generate`` call.

    Args:
        text: Source text to translate.
        model: Name of the Ollama model to query.
        url: Address of the Ollama generate endpoint.
        timeout: Request timeout in seconds.

    Returns:
        The stripped translation, or ``None`` when *text* is empty or blank,
        when the request fails or times out, or when the model answers with
        an empty string.
    """
    if not text or not text.strip():
        return None

    payload = {
        "model": model,
        "prompt": _build_translation_prompt(text),
        "stream": False,  # ask for one JSON document instead of a stream
    }
    try:
        response = httpx.post(url, json=payload, timeout=timeout)
        response.raise_for_status()
        translated = response.json()['response'].strip()
    except httpx.TimeoutException:
        print(f"\n[超时] {text[:60]}...")
        return None
    except Exception as e:
        # Deliberately broad: the caller treats None as "translation failed"
        # and moves on, so HTTP errors, malformed JSON and a missing
        # 'response' key are all reported the same way instead of aborting.
        print(f"\n[请求失败] {text[:30]}... - {e}")
        return None

    # Normalise an empty answer to None so every failure mode looks alike.
    return translated or None
# Connection settings handed to psycopg2.connect() as keyword arguments.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a secrets store instead.
db_config = dict(
    host='192.168.31.201',
    port=5432,
    database='alpha',
    user='jack',
    password='aaaAAA111',
)

# Lower bound (inclusive) on data_sets.id for the rows selected for translation.
start_from_id = 0
def _translate_with_retry(description, max_retries=2, retry_delay=3):
    """Return the first non-empty translation within *max_retries* attempts, else None."""
    for attempt in range(max_retries):
        translated = translate_with_ollama(description)
        if translated:
            return translated
        if attempt < max_retries - 1:
            # Wait before retrying; no need to wait after the last attempt.
            time.sleep(retry_delay)
    return None


def main():
    """Translate every untranslated ``data_sets.description`` and store it.

    Selects rows with a non-NULL ``description``, a NULL ``description_cn``
    and ``id >= start_from_id``, translates each one with
    ``translate_with_ollama`` and writes the result to ``description_cn``.
    Progress and a final summary are printed to stdout.

    Each successful update is committed on its own and a failed update is
    rolled back, so one bad row cannot leave the transaction aborted and
    discard the rows around it.
    """
    # Initialised before the try block so the handlers below can always
    # reference them, even if an error or Ctrl-C happens during connect.
    conn = None
    success_count = 0
    fail_count = 0
    last_success_id = start_from_id

    try:
        conn = psycopg2.connect(**db_config)
        with conn.cursor() as cur:
            cur.execute("""
                SELECT id, description
                FROM data_sets
                WHERE description IS NOT NULL
                AND description_cn IS NULL
                AND id >= %s
                ORDER BY id ASC
            """, (start_from_id,))
            rows = cur.fetchall()
            total = len(rows)
            if not rows:
                print("没有需要翻译的记录!")
                return

            print(f"找到 {total} 条待翻译记录 (从ID {start_from_id} 开始)")
            print("=" * 60)

            for idx, (row_id, description) in enumerate(rows, 1):
                percent = (idx / total) * 100
                sys.stdout.write(
                    f"\r处理进度: {idx}/{total} ({percent:.1f}%) | 成功: {success_count} | 失败: {fail_count}"
                )
                sys.stdout.flush()

                translate_text = _translate_with_retry(description)
                if translate_text:
                    try:
                        cur.execute(
                            "UPDATE data_sets SET description_cn = %s WHERE id = %s",
                            (translate_text, row_id),
                        )
                        # Commit per row: a translation takes far longer than
                        # a commit, and the reported ID then matches what is
                        # actually stored.
                        conn.commit()
                    except psycopg2.Error as e:
                        # Leave the aborted transaction so later rows can
                        # still be written.
                        conn.rollback()
                        print(f"\n[更新失败] ID:{row_id} - {e}")
                        fail_count += 1
                    else:
                        success_count += 1
                        last_success_id = row_id
                else:
                    fail_count += 1

                # NOTE(review): pauses after every row; confirm this is the
                # intended cadence (versus once per 10 rows).
                time.sleep(1)

        print("\n" + "=" * 60)
        print("翻译任务完成!")
        print(f"✓ 成功翻译: {success_count}")
        print(f"✗ 翻译失败: {fail_count}")
        print(f"最后成功处理的ID: {last_success_id}")
    except psycopg2.Error as e:
        print(f"\n[数据库错误] {e}")
    except KeyboardInterrupt:
        # Every finished row is already committed; an update that was in
        # flight is discarded when the connection closes and is picked up
        # again on the next run because description_cn is still NULL.
        print("\n\n用户中断程序")
        print(f"已处理到ID: {last_success_id}")
    except Exception as e:
        # Top-level boundary of a manual tool: report and exit cleanly.
        print(f"\n[程序错误] {e}")
    finally:
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    main()