You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
133 lines
3.7 KiB
133 lines
3.7 KiB
# -*- coding: utf-8 -*-
|
|
'''
Translate data-set descriptions using Ollama.
'''
|
|
|
|
import psycopg2
|
|
import httpx
|
|
import time
|
|
import sys
|
|
|
|
def translate_with_ollama(text, model="qwen2.5:7b", *,
                          url="http://localhost:11434/api/generate",
                          timeout=180.0):
    """Translate *text* into Chinese by prompting an Ollama model.

    The text is embedded in a fixed Chinese prompt that instructs the model
    to act as a financial-data expert translating option-trading field names
    ("call", "put" and "breakeven" have prescribed translations) and to
    reply with the translation only. A single non-streaming request is
    POSTed to the generate endpoint.

    Args:
        text: Source text to translate.
        model: Name of the Ollama model to use.
        url: Generate endpoint to POST to; defaults to a local instance.
        timeout: Request timeout in seconds passed to ``httpx.post``.

    Returns:
        The translation with surrounding whitespace stripped, or ``None``
        when *text* is empty/blank, the request times out or fails, or the
        model's answer is empty.
    """
    # Nothing to translate: skip the network round trip entirely.
    if not text or not text.strip():
        return None

    prompt = f"""你是一个金融数据专家,负责翻译期权交易领域的专业字段名。
规则:
1. 在此上下文中,“call” 指“看涨期权”,切勿翻译为“呼叫”。
2. "put" 指"看跌期权"。
3. “breakeven” 统一翻译为“盈亏平衡点”。
4. 整体翻译需简洁,符合数据库字段名的命名习惯。
5. 输出仅返回翻译后的中文,不要任何解释。

请翻译:{text}"""

    # "stream": False asks for the whole answer in one JSON document.
    payload = {"model": model, "prompt": prompt, "stream": False}

    try:
        response = httpx.post(url, json=payload, timeout=timeout)
        response.raise_for_status()
        result = response.json()
        # A whitespace-only answer is not a translation; report it the same
        # way as every other failure so callers only have to test for None.
        return result['response'].strip() or None
    except httpx.TimeoutException:
        print(f"\n[超时] {text[:60]}...")
        return None
    except Exception as e:
        # Deliberate best-effort boundary: any transport, HTTP-status or
        # payload-shape problem is logged and reported as "no translation".
        print(f"\n[请求失败] {text[:30]}... - {e}")
        return None
|
|
|
|
# Connection parameters passed straight to psycopg2.connect().
# NOTE(review): the credentials (including the password) are hard-coded in
# source; consider loading them from the environment or a config file.
db_config = {
    'host': '192.168.31.201',
    'port': 5432,
    'database': 'alpha',
    'user': 'jack',
    'password': 'aaaAAA111',
}

# Only rows with id >= start_from_id are selected; raise it to resume a run.
start_from_id = 0

# Bound before the try block so that every handler below (notably the
# KeyboardInterrupt one) can safely reference them even when the failure
# happens before the connection exists or before the loop starts.
conn = None
cur = None
success_count = 0
fail_count = 0
last_success_id = start_from_id

# Number of translation attempts per row (loop-invariant).
max_retries = 2

try:
    conn = psycopg2.connect(**db_config)
    cur = conn.cursor()

    # Rows that have a description but no Chinese translation yet.
    cur.execute("""
        SELECT id, description
        FROM operator
        WHERE description IS NOT NULL
          AND description_cn IS NULL
          AND id >= %s
        ORDER BY id ASC
    """, (start_from_id,))

    rows = cur.fetchall()
    total = len(rows)

    if total == 0:
        print("没有需要翻译的记录!")
        # SystemExit is not caught by the handlers below; the finally
        # clause still closes the cursor and the connection.
        sys.exit(0)

    print(f"找到 {total} 条待翻译记录 (从ID {start_from_id} 开始)")
    print("=" * 60)

    for idx, (row_id, description) in enumerate(rows, 1):
        percent = (idx / total) * 100
        # "\r" rewrites the same console line to act as a progress indicator.
        sys.stdout.write(f"\r处理进度: {idx}/{total} ({percent:.1f}%) | 成功: {success_count} | 失败: {fail_count}")
        sys.stdout.flush()

        # Try the translation up to max_retries times, pausing between tries.
        translate_text = None
        for attempt in range(max_retries):
            translate_text = translate_with_ollama(description)
            if translate_text:
                break
            if attempt < max_retries - 1:
                time.sleep(3)

        if translate_text:
            print(translate_text)
            try:
                cur.execute(
                    "UPDATE operator SET description_cn = %s WHERE id = %s",
                    (translate_text, row_id)
                )
                # Commit row by row: the translation call dominates the run
                # time, and this keeps the counters and last_success_id in
                # step with what is actually stored.
                conn.commit()
            except psycopg2.Error as e:
                print(f"\n[更新失败] ID:{row_id} - {e}")
                # A failed statement leaves the transaction aborted; without
                # a rollback every later statement on this connection would
                # be rejected as well.
                conn.rollback()
                fail_count += 1
            else:
                success_count += 1
                last_success_id = row_id
        else:
            fail_count += 1

        # Brief pause between rows to avoid hammering the model server.
        time.sleep(1)

    print(f"\n" + "=" * 60)
    print("翻译任务完成!")
    print(f"✓ 成功翻译: {success_count} 条")
    print(f"✗ 翻译失败: {fail_count} 条")
    print(f"最后成功处理的ID: {last_success_id}")

except psycopg2.Error as e:
    print(f"\n[数据库错误] {e}")
except KeyboardInterrupt:
    print(f"\n\n用户中断程序")
    print(f"已处理到ID: {last_success_id}")
    # Persist an update that was executed but not yet committed when the
    # interrupt arrived; skipped if the connection was never opened.
    if conn is not None and not conn.closed:
        conn.commit()
except Exception as e:
    print(f"\n[程序错误] {e}")
finally:
    # Single cleanup point for every exit path.
    if cur is not None:
        cur.close()
    if conn is not None:
        conn.close()