jack 2 weeks ago
parent 5b15972069
commit a7ab5d48c9
  1. BIN
      data_sets.db
  2. 220
      migrate_data_sets.py

Binary file not shown.

@ -0,0 +1,220 @@
import sqlite3
import os
def migrate_and_clean(db_path='data_sets.db'):
    """Migrate the data_sets table to gain an AUTOINCREMENT id column, then
    clean up the database.

    Steps: back up the table, rebuild it with an ``id INTEGER PRIMARY KEY
    AUTOINCREMENT`` column, copy all rows, swap the tables, drop the backup,
    reconcile ``sqlite_sequence``, rebuild the secondary indexes, and VACUUM.

    Args:
        db_path: Path to the SQLite database file. Defaults to
            ``'data_sets.db'`` for backward compatibility with the original
            hard-coded path.

    The migration is idempotent: when ``data_sets`` already has an ``id``
    column, the copy/swap steps are skipped and only cleanup runs.
    """
    if not os.path.exists(db_path):
        print(f"错误: 数据库文件 {db_path} 不存在")
        return

    conn = None  # pre-bind so except/finally are safe if connect() raises
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        print("="*60)
        print("开始迁移data_sets表...")
        print("="*60)

        print("\n1. 检查原表状态...")
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='data_sets'")
        if not cursor.fetchone():
            print("错误: data_sets表不存在")
            return

        # Check whether the id column already exists (makes reruns safe).
        cursor.execute("PRAGMA table_info(data_sets)")
        columns = cursor.fetchall()
        column_names = [col[1] for col in columns]
        if 'id' in column_names:
            print("✓ 表已有id列,跳过迁移")
            # Skip straight to the cleanup steps below.
            cursor.execute("SELECT COUNT(*) FROM data_sets")
            total_rows = cursor.fetchone()[0]
            print(f"当前表行数: {total_rows}")
        else:
            print(f"原表有 {len(column_names)} 个列: {', '.join(column_names)}")

            print("\n2. 备份原表...")
            # Drop any stale backup left behind by a previous failed run.
            cursor.execute("DROP TABLE IF EXISTS data_sets_backup")
            cursor.execute("CREATE TABLE data_sets_backup AS SELECT * FROM data_sets")
            cursor.execute("SELECT COUNT(*) FROM data_sets_backup")
            backup_rows = cursor.fetchone()[0]
            print(f"✓ 已创建备份表 data_sets_backup ({backup_rows}行)")

            print("\n3. 获取原表结构...")
            # Rebuild each column definition (name, type, NOT NULL, DEFAULT)
            # from the PRAGMA table_info rows captured above.
            column_defs = []
            for col_info in columns:
                col_name = col_info[1]
                col_type = col_info[2]
                col_notnull = col_info[3]
                col_default = col_info[4]
                col_def = f"{col_name} {col_type}"
                if col_notnull:
                    col_def += " NOT NULL"
                if col_default is not None:
                    col_def += f" DEFAULT {col_default}"
                column_defs.append(col_def)

            print("\n4. 创建新表...")
            # New table: same columns, plus a leading AUTOINCREMENT id.
            create_sql = f"""
            CREATE TABLE data_sets_new (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                {", ".join(column_defs)}
            )
            """
            cursor.execute("DROP TABLE IF EXISTS data_sets_new")
            cursor.execute(create_sql)
            print("✓ 已创建带id的新表 data_sets_new")

            print("\n5. 复制数据...")
            original_columns = [col[1] for col in columns]
            columns_str = ", ".join(original_columns)
            insert_sql = f"""
            INSERT INTO data_sets_new ({columns_str})
            SELECT {columns_str} FROM data_sets
            """
            cursor.execute(insert_sql)
            conn.commit()
            cursor.execute("SELECT COUNT(*) FROM data_sets_new")
            new_rows = cursor.fetchone()[0]
            print(f"✓ 已复制 {new_rows} 行数据到新表")
            if new_rows != backup_rows:
                # Abort BEFORE dropping the original table; the backup and
                # the partially-filled new table are left for inspection.
                print(f"警告: 行数不匹配! 原表:{backup_rows}, 新表:{new_rows}")
                return

            print("\n6. 替换原表...")
            cursor.execute("DROP TABLE data_sets")
            cursor.execute("ALTER TABLE data_sets_new RENAME TO data_sets")
            conn.commit()
            print("✓ 已用新表替换原表")
            cursor.execute("SELECT COUNT(*) FROM data_sets")
            total_rows = cursor.fetchone()[0]
            print(f"✓ 迁移完成,当前表行数: {total_rows}")

        print("\n" + "="*60)
        print("开始清理数据库...")
        print("="*60)

        print("\n7. 删除备份表...")
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='data_sets_backup'")
        if cursor.fetchone():
            cursor.execute("DROP TABLE data_sets_backup")
            print("✓ 已删除备份表 data_sets_backup")
        else:
            print("✓ 备份表已不存在")

        print("\n8. 清理sqlite_sequence...")
        cursor.execute("SELECT MAX(id) FROM data_sets")
        max_id = cursor.fetchone()[0] or 0
        # sqlite_sequence only exists once some AUTOINCREMENT table has had
        # rows inserted; on the skip-migration path it may be absent, so
        # check first instead of letting the SELECT raise OperationalError.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='sqlite_sequence'")
        if cursor.fetchone():
            cursor.execute("SELECT seq FROM sqlite_sequence WHERE name='data_sets'")
            seq_data = cursor.fetchone()
            if seq_data:
                current_seq = seq_data[0]
                if current_seq < max_id:
                    cursor.execute("UPDATE sqlite_sequence SET seq=? WHERE name='data_sets'", (max_id,))
                    print(f"✓ 已更新序列值: {current_seq} -> {max_id}")
                else:
                    print(f"✓ 序列值已最新: {current_seq}")
            else:
                cursor.execute("INSERT INTO sqlite_sequence (name, seq) VALUES ('data_sets', ?)", (max_id,))
                print(f"✓ 已创建序列: data_sets = {max_id}")
        else:
            print("✓ sqlite_sequence表不存在,跳过")

        print("\n9. 创建索引...")
        # Drop only regular secondary indexes. sqlite_autoindex_* entries
        # back UNIQUE / PRIMARY KEY constraints and cannot be dropped —
        # attempting to would raise an OperationalError.
        cursor.execute(
            "SELECT name FROM sqlite_master "
            "WHERE type='index' AND tbl_name='data_sets' "
            "AND name NOT LIKE 'sqlite_%'"
        )
        existing_indexes = cursor.fetchall()
        for (idx_name,) in existing_indexes:
            # Index names cannot be bound as parameters; quote the
            # identifier (it comes from sqlite_master, not user input).
            cursor.execute(f'DROP INDEX IF EXISTS "{idx_name}"')
        # Recreate the query-supporting indexes.
        cursor.execute("CREATE INDEX idx_universe ON data_sets(universe)")
        cursor.execute("CREATE INDEX idx_region ON data_sets(region)")
        cursor.execute("CREATE INDEX idx_universe_region ON data_sets(universe, region)")
        print("✓ 已创建索引: idx_universe, idx_region, idx_universe_region")

        print("\n10. 优化数据库...")
        conn.commit()  # VACUUM requires no open transaction
        conn.execute("VACUUM")
        print("✓ 已执行VACUUM优化")

        print("\n11. 验证结果...")
        cursor.execute("PRAGMA table_info(data_sets)")
        final_columns = [col[1] for col in cursor.fetchall()]
        print(f"✓ 最终表结构: {', '.join(final_columns)}")
        # Sanity-check the id column: every row has a distinct id.
        cursor.execute("""
            SELECT COUNT(*) as total,
                   MIN(id) as min_id,
                   MAX(id) as max_id,
                   COUNT(DISTINCT id) as distinct_ids
            FROM data_sets
        """)
        stats = cursor.fetchone()
        print(f"✓ 数据统计: 总行数={stats[0]}, ID范围={stats[1]}-{stats[2]}, 唯一ID数={stats[3]}")
        # Smoke-test a grouped query against the new indexes.
        cursor.execute("""
            SELECT count(id) as count_per_id
            FROM data_sets
            WHERE universe='TOP3000' AND region='USA'
            GROUP BY id
            LIMIT 3
        """)
        test_results = cursor.fetchall()
        if test_results:
            print(f"✓ 分组查询测试成功 (示例前3个)")
        # List the remaining user-visible tables and indexes.
        cursor.execute("""
            SELECT name, type
            FROM sqlite_master
            WHERE type IN ('table', 'index')
            AND name NOT LIKE 'sqlite_%'
            ORDER BY type, name
        """)
        print("\n✓ 数据库对象:")
        for obj_name, obj_type in cursor.fetchall():
            print(f" - {obj_name} ({obj_type})")

        conn.commit()
        print("\n" + "="*60)
        print("✅ 迁移和清理完成!")
        print("="*60)
        print(f"数据库文件: {db_path}")
        print(f"文件大小: {os.path.getsize(db_path) / 1024 / 1024:.2f} MB")
        print(f"数据表: data_sets (带自增id)")
        print("已清理: 备份表、优化序列、重建索引")
        print("="*60)
    except Exception as e:
        print(f"\n❌ 错误: {e}")
        print("正在回滚...")
        if conn is not None:  # connect() itself may have failed
            conn.rollback()
    finally:
        if conn is not None:
            conn.close()
if __name__ == "__main__":
migrate_and_clean()
Loading…
Cancel
Save