import os
import sqlite3


def _quote_ident(name):
    """Return *name* quoted as an SQLite identifier, doubling embedded quotes."""
    return '"' + str(name).replace('"', '""') + '"'


def _table_exists(cursor, name):
    """Return True if a table called *name* is listed in sqlite_master."""
    cursor.execute(
        "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?", (name,)
    )
    return cursor.fetchone() is not None


def _rebuild_with_id(conn, cursor, columns):
    """Rebuild data_sets with a leading AUTOINCREMENT ``id`` column.

    *columns* is the row list returned by ``PRAGMA table_info(data_sets)``.
    Returns True when the table was swapped, False when the copied row count
    did not match the backup (the original table is then left untouched).
    """
    names = [col[1] for col in columns]
    print(f"原表有 {len(names)} 个列: {', '.join(names)}")

    print("\n2. 备份原表...")
    # Drop a stale backup from a previous run before taking a fresh one.
    cursor.execute("DROP TABLE IF EXISTS data_sets_backup")
    cursor.execute("CREATE TABLE data_sets_backup AS SELECT * FROM data_sets")
    cursor.execute("SELECT COUNT(*) FROM data_sets_backup")
    backup_rows = cursor.fetchone()[0]
    print(f"✓ 已创建备份表 data_sets_backup ({backup_rows}行)")

    print("\n3. 获取原表结构...")
    column_defs = []
    for col in columns:
        col_name, col_type, col_notnull, col_default = col[1:5]
        # Quote the name so spaces / reserved words survive the rebuild.
        col_def = f"{_quote_ident(col_name)} {col_type}".rstrip()
        if col_notnull:
            col_def += " NOT NULL"
        if col_default is not None:
            # table_info reports expression defaults without their enclosing
            # parentheses; re-wrapping is valid for literals and expressions.
            col_def += f" DEFAULT ({col_default})"
        column_defs.append(col_def)

    print("\n4. 创建新表...")
    create_sql = (
        "CREATE TABLE data_sets_new ("
        "id INTEGER PRIMARY KEY AUTOINCREMENT, "
        + ", ".join(column_defs)
        + ")"
    )
    cursor.execute("DROP TABLE IF EXISTS data_sets_new")
    cursor.execute(create_sql)
    print("✓ 已创建带id的新表 data_sets_new")

    print("\n5. 复制数据...")
    columns_str = ", ".join(_quote_ident(n) for n in names)
    cursor.execute(
        f"INSERT INTO data_sets_new ({columns_str}) "
        f"SELECT {columns_str} FROM data_sets"
    )
    conn.commit()

    cursor.execute("SELECT COUNT(*) FROM data_sets_new")
    new_rows = cursor.fetchone()[0]
    print(f"✓ 已复制 {new_rows} 行数据到新表")

    if new_rows != backup_rows:
        print(f"警告: 行数不匹配! 原表:{backup_rows}, 新表:{new_rows}")
        return False

    print("\n6. 替换原表...")
    # Explicit transaction so the drop + rename either both happen or neither
    # does (the caller rolls back on any exception).
    cursor.execute("BEGIN")
    cursor.execute("DROP TABLE data_sets")
    cursor.execute("ALTER TABLE data_sets_new RENAME TO data_sets")
    conn.commit()
    print("✓ 已用新表替换原表")

    cursor.execute("SELECT COUNT(*) FROM data_sets")
    total_rows = cursor.fetchone()[0]
    print(f"✓ 迁移完成,当前表行数: {total_rows}")
    return True


def _sync_sequence(cursor):
    """Make sure sqlite_sequence's entry for data_sets is at least MAX(id)."""
    # sqlite_sequence only exists once some table uses AUTOINCREMENT; a
    # pre-existing plain ``id`` column does not create it.
    if not _table_exists(cursor, "sqlite_sequence"):
        print("✓ sqlite_sequence 不存在,跳过")
        return

    cursor.execute("SELECT MAX(id) FROM data_sets")
    max_id = cursor.fetchone()[0] or 0

    cursor.execute("SELECT seq FROM sqlite_sequence WHERE name='data_sets'")
    seq_data = cursor.fetchone()
    if seq_data is None:
        cursor.execute(
            "INSERT INTO sqlite_sequence (name, seq) VALUES ('data_sets', ?)",
            (max_id,),
        )
        print(f"✓ 已创建序列: data_sets = {max_id}")
        return

    current_seq = seq_data[0]
    if current_seq < max_id:
        cursor.execute(
            "UPDATE sqlite_sequence SET seq=? WHERE name='data_sets'", (max_id,)
        )
        print(f"✓ 已更新序列值: {current_seq} -> {max_id}")
    else:
        print(f"✓ 序列值已最新: {current_seq}")


def _rebuild_indexes(cursor):
    """Drop the user-created indexes on data_sets and create the standard three."""
    # ``sql IS NOT NULL`` filters out sqlite_autoindex_* entries that back
    # UNIQUE / PRIMARY KEY constraints; SQLite refuses to drop those.
    cursor.execute(
        "SELECT name FROM sqlite_master "
        "WHERE type='index' AND tbl_name='data_sets' AND sql IS NOT NULL"
    )
    for (idx_name,) in cursor.fetchall():
        cursor.execute(f"DROP INDEX IF EXISTS {_quote_ident(idx_name)}")

    cursor.execute("CREATE INDEX idx_universe ON data_sets(universe)")
    cursor.execute("CREATE INDEX idx_region ON data_sets(region)")
    cursor.execute(
        "CREATE INDEX idx_universe_region ON data_sets(universe, region)"
    )
    print("✓ 已创建索引: idx_universe, idx_region, idx_universe_region")


def _verify(cursor):
    """Print the final schema, id statistics and the list of database objects."""
    cursor.execute("PRAGMA table_info(data_sets)")
    final_columns = [col[1] for col in cursor.fetchall()]
    print(f"✓ 最终表结构: {', '.join(final_columns)}")

    cursor.execute("""
        SELECT COUNT(*) as total,
               MIN(id) as min_id,
               MAX(id) as max_id,
               COUNT(DISTINCT id) as distinct_ids
        FROM data_sets
    """)
    stats = cursor.fetchone()
    print(
        f"✓ 数据统计: 总行数={stats[0]}, ID范围={stats[1]}-{stats[2]}, "
        f"唯一ID数={stats[3]}"
    )

    # Smoke-test a grouped query against the new id column.
    cursor.execute("""
        SELECT count(id) as count_per_id
        FROM data_sets
        WHERE universe='TOP3000' AND region='USA'
        GROUP BY id
        LIMIT 3
    """)
    if cursor.fetchall():
        print("✓ 分组查询测试成功 (示例前3个)")

    cursor.execute("""
        SELECT name, type FROM sqlite_master
        WHERE type IN ('table', 'index')
          AND name NOT LIKE 'sqlite_%'
        ORDER BY type, name
    """)
    print("\n✓ 数据库对象:")
    for obj_name, obj_type in cursor.fetchall():
        print(f"  - {obj_name} ({obj_type})")


def migrate_and_clean(db_path: str = 'data_sets.db') -> None:
    """Add an AUTOINCREMENT ``id`` column to ``data_sets`` and tidy the database.

    Steps: back up the table, rebuild it with a leading ``id`` column (skipped
    when an ``id`` column already exists), drop the backup, sync
    ``sqlite_sequence``, recreate the universe/region indexes, VACUUM, and
    print a verification summary.

    Args:
        db_path: Path of the SQLite file to migrate. Defaults to
            ``'data_sets.db'`` in the current working directory.

    Progress and errors are reported on stdout; exceptions are caught, the
    open transaction is rolled back, and the function returns ``None``.
    """
    if not os.path.exists(db_path):
        print(f"错误: 数据库文件 {db_path} 不存在")
        return

    # Bound before ``try`` so the handlers below never hit an unbound name
    # when ``sqlite3.connect`` itself fails.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        print("=" * 60)
        print("开始迁移data_sets表...")
        print("=" * 60)

        print("\n1. 检查原表状态...")
        if not _table_exists(cursor, "data_sets"):
            print("错误: data_sets表不存在")
            return

        cursor.execute("PRAGMA table_info(data_sets)")
        columns = cursor.fetchall()

        if any(col[1] == 'id' for col in columns):
            print("✓ 表已有id列,跳过迁移")
            cursor.execute("SELECT COUNT(*) FROM data_sets")
            total_rows = cursor.fetchone()[0]
            print(f"当前表行数: {total_rows}")
        elif not _rebuild_with_id(conn, cursor, columns):
            # Row-count mismatch: keep the original table and the backup.
            return

        print("\n" + "=" * 60)
        print("开始清理数据库...")
        print("=" * 60)

        print("\n7. 删除备份表...")
        if _table_exists(cursor, "data_sets_backup"):
            cursor.execute("DROP TABLE data_sets_backup")
            print("✓ 已删除备份表 data_sets_backup")
        else:
            print("✓ 备份表已不存在")

        print("\n8. 清理sqlite_sequence...")
        _sync_sequence(cursor)

        print("\n9. 创建索引...")
        _rebuild_indexes(cursor)

        print("\n10. 优化数据库...")
        # VACUUM cannot run inside a transaction, so commit first.
        conn.commit()
        conn.execute("VACUUM")
        print("✓ 已执行VACUUM优化")

        print("\n11. 验证结果...")
        _verify(cursor)

        conn.commit()

        print("\n" + "=" * 60)
        print("✅ 迁移和清理完成!")
        print("=" * 60)
        print(f"数据库文件: {db_path}")
        print(f"文件大小: {os.path.getsize(db_path) / 1024 / 1024:.2f} MB")
        print("数据表: data_sets (带自增id)")
        print("已清理: 备份表、优化序列、重建索引")
        print("=" * 60)

    except Exception as e:
        # Top-level boundary of a CLI script: report the error and undo any
        # open transaction instead of crashing with a traceback.
        print(f"\n❌ 错误: {e}")
        print("正在回滚...")
        if conn is not None:
            conn.rollback()
    finally:
        if conn is not None:
            conn.close()


if __name__ == "__main__":
    migrate_and_clean()