import sqlite3
import os


def _rebuild_with_id(cursor, conn, columns):
    """Rebuild data_sets as a new table prefixed with an AUTOINCREMENT id column.

    *columns* is the PRAGMA table_info() row list of the original table.
    Returns True on success, False if the copied row count does not match the
    backup (in which case the original table is left untouched).
    """
    print("\n2. 备份原表...")
    # Drop any stale backup from a previous failed run, then snapshot the table.
    cursor.execute("DROP TABLE IF EXISTS data_sets_backup")
    cursor.execute("CREATE TABLE data_sets_backup AS SELECT * FROM data_sets")
    cursor.execute("SELECT COUNT(*) FROM data_sets_backup")
    backup_rows = cursor.fetchone()[0]
    print(f"✓ 已创建备份表 data_sets_backup ({backup_rows}行)")

    print("\n3. 获取原表结构...")
    # Reconstruct each column definition (name, type, NOT NULL, DEFAULT)
    # from the PRAGMA table_info rows.
    column_defs = []
    for col_info in columns:
        col_name, col_type = col_info[1], col_info[2]
        col_notnull, col_default = col_info[3], col_info[4]
        col_def = f"{col_name} {col_type}"
        if col_notnull:
            col_def += " NOT NULL"
        if col_default is not None:
            col_def += f" DEFAULT {col_default}"
        column_defs.append(col_def)

    print("\n4. 创建新表...")
    create_sql = f"""
        CREATE TABLE data_sets_new (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            {", ".join(column_defs)}
        )
    """
    cursor.execute("DROP TABLE IF EXISTS data_sets_new")
    cursor.execute(create_sql)
    print("✓ 已创建带id的新表 data_sets_new")

    print("\n5. 复制数据...")
    columns_str = ", ".join(col[1] for col in columns)
    cursor.execute(
        f"INSERT INTO data_sets_new ({columns_str}) SELECT {columns_str} FROM data_sets"
    )
    conn.commit()

    cursor.execute("SELECT COUNT(*) FROM data_sets_new")
    new_rows = cursor.fetchone()[0]
    print(f"✓ 已复制 {new_rows} 行数据到新表")

    if new_rows != backup_rows:
        # Abort before touching the original table; caller stops the migration.
        print(f"警告: 行数不匹配! 原表:{backup_rows}, 新表:{new_rows}")
        return False

    print("\n6. 替换原表...")
    cursor.execute("DROP TABLE data_sets")
    cursor.execute("ALTER TABLE data_sets_new RENAME TO data_sets")
    conn.commit()
    print("✓ 已用新表替换原表")

    cursor.execute("SELECT COUNT(*) FROM data_sets")
    total_rows = cursor.fetchone()[0]
    print(f"✓ 迁移完成,当前表行数: {total_rows}")
    return True


def _cleanup_database(cursor, conn):
    """Drop the backup table, sync sqlite_sequence, rebuild indexes, VACUUM."""
    print("\n7. 删除备份表...")
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='data_sets_backup'"
    )
    if cursor.fetchone():
        cursor.execute("DROP TABLE data_sets_backup")
        print("✓ 已删除备份表 data_sets_backup")
    else:
        print("✓ 备份表已不存在")

    print("\n8. 清理sqlite_sequence...")
    cursor.execute("SELECT MAX(id) FROM data_sets")
    max_id = cursor.fetchone()[0] or 0

    # sqlite_sequence only exists once some AUTOINCREMENT table has been
    # created; guard so a database without it does not crash here.
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name='sqlite_sequence'"
    )
    if cursor.fetchone() is None:
        print("✓ sqlite_sequence表不存在,跳过")
    else:
        cursor.execute("SELECT seq FROM sqlite_sequence WHERE name='data_sets'")
        seq_data = cursor.fetchone()
        if seq_data:
            current_seq = seq_data[0]
            if current_seq < max_id:
                cursor.execute(
                    "UPDATE sqlite_sequence SET seq=? WHERE name='data_sets'",
                    (max_id,),
                )
                print(f"✓ 已更新序列值: {current_seq} -> {max_id}")
            else:
                print(f"✓ 序列值已最新: {current_seq}")
        else:
            cursor.execute(
                "INSERT INTO sqlite_sequence (name, seq) VALUES ('data_sets', ?)",
                (max_id,),
            )
            print(f"✓ 已创建序列: data_sets = {max_id}")

    print("\n9. 创建索引...")
    cursor.execute(
        "SELECT name FROM sqlite_master WHERE type='index' AND tbl_name='data_sets'"
    )
    for (idx_name,) in cursor.fetchall():
        # sqlite_autoindex_* entries back UNIQUE/PRIMARY KEY constraints and
        # cannot be dropped; attempting to would raise OperationalError.
        if not idx_name.startswith("sqlite_autoindex"):
            # Quote the identifier: it comes from sqlite_master, not user input,
            # but may contain characters needing quoting.
            cursor.execute(f'DROP INDEX IF EXISTS "{idx_name}"')

    # Only index columns that actually exist, so a schema without
    # universe/region does not abort (and roll back) the whole migration.
    cursor.execute("PRAGMA table_info(data_sets)")
    present = {col[1] for col in cursor.fetchall()}
    if "universe" in present:
        cursor.execute("CREATE INDEX idx_universe ON data_sets(universe)")
    if "region" in present:
        cursor.execute("CREATE INDEX idx_region ON data_sets(region)")
    if {"universe", "region"} <= present:
        cursor.execute(
            "CREATE INDEX idx_universe_region ON data_sets(universe, region)"
        )
    print("✓ 已创建索引: idx_universe, idx_region, idx_universe_region")

    print("\n10. 优化数据库...")
    # VACUUM cannot run inside a transaction; commit first.
    conn.commit()
    conn.execute("VACUUM")
    print("✓ 已执行VACUUM优化")


def _verify_tables(cursor):
    """Print the final schema, row/id statistics, and the list of DB objects."""
    print("\n11. 验证结果...")
    cursor.execute("PRAGMA table_info(data_sets)")
    final_columns = [col[1] for col in cursor.fetchall()]
    print(f"✓ 最终表结构: {', '.join(final_columns)}")

    cursor.execute(
        """
        SELECT COUNT(*) as total,
               MIN(id) as min_id,
               MAX(id) as max_id,
               COUNT(DISTINCT id) as distinct_ids
        FROM data_sets
        """
    )
    stats = cursor.fetchone()
    print(
        f"✓ 数据统计: 总行数={stats[0]}, ID范围={stats[1]}-{stats[2]}, 唯一ID数={stats[3]}"
    )

    # Smoke-test a grouped query of the shape the application uses.
    cursor.execute(
        """
        SELECT count(id) as count_per_id
        FROM data_sets
        WHERE universe='TOP3000' AND region='USA'
        GROUP BY id
        LIMIT 3
        """
    )
    if cursor.fetchall():
        print(f"✓ 分组查询测试成功 (示例前3个)")

    cursor.execute(
        """
        SELECT name, type
        FROM sqlite_master
        WHERE type IN ('table', 'index')
          AND name NOT LIKE 'sqlite_%'
        ORDER BY type, name
        """
    )
    print("\n✓ 数据库对象:")
    for obj_name, obj_type in cursor.fetchall():
        print(f"  - {obj_name} ({obj_type})")


def migrate_and_clean(db_path="data_sets.db"):
    """Migrate the data_sets table to carry an AUTOINCREMENT id column, then
    clean up the backup table, sqlite_sequence, and indexes.

    Idempotent: if the table already has an id column, the rebuild (steps 2-6)
    is skipped and only the cleanup/verification steps run.

    Args:
        db_path: Path to the SQLite database file. Defaults to 'data_sets.db'
            for backward compatibility with the original script behaviour.
    """
    if not os.path.exists(db_path):
        print(f"错误: 数据库文件 {db_path} 不存在")
        return

    # Initialize before the try so except/finally never hit a NameError
    # when sqlite3.connect itself fails.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        print("=" * 60)
        print("开始迁移data_sets表...")
        print("=" * 60)

        print("\n1. 检查原表状态...")
        cursor.execute(
            "SELECT name FROM sqlite_master WHERE type='table' AND name='data_sets'"
        )
        if not cursor.fetchone():
            print("错误: data_sets表不存在")
            return

        cursor.execute("PRAGMA table_info(data_sets)")
        columns = cursor.fetchall()
        column_names = [col[1] for col in columns]

        if "id" in column_names:
            print("✓ 表已有id列,跳过迁移")
            cursor.execute("SELECT COUNT(*) FROM data_sets")
            total_rows = cursor.fetchone()[0]
            print(f"当前表行数: {total_rows}")
        else:
            print(f"原表有 {len(column_names)} 个列: {', '.join(column_names)}")
            if not _rebuild_with_id(cursor, conn, columns):
                # Row-count mismatch: stop before cleanup, keep backup for triage.
                return

        print("\n" + "=" * 60)
        print("开始清理数据库...")
        print("=" * 60)

        _cleanup_database(cursor, conn)
        _verify_tables(cursor)
        conn.commit()

        print("\n" + "=" * 60)
        print("✅ 迁移和清理完成!")
        print("=" * 60)
        print(f"数据库文件: {db_path}")
        print(f"文件大小: {os.path.getsize(db_path) / 1024 / 1024:.2f} MB")
        print(f"数据表: data_sets (带自增id)")
        print("已清理: 备份表、优化序列、重建索引")
        print("=" * 60)

    except Exception as e:
        print(f"\n❌ 错误: {e}")
        print("正在回滚...")
        if conn:
            conn.rollback()
    finally:
        if conn:
            conn.close()


if __name__ == "__main__":
    migrate_and_clean()