# -*- coding: utf-8 -*-
"""
Automatically remove data older than a specified number of days.

The script reads MongoDB and SMTP settings from ``config.json`` in the
project root, deletes every document whose ``create_time`` (a Unix
timestamp in seconds) is older than the retention period from each
configured database, and records start / delete / done entries in the
``logs`` database.
"""
import json
import os
import smtplib
import sys
import threading
import time
from datetime import datetime
from email.header import Header
from email.mime.text import MIMEText

import pymongo

# Make the project root (everything up to and including 'auto') importable.
sys.path.append(os.path.join(os.path.abspath(__file__).split('auto')[0] + 'auto'))
base_project = os.path.join(os.getcwd().split('auto')[0], 'auto')

config_path = os.path.join(base_project, 'config.json')
with open(config_path, 'r', encoding='utf-8') as f:
    config_json = json.load(f)

if not config_json:
    print('No config file found')
    # sys.exit instead of the site-provided exit(), which is not guaranteed
    # to exist (e.g. under ``python -S``). The exit status is left unchanged.
    sys.exit(0)

PROJECT_NAME = config_json.get('PROJECT_NAME')
DB_USER = config_json.get('DB_USER')
DB_PASSWORD = config_json.get('DB_PASSWORD')
DB_IP = config_json.get('DB_IP')
DB_PORT = config_json.get('DB_PORT')
MONGO_LINK = f'mongodb://{DB_USER}:{DB_PASSWORD}@{DB_IP}:{DB_PORT}/'
MAIL_HOST = config_json.get('MAIL_HOST')
MAIL_USER = config_json.get('MAIL_USER')
MAIL_PASS = config_json.get('MAIL_PASS')
MAIL_SENDER = config_json.get('MAIL_SENDER')
MAIL_RECEIVERS = config_json.get('MAIL_RECEIVERS')
now_day = time.strftime('%Y-%m-%d', time.localtime())
rss_base_url = 'http://home.erhe.link:20002/xmlfile/'
base_project = os.path.join(os.getcwd().split(PROJECT_NAME)[0], PROJECT_NAME)

SECONDS_PER_DAY = 24 * 60 * 60


class MongoHandle(object):
    """Thin wrapper around a single MongoDB collection.

    After construction ``self.db`` is the pymongo database object and
    ``self.collection`` is the pymongo collection object.
    """

    def __init__(self, db, collection, del_db=False, del_collection=False, auto_remove=0):
        """Connect to MongoDB and select ``db`` / ``collection``.

        Args:
            db: Database name.
            collection: Collection name.
            del_db: If true, drop the database first when it exists.
            del_collection: If true, drop the collection first when it exists.
            auto_remove: If non-zero, immediately delete documents older than
                this many days from the collection.
        """
        self.client = pymongo.MongoClient(MONGO_LINK)

        # Drop the database only if it actually exists.
        if del_db and db and db in self.client.list_database_names():
            self.client.drop_database(db)
        self.db = self.client[db]

        # Drop the collection only if it actually exists.
        if del_collection and collection and collection in self.db.list_collection_names():
            self.db.drop_collection(collection)
        self.collection = self.db[collection]

        if auto_remove:
            self.auto_remove_data(auto_remove)

    def write_data(self, data):
        """Insert one document into the collection."""
        self.collection.insert_one(data)

    def auto_remove_data(self, day):
        """Delete documents whose ``create_time`` is more than ``day`` days old."""
        cutoff = int(time.time()) - day * SECONDS_PER_DAY
        # A single server-side delete replaces the find + per-document
        # delete_one loop (one round trip instead of one per document).
        self.collection.delete_many({'create_time': {'$lt': cutoff}})


class SendEmail(object):
    """Send a plain-text e-mail through the SMTP server from the config."""

    def __init__(self, subject='auto subject', title='auto title', text='auto text') -> None:
        """Store the SMTP settings and the message parts.

        Args:
            subject: Value of the ``Subject`` header.
            title: Value of the ``From`` header.
            text: Plain-text message body.
        """
        # Third-party SMTP service
        self.mail_host = MAIL_HOST  # SMTP server
        self.mail_user = MAIL_USER  # user name
        self.mail_pass = MAIL_PASS  # password
        self.sender = MAIL_SENDER
        # Accept either a single address or a list of addresses in the
        # config; wrapping an existing list again would produce [[...]].
        if isinstance(MAIL_RECEIVERS, (list, tuple)):
            self.receivers = list(MAIL_RECEIVERS)
        else:
            self.receivers = [MAIL_RECEIVERS]
        self.subject = subject
        self.title = title
        self.text = text

    def send(self):
        """Send the message; failures are printed, never raised."""
        message = MIMEText(self.text, 'plain', 'utf-8')
        message['From'] = Header(self.title, 'utf-8')
        message['To'] = Header("auto", 'utf-8')
        message['Subject'] = Header(self.subject, 'utf-8')

        try:
            # The context manager issues QUIT and closes the socket, so the
            # connection is no longer leaked.
            with smtplib.SMTP_SSL(self.mail_host) as smtp:
                smtp.login(self.mail_user, self.mail_pass)
                smtp.sendmail(self.sender, self.receivers, message.as_string())
                print("邮件发送成功")
        except (smtplib.SMTPException, OSError) as e:
            # OSError covers connection-level failures (DNS, refused
            # connection) so a best-effort notification cannot crash the job.
            print("Error: 无法发送邮件", e)


class LogsHandle(object):
    """Write run logs to the ``logs`` database, one collection per day."""

    def __init__(self):
        """Open the ``logs_<YYYY-MM-DD>`` collection for the current local date."""
        self.now_day = time.strftime('%Y-%m-%d', time.localtime())
        db = 'logs'
        collection = 'logs_' + self.now_day
        self.mongo = MongoHandle(db=db, collection=collection, del_db=False, del_collection=False, auto_remove=0)

    def logs_write(self, title_source=None, content=None, state=None, send_now=False):
        """Insert one log record and optionally e-mail it.

        Args:
            title_source: Stored under the ``title`` key.
            content: Stored under the ``context`` key.
            state: Stored under the ``state`` key.
            send_now: If true, also send the record by e-mail.
        """
        data_to_insert = {
            "title": title_source,
            "context": content,
            "state": state,
            "create_time": int(time.time()),
            "create_datetime": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        }

        self.mongo.collection.insert_one(data_to_insert)

        if send_now:
            subject = 'auto collection'
            title = 'auto collection - running logs: {}'.format(self.now_day)
            # The detail is stored under 'context'; the previous lookup of
            # 'content' always produced None in the e-mail body.
            text = 'logs_source: {}, logs_detail: {}, state: {} logs_create_time: {}'.format(
                data_to_insert['title'],
                data_to_insert['context'],
                data_to_insert['state'],
                data_to_insert['create_datetime'],
            )
            mailer = SendEmail(subject=subject, title=title, text=text)
            mailer.send()


class AutoRemoveData(object):
    """Delete expired documents from every collection of the listed databases."""

    def __init__(self):
        """Set the target databases, the retention period and the connections."""
        self.databases = [
            'spider_news',
            'apprcn',
            'HelloGithub'
        ]
        self.day = 60  # retention period in days
        self.client = pymongo.MongoClient(MONGO_LINK)
        self.logs = LogsHandle()
        self.all_delete_count = 0
        # auto_remove_data runs in one thread per database; the shared
        # counter must be updated under a lock.
        self._count_lock = threading.Lock()

    def auto_remove_data(self, db_name, day):
        """Delete documents older than ``day`` days from all collections of ``db_name``.

        Does nothing (beyond the initial print) when the database does not
        exist. Adds the number of deleted documents to
        ``self.all_delete_count`` and writes a log record when anything was
        deleted.
        """
        print(f'准备删除时间大于: {day} 数据')
        if db_name not in self.client.list_database_names():
            return

        cutoff = int(time.time()) - day * SECONDS_PER_DAY
        db = self.client[db_name]
        deleted_count = 0
        for collection_name in db.list_collection_names():
            result = db[collection_name].delete_many({'create_time': {'$lt': cutoff}})
            deleted_count += result.deleted_count

        with self._count_lock:
            self.all_delete_count += deleted_count

        msg = f"删除 {db_name} 库 {day} 天以上数据 {deleted_count} 条"
        if deleted_count:
            print(msg)
            self.logs.logs_write(f'自动删除 {day} 天以上数据', msg, 'delete', False)

    def main(self):
        """Run the clean-up for every configured database, one thread each."""
        self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'开始自动删除 {self.day} 天以上数据', 'start', False)

        threads = []
        for db_name in self.databases:
            thread = threading.Thread(target=self.auto_remove_data, args=(db_name, self.day))
            threads.append(thread)
            thread.start()

        # Wait for every database to finish before reporting totals.
        for thread in threads:
            thread.join()

        print(f'删除时间大于: {self.day} 数据, 已完成')
        print(f'本次运行共删除: {self.all_delete_count} 条数据')
        self.logs.logs_write(f'自动删除 {self.day} 天以上数据', f'自动删除 {self.day} 天数以上数据完成', 'done', False)


if __name__ == "__main__":
    A = AutoRemoveData()
    A.main()