# -*-coding: utf-8 -*-
"""Fetch recent "ssq" lottery draw notices from cwl.gov.cn into SQLite."""
import datetime
import os
import sqlite3
from contextlib import closing

from selenium import webdriver
import httpx

# Schema of the target table (unchanged from the original statement).
_CREATE_TABLE_SQL = (
    'create table if not exists `ssq` (id INT PRIMARY KEY NOT NULL, `code` varchar(10),`red1` varchar(2),`red2` varchar(2),`red3` varchar(2),`red4` varchar(2),`red5` varchar(2),`red6` varchar(2),`blue` varchar(2),`date` varchar(12),`sales` varchar(15),`poolmoney` varchar(15),`content` varchar(255));'
)

# Parameterized insert: values are bound by sqlite3, never spliced into SQL.
_INSERT_SQL = (
    "INSERT INTO ssq (id, code, red1, red2, red3, red4, red5, red6, "
    "blue, date, sales, poolmoney, content) "
    "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
)

# Number of red balls stored per draw (columns red1..red6).
_RED_COUNT = 6


def get_cookies(url):
    """Open *url* in headless Chrome and return the cookies it sets.

    Args:
        url: Page to load.

    Returns:
        The list of cookie dicts reported by the driver, or ``None`` when
        the page set no cookies.
    """
    chrome_options = webdriver.ChromeOptions()
    for arg in ('--headless', '--no-sandbox', '--disable-gpu',
                '--disable-dev-shm-usage'):
        chrome_options.add_argument(arg)

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        result_cookie = driver.get_cookies()
    finally:
        # Always shut the browser down; otherwise every call leaks a
        # chrome/chromedriver process.
        driver.quit()

    return result_cookie or None


def req(url, cookies):
    """GET *url* with the given cookie header and store the draws it returns.

    On a non-200 response the status code is printed and appended to today's
    log file, and nothing is stored.

    Args:
        url: Endpoint returning JSON with a ``result`` list.
        cookies: Value sent verbatim as the ``Cookie`` request header.
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Connection": "keep-alive",
        "Cookie": cookies,
        "Host": "www.cwl.gov.cn",
        "User-Agent": "Mozilla/5.0"
    }
    with httpx.Client() as client:
        res = client.get(url, headers=headers, follow_redirects=True)

    if res.status_code != 200:
        print(res.status_code)
        # NOTE(review): `get_path` is not imported anywhere in this file;
        # it must be brought into scope for this branch to work.
        log_file_path = os.path.join(
            get_path.get_logs_path(), str(datetime.date.today()) + '.log')
        with open(log_file_path, 'a', encoding='utf-8') as f:
            # The original wrote the literal "%s"; interpolate the status.
            # NOTE(review): the "spider_dlt" tag looks copied from another
            # spider (this one handles ssq) - confirm before renaming.
            f.write("\n spider_dlt: %s" % res.status_code)
        return

    res_json = res.json()
    data_handle(res_json['result'])


def _build_row(row_id, data):
    """Turn one draw record into the 13-value tuple expected by _INSERT_SQL."""
    reds = (data.get('red') or '').split(',')
    if len(reds) < _RED_COUNT:
        raise ValueError(
            "draw %r: expected %d red numbers, got %r"
            % (data.get('code'), _RED_COUNT, data.get('red')))
    return (
        row_id,
        data.get('code'),
        *reds[:_RED_COUNT],
        data.get('blue'),
        data.get('date'),
        data.get('sales'),
        data.get('poolmoney'),
        data.get('content'),
    )


def data_handle(source_data):
    """Replace the contents of the ``ssq`` table with *source_data*.

    Rows are numbered from 1 in input order. All records are validated
    before the database is touched, and the delete + insert run in a single
    transaction, so a failure leaves the previous contents in place. An
    empty or ``None`` input only ensures the table exists.

    Args:
        source_data: Iterable of dicts with keys ``code``, ``red`` (six
            comma-separated numbers), ``blue``, ``date``, ``sales``,
            ``poolmoney`` and ``content``. Records are not modified.

    Raises:
        ValueError: If a record has fewer than six red numbers.
    """
    rows = [
        _build_row(row_id, data)
        for row_id, data in enumerate(source_data or (), start=1)
    ]

    # NOTE(review): `utils_get_path` is not imported anywhere in this file;
    # it must be brought into scope for this function to work.
    ssq_db_path = os.path.join(utils_get_path.get_db_path(), 'ssq.db')

    with closing(sqlite3.connect(ssq_db_path)) as conn:
        conn.execute(_CREATE_TABLE_SQL)
        if not rows:
            return
        # The original dropped an unrelated table named `data`, so a second
        # run collided on the PRIMARY KEY. Clear `ssq` itself instead.
        with conn:  # commits on success, rolls back on error
            conn.execute('DELETE FROM ssq')
            conn.executemany(_INSERT_SQL, rows)


if __name__ == "__main__":
    url = 'http://www.cwl.gov.cn/cwl_admin/front/cwlkj/search/kjxx/findDrawNotice?name=ssq&issueCount=&issueStart=&issueEnd=&dayStart=&dayEnd=&pageNo=1&pageSize=10&week=&systemType=PC'
    # result_cookie = util_get_cookies.get_cookies(url)
    #
    # cookies = '{}={}'.format(result_cookie[0].setdefault('name'), result_cookie[0].setdefault('value'))
    #
    # print(cookies)

    # Cookies used for testing
    cookies = "HMF_CI=1b2fd73192f2054a429b2bfa4f58c3ff98119441420133cc8a04ca9c95aa2266eaec5bb7cf1d37df5f9864b8629ba407bacc9c58cadf26e2d726582df3870b0969"
    req(url, cookies)