You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
94 lines
3.4 KiB
94 lines
3.4 KiB
# -*-coding: utf-8 -*-
|
|
import datetime
|
|
import os
|
|
import sqlite3
|
|
from selenium import webdriver
|
|
import httpx
|
|
|
|
|
|
def get_cookies(url):
    """Load *url* in headless Chrome and return the cookies the browser holds.

    Args:
        url: Address to open with Selenium's Chrome driver.

    Returns:
        The list returned by ``driver.get_cookies()`` when it is non-empty,
        otherwise ``None``.
    """
    chrome_options = webdriver.ChromeOptions()
    for arg in ('--headless', '--no-sandbox', '--disable-gpu', '--disable-dev-shm-usage'):
        chrome_options.add_argument(arg)

    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        result_cookie = driver.get_cookies()
    finally:
        # Always shut the browser down; without this every call leaves a
        # Chrome/chromedriver process running, even when the page load fails.
        driver.quit()

    # Same contract as before: an empty cookie list is reported as None.
    return result_cookie or None
|
|
|
|
|
|
def req(url, cookies):
    """Request *url* and pass the ``result`` field of the JSON reply to ``data_handle``.

    Args:
        url: Endpoint to fetch.
        cookies: String sent verbatim as the ``Cookie`` request header.

    Returns:
        None. When the response status is not 200, the status code is printed
        and appended to today's log file, and nothing is handed to
        ``data_handle``.
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Connection": "keep-alive",
        "Cookie": cookies,
        "Host": "www.cwl.gov.cn",
        "User-Agent": "Mozilla/5.0",
    }

    with httpx.Client() as client:
        res = client.get(url, headers=headers, follow_redirects=True)

        if res.status_code != 200:
            print(res.status_code)
            # NOTE(review): `get_path` is not imported in the visible part of
            # this file (data_handle uses `utils_get_path`) - confirm which
            # module is intended.
            log_file_path = os.path.join(get_path.get_logs_path(), str(datetime.date.today()) + '.log')
            with open(log_file_path, 'a') as f:
                # The "%s" placeholder used to be written out literally;
                # fill it with the failing status code so the entry is useful.
                f.write("\n spider_dlt: %s" % res.status_code)
            return

        res_json = res.json()
        data_handle(res_json['result'])
|
|
|
|
|
|
def data_handle(source_data, db_path=None):
    """Rebuild the ``ssq`` table in the SQLite database from draw records.

    Each record is a mapping read with the keys ``code``, ``red`` (a
    comma-separated string whose first six items become ``red1``..``red6``),
    ``blue``, ``date``, ``sales``, ``poolmoney`` and ``content``. Rows are
    numbered from 1 in input order. Missing keys other than ``red`` are
    stored as NULL.

    Args:
        source_data: Iterable of record mappings; ``None`` is treated as empty.
        db_path: Optional path of the SQLite file. Defaults to ``ssq.db``
            inside the directory returned by ``utils_get_path.get_db_path()``.

    Raises:
        ValueError: If a record has fewer than six ``red`` values.
    """
    if db_path is None:
        db_path = os.path.join(utils_get_path.get_db_path(), 'ssq.db')

    # Build and validate every row before touching the database, so malformed
    # input cannot leave the table dropped and half-filled.
    rows = []
    for row_id, data in enumerate(source_data or [], start=1):
        reds = (data.get('red') or '').split(',')
        if len(reds) < 6:
            raise ValueError(
                'record %d: expected 6 red values, got %r' % (row_id, data.get('red'))
            )
        rows.append((
            row_id,
            data.get('code'),
            *reds[:6],
            data.get('blue'),
            data.get('date'),
            data.get('sales'),
            data.get('poolmoney'),
            data.get('content'),
        ))

    create_sql = 'create table if not exists `ssq` (id INT PRIMARY KEY NOT NULL, `code` varchar(10),`red1` varchar(2),`red2` varchar(2),`red3` varchar(2),`red4` varchar(2),`red5` varchar(2),`red6` varchar(2),`blue` varchar(2),`date` varchar(12),`sales` varchar(15),`poolmoney` varchar(15),`content` varchar(255));'
    # Placeholders instead of string formatting: the values come from a remote
    # site, and a single quote in `content` used to break (or inject into) the SQL.
    insert_sql = (
        'INSERT INTO `ssq` (`id`, `code`, `red1`, `red2`, `red3`, `red4`, `red5`, '
        '`red6`, `blue`, `date`, `sales`, `poolmoney`, `content`) '
        'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)'
    )

    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits the inserts once on success and rolls them back
        # on error.
        with conn:
            if rows:
                # The old code dropped a table named `data`, so `ssq` kept its
                # rows and the next run failed on the primary key (ids restart
                # at 1). Drop the table that is actually being rebuilt; empty
                # input leaves existing rows alone.
                conn.execute('drop table if exists `ssq`;')
            conn.execute(create_sql)
            conn.executemany(insert_sql, rows)
    finally:
        conn.close()
|
|
|
|
|
|
if __name__ == "__main__":
    # Draw-notice search endpoint, queried for name=ssq, page 1, 10 entries per page.
    target_url = 'http://www.cwl.gov.cn/cwl_admin/front/cwlkj/search/kjxx/findDrawNotice?name=ssq&issueCount=&issueStart=&issueEnd=&dayStart=&dayEnd=&pageNo=1&pageSize=10&week=&systemType=PC'

    # Cookie used while testing.
    # A disabled alternative built it from the first browser cookie instead:
    #   result_cookie = util_get_cookies.get_cookies(target_url)
    #   session_cookie = '{}={}'.format(result_cookie[0].setdefault('name'), result_cookie[0].setdefault('value'))
    #   print(session_cookie)
    # NOTE(review): `util_get_cookies` is not imported in this file - confirm
    # the module before re-enabling that path.
    session_cookie = "HMF_CI=1b2fd73192f2054a429b2bfa4f58c3ff98119441420133cc8a04ca9c95aa2266eaec5bb7cf1d37df5f9864b8629ba407bacc9c58cadf26e2d726582df3870b0969"

    req(target_url, session_cookie)
|
|
|