共计 3699 个字符,预计需要花费 10 分钟才能阅读完成。
背景
很早之前写过一篇叫《你想收到 hostloc 每日热帖的邮件么?》的博客,但是没过多久那个脚本就失效了:hostloc 站点加了防 CC 攻击的机制,导致无法直接获取其网页源码。下面分享一个可以通过防 CC 验证的脚本。注意:本脚本仅用于把 loc 的热帖推送给自己,请不要用于其它用途。
依赖
pyaes==1.6.1
beautifulsoup4==4.10.0
html5lib==1.1
requests
代码
import re
import textwrap
import requests
import time
import smtplib
from bs4 import BeautifulSoup
from email.mime.text import MIMEText
from pyaes import AESModeOfOperationCBC
from requests import Session as req_Session
# SMTP settings used by mail_send(); fill in your own values before running.
HOST = 'smtp.qq.com'  # SMTP server host
PORT = 587  # SMTP server port
SENDER = ''  # sender address; also used as the SMTP login name
RECEIVER = ''  # recipient address
PWD = ''  # password passed to the SMTP login together with SENDER
# Python port of the toNumbers function written in JS on the anti-CC
# verification page.
def toNumbers(secret: str) -> list:
    """Convert a hex string into a list of integer byte values.

    The string is consumed two characters at a time; a trailing single
    character (odd-length input) is parsed as a one-digit hex value.

    Args:
        secret: Hex digits without separators, e.g. ``"0a1bff"``.

    Returns:
        A list of ints, one per two-character chunk. Empty for ``""``.

    Raises:
        ValueError: If a chunk is not valid hexadecimal.
    """
    # Slice in fixed steps of two; this is direct chunking rather than
    # relying on a text-reflow helper to break the string.
    return [int(secret[i:i + 2], 16) for i in range(0, len(secret), 2)]
def multiple_replace(s):
    """Mask every configured sensitive word in *s*.

    Each (word, mask) pair in ``replace_mapping`` is applied in order with a
    plain, case-sensitive ``str.replace``.

    Args:
        s: Text to filter.

    Returns:
        The text with every listed word replaced by its mask.
    """
    # Edit this list to change which words are masked.
    replace_mapping = [
        ("键政", "***"),
        ("ddos", "***"),
        ("DDOS", "***"),
        ("党中央", "***"),
        ("扶墙", "***"),
    ]
    for word, mask in replace_mapping:
        s = s.replace(word, mask)
    return s
# Request the forum home page without cookies, check whether the anti-CC
# mechanism is enabled, and return its state plus the AES parameters in a dict.
def check_anti_cc() -> dict:
    """Probe the forum home page for the anti-CC challenge.

    Returns:
        ``{}`` when the page contains no ``toNumbers("...")`` call (no
        challenge detected).
        ``{"ok": 0}`` when a challenge is detected but the page does not
        contain exactly three ``toNumbers`` arguments and one cookie name.
        ``{"ok": 1, "cookie_name": ..., "a": ..., "b": ..., "c": ...}`` when
        all parameters were extracted; ``a``, ``b`` and ``c`` are the three
        ``toNumbers`` arguments in page order.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
    }
    home_page = "https://hostloc.com/forum.php"
    # A timeout keeps an unresponsive server from hanging the script forever.
    res = requests.get(home_page, headers=headers, timeout=10)
    # Raw strings so the escaped parentheses are not treated as (invalid)
    # string escape sequences.
    aes_keys = re.findall(r'toNumbers\("(.*?)"\)', res.text)
    cookie_name = re.findall(r'cookie="(.*?)="', res.text)

    if not aes_keys:
        # No challenge script found: anti-CC is not active.
        return {}

    print("检测到防 CC 机制开启!")
    if len(aes_keys) != 3 or len(cookie_name) != 1:
        # The regexes matched, but not the expected number of parameters.
        return {"ok": 0}

    return {
        "ok": 1,
        "cookie_name": cookie_name[0],
        "a": aes_keys[0],
        "b": aes_keys[1],
        "c": aes_keys[2],
    }
# When the anti-CC mechanism is enabled, AES-decrypt the extracted values to
# build the verification cookie (returns empty cookies when it is disabled).
def gen_anti_cc_cookies() -> dict:
    """Build the cookie needed to pass the anti-CC check.

    Returns:
        A dict with a single ``{cookie_name: hex_value}`` entry when the
        challenge parameters were extracted, otherwise an empty dict (no
        challenge detected, or its parameters could not be parsed).
    """
    cookies = {}
    anti_cc_status = check_anti_cc()
    if not anti_cc_status:
        # Empty result: anti-CC is not enabled, no cookie needed.
        return cookies

    # .get() so a status dict without an "ok" entry is treated as a failed
    # parse instead of raising KeyError.
    if not anti_cc_status.get("ok"):
        print("防 CC 验证过程所需参数不符合要求,页面可能存在错误!")
        return cookies

    # AES-CBC decrypt with the three extracted values to produce the cookie
    # value that passes the anti-CC verification.
    print("自动模拟计算尝试通过防 CC 验证")
    a, b, c = (bytes(toNumbers(anti_cc_status[k])) for k in ("a", "b", "c"))
    cbc_mode = AESModeOfOperationCBC(a, b)  # a: key, b: IV (pyaes argument order)
    cookies[anti_cc_status["cookie_name"]] = cbc_mode.decrypt(c).hex()
    return cookies
def get_source() -> str:
    """Fetch the HTML of the forum's hot-thread listing page.

    The request is sent with browser-like headers and with whatever cookies
    gen_anti_cc_cookies() returns.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: If the request fails or times out.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "origin": "https://hostloc.com",
        "referer": "https://hostloc.com/forum.php",
    }
    hot_url = "https://hostloc.com/forum.php?mod=forumdisplay&fid=45&filter=hot"
    # The context manager closes the session's pooled connections on exit.
    with req_Session() as s:
        s.headers.update(headers)
        s.cookies.update(gen_anti_cc_cookies())
        # A timeout keeps an unresponsive server from hanging the script.
        res = s.get(url=hot_url, timeout=10)
        res.raise_for_status()
        return res.content.decode('utf-8')
def mail_send(subject, mail_body):
    """Send a plain-text UTF-8 mail from SENDER to RECEIVER.

    SMTP errors are caught and printed rather than raised, so sending is
    best-effort.

    Args:
        subject: Value of the Subject header.
        mail_body: Plain-text body of the message.
    """
    msg = MIMEText(mail_body, 'plain', 'utf-8')
    msg['Subject'] = subject
    msg['From'] = SENDER
    msg['To'] = RECEIVER
    try:
        # The context manager issues QUIT and closes the socket even when
        # login or sending raises, so the connection is never leaked.
        with smtplib.SMTP(HOST, PORT) as s:
            s.ehlo_or_helo_if_needed()
            # Upgrade to TLS when the server offers it, so the password is
            # not sent in clear text; otherwise proceed as before.
            if s.has_extn('starttls'):
                s.starttls()
                s.ehlo()
            s.login(SENDER, PWD)
            s.sendmail(SENDER, RECEIVER, msg.as_string())
    except smtplib.SMTPException as e:
        print(str(e))
def main(min_replies=35):
    """Collect today's hot threads and mail them.

    Parses the hot-thread listing returned by get_source(), keeps threads
    whose reply count is at least *min_replies*, and mails one
    "title url" line per thread after passing the text through
    multiple_replace().

    Args:
        min_replies: Minimum reply count for a thread to be included.
    """
    prefix = 'https://www.hostloc.com/'
    current_date = time.strftime("%Y-%m-%d")
    soup = BeautifulSoup(get_source(), 'html5lib')

    items_title_list = []
    # The first two rows are skipped — presumably header/non-thread rows;
    # verify against the page markup if the layout changes.
    for item in soup.select('div.bm_c tr')[2:]:
        reply_link = item.select_one('td.num a.xi2')
        title_link = item.select_one('th.new a.s.xst')
        # Skip rows that are not regular thread rows instead of crashing
        # on a missing element.
        if reply_link is None or title_link is None:
            continue
        href = title_link.get('href')
        if not href:
            continue
        try:
            reply_num = int(reply_link.get_text())
        except ValueError:
            # Reply cell does not hold a plain number; ignore the row.
            continue
        if reply_num >= min_replies:
            items_title_list.append(title_link.get_text() + ' ' + prefix + href)

    mail_send(subject=current_date + 'Hostloc 今日热帖 ',
              mail_body=multiple_replace('\n'.join(items_title_list)))
    # NOTE: mail_send prints SMTP errors instead of raising them, so this
    # message is printed even when sending failed.
    print('成功发送了一封邮件!')


# Run only when executed as a script, so importing the module has no
# network or mail side effects.
if __name__ == "__main__":
    main()
说明
如果需要发邮件给自己需要自行替换脚本中的如下变量:
HOST = 'smtp.qq.com'  # SMTP server host
PORT = 587  # SMTP server port
SENDER = ''  # sender address; also used as the SMTP login name
RECEIVER = ''  # recipient address
PWD = ''  # password passed to the SMTP login together with SENDER
敏感词过滤请自行替换以下函数的 replace_mapping 列表值:
def multiple_replace(s):
    """Mask every configured sensitive word in *s*.

    Each (word, mask) pair in ``replace_mapping`` is applied in order with a
    plain, case-sensitive ``str.replace``.

    Args:
        s: Text to filter.

    Returns:
        The text with every listed word replaced by its mask.
    """
    # Edit this list to change which words are masked.
    replace_mapping = [
        ("键政", "***"),
        ("ddos", "***"),
        ("DDOS", "***"),
        ("党中央", "***"),
        ("扶墙", "***"),
    ]
    for word, mask in replace_mapping:
        s = s.replace(word, mask)
    return s
正文完