背景
很早之前写过一篇叫《你想收到hostloc每日热帖的邮件么?》的博客,但是没过多久那个脚本就失效了:hostloc站点加了防CC攻击的机制,导致无法直接获取其网页源码。下面分享一个可以越过防CC攻击的脚本。注意:本脚本仅用于把loc的热帖推送给自己,请不要用于其它用途。
依赖
1 2 3 |
pyaes==1.6.1
beautifulsoup4==4.10.0
html5lib==1.1
requests
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
import re
import smtplib
import textwrap
import time
from email.mime.text import MIMEText

import requests
from bs4 import BeautifulSoup
from pyaes import AESModeOfOperationCBC
from requests import Session as req_Session

# SMTP settings used by mail_send(); fill these in before running.
HOST = 'smtp.qq.com'
PORT = 587
SENDER = ''
RECEIVER = ''
PWD = ''

# Browser-like User-Agent sent with every request to the forum.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
)
# Seconds to wait for the forum before giving up instead of hanging forever.
REQUEST_TIMEOUT = 15
# A thread is reported only when it has at least this many replies.
HOT_REPLY_THRESHOLD = 35


def toNumbers(secret: str) -> list:
    """Convert a hex string into a list of byte values.

    Python port of the ``toNumbers`` JavaScript helper used by the anti-CC
    verification page: the string is cut into two-character chunks and each
    chunk is parsed as a base-16 integer.
    """
    return [int(chunk, 16) for chunk in textwrap.wrap(secret, 2)]


def multiple_replace(s):
    """Return ``s`` with every keyword in ``replace_mapping`` masked.

    Replacements are applied in list order and are case-sensitive. Edit
    ``replace_mapping`` to change the filtered words.
    """
    replace_mapping = [
        ("键政", "***"),
        ("ddos", "***"),
        ("DDOS", "***"),
        ("党中央", "***"),
        ("扶墙", "***"),
    ]
    for old, new in replace_mapping:
        s = s.replace(old, new)
    return s


def check_anti_cc() -> dict:
    """Fetch the forum home page without cookies and detect the anti-CC page.

    Returns:
        An empty dict when no ``toNumbers("...")`` call is found in the page
        (anti-CC not active). Otherwise a dict with ``"ok"`` set to 0 when the
        number of extracted parameters is unexpected, or ``"ok"`` set to 1
        together with ``"cookie_name"``, ``"a"``, ``"b"`` and ``"c"`` (the
        three hex strings passed to ``toNumbers`` in the page).
    """
    result_dict = {}
    home_page = "https://hostloc.com/forum.php"
    res = requests.get(
        home_page,
        headers={"user-agent": USER_AGENT},
        timeout=REQUEST_TIMEOUT,
    )
    # Raw strings: "\(" in a normal literal is an invalid escape sequence.
    aes_keys = re.findall(r'toNumbers\("(.*?)"\)', res.text)
    cookie_name = re.findall(r'cookie="(.*?)="', res.text)

    if not aes_keys:
        return result_dict

    # Anti-CC is active.
    print("检测到防 CC 机制开启!")
    if len(aes_keys) != 3 or len(cookie_name) != 1:
        # The patterns matched, but not the expected number of values.
        result_dict["ok"] = 0
    else:
        result_dict["ok"] = 1
        result_dict["cookie_name"] = cookie_name[0]
        result_dict["a"], result_dict["b"], result_dict["c"] = aes_keys
    return result_dict


def gen_anti_cc_cookies() -> dict:
    """Build the cookie that passes the anti-CC check.

    Returns:
        A dict with a single cookie computed by AES-CBC decryption of the
        values scraped by ``check_anti_cc``, or an empty dict when anti-CC is
        not active or its parameters could not be parsed.
    """
    cookies = {}
    anti_cc_status = check_anti_cc()
    if not anti_cc_status:
        # Empty dict: anti-CC is not active, no cookie required.
        return cookies
    if anti_cc_status["ok"] == 0:
        print("防 CC 验证过程所需参数不符合要求,页面可能存在错误!")
        return cookies

    print("自动模拟计算尝试通过防 CC 验证")
    a = bytes(toNumbers(anti_cc_status["a"]))
    b = bytes(toNumbers(anti_cc_status["b"]))
    c = bytes(toNumbers(anti_cc_status["c"]))
    # a and b are the two constructor arguments of the CBC cipher; c is the
    # block that gets decrypted. The hex of the result is the cookie value.
    cbc_mode = AESModeOfOperationCBC(a, b)
    result = cbc_mode.decrypt(c)
    cookies[anti_cc_status["cookie_name"]] = result.hex()
    return cookies


def get_source() -> str:
    """Download the hot-thread listing page and return its HTML as text.

    Raises:
        requests.HTTPError: if the forum answers with an error status.
    """
    headers = {
        "user-agent": USER_AGENT,
        "origin": "https://hostloc.com",
        "referer": "https://hostloc.com/forum.php",
    }
    hot_url = "https://hostloc.com/forum.php?mod=forumdisplay&fid=45&filter=hot"
    s = req_Session()
    s.headers.update(headers)
    s.cookies.update(gen_anti_cc_cookies())
    res = s.get(url=hot_url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    return res.content.decode('utf-8')


def mail_send(subject, mail_body):
    """Send a plain-text UTF-8 mail from SENDER to RECEIVER.

    SMTP errors are printed rather than raised, as before.

    Returns:
        True when the message was handed to the server, False when an
        ``smtplib.SMTPException`` occurred.
    """
    msg = MIMEText(mail_body, 'plain', 'utf-8')
    msg['Subject'] = subject
    msg['From'] = SENDER
    msg['To'] = RECEIVER
    try:
        # The context manager closes the connection even if login/send fails.
        with smtplib.SMTP(HOST, PORT) as s:
            s.ehlo()
            # Upgrade to TLS when the server offers it so the credentials
            # are not sent in clear text.
            if s.has_extn('starttls'):
                s.starttls()
                s.ehlo()
            s.login(SENDER, PWD)
            s.sendmail(SENDER, RECEIVER, msg.as_string())
    except smtplib.SMTPException as e:
        print(str(e))
        return False
    return True


def main():
    """Collect today's hot threads and mail their titles and links."""
    prefix = 'https://www.hostloc.com/'
    current_date = time.strftime("%Y-%m-%d", time.localtime())
    content = get_source()
    soup = BeautifulSoup(content, 'html5lib')
    # The first two rows of the listing are skipped, as in the original.
    item_all_list = soup.select('div.bm_c tr')[2:]
    items_title_list = []
    for item in item_all_list:
        reply_link = item.select_one('td.num a.xi2')
        title_link = item.select_one('th.new a.s.xst')
        if reply_link is None or title_link is None:
            # Not a thread row with the expected markup; skip it instead of
            # crashing on None.
            continue
        href = title_link.get('href')
        if not href:
            continue
        try:
            reply_num = int(reply_link.get_text())
        except ValueError:
            continue
        if reply_num >= HOT_REPLY_THRESHOLD:
            items_title_list.append(title_link.get_text() + ' ' + prefix + href)

    sent = mail_send(
        subject=current_date + ' ' + 'Hostloc今日热帖',
        mail_body=multiple_replace('\n'.join(items_title_list)),
    )
    # Report success only when the mail really went out.
    if sent:
        print('成功发送了一封邮件!')


if __name__ == "__main__":
    main()
说明
如果需要发邮件给自己,请自行替换脚本中的如下变量:
1 2 3 4 5 |
# SMTP settings read by mail_send(); replace the empty values with your own.
HOST = 'smtp.qq.com'  # SMTP server host name
PORT = 587            # SMTP server port
SENDER = ''           # sender address, also used as the SMTP login name
RECEIVER = ''         # recipient address
PWD = ''              # password passed to the SMTP login
敏感词过滤请自行替换以下函数的replace_mapping列表值:
1 2 3 4 5 6 |
def multiple_replace(s):
    """Return ``s`` with every keyword in ``replace_mapping`` masked.

    Replacements are applied in list order and are case-sensitive. Edit
    ``replace_mapping`` to change the filtered words.
    """
    replace_mapping = [
        ("键政", "***"),
        ("ddos", "***"),
        ("DDOS", "***"),
        ("党中央", "***"),
        ("扶墙", "***"),
    ]
    for old, new in replace_mapping:
        s = s.replace(old, new)
    return s