背景
关于获取Hostloc每日热帖推送的话题,本站已经写过两篇博客,分别是《你想收到hostloc每日热帖的邮件么?》以及《如何越过Hostloc的防CC攻击机制获取其每日热帖》。这两篇用的都是Python脚本:前者只是一个简单的脚本,无法应对后来新增的防CC机制;后者中的脚本则很好地解决了这个问题。本篇博客换一种全新的思路来解决这个问题:使用chromedp(一个把chrome的cdp协议封装成golang对象的库)来调用无头的chrome浏览器访问loc站点。
依赖
下面的脚本依赖了以下第三方github库,编译前自己安装一下。
"github.com/anaskhan96/soup"
"github.com/chromedp/chromedp"
"github.com/emersion/go-sasl"
"github.com/emersion/go-smtp"
代码
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
package main import ( "context" "encoding/base64" "fmt" "os" "runtime/debug" "strconv" "strings" "time" "github.com/anaskhan96/soup" "github.com/chromedp/chromedp" "github.com/emersion/go-sasl" "github.com/emersion/go-smtp" ) func mailSend(mailBodyText string) { currentDate := time.Now().Format("2006-01-02") subject := fmt.Sprintf("%s Hostloc今日热帖", currentDate) subjectBase := base64.StdEncoding.EncodeToString([]byte(subject)) from := "" to := "" password := "" smtpServer := "smtp.qq.com:587" auth := sasl.NewPlainClient("", from, password) msg := strings.NewReader( "From: " + from + "\r\n" + "To: " + to + "\r\n" + "Subject: =?UTF-8?B?" + subjectBase + "?=\r\n" + "Content-Type: text/html; charset=UTF-8" + "\r\n\r\n" + mailBodyText + "\r\n") if err := smtp.SendMail(smtpServer, auth, from, []string{to}, msg); err != nil { fmt.Println(err.Error() + string(debug.Stack())) os.Exit(1) } else { fmt.Println("成功发送了一封邮件!") } } func getMailBody() string { urlPrefix := "https://www.hostloc.com/" fullUrl := urlPrefix + "forum.php?mod=forumdisplay&fid=45&filter=hot" // 参数设置 options := []chromedp.ExecAllocatorOption{ chromedp.Flag("headless", true), chromedp.Flag("disable-gpu", true), chromedp.UserAgent(`Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36`), } options = append(chromedp.DefaultExecAllocatorOptions[:], options...) allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), options...) 
defer cancel() // 创建chrome示例 ctx, cancel := chromedp.NewContext(allocCtx) defer cancel() ctx, cancel = context.WithTimeout(ctx, 15*time.Second) defer cancel() var hotListHtml string if err := chromedp.Run(ctx, chromedp.Tasks{ // 打开导航 chromedp.Navigate(fullUrl), // 等待元素加载完成 chromedp.WaitVisible("body", chromedp.ByQuery), chromedp.OuterHTML("#threadlist", &hotListHtml, chromedp.ByID), }); err != nil { fmt.Println(err.Error() + string(debug.Stack())) os.Exit(1) } doc := soup.HTMLParse(hotListHtml) itemList := doc.Find("div", "class", "bm_c").FindAll("tr")[2:] var mailBodyList []string for _, item := range itemList { replyNumStr := item.Find("td", "class", "num").FindStrict("a", "class", "xi2").Text() titleAndHref := item.Find("th", "class", "new").FindStrict("a", "class", "s xst") title := titleAndHref.Text() link := titleAndHref.Attrs()["href"] replyNumInt, err := strconv.Atoi(replyNumStr) if err != nil { fmt.Println(err.Error() + string(debug.Stack())) os.Exit(1) } if replyNumInt >= 35 { mailBodyList = append(mailBodyList, title+"<br/>"+urlPrefix+link) } } mailBodyText := strings.Join(mailBodyList[:], "</br>") return mailBodyText } func main() { mailSend(getMailBody()) } |