// Go colly crawler: scrape blog posts into WordPress. https://www.aispider.cc
package spider
import (
func SpiderAiprose() {
c := colly.NewCollector(colly.AllowedDomains("www.aiprose.com"), colly.Async(true))
c.OnHTML(".home-content-title a[href]", func(e *colly.HTMLElement) {
c.OnHTML(".pagination .next a[href]", func(e *colly.HTMLElement) {
c.OnHTML(".blog-root", func(e *colly.HTMLElement) {
title := strings.TrimSpace(e.DOM.Find(".blog-title").Eq(0).Text())
//author:=strings.TrimSpace(e.DOM.Find(".author-info a").Eq(0).Text())
time := strings.TrimSpace(e.DOM.Find(".author-info span").Eq(0).Text())
catelog := strings.TrimSpace(e.DOM.Find(".author-info .catelog-name").Text())
content := strings.TrimSpace(e.DOM.Find(".blog-detaile").Eq(0).Text())
nodes := e.DOM.Find(".blog-label label").Nodes
catelogs := []string{catelog}
var tags []string
for _, node := range nodes {
tags = append(tags, htmlquery.InnerText(node))
saveBlog(title, content, time, tags, catelogs)
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL)
c.UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"