燕鹏
3 years ago
6 changed files with 110 additions and 8 deletions
@ -0,0 +1,7 @@ |
|||||
|
package main |
||||
|
|
||||
|
import "go-wordpress-xmlrpc/spider" |
||||
|
|
||||
|
func main() { |
||||
|
spider.SpiderAiprose() |
||||
|
} |
@ -0,0 +1,22 @@ |
|||||
|
package spider |
||||
|
|
||||
|
import ( |
||||
|
"go-wordpress-xmlrpc/wordpress" |
||||
|
"log" |
||||
|
) |
||||
|
|
||||
|
func saveBlog(title string, content string, postDate string, tags []string) { |
||||
|
c, err := NewClient(`http://49.235.160.131/xmlrpc.php`, UserInfo{ |
||||
|
`nelson`, |
||||
|
`Yasaka.00`, |
||||
|
}) |
||||
|
if err != nil { |
||||
|
log.Fatalln(err) |
||||
|
} |
||||
|
p := wordpress.NewPost(title, content, postDate, tags, tags) |
||||
|
blogID, err := c.Call(p) |
||||
|
if err != nil { |
||||
|
log.Println(err) |
||||
|
} |
||||
|
log.Println(blogID) |
||||
|
} |
@ -1,4 +1,4 @@ |
|||||
package xmlrpc |
package spider |
||||
|
|
||||
import ( |
import ( |
||||
"github.com/kolo/xmlrpc" |
"github.com/kolo/xmlrpc" |
@ -0,0 +1,45 @@ |
|||||
|
package spider |
||||
|
|
||||
|
import ( |
||||
|
"fmt" |
||||
|
"github.com/antchfx/htmlquery" |
||||
|
"github.com/gocolly/colly/v2" |
||||
|
"strings" |
||||
|
) |
||||
|
|
||||
|
func SpiderAiprose() { |
||||
|
c := colly.NewCollector(colly.AllowedDomains("www.aiprose.com")) |
||||
|
//c := colly.NewCollector()
|
||||
|
c.Async = true |
||||
|
// Find and visit all links
|
||||
|
c.OnHTML(".home-content-title a[href]", func(e *colly.HTMLElement) { |
||||
|
e.Request.Visit(e.Attr("href")) |
||||
|
}) |
||||
|
|
||||
|
c.OnHTML(".pagination .next a[href]", func(e *colly.HTMLElement) { |
||||
|
e.Request.Visit(e.Attr("href")) |
||||
|
}) |
||||
|
|
||||
|
c.OnHTML(".blog-root", func(e *colly.HTMLElement) { |
||||
|
title := strings.TrimSpace(e.DOM.Find(".blog-title").Eq(0).Text()) |
||||
|
//author:=strings.TrimSpace(e.DOM.Find(".author-info a").Eq(0).Text())
|
||||
|
time := strings.TrimSpace(e.DOM.Find(".author-info span").Eq(0).Text()) |
||||
|
content := strings.TrimSpace(e.DOM.Find(".blog-detaile").Eq(0).Text()) |
||||
|
nodes := e.DOM.Find(".blog-label label").Nodes |
||||
|
var tags []string |
||||
|
for _, node := range nodes { |
||||
|
tags = append(tags, htmlquery.InnerText(node)) |
||||
|
} |
||||
|
saveBlog(title, content, time, tags) |
||||
|
//println(title +author+ time)
|
||||
|
}) |
||||
|
|
||||
|
c.OnRequest(func(r *colly.Request) { |
||||
|
fmt.Println("Visiting", r.URL) |
||||
|
}) |
||||
|
|
||||
|
c.UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36" |
||||
|
c.Visit("https://www.aiprose.com/blogs") |
||||
|
//c.Visit("https://www.aiprose.com/blog/139")
|
||||
|
c.Wait() |
||||
|
} |
Loading…
Reference in new issue