燕鹏
3 years ago
6 changed files with 110 additions and 8 deletions
@ -0,0 +1,7 @@ |
|||
package main |
|||
|
|||
import "go-wordpress-xmlrpc/spider" |
|||
|
|||
func main() { |
|||
spider.SpiderAiprose() |
|||
} |
@ -0,0 +1,22 @@ |
|||
package spider |
|||
|
|||
import ( |
|||
"go-wordpress-xmlrpc/wordpress" |
|||
"log" |
|||
) |
|||
|
|||
func saveBlog(title string, content string, postDate string, tags []string) { |
|||
c, err := NewClient(`http://49.235.160.131/xmlrpc.php`, UserInfo{ |
|||
`nelson`, |
|||
`Yasaka.00`, |
|||
}) |
|||
if err != nil { |
|||
log.Fatalln(err) |
|||
} |
|||
p := wordpress.NewPost(title, content, postDate, tags, tags) |
|||
blogID, err := c.Call(p) |
|||
if err != nil { |
|||
log.Println(err) |
|||
} |
|||
log.Println(blogID) |
|||
} |
@ -1,4 +1,4 @@ |
|||
package xmlrpc |
|||
package spider |
|||
|
|||
import ( |
|||
"github.com/kolo/xmlrpc" |
@ -0,0 +1,45 @@ |
|||
package spider |
|||
|
|||
import ( |
|||
"fmt" |
|||
"github.com/antchfx/htmlquery" |
|||
"github.com/gocolly/colly/v2" |
|||
"strings" |
|||
) |
|||
|
|||
func SpiderAiprose() { |
|||
c := colly.NewCollector(colly.AllowedDomains("www.aiprose.com")) |
|||
//c := colly.NewCollector()
|
|||
c.Async = true |
|||
// Find and visit all links
|
|||
c.OnHTML(".home-content-title a[href]", func(e *colly.HTMLElement) { |
|||
e.Request.Visit(e.Attr("href")) |
|||
}) |
|||
|
|||
c.OnHTML(".pagination .next a[href]", func(e *colly.HTMLElement) { |
|||
e.Request.Visit(e.Attr("href")) |
|||
}) |
|||
|
|||
c.OnHTML(".blog-root", func(e *colly.HTMLElement) { |
|||
title := strings.TrimSpace(e.DOM.Find(".blog-title").Eq(0).Text()) |
|||
//author:=strings.TrimSpace(e.DOM.Find(".author-info a").Eq(0).Text())
|
|||
time := strings.TrimSpace(e.DOM.Find(".author-info span").Eq(0).Text()) |
|||
content := strings.TrimSpace(e.DOM.Find(".blog-detaile").Eq(0).Text()) |
|||
nodes := e.DOM.Find(".blog-label label").Nodes |
|||
var tags []string |
|||
for _, node := range nodes { |
|||
tags = append(tags, htmlquery.InnerText(node)) |
|||
} |
|||
saveBlog(title, content, time, tags) |
|||
//println(title +author+ time)
|
|||
}) |
|||
|
|||
c.OnRequest(func(r *colly.Request) { |
|||
fmt.Println("Visiting", r.URL) |
|||
}) |
|||
|
|||
c.UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36" |
|||
c.Visit("https://www.aiprose.com/blogs") |
|||
//c.Visit("https://www.aiprose.com/blog/139")
|
|||
c.Wait() |
|||
} |
Loading…
Reference in new issue