diff --git a/go.mod b/go.mod
index 827a645..436b7cc 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,18 @@ module go-wordpress-xmlrpc
 
 go 1.16
 
-require github.com/kolo/xmlrpc v0.0.0-20201022064351-38db28db192b
+require (
+	github.com/PuerkitoBio/goquery v1.7.1 // indirect
+	github.com/andybalholm/cascadia v1.3.1 // indirect
+	github.com/antchfx/htmlquery v1.2.4
+	github.com/antchfx/xmlquery v1.3.7 // indirect
+	github.com/gocolly/colly/v2 v2.1.0
+	github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
+	github.com/golang/protobuf v1.5.2 // indirect
+	github.com/kolo/xmlrpc v0.0.0-20201022064351-38db28db192b
+	github.com/temoto/robotstxt v1.1.2 // indirect
+	golang.org/x/net v0.0.0-20210927181540-4e4d966f7476 // indirect
+	golang.org/x/text v0.3.7 // indirect
+	google.golang.org/appengine v1.6.7 // indirect
+	google.golang.org/protobuf v1.27.1 // indirect
+)
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..4138d6c
--- /dev/null
+++ b/main.go
@@ -0,0 +1,7 @@
+package main
+
+import "go-wordpress-xmlrpc/spider"
+
+func main() {
+	spider.SpiderAiprose()
+}
diff --git a/spider/blog.go b/spider/blog.go
new file mode 100644
index 0000000..1dfa56e
--- /dev/null
+++ b/spider/blog.go
@@ -0,0 +1,31 @@
+package spider
+
+import (
+	"log"
+
+	"go-wordpress-xmlrpc/wordpress"
+)
+
+// saveBlog publishes one scraped article through the WordPress XML-RPC
+// endpoint. The tag list is reused as the category list. A failure to create
+// the client is fatal; a failed publish call is logged and skipped.
+//
+// NOTE(review): the endpoint and credentials are hard-coded in source; they
+// should be moved to configuration before this is merged.
+func saveBlog(title string, content string, postDate string, tags []string) {
+	c, err := NewClient(`http://49.235.160.131/xmlrpc.php`, UserInfo{
+		`nelson`,
+		`Yasaka.00`,
+	})
+	if err != nil {
+		log.Fatalln(err)
+	}
+	p := wordpress.NewPost(title, content, postDate, tags, tags)
+	blogID, err := c.Call(p)
+	if err != nil {
+		log.Println(err)
+		// blogID carries no meaning when the call failed; do not log it.
+		return
+	}
+	log.Println(blogID)
+}
diff --git a/client.go b/spider/client.go
similarity index 99%
rename from client.go
rename to spider/client.go
index 005c7f9..392805d 100644
--- a/client.go
+++ b/spider/client.go
@@ -1,4 +1,4 @@
-package xmlrpc
+package spider
 
 import (
 	"github.com/kolo/xmlrpc"
diff --git a/spider/spider.go b/spider/spider.go
new file mode 100644
index 0000000..5692954
--- /dev/null
+++ b/spider/spider.go
@@ -0,0 +1,51 @@
+package spider
+
+import (
+	"fmt"
+	"strings"
+
+	"github.com/antchfx/htmlquery"
+	"github.com/gocolly/colly/v2"
+)
+
+// SpiderAiprose crawls https://www.aiprose.com/blogs, following article and
+// "next page" links, and passes each article's title, body text, date string
+// and tags to saveBlog. Requests run asynchronously; the function returns
+// once all of them have finished.
+func SpiderAiprose() {
+	c := colly.NewCollector(colly.AllowedDomains("www.aiprose.com"))
+	c.Async = true
+	c.UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
+
+	// Follow article links and the pagination link. Visit reports an error
+	// for URLs that were already visited or fall outside AllowedDomains;
+	// those are expected during a crawl, so the result is discarded.
+	follow := func(e *colly.HTMLElement) {
+		_ = e.Request.Visit(e.Attr("href"))
+	}
+	c.OnHTML(".home-content-title a[href]", follow)
+	c.OnHTML(".pagination .next a[href]", follow)
+
+	// Elements matching .blog-root: extract the fields and publish them.
+	c.OnHTML(".blog-root", func(e *colly.HTMLElement) {
+		title := strings.TrimSpace(e.DOM.Find(".blog-title").Eq(0).Text())
+		postDate := strings.TrimSpace(e.DOM.Find(".author-info span").Eq(0).Text())
+		content := strings.TrimSpace(e.DOM.Find(".blog-detaile").Eq(0).Text())
+
+		var tags []string
+		for _, node := range e.DOM.Find(".blog-label label").Nodes {
+			tags = append(tags, htmlquery.InnerText(node))
+		}
+		saveBlog(title, content, postDate, tags)
+	})
+
+	c.OnRequest(func(r *colly.Request) {
+		fmt.Println("Visiting", r.URL)
+	})
+
+	if err := c.Visit("https://www.aiprose.com/blogs"); err != nil {
+		fmt.Println("Visit failed:", err)
+		return
+	}
+	c.Wait()
+}
diff --git a/wordpress/wp_post.go b/wordpress/wp_post.go
index 74de7fb..8d4d567 100644
--- a/wordpress/wp_post.go
+++ b/wordpress/wp_post.go
@@ -1,9 +1,5 @@
 package wordpress
 
-import (
-	"time"
-)
-
 type Post struct {
 	BlogID int
 	PostContent
@@ -63,13 +59,35 @@ func (p Post) GetArgs(user string, pwd string) interface{} {
 	return args
 }
 
-func NewPost(content string, title string, tags []string, cate []string) (p Post) {
+// NewPost builds a Post of type `post` with status `publish` from the given
+// title, content, tags and categories. postDate is stored unchanged in
+// PostDate; the caller supplies it instead of the current time being used.
+func NewPost(title string, content string, postDate string, tags []string, cate []string) (p Post) {
+	p.PostContent = PostContent{
+		PostType:    `post`,
+		PostStatus:  `publish`,
+		PostTitle:   title,
+		PostContent: content,
+		PostDate:    postDate,
+		//PostDate: time.Now().Format(`2006-01-02 15:04:05`),
+		TermsNames: TermsNames{
+			PostCategory: cate,
+			TagsInput:    tags,
+		},
+	}
+	return p
+}
+
+// NewPostAuthor is like NewPost but additionally sets PostAuthor to postAuthor.
+func NewPostAuthor(title string, content string, postDate string, postAuthor int, tags []string, cate []string) (p Post) {
 	p.PostContent = PostContent{
 		PostType:    `post`,
 		PostStatus:  `publish`,
 		PostTitle:   title,
 		PostContent: content,
-		PostDate:    time.Now().Format(`2006-01-02 15:04:05`),
+		PostAuthor:  postAuthor,
+		PostDate:    postDate,
+		//PostDate: time.Now().Format(`2006-01-02 15:04:05`),
 		TermsNames: TermsNames{
 			PostCategory: cate,
 			TagsInput:    tags,