Browse Source

aiprose 爬虫成功

master
燕鹏 3 years ago
parent
commit
a7fa5f40ed
  1. 31
      README.md
  2. 11
      spider/spider.go

31
README.md

@ -1,28 +1,3 @@
# Introduction
This package provides an implementation of the standard WordPress API methods, using the [https://github.com/kolo/xmlrpc](https://github.com/kolo/xmlrpc) library as the client.
# Todo List
- [x] [wp.newPost](#wp.newPost)
- [ ] wp.getPost
- [ ] wp.getPosts
- [ ] wp.editPost
- [ ] wp.deletePost
# Usage
- ### wp.newPost
```go
c, err := xmlrpc.NewClient(`https://example.com/xmlrpc.php`, xmlrpc.UserInfo{
`your username`,
`your password`,
})
if err != nil {
log.Fatalln(err)
}
p := wordpress.NewPost(`content`, `title`, []string{`tag1`, `tag2`}, []string{`cate1`, `cate2`})
blogID, err := c.Call(p)
if err != nil {
log.Println(err)
}
log.Println(blogID)
```
| 方法 | 网址 |用户名|
| :---: | :---: |:---: |
| SpiderAiprose | https://www.aiprose.com/blogs | nelson |

11
spider/spider.go

@ -8,18 +8,13 @@ import (
)
func SpiderAiprose() {
c := colly.NewCollector(colly.AllowedDomains("www.aiprose.com"))
//c := colly.NewCollector()
c.Async = true
// Find and visit all links
c := colly.NewCollector(colly.AllowedDomains("www.aiprose.com"), colly.Async(true))
c.OnHTML(".home-content-title a[href]", func(e *colly.HTMLElement) {
e.Request.Visit(e.Attr("href"))
})
c.OnHTML(".pagination .next a[href]", func(e *colly.HTMLElement) {
e.Request.Visit(e.Attr("href"))
})
c.OnHTML(".blog-root", func(e *colly.HTMLElement) {
title := strings.TrimSpace(e.DOM.Find(".blog-title").Eq(0).Text())
//author:=strings.TrimSpace(e.DOM.Find(".author-info a").Eq(0).Text())
@ -31,15 +26,11 @@ func SpiderAiprose() {
tags = append(tags, htmlquery.InnerText(node))
}
saveBlog(title, content, time, tags)
//println(title +author+ time)
})
c.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL)
})
c.UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"
c.Visit("https://www.aiprose.com/blogs")
//c.Visit("https://www.aiprose.com/blog/139")
c.Wait()
}

Loading…
Cancel
Save