diff --git a/go.mod b/go.mod index 436b7cc..902573b 100644 --- a/go.mod +++ b/go.mod @@ -5,9 +5,10 @@ go 1.16 require ( github.com/PuerkitoBio/goquery v1.7.1 // indirect github.com/andybalholm/cascadia v1.3.1 // indirect - github.com/antchfx/htmlquery v1.2.4 // indirect + github.com/antchfx/htmlquery v1.2.4 github.com/antchfx/xmlquery v1.3.7 // indirect - github.com/gocolly/colly/v2 v2.1.0 // indirect + github.com/go-basic/uuid v1.0.0 // indirect + github.com/gocolly/colly/v2 v2.1.0 github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/kolo/xmlrpc v0.0.0-20201022064351-38db28db192b diff --git a/spider/blog.go b/spider/blog.go index 1dfa56e..5a7365b 100644 --- a/spider/blog.go +++ b/spider/blog.go @@ -5,15 +5,16 @@ import ( "log" ) -func saveBlog(title string, content string, postDate string, tags []string) { +func saveBlog(title string, content string, postDate string, tags []string, catelogs []string) { c, err := NewClient(`http://49.235.160.131/xmlrpc.php`, UserInfo{ + //c, err := NewClient(`https://www.aispider.cc/xmlrpc.php`, UserInfo{ `nelson`, `Yasaka.00`, }) if err != nil { log.Fatalln(err) } - p := wordpress.NewPost(title, content, postDate, tags, tags) + p := wordpress.NewPost(title, content, postDate, tags, catelogs) blogID, err := c.Call(p) if err != nil { log.Println(err) diff --git a/spider/spider.go b/spider/spider.go index 637db9a..acffae4 100644 --- a/spider/spider.go +++ b/spider/spider.go @@ -19,18 +19,21 @@ func SpiderAiprose() { title := strings.TrimSpace(e.DOM.Find(".blog-title").Eq(0).Text()) //author:=strings.TrimSpace(e.DOM.Find(".author-info a").Eq(0).Text()) time := strings.TrimSpace(e.DOM.Find(".author-info span").Eq(0).Text()) + catelog := strings.TrimSpace(e.DOM.Find(".author-info .catelog-name").Text()) content := strings.TrimSpace(e.DOM.Find(".blog-detaile").Eq(0).Text()) nodes := e.DOM.Find(".blog-label label").Nodes + catelogs := []string{catelog} var tags []string for _, node := range nodes { tags = append(tags, htmlquery.InnerText(node)) } - saveBlog(title, content, time, tags) + saveBlog(title, content, time, tags, catelogs) }) c.OnRequest(func(r *colly.Request) { fmt.Println("Visiting", r.URL) }) c.UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36" c.Visit("https://www.aiprose.com/blogs") + //c.Visit("https://www.aiprose.com/blog/143") c.Wait() } diff --git a/wordpress/wp_post.go b/wordpress/wp_post.go index 8d4d567..4aa01cf 100644 --- a/wordpress/wp_post.go +++ b/wordpress/wp_post.go @@ -1,5 +1,7 @@ package wordpress +import "github.com/go-basic/uuid" + type Post struct { BlogID int PostContent @@ -64,6 +66,7 @@ func NewPost(title string, content string, postDate string, tags []string, cate PostType: `post`, PostStatus: `publish`, PostTitle: title, + PostName: uuid.New(), PostContent: content, PostDate: postDate, //PostDate: time.Now().Format(`2006-01-02 15:04:05`),