package spider
import (
"fmt"
"github.com/antchfx/htmlquery"
"github.com/gocolly/colly/v2"
"strings"
)
// SpiderAiprose crawls www.aiprose.com starting from the /blogs listing page.
//
// It follows every article link and every "next page" link it finds, and for
// each page containing a .blog-root element it extracts the title, body text,
// time text, category and tag labels and hands them to saveBlog. The collector
// runs asynchronously; the function blocks until all queued requests finish.
// Failures are printed to standard output rather than returned.
func SpiderAiprose() {
	c := colly.NewCollector(
		colly.AllowedDomains("www.aiprose.com"),
		colly.Async(true),
	)
	c.UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36"

	// followLink queues the element's href for crawling. Both the article
	// links and the pagination link use the same behavior.
	followLink := func(e *colly.HTMLElement) {
		// Visit reports an error for links the collector declines to fetch
		// (per the colly docs: e.g. already-visited URLs or hosts outside
		// AllowedDomains). That is routine while crawling, so the error is
		// intentionally discarded; fetch failures are reported via OnError.
		_ = e.Request.Visit(e.Attr("href"))
	}
	c.OnHTML(".home-content-title a[href]", followLink)
	c.OnHTML(".pagination .next a[href]", followLink)

	c.OnHTML(".blog-root", func(e *colly.HTMLElement) {
		title := strings.TrimSpace(e.DOM.Find(".blog-title").Eq(0).Text())
		// Author extraction is currently disabled:
		// author := strings.TrimSpace(e.DOM.Find(".author-info a").Eq(0).Text())

		// Named timeText (not time) so it cannot shadow the standard time package.
		timeText := strings.TrimSpace(e.DOM.Find(".author-info span").Eq(0).Text())
		catelog := strings.TrimSpace(e.DOM.Find(".author-info .catelog-name").Text())
		content := strings.TrimSpace(e.DOM.Find(".blog-detaile").Eq(0).Text())
		catelogs := []string{catelog}

		// tags stays nil when the page has no labels, as before.
		var tags []string
		for _, node := range e.DOM.Find(".blog-label label").Nodes {
			tags = append(tags, htmlquery.InnerText(node))
		}

		saveBlog(title, content, timeText, tags, catelogs)
	})

	c.OnRequest(func(r *colly.Request) {
		fmt.Println("Visiting", r.URL)
	})

	// In async mode fetch errors are not returned by Visit, so without this
	// callback failed requests would disappear silently.
	c.OnError(func(r *colly.Response, err error) {
		fmt.Println("Request failed", r.Request.URL, err)
	})

	if err := c.Visit("https://www.aiprose.com/blogs"); err != nil {
		fmt.Println("Visit failed", "https://www.aiprose.com/blogs", err)
	}
	// Single-article entry point, handy for debugging:
	// c.Visit("https://www.aiprose.com/blog/143")

	// Block until every asynchronously queued request has completed.
	c.Wait()
}