|  |  | @ -5,8 +5,10 @@ import ( | 
			
		
	
		
			
				
					|  |  |  | 	"github.com/antchfx/htmlquery" | 
			
		
	
		
			
				
					|  |  |  | 	"github.com/gocolly/colly/v2" | 
			
		
	
		
			
				
					|  |  |  | 	"strings" | 
			
		
	
		
			
				
					|  |  |  | 	"time" | 
			
		
	
		
			
				
					|  |  |  | ) | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  | /// https://www.aiprose.com/ 爬取
 | 
			
		
	
		
			
				
					|  |  |  | func SpiderAiprose() { | 
			
		
	
		
			
				
					|  |  |  | 	c := colly.NewCollector(colly.AllowedDomains("www.aiprose.com"), colly.Async(true)) | 
			
		
	
		
			
				
					|  |  |  | 	c.OnHTML(".home-content-title a[href]", func(e *colly.HTMLElement) { | 
			
		
	
	
		
			
				
					|  |  | @ -37,3 +39,41 @@ func SpiderAiprose() { | 
			
		
	
		
			
				
					|  |  |  | 	//c.Visit("https://www.aiprose.com/blog/143")
 | 
			
		
	
		
			
				
					|  |  |  | 	c.Wait() | 
			
		
	
		
			
				
					|  |  |  | } | 
			
		
	
		
			
				
					|  |  |  | 
 | 
			
		
	
		
			
				
					|  |  |  | /// csdn爬取 https://blog.csdn.net/bitree1?type=blog
 | 
			
		
	
		
			
				
					|  |  |  | func SpiderBitree() { | 
			
		
	
		
			
				
					|  |  |  | 	c := colly.NewCollector(colly.AllowedDomains("blog.csdn.net"), colly.Async(false)) | 
			
		
	
		
			
				
					|  |  |  | 	c.UserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36" | 
			
		
	
		
			
				
					|  |  |  | 	c.Limit(&colly.LimitRule{ | 
			
		
	
		
			
				
					|  |  |  | 		Delay: 10000 * time.Minute, | 
			
		
	
		
			
				
					|  |  |  | 		//RandomDelay:  2 * time.Minute,
 | 
			
		
	
		
			
				
					|  |  |  | 		Parallelism: 1, | 
			
		
	
		
			
				
					|  |  |  | 	}) | 
			
		
	
		
			
				
					|  |  |  | 	c2 := c.Clone() | 
			
		
	
		
			
				
					|  |  |  | 	//异步
 | 
			
		
	
		
			
				
					|  |  |  | 	c2.Async = false | 
			
		
	
		
			
				
					|  |  |  | 	//限速
 | 
			
		
	
		
			
				
					|  |  |  | 	c2.Limit(&colly.LimitRule{ | 
			
		
	
		
			
				
					|  |  |  | 		DomainGlob:  "blog.csdn.net/bitree1/article/details/*", | 
			
		
	
		
			
				
					|  |  |  | 		Delay:       10000 * time.Minute, | 
			
		
	
		
			
				
					|  |  |  | 		Parallelism: 1, | 
			
		
	
		
			
				
					|  |  |  | 	}) | 
			
		
	
		
			
				
					|  |  |  | 	c.OnHTML(".blog-list-box a[href]", func(e *colly.HTMLElement) { | 
			
		
	
		
			
				
					|  |  |  | 		c2.Request("GET", e.Attr("href"), nil, nil, nil) | 
			
		
	
		
			
				
					|  |  |  | 	}) | 
			
		
	
		
			
				
					|  |  |  | 	c2.OnHTML(".blog-content-box", func(e *colly.HTMLElement) { | 
			
		
	
		
			
				
					|  |  |  | 		//fmt.Println("获取到文章")
 | 
			
		
	
		
			
				
					|  |  |  | 		title := strings.TrimSpace(e.DOM.Find(".title-article").Eq(0).Text()) | 
			
		
	
		
			
				
					|  |  |  | 		fmt.Println(title) | 
			
		
	
		
			
				
					|  |  |  | 	}) | 
			
		
	
		
			
				
					|  |  |  | 	c2.OnRequest(func(r *colly.Request) { | 
			
		
	
		
			
				
					|  |  |  | 		fmt.Println("Visiting", r.URL) | 
			
		
	
		
			
				
					|  |  |  | 	}) | 
			
		
	
		
			
				
					|  |  |  | 	c2.OnError(func(r *colly.Response, err error) { | 
			
		
	
		
			
				
					|  |  |  | 		fmt.Println("Request URL:", r.Request.URL.String(), "failed with response:", r, "\nError:", err) | 
			
		
	
		
			
				
					|  |  |  | 		c2.Request("GET", r.Request.URL.String(), nil, nil, nil) | 
			
		
	
		
			
				
					|  |  |  | 	}) | 
			
		
	
		
			
				
					|  |  |  | 	c.Visit("https://blog.csdn.net/bitree1?type=blog") | 
			
		
	
		
			
				
					|  |  |  | 	c.Wait() | 
			
		
	
		
			
				
					|  |  |  | 	c2.Wait() | 
			
		
	
		
			
				
					|  |  |  | } | 
			
		
	
	
		
			
				
					|  |  | 
 |