先在列表頁中找到對應的 a 標籤,取出圖片所在頁面的網址,例如:https://desk.3gbizhi.com/deskMV/438.html
搭建Form界面
Http類?
/// <summary>
/// Single shared <see cref="HttpClient"/> used for every request; assigned once by the static constructor.
/// </summary>
public static HttpClient Client { get; }

/// <summary>
/// Builds the shared client on top of a handler that skips server-certificate validation.
/// </summary>
static Http()
{
    // Message handler that sits underneath the HttpClient.
    HttpClientHandler handler = new HttpClientHandler();

    // WARNING: this callback accepts every server certificate. It exists because some
    // target sites have certificates that would otherwise be rejected, but it also
    // disables protection against man-in-the-middle attacks for all requests.
    handler.ServerCertificateCustomValidationCallback = (message, cart, chain, error) => { return true; };

    // Request object shared by all callers.
    Client = new HttpClient(handler);
}
圖片所在頁面網址的正則
// Matches an anchor whose class list ends in "imgw" and whose href is an https URL ending in .html.
// Capture group 1 is that URL (the page that hosts the image).
Regex imgHtml = new Regex(@"<a href=""(https://[a-zA-Z0-9/\.]+\.html)"" class=""[a-zA-Z0-9]* imgw"" target=""_blank"">" );
// Sample of the anchor the next pattern targets:
// <a href="https://pic.3gbizhi.com/uploadmark/20231006/c54bae39ffc4a10b023fc5c7adfee803.jpg" class="arrows" target="_blank"><i class="fa fa-search-plus fa-fw"></i></a>
// Capture group 1 is the full image URL; group 2 is the file extension (jpg or png).
Regex picReg = new Regex(@"<a href=""(https://pic\.3gbizhi\.com/uploadmark/\d+/[a-zA-Z0-9]+\.(jpg|png))"" class=""arrows"" target=""_blank"">");
按鈕的點擊事件
// Crawl the list pages index_{start}.html .. index_{end}.html, follow every image-page
// link found on them, and hand each resolved image URL to downLoad.
string url = this.textBox1.Text; // crawl URL typed by the user, e.g. .../deskMV/index_23.html

// Validate the page range instead of letting int.Parse throw on bad input.
int start; // first page number (index_{start}.html)
int end;   // last page number (index_{end}.html), inclusive
if (!int.TryParse(this.textBox3.Text, out start) || !int.TryParse(this.textBox4.Text, out end))
{
    Console.WriteLine("Start page and end page must be integers.");
    return;
}

// Strip a trailing "index_N.html" so only the directory part remains, e.g.
// https://desk.3gbizhi.com/deskMV/ ; then drop the trailing slash so the
// interpolation below does not produce "//index_N.html".
Regex reg = new Regex(@"index_\d+\.html$");
url = reg.Replace(url, "").TrimEnd('/');

for (int i = start; i <= end; i++)
{
    string nowURL = $"{url}/index_{i}.html";

    // Whole HTML of the list page; the response is disposed as soon as the text is read.
    string data;
    using (HttpResponseMessage res = await Http.Client.GetAsync(nowURL))
    {
        data = await res.Content.ReadAsStringAsync();
    }

    // Every anchor on the list page that satisfies imgHtml.
    MatchCollection maths = imgHtml.Matches(data);
    foreach (Match item in maths)
    {
        // Group 1 of imgHtml is the URL of the page that hosts the image.
        string picURL = item.Groups[1].Value;

        string data1;
        using (HttpResponseMessage res1 = await Http.Client.GetAsync(picURL))
        {
            data1 = await res1.Content.ReadAsStringAsync();
        }

        // Skip pages where the full-size image link is missing rather than
        // calling downLoad with an empty URL.
        Match picMatch = picReg.Match(data1);
        if (!picMatch.Success)
        {
            continue;
        }

        string picURL1 = picMatch.Groups[1].Value;
        Console.WriteLine(picURL1);
        downLoad(picURL1);
    }
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> and writes it to
/// C:\Users\Administrator\Desktop\PP\ under the file name taken from the URL.
/// Failures are logged to the console and do not propagate.
/// </summary>
/// <param name="url">Absolute URL of the image to download; blank values are ignored.</param>
public async void downLoad(string url)
{
    if (string.IsNullOrWhiteSpace(url))
    {
        return;
    }

    try
    {
        using (HttpResponseMessage res = await Http.Client.GetAsync(url))
        {
            // Do not save an error page as if it were an image.
            res.EnsureSuccessStatusCode();
            byte[] b1 = await res.Content.ReadAsByteArrayAsync();

            string folder = @"C:\Users\Administrator\Desktop\PP\";
            // No-op when the folder already exists; avoids DirectoryNotFoundException on first run.
            Directory.CreateDirectory(folder);
            File.WriteAllBytes(Path.Combine(folder, Path.GetFileName(url)), b1);
        }
    }
    catch (Exception ex)
    {
        // This method is async void, so an exception escaping it cannot be observed
        // by the caller and would terminate the process; log it and move on instead.
        Console.WriteLine($"Download failed for {url}: {ex.Message}");
    }
}