"""Download 4K wallpapers from pic.netbian.com.

The script fetches one listing page, follows each thumbnail to its detail
page to find the full-size image, and saves the images into a local folder.
"""

import contextlib
import os
import random
import time
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

BASE_URL = "https://pic.netbian.com"
DEFAULT_SAVE_DIR = "netbian_4k_images"
DEFAULT_MAX_IMAGES = 20
# (connect, read) timeouts in seconds; without them a stalled server would
# block the crawl forever.
REQUEST_TIMEOUT = (5, 30)
CHUNK_SIZE = 8192

# Request headers that imitate a regular browser visit.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Referer': 'https://pic.netbian.com/'
}


def download_image(url, save_path):
    """Download one image and store it at ``save_path``.

    The data is streamed into ``save_path + ".part"`` and moved into place
    only after the transfer completed, so an interrupted download never
    leaves a truncated file that a later run would mistake for a finished one.

    Args:
        url: Absolute URL of the image.
        save_path: Destination file path.

    Returns:
        True if the image was saved, False otherwise. Network and file-system
        errors are reported on stdout instead of being raised.
    """
    tmp_path = save_path + ".part"
    try:
        # The context manager releases the pooled connection even on errors.
        with requests.get(url, headers=headers, stream=True,
                          timeout=REQUEST_TIMEOUT) as response:
            if response.status_code != 200:
                print(f"下载失败，状态码: {response.status_code}")
                return False
            with open(tmp_path, 'wb') as f:
                for chunk in response.iter_content(CHUNK_SIZE):
                    f.write(chunk)
        os.replace(tmp_path, save_path)
    except (requests.RequestException, OSError) as e:
        print(f"下载图片时出错: {e}")
        return False
    finally:
        # After a successful os.replace the temp file is gone; this only
        # cleans up after a failed or interrupted transfer.
        with contextlib.suppress(OSError):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)
    print(f"图片下载成功: {save_path}")
    return True


def _find_full_image_url(detail_url):
    """Return the absolute URL of the large image on a detail page, or None."""
    detail_resp = requests.get(detail_url, headers=headers,
                               timeout=REQUEST_TIMEOUT)
    if detail_resp.status_code != 200:
        return None

    detail_soup = BeautifulSoup(detail_resp.text, 'html.parser')
    img_div = detail_soup.find('div', {'class': 'photo-pic'})
    img_tag = img_div.find('img') if img_div else None
    src = img_tag.get('src') if img_tag else None
    if not src:
        return None
    # urljoin copes with relative, absolute and protocol-relative links.
    return urljoin(BASE_URL, src)


def get_netbian_images(page_url, max_images=20, save_dir=DEFAULT_SAVE_DIR):
    """Download the images linked from one listing page.

    Args:
        page_url: URL of the listing page to crawl.
        max_images: Stop after this many images were downloaded successfully.
        save_dir: Folder the images are written to; created if missing.

    Returns:
        The number of images downloaded during this call.
    """
    os.makedirs(save_dir, exist_ok=True)
    downloaded = 0

    try:
        response = requests.get(page_url, headers=headers,
                                timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(f"爬取过程中出错: {e}")
        return downloaded

    if response.status_code != 200:
        print(f"请求失败，状态码: {response.status_code}")
        return downloaded

    soup = BeautifulSoup(response.text, 'html.parser')

    # The thumbnails live in a <ul class="clearfix"> list.
    ul_element = soup.find('ul', {'class': 'clearfix'})
    if not ul_element:
        print("未找到图片列表")
        return downloaded

    for li in ul_element.find_all('li'):
        if downloaded >= max_images:
            break

        a_tag = li.find('a')
        href = a_tag.get('href') if a_tag else None
        if not href:
            continue
        detail_url = urljoin(BASE_URL, href)

        # The listing only shows thumbnails; the full-size image is linked
        # from the detail page.
        try:
            img_url = _find_full_image_url(detail_url)
            if img_url is None:
                continue

            # Use only the URL path so a query string never ends up in the
            # file name.
            file_name = os.path.basename(urlparse(img_url).path)
            if not file_name:
                continue
            save_path = os.path.join(save_dir, file_name)

            # Skip files that are already present; count only real successes
            # so max_images means "images actually saved".
            if not os.path.exists(save_path) and download_image(img_url, save_path):
                downloaded += 1
        except Exception as e:
            # Deliberate best-effort boundary: one broken item must not abort
            # the rest of the crawl.
            print(f"处理详情页时出错: {e}")
        finally:
            # Random pause after every detail-page visit to avoid hammering
            # the site.
            time.sleep(random.uniform(1, 3))

    return downloaded


def main():
    """Ask for the download limit and crawl the default listing page."""
    page_url = "https://pic.netbian.com/4k/index_61.html"
    raw = input("请输入最大下载数量(默认为20): ").strip()
    try:
        max_images = int(raw) if raw else DEFAULT_MAX_IMAGES
    except ValueError:
        # Fall back to the default instead of crashing on non-numeric input.
        print(f"输入无效，使用默认值 {DEFAULT_MAX_IMAGES}")
        max_images = DEFAULT_MAX_IMAGES
    get_netbian_images(page_url, max_images)
    print("图片下载完成!")


if __name__ == "__main__":
    main()

# NOTE: in the captured page this line ended with the UI label "生成讲解视频"
# ("generate explainer video"); it is not part of the script.

视频信息

视频地址

封面地址

Provider

视频字幕

欢迎观看Python爬虫实战教程！今天我们将学习一个实用的Python脚本，用于从彼岸图网自动下载4K高清壁纸。这个脚本通过模拟浏览器访问网页，解析HTML内容，获取图片链接，并自动下载保存到本地。让我们开始探索这个有趣的项目吧！首先我们来看看这个爬虫脚本需要的Python库。主要包括os库用于文件操作，requests库用于发送网络请求，BeautifulSoup库用于解析HTML页面，time和random库用于添加延迟避免请求过于频繁。这些库可以通过pip命令轻松安装。接下来我们将详细分析每个库的具体用途。现在我们来详细分析download_image函数。这个函数负责下载单张图片并保存到指定路径。它接收两个参数：图片的URL地址和本地保存路径。函数首先使用requests库发送GET请求获取图片数据，然后检查响应状态码确保请求成功，最后以二进制模式打开文件并分块写入图片数据，这样可以避免大文件导致的内存溢出问题。接下来是核心的get_netbian_images函数，它实现了完整的爬取流程。首先创建保存文件夹，然后获取主页面HTML内容，使用BeautifulSoup解析出图片列表。由于主页面只有缩略图，需要进入每个图片的详情页获取高清大图链接。这种二级页面爬取策略确保了图片质量。最后下载图片并添加随机延迟，避免请求过于频繁被网站屏蔽。最后我们来看如何运行这个爬虫脚本。首先确保安装了Python环境和所需的依赖库requests和beautifulsoup4。将代码保存为py文件后，在终端中运行即可。程序会提示输入下载数量，然后自动开始爬取。需要特别注意的是，使用爬虫时必须遵守网站的robots协议，合理设置延迟时间，并且仅供学习交流使用，要尊重网站的版权和服务条款。

视频信息

答案文本 复制

视频字幕 复制

答案文本

视频字幕