"""Download car pictures from car.autohome.com.cn into a local folder.

The crawl runs in four stages, each feeding the next:

1. ``get_car_brand_url``         - collect every link on the left-menu page.
2. ``get_car_brand_class_url``   - collect the links listed on each of those pages.
3. ``get_brand_class_image_url`` - find the first picture-list link on each page.
4. ``download_image``            - save every image found on the picture-list pages.
"""
import logging
import os
import time
import urllib.request
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)

CAR_BASE = 'https://car.autohome.com.cn'
HEADERS = {'User-Agent': 'Mozilla/5.0'}
# Seconds to wait for any single HTTP request; without a timeout one stalled
# connection would hang the whole crawl.
REQUEST_TIMEOUT = 10


def _fetch_soup(url):
    """Fetch *url* and return it parsed, or ``None`` if the request fails."""
    try:
        response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as err:
        logger.warning('skipping %s: %s', url, err)
        return None
    # Decode once as gb2312 (undecodable bytes are dropped) and hand the text
    # straight to the parser so it does not have to guess the encoding again.
    html = response.content.decode('gb2312', 'ignore')
    return BeautifulSoup(html, 'html.parser')


def _absolute_url(href):
    """Resolve *href* against the site root; return ``None`` if unusable.

    Missing/empty hrefs and non-HTTP schemes (``javascript:``, ``data:``,
    ``mailto:`` ...) yield ``None`` so callers can simply skip them.
    """
    if not href:
        return None
    url = urljoin(CAR_BASE + '/', href.strip())
    if urlparse(url).scheme not in ('http', 'https'):
        return None
    return url


def _links_in(node):
    """Return the absolute URLs of all usable ``<a href>`` under *node*."""
    urls = []
    for anchor in node.find_all('a'):
        url = _absolute_url(anchor.get('href'))
        if url:
            urls.append(url)
    return urls


# Get the page of every car brand.
def get_car_brand_url(base_url):
    """Return the absolute URL of every link found on *base_url*.

    Returns an empty list when the page cannot be fetched.
    """
    soup = _fetch_soup(base_url)
    if soup is None:
        return []
    return _links_in(soup)


# Get all the different car classes (series) of every brand.
def get_car_brand_class_url(car_url_list):
    """Return the links inside the ``uibox-con carpic-list02`` box of each page.

    Pages that fail to load or do not contain that box are skipped.
    """
    car_class_list = []
    for page_url in car_url_list:
        soup = _fetch_soup(page_url)
        if soup is None:
            continue
        box = soup.find('div', {'class': 'uibox-con carpic-list02'})
        if box is None:
            logger.warning('no series list on %s', page_url)
            continue
        car_class_list.extend(_links_in(box))
    return car_class_list


def get_brand_class_image_url(car_class_list):
    """Return, for each page, the first link inside ``ul.search-pic-sortul``.

    Pages that fail to load, or that lack the list or a usable link, are
    skipped, so the result may be shorter than *car_class_list*.
    """
    car_image_url = []
    for page_url in car_class_list:
        soup = _fetch_soup(page_url)
        if soup is None:
            continue
        sort_list = soup.find('ul', {'class': 'search-pic-sortul'})
        first_link = sort_list.find('a') if sort_list is not None else None
        url = _absolute_url(first_link.get('href')) if first_link is not None else None
        if url is None:
            logger.warning('no picture-list link on %s', page_url)
            continue
        car_image_url.append(url)
    return car_image_url


def download_image(car_image_url, folder_path):
    """Save every image listed on the given pages as ``<n>.jpg`` in *folder_path*.

    *folder_path* is created if it does not exist. Images are numbered with a
    single counter that runs across all pages, so pictures from one page never
    overwrite those of another. Pages or images that cannot be fetched are
    logged and skipped.
    """
    os.makedirs(folder_path, exist_ok=True)

    index = 0  # shared across pages on purpose: file names must stay unique
    for page_url in car_image_url:
        soup = _fetch_soup(page_url)
        if soup is None:
            continue
        gallery = soup.find('div', {'class': 'uibox-con carpic-list03 border-b-solid'})
        if gallery is None:
            logger.warning('no image gallery on %s', page_url)
            continue

        for img in gallery.find_all('img'):
            src = (img.get('src') or '').strip()
            if src.startswith('//'):
                # Protocol-relative URL: keep the original choice of plain http.
                image_url = 'http:' + src
            else:
                image_url = _absolute_url(src)
            if not image_url:
                continue

            try:
                image = requests.get(image_url, headers=HEADERS,
                                     timeout=REQUEST_TIMEOUT)
                image.raise_for_status()
            except requests.RequestException as err:
                logger.warning('skipping image %s: %s', image_url, err)
                continue

            img_name = os.path.join(folder_path, '{}.jpg'.format(index))
            with open(img_name, 'wb') as file:  # write the image data as raw bytes
                file.write(image.content)
            print('第%d张图片下载完成' % index)
            index += 1


base_url = 'https://car.autohome.com.cn/AsLeftMenu/As_LeftListNew.ashx?typeId=2%20&brandId=0%20&fctId=0%20&seriesId=0'
folder_path = r'./car_images'


def main():
    """Run the full crawl: menu -> brand pages -> picture pages -> files."""
    image_url_list = get_car_brand_url(base_url)
    car_class_list = get_car_brand_class_url(image_url_list)
    car_image_url = get_brand_class_image_url(car_class_list)
    download_image(car_image_url, folder_path)


# Guarded so that importing this module does not start a network crawl.
if __name__ == '__main__':
    main()
本网页所有视频内容由 imoviebox边看边下-网页视频下载, iurlBox网页地址收藏管理器 下载并得到。
ImovieBox网页视频下载器 下载地址: ImovieBox网页视频下载器-最新版本下载
本文章由: imapbox邮箱云存储,邮箱网盘,ImageBox 图片批量下载器,网页图片批量下载专家,网页图片批量下载器,获取到文章图片,imoviebox网页视频批量下载器,下载视频内容,为您提供.
阅读和此文章类似的: 全球云计算