"""Download the images found by a Baidu image search into a local folder."""
import os
import re
from urllib.parse import quote, urlsplit

import requests

root = 'D:/Python/download/'  # folder the images are saved into
# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(root, exist_ok=True)

# Captures the value of every "ObjURL" field in the fetched page source.
pattern = re.compile(r'"ObjURL":"(.*?)"')


def getTextFromHtml(url):
    """Fetch ``url`` and return the response body as text.

    The body is decoded with the encoding detected from its content
    (``apparent_encoding``) instead of the one declared in the headers.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page source, or ``''`` when the request fails or the
        server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, timeout=30, headers={'user-agent': 'mozilla/5.0'})
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best effort: callers treat an empty string as "nothing found".
        return ''


def _local_path(u):
    """Return the file path under ``root`` for image URL ``u``, or None.

    Only the last segment of the URL *path* is used, so a query string or
    fragment never ends up in the file name. None is returned when the URL
    cannot be parsed or has no file name.
    """
    try:
        name = urlsplit(u).path.rsplit('/', 1)[-1]
    except ValueError:
        return None
    return root + name if name else None


def download(List):
    """Download every image URL in ``List`` into the ``root`` folder.

    A URL whose target file already exists is skipped without any request.
    Failures are reported on stdout and never abort the remaining downloads.

    Args:
        List: Iterable of image URLs; backslash escapes such as
            ``http:\\/\\/host\\/a.jpg`` are removed before use.
    """
    for u in List:
        # Strip the backslash escapes before the URL is parsed or requested.
        u = u.replace('\\', '')
        path = _local_path(u)
        if path is None:
            print(u, "下载失败,可能链接不是指定格式图片")
            continue
        if os.path.exists(path):
            # Already on disk: skip before spending a network round trip.
            continue
        try:
            r = requests.get(u, timeout=30)
            r.raise_for_status()
            # The payload is binary, so no text-encoding detection is needed.
            with open(path, 'wb') as f:
                f.write(r.content)
            print(path + ' 文件保存成功')
        except (requests.RequestException, OSError):
            print(u, "下载失败,可能链接不是指定格式图片")


def getOtherPage(nPage, nNum, word):
    """Build the URLs of the JSON result pages for a search keyword.

    Args:
        nPage: Number of page URLs to generate.
        nNum: Number of results requested per page (the ``rn`` parameter).
        word: Search keyword; it is percent-encoded before being inserted,
            so pass it unencoded.

    Returns:
        A list of ``nPage`` URLs whose ``pn`` offsets are ``nNum * 1``
        through ``nNum * nPage``.
    """
    # Endpoint template. Adjacent literals are concatenated so that no
    # whitespace leaks into the query string.
    url = ('http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592'
           '&is=&fp=result&queryWord={word}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1'
           '&z=&ic=0&word={word}&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1'
           '&fr=&pn={pn}&rn={rn}')
    # Encode the keyword so characters like '&' or '#' cannot break the query.
    encoded = quote(word)
    return [url.format(word=encoded, pn=nNum * x, rn=nNum)
            for x in range(1, nPage + 1)]


def main():
    """Prompt for a keyword and download the images of the first result page."""
    n = 30  # page size is fixed at 30 images per page
    # NOTE(review): `page` is read but never used below, and getOtherPage()
    # is not called anywhere in this script, so only the first result page
    # is downloaded. Confirm whether the extra pages were meant to be fetched.
    page = int(input('输入想下载多少页图片(每页%d张图片):' % (n)))
    word = input('输入想下载的图片搜索关键字:')
    # Search page URL; the keyword is percent-encoded for the query string.
    url = ('http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1'
           '&st=-1&fm=result&fr=&sf=1&fmq=1499773676062_R&pv=&ic=0&nc=1&z=&se=1&showtab=0'
           '&fb=0&width=&height=&face=0&istype=2&ie=utf-8&word={word}').format(
               word=quote(word))
    html = getTextFromHtml(url)
    firstUrlList = pattern.findall(html)
    download(firstUrlList)


if __name__ == '__main__':
    main()