记一下、记一下,免得又到处找。
Python 脚本做了一点修改:读取同级目录下的 url.txt 文本,文件里一行一个资源地址,这样就可以直接在本机运行了。
# coding:utf-8
import requests
import os
import re
class ImgDownloader(object):
    """Download one or many image URLs, saving each file under a local
    directory tree that mirrors the URL's path (scheme/host stripped)."""

    def downloader(self, urls):
        """Dispatch entry point: accepts a single URL string or an
        iterable of URL strings. Empty/None input is a no-op."""
        if not urls:
            return
        if isinstance(urls, str):
            self.img_downloader(urls)
        else:
            for url in urls:
                self.img_downloader(url)

    def img_downloader(self, url):
        """Download one URL to disk.

        Returns -1 on failure (original contract: prints 404 for fetch
        errors, 403 for filesystem errors), None on success.
        """
        try:
            r = requests.get(url=url)
            # Without this check, 404/500 error pages would be written
            # out as if they were the requested image.
            r.raise_for_status()
        except requests.RequestException:
            print(404)
            return -1
        try:
            path_name, path, name = self._path_name(url)
            # path is '' for bare filenames; makedirs('') would raise.
            if path and not os.path.exists(path):
                os.makedirs(path, mode=0o755)
            with open(path_name, 'wb') as f:
                f.write(r.content)
        except OSError:
            print(403)
            return -1

    def imgs_downloader(self, urls):
        """Download a list of URLs (kept for backward compatibility;
        downloader() handles both cases)."""
        if not urls:
            return
        for url in urls:
            self.img_downloader(url)

    def _path_name(self, url):
        """Split *url* into (relative file path, directory part, filename).

        Example: 'http://h/a/b.png' -> ('a/b.png', 'a', 'b.png').
        """
        name = url.split("/")[-1]
        # group 1: optional scheme + '//authority'; group 3: the path.
        url_re = re.compile(r'''(?xi)\A
            ([a-z][a-zA-Z0-9+\-.]*:(//[^/?#]+)?)?
            ([a-zA-Z0-9\-._~%!$&'()*+,;=:@/]*)''')
        match = url_re.search(url)
        if match:
            path_name = match.group(3).strip('/')
            # Remove the exact filename suffix; the original used
            # rstrip(name), which strips a *character set*, not a suffix.
            path = path_name[:-len(name)].strip('/') if name else path_name
            return path_name, path, name
        # Original code returned an undefined variable here (NameError);
        # fall back to the bare name saved in the current directory.
        return name, '', name
if __name__=="__main__":
# 打开当前目录下的 url.txt 文件
with open("url.txt", "r") as file:
# 读取文件中的所有行,每行一个 URL
root_url = tuple(line.strip() for line in file)
# 创建 ImgDownloader 实例
obj_spider = ImgDownloader()
# 调用 downloader 方法下载图片
obj_spider.downloader(root_url)
shell 版本的要放到 Linux 里运行,还得开虚拟机,既占空间又麻烦:下载完还得再往物理机上传一次。Windows 上好像有办法直接运行 sh 脚本来着,具体记不清了,算了,不重要。
#!/bin/bash
# desc: download resources listed in a file, preserving each URL's
#       path as a local directory tree
# author: 十年后的卢哥哥
# usage: sh download.sh url.txt

mydir=$(pwd)

# read -r keeps backslashes in URLs intact.
while read -r line
do
    if [ -n "$line" ]
    then
        cd "$mydir" || exit 1
        # Strip Windows carriage returns so paths stay clean.
        url=$(echo "$line" | tr -d '\r')
        # Drop the scheme to get the relative save path
        # (original only handled http://, not https://).
        picdir=$(echo "$url" | sed -r 's#https?://##')
        picname=${picdir##*/}
        picpath=${picdir%/*}
        mkdir -p "$picpath"
        # Skip this URL rather than downloading into the wrong directory.
        cd "$picpath" || continue
        wget -O "$picname" "$url"
    fi
done < "$1"

exit 0
sh运行方法是:sh download.sh url.txt。
有这个东西真是帮了我大忙啊!以后工作效率直接一整个拉满😁
参考文献:图片下载保留原路径(Python)
January 13,2024 22:47:13
阅读468
撰写评论