# Method 1:
import os
import re

import requests
def get_resource(url, timeout=10):
    """Download *url* and return the raw response body.

    Args:
        url: Address of the resource to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes`` when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status or a network-level failure).
    """
    headers = {
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)"
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Report transport failures the same way as a bad status code.
        return None
    if response.status_code == 200:
        return response.content
    return None
def get_page(url, timeout=10):
    """Download *url* and return its body decoded as UTF-8 text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status or a network-level failure).
    """
    headers = {
        "User-Agent": "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)"
    }
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Report transport failures the same way as a bad status code.
        return None
    if response.status_code == 200:
        # A single malformed byte should not abort the whole crawl, so
        # undecodable bytes are replaced instead of raising.
        return response.content.decode('utf-8', errors='replace')
    return None
def parse_source(html):
    """Find every video URL in *html*, download each one, and return the URLs.

    Args:
        html: Page markup as text, or ``None`` when the page could not be
            fetched.

    Returns:
        A list of the URLs captured from ``data-mp4="..."`` attributes, in
        document order. The list is empty when *html* is ``None``, empty, or
        contains no matches.
    """
    if not html:
        # A failed page fetch yields None; there is nothing to scan.
        return []
    # Capture the value of each data-mp4 attribute that closes its tag.
    pattern = re.compile(r'data-mp4="(.*?)">', re.S)
    video_urls = pattern.findall(html)
    for video_url in video_urls:
        # save_video may return False to signal a skipped download; any other
        # result (including None) is treated as a completed download.
        if save_video(video_url) is False:
            continue
        print(video_url + '下载完成')
    return video_urls
def save_video(url):
    """Download the video at *url* into the ``./video`` directory.

    The file name is the last ``/``-separated segment of *url*.

    Args:
        url: Address of the video file.

    Returns:
        ``True`` when the file was written, ``False`` when the download
        failed or *url* ends with ``/`` and so yields no file name.
    """
    filename = url.split("/")[-1]
    if not filename:
        # Nothing to name the file after; skip before downloading anything.
        return False
    content = get_resource(url)
    if content is None:
        # get_resource returns None on failure; writing None would raise
        # TypeError and leave an empty file behind.
        return False
    # The target directory may not exist on a first run.
    os.makedirs('./video', exist_ok=True)
    with open('./video/%s' % filename, 'wb') as f:
        f.write(content)
    return True
def main(first_page=2, last_page=9):
    """Crawl the video listing pages and download every video found.

    Args:
        first_page: Number of the first listing page to crawl (inclusive).
        last_page: Number of the last listing page to crawl (inclusive).
    """
    # NOTE(review): the default range starts at page 2, so page 1 is never
    # crawled — confirm this is intentional.
    for page in range(first_page, last_page + 1):
        url = "http://www.budejie.com/video/" + str(page)
        html = get_page(url)
        if html is None:
            # get_page returns None when the fetch fails; skip this page
            # rather than passing None on to the parser.
            continue
        parse_source(html)
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()