Python作为一门强大的编程语言,在视频下载自动化方面有着广泛的应用。本文将从基础概念开始,逐步教您如何编写高效的视频下载脚本。
环境准备
在开始编写脚本之前,我们需要准备Python环境和相关库。
安装Python
确保您的系统已安装Python 3.7或更高版本。可以从python.org下载最新版本。
安装必要的库
pip install yt-dlp requests beautifulsoup4 selenium
基础脚本编写
1. 简单的单视频下载脚本
import yt_dlp
import os
def download_video(url, output_path='./downloads'):
    """Download a single video with yt-dlp, capped at 720p.

    Args:
        url: Address of the video page to download.
        output_path: Directory that receives the file; created if missing.

    Returns:
        True when the download finished without raising, False otherwise.
    """
    # Make sure the target directory exists before anything is written.
    os.makedirs(output_path, exist_ok=True)

    # A literal '%' in the directory name would be parsed as the start of an
    # output-template field, so it is escaped as '%%'.
    safe_path = output_path.replace('%', '%%')
    ydl_opts = {
        'outtmpl': f'{safe_path}/%(title)s.%(ext)s',
        'format': 'best[height<=720]',  # cap at 720p
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        # Best-effort helper: report the failure instead of propagating it.
        print(f"下载失败: {e}")
        return False

    print(f"下载完成: {url}")
    return True
# Usage example
if __name__ == "__main__":
    # Pasted URLs often carry stray spaces or newlines; strip them first.
    video_url = input("请输入视频URL: ").strip()
    if video_url:
        download_video(video_url)
    else:
        # Nothing was entered, so there is nothing to download.
        print("未输入URL, 已退出")
2. 批量下载脚本
import yt_dlp
import time
from concurrent.futures import ThreadPoolExecutor
import logging
# Configure logging: INFO level, each record prefixed with time and level.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
class VideoDownloader:
    """Download several videos concurrently with yt-dlp.

    Files are saved as ``<output_path>/<uploader>/<title>.<ext>``, limited to
    1080p, with regular and automatic subtitles requested.
    """

    def __init__(self, output_path='./downloads', max_workers=3,
                 request_delay=1):
        """Store the settings and build the yt-dlp options.

        Args:
            output_path: Root directory for downloaded files.
            max_workers: Number of worker threads used for batch downloads.
            request_delay: Seconds each worker pauses after a download
                attempt, so consecutive requests are spaced out.
        """
        self.output_path = output_path
        self.max_workers = max_workers
        self.request_delay = request_delay
        # Escape '%' so the directory name is never parsed as a template field.
        safe_path = output_path.replace('%', '%%')
        self.ydl_opts = {
            'outtmpl': f'{safe_path}/%(uploader)s/%(title)s.%(ext)s',
            'format': 'best[height<=1080]',
            'writesubtitles': True,  # download subtitles
            'writeautomaticsub': True,  # download auto-generated subtitles
        }

    def download_single_video(self, url):
        """Download one video.

        Returns:
            True on success, False if any exception was raised (it is logged).
        """
        try:
            with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
                # Fetch metadata first so the log lines can name the video.
                # NOTE(review): the `or {}` guards against a None result;
                # confirm whether extract_info can return None here.
                info = ydl.extract_info(url, download=False) or {}
                title = info.get('title', 'Unknown')
                duration = info.get('duration', 0)
                logging.info("开始下载: %s (时长: %s秒)", title, duration)

                ydl.download([url])
                logging.info("下载完成: %s", title)
                return True
        except Exception as e:
            logging.error("下载失败 %s: %s", url, e)
            return False

    def _download_paced(self, url):
        """Run one download, then pause so this worker's requests are spaced."""
        ok = self.download_single_video(url)
        if self.request_delay > 0:
            time.sleep(self.request_delay)
        return ok

    def download_multiple_videos(self, urls):
        """Download every URL in *urls* using a thread pool.

        The pause between requests happens inside each worker. Sleeping while
        collecting results would not slow the requests down, because all
        tasks are already submitted by then.

        Returns:
            A ``(successful, failed)`` tuple of counts.
        """
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            results = list(executor.map(self._download_paced, urls))

        successful = sum(1 for ok in results if ok)
        failed = len(results) - successful
        logging.info("下载完成! 成功: %s, 失败: %s", successful, failed)
        return successful, failed
# Usage example
if __name__ == "__main__":
    batch = VideoDownloader()
    # Placeholder links; append further URLs to this list as needed.
    batch.download_multiple_videos([
        "https://www.youtube.com/watch?v=example1",
        "https://www.youtube.com/watch?v=example2",
    ])
高级功能实现
1. 播放列表下载
def download_playlist(playlist_url, max_videos=None, output_path='./downloads'):
    """Download a playlist into its own sub-folder, capped at 720p.

    Files are named ``<playlist>/<playlist_index> - <title>.<ext>`` below
    *output_path*.

    Args:
        playlist_url: Address of the playlist.
        max_videos: Passed to yt-dlp unchanged as ``playlistend``.
            NOTE(review): None is presumably treated as "no limit" — confirm
            against the yt-dlp option documentation.
        output_path: Root directory for the download; the default keeps the
            previously hard-coded ``./downloads``.
    """
    # Escape '%' so the directory name is never parsed as a template field.
    safe_path = output_path.replace('%', '%%')
    ydl_opts = {
        'outtmpl': f'{safe_path}/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s',
        'format': 'best[height<=720]',
        'playlistend': max_videos,  # limit the number of downloaded items
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([playlist_url])
2. 视频质量选择
def get_available_formats(url):
    """Print the formats yt-dlp reports for *url* and return them.

    Returns:
        The list of format dicts from the extracted info; empty when none
        are reported.
    """
    # 'listformats' is deliberately not set: it makes yt-dlp print its own
    # format table, which duplicated the listing printed below.
    with yt_dlp.YoutubeDL({}) as ydl:
        info = ydl.extract_info(url, download=False) or {}

    formats = info.get('formats') or []
    print("可用格式:")
    for f in formats:
        # A key may be present with a None value, so `or` is used to make
        # the 'N/A' fallback apply in that case as well.
        format_id = f.get('format_id') or 'N/A'
        resolution = f.get('resolution') or 'N/A'
        filesize = f.get('filesize') or f.get('filesize_approx') or 'N/A'
        print(f"ID: {format_id}, 分辨率: {resolution}, "
              f"文件大小: {filesize}")
    return formats
def download_with_quality_selection(url):
    """List the formats for *url*, ask for a format ID, then download it.

    The file is written to ``./downloads/<title>.<ext>``.
    """
    # Show the choices before prompting.
    get_available_formats(url)
    chosen_format = input("请选择格式ID: ")

    options = dict(
        format=chosen_format,
        outtmpl='./downloads/%(title)s.%(ext)s',
    )
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])
3. 进度显示和错误处理
class ProgressHook:
    """Console progress reporter meant for yt-dlp's ``progress_hooks`` option."""

    def __init__(self):
        # Path of the file currently being reported; lets the "now
        # downloading" header be printed only once per file.
        self.current_video = ""

    def progress_hook(self, d):
        """Print progress for one event dict.

        Args:
            d: Event dict. Reads 'status', 'filename', 'downloaded_bytes',
                'total_bytes' and 'total_bytes_estimate'.
        """
        status = d.get('status')
        if status == 'downloading':
            filename = d.get('filename') or ''
            if filename != self.current_video:
                self.current_video = filename
                print(f"\n正在下载: {os.path.basename(filename)}")

            # The total may be missing, None or 0; fall back to the estimate
            # and skip the percentage when no usable total exists, instead of
            # raising TypeError / ZeroDivisionError.
            total = d.get('total_bytes') or d.get('total_bytes_estimate')
            downloaded = d.get('downloaded_bytes')
            if total and downloaded is not None:
                percent = downloaded / total * 100
                print(f"\r进度: {percent:.1f}%", end='', flush=True)
        elif status == 'finished':
            finished_name = os.path.basename(d.get('filename') or '')
            print(f"\n下载完成: {finished_name}")
def download_with_progress(url):
    """Download *url* to ``./downloads`` while printing progress to the console."""
    tracker = ProgressHook()

    options = {}
    options['outtmpl'] = './downloads/%(title)s.%(ext)s'
    # The tracker's bound method is invoked with each progress event.
    options['progress_hooks'] = [tracker.progress_hook]

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])
实用工具函数
1. URL验证
import re
# Compiled once at import time instead of on every call. It is applied with
# fullmatch(), so no ^/$ anchors are needed and a trailing newline is rejected.
_URL_PATTERN = re.compile(
    r'https?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+'  # dot-separated labels
    r'(?:[A-Z]{2,63}|XN--[A-Z0-9-]{1,59})\.?|'  # TLD: alphabetic or punycode
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or dotted-quad IP
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)',  # optional path / query without whitespace
    re.IGNORECASE,
)


def is_valid_url(url):
    """Return True if *url* looks like a well-formed http(s) URL.

    This is a syntactic check only; nothing is resolved or fetched.
    Non-string input yields False instead of raising. IP octets are only
    checked for digit count, not for the 0-255 range.
    """
    if not isinstance(url, str):
        return False
    return _URL_PATTERN.fullmatch(url) is not None
2. 磁盘空间检查
def check_available_space(path, required_mb):
    """Check whether the filesystem holding *path* has enough free space.

    Args:
        path: Directory (or file) the download will be written to. It does
            not have to exist yet.
        required_mb: Space needed, in megabytes (1 MB = 1024 * 1024 bytes).

    Returns:
        True when at least *required_mb* is free, otherwise False (a warning
        is printed).
    """
    import os
    import shutil

    # disk_usage() raises for a path that does not exist yet (for example a
    # download folder that has not been created), so probe the nearest
    # existing ancestor instead.
    probe = os.path.abspath(path)
    while not os.path.exists(probe):
        parent = os.path.dirname(probe)
        if parent == probe:  # reached the filesystem root
            break
        probe = parent

    free_mb = shutil.disk_usage(probe).free / (1024 * 1024)
    if free_mb < required_mb:
        print(f"警告: 可用空间不足! 需要: {required_mb}MB, 可用: {free_mb:.1f}MB")
        return False
    return True
配置文件管理
使用JSON配置文件
import json
def load_config(config_file='config.json'):
    """Load settings from a JSON file, filling missing keys with defaults.

    If the file does not exist, it is created with the default settings.
    If the file cannot be parsed, or its top level is not a JSON object, a
    warning is printed and the defaults are returned. The file itself is
    left untouched in that case.

    Args:
        config_file: Path of the JSON configuration file.

    Returns:
        A dict containing every default key, overridden by values from the
        file where present.
    """
    default_config = {
        'output_path': './downloads',
        'max_workers': 3,
        'video_quality': 'best[height<=720]',
        'download_subtitles': True,
        'max_retries': 3
    }

    try:
        with open(config_file, 'r', encoding='utf-8') as f:
            config = json.load(f)
    except FileNotFoundError:
        # First run: write the defaults so the user has a file to edit.
        with open(config_file, 'w', encoding='utf-8') as f:
            json.dump(default_config, f, indent=2, ensure_ascii=False)
        return default_config
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # A damaged file should not crash the program.
        print(f"警告: 配置文件 {config_file} 解析失败 ({e}), 使用默认配置")
        return default_config

    if not isinstance(config, dict):
        # Only a JSON object can be merged over the defaults.
        print(f"警告: 配置文件 {config_file} 的顶层必须是JSON对象, 使用默认配置")
        return default_config

    return {**default_config, **config}
部署和优化
1. 创建可执行脚本
使用PyInstaller将Python脚本打包成可执行文件:
pip install pyinstaller
pyinstaller --onefile video_downloader.py
2. 性能优化建议
- 合理设置并发数量,避免过多请求被限制
- 添加重试机制处理网络错误
- 使用代理池避免IP被封禁
- 定期更新yt-dlp库以支持最新网站
注意事项
法律合规
- 仅下载有权限的内容
- 遵守网站的使用条款
- 不要将未授权的内容用于商业用途
技术限制
- 某些网站有反爬虫机制
- 下载速度可能受到限制
- 需要定期更新以适应网站变化
总结
通过本文的学习,您应该已经掌握了使用Python编写视频下载脚本的基本技能。从简单的单视频下载到复杂的批量处理,Python为我们提供了强大而灵活的解决方案。记住在使用这些脚本时要遵守相关法律法规,尊重内容创作者的权益。
继续学习和实践,您可以根据具体需求进一步定制和优化这些脚本,创建出更加强大和实用的视频下载工具。
发表回复