Python自动化下载视频脚本:从入门到精通

Python作为一门强大的编程语言,在视频下载自动化方面有着广泛的应用。本文将从基础概念开始,逐步教您如何编写高效的视频下载脚本。

环境准备

在开始编写脚本之前,我们需要准备Python环境和相关库。

安装Python

确保您的系统已安装受yt-dlp支持的Python 3版本(较新的yt-dlp已不再支持Python 3.7,建议使用Python 3.9或更高版本,具体要求以yt-dlp官方文档为准)。可以从python.org下载最新版本。

安装必要的库

pip install yt-dlp requests beautifulsoup4 selenium

基础脚本编写

1. 简单的单视频下载脚本

import yt_dlp
import os

def download_video(url, output_path='./downloads'):
    """Download a single video with yt-dlp, capped at 720p.

    Args:
        url: Address of the video page to download.
        output_path: Directory that receives the file; created when missing.

    Returns:
        True when yt-dlp finished without raising, False otherwise.
    """
    # Make sure the destination exists before yt-dlp starts writing into it.
    os.makedirs(output_path, exist_ok=True)

    # '%' introduces a field in yt-dlp output templates, so a literal '%' in
    # the directory name must be doubled to survive template expansion.
    safe_dir = str(output_path).replace('%', '%%')

    ydl_opts = {
        'outtmpl': os.path.join(safe_dir, '%(title)s.%(ext)s'),
        'format': 'best[height<=720]',  # never fetch anything above 720p
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception as e:
        # Script-level boundary: report the failure instead of crashing and
        # let the caller decide what to do with the result.
        print(f"下载失败: {e}")
        return False

    print(f"下载完成: {url}")
    return True

# Usage example: prompt for one URL and hand it straight to the downloader.
if __name__ == "__main__":
    download_video(input("请输入视频URL: "))

2. 批量下载脚本

import yt_dlp
import time
from concurrent.futures import ThreadPoolExecutor
import logging

# Configure logging: INFO and above, rendered as "time - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

class VideoDownloader:
    """Download several videos concurrently with yt-dlp.

    Args:
        output_path: Root directory; the output template places each file in
            a sub-folder named after the uploader.
        max_workers: Upper bound on simultaneous downloads.
        request_delay: Seconds to wait between *starting* two consecutive
            downloads. Zero or a negative value disables the pause.
    """

    def __init__(self, output_path='./downloads', max_workers=3,
                 request_delay=1.0):
        self.output_path = output_path
        self.max_workers = max_workers
        self.request_delay = request_delay
        self.ydl_opts = {
            'outtmpl': f'{output_path}/%(uploader)s/%(title)s.%(ext)s',
            'format': 'best[height<=1080]',
            'writesubtitles': True,  # also fetch subtitles
            'writeautomaticsub': True,  # also fetch auto-generated subtitles
        }

    def download_single_video(self, url):
        """Download one video and log the outcome.

        Args:
            url: Address of the video page.

        Returns:
            True on success, False when any exception was raised.
        """
        try:
            with yt_dlp.YoutubeDL(self.ydl_opts) as ydl:
                # Look the metadata up first so the log line can name the
                # video before the transfer starts.
                info = ydl.extract_info(url, download=False)
                title = info.get('title', 'Unknown')
                duration = info.get('duration', 0)

                logging.info("开始下载: %s (时长: %s秒)", title, duration)

                ydl.download([url])
                logging.info("下载完成: %s", title)
                return True

        except Exception as e:
            # Worker boundary: one failing URL must not abort the batch.
            logging.error("下载失败 %s: %s", url, e)
            return False

    def download_multiple_videos(self, urls):
        """Download every URL in *urls* using a thread pool.

        Args:
            urls: Iterable of video page addresses.

        Returns:
            A ``(successful, failed)`` tuple with the number of downloads in
            each category.
        """
        futures = []
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            for index, url in enumerate(urls):
                # Throttle at submission time. Sleeping while collecting
                # results would not slow the requests down, because by then
                # every job is already queued.
                if index and self.request_delay > 0:
                    time.sleep(self.request_delay)
                futures.append(
                    executor.submit(self.download_single_video, url)
                )

            outcomes = [future.result() for future in futures]

        successful = sum(1 for ok in outcomes if ok)
        failed = len(outcomes) - successful

        logging.info("下载完成! 成功: %s, 失败: %s", successful, failed)
        return successful, failed

# Usage example: download a fixed list of URLs with the default settings.
if __name__ == "__main__":
    VideoDownloader().download_multiple_videos([
        "https://www.youtube.com/watch?v=example1",
        "https://www.youtube.com/watch?v=example2",
        # append further URLs here
    ])

高级功能实现

1. 播放列表下载

def download_playlist(playlist_url, max_videos=None):
    """Download a playlist with yt-dlp at up to 720p.

    Files are written below ``./downloads/<playlist>/`` and are prefixed with
    their playlist index.

    Args:
        playlist_url: Address of the playlist.
        max_videos: Value handed to yt-dlp's ``playlistend`` option
            (None by default).
    """
    options = dict(
        outtmpl='./downloads/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s',
        format='best[height<=720]',
        playlistend=max_videos,  # caps how many entries are fetched
    )

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([playlist_url])

2. 视频质量选择

def get_available_formats(url):
    """Print the formats yt-dlp reports for *url* and return them.

    Args:
        url: Address of the video page.

    Returns:
        The list of format dictionaries found under the ``formats`` key of
        the extracted info, or an empty list when there is none.
    """
    # 'listformats' is deliberately not set: that option makes yt-dlp print
    # its own format table, which duplicated the listing produced below.
    ydl_opts = {}

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(url, download=False)

    formats = (info or {}).get('formats') or []

    print("可用格式:")
    for f in formats:
        # Fields may be absent or None; show 'N/A' in both cases.
        format_id = f.get('format_id') or 'N/A'
        resolution = f.get('resolution') or 'N/A'
        filesize = f.get('filesize') or 'N/A'
        print(f"ID: {format_id}, 分辨率: {resolution}, "
              f"文件大小: {filesize}")

    return formats

def download_with_quality_selection(url):
    """List the available formats, ask for a format ID, then download it.

    Args:
        url: Address of the video page.
    """
    get_available_formats(url)
    chosen_format = input("请选择格式ID: ")

    # The typed ID is passed to yt-dlp unchanged as the format selector.
    options = {'format': chosen_format}
    options['outtmpl'] = './downloads/%(title)s.%(ext)s'

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

3. 进度显示和错误处理

class ProgressHook:
    """Console progress reporter usable as a yt-dlp ``progress_hooks`` entry."""

    def __init__(self):
        # Name of the file whose header line was printed most recently.
        self.current_video = ""

    def progress_hook(self, d):
        """Print progress for one status dictionary.

        Args:
            d: Status dictionary. Keys read here: ``status``, ``filename``,
                ``downloaded_bytes``, ``total_bytes`` and
                ``total_bytes_estimate``.
        """
        status = d.get('status')

        if status == 'downloading':
            filename = d.get('filename') or ''
            if filename != self.current_video:
                # A new file started: print its name once.
                self.current_video = filename
                print(f"\n正在下载: {os.path.basename(filename)}")

            # The exact size may be missing, None or 0; fall back to the
            # estimate and skip the percentage when neither is usable.
            total = d.get('total_bytes') or d.get('total_bytes_estimate')
            downloaded = d.get('downloaded_bytes')
            if total and downloaded is not None:
                percent = downloaded / total * 100
                print(f"\r进度: {percent:.1f}%", end='', flush=True)

        elif status == 'finished':
            finished_name = os.path.basename(d.get('filename') or '')
            print(f"\n下载完成: {finished_name}")

def download_with_progress(url):
    """Download *url* into ./downloads while printing console progress.

    Args:
        url: Address of the video page.
    """
    reporter = ProgressHook()

    with yt_dlp.YoutubeDL({
        'outtmpl': './downloads/%(title)s.%(ext)s',
        # yt-dlp invokes every callable in this list with a status dict.
        'progress_hooks': [reporter.progress_hook],
    }) as downloader:
        downloader.download([url])

实用工具函数

1. URL验证

import re

# Compiled once at import time; matched with fullmatch(), so no ^/$ anchors.
_URL_PATTERN = re.compile(
    r'https?://'  # http:// or https://
    # Domain name; TLD labels may be up to 63 letters long.
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or dotted IPv4 form
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)',  # optional path / query without whitespace
    re.IGNORECASE,
)


def is_valid_url(url):
    """Return True when *url* looks like an http(s) URL.

    The check is purely syntactic: scheme, host (domain name, ``localhost``
    or dotted IPv4 form), optional port and optional path/query.

    Args:
        url: Candidate value; anything that is not a ``str`` is rejected.

    Returns:
        True if the whole string matches the pattern, otherwise False.
    """
    if not isinstance(url, str):
        return False
    # fullmatch() rejects a trailing newline, which '$' would let through.
    return _URL_PATTERN.fullmatch(url) is not None

2. 磁盘可用空间检查

def check_available_space(path, required_mb):
    """Check whether the disk holding *path* has enough free space.

    Args:
        path: File or directory on the disk of interest. It does not need to
            exist yet; the nearest existing ancestor is inspected instead.
        required_mb: Amount of space needed, in MiB.

    Returns:
        True when at least *required_mb* MiB are free. Otherwise a warning is
        printed and False is returned.
    """
    import os
    import shutil

    # shutil.disk_usage() raises for a missing path, and a download folder
    # is often created only later, so walk up to something that exists.
    probe = os.path.abspath(path)
    while not os.path.exists(probe):
        parent = os.path.dirname(probe)
        if parent == probe:  # reached the filesystem root
            break
        probe = parent

    free_bytes = shutil.disk_usage(probe).free
    free_mb = free_bytes / (1024 * 1024)

    if free_mb < required_mb:
        print(f"警告: 可用空间不足! 需要: {required_mb}MB, 可用: {free_mb:.1f}MB")
        return False
    return True

配置文件管理

使用JSON配置文件

import json

def load_config(config_file='config.json'):
    """Load downloader settings from a JSON file.

    Values from the file override the built-in defaults. When the file does
    not exist, it is created with the defaults and the defaults are returned.

    Args:
        config_file: Path of the JSON configuration file.

    Returns:
        A dictionary containing the effective settings.
    """
    fallback = {
        'output_path': './downloads',
        'max_workers': 3,
        'video_quality': 'best[height<=720]',
        'download_subtitles': True,
        'max_retries': 3
    }

    try:
        source = open(config_file, 'r', encoding='utf-8')
    except FileNotFoundError:
        # First run: write the defaults so the user has a file to edit.
        with open(config_file, 'w', encoding='utf-8') as target:
            json.dump(fallback, target, indent=2, ensure_ascii=False)
        return fallback

    with source:
        overrides = json.load(source)

    # Keys found in the file win over the defaults.
    return {**fallback, **overrides}

部署和优化

1. 创建可执行脚本

使用PyInstaller将Python脚本打包成可执行文件:

pip install pyinstaller
pyinstaller --onefile video_downloader.py

2. 性能优化建议

  • 合理设置并发数量,避免过多请求被限制
  • 添加重试机制处理网络错误
  • 使用代理池避免IP被封禁
  • 定期更新yt-dlp库以支持最新网站

注意事项

法律合规

  • 仅下载有权限的内容
  • 遵守网站的使用条款
  • 不要将未经授权的内容用于商业用途

技术限制

  • 某些网站有反爬虫机制
  • 下载速度可能受到限制
  • 需要定期更新以适应网站变化

总结

通过本文的学习,您应该已经掌握了使用Python编写视频下载脚本的基本技能。从简单的单视频下载到复杂的批量处理,Python为我们提供了强大而灵活的解决方案。记住在使用这些脚本时要遵守相关法律法规,尊重内容创作者的权益。

继续学习和实践,您可以根据具体需求进一步定制和优化这些脚本,创建出更加强大和实用的视频下载工具。

评论

发表回复

您的邮箱地址不会被公开。 必填项已用 * 标注