Mong muốn:
- Download video youtube chỉ bằng python (video public)
- Check file size trước khi download (không cần download trước)
- Check duration của video (mà không cần download trước)
- Chỉ download audio (với file size nhỏ nhất có thể)
Ngon nhất là dùng package `yt-dlp`:
def get_smallest_webm_audio(url):
    """Return metadata for the smallest audio-only WebM format of a video.

    Runs ``yt-dlp -J <url>`` to fetch the video's metadata as JSON (nothing
    is downloaded), keeps the formats whose ``ext`` is ``"webm"``, whose
    ``vcodec`` is ``"none"`` (audio only) and which report a ``filesize``,
    and picks the one with the smallest ``filesize``.

    Args:
        url: URL of the video to inspect.

    Returns:
        A dict with the keys ``format_id``, ``filesize``, ``duration``,
        ``title`` and ``ext`` describing the smallest matching format, or
        ``None`` when no format matches or when yt-dlp could not be run or
        its output could not be parsed (an error message is printed in the
        latter cases).
    """
    # Imported locally so this snippet runs on its own.
    import json
    import subprocess

    try:
        result = subprocess.run(
            ["yt-dlp", "-J", url],
            capture_output=True,
            text=True,
            check=True,
        )
        metadata = json.loads(result.stdout)
    except FileNotFoundError:
        # Raised by subprocess.run when the yt-dlp executable is missing;
        # treat it like every other failure instead of crashing the caller.
        print("Error running yt-dlp: executable not found on PATH.")
        return None
    except subprocess.CalledProcessError as e:
        print(f"Error running yt-dlp: {e.stderr}")
        return None
    except json.JSONDecodeError:
        print("Error decoding JSON output from yt-dlp.")
        return None

    # "formats" may be absent or null; normalise both to an empty list.
    formats = metadata.get("formats") or []

    # Duration and title belong to the video, not to a single format, so
    # they are copied from the top-level metadata into every candidate.
    webm_audio_formats = [
        {
            "format_id": f.get("format_id"),
            "filesize": f.get("filesize"),
            "duration": metadata.get("duration"),
            "title": metadata.get("title"),
            "ext": f.get("ext"),
        }
        for f in formats
        if f.get("ext") == "webm"
        and f.get("vcodec") == "none"
        and f.get("filesize") is not None
    ]

    # default=None covers the "no matching format" case without a branch.
    return min(webm_audio_formats, key=lambda x: x["filesize"], default=None)
39
# Example: look up the smallest audio-only WebM format of one video.
url = "https://www.youtube.com/watch?v=ry9SYnV3svc"  # Replace with your video URL
smallest_webm_audio = get_smallest_webm_audio(url)
if not smallest_webm_audio:
    print("No webm audio format with filesize found.")
else:
    print("Smallest webm audio format:")
    print(smallest_webm_audio)
def get_youtube_video_size_mb(video_url):
    """Get the size of a video's first sized audio-only format, in MB.

    Extracts the video's metadata with yt-dlp without downloading it and
    returns the ``filesize`` of the first format that is audio only
    (``vcodec == "none"``) and reports a ``filesize``, divided by
    ``1024 * 1024``.

    Args:
        video_url: URL of the video to inspect.

    Returns:
        The size as a float, or ``None`` when no such format exists or when
        anything goes wrong (this function is best-effort and never raises).
    """
    try:
        # Imported inside the ``try`` so that a missing yt-dlp installation
        # is reported as ``None`` like every other failure.
        import yt_dlp

        # ``listformats`` is deliberately not set: it is not needed to read
        # the formats from the returned info dict.
        ydl_opts = {"quiet": True}

        # The class must be referenced through the module; the bare name
        # ``YoutubeDL`` is not defined by ``import yt_dlp``.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url, download=False)
        formats = result.get("formats") or []

        # First audio-only entry that reports a filesize, if any.
        filesize = next(
            (
                f.get("filesize")
                for f in formats
                if f.get("vcodec") == "none" and f.get("filesize") is not None
            ),
            None,
        )
        if filesize is None:
            return None
        return filesize / (1024 * 1024)  # Convert to MB
    except Exception:
        # Deliberate best-effort boundary: callers only ever see a number
        # or None.
        return None
def get_youtube_video_duration_s(video_url):
    """Get the duration of a YouTube video in seconds.

    Extracts the video's metadata with yt-dlp without downloading it and
    returns its ``duration`` field.

    Args:
        video_url: URL of the video to inspect.

    Returns:
        The value of the ``duration`` field, or ``None`` when it is missing
        or when anything goes wrong (this function is best-effort and never
        raises).
    """
    try:
        # Imported inside the ``try`` so that a missing yt-dlp installation
        # is reported as ``None`` like every other failure.
        import yt_dlp

        ydl_opts = {"quiet": True}

        # The class must be referenced through the module; the bare name
        # ``YoutubeDL`` is not defined by ``import yt_dlp``.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            result = ydl.extract_info(video_url, download=False)
        return result.get("duration")
    except Exception:
        # Deliberate best-effort boundary: callers only ever see a value
        # or None.
        return None
Không thể download bằng Python package `yt-dlp` được, toàn ra lỗi 403:
`An error occurred: ERROR: unable to download video data: HTTP Error 403: Forbidden`
Cách hay nhất để download là cũng dùng yt-dlp nhưng là dùng CLI của nó!
import json
import subprocess


def download_yt(url, output_dir="."):
    """Download the smallest audio-only format of a video via the yt-dlp CLI.

    First runs ``yt-dlp -J <url>`` to list the available formats, selects
    the audio-only format (``vcodec == "none"``) with the smallest
    ``filesize``, prints its ``format_id`` and then runs yt-dlp again to
    download exactly that format into ``output_dir``.

    Args:
        url: URL of the video to download.
        output_dir: Directory the file is written to; the file name is
            built by yt-dlp from the ``%(title)s.%(ext)s`` template.

    Returns:
        None. The exit status of the download command is not checked.

    Raises:
        ValueError: If the video has no audio-only format, or if the
            metadata output is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError`` subclass).
        KeyError: If the metadata JSON has no ``"formats"`` key.
    """
    # Get video formats info
    metadata_cmd = ["yt-dlp", "-J", url]
    result = subprocess.run(metadata_cmd, capture_output=True, text=True)
    formats = json.loads(result.stdout)["formats"]

    # Find audio format with smallest filesize
    audio_formats = [f for f in formats if f.get("vcodec") == "none"]
    if not audio_formats:
        # min() on an empty list would raise the same exception type with a
        # far less helpful message.
        raise ValueError(f"No audio-only format found for {url}")

    # The "filesize" key can be present with a null value; .get()'s default
    # is not used then, and min() would fail comparing None with numbers.
    # Formats of unknown size are therefore ranked last.
    best_format = min(
        audio_formats,
        key=lambda f: f.get("filesize") or float("inf"),
    )
    format_id = best_format["format_id"]
    print("👉👉👉 format_id: ", format_id)

    # Download with selected format
    download_cmd = [
        "yt-dlp",
        "-f", format_id,
        "-o", f"{output_dir}/%(title)s.%(ext)s",
        url,
    ]
    subprocess.run(download_cmd)
24
# Example usage: fetch the smallest audio track into the local audio folder.
download_yt(
    url="https://www.youtube.com/watch?v=ry9SYnV3svc",
    output_dir="../fake_storage_account/audios/audioDirectory",
)