From ffe3b98dcff35680402c1bb3191347bd47518f0a Mon Sep 17 00:00:00 2001 From: Lucas Oskorep Date: Sat, 11 Oct 2025 02:51:49 -0400 Subject: [PATCH] feat: add movies support --- README.md | 47 +++++++++++++++----- src/analyzer.rs | 32 ++++++++++++-- src/client.rs | 14 +++++- src/display.rs | 89 ++++++++++++++++++++++++++++++++++++- src/main.rs | 114 ++++++++++++++++++++++++++++++++++++++---------- src/models.rs | 22 +++++++++- src/selector.rs | 35 ++++++++++++++- 7 files changed, 310 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index e3fd583..bf1fa43 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,17 @@ # Jelly Dedup -A command-line tool to identify and manage duplicate episodes in your Jellyfin media server. This tool analyzes your TV show library, detects duplicate episodes, and provides removal commands to free up storage space. +A command-line tool to identify and manage duplicate episodes and movies in your Jellyfin media server. This tool analyzes your TV show and movie libraries, detects duplicates, and provides removal commands to free up storage space. ## Features -- Scans all TV shows in your Jellyfin library +- Scans TV shows and/or movies in your Jellyfin library - Identifies duplicate episodes based on season and episode numbers -- Intelligently selects lower-quality files for removal +- Identifies duplicate movies based on title and year +- Intelligently selects lower-quality files for removal using smart codec comparison - Generates shell commands for safe file deletion - Displays space savings estimates - Supports custom path prefix removal for cleaner output +- Flexible media type selection (TV shows only, movies only, or both) ## Build Reqs @@ -84,6 +86,9 @@ Options: -p, --path-prefix-to-remove Path prefix to remove from displayed file paths [env: PATH_PREFIX_TO_REMOVE] + -t, --media-type + Type of media to process [default: both] [possible values: tv, movies, both] + -h, --help Print help @@ -93,22 +98,32 @@ Options: ### Examples -1. **Using default local server with API key:** +1. **Scan both TV shows and movies (default):** ```bash jelly-dedup --api-key abc123def456 ``` -2. **Specifying a remote server:** +2. **Scan TV shows only:** + ```bash + jelly-dedup --api-key abc123def456 --media-type tv + ``` + +3. **Scan movies only:** + ```bash + jelly-dedup --api-key abc123def456 --media-type movies + ``` + +4. **Specifying a remote server:** ```bash jelly-dedup --jellyfin-url https://jellyfin.example.com --api-key abc123def456 ``` -3. **Removing path prefix for cleaner output:** +5. **Removing path prefix for cleaner output:** ```bash jelly-dedup --api-key abc123def456 --path-prefix-to-remove /mnt/media ``` -4. **Using environment variables:** +6. **Using environment variables:** ```bash export JELLYFIN_URL=http://localhost:8096 export JELLYFIN_API_KEY=abc123def456 @@ -126,16 +141,28 @@ Options: ## Output The tool will: -1. Scan all TV shows in your library -2. Display duplicate episodes found for each show +1. Scan TV shows and/or movies in your library (depending on `--media-type` option) +2. Display duplicate episodes/movies found with detailed quality information 3. Provide a summary with: - - Total episodes with duplicates + - Total episodes/movies with duplicates - Total files marked for deletion - Estimated space savings in GB 4. Generate `rm` commands for each file to be deleted **Note:** The tool does NOT delete files automatically. It only generates the commands for you to review and execute manually. +### Quality Selection + +The tool uses intelligent quality comparison to select the best version: +- **Resolution First**: Higher resolution always wins (1080p beats 720p) +- **Codec Efficiency**: When resolutions match, codec efficiency is considered: + - AV1: 2.0x multiplier (most efficient) + - H.265/HEVC: 1.5x multiplier + - H.264: 1.0x baseline +- **Effective Bitrate**: Calculates quality based on bitrate × codec efficiency + +For example, a 1080p H.265 file at 6 Mbps (effective: 9.0) will be selected over a 1080p H.264 file at 8 Mbps (effective: 8.0). + ## Safety - The tool is read-only and makes no modifications to your Jellyfin library or filesystem diff --git a/src/analyzer.rs b/src/analyzer.rs index 8ca642f..f7eb61a 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -1,17 +1,43 @@ -use crate::models::Episode; +use crate::models::{Episode, Movie}; +use std::collections::HashMap; /// Filters episodes to return only those with multiple media sources (duplicates) pub fn filter_duplicate_episodes(episodes: Vec) -> Vec { episodes .into_iter() - .filter(|ep| has_multiple_versions(ep)) + .filter(|ep| has_multiple_versions_episode(ep)) .collect() } -fn has_multiple_versions(episode: &Episode) -> bool { +fn has_multiple_versions_episode(episode: &Episode) -> bool { if let Some(media_sources) = &episode.media_sources { media_sources.len() > 1 } else { false } } + +/// Filters movies to return only those with duplicate titles (same name and year) +pub fn filter_duplicate_movies(movies: Vec) -> Vec> { + let mut movie_map: HashMap> = HashMap::new(); + + // Group movies by title and year + for movie in movies { + let key = format!("{}-{}", movie.name, movie.year.unwrap_or(0)); + movie_map.entry(key).or_insert_with(Vec::new).push(movie); + } + + // Return only groups with multiple movies or movies with multiple media sources + movie_map + .into_values() + .filter(|group| group.len() > 1 || (group.len() == 1 && has_multiple_versions_movie(&group[0]))) + .collect() +} + +fn has_multiple_versions_movie(movie: &Movie) -> bool { + if let Some(media_sources) = &movie.media_sources { + media_sources.len() > 1 + } else { + false + } +} diff --git a/src/client.rs b/src/client.rs index 678d935..366bb0f 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,4 +1,4 @@ -use crate::models::{Episode, EpisodesResponse, Item, ItemsResponse}; +use crate::models::{Episode, EpisodesResponse, Item, ItemsResponse, Movie, MoviesResponse}; use std::error::Error; pub struct JellyfinClient { @@ -39,4 +39,16 @@ impl JellyfinClient { Ok(episodes_response.items) } + + pub async fn get_all_movies(&self) -> Result, Box> { + let url = format!( + "{}/Items?IncludeItemTypes=Movie&Recursive=true&Fields=Path,MediaSources,ProductionYear&api_key={}", + self.base_url, self.api_key + ); + + let response = self.client.get(&url).send().await?; + let movies_response: MoviesResponse = response.json().await?; + + Ok(movies_response.items) + } } diff --git a/src/display.rs b/src/display.rs index a631e73..1beeeda 100644 --- a/src/display.rs +++ b/src/display.rs @@ -1,4 +1,4 @@ -use crate::models::{Episode, MediaSource}; +use crate::models::{Episode, MediaSource, Movie}; use crate::selector; #[derive(Debug, Clone, Eq, PartialEq, Hash)] @@ -134,3 +134,90 @@ fn format_codec(source: &MediaSource) -> String { }) .unwrap_or_else(|| "Unknown".to_string()) } + +pub fn print_duplicate_movies(movie_groups: Vec>) -> Vec { + let mut files_to_delete = Vec::new(); + + for movie_group in movie_groups { + if movie_group.is_empty() { + continue; + } + + // If there's only one movie in the group, it must have multiple media sources + if movie_group.len() == 1 { + let movie = &movie_group[0]; + println!("\n🎬 Movie: {}", format_movie_title(movie)); + println!("{}", "-".repeat(80)); + + if let Some(media_sources) = &movie.media_sources { + println!(" Multiple versions found: {}\n", media_sources.len()); + let to_delete = print_movie_versions(&movie.name, media_sources); + files_to_delete.extend(to_delete); + } + } else { + // Multiple movies with same name/year - treat each as a separate version + let first_movie = &movie_group[0]; + println!("\n🎬 Movie: {}", format_movie_title(first_movie)); + println!("{}", "-".repeat(80)); + println!(" Multiple copies found: {}\n", movie_group.len()); + + // Collect all media sources from all movies + let mut all_sources: Vec = Vec::new(); + for movie in &movie_group { + if let Some(media_sources) = &movie.media_sources { + for source in media_sources { + all_sources.push(source.clone()); + } + } + } + + if !all_sources.is_empty() { + let to_delete = print_movie_versions(&first_movie.name, &all_sources); + files_to_delete.extend(to_delete); + } + } + + println!("{}", "=".repeat(80)); + } + + files_to_delete +} + +fn format_movie_title(movie: &Movie) -> String { + if let Some(year) = movie.year { + format!("{} ({})", movie.name, year) + } else { + movie.name.clone() + } +} + +fn print_movie_versions(_movie_name: &str, media_sources: &Vec) -> Vec { + let mut files_to_delete = Vec::new(); + + if let Some(best_idx) = selector::select_best_source(media_sources) { + // Print selected file + println!(" [SELECTED]"); + print_media_source(&media_sources[best_idx]); + + // Print non-selected files + if media_sources.len() > 1 { + println!(" [TO DELETE]"); + for (idx, source) in media_sources.iter().enumerate() { + if idx != best_idx { + print_media_source(source); + if let Some(path) = &source.path { + let size = source.size.unwrap_or(0); + files_to_delete.push(FileToDelete { + path: path.clone(), + size, + }); + } + } + } + } + } + + println!(); + + files_to_delete +} diff --git a/src/main.rs b/src/main.rs index c7c0d11..036186e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,28 +4,42 @@ mod display; mod models; mod selector; -use clap::Parser; +use clap::{Parser, ValueEnum}; use client::JellyfinClient; use display::FileToDelete; use std::collections::HashSet; use std::error::Error; -/// A tool to find and manage duplicate episodes in Jellyfin +#[derive(Debug, Clone, ValueEnum)] +enum MediaType { + /// Process TV shows only + Tv, + /// Process movies only + Movies, + /// Process both TV shows and movies + Both, +} + +/// A tool to find and manage duplicate episodes and movies in Jellyfin #[derive(Parser, Debug)] #[command(name = "jelly-dedup")] #[command(author, version, about, long_about = None)] struct Args { /// Jellyfin server URL - #[arg(short, long, env("JELLYFIN_URL"), default_value = "http://localhost:8096")] + #[arg(short, long, env = "JELLYFIN_URL", default_value = "http://localhost:8096")] jellyfin_url: String, /// Jellyfin API key - #[arg(short, long, env("JELLYFIN_API_KEY"))] + #[arg(short, long, env = "JELLYFIN_API_KEY")] api_key: String, /// Path prefix to remove from displayed file paths - #[arg(short, long, env("PATH_PREFIX_TO_REMOVE"))] + #[arg(short, long, env = "PATH_PREFIX_TO_REMOVE")] path_prefix_to_remove: Option, + + /// Type of media to process + #[arg(short = 't', long, value_enum, default_value = "both")] + media_type: MediaType, } #[tokio::main] @@ -34,44 +48,63 @@ async fn main() -> Result<(), Box> { let args = Args::parse(); + let client = JellyfinClient::new(args.jellyfin_url, args.api_key); + let config = Config { - jellyfin_url: args.jellyfin_url, - api_key: args.api_key, path_prefix_to_remove: args.path_prefix_to_remove, + media_type: args.media_type, }; - let client = JellyfinClient::new(config.jellyfin_url, config.api_key); - - process_all_shows(&client, config.path_prefix_to_remove).await?; + process_media(&client, &config).await?; Ok(()) } struct Config { - jellyfin_url: String, - api_key: String, path_prefix_to_remove: Option, + media_type: MediaType, } struct Statistics { total_duplicate_episodes: usize, + total_duplicate_movies: usize, total_duplicate_files: usize, files_to_delete: HashSet, } -async fn process_all_shows(client: &JellyfinClient, path_prefix_to_remove: Option) -> Result<(), Box> { +async fn process_media(client: &JellyfinClient, config: &Config) -> Result<(), Box> { + let mut stats = Statistics { + total_duplicate_episodes: 0, + total_duplicate_movies: 0, + total_duplicate_files: 0, + files_to_delete: HashSet::new(), + }; + + match config.media_type { + MediaType::Tv => { + process_all_shows(client, &mut stats).await?; + } + MediaType::Movies => { + process_all_movies(client, &mut stats).await?; + } + MediaType::Both => { + process_all_shows(client, &mut stats).await?; + process_all_movies(client, &mut stats).await?; + } + } + + print_summary(&stats, config.path_prefix_to_remove.as_deref(), &config.media_type); + + Ok(()) +} + +async fn process_all_shows(client: &JellyfinClient, stats: &mut Statistics) -> Result<(), Box> { println!("Fetching all TV shows from Jellyfin...\n"); let shows = client.get_all_shows().await?; println!("Found {} TV shows\n", shows.len()); println!("{}", "=".repeat(80)); - let mut stats = Statistics { - total_duplicate_episodes: 0, - total_duplicate_files: 0, - files_to_delete: HashSet::new(), - }; - for show in shows { match process_show(client, &show).await { Ok((episode_count, file_count, files_to_delete)) => { @@ -85,7 +118,27 @@ async fn process_all_shows(client: &JellyfinClient, path_prefix_to_remove: Optio } } - print_summary(&stats, path_prefix_to_remove.as_deref()); + Ok(()) +} + +async fn process_all_movies(client: &JellyfinClient, stats: &mut Statistics) -> Result<(), Box> { + println!("\nFetching all movies from Jellyfin...\n"); + let movies = client.get_all_movies().await?; + + println!("Found {} movies\n", movies.len()); + println!("{}", "=".repeat(80)); + + let duplicate_movie_groups = analyzer::filter_duplicate_movies(movies); + + if !duplicate_movie_groups.is_empty() { + let movie_count = duplicate_movie_groups.len(); + let files_to_delete = display::print_duplicate_movies(duplicate_movie_groups); + let file_count = files_to_delete.len(); + + stats.total_duplicate_movies += movie_count; + stats.total_duplicate_files += file_count; + stats.files_to_delete.extend(files_to_delete); + } Ok(()) } @@ -110,7 +163,7 @@ async fn process_show( Ok((episode_count, file_count, files_to_delete)) } -fn print_summary(stats: &Statistics, path_prefix_to_remove: Option<&str>) { +fn print_summary(stats: &Statistics, path_prefix_to_remove: Option<&str>, media_type: &MediaType) { // Files are already deduplicated in the HashSet let mut sorted_files: Vec<&FileToDelete> = stats.files_to_delete.iter().collect(); sorted_files.sort_by(|a, b| a.path.cmp(&b.path)); @@ -121,7 +174,20 @@ fn print_summary(stats: &Statistics, path_prefix_to_remove: Option<&str>) { println!("\n{}", "=".repeat(80)); println!("Summary:"); - println!(" Total episodes with duplicates: {}", stats.total_duplicate_episodes); + + match media_type { + MediaType::Tv => { + println!(" Total episodes with duplicates: {}", stats.total_duplicate_episodes); + } + MediaType::Movies => { + println!(" Total movies with duplicates: {}", stats.total_duplicate_movies); + } + MediaType::Both => { + println!(" Total episodes with duplicates: {}", stats.total_duplicate_episodes); + println!(" Total movies with duplicates: {}", stats.total_duplicate_movies); + } + } + println!(" Total files to delete: {}", sorted_files.len()); println!(" Estimated space savings: {:.2} GB", total_space_gb); println!("{}", "=".repeat(80)); @@ -135,9 +201,9 @@ fn print_summary(stats: &Statistics, path_prefix_to_remove: Option<&str>) { } else { &file.path }; - // Properly escape the path for bash + display_path.to_owned().insert_str(0, "."); let escaped_path = shell_escape::escape(display_path.into()); - println!("rm {}", escaped_path); + println!("rm .{}", escaped_path); } println!("{}", "=".repeat(80)); println!("Total files to delete: {}", sorted_files.len()); diff --git a/src/models.rs b/src/models.rs index 7df7a7c..adcb51e 100644 --- a/src/models.rs +++ b/src/models.rs @@ -14,7 +14,7 @@ pub struct Item { pub name: String, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Deserialize, Clone)] pub struct MediaStream { #[serde(rename = "Type")] pub stream_type: Option, @@ -24,7 +24,7 @@ pub struct MediaStream { pub codec: Option, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Deserialize, Clone)] pub struct MediaSource { #[serde(rename = "Path")] pub path: Option, @@ -57,3 +57,21 @@ pub struct Episode { #[serde(rename = "MediaSources")] pub media_sources: Option>, } + +#[derive(Debug, Deserialize, Clone)] +pub struct Movie { + #[serde(rename = "Id")] + pub _id: String, + #[serde(rename = "Name")] + pub name: String, + #[serde(rename = "ProductionYear")] + pub year: Option, + #[serde(rename = "MediaSources")] + pub media_sources: Option>, +} + +#[derive(Debug, Deserialize)] +pub struct MoviesResponse { + #[serde(rename = "Items")] + pub items: Vec, +} diff --git a/src/selector.rs b/src/selector.rs index 450a3a3..883c46f 100644 --- a/src/selector.rs +++ b/src/selector.rs @@ -26,8 +26,8 @@ pub fn select_best_source(sources: &[MediaSource]) -> Option { } fn is_better_source(candidate: &MediaSource, current_best: &MediaSource) -> bool { - let candidate_height = get_height(candidate); - let best_height = get_height(current_best); + let candidate_height = normalize_height(get_height(candidate)); + let best_height = normalize_height(get_height(current_best)); // Higher resolution always wins if candidate_height > best_height { @@ -43,6 +43,37 @@ fn is_better_source(candidate: &MediaSource, current_best: &MediaSource) -> bool candidate_effective_bitrate > best_effective_bitrate } +fn normalize_height(height: i32) -> i32 { + // Normalize common cropped resolutions to their standard equivalents + + // 4K/UHD range (2160p): includes 2160p, 2076p (cropped 4K), and other 4K variants + if height >= 2000 && height <= 2160 { + return 2160; + } + + // 1080p/Full HD range: includes 1080p, 1038p, 960p (cropped 1080p) + if height >= 960 && height <= 1088 { + return 1080; + } + + // 720p/HD range: includes 720p, 694p (cropped 720p) + if height >= 690 && height <= 720 { + return 720; + } + + // 576p/SD range: includes 576p, 540p + if height >= 540 && height <= 576 { + return 576; + } + + // 480p/SD range: includes 480p, 460p + if height >= 460 && height <= 480 { + return 480; + } + + height +} + fn calculate_effective_bitrate(source: &MediaSource) -> f64 { let bitrate = source.bitrate.unwrap_or(0) as f64; let codec = get_codec(source);