First working version v0.1.0

This commit is contained in:
2025-02-02 00:04:04 -06:00
commit 0dc32e63b2
8 changed files with 673 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/target

433
Cargo.lock generated Normal file
View File

@@ -0,0 +1,433 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "anstream"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
[[package]]
name = "anstyle-parse"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
dependencies = [
"anstyle",
"once_cell",
"windows-sys",
]
[[package]]
name = "bumpalo"
version = "3.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
[[package]]
name = "bytesize"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3e368af43e418a04d52505cf3dbc23dda4e3407ae2fa99fd0e4f308ce546acc"
dependencies = [
"serde",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
[[package]]
name = "colorchoice"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "console"
version = "0.15.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b"
dependencies = [
"encode_unicode",
"libc",
"once_cell",
"unicode-width",
"windows-sys",
]
[[package]]
name = "duplicated"
version = "0.1.0"
dependencies = [
"bytesize",
"clap",
"indicatif",
"seahash",
]
[[package]]
name = "encode_unicode"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indicatif"
version = "0.17.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235"
dependencies = [
"console",
"number_prefix",
"portable-atomic",
"unicode-width",
"web-time",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "js-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "log"
version = "0.4.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
[[package]]
name = "number_prefix"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3"
[[package]]
name = "once_cell"
version = "1.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775"
[[package]]
name = "portable-atomic"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
[[package]]
name = "proc-macro2"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "seahash"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b"
[[package]]
name = "serde"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.217"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "unicode-width"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
dependencies = [
"cfg-if",
"once_cell",
"wasm-bindgen-macro",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
dependencies = [
"unicode-ident",
]
[[package]]
name = "web-time"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

10
Cargo.toml Normal file
View File

@@ -0,0 +1,10 @@
[package]
name = "duplicated"
version = "0.1.0"
edition = "2021"
[dependencies]
seahash = "4.1"
clap = { version = "4.5", features = ["derive"] }
bytesize = {version = "1.2.0", features = ["serde"]}
indicatif = "0.17.11"

213
src/lib.rs Normal file
View File

@@ -0,0 +1,213 @@
use std::error::Error;
use std::fs;
use std::path::{Path, PathBuf};
use bytesize::ByteSize;
use clap::Parser;
use indicatif::ProgressBar;
extern crate bytesize;
// Command-line arguments of the duplicate finder, parsed by clap's derive API.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macro
// turns `///` doc comments on the struct and its fields into `--help` text,
// so switching to doc comments would change the program's output.
#[derive(Parser)]
#[command(version, about, long_about = None)]
pub struct Cli {
// File or directory to scan; handed to `navigate_file_tree` by `run`.
// No `#[arg]` attribute, so presumably a required positional argument —
// confirm against the clap derive reference.
path: PathBuf,
// Optional lower bound (`-s` / `--min-size`): files smaller than this are
// skipped. The bound itself is inclusive (see `is_filesize_in_range`).
#[arg(short = 's', long, value_name = "SIZE")]
min_size: Option<ByteSize>,
// Optional upper bound (`-m` / `--max-size`): files larger than this are
// skipped. The bound itself is inclusive (see `is_filesize_in_range`).
#[arg(short = 'm', long, value_name = "SIZE")]
max_size: Option<ByteSize>,
}
pub fn run(args: Cli) -> Result<(), Box<dyn Error>> {
let files = navigate_file_tree(&args.path, &args.min_size, &args.max_size)?;
println!("Scanning {} files", files.len());
let hashes = hash_files(&files)?;
let repeats = find_duplicates(&files, &hashes);
if repeats.is_empty() {
println!("No duplicate files found!");
return Ok(());
}
println!("{} Duplicate files found", repeats.len());
for duplicate in repeats {
let size = ByteSize::b(fs::metadata(duplicate[0])?.len());
let paths = duplicate
.into_iter()
.map(|path| path.to_string_lossy())
.collect::<Vec<_>>()
.join(" ");
println!("{size} {paths}");
}
Ok(())
}
/// Returns `true` when `filesize` (in bytes) lies within the optional bounds.
///
/// Both bounds are inclusive, and a bound that is `None` imposes no limit on
/// that side.
fn is_filesize_in_range(
    filesize: u64,
    min_size: &Option<ByteSize>,
    max_size: &Option<ByteSize>,
) -> bool {
    let large_enough = min_size
        .as_ref()
        .map_or(true, |lower| filesize >= lower.0);
    let small_enough = max_size
        .as_ref()
        .map_or(true, |upper| filesize <= upper.0);
    large_enough && small_enough
}
/// Recursively collects every file under `path` whose size is within the
/// optional `min_size` / `max_size` bounds.
///
/// If `path` is not a directory it is treated as a single file: the result
/// contains that path when its metadata can be read and its size is in range,
/// and is empty otherwise. If `path` is a directory, every entry is visited
/// recursively.
///
/// Scanning is best-effort. A file whose metadata cannot be read, or a
/// directory that cannot be listed, is reported on stderr together with its
/// path and then skipped; the remaining entries are still scanned.
///
/// `Path::is_dir` and `fs::metadata` both follow symbolic links, so a link to
/// a directory is descended into.
///
/// # Errors
///
/// Returns an error if an individual directory entry cannot be read while
/// iterating over a directory listing.
pub fn navigate_file_tree(
    path: &Path,
    min_size: &Option<ByteSize>,
    max_size: &Option<ByteSize>,
) -> Result<Vec<PathBuf>, Box<dyn Error>> {
    let mut files: Vec<PathBuf> = Vec::new();

    if !path.is_dir() {
        match fs::metadata(path) {
            Ok(metadata) if is_filesize_in_range(metadata.len(), min_size, max_size) => {
                files.push(path.to_path_buf());
            }
            // Readable, but outside the requested size range.
            Ok(_) => {}
            Err(err) => eprintln!("{}: {err}", path.display()),
        }
        return Ok(files);
    }

    let children = match fs::read_dir(path) {
        Ok(children) => children,
        Err(err) => {
            eprintln!("{}: {err}", path.display());
            return Ok(files);
        }
    };

    for child in children {
        let child_path = child?.path();
        // The recursive call handles both sub-directories and plain files, so
        // one unreadable file only drops that file rather than cutting the
        // rest of this directory short.
        files.extend(navigate_file_tree(&child_path, min_size, max_size)?);
    }
    Ok(files)
}
/// Hashes the contents of every file in `files` with SeaHash, showing a
/// progress bar while doing so.
///
/// The returned vector always holds exactly one hash per input path, in the
/// same order, so `hashes[i]` belongs to `files[i]`. `find_duplicates` relies
/// on that positional pairing.
///
/// Hashing is best-effort: a file that cannot be read is reported on stderr
/// and given a placeholder hash derived from its position in `files`. The
/// placeholder differs for every unreadable file, so such files are not
/// grouped with each other, and it is as unlikely to match a real content
/// hash as any other 64-bit hash collision.
///
/// # Errors
///
/// Returns an error only if the number of files cannot be converted to the
/// progress bar's length type.
pub fn hash_files(files: &[PathBuf]) -> Result<Vec<u64>, Box<dyn Error>> {
    let pb = ProgressBar::new(files.len().try_into()?);
    let mut hashes: Vec<u64> = Vec::with_capacity(files.len());

    for (index, file) in files.iter().enumerate() {
        let hash = match fs::read(file) {
            Ok(contents) => seahash::hash(&contents),
            Err(err) => {
                // Print through `suspend` so the message does not get mangled
                // by the progress bar's own redraws.
                pb.suspend(|| eprintln!("{}: {err}", file.display()));
                // Keep `hashes` aligned with `files`: skipping the entry would
                // shift every later hash onto the wrong path.
                seahash::hash(format!("\0unreadable file #{index}").as_bytes())
            }
        };
        hashes.push(hash);
        pb.inc(1);
    }

    pb.finish_with_message("hashing finished");
    Ok(hashes)
}
/// Groups the paths in `files` that share the same hash.
///
/// `hashes[i]` must be the hash of `files[i]`. The two slices are paired by
/// position; if their lengths differ, the surplus entries of the longer one
/// are ignored instead of causing a panic.
///
/// Returns one group per hash value that occurs more than once. Groups appear
/// in the order in which their first member occurs in `files`, and the paths
/// inside a group keep their order from `files`. Files with a unique hash are
/// not returned.
pub fn find_duplicates<'a>(files: &'a [PathBuf], hashes: &[u64]) -> Vec<Vec<&'a PathBuf>> {
    // Maps a hash to the index of its group in `groups`, so every file is
    // placed with a single lookup instead of rescanning all hashes.
    let mut group_of: std::collections::HashMap<u64, usize> = std::collections::HashMap::new();
    let mut groups: Vec<Vec<&'a PathBuf>> = Vec::new();

    for (file, &hash) in files.iter().zip(hashes) {
        let slot = *group_of.entry(hash).or_insert_with(|| {
            groups.push(Vec::new());
            groups.len() - 1
        });
        groups[slot].push(file);
    }

    // A group with a single member is not a duplicate.
    groups.retain(|group| group.len() > 1);
    groups
}
#[cfg(test)]
mod test {
    use super::*;

    /// The three fixture files shipped in `./test-data`.
    fn fixture_paths() -> Vec<PathBuf> {
        vec![
            PathBuf::from("./test-data/file1"),
            PathBuf::from("./test-data/file2"),
            PathBuf::from("./test-data/file3"),
        ]
    }

    // Exercises `navigate_file_tree`: without size bounds, every fixture file
    // is discovered and nothing else.
    #[test]
    fn nav_filetree() {
        let root = PathBuf::from("./test-data");
        let found = navigate_file_tree(&root, &None, &None).unwrap();

        assert_eq!(found.len(), 3);
        for expected in &fixture_paths() {
            assert!(found.contains(expected));
        }
    }

    // Exercises `hash_files` together with `find_duplicates`: file1 and file2
    // have identical contents and must end up in the single reported group.
    #[test]
    fn find_duplicate_files() {
        let files = fixture_paths();
        let hashes = hash_files(&files).unwrap();
        let groups = find_duplicates(&files, &hashes);

        assert_eq!(groups.len(), 1);
        let group = &groups[0];
        assert!(group.iter().any(|member| *member == &files[0]));
        assert!(group.iter().any(|member| *member == &files[1]));
    }
}

14
src/main.rs Normal file
View File

@@ -0,0 +1,14 @@
use std::process;
use clap::Parser;
use duplicated::Cli;
/// Binary entry point: parses the command line, runs the scan, and exits
/// with status 1 after printing the error if the scan fails.
fn main() {
    let outcome = duplicated::run(Cli::parse());
    if let Err(e) = outcome {
        eprintln!("Application error: {e}");
        process::exit(1);
    }
}

1
test-data/file1 Normal file
View File

@@ -0,0 +1 @@
Hello, World!

1
test-data/file2 Normal file
View File

@@ -0,0 +1 @@
Hello, World!

0
test-data/file3 Normal file
View File