First working version v0.1.0
This commit is contained in:
213
src/lib.rs
Normal file
213
src/lib.rs
Normal file
@@ -0,0 +1,213 @@
|
||||
use std::error::Error;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use bytesize::ByteSize;
|
||||
use clap::Parser;
|
||||
use indicatif::ProgressBar;
|
||||
|
||||
extern crate bytesize;
|
||||
|
||||
#[derive(Parser)]
|
||||
#[command(version, about, long_about = None)]
|
||||
pub struct Cli {
|
||||
path: PathBuf,
|
||||
|
||||
#[arg(short = 's', long, value_name = "SIZE")]
|
||||
min_size: Option<ByteSize>,
|
||||
|
||||
#[arg(short = 'm', long, value_name = "SIZE")]
|
||||
max_size: Option<ByteSize>,
|
||||
}
|
||||
|
||||
pub fn run(args: Cli) -> Result<(), Box<dyn Error>> {
|
||||
let files = navigate_file_tree(&args.path, &args.min_size, &args.max_size)?;
|
||||
println!("Scanning {} files", files.len());
|
||||
|
||||
let hashes = hash_files(&files)?;
|
||||
let repeats = find_duplicates(&files, &hashes);
|
||||
|
||||
if repeats.is_empty() {
|
||||
println!("No duplicate files found!");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
println!("{} Duplicate files found", repeats.len());
|
||||
for duplicate in repeats {
|
||||
let size = ByteSize::b(fs::metadata(duplicate[0])?.len());
|
||||
let paths = duplicate
|
||||
.into_iter()
|
||||
.map(|path| path.to_string_lossy())
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ");
|
||||
|
||||
println!("{size} {paths}");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn is_filesize_in_range(
|
||||
filesize: u64,
|
||||
min_size: &Option<ByteSize>,
|
||||
max_size: &Option<ByteSize>,
|
||||
) -> bool {
|
||||
if let Some(size) = min_size {
|
||||
if filesize < size.0 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(size) = max_size {
|
||||
if filesize > size.0 {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
pub fn navigate_file_tree(
|
||||
path: &Path,
|
||||
min_size: &Option<ByteSize>,
|
||||
max_size: &Option<ByteSize>,
|
||||
) -> Result<Vec<PathBuf>, Box<dyn Error>> {
|
||||
let mut files: Vec<PathBuf> = Vec::new();
|
||||
if !path.is_dir() {
|
||||
let metadata = fs::metadata(path);
|
||||
if let Err(err) = metadata {
|
||||
eprintln!("{err}");
|
||||
return Ok(files);
|
||||
}
|
||||
|
||||
let filesize = metadata?.len();
|
||||
|
||||
if !is_filesize_in_range(filesize, min_size, max_size) {
|
||||
return Ok(files);
|
||||
}
|
||||
|
||||
files.push(path.to_path_buf());
|
||||
return Ok(files);
|
||||
}
|
||||
|
||||
let children = fs::read_dir(path);
|
||||
if let Err(err) = children {
|
||||
eprintln!("{err}");
|
||||
return Ok(files);
|
||||
}
|
||||
|
||||
for child in children? {
|
||||
let child = child?;
|
||||
let child_path = child.path();
|
||||
if child_path.is_dir() {
|
||||
files.append(&mut navigate_file_tree(&child_path, min_size, max_size)?);
|
||||
} else {
|
||||
let metadata = fs::metadata(&child_path);
|
||||
if let Err(err) = metadata {
|
||||
eprintln!("{err}");
|
||||
return Ok(files);
|
||||
}
|
||||
|
||||
let filesize = metadata?.len();
|
||||
if !is_filesize_in_range(filesize, min_size, max_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
files.push(child_path.to_path_buf());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(files)
|
||||
}
|
||||
|
||||
pub fn hash_files(files: &Vec<PathBuf>) -> Result<Vec<u64>, Box<dyn Error>> {
|
||||
let pb = ProgressBar::new(files.len().try_into()?);
|
||||
|
||||
let mut hashes: Vec<u64> = Vec::new();
|
||||
for file in files {
|
||||
//let buf = fs::read(file).unwrap_or_else();
|
||||
let buf = match fs::read(file) {
|
||||
Ok(t) => t,
|
||||
Err(_e) => {
|
||||
//eprint!("{e}");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
hashes.push(seahash::hash(&buf));
|
||||
|
||||
pb.inc(1);
|
||||
}
|
||||
|
||||
pb.finish_with_message("hashing finished");
|
||||
|
||||
Ok(hashes)
|
||||
}
|
||||
|
||||
/// Groups files that share a hash.
///
/// `hashes[i]` must be the hash of `files[i]`. Each returned group holds
/// references to two or more files with the same hash. Groups are ordered by
/// the position of their first member in `files`, and members within a group
/// keep their order from `files`.
///
/// If the two slices differ in length, the surplus elements of the longer one
/// are ignored.
pub fn find_duplicates<'a>(files: &'a [PathBuf], hashes: &[u64]) -> Vec<Vec<&'a PathBuf>> {
    // Maps a hash to the index of its group in `groups`, so each file is
    // placed with a single lookup instead of rescanning every other hash.
    let mut group_of_hash: std::collections::HashMap<u64, usize> =
        std::collections::HashMap::new();
    let mut groups: Vec<Vec<&'a PathBuf>> = Vec::new();

    for (file, &hash) in files.iter().zip(hashes) {
        let slot = *group_of_hash.entry(hash).or_insert_with(|| {
            groups.push(Vec::new());
            groups.len() - 1
        });
        groups[slot].push(file);
    }

    // A group with a single member is a unique file, not a duplicate.
    groups.retain(|group| group.len() > 1);
    groups
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use super::*;

    /// `navigate_file_tree` with no size filter should return exactly the three
    /// fixture files under `./test-data`.
    #[test]
    fn nav_filetree() {
        let root = PathBuf::from("./test-data");
        let found = navigate_file_tree(&root, &None, &None).unwrap();

        let expected = [
            PathBuf::from("./test-data/file1"),
            PathBuf::from("./test-data/file2"),
            PathBuf::from("./test-data/file3"),
        ];

        assert_eq!(found.len(), 3);
        for file in &expected {
            assert!(found.contains(file));
        }
    }

    /// Exercises `hash_files` together with `find_duplicates`: the fixtures are
    /// expected to produce a single duplicate group containing `file1` and `file2`.
    #[test]
    fn find_duplicate_files() {
        let files = vec![
            PathBuf::from("./test-data/file1"),
            PathBuf::from("./test-data/file2"),
            PathBuf::from("./test-data/file3"),
        ];

        let hashes = hash_files(&files).unwrap();
        let groups = find_duplicates(&files, &hashes);

        assert_eq!(groups.len(), 1);

        let group = &groups[0];
        for file in &files[..2] {
            assert!(group.contains(&file));
        }
    }
}
|
||||
14
src/main.rs
Normal file
14
src/main.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
use std::process;
|
||||
|
||||
use clap::Parser;
|
||||
use duplicated::Cli;
|
||||
|
||||
fn main() {
|
||||
let args = Cli::parse();
|
||||
|
||||
if let Err(e) = duplicated::run(args) {
|
||||
eprintln!("Application error: {e}");
|
||||
process::exit(1);
|
||||
};
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user