using System.Collections.Generic;
/// <summary>
/// Demo entry point. Builds a fixed list of <c>DupInfo</c> records, groups them
/// first by file size and then by checksum, and prints every group that still
/// contains more than one file.
/// </summary>
public static void Main()
{
    var candidates = new List<DupInfo>
    {
        new DupInfo { FilePath = @"C:\User\FakePath.txt", FileSize = 1321056 },
        new DupInfo { FilePath = @"C:\User\FakePath.txt", FileSize = 120556 },
        new DupInfo { FilePath = @"C:\User\ForPeace.txt", FileSize = 1321056 },
        new DupInfo { FilePath = @"C:\User\RealThing.txt", FileSize = 1321056 },
        new DupInfo { FilePath = @"C:\User\RhymeTime.txt", FileSize = 1321056 },
        new DupInfo { FilePath = @"C:\User\won't match.txt", FileSize = 1321056 },
        new DupInfo { FilePath = @"C:\Other\Ham-n-cheese.txt", FileSize = 653096 },
        new DupInfo { FilePath = @"C:\Other\Hyenas.txt", FileSize = 653096 },
        new DupInfo { FilePath = @"C:\Umbrella\ndsFP.txt", FileSize = 1321056 }
    };

    // Only files that share a size are compared by checksum; within each
    // size group, only checksum groups with more than one member are kept.
    var duplicateGroups = candidates
        .GroupBy(file => file.FileSize)
        .Where(sameSize => sameSize.Count() > 1)
        .SelectMany(sameSize => sameSize
            .GroupBy(file => file.Checksum)
            .Where(sameChecksum => sameChecksum.Count() > 1));

    // Key each surviving group by the (size, checksum) pair its members share.
    var masterDuplicateDict = new Dictionary<DupStats, IEnumerable<DupInfo>>();
    foreach (var duplicateGroup in duplicateGroups)
    {
        var representative = duplicateGroup.First();
        var stats = new DupStats
        {
            FileSize = representative.FileSize,
            Checksum = representative.Checksum
        };
        masterDuplicateDict.Add(stats, duplicateGroup);
    }

    // Report each group followed by the paths of the files it contains.
    foreach (var entry in masterDuplicateDict)
    {
        Console.WriteLine("Possible duplicates for Filesize: {0}, Checksum: {1}", entry.Key.FileSize, entry.Key.Checksum);
        foreach (var file in entry.Value)
        {
            Console.WriteLine(" Path: {0}", file.FilePath);
        }
    }
}
/// <summary>Path of the file this record describes.</summary>
public string FilePath { get; set; }

/// <summary>Size of the file (presumably in bytes; the unit is not shown here).</summary>
public long FileSize { get; set; }

/// <summary>
/// Checksum of the file. Read-only; recomputed through
/// <see cref="CalculateChecksum"/> on every access.
/// </summary>
public int Checksum
{
    get { return CalculateChecksum(); }
}

/// <summary>
/// NOTE(review): not read or written anywhere in the visible code; its
/// intended meaning should be confirmed against callers.
/// </summary>
public int MaxFolder { get; set; }
/// <summary>
/// Computes the checksum for this record: the hash code of the string formed
/// by the upper-case characters of <c>FilePath</c>, in their original order.
/// </summary>
/// <returns>
/// The hash code of the upper-case characters of the path. A null path is
/// treated as an empty path instead of throwing.
/// </returns>
/// <remarks>
/// NOTE(review): per the .NET documentation, <c>string.GetHashCode()</c> values
/// are not guaranteed to be stable across processes, platforms or runtime
/// versions, so this value should only be compared within a single run.
/// </remarks>
public int CalculateChecksum()
{
    // A DupInfo whose FilePath was never assigned holds null; without this
    // guard the LINQ call below throws ArgumentNullException from inside the
    // Checksum property getter.
    var path = FilePath ?? string.Empty;

    var upperCaseChars = path.Where(ch => char.IsUpper(ch)).ToArray();
    return new string(upperCaseChars).GetHashCode();
}
/// <summary>File size component of the key; compared by <c>Equals</c>.</summary>
public long FileSize { get; set; }

/// <summary>Checksum component of the key; compared by <c>Equals</c>.</summary>
public int Checksum { get; set; }
/// <summary>
/// Value equality: two <c>DupStats</c> are equal when both their
/// <c>FileSize</c> and <c>Checksum</c> match.
/// </summary>
/// <param name="other">Object to compare with this instance; may be null.</param>
/// <returns>
/// <c>true</c> if <paramref name="other"/> is a <c>DupStats</c> with the same
/// file size and checksum; <c>false</c> otherwise, including when it is null
/// or of a different type.
/// </returns>
public override bool Equals(object other)
{
    var otherStats = other as DupStats;

    // "as" yields null for a null argument or an unrelated type. Equals must
    // answer false in that case rather than throw NullReferenceException.
    if (ReferenceEquals(otherStats, null))
    {
        return false;
    }

    return otherStats.FileSize == this.FileSize
        && otherStats.Checksum == this.Checksum;
}
public override int GetHashCode()