How fast is finding duplicate files with PowerShell using SHA256? Fucking fast.

I just had to do it. I did this because why the hell not? We all have litter on our disks. I guess people would be more interested in math, data, and AI. How boring… Maybe not boring at work, but definitely boring at home. Maybe I’ll get motivated to do some TensorFlow slop after I finish my GTK app in Nim.

# Script parameters.
#   -Path  Optional string. Falls back to the literal default below when omitted
#          (the default looks like a placeholder - TODO confirm a real location).
Param (
    [Parameter(Mandatory = $false)]
    [string] $Path = 'some default path'
)
 
 
# Groups every file found recursively under $path by the SHA256 hash of its
# contents.
#
# Parameters:
#   $path - location handed to Get-ChildItem -Recurse.
#
# Returns:
#   A hashtable keyed by hash string; each value is an array of the full paths
#   of the files that produced that hash. Hashes with a single path are
#   included too, so callers decide what counts as a duplicate.
#
# Files whose hash cannot be computed are skipped; the error written by
# Get-FileHash is left visible.
Function Get-Duplicates([string]$path) {
    # hash string -> List[string] of full paths sharing that hash
    $groups = @{}

    Get-ChildItem -Recurse $path -File |
      ForEach-Object {
          # -LiteralPath: names containing [ ] * ? must not be interpreted as
          # wildcard patterns, otherwise the file is missed or mismatched.
          $hash = Get-FileHash -LiteralPath $_.FullName -Algorithm SHA256

          # No output means Get-FileHash failed for this file; skip it rather
          # than indexing the table with a null key.
          if ($null -eq $hash) {
              return
          }

          $hashKey = $hash.Hash

          if ( -not $groups.ContainsKey($hashKey) ) {
              $groups[$hashKey] = New-Object 'System.Collections.Generic.List[string]'
          }

          # Add() returns nothing, so nothing leaks into the function output.
          $groups[$hashKey].Add($_.FullName)
      }

    # Hand back plain arrays so callers see the same shape as before.
    $dict = @{}
    foreach ($hashKey in $groups.Keys) {
        $dict[$hashKey] = $groups[$hashKey].ToArray()
    }
    return $dict
}
 
# Driver: hash everything under $Path, then print each hash that is shared by
# two or more files, followed by the full paths of those files.
Write-Host "Searching $Path"

$results = Get-Duplicates -path $Path

ForEach ($key in $results.Keys) {
    # @() guarantees an array even if a group holds a single value.
    $paths = @($results[$key])

    # A hash seen only once is a unique file, not a duplicate - do not list it.
    if ($paths.Count -lt 2) {
        continue
    }

    Write-Host $key
    ForEach ($val in $paths) {
        Write-Host $val
    }
}