from pathlib import Path
import hashlib
import os
def _file_md5(file_path, chunk_size=1024 * 1024):
    """Return the hex MD5 digest of a file's contents, read in chunks."""
    digest = hashlib.md5()
    with open(file_path, 'rb') as f:
        # Stream the file so large files are never held in memory at once.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def remove_duplicate(path, *, dry_run=True):
    """Find files with identical content under *path* and report or delete them.

    Every regular file below *path* (searched recursively) is hashed with
    MD5. The first file seen for a given digest is kept; each later file
    with the same digest is treated as a duplicate and printed as
    ``Removing --> <file>``.

    Args:
        path: Directory to scan (string or path-like).
        dry_run: When True (the default) duplicates are only printed.
            Pass ``dry_run=False`` to actually delete them.

    Raises:
        OSError: If a file cannot be read or, with ``dry_run=False``,
            cannot be deleted.

    Note:
        MD5 is used purely as a content fingerprint, not for security;
        deliberately crafted colliding files would be seen as duplicates.
    """
    unique = {}
    # Sorting makes the choice of which copy is kept deterministic (the
    # first path in sort order) and snapshots the tree before any deletion.
    for file in sorted(Path(path).rglob('*')):
        if not file.is_file():
            continue
        # The file is hashed and closed inside the helper, so it is no
        # longer open when it may be deleted below (required on Windows).
        filehash = _file_md5(file)
        if filehash not in unique:
            unique[filehash] = file
            continue
        # Act on the newly found duplicate, never on the kept original.
        print(f'Removing --> {file}')
        if not dry_run:
            file.unlink()
if __name__ == '__main__':
    # Script entry point: scan one hard-coded directory for duplicate files.
    # NOTE(review): 'C:foo' is a drive-relative path on Windows (``foo``
    # under the current directory of drive C) -- confirm whether the
    # absolute path C:\foo was intended.
    scan_root = r'C:foo'
    remove_duplicate(scan_root)