# Duplicates are identified by two columns together: the dog's name and its
# breed. Two dogs may share a name as long as their breeds differ.
# drop_duplicates returns a new DataFrame and, by default, leaves df itself
# untouched. A row counts as a duplicate only when both "name" and "breed"
# match an earlier row.
df.drop_duplicates(subset=["name", "breed"])
# Same de-duplication, but the result is renumbered 0..n-1 instead of keeping
# the original row labels. drop_duplicates has no `index` keyword (passing one
# raises TypeError); `ignore_index=True` is the supported option and is
# presumably what "without including index" was meant to express.
df.drop_duplicates(subset=["name", "breed"], ignore_index=True)
from pathlib import Path
import hashlib
import os
def _file_md5(file, chunk_size=1 << 20):
    """Return the hex MD5 digest of *file*, reading it in fixed-size chunks."""
    digest = hashlib.md5()
    with open(file, 'rb') as f:
        # Chunked reads keep memory use flat even for very large files.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def remove_duplicate(path, *, delete=False):
    """Find files under *path* whose content duplicates an earlier file.

    The tree is walked recursively. Files are compared by the MD5 digest of
    their content; the first file seen with a given digest is kept and every
    later file with the same digest is reported as a duplicate.

    Args:
        path: Directory to scan (anything ``pathlib.Path`` accepts).
        delete: When true, each duplicate is deleted after being reported.
            The default is a dry run that only prints.

    Returns:
        The duplicate paths, in the order they were reported.
    """
    unique = {}
    duplicates = []
    # Sorted traversal makes "which copy is kept" deterministic instead of
    # depending on the file system's directory order.
    for file in sorted(Path(path).rglob('*')):
        if not file.is_file():
            continue
        filehash = _file_md5(file)
        if filehash not in unique:
            unique[filehash] = file
            continue
        # Report/remove the duplicate itself, never the first-seen original
        # stored in `unique`.
        print(f'Removing --> {file}')
        if delete:
            file.unlink()
        duplicates.append(file)
    return duplicates
if __name__ == '__main__':
    import sys

    # Scan the directory given on the command line when there is one;
    # otherwise fall back to the example folder. The fallback needs the
    # backslash: 'C:foo' is a drive-relative Windows path (resolved against
    # the current directory on drive C:), not the folder C:\foo.
    path = sys.argv[1] if len(sys.argv) > 1 else r'C:\foo'
    remove_duplicate(path)