# Drop rows that repeat an earlier (name, breed) pair. Both columns must match
# for a row to count as a duplicate, so two dogs may share a name as long as
# their breeds differ. By default the first occurrence of each pair is kept.
df.drop_duplicates(subset=["name", "breed"])
# Same de-duplication, but relabel the result 0..n-1 instead of keeping the
# original row labels. drop_duplicates() has no `index` keyword (passing one
# raises TypeError); `ignore_index=True` is the supported option.
df.drop_duplicates(subset=["name", "breed"], ignore_index=True)
# Sample frame in which rows 0/1 and rows 4/5 are exact copies of each other.
df = pd.DataFrame(
    {
        "Date": ["2022", "2022", "2021", "2021", "2020", "2020"],
        "Time": ["20:00", "20:00", "20:00", "21:00", "22:00", "22:00"],
    }
)
# With no `subset`, a row counts as a duplicate only when every column matches.
# The result is not assigned, so `df` itself still holds all six rows.
df.drop_duplicates()
# Resulting rows (the first occurrence of each distinct row is kept):
#   Date  Time
#   2022  20:00
#   2021  20:00
#   2021  21:00
#   2020  22:00
from pathlib import Path
import hashlib
import os
def _file_md5(file, chunk_size=1 << 20):
    """Return the hex MD5 digest of *file*, reading it in fixed-size chunks."""
    digest = hashlib.md5()
    with open(file, 'rb') as f:
        # iter() with a sentinel stops at the empty bytes object read at EOF,
        # so large files are never held in memory all at once.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def remove_duplicate(path, *, delete=False):
    """Report, and optionally delete, files under *path* with identical content.

    Every regular file below *path* (searched recursively) is hashed with MD5.
    The first file seen for a given digest is kept; each later file with the
    same digest is treated as a duplicate of it. Files are visited in sorted
    path order so that the copy that is kept does not depend on the order in
    which the file system lists directory entries.

    Args:
        path: Root directory to scan (str or path-like).
        delete: When False (the default) duplicates are only printed, which
            allows a dry run before anything is removed. When True each
            duplicate is also deleted from disk.

    Returns:
        A list of ``Path`` objects, one per duplicate found, in the order
        they were encountered.

    Raises:
        OSError: If a file cannot be read or, with ``delete=True``, removed.
    """
    unique = {}
    duplicates = []
    # sorted() also materialises the listing up front, so deleting files below
    # does not interfere with the directory traversal.
    # NOTE: Path.is_file() follows symlinks, so a symlink to a regular file is
    # hashed like its target.
    for file in sorted(Path(path).rglob('*')):
        if not file.is_file():
            continue
        filehash = _file_md5(file)
        if filehash not in unique:
            unique[filehash] = file
            continue
        # Act on the file just found, not on unique[filehash]: that entry is
        # the copy being kept, and removing it would fail on a third duplicate.
        print(f'Removing --> {file}')
        if delete:
            os.remove(file)
        duplicates.append(file)
    return duplicates
if __name__ == '__main__':
    # Script entry point: scan one hard-coded folder for duplicate files.
    # NOTE(review): r'C:foo' is a drive-relative Windows path (relative to the
    # current directory on drive C) - confirm whether r'C:\foo' was intended.
    path = r'C:foo'
    remove_duplicate(path=path)