# De-duplicate file.txt in place: only the first occurrence of each line is
# kept, and the surviving lines stay in their original order.
lines_seen = set()  # every distinct line written back so far
with open("file.txt", "r+") as handle:
    d = handle.readlines()
    # Rewind so the kept lines overwrite the old content from the beginning.
    handle.seek(0)
    for line in d:
        if line in lines_seen:
            continue  # repeated line: do not write it again
        handle.write(line)
        lines_seen.add(line)
    # The rewritten content is never longer than the old one; cut the leftover.
    handle.truncate()
# Reads file.txt and collects its distinct lines into a list by passing the
# file object through set(): duplicates collapse, each line keeps its line
# ending, and the original line order is not preserved.
# NOTE(review): indentation was lost in this fragment; at least the
# `new_data = ...` line must sit inside the `with` body — confirm the layout.
# NOTE(review): `return` is only legal inside a function, but no enclosing
# `def` is visible here. This looks like a function body whose header is
# missing; restore the header (name/parameters unknown) before running.
with open("file.txt", "r") as txt_file:
new_data = list(set(txt_file))
return new_data
import hashlib
def main(input_file="in.txt", output_file="out.txt"):
    """Copy *input_file* to *output_file*, dropping repeated lines.

    Two lines count as duplicates when the MD5 digests of their
    whitespace-stripped, UTF-8 encoded text are equal, so lines that differ
    only in leading/trailing whitespace (including the line ending) are
    treated as the same line. The first occurrence is written unchanged;
    later occurrences are skipped. Only the digests are kept in memory, not
    the lines themselves.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the file to create or overwrite.

    Raises:
        OSError: If either file cannot be opened (e.g. the input is missing).
    """
    completed_hash = set()
    # The input is opened first so that a missing input does not truncate an
    # existing output file; `with` closes both handles even on failure.
    with open(input_file, "r") as source, open(output_file, "w") as target:
        for line in source:
            hash_value = hashlib.md5(line.strip().encode("utf-8")).hexdigest()
            if hash_value not in completed_hash:
                target.write(line)
                completed_hash.add(hash_value)


# Run the de-duplication only when executed as a script, not on import.
if __name__ == "__main__":
    main()