from sklearn.model_selection import train_test_split

# Split `df` into a training part and a held-out test part.
# test_size=0.2 as a float requests 20% of the samples for the test split,
# leaving the remainder for training.
# NOTE(review): `df` is defined elsewhere (presumably a pandas DataFrame; confirm).
# NOTE(review): no random_state is passed, so the split will likely differ
# between runs; pass a fixed seed if reproducibility is required.
train, test = train_test_split(df, test_size=0.2)