# Train/test split: load the CSV and randomly assign each row to either the
# training set or the test set.
# NOTE(review): relies on `pd` (pandas) and `np` (numpy) being imported
# elsewhere in this file -- confirm.

# Probability that any given row is assigned to the training set.
TRAIN_FRACTION = 0.8

df = pd.read_csv('file_location')

# One uniform [0, 1) draw per row; a row goes to `train` when its draw falls
# below TRAIN_FRACTION. Because every row is decided independently, the split
# is only approximately 80/20, and it uses NumPy's global random state, so it
# differs between runs unless that state has been seeded beforehand.
mask = np.random.rand(len(df)) < TRAIN_FRACTION

# Boolean-mask indexing: `train` keeps the rows where the mask is True and
# `test` keeps the complementary rows, so the two sets are disjoint and
# together cover every row of `df`.
train = df[mask]
test = df[~mask]