# Continuous uniform samples on the half-open interval [0, 1), shaped 3x3.
np.random.rand(3, 3)
# Sample output:
# [[0.20966286 0.72581506 0.78926387]
#  [0.85719525 0.00163033 0.45001818]
#  [0.17630303 0.40184026 0.89585902]]

# Integers drawn uniformly from 0..9 (the upper bound 10 is excluded).
np.random.randint(low=0, high=10, size=(3, 3))
# Sample output:
# [[6 8 4]
#  [1 3 3]
#  [6 9 7]]

# Gaussian samples with mean 5 and standard deviation 2.
np.random.normal(loc=5, scale=2, size=(3, 3))
# Sample output:
# [[3.8768528  5.73747086 3.63564872]
#  [5.49814587 2.62757122 3.61948982]
#  [3.36409537 7.86431236 5.16509868]]
>>> np.random.rand(3,2)
array([[ 0.14022471,  0.96360618],  #random
       [ 0.37601032,  0.25528411],  #random
       [ 0.49313049,  0.94909878]]) #random
>>> 2.5 * np.random.randn(2, 4) + 3
array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],  #random
       [ 0.39924804,  4.68456316,  4.99394529,  4.84057254]]) #random
# Randomly partition the rows of a CSV into training and test subsets.
df = pd.read_csv('file_location')
# One uniform draw in [0, 1) per row; a row lands in the training subset when
# its draw is below 0.8, so the split is approximately (not exactly) 80/20.
mask = np.random.rand(df.shape[0]) < 0.8
# Rows where the mask is True go to train; the complement goes to test.
train, test = df[mask], df[np.logical_not(mask)]