# Collect the values of column "b" into one Python list per group of column "a".
# The frame must be bound to `df`; otherwise the groupby below has nothing
# (or a stale object) to operate on.
df = pd.DataFrame({"a": ["A", "A", "B", "B", "B", "C"], "b": [1, 2, 5, 5, 4, 6]})
df.groupby("a")["b"].apply(list)
# Out:
# a
# A       [1, 2]
# B    [5, 5, 4]
# C          [6]
# Name: b, dtype: object
# to create dataframe
# Built before the usage example so that the example has data to group:
# two string key columns (col1, col2) and four float columns (col3..col6)
# holding random normal values rounded to 2 decimals.
keys = np.array(
    [["A", "B"]] * 4 + [["C", "D"]] * 3 + [["E", "F"]] * 2 + [["G", "H"]]
)
value_cols = ["col3", "col4", "col5", "col6"]
# The numeric block is kept separate from the string keys: stacking both into
# one ndarray would coerce the floats to strings and require casting them back.
df = pd.concat(
    [
        pd.DataFrame(keys, columns=["col1", "col2"]),
        pd.DataFrame(
            np.random.randn(len(keys), len(value_cols)).round(2),
            columns=value_cols,
        ),
    ],
    axis=1,
)

# usage example
# Several aggregations per (col1, col2) group, flattened into one frame with
# columns: col1, col2, counts, col3_mean, col4_median, col4_min.
gb = df.groupby(["col1", "col2"])
# size() counts rows per group (NaNs included), unlike count().
counts = gb.size().to_frame(name="counts")
summary = counts.join(
    # Named aggregation gives each output column its final name directly,
    # replacing a separate agg + rename + join per statistic.
    gb.agg(
        col3_mean=("col3", "mean"),
        col4_median=("col4", "median"),
        col4_min=("col4", "min"),
    )
).reset_index()
summary
# Generic form of "collect one column into a list per group".
# NOTE(review): assumes `df` has columns named "FID" and "name_ID" — confirm
# against the frame actually in scope when this snippet is run.
groupByCol, elementFromColToList = "FID", "name_ID"
temp = (
    df.groupby(groupByCol)[elementFromColToList]
    .apply(list)  # one Python list of values per group
    .to_frame()  # Series -> single-column DataFrame indexed by the group key
)
temp
# Example output:
# groupid            b
# 1          [1, 2, 3]
# 3               [27]
# 4        [42, 42, 8]