Search
 
SCRIPT & CODE EXAMPLE
 
CODE EXAMPLE FOR PYTHON

pandas

# To install Pandas: "pip install pandas"
# To import pandas:
import pandas as pd
import numpy as np # extra import which will be used in examples

# Create simple pandas series
>>> s = pd.Series([1, 3, 5, np.nan, 6, 8])
>>> s
0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

# Create pandas DataFrame from CSV:
df = pd.read_csv('path_to.csv')

# Create timeseries pandas DataFrame
>>> dates = pd.date_range("20130101", periods=6)
>>> df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list("ABCD"))
>>> df
                   A         B         C         D
2013-01-01  0.469112 -0.282863 -1.509059 -1.135632
2013-01-02  1.212112 -0.173215  0.119209 -1.044236
2013-01-03 -0.861849 -2.104569 -0.494929  1.071804
2013-01-04  0.721555 -0.706771 -1.039575  0.271860
2013-01-05 -0.424972  0.567020  0.276232 -1.087401
2013-01-06 -0.673690  0.113648 -1.478427  0.524988

# Create pandas DataFrame from a dictionary
>>> d = {
        "A": 1.0,
        "B": pd.Timestamp("20130102"),
        "C": pd.Series(1, index=list(range(4)), dtype="float32"),
        "D": np.array([3] * 4, dtype="int32"),
        "E": pd.Categorical(["test", "train", "test", "train"]),
        "F": "foo",
    }
>>> df = pd.DataFrame(d)
>>> df
     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo

# Other important methods and attributes:
df.head() # view first 5 rows
df.tail() # view last 5 rows
df.index # get index
df.columns # get columns
df.describe() # summary statistics (count, mean, std, min, quartiles, max)
df.T # transpose
df.sort_index() # sort dataframe rows by index
df.sort_values('column_name') # sort dataframe rows by a specific column
# select column
df['column_name'] 
df.column_name # alternative
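# selection (the date-string examples below need the timeseries df with a DatetimeIndex, not the dictionary df)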
df[0:3] # select the first three rows by position (end of slice is excluded)
df["20130102":"20130104"] # select rows by index labels (both endpoints are included)
df.loc["20130102"] # select by label
df.loc[slice(None), ["A", "B"]] # select all rows (slice(None) is equivalent to ":") and only columns "A" and "B"
df.iloc[3] # select by position

# And many more can be found here: https://pandas.pydata.org/docs/user_guide/10min.html
Source by pandas.pydata.org #
 
PREVIOUS NEXT
Tagged: #pandas
ADD COMMENT
Topic
Name
3+4 =