Search
 
SCRIPT & CODE EXAMPLE
 
CODE EXAMPLE FOR PYTHON

Select rows in pandas MultiIndex DataFrame

         col
one two     
a   t      0
    u      1
    v      2
    w      3
# How do I select all rows with value "a" in level "one"?
df.loc[['a']] # Reported to raise "TypeError: Expected tuple, got str" (NOTE(review): likely only on older pandas versions -- confirm)
df.loc[('a', slice(None)), :] # Explicit tuple form: no error
# Or use a cross-section:
df.xs('a', level=0, axis=0, drop_level=False)
# df.xs('a', drop_level=False)
# Here, the drop_level=False argument is needed to prevent xs
# from dropping level "one" in the result (the level we sliced on).
# Yet another option here is using query:
df.query("one == 'a'")
# If the index level did not have a name, you would need to change your query
# string to be "ilevel_0 == 'a'".
# Finally, using get_level_values:
df[df.index.get_level_values('one') == 'a']
# If your levels are unnamed, or if you need to select by position (not label), use:
# df[df.index.get_level_values(0) == 'a']
## How do I slice all rows with value "t" on level "two"?
# slice(None) means "everything" on level "one"; 't' is matched on level "two".
df.loc[(slice(None), 't'), :]
# OR, the same selection written with IndexSlice so that ":" can be used directly
idx = pd.IndexSlice
df.loc[idx[:, 't'], :]
# OR
df.loc(axis=0)[pd.IndexSlice[:, 't']]
# OR
df.xs('t', axis=0, level=1, drop_level=False)
# OR
df.query("two == 't'")
# Or, if the second level has no name,
# df.query("ilevel_1 == 't'")
# And finally, with get_level_values, you may do
df[df.index.get_level_values('two') == 't']
# Or, to perform selection by position/integer,
# df[df.index.get_level_values(1) == 't']
##
# How can I select rows corresponding to items "b" and "d" in level "one"?
df.loc[['b', 'd']] # "b" and "d" are labels of the outer (level 0) index.
# OR
items = ['b', 'd']
# "@items" refers to the Python variable `items` from inside the query string.
df.query("one in @items")
# df.query("one == @items", parser='pandas')
# df.query("one in ['b', 'd']")
# df.query("one == ['b', 'd']", parser='pandas')
# OR
df[df.index.get_level_values("one").isin(['b', 'd'])]
# How can I select rows corresponding to items "t" and "w" in level "two"?
# ":" keeps every label of level "one"; the list selects labels on level "two".
df.loc[pd.IndexSlice[:, ['t', 'w']], :]
# OR
items = ['t', 'w']
df.query("two in @items")
# df.query("two == @items", parser='pandas')
# df.query("two in ['t', 'w']")
# df.query("two == ['t', 'w']", parser='pandas')
# OR
df[df.index.get_level_values('two').isin(['t', 'w'])]
#
         col
one two     
c   u      9
# How do I retrieve the single cross-section ('c', 'u'), i.e. "c" in level
# "one" and "u" in level "two"?
df.loc[('c', 'u'), :]
# OR
df.loc[pd.IndexSlice[('c', 'u')]] # May emit "PerformanceWarning: indexing past lexsort depth may impact performance."
# Solution: sort the index first, then select from the sorted frame.
df_sort = df.sort_index()
df_sort.loc[('c', 'u')]
# OR
df.xs(('c', 'u'))
# OR
df.query("one == 'c' and two == 'u'")
# OR build one boolean mask per level and keep rows where both match.
m1 = (df.index.get_level_values('one') == 'c')
m2 = (df.index.get_level_values('two') == 'u')
df[m1 & m2]
         col
one two     
c   u      9
a   w      3
# How do I select the two cross-sections ('c', 'u') and ('a', 'w')?
# Passing a list of full index tuples to loc returns the rows in the order listed.
df.loc[[('c', 'u'), ('a', 'w')]]
# df.loc[pd.IndexSlice[[('c', 'u'), ('a', 'w')]]]
# OR
# MultiIndex.isin accepts a list of full index tuples and yields a boolean mask.
# The index here has exactly two levels ("one", "two"), so no level needs to be
# dropped first. If the index had additional levels that are not part of the
# tuples, drop them before matching, e.g.
# df[df.index.droplevel(<extra level>).isin([('c', 'u'), ('a', 'w')])]
df[df.index.isin([('c', 'u'), ('a', 'w')])]
         col
one two     
a   t      0
    u      1
    v      2
    w      3
b   t      4
c   t      8
d   t     12
# How can I retrieve all rows corresponding to "a" in level "one" or "t" in level "two"?
df.query("one == 'a' or two == 't'")
# OR build one boolean mask per level and keep rows where either matches.
m1 = (df.index.get_level_values('one') == 'a')
m2 = (df.index.get_level_values('two') == 't')
df[m1 | m2]
         col
one two     
b   7      4
    9      5
c   7     10
d   6     11
    8     12
    8     13
    6     15
 # How do I get all rows where values in level "two" are greater than 5?
# Here level "two" of df2 holds numbers, so it can be compared directly.
df2.query("two > 5")
# OR compare the level's values to get a boolean mask over the rows.
df2[df2.index.get_level_values('two') > 5]
##############################
# Three-level MultiIndex built as a cartesian product:
# 2 groups x 3 stocks x 3 metrics = 18 entries. The levels are unnamed.
index = pd.MultiIndex.from_product(
    [
        ['a', 'b'],
        ['stock1', 'stock2', 'stock3'],
        ['price', 'volume', 'velocity'],
    ]
)
# Single unnamed column holding 1..18, one value per index entry.
df = pd.DataFrame(list(range(1, 19)), index)

# Match levels 1 and 2 at once; by default xs drops the levels it matched on.
df.xs(('stock1', 'velocity'), level=(1, 2))
# OR
# Combine one boolean mask per level; all index levels are kept in the result.
df.iloc[
    df.index.isin(['stock1'], level=1)
    & df.index.isin(['velocity'], level=2)
]
# Same approach, allowing several labels on level 1.
df.iloc[
    df.index.isin(['stock1', 'stock3'], level=1)
    & df.index.isin(['velocity'], level=2)
]















                                                                           
Source by stackoverflow.com #
 
PREVIOUS NEXT
Tagged: #Select #rows #pandas #MultiIndex #DataFrame
ADD COMMENT
Topic
Name
5+5 =