# Data Wrangling in Pandas Cheat Sheet
Python For Data Science — Learn Data Wrangling online at www.DataCamp.com

## Reshaping Data

### Pivot
>>> df3 = df2.pivot(index='Date', columns='Type', values='Value')  # Spread rows into columns

### Pivot Table
>>> df4 = pd.pivot_table(df2, values='Value', index='Date', columns=['Type'])  # Spread rows into columns

### Stack / Unstack
>>> stacked = df5.stack()  # Pivot a level of column labels
>>> stacked.unstack()      # Pivot a level of index labels

### Melt
>>> pd.melt(df2, id_vars=["Date"], value_vars=["Type", "Value"], value_name="Observations")  # Gather columns into rows

## Iteration
>>> df.iteritems()  # (Column-index, Series) pairs
>>> df.iterrows()   # (Row-index, Series) pairs

## Combining Data

### Merge
>>> pd.merge(data1, data2, how='left', on='X1')
>>> pd.merge(data1, data2, how='right', on='X1')
>>> pd.merge(data1, data2, how='inner', on='X1')
>>> pd.merge(data1, data2, how='outer', on='X1')

### Join
>>> data1.join(data2, how='right')

### Concatenate
Vertical:
>>> s.append(s2)

Horizontal/Vertical:
>>> pd.concat([s, s2], axis=1, keys=['One', 'Two'])
>>> pd.concat([data1, data2], axis=1, join='inner')

## Missing Data
>>> df.dropna()             # Drop NaN values
>>> df3.fillna(df3.mean())  # Fill NaN values with a predetermined value
>>> df2.replace("a", "f")   # Replace values with others

## Duplicate Data
>>> s3.unique()                              # Return unique values
>>> df2.duplicated('Type')                   # Check duplicates
>>> df2.drop_duplicates('Type', keep='last') # Drop duplicates
>>> df.index.duplicated()                    # Check index duplicates

## Dates
>>> df2['Date'] = pd.to_datetime(df2['Date'])
>>> df2['Date'] = pd.date_range('2000-1-1', periods=6, freq='M')
>>> dates = [datetime(2012,5,1), datetime(2012,5,2)]
>>> index = pd.DatetimeIndex(dates)
>>> index = pd.date_range(datetime(2012,2,1), end, freq='BM')

## Visualization
Also see Matplotlib
>>> import matplotlib.pyplot as plt
>>> s.plot()
>>> plt.show()
>>> df2.plot()
>>> plt.show()

## Grouping Data

### Aggregation
>>> df2.groupby(by=['Date', 'Type']).mean()
>>> df4.groupby(level=0).sum()
>>> df4.groupby(level=0).agg({'a': lambda x: sum(x)/len(x), 'b': np.sum})

### Transformation
>>> customSum = lambda x: (x + x%2)
>>> df4.groupby(level=0).transform(customSum)

## Advanced Indexing
Also see NumPy Arrays

### Selecting
>>> df3.loc[:, (df3 > 1).any()]        # Select cols with any vals > 1
>>> df3.loc[:, (df3 > 1).all()]        # Select cols with all vals > 1
>>> df3.loc[:, df3.isnull().any()]     # Select cols with NaN
>>> df3.loc[:, df3.notnull().all()]    # Select cols without NaN

### Indexing With isin()
>>> df[(df.Country.isin(df2.Type))]    # Find same elements
>>> df3.filter(items=["a", "b"])       # Filter on values
>>> df.select(lambda x: not x % 5)     # Select specific elements

### Where
>>> s.where(s > 0)  # Subset the data

### Query
>>> df6.query('second > first')  # Query DataFrame

### Setting/Resetting Index
>>> df.set_index('Country')  # Set the index
>>> df4 = df.reset_index()   # Reset the index
>>> df = df.rename(index=str, columns={"Country": "cntry", "Capital": "cptl", "Population": "ppltn"})  # Rename DataFrame

### Reindexing
>>> s2 = s.reindex(['a', 'c', 'd', 'e', 'b'])

Forward Filling:
>>> df.reindex(range(4), method='ffill')

    Country   Capital    Population
  0 Belgium   Brussels   11190846
  1 India     New Delhi  1303171035
  2 Brazil    Brasília   207847528
  3 Brazil    Brasília   207847528

Backward Filling:
>>> s3 = s.reindex(range(5), method='bfill')

  0 3
  1 3
  2 3
  3 3
  4 3

### MultiIndexing
>>> arrays = [np.array([1,2,3]), np.array([5,4,3])]
>>> df5 = pd.DataFrame(np.random.rand(3, 2), index=arrays)
>>> tuples = list(zip(*arrays))
>>> index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
>>> df6 = pd.DataFrame(np.random.rand(3, 2), index=index)
>>> df2.set_index(["Date", "Type"])

Learn Data Skills Online at www.DataCamp.com