# Data Wrangling in Pandas Cheat Sheet
Python For Data Science — Learn Data Wrangling online at www.DataCamp.com

## Reshaping Data

### Pivot
>>> df3 = df2.pivot(index='Date', columns='Type', values='Value')  # Spread rows into columns

### Pivot Table
>>> df4 = pd.pivot_table(df2, values='Value', index='Date', columns=['Type'])  # Spread rows into columns

### Stack / Unstack
>>> stacked = df5.stack()  # Pivot a level of column labels
>>> stacked.unstack()      # Pivot a level of index labels

### Melt
>>> pd.melt(df2, id_vars=["Date"], value_vars=["Type", "Value"], value_name="Observations")  # Gather columns into rows

## Iteration
>>> df.iteritems()  # (Column-index, Series) pairs
>>> df.iterrows()   # (Row-index, Series) pairs

## Combining Data

### Merge
>>> pd.merge(data1, data2, how='left', on='X1')
>>> pd.merge(data1, data2, how='right', on='X1')
>>> pd.merge(data1, data2, how='inner', on='X1')
>>> pd.merge(data1, data2, how='outer', on='X1')

### Join
>>> data1.join(data2, how='right')

### Concatenate
Vertical:
>>> s.append(s2)

Horizontal/Vertical:
>>> pd.concat([s, s2], axis=1, keys=['One', 'Two'])
>>> pd.concat([data1, data2], axis=1, join='inner')

## Missing Data
>>> df.dropna()             # Drop NaN values
>>> df3.fillna(df3.mean())  # Fill NaN values with a predetermined value
>>> df2.replace("a", "f")   # Replace values with others

## Duplicate Data
>>> s3.unique()                              # Return unique values
>>> df2.duplicated('Type')                   # Check duplicates
>>> df2.drop_duplicates('Type', keep='last') # Drop duplicates
>>> df.index.duplicated()                    # Check index duplicates

## Dates
>>> df2['Date'] = pd.to_datetime(df2['Date'])
>>> df2['Date'] = pd.date_range('2000-1-1', periods=6, freq='M')
>>> dates = [datetime(2012,5,1), datetime(2012,5,2)]
>>> index = pd.DatetimeIndex(dates)
>>> index = pd.date_range(datetime(2012,2,1), end, freq='BM')

## Visualization
Also see Matplotlib
>>> import matplotlib.pyplot as plt
>>> s.plot()
>>> plt.show()
>>> df2.plot()
>>> plt.show()

## Grouping Data

### Aggregation
>>> df2.groupby(by=['Date', 'Type']).mean()
>>> df4.groupby(level=0).sum()
>>> df4.groupby(level=0).agg({'a': lambda x: sum(x)/len(x), 'b': np.sum})

### Transformation
>>> customSum = lambda x: (x + x%2)
>>> df4.groupby(level=0).transform(customSum)

## Advanced Indexing
Also see NumPy Arrays

### Selecting
>>> df3.loc[:, (df3 > 1).any()]        # Select cols with any vals > 1
>>> df3.loc[:, (df3 > 1).all()]        # Select cols with all vals > 1
>>> df3.loc[:, df3.isnull().any()]     # Select cols with NaN
>>> df3.loc[:, df3.notnull().all()]    # Select cols without NaN

### Indexing With isin()
>>> df[(df.Country.isin(df2.Type))]    # Find same elements
>>> df3.filter(items=["a", "b"])       # Filter on values
>>> df.select(lambda x: not x % 5)     # Select specific elements

### Where
>>> s.where(s > 0)  # Subset the data

### Query
>>> df6.query('second > first')  # Query DataFrame

### Setting/Resetting Index
>>> df.set_index('Country')  # Set the index
>>> df4 = df.reset_index()   # Reset the index
>>> df = df.rename(index=str, columns={"Country": "cntry", "Capital": "cptl", "Population": "ppltn"})  # Rename DataFrame

### Reindexing
>>> s2 = s.reindex(['a', 'c', 'd', 'e', 'b'])

Forward Filling:
>>> df.reindex(range(4), method='ffill')

    Country   Capital    Population
  0 Belgium   Brussels   11190846
  1 India     New Delhi  1303171035
  2 Brazil    Brasília   207847528
  3 Brazil    Brasília   207847528

Backward Filling:
>>> s3 = s.reindex(range(5), method='bfill')

  0 3
  1 3
  2 3
  3 3
  4 3

### MultiIndexing
>>> arrays = [np.array([1,2,3]), np.array([5,4,3])]
>>> df5 = pd.DataFrame(np.random.rand(3, 2), index=arrays)
>>> tuples = list(zip(*arrays))
>>> index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
>>> df6 = pd.DataFrame(np.random.rand(3, 2), index=index)
>>> df2.set_index(["Date", "Type"])

Learn Data Skills Online at www.DataCamp.com