Pandas Basic tutorial 4 - Python Programming

Pandas basic tutorial 4 - Python Programming harsha navalkar blog
Pandas basic tutorial 4 - Python Programming
Open Anaconda Navigator
Launch Spyder

If you have not seen the earlier Pandas tutorials:
Basic tutorial 1 - Python Programming  CLICK HERE
Basic tutorial 2 - Python Programming  CLICK HERE
Basic tutorial 3 - Python Programming  CLICK HERE

PROGRAM 1
# Walk through the pandas option API for two display settings:
# get_option, set_option, reset_option, describe_option and option_context.
import pandas as pd

# Read the current values.
print("\n MAX ROWS \n")
print(pd.get_option("display.max_rows"))
print("\n MAX COLUMNS \n")
print(pd.get_option("display.max_columns"))

# Change each option, then read it back.
print("\n SET OPTION MAX ROWS \n")
pd.set_option("display.max_rows", 70)
print(pd.get_option("display.max_rows"))
print("\n SET OPTION MAX COLUMNS \n")
pd.set_option("display.max_columns", 20)
print(pd.get_option("display.max_columns"))

# Put display.max_rows back to its default value.
print("\n RESET OPTION MAX ROWS \n")
pd.reset_option("display.max_rows")
print(pd.get_option("display.max_rows"))

# describe_option prints the option's documentation itself.
print("\n DESCRIBE OPTION MAX ROWS \n")
pd.describe_option("display.max_rows")

# option_context sets the option only for the duration of the `with` block.
# Both prints run inside the block, so each one shows the temporary value 10.
print("\n OPTION CONTEXT  MAX ROWS \n")
with pd.option_context("display.max_rows", 10):
    print(pd.get_option("display.max_rows"))
    print(pd.get_option("display.max_rows"))

OUTPUT
 MAX ROWS 

60

 MAX COLUMNS 

20

 SET OPTION MAX ROWS 

70

 SET OPTION MAX COLUMNS 

20

 RESET OPTION MAX ROWS 

60

 DESCRIBE OPTION MAX ROWS 

display.max_rows : int
    If max_rows is exceeded, switch to truncate view. Depending on
    `large_repr`, objects are either centrally truncated or printed as
    a summary view. 'None' value means unlimited.

    In case python/IPython is running in a terminal and `large_repr`
    equals 'truncate' this can be set to 0 and pandas will auto-detect
    the height of the terminal and print a truncated object which fits
    the screen height. The IPython notebook, IPython qtconsole, or
    IDLE do not run in a terminal and hence it is not possible to do
    correct auto-detection.
    [default: 60] [currently: 60]



 OPTION CONTEXT  MAX ROWS 

10
10

PROGRAM 2 
# Label-based selection with DataFrame.loc on a frame with string row labels.
import numpy as np  # needed for np.random.randn below
import pandas as pd

# 8 rows x 4 columns of random normal values; the numbers differ on every run.
d = pd.DataFrame(
    np.random.randn(8, 4),
    index=['anita', 'babita', 'cavita', 'davita', 'evita', 'favita', 'gavita', 'hita'],
    columns=['A', 'B', 'C', 'D'],
)

# Every row of the single column A (returns a Series).
print('\n SELECT ALL ROWS OF A \n')
print(d.loc[:, 'A'])

# Heading text is kept as in the original; the selection below is every row
# of columns D and B, in that order.
print('\n SELECT ALL ROWS OF B \n')
print(d.loc[:, ['D', 'B']])
print('\n')

# Two specific rows crossed with two specific columns.
print(d.loc[['anita', 'hita'], ['A', 'C']])
print('\n')

# Label slice over rows 'anita' through 'evita'; .loc slices include the end label.
print(d.loc['anita':'evita'])
print('\n')

# Boolean Series: for row 'anita', whether each column value is positive.
print(d.loc['anita'] > 0)


OUTPUT
SELECT ALL ROWS OF A 

anita    -0.208319
babita   -0.416149
cavita   -1.026510
davita    0.857518
evita     2.033100
favita   -1.154697
gavita   -1.015738
hita     -1.349647
Name: A, dtype: float64

 SELECT ALL ROWS OF B 

               D         B
anita   2.488755 -0.666601
babita -0.183947  0.406478
cavita -1.843386 -0.680845
davita -2.658799  0.516128
evita  -0.199552 -0.694308
favita  1.343513 -1.924048
gavita  0.035830  1.738005
hita   -0.345181 -0.211781


              A         C
anita -0.208319  1.653451
hita  -1.349647 -0.238298


               A         B         C         D
anita  -0.208319 -0.666601  1.653451  2.488755
babita -0.416149  0.406478 -1.432652 -0.183947
cavita -1.026510 -0.680845  1.366286 -1.843386
davita  0.857518  0.516128 -1.020205 -2.658799
evita   2.033100 -0.694308 -0.722837 -0.199552


A    False
B    False
C     True
D     True
Name: anita, dtype: bool

PROGRAM 3
# Position-based (.iloc), label-based (.loc) and plain [] / attribute selection
# on a frame with the default integer row index.
import numpy as np  # needed for np.random.randn below
import pandas as pd

# 8 rows x 4 columns of random normal values; the numbers differ on every run.
d = pd.DataFrame(np.random.randn(8, 4), columns=['A', 'B', 'C', 'D'])

print('\nselect all rows for a specific column \n ')

# First four rows by position.
print('\n d.iloc[:4]\n')
print(d.iloc[:4])

# Rows at positions 2-4, column at position 3 (kept as a one-column DataFrame).
print('\n d.iloc[2:5, 3:4] \n')
print(d.iloc[2:5, 3:4])

# Explicit lists of row and column positions.
print('\n d.iloc[[1, 2, 4], [1, 3]]\n ')
print(d.iloc[[1, 2, 4], [1, 3]])

# Rows at positions 1-3, all columns.
print('\n d.iloc[1:4, :] \n')
print(d.iloc[1:4, :])

# All rows, columns at positions 1-2.
print('\n d.iloc[:,1:3]\n ')
print(d.iloc[:, 1:3])

# DataFrame.ix was removed in pandas 1.0. On this integer-labelled frame
# .loc is the equivalent: the slice is by label and includes label 4, so five
# rows are printed. The printed headings keep the original `d.ix` text.
print('\n d.ix[:4] \n')
print(d.loc[:4])
print("\n d.ix[:,'A'] \n")
print(d.loc[:, 'A'])

# Plain [] with a column name returns that column as a Series.
print("\n d['A'] \n")
print(d['A'])

# Plain [] with a list of names returns those columns in the given order.
print("\n d[['D','B']] \n")
print(d[['D', 'B']])

# Plain [] with a slice selects rows; 2:2 is an empty range, so this prints
# an empty DataFrame.
print('\n d[2:2] \n')
print(d[2:2])

# Attribute access to column B.
print(' \n d.B \n')
print(d.B)

OUTPUT
select all rows for a specific column 


 d.iloc[:4]

          A         B         C         D
0  1.265307  0.234860 -0.224934 -1.265009
1 -0.095622  1.455218 -0.216373  1.120572
2 -0.218187  0.160335  1.844968  0.903696
3  1.494101 -1.471412  0.059047 -0.539891

 d.iloc[2:5, 3:4] 

          D
2  0.903696
3 -0.539891
4  0.762802

 d.iloc[[1, 2, 4], [1, 3]]

          B         D
1  1.455218  1.120572
2  0.160335  0.903696
4 -0.178283  0.762802

 d.iloc[1:4, :] 

          A         B         C         D
1 -0.095622  1.455218 -0.216373  1.120572
2 -0.218187  0.160335  1.844968  0.903696
3  1.494101 -1.471412  0.059047 -0.539891

 d.iloc[:,1:3]

          B         C
0  0.234860 -0.224934
1  1.455218 -0.216373
2  0.160335  1.844968
3 -1.471412  0.059047
4 -0.178283 -1.691390
5  1.176580 -1.511110
6 -0.513844  0.038202
7 -1.259789 -1.021303

 d.ix[:4] 

          A         B         C         D
0  1.265307  0.234860 -0.224934 -1.265009
1 -0.095622  1.455218 -0.216373  1.120572
2 -0.218187  0.160335  1.844968  0.903696
3  1.494101 -1.471412  0.059047 -0.539891
4  1.639339 -0.178283 -1.691390  0.762802

 d.ix[:,'A'] 

0    1.265307
1   -0.095622
2   -0.218187
3    1.494101
4    1.639339
5   -0.649768
6    0.540650
7   -0.737735
Name: A, dtype: float64

 d['A'] 

0    1.265307
1   -0.095622
2   -0.218187
3    1.494101
4    1.639339
5   -0.649768
6    0.540650
7   -0.737735
Name: A, dtype: float64

 d[['D','B']] 

          D         B
0 -1.265009  0.234860
1  1.120572  1.455218
2  0.903696  0.160335
3 -0.539891 -1.471412
4  0.762802 -0.178283
5  1.204925  1.176580
6 -1.238555 -0.513844
7  1.730290 -1.259789

 d[2:2] 

Empty DataFrame
Columns: [A, B, C, D]
Index: []

 d.B 

0    0.234860
1    1.455218
2    0.160335
3   -1.471412
4   -0.178283
5    1.176580
6   -0.513844
7   -1.259789
Name: B, dtype: float64

Basic tutorial 5 - Python Programming  CLICK HERE

NumPy Tutorial 
CLICK HERE
Matplotlib Tutorial CLICK HERE

Pandas Basic tutorial 4 - Python Programming