Pandas常用操作
准备工作
import numpy as np
import pandas as pd
切片
a=pd.Series([2,3,6,1],index=[4,7,3,1])
a[1]#显式大于隐式
1
a[2]#索引为整数时[]只按标签查找,标签2不存在会抛出KeyError;按位置取值应写a.iloc[2](结果为6)
KeyError: 2
loc切片和iloc切片
使用loc切片更明了,表示按自定义的索引切片
a.loc[1]#包含起点也包含终点
1
使用iloc切片表示按默认的行号索引切片
a.iloc[0]
2
a.iloc[0:2]#包含起点不包含终点
4 2 7 3 dtype: int64
注意包含起点与终点
a.loc[4:3]
4 2 7 3 3 6 dtype: int64
只包含起点
a.iloc[1:2]
7 3 dtype: int64
b=pd.DataFrame(np.random.randn(6,3),index=range(2,8), columns=["a","b","c"])
b
a b c 2 -0.984031 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716
b[2:4]#数据框的切片隐式与显式索引容易混淆
a b c 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529
b["a"]
2 -0.984031 3 0.837882 4 -0.626863 5 -0.434401 6 -0.340527 7 0.982684 Name: a, dtype: float64
b.loc[2:4]
a b c 2 -0.984031 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867
b.iloc[2:4]
a b c 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529
b.loc[2:3].a.loc[2]
-0.98403138550212987
b.loc[2].a
-0.98403138550212987
赋值
b.loc[2].a=6
b
a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716
b.loc[2].a="aaa"#链式赋值:b.loc[2]先返回一个临时Series,写入字符串需要改变dtype,修改不会写回b;可靠的写法是b.loc[2,"a"]=值
b
a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716
过滤
a
4 2 7 3 3 6 1 1 dtype: int64
a[a>2]
7 3 3 6 dtype: int64
b
a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716
b[b.b>0]
a b c 2 6.000000 2.002845 -0.547842 5 -0.434401 1.155147 0.351529 7 0.982684 1.290136 -0.236716
b[b["a"]>0]
a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 7 0.982684 1.290136 -0.236716
b[(b.a>0)&(b.c>0)]
a b c
b[(b.a>0)|(b.c>0)]
a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 7 0.982684 1.290136 -0.236716
b[(b.a>0)^(b.c>0)][["a","c"]]
a c 2 6.000000 -0.547842 3 0.837882 -0.051079 4 -0.626863 0.069867 5 -0.434401 0.351529 7 0.982684 -0.236716
迭代
a
4 2 7 3 3 6 1 1 dtype: int64
for i in a.items():#iteritems()已在pandas 2.0中移除,改用items()
    print(i)
(4, 2) (7, 3) (3, 6) (1, 1)
b
a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716
for i,j in b.items():#i为列标,j为该列的数据(Series);iteritems()已在pandas 2.0中移除,改用items()
    print(i)
    print(j)
a 2 6.000000 3 0.837882 4 -0.626863 5 -0.434401 6 -0.340527 7 0.982684 Name: a, dtype: float64 b 2 2.002845 3 -0.111692 4 -0.084946 5 1.155147 6 -1.047685 7 1.290136 Name: b, dtype: float64 c 2 -0.547842 3 -0.051079 4 0.069867 5 0.351529 6 -0.689944 7 -0.236716 Name: c, dtype: float64
for i,j in b.iterrows():#i为行标,j为该行的数据(Series)
    print(i)
    print(j)
2 a 6.000000 b 2.002845 c -0.547842 Name: 2, dtype: float64 3 a 0.837882 b -0.111692 c -0.051079 Name: 3, dtype: float64 4 a -0.626863 b -0.084946 c 0.069867 Name: 4, dtype: float64 5 a -0.434401 b 1.155147 c 0.351529 Name: 5, dtype: float64 6 a -0.340527 b -1.047685 c -0.689944 Name: 6, dtype: float64 7 a 0.982684 b 1.290136 c -0.236716 Name: 7, dtype: float64
排序
a1=pd.Series([2,3,6,1],index=["i","u","2","1"])#排序需要在同种类型的数据中进行(比如整数与字符就不能比较)
a1
i 2 u 3 2 6 1 1 dtype: int64
a1.sort_values()
1 1 i 2 u 3 2 6 dtype: int64
a1.sort_index()
1 1 2 6 i 2 u 3 dtype: int64
b1=pd.DataFrame(np.random.randn(4,3),index=[2,4,1,3], columns=["b","a","k"])
b1
b a k 2 -0.586665 -1.131802 1.163450 4 -1.020215 0.219465 -0.017945 1 1.236446 -0.108178 -0.293046 3 -0.549749 0.897673 0.679992
b1.sort_index()
b a k 1 1.236446 -0.108178 -0.293046 2 -0.586665 -1.131802 1.163450 3 -0.549749 0.897673 0.679992 4 -1.020215 0.219465 -0.017945
b1.sort_index(axis=1)#按列标排
a b k 2 -1.131802 -0.586665 1.163450 4 0.219465 -1.020215 -0.017945 1 -0.108178 1.236446 -0.293046 3 0.897673 -0.549749 0.679992
b1.sort_values(by="k")[["k","b"]]#按某一列排
k b 1 -0.293046 1.236446 4 -0.017945 -1.020215 3 0.679992 -0.549749 2 1.163450 -0.586665
len(b)
6