查看“Pandas常用操作”的源代码
←
Pandas常用操作
跳转至:
导航
,
搜索
因为以下原因,您没有权限编辑本页:
您所请求的操作仅限于该用户组的用户使用:
用户
您可以查看与复制此页面的源代码。
==准备工作== import numpy as np import pandas as pd ==切片== a=pd.Series([2,3,6,1],index=[4,7,3,1]) a[1]#显式大于隐式 1 a[3] 6 ===loc切片和iloc切片=== 使用loc切片更明了,表示按自定义的索引切片 a.loc[1]#包含起点也包含终点 1 使用iloc切片表示按默认的行号索引切片 a.iloc[0] 2 a.iloc[0:2]#包含起点不包含终点 <nowiki>4 2 7 3 dtype: int64</nowiki> 注意包含起点与终点 a.loc[4:3] <nowiki>4 2 7 3 3 6 dtype: int64</nowiki> 只包含起点 a.iloc[1:2] <nowiki>7 3 dtype: int64</nowiki> b=pd.DataFrame(np.random.randn(6,3),index=range(2,8), columns=["a","b","c"]) b <nowiki>a b c 2 -0.984031 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716</nowiki> b[2:4]#数据框的切片隐式与显式索引容易混淆 <nowiki>a b c 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529</nowiki> b["a"] <nowiki>2 -0.984031 3 0.837882 4 -0.626863 5 -0.434401 6 -0.340527 7 0.982684 Name: a, dtype: float64</nowiki> b.loc[2:4] <nowiki>a b c 2 -0.984031 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867</nowiki> b.iloc[2:4] <nowiki>a b c 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529</nowiki> b.loc[2:3].a.loc[2] <nowiki>-0.98403138550212987</nowiki> b.loc[2].a <nowiki>-0.98403138550212987</nowiki> 赋值(注意:b.loc[2].a=6 属于链式赋值,是否生效取决于pandas版本以及返回的是视图还是副本,推荐写成 b.loc[2,"a"]=6) b.loc[2].a=6 b <nowiki>a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716</nowiki> b.loc[2].a="aaa"#字符串赋值不进去的 b <nowiki>a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716</nowiki> ==过滤== a <nowiki>4 2 7 3 3 6 1 1 dtype: int64</nowiki> a[a>2] <nowiki>7 3 3 6 dtype: int64</nowiki> b <nowiki>a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716</nowiki>
b[b.b>0] <nowiki>a b c 2 6.000000 2.002845 -0.547842 5 -0.434401 1.155147 0.351529 7 0.982684 1.290136 -0.236716</nowiki> b[b["a"]>0] <nowiki>a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 7 0.982684 1.290136 -0.236716</nowiki> b[(b.a>0)&(b.c>0)] <nowiki>a b c</nowiki> b[(b.a>0)|(b.c>0)] <nowiki>a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 7 0.982684 1.290136 -0.236716</nowiki> b[(b.a>0)^(b.c>0)][["a","c"]] <nowiki>a c 2 6.000000 -0.547842 3 0.837882 -0.051079 4 -0.626863 0.069867 5 -0.434401 0.351529 7 0.982684 -0.236716</nowiki> ==迭代== a <nowiki>4 2 7 3 3 6 1 1 dtype: int64</nowiki> for i in a.items(): print(i) <nowiki>(4, 2) (7, 3) (3, 6) (1, 1)</nowiki> b <nowiki>a b c 2 6.000000 2.002845 -0.547842 3 0.837882 -0.111692 -0.051079 4 -0.626863 -0.084946 0.069867 5 -0.434401 1.155147 0.351529 6 -0.340527 -1.047685 -0.689944 7 0.982684 1.290136 -0.236716</nowiki> for i,j in b.items():#i为列标,j为该列的数据(Series) print(i) print(j) <nowiki>a 2 6.000000 3 0.837882 4 -0.626863 5 -0.434401 6 -0.340527 7 0.982684 Name: a, dtype: float64 b 2 2.002845 3 -0.111692 4 -0.084946 5 1.155147 6 -1.047685 7 1.290136 Name: b, dtype: float64 c 2 -0.547842 3 -0.051079 4 0.069867 5 0.351529 6 -0.689944 7 -0.236716 Name: c, dtype: float64</nowiki> for i,j in b.iterrows(): print(i) print(j) <nowiki>2 a 6.000000 b 2.002845 c -0.547842 Name: 2, dtype: float64 3 a 0.837882 b -0.111692 c -0.051079 Name: 3, dtype: float64 4 a -0.626863 b -0.084946 c 0.069867 Name: 4, dtype: float64 5 a -0.434401 b 1.155147 c 0.351529 Name: 5, dtype: float64 6 a -0.340527 b -1.047685 c -0.689944 Name: 6, dtype: float64 7 a 0.982684 b 1.290136 c -0.236716 Name: 7, dtype: float64</nowiki> ==排序== a1=pd.Series([2,3,6,1],index=["i","u","2","1"])#排序需要在同种类型的数据中进行(比如整数与字符就不能比较) a1 <nowiki>i 2 u 3 2 6 1 1 dtype: int64</nowiki> a1.sort_values() <nowiki>1 1 i 2 u 3 2 6 dtype: int64</nowiki> a1.sort_index() <nowiki>1 1 2 6 i 2 u 3 dtype: int64 </nowiki>
b1=pd.DataFrame(np.random.randn(4,3),index=[2,4,1,3], columns=["b","a","k"]) b1 <nowiki>b a k 2 -0.586665 -1.131802 1.163450 4 -1.020215 0.219465 -0.017945 1 1.236446 -0.108178 -0.293046 3 -0.549749 0.897673 0.679992 </nowiki> b1.sort_index() <nowiki>b a k 1 1.236446 -0.108178 -0.293046 2 -0.586665 -1.131802 1.163450 3 -0.549749 0.897673 0.679992 4 -1.020215 0.219465 -0.017945</nowiki> b1.sort_index(axis=1)#按列标排 <nowiki>a b k 2 -1.131802 -0.586665 1.163450 4 0.219465 -1.020215 -0.017945 1 -0.108178 1.236446 -0.293046 3 0.897673 -0.549749 0.679992</nowiki> b1.sort_values(by="k")[["k","b"]]#按某一列排 <nowiki>k b 1 -0.293046 1.236446 4 -0.017945 -1.020215 3 0.679992 -0.549749 2 1.163450 -0.586665</nowiki> len(b) 6
返回至
Pandas常用操作
。
导航菜单
个人工具
登录
命名空间
页面
讨论
变种
视图
阅读
查看源代码
查看历史
更多
搜索
导航
首页
最近更改
随机页面
帮助
工具
链入页面
相关更改
特殊页面
页面信息