from pandas import Series,DataFrame data = {'language':['Java','PHP','Python'],'year':[1995,1995,1991]} frame = DataFrame(data) frame['IDE'] = Series(['Intellij','Notepad','IPython'])
'IPython'in frame['IDE'] #输出False
3 in frame['IDE'] #输出True
#猜测 'IPython' in frame['IDE'] <=> 'IPython' in frame['IDE'].index #所以理所当然会输出False #注:并不意味着frame['IDE'] == frame['IDE'].index,只是说两句代码在in中效果差不多
#正确判断方法 'IPython'in frame['IDE'].values #输出True
布尔索引 通过一个简单的列的数值来挑选数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
In [39]: df[df['A'] > 0] Out[39]: A B C D 2013-01-01 0.469112 -0.282863 -1.509059 -1.135632 2013-01-02 1.212112 -0.173215 0.119209 -1.044236 2013-01-04 0.721555 -0.706771 -1.039575 0.271860
In [40]: df[df > 0] Out[40]: A B C D 2013-01-01 0.469112 NaN NaN NaN 2013-01-02 1.212112 NaN 0.119209 NaN 2013-01-03 NaN NaN NaN 1.071804 2013-01-04 0.721555 NaN NaN 0.271860 2013-01-05 NaN 0.567020 0.276232 NaN 2013-01-06 NaN 0.113648 NaN 0.524988
In [42]: df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
In [43]: df2 Out[43]: A B C D E 2013-01-01 0.469112 -0.282863 -1.509059 -1.135632 one 2013-01-02 1.212112 -0.173215 0.119209 -1.044236 one 2013-01-03 -0.861849 -2.104569 -0.494929 1.071804 two 2013-01-04 0.721555 -0.706771 -1.039575 0.271860 three 2013-01-05 -0.424972 0.567020 0.276232 -1.087401 four 2013-01-06 -0.673690 0.113648 -1.478427 0.524988 three
In [44]: df2[df2['E'].isin(['two', 'four'])] Out[44]: A B C D E 2013-01-03 -0.861849 -2.104569 -0.494929 1.071804 two 2013-01-05 -0.424972 0.567020 0.276232 -1.087401 four
In [55]: df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
In [56]: df1.loc[dates[0]:dates[1], 'E'] = 1
In [57]: df1 Out[57]: A B C D F E 2013-01-01 0.000000 0.000000 -1.509059 5 NaN 1.0 2013-01-02 1.212112 -0.173215 0.119209 5 1.0 1.0 2013-01-03 -0.861849 -2.104569 -0.494929 5 2.0 NaN 2013-01-04 0.721555 -0.706771 -1.039575 5 3.0 NaN