In [48]: df2 = pd.DataFrame(data, columns=['year', 'state', 'pop', 'debt'], ....: index=['one', 'two', 'three', 'four', 'five', 'six']) In [49]: df2 Out[49]: year state pop debt one 2000 Ohio 1.5 NaN two 2001 Ohio 1.7 NaN three 2002 Ohio 3.6 NaN four 2001 Nevada 2.4 NaN five 2002 Nevada 2.9 NaN six 2003 Nevada 3.2 NaN
In [50]: df2.columns Out[50]: Index(['year', 'state', 'pop', 'debt'], dtype='object') #通过类似字典标记的方式或属性的方式,可以将DataFrame的列获取为一个Series。 In [51]: df2['state'] #或者df2.state Out[51]: one Ohio two Ohio three Ohio four Nevada five Nevada six Nevada
列可以通过赋值的方式进行修改。例如,我们可以给那个空的“debt”列赋上一个标量值或一组值。将列表或数组赋值给某个列时,其长度必须与DataFrame的长度相匹配。
In [54]: df2['debt'] = 16.5 In [55]: df2 Out[55]: year state pop debt one 2000 Ohio 1.5 16.5 two 2001 Ohio 1.7 16.5 three 2002 Ohio 3.6 16.5 four 2001 Nevada 2.4 16.5 five 2002 Nevada 2.9 16.5 six 2003 Nevada 3.2 16.5
#将Series赋值给某个列时,会按DataFrame的索引精确对齐,索引中缺失的位置将被填充为NaN In [58]: val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five']) In [59]: df2['debt'] = val In [60]: df2 Out[60]: year state pop debt one 2000 Ohio 1.5 NaN two 2001 Ohio 1.7 -1.2 three 2002 Ohio 3.6 NaN four 2001 Nevada 2.4 -1.5 five 2002 Nevada 2.9 -1.7 six 2003 Nevada 3.2 NaN
创建列并赋值
#为不存在的列赋值,会创建出一个新列 In [61]: df2['eastern'] = df2.state == 'Ohio' In [62]: df2 Out[62]: year state pop debt eastern one 2000 Ohio 1.5 NaN True two 2001 Ohio 1.7 -1.2 True three 2002 Ohio 3.6 NaN True four 2001 Nevada 2.4 -1.5 False five 2002 Nevada 2.9 -1.7 False six 2003 Nevada 3.2 NaN False #关键字del用于删除列 In [63]: del df2['eastern'] In [64]: df2.columns Out[64]: Index(['year', 'state', 'pop', 'debt'], dtype='object')