Pandas的简介
1 2 3 |
import numpy as np import pandas as pd |
1 2 3 |
stock_change = np.random.standard_normal((8, 10)) stock_change |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
array([[-0.319916 , -0.23967163, -0.41046746, -1.35562472, 0.63267385, -0.07139894, -1.57339992, 1.70938258, 0.21858344, -0.29157151], [ 0.18470389, -1.09660233, 0.02100129, -0.73167926, -0.91492246, 0.01460134, -1.3167004 , -0.11432369, 0.25573555, -1.18670285], [-0.59055985, -0.17623971, 0.80204004, -0.66910717, 0.54372105, 0.14997427, -0.02112617, 1.25377914, -0.04460044, 1.30122017], [ 0.83597211, -0.29165389, -0.83069685, 2.28929245, -1.30373427, 0.29352009, 0.66476055, 1.3413838 , -0.76707978, 2.40347927], [-0.37786688, -0.08881539, 1.31234789, -0.0206864 , -0.36988485, -0.3437202 , 0.28111663, 0.61622253, 0.33458919, 0.27758043], [ 0.32657733, -1.35816721, 0.61923403, 0.7562302 , -0.85305713, 0.45928524, -0.49779308, -0.20681249, 0.83682242, 0.06419106], [-0.81294432, 0.86920472, -0.11852474, -1.43525399, -0.56544788, -0.81068801, 0.62678625, -1.49899926, -0.34303511, 0.46967279], [ 0.17689773, -0.50296949, -1.15644972, 0.02208686, 1.66321969, -0.03188492, -0.32311774, 0.15231276, 0.13031874, -0.00492389]]) |
1 2 3 |
data = pd.DataFrame(stock_change) data |
0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | -0.319916 | -0.239672 | -0.410467 | -1.355625 | 0.632674 | -0.071399 | -1.573400 | 1.709383 | 0.218583 | -0.291572 |
1 | 0.184704 | -1.096602 | 0.021001 | -0.731679 | -0.914922 | 0.014601 | -1.316700 | -0.114324 | 0.255736 | -1.186703 |
2 | -0.590560 | -0.176240 | 0.802040 | -0.669107 | 0.543721 | 0.149974 | -0.021126 | 1.253779 | -0.044600 | 1.301220 |
3 | 0.835972 | -0.291654 | -0.830697 | 2.289292 | -1.303734 | 0.293520 | 0.664761 | 1.341384 | -0.767080 | 2.403479 |
4 | -0.377867 | -0.088815 | 1.312348 | -0.020686 | -0.369885 | -0.343720 | 0.281117 | 0.616223 | 0.334589 | 0.277580 |
5 | 0.326577 | -1.358167 | 0.619234 | 0.756230 | -0.853057 | 0.459285 | -0.497793 | -0.206812 | 0.836822 | 0.064191 |
6 | -0.812944 | 0.869205 | -0.118525 | -1.435254 | -0.565448 | -0.810688 | 0.626786 | -1.498999 | -0.343035 | 0.469673 |
7 | 0.176898 | -0.502969 | -1.156450 | 0.022087 | 1.663220 | -0.031885 | -0.323118 | 0.152313 | 0.130319 | -0.004924 |
1 2 3 4 |
# 生成行名字列表 codes = ['股票' + str(i) for i in range(8)] codes |
1 2 |
['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7'] |
1 2 3 4 |
# 生成列名字列表 date = pd.date_range('20180603', periods=10, freq='B') date |
1 2 3 4 5 |
DatetimeIndex(['2018-06-04', '2018-06-05', '2018-06-06', '2018-06-07', '2018-06-08', '2018-06-11', '2018-06-12', '2018-06-13', '2018-06-14', '2018-06-15'], dtype='datetime64[ns]', freq='B') |
1 2 3 |
data = pd.DataFrame(stock_change, index=codes, columns=date) data |
2018-06-04 00:00:00 | 2018-06-05 00:00:00 | 2018-06-06 00:00:00 | 2018-06-07 00:00:00 | 2018-06-08 00:00:00 | 2018-06-11 00:00:00 | 2018-06-12 00:00:00 | 2018-06-13 00:00:00 | 2018-06-14 00:00:00 | 2018-06-15 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|
股票0 | -0.319916 | -0.239672 | -0.410467 | -1.355625 | 0.632674 | -0.071399 | -1.573400 | 1.709383 | 0.218583 | -0.291572 |
股票1 | 0.184704 | -1.096602 | 0.021001 | -0.731679 | -0.914922 | 0.014601 | -1.316700 | -0.114324 | 0.255736 | -1.186703 |
股票2 | -0.590560 | -0.176240 | 0.802040 | -0.669107 | 0.543721 | 0.149974 | -0.021126 | 1.253779 | -0.044600 | 1.301220 |
股票3 | 0.835972 | -0.291654 | -0.830697 | 2.289292 | -1.303734 | 0.293520 | 0.664761 | 1.341384 | -0.767080 | 2.403479 |
股票4 | -0.377867 | -0.088815 | 1.312348 | -0.020686 | -0.369885 | -0.343720 | 0.281117 | 0.616223 | 0.334589 | 0.277580 |
股票5 | 0.326577 | -1.358167 | 0.619234 | 0.756230 | -0.853057 | 0.459285 | -0.497793 | -0.206812 | 0.836822 | 0.064191 |
股票6 | -0.812944 | 0.869205 | -0.118525 | -1.435254 | -0.565448 | -0.810688 | 0.626786 | -1.498999 | -0.343035 | 0.469673 |
股票7 | 0.176898 | -0.502969 | -1.156450 | 0.022087 | 1.663220 | -0.031885 | -0.323118 | 0.152313 | 0.130319 | -0.004924 |
DataFrame的属性和方法
1 2 |
data |
2018-06-04 00:00:00 | 2018-06-05 00:00:00 | 2018-06-06 00:00:00 | 2018-06-07 00:00:00 | 2018-06-08 00:00:00 | 2018-06-11 00:00:00 | 2018-06-12 00:00:00 | 2018-06-13 00:00:00 | 2018-06-14 00:00:00 | 2018-06-15 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|
股票0 | -0.319916 | -0.239672 | -0.410467 | -1.355625 | 0.632674 | -0.071399 | -1.573400 | 1.709383 | 0.218583 | -0.291572 |
股票1 | 0.184704 | -1.096602 | 0.021001 | -0.731679 | -0.914922 | 0.014601 | -1.316700 | -0.114324 | 0.255736 | -1.186703 |
股票2 | -0.590560 | -0.176240 | 0.802040 | -0.669107 | 0.543721 | 0.149974 | -0.021126 | 1.253779 | -0.044600 | 1.301220 |
股票3 | 0.835972 | -0.291654 | -0.830697 | 2.289292 | -1.303734 | 0.293520 | 0.664761 | 1.341384 | -0.767080 | 2.403479 |
股票4 | -0.377867 | -0.088815 | 1.312348 | -0.020686 | -0.369885 | -0.343720 | 0.281117 | 0.616223 | 0.334589 | 0.277580 |
股票5 | 0.326577 | -1.358167 | 0.619234 | 0.756230 | -0.853057 | 0.459285 | -0.497793 | -0.206812 | 0.836822 | 0.064191 |
股票6 | -0.812944 | 0.869205 | -0.118525 | -1.435254 | -0.565448 | -0.810688 | 0.626786 | -1.498999 | -0.343035 | 0.469673 |
股票7 | 0.176898 | -0.502969 | -1.156450 | 0.022087 | 1.663220 | -0.031885 | -0.323118 | 0.152313 | 0.130319 | -0.004924 |
1 2 3 |
# 获取数据, 返回ndarray数组 data.values |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
array([[-0.319916 , -0.23967163, -0.41046746, -1.35562472, 0.63267385, -0.07139894, -1.57339992, 1.70938258, 0.21858344, -0.29157151], [ 0.18470389, -1.09660233, 0.02100129, -0.73167926, -0.91492246, 0.01460134, -1.3167004 , -0.11432369, 0.25573555, -1.18670285], [-0.59055985, -0.17623971, 0.80204004, -0.66910717, 0.54372105, 0.14997427, -0.02112617, 1.25377914, -0.04460044, 1.30122017], [ 0.83597211, -0.29165389, -0.83069685, 2.28929245, -1.30373427, 0.29352009, 0.66476055, 1.3413838 , -0.76707978, 2.40347927], [-0.37786688, -0.08881539, 1.31234789, -0.0206864 , -0.36988485, -0.3437202 , 0.28111663, 0.61622253, 0.33458919, 0.27758043], [ 0.32657733, -1.35816721, 0.61923403, 0.7562302 , -0.85305713, 0.45928524, -0.49779308, -0.20681249, 0.83682242, 0.06419106], [-0.81294432, 0.86920472, -0.11852474, -1.43525399, -0.56544788, -0.81068801, 0.62678625, -1.49899926, -0.34303511, 0.46967279], [ 0.17689773, -0.50296949, -1.15644972, 0.02208686, 1.66321969, -0.03188492, -0.32311774, 0.15231276, 0.13031874, -0.00492389]]) |
1 2 3 |
# 获取行索引, 返回Index对象 data.index |
1 2 |
Index(['股票0', '股票1', '股票2', '股票3', '股票4', '股票5', '股票6', '股票7'], dtype='object') |
1 2 3 |
# 获取列索引 data.columns |
1 2 3 4 5 |
DatetimeIndex(['2018-06-04', '2018-06-05', '2018-06-06', '2018-06-07', '2018-06-08', '2018-06-11', '2018-06-12', '2018-06-13', '2018-06-14', '2018-06-15'], dtype='datetime64[ns]', freq='B') |
1 2 3 |
# 获取形状, 返回元组 data.shape |
1 2 |
(8, 10) |
1 2 3 |
# 转置 data.T |
股票0 | 股票1 | 股票2 | 股票3 | 股票4 | 股票5 | 股票6 | 股票7 | |
---|---|---|---|---|---|---|---|---|
2018-06-04 | -0.319916 | 0.184704 | -0.590560 | 0.835972 | -0.377867 | 0.326577 | -0.812944 | 0.176898 |
2018-06-05 | -0.239672 | -1.096602 | -0.176240 | -0.291654 | -0.088815 | -1.358167 | 0.869205 | -0.502969 |
2018-06-06 | -0.410467 | 0.021001 | 0.802040 | -0.830697 | 1.312348 | 0.619234 | -0.118525 | -1.156450 |
2018-06-07 | -1.355625 | -0.731679 | -0.669107 | 2.289292 | -0.020686 | 0.756230 | -1.435254 | 0.022087 |
2018-06-08 | 0.632674 | -0.914922 | 0.543721 | -1.303734 | -0.369885 | -0.853057 | -0.565448 | 1.663220 |
2018-06-11 | -0.071399 | 0.014601 | 0.149974 | 0.293520 | -0.343720 | 0.459285 | -0.810688 | -0.031885 |
2018-06-12 | -1.573400 | -1.316700 | -0.021126 | 0.664761 | 0.281117 | -0.497793 | 0.626786 | -0.323118 |
2018-06-13 | 1.709383 | -0.114324 | 1.253779 | 1.341384 | 0.616223 | -0.206812 | -1.498999 | 0.152313 |
2018-06-14 | 0.218583 | 0.255736 | -0.044600 | -0.767080 | 0.334589 | 0.836822 | -0.343035 | 0.130319 |
2018-06-15 | -0.291572 | -1.186703 | 1.301220 | 2.403479 | 0.277580 | 0.064191 | 0.469673 | -0.004924 |
1 2 3 |
# 查看前几行数据, 默认返回5行 data.head(3) |
2018-06-04 00:00:00 | 2018-06-05 00:00:00 | 2018-06-06 00:00:00 | 2018-06-07 00:00:00 | 2018-06-08 00:00:00 | 2018-06-11 00:00:00 | 2018-06-12 00:00:00 | 2018-06-13 00:00:00 | 2018-06-14 00:00:00 | 2018-06-15 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|
股票0 | -0.319916 | -0.239672 | -0.410467 | -1.355625 | 0.632674 | -0.071399 | -1.573400 | 1.709383 | 0.218583 | -0.291572 |
股票1 | 0.184704 | -1.096602 | 0.021001 | -0.731679 | -0.914922 | 0.014601 | -1.316700 | -0.114324 | 0.255736 | -1.186703 |
股票2 | -0.590560 | -0.176240 | 0.802040 | -0.669107 | 0.543721 | 0.149974 | -0.021126 | 1.253779 | -0.044600 | 1.301220 |
1 2 3 |
# 查看后几行数据, 默认返回5行 data.tail(6) |
2018-06-04 00:00:00 | 2018-06-05 00:00:00 | 2018-06-06 00:00:00 | 2018-06-07 00:00:00 | 2018-06-08 00:00:00 | 2018-06-11 00:00:00 | 2018-06-12 00:00:00 | 2018-06-13 00:00:00 | 2018-06-14 00:00:00 | 2018-06-15 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|
股票2 | -0.590560 | -0.176240 | 0.802040 | -0.669107 | 0.543721 | 0.149974 | -0.021126 | 1.253779 | -0.044600 | 1.301220 |
股票3 | 0.835972 | -0.291654 | -0.830697 | 2.289292 | -1.303734 | 0.293520 | 0.664761 | 1.341384 | -0.767080 | 2.403479 |
股票4 | -0.377867 | -0.088815 | 1.312348 | -0.020686 | -0.369885 | -0.343720 | 0.281117 | 0.616223 | 0.334589 | 0.277580 |
股票5 | 0.326577 | -1.358167 | 0.619234 | 0.756230 | -0.853057 | 0.459285 | -0.497793 | -0.206812 | 0.836822 | 0.064191 |
股票6 | -0.812944 | 0.869205 | -0.118525 | -1.435254 | -0.565448 | -0.810688 | 0.626786 | -1.498999 | -0.343035 | 0.469673 |
股票7 | 0.176898 | -0.502969 | -1.156450 | 0.022087 | 1.663220 | -0.031885 | -0.323118 | 0.152313 | 0.130319 | -0.004924 |
索引设置
1 2 3 |
# 设置索引的三种方法 data.index[0] |
1 2 |
'股票0' |
1 2 3 4 5 6 |
# index中元素不能单独修改, 只能整体修改 # 生成新的index列表, 替换旧index new_index = ['票' + str(i) for i in range(8)] data.index = new_index data |
2018-06-04 00:00:00 | 2018-06-05 00:00:00 | 2018-06-06 00:00:00 | 2018-06-07 00:00:00 | 2018-06-08 00:00:00 | 2018-06-11 00:00:00 | 2018-06-12 00:00:00 | 2018-06-13 00:00:00 | 2018-06-14 00:00:00 | 2018-06-15 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|
票0 | -0.319916 | -0.239672 | -0.410467 | -1.355625 | 0.632674 | -0.071399 | -1.573400 | 1.709383 | 0.218583 | -0.291572 |
票1 | 0.184704 | -1.096602 | 0.021001 | -0.731679 | -0.914922 | 0.014601 | -1.316700 | -0.114324 | 0.255736 | -1.186703 |
票2 | -0.590560 | -0.176240 | 0.802040 | -0.669107 | 0.543721 | 0.149974 | -0.021126 | 1.253779 | -0.044600 | 1.301220 |
票3 | 0.835972 | -0.291654 | -0.830697 | 2.289292 | -1.303734 | 0.293520 | 0.664761 | 1.341384 | -0.767080 | 2.403479 |
票4 | -0.377867 | -0.088815 | 1.312348 | -0.020686 | -0.369885 | -0.343720 | 0.281117 | 0.616223 | 0.334589 | 0.277580 |
票5 | 0.326577 | -1.358167 | 0.619234 | 0.756230 | -0.853057 | 0.459285 | -0.497793 | -0.206812 | 0.836822 | 0.064191 |
票6 | -0.812944 | 0.869205 | -0.118525 | -1.435254 | -0.565448 | -0.810688 | 0.626786 | -1.498999 | -0.343035 | 0.469673 |
票7 | 0.176898 | -0.502969 | -1.156450 | 0.022087 | 1.663220 | -0.031885 | -0.323118 | 0.152313 | 0.130319 | -0.004924 |
1 2 3 |
# 重置索引 data.reset_index(drop=True) # drop=True会直接丢弃原索引; 若不设置drop=True(即下表所示的情况), 原索引会变成名为index的第一列数据 |
index | 2018-06-04 00:00:00 | 2018-06-05 00:00:00 | 2018-06-06 00:00:00 | 2018-06-07 00:00:00 | 2018-06-08 00:00:00 | 2018-06-11 00:00:00 | 2018-06-12 00:00:00 | 2018-06-13 00:00:00 | 2018-06-14 00:00:00 | 2018-06-15 00:00:00 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 票0 | -0.319916 | -0.239672 | -0.410467 | -1.355625 | 0.632674 | -0.071399 | -1.573400 | 1.709383 | 0.218583 | -0.291572 |
1 | 票1 | 0.184704 | -1.096602 | 0.021001 | -0.731679 | -0.914922 | 0.014601 | -1.316700 | -0.114324 | 0.255736 | -1.186703 |
2 | 票2 | -0.590560 | -0.176240 | 0.802040 | -0.669107 | 0.543721 | 0.149974 | -0.021126 | 1.253779 | -0.044600 | 1.301220 |
3 | 票3 | 0.835972 | -0.291654 | -0.830697 | 2.289292 | -1.303734 | 0.293520 | 0.664761 | 1.341384 | -0.767080 | 2.403479 |
4 | 票4 | -0.377867 | -0.088815 | 1.312348 | -0.020686 | -0.369885 | -0.343720 | 0.281117 | 0.616223 | 0.334589 | 0.277580 |
5 | 票5 | 0.326577 | -1.358167 | 0.619234 | 0.756230 | -0.853057 | 0.459285 | -0.497793 | -0.206812 | 0.836822 | 0.064191 |
6 | 票6 | -0.812944 | 0.869205 | -0.118525 | -1.435254 | -0.565448 | -0.810688 | 0.626786 | -1.498999 | -0.343035 | 0.469673 |
7 | 票7 | 0.176898 | -0.502969 | -1.156450 | 0.022087 | 1.663220 | -0.031885 | -0.323118 | 0.152313 | 0.130319 | -0.004924 |
1 2 3 4 5 |
# 把某一列设置为新索引 df = pd.DataFrame({'month': [1, 4, 7, 10], 'year': [2012, 2014, 2013, 2014], 'sale':[55, 40, 84, 31]}) |
1 2 |
df.set_index('month', drop=False) # drop=False: 被设为索引的month列仍保留为一列数据(默认drop=True会将其从列中移除) |
month | year | sale | |
---|---|---|---|
month | |||
1 | 1 | 2012 | 55 |
4 | 4 | 2014 | 40 |
7 | 7 | 2013 | 84 |
10 | 10 | 2014 | 31 |
1 2 3 4 5 6 7 |
# 把某些列设置为新的索引 res = df.set_index(['month', 'year']) # 多层索引 res.index # MultiIndex: 多层(分层)索引对象, 可在二维的DataFrame中表示三维及更高维的数据 # MultiIndex(levels=[[1, 4, 7, 10], [2012, 2013, 2014]], levels: 每一层去重后按序排列的取值 # codes=[[0, 1, 2, 3], [0, 2, 1, 2]], codes: 每一行在对应层levels中的下标 # names=['month', 'year']) names: 每一层索引的名字 |
1 2 3 4 |
MultiIndex(levels=[[1, 4, 7, 10], [2012, 2013, 2014]], codes=[[0, 1, 2, 3], [0, 2, 1, 2]], names=['month', 'year']) |
1 2 |
res |
sale | ||
---|---|---|
month | year | |
1 | 2012 | 55 |
4 | 2014 | 40 |
7 | 2013 | 84 |
10 | 2014 | 31 |
Panel三维数组的数据结构容器 pandas从0.20.0开始弃用, 并在0.25.0中被移除
1 2 3 4 5 |
p = pd.Panel(np.arange(24).reshape(4,3,2), items=list('ABCD'), major_axis=pd.date_range('20130101', periods=3), minor_axis=['first', 'second']) |
1 2 3 4 5 6 7 8 |
D:\Program Files\anaconda\lib\site-packages\IPython\core\interactiveshell.py:3296: FutureWarning: Panel is deprecated and will be removed in a future version. The recommended way to represent these types of 3-dimensional data are with a MultiIndex on a DataFrame, via the Panel.to_frame() method Alternatively, you can use the xarray package http://xarray.pydata.org/en/stable/. Pandas provides a `.to_xarray()` method to help automate this conversion. exec(code_obj, self.user_global_ns, self.user_ns) |
1 2 |
p |
1 2 3 4 5 6 |
<class 'pandas.core.panel.Panel'> Dimensions: 4 (items) x 3 (major_axis) x 2 (minor_axis) Items axis: A to D Major_axis axis: 2013-01-01 00:00:00 to 2013-01-03 00:00:00 Minor_axis axis: first to second |
1 2 |
p['A',:,:] |
first | second | |
---|---|---|
2013-01-01 | 0 | 1 |
2013-01-02 | 2 | 3 |
2013-01-03 | 4 | 5 |
Series一维数组
1 2 3 |
# 包含行索引和数据, 一般对应DataFrame的一列, 对DataFrame的大多数操作同样适用于Series pd.Series([6.7,5.6,3,10,2], index=[1,2,3,4,5]) |
1 2 3 4 5 6 7 |
1 6.7 2 5.6 3 3.0 4 10.0 5 2.0 dtype: float64 |
1 2 3 |
# 指定内容,默认索引 pd.Series(np.arange(10)) |
1 2 3 4 5 6 7 8 9 10 11 12 |
0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 dtype: int32 |
1 2 3 |
# 通过字典数据创建 pd.Series({'red':100, 'blue':200, 'green': 500, 'yellow':1000}) |
1 2 3 4 5 6 |
red 100 blue 200 green 500 yellow 1000 dtype: int64 |
1 2 |
import matplotlib |
1 2 |
matplotlib.matplotlib_fname() |
1 2 |
'D:\\Program Files\\anaconda\\lib\\site-packages\\matplotlib\\mpl-data\\matplotlibrc' |
DataFrame基本操作
1 2 3 4 |
import numpy as np import pandas as pd import matplotlib.pyplot as plt |
1 2 3 |
# 读取数据 data = pd.read_csv('./data/stock_day.csv') |
1 2 |
data.head() |
open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 |
2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 |
2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 |
2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | 35397.58 | 39904.78 | 60149.60 | 0.90 |
2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
# 索引的三种方法 索引名直接索引 loc索引 iloc索引 ix索引(0.20.0已弃用) # 通过中括号索引, 得到series对象 data['close'].head() # 索引名直接索引, 也可以选取多列:[]中放列名列表 data['close']['2018-02-23'] # 先列后行 # loc索引 data.loc[行索引名字, 列索引名字] 可以用 : 表示范围(包含两端) 可以用列表 data.loc['2018-02-23', 'close': 'low'] # 获得Series类型数据 # iloc索引 data.iloc[行下标, 列下标] 可以用 : 表示范围(不包含结束下标) 也可以用列表 data.iloc[2, 2:] |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 |
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-4-460f9db34b63> in <module> 1 # 索引的三种方法 列索引名直接索引 loc索引 iloc索引 ix索引(0.20.0已弃用) 2 # 通过中括号索引, 得到series对象 ----> 3 data['close'].head() 4 5 data['close']['2018-02-23'] # 先列后行 NameError: name 'data' is not defined |
1 2 3 4 5 6 |
# 赋值操作 两种方法 # 直接修改原来的值 data['close'] = 1 # 或者 data.close = 1 # 不推荐使用 |
1 2 3 4 |
# 排序操作 两种方法 sort_values sort_index # 按照某列的值进行排序, 整行数据随之移动, 使用ascending指定升序或降序, 默认升序(ascending=True) # 注意: 此处把结果重新赋值给了data, 之后data只包含排序后的前5行 data = data.sort_values(by='p_change', ascending=False).head() # 也可以指定多列排序,给by参数传列表 |
1 2 3 |
# 按照索引排序 data.sort_index(ascending=False) |
dataframe运算
算术运算
1 2 3 4 |
# add sub mul div 不修改原数据集 data['low'].add(10) data['low'].add(data['close']) # 两列相加 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
2018-02-27 33.53 2018-02-26 32.80 2018-02-23 32.71 2018-02-22 32.02 2018-02-14 31.48 2018-02-13 31.31 2018-02-12 30.63 2018-02-09 30.19 2018-02-08 31.75 2018-02-07 31.29 2018-02-06 32.20 2018-02-05 32.25 2018-02-02 31.53 2018-02-01 32.22 2018-01-31 33.31 2018-01-30 33.70 2018-01-29 33.72 2018-01-26 34.22 2018-01-25 34.23 2018-01-24 35.20 2018-01-23 34.93 2018-01-22 34.75 2018-01-19 34.42 2018-01-18 34.30 2018-01-17 33.80 2018-01-16 33.30 2018-01-15 33.30 2018-01-12 33.42 2018-01-11 33.21 2018-01-10 33.40 ... 2015-04-13 29.50 2015-04-10 29.20 2015-04-09 28.02 2015-04-08 27.60 2015-04-07 26.50 2015-04-03 26.25 2015-04-02 26.21 2015-04-01 26.00 2015-03-31 26.07 2015-03-30 25.99 2015-03-27 24.90 2015-03-26 24.91 2015-03-25 25.18 2015-03-24 25.28 2015-03-23 25.25 2015-03-20 25.18 2015-03-19 25.11 2015-03-18 25.02 2015-03-17 24.63 2015-03-16 24.51 2015-03-13 24.08 2015-03-12 23.95 2015-03-11 24.14 2015-03-10 24.01 2015-03-09 23.80 2015-03-06 23.13 2015-03-05 22.87 2015-03-04 22.61 2015-03-03 22.52 2015-03-02 22.20 Name: low, Length: 643, dtype: float64 |
逻辑运算
1 2 3 |
temp = data['p_change'] > 0 # 返回一维bool数组 temp.head() |
1 2 3 4 5 6 7 |
2018-02-27 True 2018-02-26 True 2018-02-23 True 2018-02-22 True 2018-02-14 True Name: p_change, dtype: bool |
1 2 3 |
# 布尔索引 data[temp].head() # 剔除对应为False的数据 |
open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 |
2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 |
2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 |
2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | 35397.58 | 39904.78 | 60149.60 | 0.90 |
2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 |
1 2 3 |
# 多个逻辑判断 用query()实现更简便 data[(data['p_change'] > 2) & (data['open'] > 15)] |
open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 |
2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 |
2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 |
2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 |
2018-02-12 | 20.70 | 21.40 | 21.19 | 20.63 | 32445.39 | 0.82 | 4.03 | 21.504 | 22.338 | 23.533 | 44645.16 | 45679.94 | 68686.33 | 0.81 |
2018-02-05 | 22.45 | 23.39 | 23.27 | 22.25 | 52341.39 | 0.65 | 2.87 | 23.172 | 23.928 | 24.112 | 46714.72 | 69278.66 | 77070.00 | 1.31 |
2018-01-19 | 24.60 | 25.34 | 25.13 | 24.42 | 128449.11 | 0.53 | 2.15 | 24.432 | 24.254 | 23.537 | 91838.07 | 88985.70 | 82975.10 | 3.21 |
2018-01-16 | 23.40 | 24.60 | 24.40 | 23.30 | 101295.42 | 0.96 | 4.10 | 23.908 | 24.058 | 23.321 | 82003.73 | 101081.47 | 74590.92 | 2.54 |
2018-01-12 | 23.70 | 25.15 | 24.24 | 23.42 | 120303.53 | 0.56 | 2.37 | 24.076 | 23.748 | 23.236 | 86133.33 | 91838.46 | 69690.35 | 3.01 |
2018-01-04 | 22.79 | 25.07 | 25.07 | 22.51 | 130131.15 | 2.28 | 10.00 | 22.966 | 22.690 | 22.935 | 67939.35 | 59938.43 | 57071.47 | 3.26 |
2017-12-26 | 21.73 | 22.66 | 22.23 | 21.73 | 61929.70 | 0.72 | 3.35 | 22.654 | 23.014 | 23.057 | 50753.59 | 48360.90 | 55920.67 | 1.55 |
2017-12-12 | 22.61 | 24.14 | 23.98 | 22.61 | 125206.36 | 1.21 | 5.31 | 22.732 | 23.100 | 24.337 | 64299.97 | 63480.44 | 76113.42 | 3.13 |
2017-11-28 | 24.01 | 24.90 | 24.70 | 24.00 | 49008.50 | 0.50 | 2.07 | 24.530 | 25.573 | 27.421 | 61031.58 | 88746.39 | 145784.73 | 1.23 |
2017-11-14 | 28.00 | 29.89 | 29.34 | 27.68 | 243773.23 | 1.10 | 3.90 | 28.618 | 29.268 | 29.673 | 176050.82 | 202823.06 | 268677.48 | 6.10 |
2017-11-07 | 28.60 | 29.37 | 28.98 | 28.42 | 173107.20 | 0.80 | 2.84 | 29.918 | 31.134 | 28.995 | 229595.30 | 301209.67 | 247155.94 | 4.33 |
2017-10-31 | 32.62 | 35.22 | 34.44 | 32.20 | 361660.88 | 2.38 | 7.42 | 32.350 | 30.077 | 28.406 | 372824.04 | 334531.90 | 241075.48 | 9.05 |
2017-10-27 | 31.45 | 33.20 | 33.11 | 31.45 | 333824.31 | 0.70 | 2.16 | 30.618 | 28.489 | 27.652 | 388894.38 | 282674.40 | 221367.64 | 8.35 |
2017-10-26 | 29.30 | 32.70 | 32.41 | 28.92 | 501915.41 | 2.68 | 9.01 | 29.662 | 27.813 | 27.306 | 404443.54 | 254722.83 | 215395.01 | 12.56 |
2017-10-25 | 27.86 | 30.45 | 29.73 | 27.54 | 328947.31 | 1.68 | 5.99 | 28.612 | 27.198 | 27.072 | 340158.56 | 213120.20 | 208416.55 | 8.23 |
2017-10-23 | 29.00 | 31.16 | 29.79 | 28.90 | 466494.47 | 1.46 | 5.15 | 27.280 | 26.763 | 26.764 | 247111.56 | 170695.43 | 186873.57 | 11.68 |
2017-10-20 | 29.20 | 29.83 | 28.33 | 27.85 | 411570.12 | 1.17 | 4.31 | 26.360 | 26.519 | 26.572 | 176454.41 | 141111.99 | 171445.99 | 10.30 |
2017-10-19 | 25.61 | 27.20 | 27.16 | 25.61 | 180490.47 | 1.47 | 5.72 | 25.964 | 26.465 | 26.500 | 105002.11 | 115721.45 | 159293.58 | 4.52 |
2017-09-27 | 26.52 | 29.01 | 28.26 | 26.52 | 379796.50 | 1.74 | 6.56 | 26.720 | 26.480 | 26.084 | 252358.18 | 191481.98 | 212995.67 | 9.51 |
2017-09-26 | 24.87 | 26.66 | 26.52 | 24.85 | 165497.09 | 1.61 | 6.46 | 26.246 | 26.349 | 25.879 | 196699.10 | 171159.43 | 203831.34 | 4.14 |
2017-09-21 | 26.03 | 28.48 | 27.73 | 25.70 | 362346.28 | 1.84 | 7.11 | 26.440 | 26.736 | 25.495 | 180037.81 | 209235.84 | 194833.73 | 9.07 |
2017-09-08 | 25.50 | 27.47 | 26.93 | 25.32 | 341440.59 | 1.36 | 5.32 | 24.944 | 24.609 | 23.566 | 235166.70 | 201615.98 | 172957.06 | 8.55 |
2017-09-07 | 24.41 | 26.90 | 25.57 | 24.20 | 289668.06 | 1.06 | 4.33 | 24.566 | 24.253 | 23.281 | 201740.80 | 180431.62 | 160218.55 | 7.25 |
2017-09-06 | 23.51 | 24.88 | 24.51 | 23.38 | 161732.80 | 0.56 | 2.34 | 24.576 | 23.958 | 23.105 | 195688.49 | 164855.13 | 150908.43 | 4.05 |
2017-08-31 | 24.16 | 25.88 | 25.62 | 23.78 | 259406.52 | 1.47 | 6.09 | 23.940 | 23.485 | 22.554 | 159122.44 | 189711.82 | 126717.47 | 6.49 |
2017-08-30 | 23.25 | 24.60 | 24.15 | 23.21 | 196509.73 | 1.09 | 4.73 | 23.340 | 23.111 | 22.387 | 134021.77 | 169549.50 | 116718.92 | 4.92 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
2015-07-17 | 20.50 | 21.97 | 21.12 | 20.20 | 95870.55 | 1.05 | 5.23 | 23.028 | 23.815 | 27.230 | 149992.77 | 153160.61 | 138386.76 | 3.28 |
2015-07-13 | 25.63 | 27.90 | 27.28 | 25.63 | 215671.91 | 1.67 | 6.52 | 25.000 | 26.057 | 29.122 | 169405.01 | 137084.82 | 151303.74 | 7.38 |
2015-07-10 | 22.20 | 25.61 | 25.61 | 22.10 | 164179.59 | 2.33 | 10.01 | 24.602 | 26.449 | 29.108 | 156328.46 | 124155.83 | 144964.95 | 5.62 |
2015-06-30 | 23.82 | 25.13 | 25.01 | 21.44 | 174521.44 | 1.19 | 5.00 | 25.830 | 27.929 | 29.447 | 140738.14 | 119712.07 | 142160.16 | 5.97 |
2015-06-24 | 27.60 | 28.50 | 28.05 | 27.48 | 100785.98 | 0.87 | 3.20 | 29.168 | 31.333 | 29.708 | 88217.07 | 139552.02 | 141688.41 | 3.45 |
2015-06-16 | 30.48 | 33.48 | 32.35 | 29.61 | 153130.61 | 0.66 | 2.08 | 33.498 | 31.375 | 29.565 | 190886.97 | 168659.00 | 161871.41 | 5.24 |
2015-06-12 | 34.69 | 35.98 | 35.21 | 34.01 | 159825.88 | 0.82 | 2.38 | 33.420 | 30.513 | 28.683 | 197248.25 | 154480.41 | 153888.00 | 5.47 |
2015-06-09 | 30.46 | 33.34 | 33.34 | 30.46 | 204438.47 | 3.03 | 10.00 | 29.252 | 28.082 | 26.770 | 146431.02 | 143824.79 | 139248.67 | 7.00 |
2015-06-08 | 28.40 | 30.90 | 30.31 | 28.40 | 179868.05 | 2.12 | 7.52 | 28.232 | 27.714 | 26.164 | 128067.98 | 143201.95 | 133185.54 | 6.16 |
2015-06-05 | 27.37 | 28.35 | 28.19 | 27.37 | 139291.88 | 1.19 | 4.41 | 27.606 | 27.675 | 25.677 | 111712.57 | 140397.10 | 128138.18 | 4.77 |
2015-06-02 | 27.50 | 28.59 | 28.24 | 27.40 | 112623.23 | 1.06 | 3.90 | 26.912 | 27.755 | 24.460 | 141218.55 | 155083.83 | 122956.92 | 3.86 |
2015-06-01 | 25.86 | 27.79 | 27.18 | 25.86 | 98091.02 | 1.38 | 5.35 | 27.196 | 27.263 | 24.085 | 158335.93 | 152116.08 | 121744.71 | 3.36 |
2015-05-29 | 25.50 | 26.58 | 25.80 | 23.70 | 128398.88 | 0.53 | 2.10 | 27.744 | 26.852 | 23.838 | 169081.63 | 153295.60 | 119753.75 | 4.40 |
2015-05-22 | 29.87 | 30.99 | 29.54 | 28.53 | 209382.62 | 1.32 | 4.68 | 25.960 | 23.679 | 22.440 | 137509.57 | 115879.26 | 104078.16 | 7.17 |
2015-05-21 | 27.50 | 28.22 | 28.22 | 26.50 | 121190.11 | 2.57 | 10.02 | 24.508 | 22.725 | 22.053 | 117760.48 | 103865.87 | 98766.20 | 4.15 |
2015-05-20 | 24.10 | 25.65 | 25.65 | 24.10 | 164143.16 | 2.33 | 9.99 | 23.292 | 21.784 | 21.733 | 118737.32 | 98652.00 | 99743.64 | 5.62 |
2015-05-18 | 22.28 | 24.45 | 23.07 | 22.08 | 109886.22 | 0.79 | 3.55 | 21.896 | 20.906 | 21.434 | 100442.05 | 91373.34 | 101965.14 | 3.76 |
2015-05-14 | 20.84 | 22.59 | 22.14 | 20.72 | 126074.32 | 1.36 | 6.54 | 20.942 | 20.845 | 21.418 | 89971.27 | 84345.72 | 112331.15 | 4.32 |
2015-05-12 | 20.78 | 21.47 | 21.21 | 20.40 | 83175.93 | 0.63 | 3.06 | 20.014 | 20.937 | 21.653 | 81264.03 | 83223.00 | 122655.85 | 2.85 |
2015-05-11 | 20.31 | 20.84 | 20.58 | 20.15 | 78920.74 | 0.58 | 2.90 | 19.916 | 21.108 | 21.649 | 82304.64 | 90837.92 | 127088.19 | 2.70 |
2015-05-08 | 19.10 | 20.18 | 20.00 | 19.10 | 89248.75 | 1.19 | 6.33 | 20.250 | 21.200 | 21.591 | 78174.86 | 92277.06 | 128790.26 | 3.05 |
2015-04-27 | 22.56 | 23.65 | 22.92 | 22.56 | 159325.12 | 1.42 | 6.61 | 21.952 | 22.368 | 20.090 | 121509.68 | 162088.70 | 138272.77 | 5.45 |
2015-04-21 | 21.00 | 21.97 | 21.71 | 20.80 | 111028.80 | 0.44 | 2.07 | 22.478 | 21.423 | 18.772 | 193934.80 | 174999.73 | 136236.51 | 3.80 |
2015-04-15 | 23.45 | 25.56 | 24.38 | 23.40 | 250311.59 | 1.14 | 4.91 | 21.560 | 19.315 | 17.442 | 174581.78 | 138001.54 | 119340.61 | 8.57 |
2015-04-14 | 21.45 | 23.24 | 23.24 | 21.45 | 154693.44 | 2.11 | 9.99 | 20.368 | 18.494 | 16.982 | 156064.66 | 121417.14 | 114763.57 | 5.30 |
2015-04-13 | 19.60 | 21.30 | 21.13 | 19.50 | 171822.69 | 1.70 | 8.75 | 19.228 | 17.812 | 16.563 | 149620.34 | 114456.84 | 111752.31 | 5.88 |
2015-04-09 | 18.28 | 19.89 | 19.62 | 18.02 | 183119.05 | 1.20 | 6.51 | 17.736 | 16.826 | 15.964 | 124323.21 | 106501.34 | 104829.10 | 6.27 |
2015-04-08 | 17.60 | 18.53 | 18.42 | 17.60 | 157725.97 | 0.88 | 5.02 | 17.070 | 16.394 | 15.698 | 101421.29 | 97906.88 | 101658.57 | 5.40 |
2015-04-07 | 16.54 | 17.98 | 17.54 | 16.50 | 122471.85 | 0.88 | 5.28 | 16.620 | 16.120 | 15.510 | 86769.62 | 97473.29 | 98832.94 | 4.19 |
2015-03-30 | 15.99 | 16.63 | 16.42 | 15.99 | 85090.45 | 0.65 | 4.12 | 15.620 | 15.469 | 14.722 | 108176.96 | 108109.99 | 108345.78 | 2.91 |
161 rows × 14 columns
1 2 3 |
# 逻辑运算函数 data.query()实现获取满足条件的样本 data.query('p_change>2 & open>15') |
open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 |
2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 |
2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 |
2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 |
2018-02-12 | 20.70 | 21.40 | 21.19 | 20.63 | 32445.39 | 0.82 | 4.03 | 21.504 | 22.338 | 23.533 | 44645.16 | 45679.94 | 68686.33 | 0.81 |
2018-02-05 | 22.45 | 23.39 | 23.27 | 22.25 | 52341.39 | 0.65 | 2.87 | 23.172 | 23.928 | 24.112 | 46714.72 | 69278.66 | 77070.00 | 1.31 |
2018-01-19 | 24.60 | 25.34 | 25.13 | 24.42 | 128449.11 | 0.53 | 2.15 | 24.432 | 24.254 | 23.537 | 91838.07 | 88985.70 | 82975.10 | 3.21 |
2018-01-16 | 23.40 | 24.60 | 24.40 | 23.30 | 101295.42 | 0.96 | 4.10 | 23.908 | 24.058 | 23.321 | 82003.73 | 101081.47 | 74590.92 | 2.54 |
2018-01-12 | 23.70 | 25.15 | 24.24 | 23.42 | 120303.53 | 0.56 | 2.37 | 24.076 | 23.748 | 23.236 | 86133.33 | 91838.46 | 69690.35 | 3.01 |
2018-01-04 | 22.79 | 25.07 | 25.07 | 22.51 | 130131.15 | 2.28 | 10.00 | 22.966 | 22.690 | 22.935 | 67939.35 | 59938.43 | 57071.47 | 3.26 |
2017-12-26 | 21.73 | 22.66 | 22.23 | 21.73 | 61929.70 | 0.72 | 3.35 | 22.654 | 23.014 | 23.057 | 50753.59 | 48360.90 | 55920.67 | 1.55 |
2017-12-12 | 22.61 | 24.14 | 23.98 | 22.61 | 125206.36 | 1.21 | 5.31 | 22.732 | 23.100 | 24.337 | 64299.97 | 63480.44 | 76113.42 | 3.13 |
2017-11-28 | 24.01 | 24.90 | 24.70 | 24.00 | 49008.50 | 0.50 | 2.07 | 24.530 | 25.573 | 27.421 | 61031.58 | 88746.39 | 145784.73 | 1.23 |
2017-11-14 | 28.00 | 29.89 | 29.34 | 27.68 | 243773.23 | 1.10 | 3.90 | 28.618 | 29.268 | 29.673 | 176050.82 | 202823.06 | 268677.48 | 6.10 |
2017-11-07 | 28.60 | 29.37 | 28.98 | 28.42 | 173107.20 | 0.80 | 2.84 | 29.918 | 31.134 | 28.995 | 229595.30 | 301209.67 | 247155.94 | 4.33 |
2017-10-31 | 32.62 | 35.22 | 34.44 | 32.20 | 361660.88 | 2.38 | 7.42 | 32.350 | 30.077 | 28.406 | 372824.04 | 334531.90 | 241075.48 | 9.05 |
2017-10-27 | 31.45 | 33.20 | 33.11 | 31.45 | 333824.31 | 0.70 | 2.16 | 30.618 | 28.489 | 27.652 | 388894.38 | 282674.40 | 221367.64 | 8.35 |
2017-10-26 | 29.30 | 32.70 | 32.41 | 28.92 | 501915.41 | 2.68 | 9.01 | 29.662 | 27.813 | 27.306 | 404443.54 | 254722.83 | 215395.01 | 12.56 |
2017-10-25 | 27.86 | 30.45 | 29.73 | 27.54 | 328947.31 | 1.68 | 5.99 | 28.612 | 27.198 | 27.072 | 340158.56 | 213120.20 | 208416.55 | 8.23 |
2017-10-23 | 29.00 | 31.16 | 29.79 | 28.90 | 466494.47 | 1.46 | 5.15 | 27.280 | 26.763 | 26.764 | 247111.56 | 170695.43 | 186873.57 | 11.68 |
2017-10-20 | 29.20 | 29.83 | 28.33 | 27.85 | 411570.12 | 1.17 | 4.31 | 26.360 | 26.519 | 26.572 | 176454.41 | 141111.99 | 171445.99 | 10.30 |
2017-10-19 | 25.61 | 27.20 | 27.16 | 25.61 | 180490.47 | 1.47 | 5.72 | 25.964 | 26.465 | 26.500 | 105002.11 | 115721.45 | 159293.58 | 4.52 |
2017-09-27 | 26.52 | 29.01 | 28.26 | 26.52 | 379796.50 | 1.74 | 6.56 | 26.720 | 26.480 | 26.084 | 252358.18 | 191481.98 | 212995.67 | 9.51 |
2017-09-26 | 24.87 | 26.66 | 26.52 | 24.85 | 165497.09 | 1.61 | 6.46 | 26.246 | 26.349 | 25.879 | 196699.10 | 171159.43 | 203831.34 | 4.14 |
2017-09-21 | 26.03 | 28.48 | 27.73 | 25.70 | 362346.28 | 1.84 | 7.11 | 26.440 | 26.736 | 25.495 | 180037.81 | 209235.84 | 194833.73 | 9.07 |
2017-09-08 | 25.50 | 27.47 | 26.93 | 25.32 | 341440.59 | 1.36 | 5.32 | 24.944 | 24.609 | 23.566 | 235166.70 | 201615.98 | 172957.06 | 8.55 |
2017-09-07 | 24.41 | 26.90 | 25.57 | 24.20 | 289668.06 | 1.06 | 4.33 | 24.566 | 24.253 | 23.281 | 201740.80 | 180431.62 | 160218.55 | 7.25 |
2017-09-06 | 23.51 | 24.88 | 24.51 | 23.38 | 161732.80 | 0.56 | 2.34 | 24.576 | 23.958 | 23.105 | 195688.49 | 164855.13 | 150908.43 | 4.05 |
2017-08-31 | 24.16 | 25.88 | 25.62 | 23.78 | 259406.52 | 1.47 | 6.09 | 23.940 | 23.485 | 22.554 | 159122.44 | 189711.82 | 126717.47 | 6.49 |
2017-08-30 | 23.25 | 24.60 | 24.15 | 23.21 | 196509.73 | 1.09 | 4.73 | 23.340 | 23.111 | 22.387 | 134021.77 | 169549.50 | 116718.92 | 4.92 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
2015-07-17 | 20.50 | 21.97 | 21.12 | 20.20 | 95870.55 | 1.05 | 5.23 | 23.028 | 23.815 | 27.230 | 149992.77 | 153160.61 | 138386.76 | 3.28 |
2015-07-13 | 25.63 | 27.90 | 27.28 | 25.63 | 215671.91 | 1.67 | 6.52 | 25.000 | 26.057 | 29.122 | 169405.01 | 137084.82 | 151303.74 | 7.38 |
2015-07-10 | 22.20 | 25.61 | 25.61 | 22.10 | 164179.59 | 2.33 | 10.01 | 24.602 | 26.449 | 29.108 | 156328.46 | 124155.83 | 144964.95 | 5.62 |
2015-06-30 | 23.82 | 25.13 | 25.01 | 21.44 | 174521.44 | 1.19 | 5.00 | 25.830 | 27.929 | 29.447 | 140738.14 | 119712.07 | 142160.16 | 5.97 |
2015-06-24 | 27.60 | 28.50 | 28.05 | 27.48 | 100785.98 | 0.87 | 3.20 | 29.168 | 31.333 | 29.708 | 88217.07 | 139552.02 | 141688.41 | 3.45 |
2015-06-16 | 30.48 | 33.48 | 32.35 | 29.61 | 153130.61 | 0.66 | 2.08 | 33.498 | 31.375 | 29.565 | 190886.97 | 168659.00 | 161871.41 | 5.24 |
2015-06-12 | 34.69 | 35.98 | 35.21 | 34.01 | 159825.88 | 0.82 | 2.38 | 33.420 | 30.513 | 28.683 | 197248.25 | 154480.41 | 153888.00 | 5.47 |
2015-06-09 | 30.46 | 33.34 | 33.34 | 30.46 | 204438.47 | 3.03 | 10.00 | 29.252 | 28.082 | 26.770 | 146431.02 | 143824.79 | 139248.67 | 7.00 |
2015-06-08 | 28.40 | 30.90 | 30.31 | 28.40 | 179868.05 | 2.12 | 7.52 | 28.232 | 27.714 | 26.164 | 128067.98 | 143201.95 | 133185.54 | 6.16 |
2015-06-05 | 27.37 | 28.35 | 28.19 | 27.37 | 139291.88 | 1.19 | 4.41 | 27.606 | 27.675 | 25.677 | 111712.57 | 140397.10 | 128138.18 | 4.77 |
2015-06-02 | 27.50 | 28.59 | 28.24 | 27.40 | 112623.23 | 1.06 | 3.90 | 26.912 | 27.755 | 24.460 | 141218.55 | 155083.83 | 122956.92 | 3.86 |
2015-06-01 | 25.86 | 27.79 | 27.18 | 25.86 | 98091.02 | 1.38 | 5.35 | 27.196 | 27.263 | 24.085 | 158335.93 | 152116.08 | 121744.71 | 3.36 |
2015-05-29 | 25.50 | 26.58 | 25.80 | 23.70 | 128398.88 | 0.53 | 2.10 | 27.744 | 26.852 | 23.838 | 169081.63 | 153295.60 | 119753.75 | 4.40 |
2015-05-22 | 29.87 | 30.99 | 29.54 | 28.53 | 209382.62 | 1.32 | 4.68 | 25.960 | 23.679 | 22.440 | 137509.57 | 115879.26 | 104078.16 | 7.17 |
2015-05-21 | 27.50 | 28.22 | 28.22 | 26.50 | 121190.11 | 2.57 | 10.02 | 24.508 | 22.725 | 22.053 | 117760.48 | 103865.87 | 98766.20 | 4.15 |
2015-05-20 | 24.10 | 25.65 | 25.65 | 24.10 | 164143.16 | 2.33 | 9.99 | 23.292 | 21.784 | 21.733 | 118737.32 | 98652.00 | 99743.64 | 5.62 |
2015-05-18 | 22.28 | 24.45 | 23.07 | 22.08 | 109886.22 | 0.79 | 3.55 | 21.896 | 20.906 | 21.434 | 100442.05 | 91373.34 | 101965.14 | 3.76 |
2015-05-14 | 20.84 | 22.59 | 22.14 | 20.72 | 126074.32 | 1.36 | 6.54 | 20.942 | 20.845 | 21.418 | 89971.27 | 84345.72 | 112331.15 | 4.32 |
2015-05-12 | 20.78 | 21.47 | 21.21 | 20.40 | 83175.93 | 0.63 | 3.06 | 20.014 | 20.937 | 21.653 | 81264.03 | 83223.00 | 122655.85 | 2.85 |
2015-05-11 | 20.31 | 20.84 | 20.58 | 20.15 | 78920.74 | 0.58 | 2.90 | 19.916 | 21.108 | 21.649 | 82304.64 | 90837.92 | 127088.19 | 2.70 |
2015-05-08 | 19.10 | 20.18 | 20.00 | 19.10 | 89248.75 | 1.19 | 6.33 | 20.250 | 21.200 | 21.591 | 78174.86 | 92277.06 | 128790.26 | 3.05 |
2015-04-27 | 22.56 | 23.65 | 22.92 | 22.56 | 159325.12 | 1.42 | 6.61 | 21.952 | 22.368 | 20.090 | 121509.68 | 162088.70 | 138272.77 | 5.45 |
2015-04-21 | 21.00 | 21.97 | 21.71 | 20.80 | 111028.80 | 0.44 | 2.07 | 22.478 | 21.423 | 18.772 | 193934.80 | 174999.73 | 136236.51 | 3.80 |
2015-04-15 | 23.45 | 25.56 | 24.38 | 23.40 | 250311.59 | 1.14 | 4.91 | 21.560 | 19.315 | 17.442 | 174581.78 | 138001.54 | 119340.61 | 8.57 |
2015-04-14 | 21.45 | 23.24 | 23.24 | 21.45 | 154693.44 | 2.11 | 9.99 | 20.368 | 18.494 | 16.982 | 156064.66 | 121417.14 | 114763.57 | 5.30 |
2015-04-13 | 19.60 | 21.30 | 21.13 | 19.50 | 171822.69 | 1.70 | 8.75 | 19.228 | 17.812 | 16.563 | 149620.34 | 114456.84 | 111752.31 | 5.88 |
2015-04-09 | 18.28 | 19.89 | 19.62 | 18.02 | 183119.05 | 1.20 | 6.51 | 17.736 | 16.826 | 15.964 | 124323.21 | 106501.34 | 104829.10 | 6.27 |
2015-04-08 | 17.60 | 18.53 | 18.42 | 17.60 | 157725.97 | 0.88 | 5.02 | 17.070 | 16.394 | 15.698 | 101421.29 | 97906.88 | 101658.57 | 5.40 |
2015-04-07 | 16.54 | 17.98 | 17.54 | 16.50 | 122471.85 | 0.88 | 5.28 | 16.620 | 16.120 | 15.510 | 86769.62 | 97473.29 | 98832.94 | 4.19 |
2015-03-30 | 15.99 | 16.63 | 16.42 | 15.99 | 85090.45 | 0.65 | 4.12 | 15.620 | 15.469 | 14.722 | 108176.96 | 108109.99 | 108345.78 | 2.91 |
161 rows × 14 columns
1 2 3 4 |
# 判断某列是否为特定值 temp = data['turnover'].isin([2.39]) # 返回bool类型series data[temp] # 取出所有某列为特定值的样本 |
open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 |
2016-09-28 | 19.88 | 20.98 | 20.86 | 19.71 | 95580.75 | 0.98 | 4.93 | 20.458 | 20.897 | 21.784 | 73619.52 | 69253.23 | 107749.35 | 2.39 |
统计运算
1 2 3 |
# count样本数 mean平均值 std标准差 min最小值 max最大值 50%中位数 25%下四分位数 75%上四分位数 data.describe() |
open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 | 643.000000 |
mean | 21.272706 | 21.900513 | 21.336267 | 20.771835 | 99905.519114 | 0.018802 | 0.190280 | 21.302362 | 21.267656 | 21.200946 | 100008.642691 | 100287.542177 | 100797.518398 | 2.936190 |
std | 3.930973 | 4.077578 | 3.942806 | 3.791968 | 73879.119354 | 0.898476 | 4.079698 | 3.880640 | 3.813602 | 3.686186 | 62761.578326 | 56759.082060 | 50101.575639 | 2.079375 |
min | 12.250000 | 12.670000 | 12.360000 | 12.200000 | 1158.120000 | -3.520000 | -10.030000 | 12.520000 | 12.520000 | 12.520000 | 10804.740000 | 19130.510000 | 23311.000000 | 0.040000 |
25% | 19.000000 | 19.500000 | 19.045000 | 18.525000 | 48533.210000 | -0.390000 | -1.850000 | 18.990000 | 19.102000 | 19.275000 | 55794.985000 | 59861.955000 | 64103.675000 | 1.360000 |
50% | 21.440000 | 21.970000 | 21.450000 | 20.980000 | 83175.930000 | 0.050000 | 0.260000 | 21.504000 | 21.623000 | 21.530000 | 86133.330000 | 89234.970000 | 96291.730000 | 2.500000 |
75% | 23.400000 | 24.065000 | 23.415000 | 22.850000 | 127580.055000 | 0.455000 | 2.305000 | 23.318000 | 23.098500 | 23.136000 | 127655.655000 | 124693.255000 | 124001.015000 | 3.915000 |
max | 34.990000 | 36.350000 | 35.210000 | 34.010000 | 501915.410000 | 3.030000 | 10.030000 | 33.696000 | 32.186000 | 29.998000 | 404443.540000 | 360028.160000 | 269280.790000 | 12.560000 |
1 2 3 4 5 6 |
# sum累加 mean平均 median中位 min最小 max最大 mode众数(出现次数最多的值) # abs绝对值 prod累乘 std标准差 var方差 idxmax最大值对应下标 idxmin最小值对应下标 # 求最大值 data['open'].max() # 若不指定某列, 则返回每列最大值 |
1 2 |
34.99 |
累计统计函数
1 2 3 4 5 |
# 累计统计函数 cumsum cummax cummin cumprod # 累计统计函数使用前一般先排序数据 temp = data.sort_index() res = temp['price_change'].cumsum() # 返回一列新数据, series |
1 2 3 4 5 |
# 绘制折线图 series可直接画图 res.plot() plt.title('price changes') plt.show() |
)
自定义运算
1 2 3 4 5 6 |
# apply(func, axis=0) func接收的参数为data对应的一列(或一行),必须有返回值,axis默认是0(按列), 1为按行 # 自定义函数 def fun_1(x): # x为DataFrame中的一列 return x.max() - x.min() data[['open', 'close']].apply(fun_1, axis=0) |
1 2 3 4 |
open 22.74 close 22.85 dtype: float64 |
pandas画图
封装了matplotlib
– DataFrame.plot(x=None, y=None, kind=’line’)
1 2 3 4 5 6 7 8 9 10 11 12 13 |
- x : label or position, default None - y : label, position or list of label, positions, default None - Allows plotting of one column versus another kind : str - ‘line’ : line plot (default) 折线图 - ‘bar’ : vertical bar plot - ‘barh’ : horizontal bar plot - 关于“barh”的解释: - http://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.barh.html - ‘hist’ : histogram - ‘pie’ : pie plot - ‘scatter’ : scatter plot |
– pandas.Series.plot
文件读取与存储
pandas支持操作文件格式有CSV、SQL、XLS、JSON、HDF5等
1 2 3 4 5 |
# 其他格式查看文档 # 读取文档 filepath_or_buffer文件路径 usecols指定读取的列名,列表形式 data_csv = pd.read_csv('./data/stock_day.csv', usecols=['open', 'close']) data_csv.head() |
open | close | |
---|---|---|
2018-02-27 | 23.53 | 24.16 |
2018-02-26 | 22.80 | 23.53 |
2018-02-23 | 22.88 | 22.82 |
2018-02-22 | 22.25 | 22.28 |
2018-02-14 | 21.49 | 21.92 |
保存文档to_csv
– path_or_buf :string or file handle, default None
– sep :character, default ‘,’
– columns :sequence, optional
– mode:’w’:重写, ‘a’ 追加
– index:是否写入行索引, 一般不保存
– header :boolean or list of string, default True,是否写进列索引值
1 2 3 |
# 保存文档 data_csv.to_csv('./data/test.csv', index=False) |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
--------------------------------------------------------------------------- PermissionError Traceback (most recent call last) <ipython-input-32-e5fc83b4a243> in <module> 1 # 保存文档 ----> 2 data_csv.to_csv('./data/test.csv', index=False) D:\Program Files\anaconda\lib\site-packages\pandas\core\generic.py in to_csv(self, path_or_buf, sep, na_rep, float_format, columns, header, index, index_label, mode, encoding, compression, quoting, quotechar, line_terminator, chunksize, tupleize_cols, date_format, doublequote, escapechar, decimal) 3018 doublequote=doublequote, 3019 escapechar=escapechar, decimal=decimal) -> 3020 formatter.save() 3021 3022 if path_or_buf is None: D:\Program Files\anaconda\lib\site-packages\pandas\io\formats\csvs.py in save(self) 155 f, handles = _get_handle(self.path_or_buf, self.mode, 156 encoding=self.encoding, --> 157 compression=self.compression) 158 close = True 159 D:\Program Files\anaconda\lib\site-packages\pandas\io\common.py in _get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text) 422 elif encoding: 423 # Python 3 and encoding --> 424 f = open(path_or_buf, mode, encoding=encoding, newline="") 425 elif is_text: 426 # Python 3 and no explicit encoding PermissionError: [Errno 13] Permission denied: './data/test.csv' |
HDF5的读取和存储
1 2 3 |
# 读取数据, 返回DataFrame数据 data_hdf = pd.read_hdf('./data/stock_data/day/day_eps_ttm.h5') |
1 2 3 |
# 存储数据 二进制文件 data_hdf.to_hdf('./data/test2.h5', key='test') # 必须指定key |
json文件的读取和存储
- pandas.read_json(path_or_buf=None, orient=None, typ=’frame’, lines=False)
- 将JSON格式转换成默认的Pandas DataFrame格式
- orient : string,Indication of expected JSON string format.
- ‘split’ : dict like {index -> [index], columns -> [columns], data -> [values]}
- split 将索引归到索引,列名归到列名,数据归到数据,即把三部分分开存放
- ‘records’ : list like [{column -> value}, … , {column -> value}]
- records 以columns:values的形式输出
- ‘index’ : dict like {index -> {column -> value}}
- index 以index:{columns:values}…的形式输出
- ‘columns’ : dict like {column -> {index -> value}},默认该格式
- columns 以columns:{index:values}的形式输出
- ‘values’ : just the values array
- values 直接输出值
- ‘split’ : dict like {index -> [index], columns -> [columns], data -> [values]}
- lines : boolean, default False
- 按照每行读取json对象
- typ : default ‘frame’, 指定转换成的对象类型series或者dataframe
1 2 3 |
# 读取文件 orient='records'格式 data_json = pd.read_json('./data/Sarcasm_Headlines_Dataset.json', orient='records', lines=True) |
1 2 3 |
# 写入文件 lines=True:每个样本一个json字符串 data_json.to_json('./data/test.json', orient='records', lines=True ) |
1 |
<br /> |
Pandas进阶
1 2 3 4 |
import pandas as pd import numpy as np import matplotlib.pyplot as plt |
缺失值处理
查找缺失值
- NaN not a number
- 特殊字符标记的缺失(如 ?、空格、‘NULL’): 需要先确定用什么字符表示缺失状态, 将其替换成NaN, 再进行处理
1 2 3 |
# 读取数据 data = pd.read_csv('./data/IMDB-Movie-Data.csv') |
1 2 3 4 5 |
# 判断是否有缺失, isnull()判断每个元素是否是NaN, 返回bool类型的df res_1 = pd.isnull(data) # 判断res_1中是否有True np.any(res_1) |
1 2 |
True |
1 2 3 4 5 |
# 判断是否有缺失, notnull()判断每个元素是否不是NaN, 返回bool类型的df res_2 = pd.notnull(data) # 判断res_2中是否全部都是True np.all(res_2) |
1 2 |
False |
处理缺失值
- 删除 整行样本
- 填充 填合理的值
1 2 3 4 |
# 处理缺失值的两种方法 # 删除存在缺失值的整行样本, 不修改原data data.dropna() |
Rank | Title | Genre | Description | Director | Actors | Year | Runtime (Minutes) | Rating | Votes | Revenue (Millions) | Metascore | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Guardians of the Galaxy | Action,Adventure,Sci-Fi | A group of intergalactic criminals are forced … | James Gunn | Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S… | 2014 | 121 | 8.1 | 757074 | 333.13 | 76.0 |
1 | 2 | Prometheus | Adventure,Mystery,Sci-Fi | Following clues to the origin of mankind, a te… | Ridley Scott | Noomi Rapace, Logan Marshall-Green, Michael Fa… | 2012 | 124 | 7.0 | 485820 | 126.46 | 65.0 |
2 | 3 | Split | Horror,Thriller | Three girls are kidnapped by a man with a diag… | M. Night Shyamalan | James McAvoy, Anya Taylor-Joy, Haley Lu Richar… | 2016 | 117 | 7.3 | 157606 | 138.12 | 62.0 |
3 | 4 | Sing | Animation,Comedy,Family | In a city of humanoid animals, a hustling thea… | Christophe Lourdelet | Matthew McConaughey,Reese Witherspoon, Seth Ma… | 2016 | 108 | 7.2 | 60545 | 270.32 | 59.0 |
4 | 5 | Suicide Squad | Action,Adventure,Fantasy | A secret government agency recruits some of th… | David Ayer | Will Smith, Jared Leto, Margot Robbie, Viola D… | 2016 | 123 | 6.2 | 393727 | 325.02 | 40.0 |
5 | 6 | The Great Wall | Action,Adventure,Fantasy | European mercenaries searching for black powde… | Yimou Zhang | Matt Damon, Tian Jing, Willem Dafoe, Andy Lau | 2016 | 103 | 6.1 | 56036 | 45.13 | 42.0 |
6 | 7 | La La Land | Comedy,Drama,Music | A jazz pianist falls for an aspiring actress i… | Damien Chazelle | Ryan Gosling, Emma Stone, Rosemarie DeWitt, J…. | 2016 | 128 | 8.3 | 258682 | 151.06 | 93.0 |
8 | 9 | The Lost City of Z | Action,Adventure,Biography | A true-life drama, centering on British explor… | James Gray | Charlie Hunnam, Robert Pattinson, Sienna Mille… | 2016 | 141 | 7.1 | 7188 | 8.01 | 78.0 |
9 | 10 | Passengers | Adventure,Drama,Romance | A spacecraft traveling to a distant colony pla… | Morten Tyldum | Jennifer Lawrence, Chris Pratt, Michael Sheen,… | 2016 | 116 | 7.0 | 192177 | 100.01 | 41.0 |
10 | 11 | Fantastic Beasts and Where to Find Them | Adventure,Family,Fantasy | The adventures of writer Newt Scamander in New… | David Yates | Eddie Redmayne, Katherine Waterston, Alison Su… | 2016 | 133 | 7.5 | 232072 | 234.02 | 66.0 |
11 | 12 | Hidden Figures | Biography,Drama,History | The story of a team of female African-American… | Theodore Melfi | Taraji P. Henson, Octavia Spencer, Janelle Mon… | 2016 | 127 | 7.8 | 93103 | 169.27 | 74.0 |
12 | 13 | Rogue One | Action,Adventure,Sci-Fi | The Rebel Alliance makes a risky move to steal… | Gareth Edwards | Felicity Jones, Diego Luna, Alan Tudyk, Donnie… | 2016 | 133 | 7.9 | 323118 | 532.17 | 65.0 |
13 | 14 | Moana | Animation,Adventure,Comedy | In Ancient Polynesia, when a terrible curse in… | Ron Clements | Auli’i Cravalho, Dwayne Johnson, Rachel House,… | 2016 | 107 | 7.7 | 118151 | 248.75 | 81.0 |
14 | 15 | Colossal | Action,Comedy,Drama | Gloria is an out-of-work party girl forced to … | Nacho Vigalondo | Anne Hathaway, Jason Sudeikis, Austin Stowell,… | 2016 | 109 | 6.4 | 8612 | 2.87 | 70.0 |
15 | 16 | The Secret Life of Pets | Animation,Adventure,Comedy | The quiet life of a terrier named Max is upend… | Chris Renaud | Louis C.K., Eric Stonestreet, Kevin Hart, Lake… | 2016 | 87 | 6.6 | 120259 | 368.31 | 61.0 |
16 | 17 | Hacksaw Ridge | Biography,Drama,History | WWII American Army Medic Desmond T. Doss, who … | Mel Gibson | Andrew Garfield, Sam Worthington, Luke Bracey,… | 2016 | 139 | 8.2 | 211760 | 67.12 | 71.0 |
17 | 18 | Jason Bourne | Action,Thriller | The CIA’s most dangerous former operative is d… | Paul Greengrass | Matt Damon, Tommy Lee Jones, Alicia Vikander,V… | 2016 | 123 | 6.7 | 150823 | 162.16 | 58.0 |
18 | 19 | Lion | Biography,Drama | A five-year-old Indian boy gets lost on the st… | Garth Davis | Dev Patel, Nicole Kidman, Rooney Mara, Sunny P… | 2016 | 118 | 8.1 | 102061 | 51.69 | 69.0 |
19 | 20 | Arrival | Drama,Mystery,Sci-Fi | When twelve mysterious spacecraft appear aroun… | Denis Villeneuve | Amy Adams, Jeremy Renner, Forest Whitaker,Mich… | 2016 | 116 | 8.0 | 340798 | 100.50 | 81.0 |
20 | 21 | Gold | Adventure,Drama,Thriller | Kenny Wells, a prospector desperate for a luck… | Stephen Gaghan | Matthew McConaughey, Edgar Ramírez, Bryce Dall… | 2016 | 120 | 6.7 | 19053 | 7.22 | 49.0 |
21 | 22 | Manchester by the Sea | Drama | A depressed uncle is asked to take care of his… | Kenneth Lonergan | Casey Affleck, Michelle Williams, Kyle Chandle… | 2016 | 137 | 7.9 | 134213 | 47.70 | 96.0 |
23 | 24 | Trolls | Animation,Adventure,Comedy | After the Bergens invade Troll Village, Poppy,… | Walt Dohrn | Anna Kendrick, Justin Timberlake,Zooey Deschan… | 2016 | 92 | 6.5 | 38552 | 153.69 | 56.0 |
24 | 25 | Independence Day: Resurgence | Action,Adventure,Sci-Fi | Two decades after the first Independence Day i… | Roland Emmerich | Liam Hemsworth, Jeff Goldblum, Bill Pullman,Ma… | 2016 | 120 | 5.3 | 127553 | 103.14 | 32.0 |
28 | 29 | Bad Moms | Comedy | When three overworked and under-appreciated mo… | Jon Lucas | Mila Kunis, Kathryn Hahn, Kristen Bell,Christi… | 2016 | 100 | 6.2 | 66540 | 113.08 | 60.0 |
29 | 30 | Assassin’s Creed | Action,Adventure,Drama | When Callum Lynch explores the memories of his… | Justin Kurzel | Michael Fassbender, Marion Cotillard, Jeremy I… | 2016 | 115 | 5.9 | 112813 | 54.65 | 36.0 |
30 | 31 | Why Him? | Comedy | A holiday gathering threatens to go off the ra… | John Hamburg | Zoey Deutch, James Franco, Tangie Ambrose,Cedr… | 2016 | 111 | 6.3 | 48123 | 60.31 | 39.0 |
31 | 32 | Nocturnal Animals | Drama,Thriller | A wealthy art gallery owner is haunted by her … | Tom Ford | Amy Adams, Jake Gyllenhaal, Michael Shannon, A… | 2016 | 116 | 7.5 | 126030 | 10.64 | 67.0 |
32 | 33 | X-Men: Apocalypse | Action,Adventure,Sci-Fi | After the re-emergence of the world’s first mu… | Bryan Singer | James McAvoy, Michael Fassbender, Jennifer Law… | 2016 | 144 | 7.1 | 275510 | 155.33 | 52.0 |
33 | 34 | Deadpool | Action,Adventure,Comedy | A fast-talking mercenary with a morbid sense o… | Tim Miller | Ryan Reynolds, Morena Baccarin, T.J. Miller, E… | 2016 | 108 | 8.0 | 627797 | 363.02 | 65.0 |
34 | 35 | Resident Evil: The Final Chapter | Action,Horror,Sci-Fi | Alice returns to where the nightmare began: Th… | Paul W.S. Anderson | Milla Jovovich, Iain Glen, Ali Larter, Shawn R… | 2016 | 107 | 5.6 | 46165 | 26.84 | 49.0 |
… | … | … | … | … | … | … | … | … | … | … | … | … |
955 | 956 | That Awkward Moment | Comedy,Romance | Three best friends find themselves where we’ve… | Tom Gormican | Zac Efron, Michael B. Jordan, Miles Teller, Im… | 2014 | 94 | 6.2 | 81823 | 26.05 | 36.0 |
956 | 957 | Legion | Action,Fantasy,Horror | When a group of strangers at a dusty roadside … | Scott Stewart | Paul Bettany, Dennis Quaid, Charles S. Dutton,… | 2010 | 100 | 5.2 | 84158 | 40.17 | 32.0 |
957 | 958 | End of Watch | Crime,Drama,Thriller | Shot documentary-style, this film follows the … | David Ayer | Jake Gyllenhaal, Michael Peña, Anna Kendrick, … | 2012 | 109 | 7.7 | 192190 | 40.98 | 68.0 |
958 | 959 | 3 Days to Kill | Action,Drama,Thriller | A dying CIA agent trying to reconnect with his… | McG | Kevin Costner, Hailee Steinfeld, Connie Nielse… | 2014 | 117 | 6.2 | 73567 | 30.69 | 40.0 |
959 | 960 | Lucky Number Slevin | Crime,Drama,Mystery | A case of mistaken identity lands Slevin into … | Paul McGuigan | Josh Hartnett, Ben Kingsley, Morgan Freeman, L… | 2006 | 110 | 7.8 | 271940 | 22.49 | 53.0 |
960 | 961 | Trance | Crime,Drama,Mystery | An art auctioneer who has become mixed up with… | Danny Boyle | James McAvoy, Rosario Dawson, Vincent Cassel,D… | 2013 | 101 | 7.0 | 97141 | 2.32 | 61.0 |
961 | 962 | Into the Forest | Drama,Sci-Fi,Thriller | After a massive power outage, two sisters lear… | Patricia Rozema | Ellen Page, Evan Rachel Wood, Max Minghella,Ca… | 2015 | 101 | 5.9 | 10220 | 0.01 | 59.0 |
962 | 963 | The Other Boleyn Girl | Biography,Drama,History | Two sisters contend for the affection of King … | Justin Chadwick | Natalie Portman, Scarlett Johansson, Eric Bana… | 2008 | 115 | 6.7 | 88260 | 26.81 | 50.0 |
963 | 964 | I Spit on Your Grave | Crime,Horror,Thriller | A writer who is brutalized during her cabin re… | Steven R. Monroe | Sarah Butler, Jeff Branson, Andrew Howard,Dani… | 2010 | 108 | 6.3 | 60133 | 0.09 | 27.0 |
970 | 971 | Texas Chainsaw 3D | Horror,Thriller | A young woman travels to Texas to collect an i… | John Luessenhop | Alexandra Daddario, Tania Raymonde, Scott East… | 2013 | 92 | 4.8 | 37060 | 34.33 | 62.0 |
972 | 973 | Rock of Ages | Comedy,Drama,Musical | A small town girl and a city boy meet on the S… | Adam Shankman | Julianne Hough, Diego Boneta, Tom Cruise, Alec… | 2012 | 123 | 5.9 | 64513 | 38.51 | 47.0 |
973 | 974 | Scream 4 | Horror,Mystery | Ten years have passed, and Sidney Prescott, wh… | Wes Craven | Neve Campbell, Courteney Cox, David Arquette, … | 2011 | 111 | 6.2 | 108544 | 38.18 | 52.0 |
974 | 975 | Queen of Katwe | Biography,Drama,Sport | A Ugandan girl sees her world rapidly change a… | Mira Nair | Madina Nalwanga, David Oyelowo, Lupita Nyong’o… | 2016 | 124 | 7.4 | 6753 | 8.81 | 73.0 |
975 | 976 | My Big Fat Greek Wedding 2 | Comedy,Family,Romance | A Portokalos family secret brings the beloved … | Kirk Jones | Nia Vardalos, John Corbett, Michael Constantin… | 2016 | 94 | 6.0 | 20966 | 59.57 | 37.0 |
979 | 980 | The Skin I Live In | Drama,Thriller | A brilliant plastic surgeon, haunted by past t… | Pedro Almodóvar | Antonio Banderas, Elena Anaya, Jan Cornet,Mari… | 2011 | 120 | 7.6 | 108772 | 3.19 | 70.0 |
980 | 981 | Miracles from Heaven | Biography,Drama,Family | A young girl suffering from a rare digestive d… | Patricia Riggen | Jennifer Garner, Kylie Rogers, Martin Henderso… | 2016 | 109 | 7.0 | 12048 | 61.69 | 44.0 |
981 | 982 | Annie | Comedy,Drama,Family | A foster kid, who lives with her mean foster m… | Will Gluck | Quvenzhané Wallis, Cameron Diaz, Jamie Foxx, R… | 2014 | 118 | 5.3 | 27312 | 85.91 | 33.0 |
982 | 983 | Across the Universe | Drama,Fantasy,Musical | The music of the Beatles and the Vietnam War f… | Julie Taymor | Evan Rachel Wood, Jim Sturgess, Joe Anderson, … | 2007 | 133 | 7.4 | 95172 | 24.34 | 56.0 |
983 | 984 | Let’s Be Cops | Comedy | Two struggling pals dress as police officers f… | Luke Greenfield | Jake Johnson, Damon Wayans Jr., Rob Riggle, Ni… | 2014 | 104 | 6.5 | 112729 | 82.39 | 30.0 |
984 | 985 | Max | Adventure,Family | A Malinois dog that helped American Marines in… | Boaz Yakin | Thomas Haden Church, Josh Wiggins, Luke Kleint… | 2015 | 111 | 6.8 | 21405 | 42.65 | 47.0 |
985 | 986 | Your Highness | Adventure,Comedy,Fantasy | When Prince Fabious’s bride is kidnapped, he g… | David Gordon Green | Danny McBride, Natalie Portman, James Franco, … | 2011 | 102 | 5.6 | 87904 | 21.56 | 31.0 |
986 | 987 | Final Destination 5 | Horror,Thriller | Survivors of a suspension-bridge collapse lear… | Steven Quale | Nicholas D’Agosto, Emma Bell, Arlen Escarpeta,… | 2011 | 92 | 5.9 | 88000 | 42.58 | 50.0 |
987 | 988 | Endless Love | Drama,Romance | The story of a privileged girl and a charismat… | Shana Feste | Gabriella Wilde, Alex Pettyfer, Bruce Greenwoo… | 2014 | 104 | 6.3 | 33688 | 23.39 | 30.0 |
990 | 991 | Underworld: Rise of the Lycans | Action,Adventure,Fantasy | An origins story centered on the centuries-old… | Patrick Tatopoulos | Rhona Mitra, Michael Sheen, Bill Nighy, Steven… | 2009 | 92 | 6.6 | 129708 | 45.80 | 44.0 |
991 | 992 | Taare Zameen Par | Drama,Family,Music | An eight-year-old boy is thought to be a lazy … | Aamir Khan | Darsheel Safary, Aamir Khan, Tanay Chheda, Sac… | 2007 | 165 | 8.5 | 102697 | 1.20 | 42.0 |
993 | 994 | Resident Evil: Afterlife | Action,Adventure,Horror | While still out to destroy the evil Umbrella C… | Paul W.S. Anderson | Milla Jovovich, Ali Larter, Wentworth Miller,K… | 2010 | 97 | 5.9 | 140900 | 60.13 | 37.0 |
994 | 995 | Project X | Comedy | 3 high school seniors throw a birthday party t… | Nima Nourizadeh | Thomas Mann, Oliver Cooper, Jonathan Daniel Br… | 2012 | 88 | 6.7 | 164088 | 54.72 | 48.0 |
996 | 997 | Hostel: Part II | Horror | Three American college students studying abroa… | Eli Roth | Lauren German, Heather Matarazzo, Bijou Philli… | 2007 | 94 | 5.5 | 73152 | 17.54 | 46.0 |
997 | 998 | Step Up 2: The Streets | Drama,Music,Romance | Romantic sparks occur between two dance studen… | Jon M. Chu | Robert Hoffman, Briana Evigan, Cassie Ventura,… | 2008 | 98 | 6.2 | 70699 | 58.01 | 50.0 |
999 | 1000 | Nine Lives | Comedy,Family,Fantasy | A stuffy businessman finds himself trapped ins… | Barry Sonnenfeld | Kevin Spacey, Jennifer Garner, Robbie Amell,Ch… | 2016 | 87 | 5.3 | 12435 | 19.64 | 11.0 |
838 rows × 12 columns
1 2 3 |
# 填充(替换)缺失值, 不修改原data data.fillna(value=data.mean()) |
Rank | Title | Genre | Description | Director | Actors | Year | Runtime (Minutes) | Rating | Votes | Revenue (Millions) | Metascore | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | Guardians of the Galaxy | Action,Adventure,Sci-Fi | A group of intergalactic criminals are forced … | James Gunn | Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S… | 2014 | 121 | 8.1 | 757074 | 333.130000 | 76.000000 |
1 | 2 | Prometheus | Adventure,Mystery,Sci-Fi | Following clues to the origin of mankind, a te… | Ridley Scott | Noomi Rapace, Logan Marshall-Green, Michael Fa… | 2012 | 124 | 7.0 | 485820 | 126.460000 | 65.000000 |
2 | 3 | Split | Horror,Thriller | Three girls are kidnapped by a man with a diag… | M. Night Shyamalan | James McAvoy, Anya Taylor-Joy, Haley Lu Richar… | 2016 | 117 | 7.3 | 157606 | 138.120000 | 62.000000 |
3 | 4 | Sing | Animation,Comedy,Family | In a city of humanoid animals, a hustling thea… | Christophe Lourdelet | Matthew McConaughey,Reese Witherspoon, Seth Ma… | 2016 | 108 | 7.2 | 60545 | 270.320000 | 59.000000 |
4 | 5 | Suicide Squad | Action,Adventure,Fantasy | A secret government agency recruits some of th… | David Ayer | Will Smith, Jared Leto, Margot Robbie, Viola D… | 2016 | 123 | 6.2 | 393727 | 325.020000 | 40.000000 |
5 | 6 | The Great Wall | Action,Adventure,Fantasy | European mercenaries searching for black powde… | Yimou Zhang | Matt Damon, Tian Jing, Willem Dafoe, Andy Lau | 2016 | 103 | 6.1 | 56036 | 45.130000 | 42.000000 |
6 | 7 | La La Land | Comedy,Drama,Music | A jazz pianist falls for an aspiring actress i… | Damien Chazelle | Ryan Gosling, Emma Stone, Rosemarie DeWitt, J…. | 2016 | 128 | 8.3 | 258682 | 151.060000 | 93.000000 |
7 | 8 | Mindhorn | Comedy | A has-been actor best known for playing the ti… | Sean Foley | Essie Davis, Andrea Riseborough, Julian Barrat… | 2016 | 89 | 6.4 | 2490 | 82.956376 | 71.000000 |
8 | 9 | The Lost City of Z | Action,Adventure,Biography | A true-life drama, centering on British explor… | James Gray | Charlie Hunnam, Robert Pattinson, Sienna Mille… | 2016 | 141 | 7.1 | 7188 | 8.010000 | 78.000000 |
9 | 10 | Passengers | Adventure,Drama,Romance | A spacecraft traveling to a distant colony pla… | Morten Tyldum | Jennifer Lawrence, Chris Pratt, Michael Sheen,… | 2016 | 116 | 7.0 | 192177 | 100.010000 | 41.000000 |
10 | 11 | Fantastic Beasts and Where to Find Them | Adventure,Family,Fantasy | The adventures of writer Newt Scamander in New… | David Yates | Eddie Redmayne, Katherine Waterston, Alison Su… | 2016 | 133 | 7.5 | 232072 | 234.020000 | 66.000000 |
11 | 12 | Hidden Figures | Biography,Drama,History | The story of a team of female African-American… | Theodore Melfi | Taraji P. Henson, Octavia Spencer, Janelle Mon… | 2016 | 127 | 7.8 | 93103 | 169.270000 | 74.000000 |
12 | 13 | Rogue One | Action,Adventure,Sci-Fi | The Rebel Alliance makes a risky move to steal… | Gareth Edwards | Felicity Jones, Diego Luna, Alan Tudyk, Donnie… | 2016 | 133 | 7.9 | 323118 | 532.170000 | 65.000000 |
13 | 14 | Moana | Animation,Adventure,Comedy | In Ancient Polynesia, when a terrible curse in… | Ron Clements | Auli’i Cravalho, Dwayne Johnson, Rachel House,… | 2016 | 107 | 7.7 | 118151 | 248.750000 | 81.000000 |
14 | 15 | Colossal | Action,Comedy,Drama | Gloria is an out-of-work party girl forced to … | Nacho Vigalondo | Anne Hathaway, Jason Sudeikis, Austin Stowell,… | 2016 | 109 | 6.4 | 8612 | 2.870000 | 70.000000 |
15 | 16 | The Secret Life of Pets | Animation,Adventure,Comedy | The quiet life of a terrier named Max is upend… | Chris Renaud | Louis C.K., Eric Stonestreet, Kevin Hart, Lake… | 2016 | 87 | 6.6 | 120259 | 368.310000 | 61.000000 |
16 | 17 | Hacksaw Ridge | Biography,Drama,History | WWII American Army Medic Desmond T. Doss, who … | Mel Gibson | Andrew Garfield, Sam Worthington, Luke Bracey,… | 2016 | 139 | 8.2 | 211760 | 67.120000 | 71.000000 |
17 | 18 | Jason Bourne | Action,Thriller | The CIA’s most dangerous former operative is d… | Paul Greengrass | Matt Damon, Tommy Lee Jones, Alicia Vikander,V… | 2016 | 123 | 6.7 | 150823 | 162.160000 | 58.000000 |
18 | 19 | Lion | Biography,Drama | A five-year-old Indian boy gets lost on the st… | Garth Davis | Dev Patel, Nicole Kidman, Rooney Mara, Sunny P… | 2016 | 118 | 8.1 | 102061 | 51.690000 | 69.000000 |
19 | 20 | Arrival | Drama,Mystery,Sci-Fi | When twelve mysterious spacecraft appear aroun… | Denis Villeneuve | Amy Adams, Jeremy Renner, Forest Whitaker,Mich… | 2016 | 116 | 8.0 | 340798 | 100.500000 | 81.000000 |
20 | 21 | Gold | Adventure,Drama,Thriller | Kenny Wells, a prospector desperate for a luck… | Stephen Gaghan | Matthew McConaughey, Edgar Ramírez, Bryce Dall… | 2016 | 120 | 6.7 | 19053 | 7.220000 | 49.000000 |
21 | 22 | Manchester by the Sea | Drama | A depressed uncle is asked to take care of his… | Kenneth Lonergan | Casey Affleck, Michelle Williams, Kyle Chandle… | 2016 | 137 | 7.9 | 134213 | 47.700000 | 96.000000 |
22 | 23 | Hounds of Love | Crime,Drama,Horror | A cold-blooded predatory couple while cruising… | Ben Young | Emma Booth, Ashleigh Cummings, Stephen Curry,S… | 2016 | 108 | 6.7 | 1115 | 82.956376 | 72.000000 |
23 | 24 | Trolls | Animation,Adventure,Comedy | After the Bergens invade Troll Village, Poppy,… | Walt Dohrn | Anna Kendrick, Justin Timberlake,Zooey Deschan… | 2016 | 92 | 6.5 | 38552 | 153.690000 | 56.000000 |
24 | 25 | Independence Day: Resurgence | Action,Adventure,Sci-Fi | Two decades after the first Independence Day i… | Roland Emmerich | Liam Hemsworth, Jeff Goldblum, Bill Pullman,Ma… | 2016 | 120 | 5.3 | 127553 | 103.140000 | 32.000000 |
25 | 26 | Paris pieds nus | Comedy | Fiona visits Paris for the first time to assis… | Dominique Abel | Fiona Gordon, Dominique Abel,Emmanuelle Riva, … | 2016 | 83 | 6.8 | 222 | 82.956376 | 58.985043 |
26 | 27 | Bahubali: The Beginning | Action,Adventure,Drama | In ancient India, an adventurous and daring ma… | S.S. Rajamouli | Prabhas, Rana Daggubati, Anushka Shetty,Tamann… | 2015 | 159 | 8.3 | 76193 | 6.500000 | 58.985043 |
27 | 28 | Dead Awake | Horror,Thriller | A young woman must save herself and her friend… | Phillip Guzman | Jocelin Donahue, Jesse Bradford, Jesse Borrego… | 2016 | 99 | 4.7 | 523 | 0.010000 | 58.985043 |
28 | 29 | Bad Moms | Comedy | When three overworked and under-appreciated mo… | Jon Lucas | Mila Kunis, Kathryn Hahn, Kristen Bell,Christi… | 2016 | 100 | 6.2 | 66540 | 113.080000 | 60.000000 |
29 | 30 | Assassin’s Creed | Action,Adventure,Drama | When Callum Lynch explores the memories of his… | Justin Kurzel | Michael Fassbender, Marion Cotillard, Jeremy I… | 2016 | 115 | 5.9 | 112813 | 54.650000 | 36.000000 |
… | … | … | … | … | … | … | … | … | … | … | … | … |
970 | 971 | Texas Chainsaw 3D | Horror,Thriller | A young woman travels to Texas to collect an i… | John Luessenhop | Alexandra Daddario, Tania Raymonde, Scott East… | 2013 | 92 | 4.8 | 37060 | 34.330000 | 62.000000 |
971 | 972 | Disturbia | Drama,Mystery,Thriller | A teen living under house arrest becomes convi… | D.J. Caruso | Shia LaBeouf, David Morse, Carrie-Anne Moss, S… | 2007 | 105 | 6.9 | 193491 | 80.050000 | 58.985043 |
972 | 973 | Rock of Ages | Comedy,Drama,Musical | A small town girl and a city boy meet on the S… | Adam Shankman | Julianne Hough, Diego Boneta, Tom Cruise, Alec… | 2012 | 123 | 5.9 | 64513 | 38.510000 | 47.000000 |
973 | 974 | Scream 4 | Horror,Mystery | Ten years have passed, and Sidney Prescott, wh… | Wes Craven | Neve Campbell, Courteney Cox, David Arquette, … | 2011 | 111 | 6.2 | 108544 | 38.180000 | 52.000000 |
974 | 975 | Queen of Katwe | Biography,Drama,Sport | A Ugandan girl sees her world rapidly change a… | Mira Nair | Madina Nalwanga, David Oyelowo, Lupita Nyong’o… | 2016 | 124 | 7.4 | 6753 | 8.810000 | 73.000000 |
975 | 976 | My Big Fat Greek Wedding 2 | Comedy,Family,Romance | A Portokalos family secret brings the beloved … | Kirk Jones | Nia Vardalos, John Corbett, Michael Constantin… | 2016 | 94 | 6.0 | 20966 | 59.570000 | 37.000000 |
976 | 977 | Dark Places | Drama,Mystery,Thriller | Libby Day was only eight years old when her fa… | Gilles Paquet-Brenner | Charlize Theron, Nicholas Hoult, Christina Hen… | 2015 | 113 | 6.2 | 31634 | 82.956376 | 39.000000 |
977 | 978 | Amateur Night | Comedy | Guy Carter is an award-winning graduate studen… | Lisa Addario | Jason Biggs, Janet Montgomery,Ashley Tisdale, … | 2016 | 92 | 5.0 | 2229 | 82.956376 | 38.000000 |
978 | 979 | It’s Only the End of the World | Drama | Louis (Gaspard Ulliel), a terminally ill write… | Xavier Dolan | Nathalie Baye, Vincent Cassel, Marion Cotillar… | 2016 | 97 | 7.0 | 10658 | 82.956376 | 48.000000 |
979 | 980 | The Skin I Live In | Drama,Thriller | A brilliant plastic surgeon, haunted by past t… | Pedro Almodóvar | Antonio Banderas, Elena Anaya, Jan Cornet,Mari… | 2011 | 120 | 7.6 | 108772 | 3.190000 | 70.000000 |
980 | 981 | Miracles from Heaven | Biography,Drama,Family | A young girl suffering from a rare digestive d… | Patricia Riggen | Jennifer Garner, Kylie Rogers, Martin Henderso… | 2016 | 109 | 7.0 | 12048 | 61.690000 | 44.000000 |
981 | 982 | Annie | Comedy,Drama,Family | A foster kid, who lives with her mean foster m… | Will Gluck | Quvenzhané Wallis, Cameron Diaz, Jamie Foxx, R… | 2014 | 118 | 5.3 | 27312 | 85.910000 | 33.000000 |
982 | 983 | Across the Universe | Drama,Fantasy,Musical | The music of the Beatles and the Vietnam War f… | Julie Taymor | Evan Rachel Wood, Jim Sturgess, Joe Anderson, … | 2007 | 133 | 7.4 | 95172 | 24.340000 | 56.000000 |
983 | 984 | Let’s Be Cops | Comedy | Two struggling pals dress as police officers f… | Luke Greenfield | Jake Johnson, Damon Wayans Jr., Rob Riggle, Ni… | 2014 | 104 | 6.5 | 112729 | 82.390000 | 30.000000 |
984 | 985 | Max | Adventure,Family | A Malinois dog that helped American Marines in… | Boaz Yakin | Thomas Haden Church, Josh Wiggins, Luke Kleint… | 2015 | 111 | 6.8 | 21405 | 42.650000 | 47.000000 |
985 | 986 | Your Highness | Adventure,Comedy,Fantasy | When Prince Fabious’s bride is kidnapped, he g… | David Gordon Green | Danny McBride, Natalie Portman, James Franco, … | 2011 | 102 | 5.6 | 87904 | 21.560000 | 31.000000 |
986 | 987 | Final Destination 5 | Horror,Thriller | Survivors of a suspension-bridge collapse lear… | Steven Quale | Nicholas D’Agosto, Emma Bell, Arlen Escarpeta,… | 2011 | 92 | 5.9 | 88000 | 42.580000 | 50.000000 |
987 | 988 | Endless Love | Drama,Romance | The story of a privileged girl and a charismat… | Shana Feste | Gabriella Wilde, Alex Pettyfer, Bruce Greenwoo… | 2014 | 104 | 6.3 | 33688 | 23.390000 | 30.000000 |
988 | 989 | Martyrs | Horror | A young woman’s quest for revenge against the … | Pascal Laugier | Morjana Alaoui, Mylène Jampanoï, Catherine Bég… | 2008 | 99 | 7.1 | 63785 | 82.956376 | 89.000000 |
989 | 990 | Selma | Biography,Drama,History | A chronicle of Martin Luther King’s campaign t… | Ava DuVernay | David Oyelowo, Carmen Ejogo, Tim Roth, Lorrain… | 2014 | 128 | 7.5 | 67637 | 52.070000 | 58.985043 |
990 | 991 | Underworld: Rise of the Lycans | Action,Adventure,Fantasy | An origins story centered on the centuries-old… | Patrick Tatopoulos | Rhona Mitra, Michael Sheen, Bill Nighy, Steven… | 2009 | 92 | 6.6 | 129708 | 45.800000 | 44.000000 |
991 | 992 | Taare Zameen Par | Drama,Family,Music | An eight-year-old boy is thought to be a lazy … | Aamir Khan | Darsheel Safary, Aamir Khan, Tanay Chheda, Sac… | 2007 | 165 | 8.5 | 102697 | 1.200000 | 42.000000 |
992 | 993 | Take Me Home Tonight | Comedy,Drama,Romance | Four years after graduation, an awkward high s… | Michael Dowse | Topher Grace, Anna Faris, Dan Fogler, Teresa P… | 2011 | 97 | 6.3 | 45419 | 6.920000 | 58.985043 |
993 | 994 | Resident Evil: Afterlife | Action,Adventure,Horror | While still out to destroy the evil Umbrella C… | Paul W.S. Anderson | Milla Jovovich, Ali Larter, Wentworth Miller,K… | 2010 | 97 | 5.9 | 140900 | 60.130000 | 37.000000 |
994 | 995 | Project X | Comedy | 3 high school seniors throw a birthday party t… | Nima Nourizadeh | Thomas Mann, Oliver Cooper, Jonathan Daniel Br… | 2012 | 88 | 6.7 | 164088 | 54.720000 | 48.000000 |
995 | 996 | Secret in Their Eyes | Crime,Drama,Mystery | A tight-knit team of rising investigators, alo… | Billy Ray | Chiwetel Ejiofor, Nicole Kidman, Julia Roberts… | 2015 | 111 | 6.2 | 27585 | 82.956376 | 45.000000 |
996 | 997 | Hostel: Part II | Horror | Three American college students studying abroa… | Eli Roth | Lauren German, Heather Matarazzo, Bijou Philli… | 2007 | 94 | 5.5 | 73152 | 17.540000 | 46.000000 |
997 | 998 | Step Up 2: The Streets | Drama,Music,Romance | Romantic sparks occur between two dance studen… | Jon M. Chu | Robert Hoffman, Briana Evigan, Cassie Ventura,… | 2008 | 98 | 6.2 | 70699 | 58.010000 | 50.000000 |
998 | 999 | Search Party | Adventure,Comedy | A pair of friends embark on a mission to reuni… | Scot Armstrong | Adam Pally, T.J. Miller, Thomas Middleditch,Sh… | 2014 | 93 | 5.6 | 4881 | 82.956376 | 22.000000 |
999 | 1000 | Nine Lives | Comedy,Family,Fantasy | A stuffy businessman finds himself trapped ins… | Barry Sonnenfeld | Kevin Spacey, Jennifer Garner, Robbie Amell,Ch… | 2016 | 87 | 5.3 | 12435 | 19.640000 | 11.000000 |
1000 rows × 12 columns
1 2 3 |
# 有特殊标记的缺失 wis = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/breast-cancer-wisconsin.data") |
1 2 |
wis.head() |
1000025 | 5 | 1 | 1.1 | 1.2 | 2 | 1.3 | 3 | 1.4 | 1.5 | 2.1 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1002945 | 5 | 4 | 4 | 5 | 7 | 10 | 3 | 2 | 1 | 2 |
1 | 1015425 | 3 | 1 | 1 | 1 | 2 | 2 | 3 | 1 | 1 | 2 |
2 | 1016277 | 6 | 8 | 8 | 1 | 3 | 4 | 3 | 7 | 1 | 2 |
3 | 1017023 | 4 | 1 | 1 | 3 | 2 | 1 | 3 | 1 | 1 | 2 |
4 | 1017122 | 8 | 10 | 10 | 8 | 7 | 10 | 9 | 7 | 1 | 4 |
1 2 3 |
# 把有特殊标记的缺失值替换成np.nan, 不修改原数据集 wis_2 = wis.replace(to_replace='?', value=np.nan) |
1 2 3 |
# 删除缺失值 wis_2.dropna() |
1000025 | 5 | 1 | 1.1 | 1.2 | 2 | 1.3 | 3 | 1.4 | 1.5 | 2.1 | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1002945 | 5 | 4 | 4 | 5 | 7 | 10 | 3 | 2 | 1 | 2 |
1 | 1015425 | 3 | 1 | 1 | 1 | 2 | 2 | 3 | 1 | 1 | 2 |
2 | 1016277 | 6 | 8 | 8 | 1 | 3 | 4 | 3 | 7 | 1 | 2 |
3 | 1017023 | 4 | 1 | 1 | 3 | 2 | 1 | 3 | 1 | 1 | 2 |
4 | 1017122 | 8 | 10 | 10 | 8 | 7 | 10 | 9 | 7 | 1 | 4 |
5 | 1018099 | 1 | 1 | 1 | 1 | 2 | 10 | 3 | 1 | 1 | 2 |
6 | 1018561 | 2 | 1 | 2 | 1 | 2 | 1 | 3 | 1 | 1 | 2 |
7 | 1033078 | 2 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 5 | 2 |
8 | 1033078 | 4 | 2 | 1 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
9 | 1035283 | 1 | 1 | 1 | 1 | 1 | 1 | 3 | 1 | 1 | 2 |
10 | 1036172 | 2 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
11 | 1041801 | 5 | 3 | 3 | 3 | 2 | 3 | 4 | 4 | 1 | 4 |
12 | 1043999 | 1 | 1 | 1 | 1 | 2 | 3 | 3 | 1 | 1 | 2 |
13 | 1044572 | 8 | 7 | 5 | 10 | 7 | 9 | 5 | 5 | 4 | 4 |
14 | 1047630 | 7 | 4 | 6 | 4 | 6 | 1 | 4 | 3 | 1 | 4 |
15 | 1048672 | 4 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
16 | 1049815 | 4 | 1 | 1 | 1 | 2 | 1 | 3 | 1 | 1 | 2 |
17 | 1050670 | 10 | 7 | 7 | 6 | 4 | 10 | 4 | 1 | 2 | 4 |
18 | 1050718 | 6 | 1 | 1 | 1 | 2 | 1 | 3 | 1 | 1 | 2 |
19 | 1054590 | 7 | 3 | 2 | 10 | 5 | 10 | 5 | 4 | 4 | 4 |
20 | 1054593 | 10 | 5 | 5 | 3 | 6 | 7 | 7 | 10 | 1 | 4 |
21 | 1056784 | 3 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
23 | 1059552 | 1 | 1 | 1 | 1 | 2 | 1 | 3 | 1 | 1 | 2 |
24 | 1065726 | 5 | 2 | 3 | 4 | 2 | 7 | 3 | 6 | 1 | 4 |
25 | 1066373 | 3 | 2 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 2 |
26 | 1066979 | 5 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
27 | 1067444 | 2 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
28 | 1070935 | 1 | 1 | 3 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
29 | 1070935 | 3 | 1 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 2 |
30 | 1071760 | 2 | 1 | 1 | 1 | 2 | 1 | 3 | 1 | 1 | 2 |
… | … | … | … | … | … | … | … | … | … | … | … |
668 | 1350423 | 5 | 10 | 10 | 8 | 5 | 5 | 7 | 10 | 1 | 4 |
669 | 1352848 | 3 | 10 | 7 | 8 | 5 | 8 | 7 | 4 | 1 | 4 |
670 | 1353092 | 3 | 2 | 1 | 2 | 2 | 1 | 3 | 1 | 1 | 2 |
671 | 1354840 | 2 | 1 | 1 | 1 | 2 | 1 | 3 | 1 | 1 | 2 |
672 | 1354840 | 5 | 3 | 2 | 1 | 3 | 1 | 1 | 1 | 1 | 2 |
673 | 1355260 | 1 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
674 | 1365075 | 4 | 1 | 4 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
675 | 1365328 | 1 | 1 | 2 | 1 | 2 | 1 | 2 | 1 | 1 | 2 |
676 | 1368267 | 5 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
677 | 1368273 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
678 | 1368882 | 2 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
679 | 1369821 | 10 | 10 | 10 | 10 | 5 | 10 | 10 | 10 | 7 | 4 |
680 | 1371026 | 5 | 10 | 10 | 10 | 4 | 10 | 5 | 6 | 3 | 4 |
681 | 1371920 | 5 | 1 | 1 | 1 | 2 | 1 | 3 | 2 | 1 | 2 |
682 | 466906 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
683 | 466906 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
684 | 534555 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
685 | 536708 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
686 | 566346 | 3 | 1 | 1 | 1 | 2 | 1 | 2 | 3 | 1 | 2 |
687 | 603148 | 4 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
688 | 654546 | 1 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 8 | 2 |
689 | 654546 | 1 | 1 | 1 | 3 | 2 | 1 | 1 | 1 | 1 | 2 |
690 | 695091 | 5 | 10 | 10 | 5 | 4 | 5 | 4 | 4 | 1 | 4 |
691 | 714039 | 3 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
692 | 763235 | 3 | 1 | 1 | 1 | 2 | 1 | 2 | 1 | 2 | 2 |
693 | 776715 | 3 | 1 | 1 | 1 | 3 | 2 | 1 | 1 | 1 | 2 |
694 | 841769 | 2 | 1 | 1 | 1 | 2 | 1 | 1 | 1 | 1 | 2 |
695 | 888820 | 5 | 10 | 10 | 3 | 7 | 3 | 8 | 10 | 2 | 4 |
696 | 897471 | 4 | 8 | 6 | 4 | 3 | 4 | 10 | 6 | 1 | 4 |
697 | 897471 | 4 | 8 | 8 | 5 | 4 | 5 | 10 | 4 | 1 | 4 |
682 rows × 11 columns
数据离散化
连续属性的离散化就是在连续属性的值域上,将值域划分为若干个离散的区间,最后用不同的符号或整数值代表落在每个子区间中的属性值。
连续属性离散化的目的是简化数据结构,数据离散化技术可以用来减少给定连续属性值的个数。
1 2 3 |
# 读取数据 data = pd.read_csv('./data/stock_day.csv') |
1 2 3 |
# 取出某列 p_change = data['p_change'] |
1 2 3 4 5 6 7 |
# 进行数据离散化的两种方式 # qcut(数据 ,类别数, labels类别名) 数据分箱,把样本平均分到指定数量的类别中 # res = pd.qcut(p_change, 3, labels=['a','b','c']) res = pd.qcut(p_change, 3) # 统计各个类别的数量 res.value_counts() |
1 2 3 4 5 |
(-10.030999999999999, -1.04] 215 (1.49, 10.03] 214 (-1.04, 1.49] 214 Name: p_change, dtype: int64 |
1 2 3 4 5 6 7 |
# cut 把区间平均分成指定数量的份数, 再对每个区间内的样本打上相应区间的标签 # res = pd.cut(p_change, 3) # 也可以传入列表来指定区间划分 res_2 = pd.cut(p_change, [-100, -7, -5, -3, 0, 3, 5, 7, 100]) # 统计各个类别的数量 res_2.value_counts() |
1 2 3 4 5 6 7 8 9 10 |
(0, 3] 215 (-3, 0] 188 (3, 5] 57 (-5, -3] 51 (7, 100] 35 (5, 7] 35 (-100, -7] 34 (-7, -5] 28 Name: p_change, dtype: int64 |
one-hot编码
- 如性别特征:男女设置成1 0 ,无大小数据变成了有大小,容易给训练模型错误信息, 产生无关规律
- 哑变量矩阵: 将性别分成两列 性别男(1, 0) 性别女(1, 0)
- one-hot编码(独热编码) :把离散的一列数据转换成哑变量矩阵的编码方式
1 2 3 |
# one_hot编码 get_dummies(series, columns名字前缀) res_3 = pd.get_dummies(res, prefix='abc') # 得到稀疏数据,后续需要降维处理 |
数据合并
- pd.concat实现数据的合并,按行按列合并
- pd.merge实现数据的合并
1 2 |
res_3.head() |
abc_(-10.030999999999999, -1.04] | abc_(-1.04, 1.49] | abc_(1.49, 10.03] | |
---|---|---|---|
2018-02-27 | 0 | 0 | 1 |
2018-02-26 | 0 | 0 | 1 |
2018-02-23 | 0 | 0 | 1 |
2018-02-22 | 0 | 0 | 1 |
2018-02-14 | 0 | 0 | 1 |
1 2 3 |
# 把one_hot编码后的数据合并到数据集中 pd.concat([data, res_3], axis=1) # 按列合并 |
open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | abc_(-10.030999999999999, -1.04] | abc_(-1.04, 1.49] | abc_(1.49, 10.03] | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 | 0 | 0 | 1 |
2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 | 0 | 0 | 1 |
2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 | 0 | 0 | 1 |
2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | 35397.58 | 39904.78 | 60149.60 | 0.90 | 0 | 0 | 1 |
2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 | 0 | 0 | 1 |
2018-02-13 | 21.40 | 21.90 | 21.48 | 21.31 | 30802.45 | 0.28 | 1.32 | 21.342 | 22.103 | 23.387 | 39694.65 | 45518.14 | 65161.68 | 0.77 | 0 | 1 | 0 |
2018-02-12 | 20.70 | 21.40 | 21.19 | 20.63 | 32445.39 | 0.82 | 4.03 | 21.504 | 22.338 | 23.533 | 44645.16 | 45679.94 | 68686.33 | 0.81 | 0 | 0 | 1 |
2018-02-09 | 21.20 | 21.46 | 20.36 | 20.19 | 54304.01 | -1.50 | -6.86 | 21.920 | 22.596 | 23.645 | 48624.36 | 48982.38 | 70552.47 | 1.36 | 1 | 0 | 0 |
2018-02-08 | 21.79 | 22.09 | 21.88 | 21.75 | 27068.16 | 0.09 | 0.41 | 22.372 | 23.009 | 23.839 | 44411.98 | 48612.16 | 73852.45 | 0.68 | 0 | 1 | 0 |
2018-02-07 | 22.69 | 23.11 | 21.80 | 21.29 | 53853.25 | -0.50 | -2.24 | 22.480 | 23.258 | 23.929 | 52281.28 | 56315.11 | 74925.33 | 1.35 | 1 | 0 | 0 |
2018-02-06 | 22.80 | 23.55 | 22.29 | 22.20 | 55555.00 | -0.97 | -4.17 | 22.864 | 23.607 | 24.029 | 51341.63 | 64413.58 | 75738.95 | 1.39 | 1 | 0 | 0 |
2018-02-05 | 22.45 | 23.39 | 23.27 | 22.25 | 52341.39 | 0.65 | 2.87 | 23.172 | 23.928 | 24.112 | 46714.72 | 69278.66 | 77070.00 | 1.31 | 0 | 0 | 1 |
2018-02-02 | 22.40 | 22.70 | 22.62 | 21.53 | 33242.11 | 0.20 | 0.89 | 23.272 | 24.114 | 24.184 | 49340.40 | 70873.73 | 79929.71 | 0.83 | 0 | 1 | 0 |
2018-02-01 | 23.71 | 23.86 | 22.42 | 22.22 | 66414.64 | -1.30 | -5.48 | 23.646 | 24.365 | 24.279 | 52812.35 | 80394.43 | 88480.92 | 1.66 | 1 | 0 | 0 |
2018-01-31 | 23.85 | 23.98 | 23.72 | 23.31 | 49155.02 | -0.11 | -0.46 | 24.036 | 24.583 | 24.411 | 60348.94 | 80496.48 | 91666.75 | 1.23 | 0 | 1 | 0 |
2018-01-30 | 23.71 | 24.08 | 23.83 | 23.70 | 32420.43 | 0.05 | 0.21 | 24.350 | 24.671 | 24.365 | 77485.53 | 84805.23 | 92943.35 | 0.81 | 0 | 1 | 0 |
2018-01-29 | 24.40 | 24.63 | 23.77 | 23.72 | 65469.81 | -0.73 | -2.98 | 24.684 | 24.728 | 24.294 | 91842.60 | 91692.73 | 93456.22 | 1.64 | 1 | 0 | 0 |
2018-01-26 | 24.27 | 24.74 | 24.49 | 24.22 | 50601.83 | 0.11 | 0.45 | 24.956 | 24.694 | 24.221 | 92407.05 | 92122.56 | 91980.51 | 1.27 | 0 | 1 | 0 |
2018-01-25 | 24.99 | 24.99 | 24.37 | 24.23 | 104097.59 | -0.93 | -3.68 | 25.084 | 24.669 | 24.109 | 107976.51 | 99092.73 | 92262.67 | 2.61 | 1 | 0 | 0 |
2018-01-24 | 25.49 | 26.28 | 25.29 | 25.20 | 134838.00 | -0.20 | -0.79 | 25.130 | 24.599 | 23.997 | 100644.02 | 93535.55 | 89522.22 | 3.37 | 0 | 1 | 0 |
2018-01-23 | 25.15 | 25.53 | 25.50 | 24.93 | 104205.76 | 0.39 | 1.55 | 24.992 | 24.450 | 23.844 | 92124.92 | 87064.33 | 85876.80 | 2.61 | 0 | 0 | 1 |
2018-01-22 | 25.14 | 25.40 | 25.13 | 24.75 | 68292.08 | -0.01 | -0.04 | 24.772 | 24.296 | 23.644 | 91542.85 | 84861.33 | 84970.00 | 1.71 | 0 | 1 | 0 |
2018-01-19 | 24.60 | 25.34 | 25.13 | 24.42 | 128449.11 | 0.53 | 2.15 | 24.432 | 24.254 | 23.537 | 91838.07 | 88985.70 | 82975.10 | 3.21 | 0 | 0 | 1 |
2018-01-18 | 24.40 | 24.88 | 24.60 | 24.30 | 67435.14 | 0.01 | 0.04 | 24.254 | 24.192 | 23.441 | 90208.95 | 96567.41 | 78252.92 | 1.69 | 0 | 1 | 0 |
2018-01-17 | 24.42 | 24.92 | 24.60 | 23.80 | 92242.51 | 0.20 | 0.82 | 24.068 | 24.239 | 23.378 | 86427.08 | 102837.01 | 77049.61 | 2.31 | 0 | 1 | 0 |
2018-01-16 | 23.40 | 24.60 | 24.40 | 23.30 | 101295.42 | 0.96 | 4.10 | 23.908 | 24.058 | 23.321 | 82003.73 | 101081.47 | 74590.92 | 2.54 | 0 | 0 | 1 |
2018-01-15 | 24.01 | 24.23 | 23.43 | 23.30 | 69768.17 | -0.80 | -3.30 | 23.820 | 23.860 | 23.257 | 78179.81 | 95219.71 | 71006.65 | 1.75 | 1 | 0 | 0 |
2018-01-12 | 23.70 | 25.15 | 24.24 | 23.42 | 120303.53 | 0.56 | 2.37 | 24.076 | 23.748 | 23.236 | 86133.33 | 91838.46 | 69690.35 | 3.01 | 0 | 0 | 1 |
2018-01-11 | 23.67 | 23.85 | 23.67 | 23.21 | 48525.75 | -0.12 | -0.50 | 24.130 | 23.548 | 23.197 | 102925.87 | 85432.61 | 65928.23 | 1.21 | 0 | 1 | 0 |
2018-01-10 | 24.10 | 24.60 | 23.80 | 23.40 | 70125.79 | -0.14 | -0.58 | 24.410 | 23.394 | 23.204 | 119246.95 | 85508.89 | 66934.89 | 1.76 | 0 | 1 | 0 |
… | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … | … |
2015-04-13 | 19.60 | 21.30 | 21.13 | 19.50 | 171822.69 | 1.70 | 8.75 | 19.228 | 17.812 | 16.563 | 149620.34 | 114456.84 | 111752.31 | 5.88 | 0 | 0 | 1 |
2015-04-10 | 19.55 | 19.89 | 19.43 | 19.20 | 112962.15 | -0.19 | -0.97 | 18.334 | 17.276 | 16.230 | 133648.38 | 109309.78 | 106228.29 | 3.87 | 0 | 1 | 0 |
2015-04-09 | 18.28 | 19.89 | 19.62 | 18.02 | 183119.05 | 1.20 | 6.51 | 17.736 | 16.826 | 15.964 | 124323.21 | 106501.34 | 104829.10 | 6.27 | 0 | 0 | 1 |
2015-04-08 | 17.60 | 18.53 | 18.42 | 17.60 | 157725.97 | 0.88 | 5.02 | 17.070 | 16.394 | 15.698 | 101421.29 | 97906.88 | 101658.57 | 5.40 | 0 | 0 | 1 |
2015-04-07 | 16.54 | 17.98 | 17.54 | 16.50 | 122471.85 | 0.88 | 5.28 | 16.620 | 16.120 | 15.510 | 86769.62 | 97473.29 | 98832.94 | 4.19 | 0 | 0 | 1 |
2015-04-03 | 16.44 | 16.77 | 16.66 | 16.25 | 91962.88 | 0.22 | 1.34 | 16.396 | 15.904 | 15.348 | 79293.34 | 94172.24 | 99956.63 | 3.15 | 0 | 1 | 0 |
2015-04-02 | 16.21 | 16.50 | 16.44 | 16.21 | 66336.32 | 0.15 | 0.92 | 16.218 | 15.772 | 15.229 | 84971.19 | 92655.96 | 104350.08 | 2.27 | 0 | 1 | 0 |
2015-04-01 | 16.18 | 16.48 | 16.29 | 16.00 | 68609.42 | 0.12 | 0.74 | 15.916 | 15.666 | 15.065 | 88679.47 | 95386.75 | 105692.28 | 2.35 | 0 | 1 | 0 |
2015-03-31 | 16.78 | 16.88 | 16.17 | 16.07 | 84467.62 | -0.25 | -1.52 | 15.718 | 15.568 | 14.896 | 94392.47 | 100679.68 | 105615.58 | 2.89 | 1 | 0 | 0 |
2015-03-30 | 15.99 | 16.63 | 16.42 | 15.99 | 85090.45 | 0.65 | 4.12 | 15.620 | 15.469 | 14.722 | 108176.96 | 108109.99 | 108345.78 | 2.91 | 0 | 0 | 1 |
2015-03-27 | 14.90 | 15.86 | 15.77 | 14.90 | 120352.13 | 0.84 | 5.63 | 15.412 | 15.314 | 14.527 | 109051.14 | 109047.78 | 108905.84 | 4.12 | 0 | 0 | 1 |
2015-03-26 | 15.14 | 15.35 | 14.93 | 14.91 | 84877.75 | -0.37 | -2.42 | 15.326 | 15.184 | 14.462 | 100340.74 | 103146.79 | 108303.41 | 2.91 | 1 | 0 | 0 |
2015-03-25 | 15.97 | 15.97 | 15.30 | 15.18 | 97174.40 | -0.38 | -2.42 | 15.416 | 15.102 | 14.436 | 102094.02 | 103156.85 | 109604.83 | 3.33 | 1 | 0 | 0 |
2015-03-24 | 15.38 | 16.16 | 15.68 | 15.28 | 153390.08 | 0.30 | 1.95 | 15.418 | 15.002 | 14.385 | 106966.89 | 105410.25 | 110336.03 | 5.25 | 0 | 0 | 1 |
2015-03-23 | 15.34 | 15.56 | 15.38 | 15.25 | 89461.32 | 0.04 | 0.26 | 15.318 | 14.899 | 14.304 | 108043.02 | 100192.60 | 107645.16 | 3.06 | 0 | 1 | 0 |
2015-03-20 | 15.38 | 15.48 | 15.34 | 15.18 | 76800.13 | -0.04 | -0.26 | 15.216 | 14.792 | 14.232 | 109044.42 | 105741.03 | 108857.41 | 2.63 | 0 | 1 | 0 |
2015-03-19 | 15.20 | 15.64 | 15.38 | 15.11 | 93644.19 | 0.07 | 0.46 | 15.042 | 14.686 | 14.153 | 105952.84 | 116044.19 | 111147.22 | 3.21 | 0 | 1 | 0 |
2015-03-18 | 15.18 | 15.66 | 15.31 | 15.02 | 121538.71 | 0.13 | 0.86 | 14.788 | 14.464 | 14.058 | 104219.67 | 115997.81 | 112493.60 | 4.16 | 0 | 1 | 0 |
2015-03-17 | 14.90 | 15.44 | 15.18 | 14.63 | 158770.77 | 0.31 | 2.08 | 14.586 | 14.223 | 13.954 | 103853.62 | 110551.48 | 111739.85 | 5.43 | 0 | 0 | 1 |
2015-03-16 | 14.52 | 15.05 | 14.87 | 14.51 | 94468.30 | 0.40 | 2.76 | 14.480 | 13.975 | 13.843 | 92342.17 | 108581.56 | 107464.31 | 3.23 | 0 | 0 | 1 |
2015-03-13 | 14.13 | 14.50 | 14.47 | 14.08 | 61342.22 | 0.36 | 2.55 | 14.368 | 13.740 | 13.740 | 102437.64 | 108763.91 | 108763.91 | 2.10 | 0 | 0 | 1 |
2015-03-12 | 14.11 | 14.80 | 14.11 | 13.95 | 84978.37 | -0.19 | -1.33 | 14.330 | 13.659 | 13.659 | 126135.54 | 114032.98 | 114032.98 | 2.91 | 1 | 0 | 0 |
2015-03-11 | 14.80 | 15.08 | 14.30 | 14.14 | 119708.43 | -0.35 | -2.39 | 14.140 | 13.603 | 13.603 | 127775.94 | 117664.81 | 117664.81 | 4.10 | 1 | 0 | 0 |
2015-03-10 | 14.20 | 14.80 | 14.65 | 14.01 | 101213.51 | 0.34 | 2.38 | 13.860 | 13.503 | 13.503 | 117249.34 | 117372.87 | 117372.87 | 3.46 | 0 | 0 | 1 |
2015-03-09 | 14.14 | 14.85 | 14.31 | 13.80 | 144945.66 | 0.03 | 0.21 | 13.470 | 13.312 | 13.312 | 124820.96 | 120066.09 | 120066.09 | 4.96 | 0 | 1 | 0 |
2015-03-06 | 13.17 | 14.48 | 14.28 | 13.13 | 179831.72 | 1.12 | 8.51 | 13.112 | 13.112 | 13.112 | 115090.18 | 115090.18 | 115090.18 | 6.16 | 0 | 0 | 1 |
2015-03-05 | 12.88 | 13.45 | 13.16 | 12.87 | 93180.39 | 0.26 | 2.02 | 12.820 | 12.820 | 12.820 | 98904.79 | 98904.79 | 98904.79 | 3.19 | 0 | 0 | 1 |
2015-03-04 | 12.80 | 12.92 | 12.90 | 12.61 | 67075.44 | 0.20 | 1.57 | 12.707 | 12.707 | 12.707 | 100812.93 | 100812.93 | 100812.93 | 2.30 | 0 | 0 | 1 |
2015-03-03 | 12.52 | 13.06 | 12.70 | 12.52 | 139071.61 | 0.18 | 1.44 | 12.610 | 12.610 | 12.610 | 117681.67 | 117681.67 | 117681.67 | 4.76 | 0 | 1 | 0 |
2015-03-02 | 12.25 | 12.67 | 12.52 | 12.20 | 96291.73 | 0.32 | 2.62 | 12.520 | 12.520 | 12.520 | 96291.73 | 96291.73 | 96291.73 | 3.30 | 0 | 0 | 1 |
643 rows × 17 columns
- pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None)
- 可以指定按照两组数据的共同键值对合并,或者分别指定左右两组数据各自的键进行合并
- left: A DataFrame object
- right: Another DataFrame object
- on: Columns (names) to join on. Must be found in both the left and right DataFrame objects.
- left_on=None, right_on=None:指定左右键
1 2 3 4 5 6 7 8 9 10 |
left = pd.DataFrame({'key1': ['K0', 'K0', 'K1', 'K2'], 'key2': ['K0', 'K1', 'K0', 'K1'], 'A': ['A0', 'A1', 'A2', 'A3'], 'B': ['B0', 'B1', 'B2', 'B3']}) right = pd.DataFrame({'key1': ['K0', 'K1', 'K1', 'K2'], 'key2': ['K0', 'K0', 'K0', 'K0'], 'C': ['C0', 'C1', 'C2', 'C3'], 'D': ['D0', 'D1', 'D2', 'D3']}) |
1 2 3 4 |
# pd.merge # inner连接合并数据集 left左数据集 right 右数据集 on合并的共有字段 how合并方式 pd.merge(left=left, right=right, on=['key1','key2'], how='inner') |
key1 | key2 | A | B | C | D | |
---|---|---|---|---|---|---|
0 | K0 | K0 | A0 | B0 | C0 | D0 |
1 | K1 | K0 | A2 | B2 | C1 | D1 |
2 | K1 | K0 | A2 | B2 | C2 | D2 |
交叉表和透视表
1 2 3 |
# 股票涨跌和星期几的关系 data.head() |
open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 |
2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 |
2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 |
2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | 35397.58 | 39904.78 | 60149.60 | 0.90 |
2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 |
1 2 3 4 5 6 |
# 日期格式转换成星期表示 # pd.to_datetime(data.index).weekday # 添加新的一列数据, 若无该列,自动创建 data['week'] = pd.to_datetime(data.index).weekday data |
1 2 3 |
# 创建涨跌特征列 data['rise'] = np.where(data['p_change'] > 0, 1, 0) |
1 2 3 4 |
# 建立交叉表 两列离散的数据 values为数量统计 temp = pd.crosstab(data['week'], data['rise']) temp |
rise | 0 | 1 |
---|---|---|
week | ||
0 | 63 | 62 |
1 | 55 | 76 |
2 | 61 | 71 |
3 | 63 | 65 |
4 | 59 | 68 |
1 2 3 |
# 按行求和 sum()默认按列 sum_day = temp.sum(1) |
1 2 3 4 |
# 计算上涨下跌的概率 div()默认按列 res = temp.div(sum_day, axis=0) res |
rise | 0 | 1 |
---|---|---|
week | ||
0 | 0.504000 | 0.496000 |
1 | 0.419847 | 0.580153 |
2 | 0.462121 | 0.537879 |
3 | 0.492188 | 0.507812 |
4 | 0.464567 | 0.535433 |
1 2 3 4 |
# 绘制柱状图 stacked=True两个叠加 res.plot(kind='bar', stacked=True) plt.show() |
1 2 3 4 |
# pivot_table()透视表不是创建表,是一种处理数据方法, 使用分组聚合 # 用透视表计算涨跌概率 按index进行分组 按values进行聚合 aggfunc聚合方法 pd.pivot_table(data=data, index='week', values='rise', aggfunc='mean') |
rise | |
---|---|
week | |
0 | 0.496000 |
1 | 0.580153 |
2 | 0.537879 |
3 | 0.507812 |
4 | 0.535433 |
分组和聚合
- 分组:按照某个离散的特征把数据集分成几份
- 聚合:对分组之后的数据统计
1 2 |
col =pd.DataFrame({'color': ['white','red','green','red','green'], 'object': ['pen','pencil','pencil','ashtray','pen'],'price1':[5.56,4.20,1.30,0.56,2.75],'price2':[4.75,4.12,1.60,0.75,3.15]}) |
1 2 |
col |
color | object | price1 | price2 | |
---|---|---|---|---|
0 | white | pen | 5.56 | 4.75 |
1 | red | pencil | 4.20 | 4.12 |
2 | green | pencil | 1.30 | 1.60 |
3 | red | ashtray | 0.56 | 0.75 |
4 | green | pen | 2.75 | 3.15 |
1 2 3 4 5 |
# 分组聚合一般一起使用 # 按照颜色进行分组, 再进行count聚合 # col.groupby('color') # 返回DataFrameGroupBy对象 col.groupby('color').count() |
object | price1 | price2 | |
---|---|---|---|
color | |||
green | 2 | 2 | 2 |
red | 2 | 2 | 2 |
white | 1 | 1 | 1 |
1 2 |
col.groupby('color')['price1'].count() |
1 2 3 4 5 6 |
color green 2 red 2 white 1 Name: price1, dtype: int64 |
1 2 3 |
# 另一种分组聚合的方式 col['price1'].groupby(col['color']).count() |
1 2 3 4 5 6 |
color green 2 red 2 white 1 Name: price1, dtype: int64 |
1 2 3 |
# 示例: 导入星巴克店的数据 数据来源:https://www.kaggle.com/starbucks/store-locations/data data = pd.read_csv("./data/directory.csv") |
1 2 3 4 5 6 |
# 统计星巴克在各个国家的店铺数量, 按照country进行分组, 再count统计 res = data.groupby('Country')['Brand'].count() # 绘制图像 res.plot(kind='bar', figsize=(20, 8 )) plt.show() |
1 |
<br /> |