
1 2
|
import pandas as pd
import numpy as np
|
定义序列（Series）。列表中含有np.nan时，整数会被提升为浮点数；pandas默认的浮点类型是float64
1 2
|
s = pd.Series([1,3,4,np.nan,11,99])
s
|
0 1.0
1 3.0
2 4.0
3 NaN
4 11.0
5 99.0
dtype: float64
定义日期索引（DatetimeIndex）：从2018-09-27开始，按天生成6个连续日期
1 2
|
dates = pd.date_range('20180927',periods=6)
dates
|
DatetimeIndex(['2018-09-27', '2018-09-28', '2018-09-29', '2018-09-30',
'2018-10-01', '2018-10-02'],
dtype='datetime64[ns]', freq='D')
DataFrame类似于numpy的二维array，但带有行索引和列标签
不指定索引的DataFrame：行索引和列标签默认都是从0开始的整数
1 2
|
>>> df = pd.DataFrame(np.arange(12).reshape(3, 4))
>>> df
|
|   | 0 | 1 | 2 | 3 |
|---|---|---|---|---|
| 0 | 0 | 1 | 2 | 3 |
| 1 | 4 | 5 | 6 | 7 |
| 2 | 8 | 9 | 10 | 11 |
用字典定义DataFrame：字典的键作为列名，标量值（如1.和'Foo'）会自动广播到与数组列相同的长度
1 2
|
>>> df = pd.DataFrame({'A': 1., 'B': 'Foo', 'C': np.array([3] * 4)})
>>> df
|
|   | A | B | C |
|---|---|---|---|
| 0 | 1.0 | Foo | 3 |
| 1 | 1.0 | Foo | 3 |
| 2 | 1.0 | Foo | 3 |
| 3 | 1.0 | Foo | 3 |
行索引为dates, 列索引为[a, b, c, d]
1 2
|
>>> df = pd.DataFrame(np.random.randn(6,4),index=dates,columns=['a','b','c','d'])
>>> df
|
|   | a | b | c | d |
|---|---|---|---|---|
| 2018-09-27 | 0.338831 | 1.036578 | -2.573243 | -2.204440 |
| 2018-09-28 | -0.473799 | -0.646747 | 0.435539 | -0.758833 |
| 2018-09-29 | 0.305467 | 0.816041 | -0.116044 | 1.197494 |
| 2018-09-30 | -0.435368 | -0.082337 | 0.624486 | -1.234057 |
| 2018-10-01 | -0.667478 | 0.233274 | -1.380012 | 2.261031 |
| 2018-10-02 | -0.654250 | -0.180604 | 0.609015 | -0.924455 |
查看DataFrame的数据类型（df.dtypes）
a float64
b float64
c float64
d float64
dtype: object
查看DataFrame的索引（df.index）
DatetimeIndex(['2018-09-27', '2018-09-28', '2018-09-29', '2018-09-30',
'2018-10-01', '2018-10-02'],
dtype='datetime64[ns]', freq='D')
查看DataFrame的列索引（df.columns）
Index(['a', 'b', 'c', 'd'], dtype='object')
查看DataFrame的值（df.values，返回numpy数组）
array([[ 0.33883139, 1.03657755, -2.5732431 , -2.20443975],
[-0.47379902, -0.64674734, 0.43553894, -0.75883344],
[ 0.30546684, 0.81604074, -0.11604421, 1.19749384],
[-0.43536792, -0.08233739, 0.62448617, -1.23405699],
[-0.66747791, 0.23327389, -1.38001185, 2.26103083],
[-0.65425047, -0.18060444, 0.60901542, -0.92445528]])
查看DataFrame的描述（df.describe()，按列给出计数、均值、标准差、最值和分位数）
|   | a | b | c | d |
|---|---|---|---|---|
| count | 6.000000 | 6.000000 | 6.000000 | 6.000000 |
| mean | -0.264433 | 0.196034 | -0.400043 | -0.277210 |
| std | 0.463933 | 0.635914 | 1.306128 | 1.667218 |
| min | -0.667478 | -0.646747 | -2.573243 | -2.204440 |
| 25% | -0.609138 | -0.156038 | -1.064020 | -1.156657 |
| 50% | -0.454583 | 0.075468 | 0.159747 | -0.841644 |
| 75% | 0.120258 | 0.670349 | 0.565646 | 0.708412 |
| max | 0.338831 | 1.036578 | 0.624486 | 2.261031 |
DataFrame的转置（df.T，行索引与列标签互换）
|   | 2018-09-27 00:00:00 | 2018-09-28 00:00:00 | 2018-09-29 00:00:00 | 2018-09-30 00:00:00 | 2018-10-01 00:00:00 | 2018-10-02 00:00:00 |
|---|---|---|---|---|---|---|
| a | 0.338831 | -0.473799 | 0.305467 | -0.435368 | -0.667478 | -0.654250 |
| b | 1.036578 | -0.646747 | 0.816041 | -0.082337 | 0.233274 | -0.180604 |
| c | -2.573243 | 0.435539 | -0.116044 | 0.624486 | -1.380012 | 0.609015 |
| d | -2.204440 | -0.758833 | 1.197494 | -1.234057 | 2.261031 | -0.924455 |
DataFrame按列标签排序（axis=1表示对列标签排序；axis=0才是对行索引排序）
1
|
>>> df.sort_index(axis=1)
|
|   | a | b | c | d |
|---|---|---|---|---|
| 2018-09-27 | 0.338831 | 1.036578 | -2.573243 | -2.204440 |
| 2018-09-28 | -0.473799 | -0.646747 | 0.435539 | -0.758833 |
| 2018-09-29 | 0.305467 | 0.816041 | -0.116044 | 1.197494 |
| 2018-09-30 | -0.435368 | -0.082337 | 0.624486 | -1.234057 |
| 2018-10-01 | -0.667478 | 0.233274 | -1.380012 | 2.261031 |
| 2018-10-02 | -0.654250 | -0.180604 | 0.609015 | -0.924455 |
DataFrame按列标签排序, 逆序（ascending=False）
1
|
>>> df.sort_index(axis=1,ascending=False)
|
|   | d | c | b | a |
|---|---|---|---|---|
| 2018-09-27 | -2.204440 | -2.573243 | 1.036578 | 0.338831 |
| 2018-09-28 | -0.758833 | 0.435539 | -0.646747 | -0.473799 |
| 2018-09-29 | 1.197494 | -0.116044 | 0.816041 | 0.305467 |
| 2018-09-30 | -1.234057 | 0.624486 | -0.082337 | -0.435368 |
| 2018-10-01 | 2.261031 | -1.380012 | 0.233274 | -0.667478 |
| 2018-10-02 | -0.924455 | 0.609015 | -0.180604 | -0.654250 |
DataFrame按值排序（按c列的值升序排列各行）
1
|
>>> df.sort_values(by='c')
|
|   | a | b | c | d |
|---|---|---|---|---|
| 2018-09-27 | 0.338831 | 1.036578 | -2.573243 | -2.204440 |
| 2018-10-01 | -0.667478 | 0.233274 | -1.380012 | 2.261031 |
| 2018-09-29 | 0.305467 | 0.816041 | -0.116044 | 1.197494 |
| 2018-09-28 | -0.473799 | -0.646747 | 0.435539 | -0.758833 |
| 2018-10-02 | -0.654250 | -0.180604 | 0.609015 | -0.924455 |
| 2018-09-30 | -0.435368 | -0.082337 | 0.624486 | -1.234057 |
近期评论