[참조] http://pinkwink.kr/734, http://pinkwink.kr/735
1) pandas 설치
cd Python35
cd Scripts
pip3 install pandas
2) 에디터로 sublime text 를 사용해본다.
- 우측하단에 Plain Text 를 Python 으로 변경
- https://packagecontrol.io/installation 에 접속해서 import urllib... 로 시작하는 설치 코드를 복사한 뒤,
sublime 에서 Ctrl + ` 을 눌러 콘솔을 열고 붙여넣은 다음 Enter 를 누른다.
- sublime text 를 재시작한다.
- Preferences > Package Control 혹은 SHIFT + CTRL + P 로 Command Palette 를 연 뒤,
Package Control: Add Repository 를 검색하고, 하단 URL 입력란에 https://github.com/wuub/SublimeREPL 을
붙여넣는다.
[ Series ]
3)
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
obj = Series([4,7,-5,3])
print(obj.index)
print(obj.values)
print(obj)
obj.index = ['Bob','steve','Jeff','Ryan']
print(obj)
-------------------
RangeIndex(start=0, stop=4, step=1)
[ 4 7 -5 3]
0 4
1 7
2 -5
3 3
dtype: int64
Bob 4
steve 7
Jeff -5
Ryan 3
dtype: int64
4)
obj2 = Series([4,7,-5,3], index=['b','b','a','c'])
print(obj2)
print(obj2['a'])
print(obj2['b'])
print(np.exp(obj2))
print(obj2 * 2)
----------------------------
b 4
b 7
a -5
c 3
dtype: int64
-5
b 4
b 7
dtype: int64
b 54.598150
b 1096.633158
a 0.006738
c 20.085537
dtype: float64
b 8
b 14
a -10
c 6
dtype: int64
5)
sdata = {'Ohio':35000,'Texas':71000,'Oregon':16000,'Utah':5000}
obj3 = Series(sdata)
print(obj3)
states=['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(sdata, index=states)
print(obj4)
pd.isnull(obj4)
------------------------------
Ohio 35000
Texas 71000
Oregon 16000
Utah 5000
dtype: int64
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
dtype: float64
California True
Ohio False
Oregon False
Texas False
dtype: bool
6)
pd.notnull(obj4)
California False
Ohio True
Oregon True
Texas True
dtype: bool
7)
obj4.isnull()
California True
Ohio False
Oregon False
Texas False
dtype: bool
8)
print(obj3 + obj4)
California NaN
Ohio 70000.0
Oregon 32000.0
Texas 142000.0
Utah NaN
dtype: float64
9)
obj4.name = "population"
obj4.index.name="state"
print(obj4)
state
California NaN
Ohio 35000.0
Oregon 16000.0
Texas 71000.0
Name: population, dtype: float64
[ DataFrame ]
1)
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
data = {'state':['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
'year':[2000,2001,2002,2001,2002],
'pop':[1.5,1.7,3.6,2.4,2.9]}
frame = DataFrame(data)
print(frame)
state year pop
0 Ohio 2000 1.5
1 Ohio 2001 1.7
2 Ohio 2002 3.6
3 Nevada 2001 2.4
4 Nevada 2002 2.9
2)
frame2 = DataFrame(data, columns=['year','state','pop','debt'], index=['one','two','three','four','five'])
print(frame2)
print(frame2.loc['three'])
frame2.debt = 3.14
print(frame2)
year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 NaN
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 NaN
five 2002 Nevada 2.9 NaN
year 2002
state Ohio
pop 3.6
debt NaN
Name: three, dtype: object
year state pop debt
one 2000 Ohio 1.5 3.14
two 2001 Ohio 1.7 3.14
three 2002 Ohio 3.6 3.14
four 2001 Nevada 2.4 3.14
five 2002 Nevada 2.9 3.14
3)
frame2.debt = np.arange(5)
print(frame2)
val = Series([-1.2, -1.5, -1.7], index=['two', 'four','five'])
frame2.debt = val
frame2
year state pop debt
one 2000 Ohio 1.5 0
two 2001 Ohio 1.7 1
three 2002 Ohio 3.6 2
four 2001 Nevada 2.4 3
five 2002 Nevada 2.9 4
year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 -1.2
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 -1.5
five 2002 Nevada 2.9 -1.7
4)
frame2['eastern'] = frame2.state== 'Ohio'
frame2
year state pop debt eastern
one 2000 Ohio 1.5 NaN True
two 2001 Ohio 1.7 -1.2 True
three 2002 Ohio 3.6 NaN True
four 2001 Nevada 2.4 -1.5 False
five 2002 Nevada 2.9 -1.7 False
5)
del frame2['eastern']
frame2
year state pop debt
one 2000 Ohio 1.5 NaN
two 2001 Ohio 1.7 -1.2
three 2002 Ohio 3.6 NaN
four 2001 Nevada 2.4 -1.5
five 2002 Nevada 2.9 -1.7
6)
pop = {'Nevada':{2001:2.4, 2002:2.9}, 'Ohio':{2000:1.5, 2001:1.7, 2002:3.6}}
frame3 = DataFrame(pop)
frame3
Nevada Ohio
2000 NaN 1.5
2001 2.4 1.7
2002 2.9 3.6
7)
frame3.T
2000 2001 2002
Nevada NaN 2.4 2.9
Ohio 1.5 1.7 3.6