python_pycharm_graph - 8BitsCoding/RobotMentor GitHub Wiki


PyCharm -> File -> Settings -> Project Interpreter -> '+' -> 필요한 package 설치

pandas, matplotlib 설치


목차

๋งŽ์ด ์‚ฌ์šฉํ•˜๊ฒŒ ๋  ๊ธฐ๋Šฅ ๊ธฐ์ค€์œผ๋กœ ์ž‘์„ฑ


사전 사항

예제파일은 다음 네 개를 이용: mtcars.csv, csv_exam.csv, split.csv, gpgga_exam.csv


CSV 정보 출력하기

from pandas.io.parsers import read_csv
import matplotlib.pyplot as plt

# Load the sample CSV; with no extra arguments the file's first line supplies the column names.
df = read_csv('csv_exam.csv')
# Alternative: pass the column names explicitly instead of taking them from the file.
#df = read_csv('csv_exam.csv', names=['id', 'class', 'math', 'english', 'science'])

print('Type:', type(df))             # class of the loaded object (a pandas DataFrame)
print('Shape:', df.shape)            # (rows, columns) of the data frame
print('Head:\n', df.head(3))         # first 3 rows of the data frame
print('tail:\n', df.tail(3))         # last 3 rows of the data frame
print('Values:\n', df.values)        # every cell value, without index or column labels
print('Describe:\n', df.describe())  # summary statistics (count, mean, std, min, quartiles, max)

print('-'*30)                        # visual separator line

출력결과

# With names= given and no header= argument, the file's own header line is not consumed
# as a header: it comes back as data row 0 underneath the supplied column names.
df = read_csv('csv_exam.csv', names=['id', 'class', 'math', 'english', 'science'])
print('Head:\n', df.head(3))

가장 상위 행에 지정한 이름(names)의 열 이름을 추가 (파일의 기존 헤더 행은 0번 데이터 행으로 읽힘)

Head:
    id  class  math  english  science
0  id  class  math  english  science
1   1      1    50       98       50
2   2      1    60       97       60
print('Type:', type(df))

Type: <class 'pandas.core.frame.DataFrame'>

print('Shape:', df.shape)

Shape: (20, 5)

20행 5열

print('Head:\n', df.head(3))

์ƒ์œ„ 3ํ–‰ ๋ฐ์ดํ„ฐ ์ถœ๋ ฅ

Head:
    id  class  math  english  science
0   1      1    50       98       50
1   2      1    60       97       60
2   3      1    45       86       78
print('tail:\n', df.tail(3))

하위 3행 데이터 출력

tail:
     id  class  math  english  science
17  18      5    80       78       90
18  19      5    89       68       87
19  20      5    78       83       58
print('Values:\n', df.values)

전체 데이터 출력

print('Describe:\n', df.describe())

๋ฐ์ดํ„ฐ์˜ ์š”์•ฝ์ •๋ณด ์ถœ๋ ฅ

Describe:
              id      class       math    english    science
count  20.00000  20.000000  20.000000  20.000000  20.000000
mean   10.50000   3.000000  57.450000  84.900000  59.450000
std     5.91608   1.450953  20.299015  12.875517  25.292968
min     1.00000   1.000000  20.000000  56.000000  12.000000
25%     5.75000   2.000000  45.750000  78.000000  45.000000
50%    10.50000   3.000000  54.000000  86.500000  62.500000
75%    15.25000   4.000000  75.750000  98.000000  78.000000
max    20.00000   5.000000  90.000000  98.000000  98.000000

๋ฐ์ดํ„ฐ ํŒŒ์‹ฑํ•˜๊ธฐ (์ถ”๊ฐ€/์‚ญ์ œ ํ•˜๊ธฐ)

# .loc slices by index label and includes both endpoints, so 0:1 yields rows 0 and 1.
print('df.loc[0:1]\n', df.loc[0:1])

0~1행 출력

    id  class  math  english  science
0   1      1    50       98       50
1   2      1    60       97       60
# All rows (:), columns 'id' through 'math' inclusive (label slices include the end label).
# The printed label now matches the expression; it used to be a copy of the previous example.
print("df.loc[:, 'id':'math']\n", df.loc[:, 'id':'math'])

모든 행의 id~math 열 출력

    id  class  math
0    1      1    50
1    2      1    60
...
# Boolean-mask filter: keep only the rows whose math score is at least 50.
math_at_least_50 = df['math'] >= 50
new_df = df[math_at_least_50]
print('new_df(math>=50)\n', new_df)

특정 값에 조건 걸기 1

new_df(math>=50)
     id  class  math  english  science
0    1      1    50       98       50
1    2      1    60       97       60
5    6      2    50       89       98
...
new_df = df[df.math >= 50]
# sort_values returns a new, sorted DataFrame and leaves the frame it was called on
# untouched, so the result must be assigned back; ascending=False sorts high-to-low.
new_df = new_df.sort_values(by='math', ascending=False)
print('new_df(math>=50)\n', new_df)

특정 값으로 내림차순 정렬

new_df(math>=50)
     id  class  math  english  science
0    1      1    50       98       50
1    2      1    60       97       60
5    6      2    50       89       98
...
# Combine two row conditions with & (element-wise AND). Each comparison needs its own
# parentheses because & binds tighter than >=.
new_df = df[(df.math >= 50) & (df.english >= 20)]
# Label names both conditions so the printed heading describes what was actually filtered.
print('new_df(math>=50 & english>=20)\n', new_df)

특정 값에 조건 걸기 2

# `class` is a Python keyword, so the column cannot be reached as df.class (SyntaxError);
# use bracket indexing instead. The column holds integers in csv_exam.csv, so it is cast
# to str before the .str accessor is used.
new_df2 = df[df['class'].astype(str).str.contains('test')]

특정 문자열 포함여부

# Assigning a scalar to a new column label creates that column with the value in every row.
df['new_column'] = 'new column'
print('new_column\n', df.head(3))

열 추가

new_column
    id  class  math  english  science  new_column
0   1      1    50       98       50  new column
1   2      1    60       97       60  new column
2   3      1    45       86       78  new column
from pandas.io.parsers import read_csv
import matplotlib.pyplot as plt

# Load split.csv with a single column named 'id'.
df = read_csv('split.csv', names=['id'])

# Break each 'id' string apart on commas; expand=True spreads the pieces over
# separate columns (labelled 0, 1, 2, ...) instead of returning lists.
id_text = df['id']
split = id_text.str.split(',', expand=True)

print('df\n', df)
print('split\n', split)

문자열 나누기

df
       id
0  a,b,c
1  1,2,3
split
    0  1  2
0  a  b  c
1  1  2  3

그래프 그리기

from pandas.io.parsers import read_csv
import matplotlib.pyplot as plt

# Load the mtcars sample data set.
df = read_csv('mtcars.csv')

# Figure 1: scatter plot of engine displacement (disp) against fuel economy (mpg).
displacement, mileage = df['disp'], df['mpg']
plt.scatter(displacement, mileage)
plt.xlabel('disp')
plt.ylabel('mpg')
plt.show()

# Figure 2: car weight (wt) against fuel economy (mpg). This one uses plt.plot with the
# 'rs--' format string (red square markers joined by a dashed line) rather than
# plt.scatter, and adds a grid.
plt.plot(df['wt'], mileage, 'rs--')
plt.xlabel('weight')
plt.ylabel('miles per gallon')
plt.grid()
plt.savefig('wt-mpg.png')  # write the figure to a PNG file before displaying it
plt.show()

์‹ค์ „ GPS๋ฐ์ดํ„ฐ ํŒŒ์‹ฑํ•˜๊ธฐ

gpgga_exam.csv๋ฅผ ์‚ฌ์šฉํ•˜๋ฉด ๋จ

# Basic imports
from pandas.io.parsers import read_csv
import pandas as pd
import matplotlib.pyplot as plt

def nmea_to_decimal_degrees(values):
    """Convert NMEA ``(d)ddmm.mmmm`` coordinates to decimal degrees.

    Args:
        values: numeric pandas Series of NMEA latitude or longitude values,
            e.g. 3735.0079 meaning 37 degrees 35.0079 minutes.

    Returns:
        Series with the same index holding ``degrees + minutes / 60``.
        Missing values (NaN) stay NaN instead of becoming a bogus coordinate.
    """
    degrees = values // 100            # leading digits are the whole degrees
    minutes = values - degrees * 100   # the remainder is minutes, fraction included
    return minutes / 60 + degrees


# Load the CSV with a single column named 'gps_raw_data'.
df = read_csv('gpgga_exam.csv', names=['gps_raw_data'])

# Split every sentence on ',' into one column per field.
split = df.gps_raw_data.str.split(',', expand=True)

# expand=True labels the columns with the integers 0..N-1. Converting those labels to
# strings keeps the names '0', '1', ... for any number of fields, so the list does not
# have to be typed out by hand (and no longer fails when the field count is not 20).
split.columns = split.columns.astype(str)

# Keep only the rows whose first field contains 'GPGGA'; na=False treats rows with no
# text as non-matching instead of failing on the boolean index.
split1 = split[split['0'].str.contains('GPGGA', na=False)]

# Fields 2 and 4 of a GGA sentence are latitude and longitude. .copy() makes split2 an
# independent frame so the assignments below do not write into a slice of split1.
split2 = split1.loc[:, ['2', '4']].copy()

# Convert the text fields to numbers.
split2['2'] = pd.to_numeric(split2['2'])
split2['4'] = pd.to_numeric(split2['4'])

# ddmm.mmmm -> decimal degrees. The degree part is taken from each value rather than
# being hard-coded to 37 / 127, so data from other locations converts correctly too.
# NOTE: the hemisphere fields ('3' = N/S, '5' = E/W) are still not applied, so the
# result is unsigned, as it was before.
split2['2'] = nmea_to_decimal_degrees(split2['2'])
split2['4'] = nmea_to_decimal_degrees(split2['4'])

# Print the result
print('split2\n', split2)
split2
             2          4
8   37.583465  127.02741
14  37.600132  127.02741
15  37.616798  127.02741