> 文章列表 > Pandas 练习, 常见功能查阅

Pandas 练习, 常见功能查阅

Pandas 练习, 常见功能查阅

Pandas

安装 pandas 库:

conda install pandas

数据

git clone https://github.com/KeithGalli/pandas.git

练习

import pandas as pd
data_dir = "/data_dir"
df = pd.read_csv(f'{data_dir}/pandas/pokemon_data.csv')
df.shape
(800, 12)
df.head()
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False
df = pd.read_excel(f'{data_dir}/pandas/pokemon_data.xlsx')
df.shape
(800, 12)
df.head()
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False
def read():global dfdf = pd.read_csv(f'{data_dir}/pandas/pokemon_data.csv')
read()
df.columns
Index(['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk','Sp. Def', 'Speed', 'Generation', 'Legendary'],dtype='object')
df.Name
0                  Bulbasaur
1                    Ivysaur
2                   Venusaur
3      VenusaurMega Venusaur
4                 Charmander...          
795                  Diancie
796      DiancieMega Diancie
797      HoopaHoopa Confined
798       HoopaHoopa Unbound
799                Volcanion
Name: Name, Length: 800, dtype: object
df['Name']
0                  Bulbasaur
1                    Ivysaur
2                   Venusaur
3      VenusaurMega Venusaur
4                 Charmander...          
795                  Diancie
796      DiancieMega Diancie
797      HoopaHoopa Confined
798       HoopaHoopa Unbound
799                Volcanion
Name: Name, Length: 800, dtype: object
df[['Name', 'HP', 'Speed']]
Name HP Speed
0 Bulbasaur 45 45
1 Ivysaur 60 60
2 Venusaur 80 80
3 VenusaurMega Venusaur 80 80
4 Charmander 39 65
... ... ... ...
795 Diancie 50 50
796 DiancieMega Diancie 50 110
797 HoopaHoopa Confined 80 70
798 HoopaHoopa Unbound 80 80
799 Volcanion 80 70

800 rows × 3 columns

# 第 [n] 行的数据
df.iloc[1]
#                   2
Name          Ivysaur
Type 1          Grass
Type 2         Poison
HP                 60
Attack             62
Defense            63
Sp. Atk            80
Sp. Def            80
Speed              60
Generation          1
Legendary       False
Name: 1, dtype: object
# 第 [n, m) 行的数据
df.iloc[0:4]
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False
# 第 [n] 行, [m] 列的数据
df.iloc[2, 2]
'Grass'
# 行遍历
# for index, row in df.iterrows():
# #     print(index, row)
#     print(index, row['Name'])
0 Bulbasaur
1 Ivysaur
2 Venusaur
3 VenusaurMega Venusaur
4 Charmander
5 Charmeleon
...
796 DiancieMega Diancie
797 HoopaHoopa Confined
798 HoopaHoopa Unbound
799 Volcanion
# 根据字段过滤数据
df.loc[df['Type 1'] == "Grass"]
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False
48 43 Oddish Grass Poison 45 50 55 75 65 30 1 False
... ... ... ... ... ... ... ... ... ... ... ... ...
718 650 Chespin Grass NaN 56 61 65 48 45 38 6 False
719 651 Quilladin Grass NaN 61 78 95 56 58 57 6 False
720 652 Chesnaught Grass Fighting 88 107 122 74 75 64 6 False
740 672 Skiddo Grass NaN 66 65 48 62 57 52 6 False
741 673 Gogoat Grass NaN 123 100 62 97 81 68 6 False

70 rows × 12 columns

# 统计, 只对"数值"类型统计
# count:非空值的数量。
# mean:平均值。
# std:标准差。
# min:最小值。
# 25%:下四分位数。
# 50%:中位数(下四分位数和上四分位数的平均值)。
# 75%:上四分位数。
# max:最大值。
df.describe()
# HP Attack Defense Sp. Atk Sp. Def Speed Generation
count 800.000000 800.000000 800.000000 800.000000 800.000000 800.000000 800.000000 800.00000
mean 362.813750 69.258750 79.001250 73.842500 72.820000 71.902500 68.277500 3.32375
std 208.343798 25.534669 32.457366 31.183501 32.722294 27.828916 29.060474 1.66129
min 1.000000 1.000000 5.000000 5.000000 10.000000 20.000000 5.000000 1.00000
25% 184.750000 50.000000 55.000000 50.000000 49.750000 50.000000 45.000000 2.00000
50% 364.500000 65.000000 75.000000 70.000000 65.000000 70.000000 65.000000 3.00000
75% 539.250000 80.000000 100.000000 90.000000 95.000000 90.000000 90.000000 5.00000
max 721.000000 255.000000 190.000000 230.000000 194.000000 230.000000 180.000000 6.00000
# 排序
df.sort_values('Name')
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
510 460 Abomasnow Grass Ice 90 92 75 92 85 60 4 False
511 460 AbomasnowMega Abomasnow Grass Ice 90 132 105 132 105 30 4 False
68 63 Abra Psychic NaN 25 20 15 105 55 90 1 False
392 359 Absol Dark NaN 65 130 60 75 60 75 3 False
393 359 AbsolMega Absol Dark NaN 65 150 60 115 60 115 3 False
... ... ... ... ... ... ... ... ... ... ... ... ...
632 571 Zoroark Dark NaN 60 105 60 120 60 105 5 False
631 570 Zorua Dark NaN 40 65 40 80 40 65 5 False
46 41 Zubat Poison Flying 40 45 35 30 40 55 1 False
695 634 Zweilous Dark Dragon 72 85 70 65 70 58 5 False
794 718 Zygarde50% Forme Dragon Ground 108 100 121 81 95 95 6 True

800 rows × 12 columns

# 排序: 倒序
df.sort_values('Name', ascending=False)
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
794 718 Zygarde50% Forme Dragon Ground 108 100 121 81 95 95 6 True
695 634 Zweilous Dark Dragon 72 85 70 65 70 58 5 False
46 41 Zubat Poison Flying 40 45 35 30 40 55 1 False
631 570 Zorua Dark NaN 40 65 40 80 40 65 5 False
632 571 Zoroark Dark NaN 60 105 60 120 60 105 5 False
... ... ... ... ... ... ... ... ... ... ... ... ...
393 359 AbsolMega Absol Dark NaN 65 150 60 115 60 115 3 False
392 359 Absol Dark NaN 65 130 60 75 60 75 3 False
68 63 Abra Psychic NaN 25 20 15 105 55 90 1 False
511 460 AbomasnowMega Abomasnow Grass Ice 90 132 105 132 105 30 4 False
510 460 Abomasnow Grass Ice 90 92 75 92 85 60 4 False

800 rows × 12 columns

# 排序: 多字段排序
df.sort_values(['Type 1', 'HP'])
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
316 292 Shedinja Bug Ghost 1 90 45 30 30 40 3 False
230 213 Shuckle Bug Rock 20 10 230 10 230 5 2 False
462 415 Combee Bug Flying 30 30 42 30 42 70 4 False
603 543 Venipede Bug Poison 30 45 59 30 39 57 5 False
314 290 Nincada Bug Ground 31 45 90 30 30 40 3 False
... ... ... ... ... ... ... ... ... ... ... ... ...
142 131 Lapras Water Ice 130 85 80 85 95 60 1 False
145 134 Vaporeon Water NaN 130 65 60 110 95 65 1 False
350 320 Wailmer Water NaN 130 70 35 70 35 60 3 False
655 594 Alomomola Water NaN 165 75 80 40 45 65 5 False
351 321 Wailord Water NaN 170 90 45 90 45 60 3 False

800 rows × 12 columns

# 排序: 多字段排序, 指定每个字段的排序顺序
df.sort_values(['Type 1', 'HP'], ascending=[0, 1])
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
139 129 Magikarp Water NaN 20 10 55 15 20 80 1 False
381 349 Feebas Water NaN 20 15 20 10 55 80 3 False
97 90 Shellder Water NaN 30 65 100 45 25 40 1 False
106 98 Krabby Water NaN 30 105 90 25 25 50 1 False
125 116 Horsea Water NaN 30 40 70 70 25 60 1 False
... ... ... ... ... ... ... ... ... ... ... ... ...
232 214 HeracrossMega Heracross Bug Fighting 80 185 115 40 105 75 2 False
678 617 Accelgor Bug NaN 80 70 40 100 60 145 5 False
734 666 Vivillon Bug Flying 80 52 50 90 50 89 6 False
698 637 Volcarona Bug Fire 85 60 65 135 105 100 5 False
520 469 Yanmega Bug Flying 86 76 86 116 56 95 4 False

800 rows × 12 columns

df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Total
0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 318
1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 405
2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 525
3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 625
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False 309
... ... ... ... ... ... ... ... ... ... ... ... ... ...
795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True 600
796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True 700
797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True 600
798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True 680
799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True 600

800 rows × 13 columns

df['Total'] = df['HP'] + df['Attack'] + df['Defense'] + df['Sp. Atk'] + df['Sp. Def'] + df['Speed']
# df.drop 不会修改 df, 需要赋值. 如果删除的 列 不存在, 则会报错
df = df.drop(columns=['Total'])df.head()
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False
# 当 axis=0 时,对每一列进行求和,返回一行结果。
# 当 axis=1 时,对每一行进行求和,返回一列结果。
df['Total'] = df.iloc[:, 4:10].sum(axis=1)df.head()
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Total
0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False 318
1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False 405
2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False 525
3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False 625
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False 309
read()df['Total'] = df.iloc[:, 4:10].sum(axis=1)# 输出指定列
df = df[['Total', 'HP', 'Defense']]df.head(5)
Total HP Defense
0 318 45 49
1 405 60 63
2 525 80 83
3 625 80 123
4 309 39 43
read()df['Total'] = df.iloc[:, 4:10].sum(axis=1)cols = list(df.columns)
print(cols)# 输出指定列
df = df[cols[0:4] + [cols[-1]] + cols[4:-1]]df.head(5)
['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary', 'Total']
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 309 39 52 43 60 50 65 1 False
# 写文件
# df.to_csv('/tmp/modified.csv', index=False)
# df.to_excel('/tmp/modified.xlsx', index=False)
# 多个字段过滤
df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison')]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
48 43 Oddish Grass Poison 320 45 50 55 75 65 30 1 False
49 44 Gloom Grass Poison 395 60 65 70 85 75 40 1 False
50 45 Vileplume Grass Poison 490 75 80 85 110 90 50 1 False
75 69 Bellsprout Grass Poison 300 50 75 35 70 30 40 1 False
76 70 Weepinbell Grass Poison 390 65 90 50 85 45 55 1 False
77 71 Victreebel Grass Poison 490 80 105 65 100 70 70 1 False
344 315 Roselia Grass Poison 400 50 60 45 100 80 65 3 False
451 406 Budew Grass Poison 280 40 30 35 50 70 55 4 False
452 407 Roserade Grass Poison 515 60 70 65 125 105 90 4 False
651 590 Foongus Grass Poison 294 69 55 45 55 55 15 5 False
652 591 Amoonguss Grass Poison 464 114 85 70 85 80 30 5 False
# 多个字段过滤
df.loc[(df['Type 1'] == 'Grass') | (df['Type 2'] == 'Poison')]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
16 13 Weedle Bug Poison 195 40 35 30 20 20 50 1 False
... ... ... ... ... ... ... ... ... ... ... ... ... ...
718 650 Chespin Grass NaN 313 56 61 65 48 45 38 6 False
719 651 Quilladin Grass NaN 405 61 78 95 56 58 57 6 False
720 652 Chesnaught Grass Fighting 530 88 107 122 74 75 64 6 False
740 672 Skiddo Grass NaN 350 66 65 48 62 57 52 6 False
741 673 Gogoat Grass NaN 531 123 100 62 97 81 68 6 False

89 rows × 13 columns

new_df = df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)]# new_df.to_csv('/tmp/filtered.csv', index=False)# "index" 是 df 的 index. 会重新为 new_df 生成index
new_df = new_df.reset_index()new_df
index # Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
1 3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
2 50 45 Vileplume Grass Poison 490 75 80 85 110 90 50 1 False
3 77 71 Victreebel Grass Poison 490 80 105 65 100 70 70 1 False
4 652 591 Amoonguss Grass Poison 464 114 85 70 85 80 30 5 False
new_df = df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] > 70)]# 会重新为 new_df 生成index, 删除 df 保留下来的 index 列
new_df = new_df.reset_index(drop=True)new_df
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
1 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
2 45 Vileplume Grass Poison 490 75 80 85 110 90 50 1 False
3 71 Victreebel Grass Poison 490 80 105 65 100 70 70 1 False
4 591 Amoonguss Grass Poison 464 114 85 70 85 80 30 5 False
df.loc[df['Name'].str.contains('Mega')]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
7 6 CharizardMega Charizard X Fire Dragon 634 78 130 111 130 85 100 1 False
8 6 CharizardMega Charizard Y Fire Flying 634 78 104 78 159 115 100 1 False
12 9 BlastoiseMega Blastoise Water NaN 630 79 103 120 135 115 78 1 False
19 15 BeedrillMega Beedrill Bug Poison 495 65 150 40 15 80 145 1 False
23 18 PidgeotMega Pidgeot Normal Flying 579 83 80 80 135 80 121 1 False
71 65 AlakazamMega Alakazam Psychic NaN 590 55 50 65 175 95 150 1 False
87 80 SlowbroMega Slowbro Water Psychic 590 95 75 180 130 80 30 1 False
102 94 GengarMega Gengar Ghost Poison 600 60 65 80 170 95 130 1 False
124 115 KangaskhanMega Kangaskhan Normal NaN 590 105 125 100 60 100 100 1 False
137 127 PinsirMega Pinsir Bug Flying 600 65 155 120 65 90 105 1 False
141 130 GyaradosMega Gyarados Water Dark 640 95 155 109 70 130 81 1 False
154 142 AerodactylMega Aerodactyl Rock Flying 615 80 135 85 70 95 150 1 False
163 150 MewtwoMega Mewtwo X Psychic Fighting 780 106 190 100 154 100 130 1 True
164 150 MewtwoMega Mewtwo Y Psychic NaN 780 106 150 70 194 120 140 1 True
168 154 Meganium Grass NaN 525 80 82 100 83 100 80 2 False
196 181 AmpharosMega Ampharos Electric Dragon 610 90 95 105 165 110 45 2 False
224 208 SteelixMega Steelix Steel Ground 610 75 125 230 55 95 30 2 False
229 212 ScizorMega Scizor Bug Steel 600 70 150 140 65 100 75 2 False
232 214 HeracrossMega Heracross Bug Fighting 600 80 185 115 40 105 75 2 False
248 229 HoundoomMega Houndoom Dark Fire 600 75 90 90 140 90 115 2 False
268 248 TyranitarMega Tyranitar Rock Dark 700 100 164 150 95 120 71 2 False
275 254 SceptileMega Sceptile Grass Dragon 630 70 110 75 145 85 145 3 False
279 257 BlazikenMega Blaziken Fire Fighting 630 80 160 80 130 80 100 3 False
283 260 SwampertMega Swampert Water Ground 635 100 150 110 95 110 70 3 False
306 282 GardevoirMega Gardevoir Psychic Fairy 618 68 85 65 165 135 100 3 False
327 302 SableyeMega Sableye Dark Ghost 480 50 85 125 85 115 20 3 False
329 303 MawileMega Mawile Steel Fairy 480 50 105 125 55 95 50 3 False
333 306 AggronMega Aggron Steel NaN 630 70 140 230 60 80 50 3 False
336 308 MedichamMega Medicham Fighting Psychic 510 60 100 85 80 85 100 3 False
339 310 ManectricMega Manectric Electric NaN 575 70 75 80 135 80 135 3 False
349 319 SharpedoMega Sharpedo Water Dark 560 70 140 70 110 65 105 3 False
354 323 CameruptMega Camerupt Fire Ground 560 70 120 100 145 105 20 3 False
366 334 AltariaMega Altaria Dragon Fairy 590 75 110 110 110 105 80 3 False
387 354 BanetteMega Banette Ghost NaN 555 64 165 75 93 83 75 3 False
393 359 AbsolMega Absol Dark NaN 565 65 150 60 115 60 115 3 False
397 362 GlalieMega Glalie Ice NaN 580 80 120 80 120 80 100 3 False
409 373 SalamenceMega Salamence Dragon Flying 700 95 145 130 120 90 120 3 False
413 376 MetagrossMega Metagross Steel Psychic 700 80 145 150 105 110 110 3 False
418 380 LatiasMega Latias Dragon Psychic 700 80 100 120 140 150 110 3 True
420 381 LatiosMega Latios Dragon Psychic 700 80 130 100 160 120 110 3 True
426 384 RayquazaMega Rayquaza Dragon Flying 780 105 180 100 180 100 115 3 True
476 428 LopunnyMega Lopunny Normal Fighting 580 65 136 94 54 96 135 4 False
494 445 GarchompMega Garchomp Dragon Ground 700 108 170 115 120 95 92 4 False
498 448 LucarioMega Lucario Fighting Steel 625 70 145 88 140 70 112 4 False
511 460 AbomasnowMega Abomasnow Grass Ice 594 90 132 105 132 105 30 4 False
527 475 GalladeMega Gallade Psychic Fighting 618 68 165 95 65 115 110 4 False
591 531 AudinoMega Audino Normal Fairy 545 103 60 126 80 126 50 5 False
796 719 DiancieMega Diancie Rock Fairy 700 50 160 110 160 110 110 6 True
import re
df.loc[df['Type 1'].str.contains('Fire|grass', regex=True, flags=re.I)]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 318 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 405 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 525 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 625 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 309 39 52 43 60 50 65 1 False
... ... ... ... ... ... ... ... ... ... ... ... ... ...
735 667 Litleo Fire Normal 369 62 50 58 73 54 72 6 False
736 668 Pyroar Fire Normal 507 86 68 72 109 66 106 6 False
740 672 Skiddo Grass NaN 350 66 65 48 62 57 52 6 False
741 673 Gogoat Grass NaN 531 123 100 62 97 81 68 6 False
799 721 Volcanion Fire Water 600 80 110 120 130 90 70 6 True

122 rows × 13 columns

import re
df.loc[df['Name'].str.contains('^pi[a-z]', regex=True, flags=re.I)]
# Name Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
20 16 Pidgey Normal Flying 251 40 45 40 35 35 56 1 False
21 17 Pidgeotto Normal Flying 349 63 60 55 50 50 71 1 False
22 18 Pidgeot Normal Flying 479 83 80 75 70 70 101 1 False
23 18 PidgeotMega Pidgeot Normal Flying 579 83 80 80 135 80 121 1 False
30 25 Pikachu Electric NaN 320 35 55 40 50 50 90 1 False
136 127 Pinsir Bug NaN 500 65 125 100 55 70 85 1 False
137 127 PinsirMega Pinsir Bug Flying 600 65 155 120 65 90 105 1 False
186 172 Pichu Electric NaN 205 20 40 15 35 35 60 2 False
219 204 Pineco Bug NaN 290 50 65 90 35 35 15 2 False
239 221 Piloswine Ice Ground 450 100 100 80 60 60 50 2 False
438 393 Piplup Water NaN 314 53 51 53 61 56 40 4 False
558 499 Pignite Fire Fighting 418 90 93 55 70 55 55 5 False
578 519 Pidove Normal Flying 264 50 55 50 36 30 43 5 False
read()
# 第一个参数满足条件的, 设置第二个参数的列为后面的值
df.loc[df['Type 1'] == 'Grass', 'Type 1'] = 'Flamer'df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Flamer Poison 45 49 49 65 65 45 1 False
1 2 Ivysaur Flamer Poison 60 62 63 80 80 60 1 False
2 3 Venusaur Flamer Poison 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Flamer Poison 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False
... ... ... ... ... ... ... ... ... ... ... ... ...
795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True
796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True
797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True
798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True
799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True

800 rows × 12 columns

read()
# 第一个参数满足条件的, 设置第二个参数的列为后面的值
df.loc[df['Type 1'] == 'Grass', ['Type 1', 'Lendary']] = 'TEST VALUE'df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Lendary
0 1 Bulbasaur TEST VALUE Poison 45 49 49 65 65 45 1 False TEST VALUE
1 2 Ivysaur TEST VALUE Poison 60 62 63 80 80 60 1 False TEST VALUE
2 3 Venusaur TEST VALUE Poison 80 82 83 100 100 80 1 False TEST VALUE
3 3 VenusaurMega Venusaur TEST VALUE Poison 80 100 123 122 120 80 1 False TEST VALUE
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True NaN
796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True NaN
797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True NaN
798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True NaN
799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True NaN

800 rows × 13 columns

read()
# 第一个参数满足条件的, 设置第二个参数的列为后面的值
df.loc[df['Type 1'] == 'Grass', ['Type 1', 'Lendary']] = ['TEST 1', 'TEST 2']df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary Lendary
0 1 Bulbasaur TEST 1 Poison 45 49 49 65 65 45 1 False TEST 2
1 2 Ivysaur TEST 1 Poison 60 62 63 80 80 60 1 False TEST 2
2 3 Venusaur TEST 1 Poison 80 82 83 100 100 80 1 False TEST 2
3 3 VenusaurMega Venusaur TEST 1 Poison 80 100 123 122 120 80 1 False TEST 2
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ...
795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True NaN
796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True NaN
797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True NaN
798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True NaN
799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True NaN

800 rows × 13 columns

read()# 分组 求平均数
df.groupby(['Type 1']).mean(numeric_only=True).sort_values('Defense', ascending=False)
# HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
Type 1
Steel 442.851852 65.222222 92.703704 126.370370 67.518519 80.629630 55.259259 3.851852 0.148148
Rock 392.727273 65.363636 92.863636 100.795455 63.340909 75.477273 55.909091 3.454545 0.090909
Dragon 474.375000 83.312500 112.125000 86.375000 96.843750 88.843750 83.031250 3.875000 0.375000
Ground 356.281250 73.781250 95.750000 84.843750 56.468750 62.750000 63.906250 3.156250 0.125000
Ghost 486.500000 64.437500 73.781250 81.187500 79.343750 76.468750 64.343750 4.187500 0.062500
Water 303.089286 72.062500 74.151786 72.946429 74.812500 70.517857 65.964286 2.857143 0.035714
Ice 423.541667 72.000000 72.750000 71.416667 77.541667 76.291667 63.458333 3.541667 0.083333
Grass 344.871429 67.271429 73.214286 70.800000 77.500000 70.428571 61.928571 3.357143 0.042857
Bug 334.492754 56.884058 70.971014 70.724638 53.869565 64.797101 61.681159 3.217391 0.000000
Dark 461.354839 66.806452 88.387097 70.225806 74.645161 69.516129 76.161290 4.032258 0.064516
Poison 251.785714 67.250000 74.678571 68.821429 60.428571 64.392857 63.571429 2.535714 0.000000
Fire 327.403846 69.903846 84.769231 67.769231 88.980769 72.211538 74.442308 3.211538 0.096154
Psychic 380.807018 70.631579 71.456140 67.684211 98.403509 86.280702 81.491228 3.385965 0.245614
Electric 363.500000 59.795455 69.090909 66.295455 90.022727 73.704545 84.500000 3.272727 0.090909
Flying 677.750000 70.750000 78.750000 66.250000 94.250000 72.500000 102.500000 5.500000 0.500000
Fighting 363.851852 69.851852 96.777778 65.925926 53.111111 64.703704 66.074074 3.370370 0.000000
Fairy 449.529412 74.117647 61.529412 65.705882 78.529412 84.705882 48.588235 4.117647 0.058824
Normal 319.173469 77.275510 73.469388 59.846939 55.816327 63.724490 71.551020 3.051020 0.020408
read()# 分组 求和
df.groupby(['Type 1']).sum(numeric_only=True).sort_values('HP', ascending=False)
# HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
Type 1
Water 33946 8071 8305 8170 8379 7898 7388 320 4
Normal 31279 7573 7200 5865 5470 6245 7012 299 2
Grass 24141 4709 5125 4956 5425 4930 4335 235 3
Psychic 21706 4026 4073 3858 5609 4918 4645 193 14
Bug 23080 3925 4897 4880 3717 4471 4256 222 0
Fire 17025 3635 4408 3524 4627 3755 3871 167 5
Rock 17280 2876 4086 4435 2787 3321 2460 152 4
Dragon 15180 2666 3588 2764 3099 2843 2657 124 12
Electric 15994 2631 3040 2917 3961 3243 3718 144 4
Ground 11401 2361 3064 2715 1807 2008 2045 101 4
Dark 14302 2071 2740 2177 2314 2155 2361 125 2
Ghost 15568 2062 2361 2598 2539 2447 2059 134 2
Fighting 9824 1886 2613 1780 1434 1747 1784 91 0
Poison 7050 1883 2091 1927 1692 1803 1780 71 0
Steel 11957 1761 2503 3412 1823 2177 1492 104 4
Ice 10165 1728 1746 1714 1861 1831 1523 85 2
Fairy 7642 1260 1046 1117 1335 1440 826 70 1
Flying 2711 283 315 265 377 290 410 22 2
read()# 分组 求数量
df.groupby(['Type 1']).count().sort_values('HP', ascending=False)
# Name Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
Type 1
Water 112 112 53 112 112 112 112 112 112 112 112
Normal 98 98 37 98 98 98 98 98 98 98 98
Grass 70 70 37 70 70 70 70 70 70 70 70
Bug 69 69 52 69 69 69 69 69 69 69 69
Psychic 57 57 19 57 57 57 57 57 57 57 57
Fire 52 52 24 52 52 52 52 52 52 52 52
Electric 44 44 17 44 44 44 44 44 44 44 44
Rock 44 44 35 44 44 44 44 44 44 44 44
Ghost 32 32 22 32 32 32 32 32 32 32 32
Ground 32 32 19 32 32 32 32 32 32 32 32
Dragon 32 32 21 32 32 32 32 32 32 32 32
Dark 31 31 21 31 31 31 31 31 31 31 31
Poison 28 28 13 28 28 28 28 28 28 28 28
Fighting 27 27 7 27 27 27 27 27 27 27 27
Steel 27 27 22 27 27 27 27 27 27 27 27
Ice 24 24 11 24 24 24 24 24 24 24 24
Fairy 17 17 2 17 17 17 17 17 17 17 17
Flying 4 4 2 4 4 4 4 4 4 4 4
read()# 每行都设置 count = 1
df['count'] = 1# 分组 求数量
df.groupby(['Type 1']).count()['count']
Type 1
Bug          69
Dark         31
Dragon       32
Electric     44
Fairy        17
Fighting     27
Fire         52
Flying        4
Ghost        32
Grass        70
Ground       32
Ice          24
Normal       98
Poison       28
Psychic      57
Rock         44
Steel        27
Water       112
Name: count, dtype: int64
read()# 每行都设置 count = 1
df['count'] = 1# 分组 求数量
df.groupby(['Type 1', 'Type 2']).count()['count']
Type 1  Type 2  
Bug     Electric     2Fighting     2Fire         2Flying      14Ghost        1..
Water   Ice          3Poison       3Psychic      5Rock         4Steel        1
Name: count, Length: 136, dtype: int64
# 读取大文件
# 分批读取数据, chunksize 行数
for df in pd.read_csv(f'{data_dir}/pandas/pokemon_data.csv', chunksize=5):
#     print('CHUNK DF:::')
#     print(df['Name']);
# 读取大文件
# 分批读取数据, chunksize 行数new_df = pd.DataFrame()
for df in pd.read_csv(f'{data_dir}/pandas/pokemon_data.csv', chunksize=5):new_df = pd.concat([new_df, df])
#     results = df.groupby(['Type 1']).count()
#     new_df = pd.concat([new_df, results])new_df
# Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
0 1 Bulbasaur Grass Poison 45 49 49 65 65 45 1 False
1 2 Ivysaur Grass Poison 60 62 63 80 80 60 1 False
2 3 Venusaur Grass Poison 80 82 83 100 100 80 1 False
3 3 VenusaurMega Venusaur Grass Poison 80 100 123 122 120 80 1 False
4 4 Charmander Fire NaN 39 52 43 60 50 65 1 False
... ... ... ... ... ... ... ... ... ... ... ... ...
795 719 Diancie Rock Fairy 50 100 150 100 150 50 6 True
796 719 DiancieMega Diancie Rock Fairy 50 160 110 160 110 110 6 True
797 720 HoopaHoopa Confined Psychic Ghost 80 110 60 150 130 70 6 True
798 720 HoopaHoopa Unbound Psychic Dark 80 160 60 170 130 80 6 True
799 721 Volcanion Fire Water 80 110 120 130 90 70 6 True

800 rows × 12 columns