import pandas as pd

# Read the CSV file into a DataFrame and print it.
# Note: make sure the file path points to where the CSV actually lives.
df = pd.read_csv(r"D:\Desktop\one.csv")
print(df)
# 输出结果
#    ID   Name  Age      City  Salary
# 0   1   Jack   28   Beijing   22000
# 1   2   Lida   32  Shanghai   19000
# 2   3   John   43  Shenzhen   12000
# 3   4  Helen   38  Hengshui    3500
(1)自定义索引
在CSV文件中指定某一列后,通过index_col参数即可将该列设为行索引,从而实现自定义索引。
import pandas as pd

# Use the CSV's "ID" column as the row index of the DataFrame.
df = pd.read_csv(r"D:\Desktop\one.csv", index_col=['ID'])
print(df)
# 输出结果
#      Name  Age      City  Salary
# ID
# 1    Jack   28   Beijing   22000
# 2    Lida   32  Shanghai   19000
# 3    John   43  Shenzhen   12000
# 4   Helen   38  Hengshui    3500
(2)查看每一列的dtype
import numpy as np
import pandas as pd

# Print the dtype of every column as read by default.
df1 = pd.read_csv(r"D:\Desktop\one.csv")
print(df1.dtypes)
print("*" * 20)
# Read the file again, this time loading the Salary column as float64.
df2 = pd.read_csv(r"D:\Desktop\one.csv", dtype={'Salary': np.float64})
print(df2.dtypes)
# 输出结果
# ID         int64
# Name      object
# Age        int64
# City      object
# Salary     int64
# dtype: object
# ********************
# ID          int64
# Name       object
# Age         int64
# City       object
# Salary    float64
# dtype: object
# 输出结果
#     a      b    c         d       e
# 0  ID   Name  Age      City  Salary
# 1   1   Jack   28   Beijing   22000
# 2   2   Lida   32  Shanghai   19000
# 3   3   John   43  Shenzhen   12000
# 4   4  Helen   38  Hengshui    3500
# ********************
#    a      b   c         d      e
# 0  1   Jack  28   Beijing  22000
# 1  2   Lida  32  Shanghai  19000
# 2  3   John  43  Shenzhen  12000
# 3  4  Helen  38  Hengshui   3500
(4)跳过指定的行数
skiprows参数表示跳过指定的行数。注意:包含标头所在行。
import pandas as pd

# Skip the first two lines of the file before parsing; the header line
# counts as one of the skipped lines.
df = pd.read_csv(r"D:\Desktop\one.csv", skiprows=2)
print(df)
# 输出结果
#    2   Lida  32  Shanghai  19000
# 0  3   John  43  Shenzhen  12000
# 1  4  Helen  38  Hengshui   3500
# 输出结果
#    ID   Name  Age      City  Salary
# 0   1   Jack   28   Beijing   22000
# 1   2   Lida   32  Shanghai   19000
# 2   3   John   43  Shenzhen   12000
# 3   4  Helen   38  Hengshui    3500
# ********************
#            City  Salary
# Name
# Jack    Beijing   22000
# Lida   Shanghai   19000
# John   Shenzhen   12000
# Helen  Hengshui    3500
# ********************
# Name
# Jack     22000
# Lida     19000
# John     12000
# Helen     3500
# Name: Salary, dtype: int64
import pandas as pd

# Build a small DataFrame and print it with its default row index.
info = pd.DataFrame({
    'Name': ['Parker', 'Terry', 'Smith', 'William'],
    'Year': [2011, 2009, 2014, 2010],
    'Leaves': [10, 15, 9, 4],
})
print(info)
print("*" * 20)
# Use the Name column as the row index (set_index returns a new DataFrame;
# `info` itself is left unchanged).
print(info.set_index('Name'))
# 输出结果
#       Name  Year  Leaves
# 0   Parker  2011      10
# 1    Terry  2009      15
# 2    Smith  2014       9
# 3  William  2010       4
# ********************
#          Year  Leaves
# Name
# Parker   2011      10
# Terry    2009      15
# Smith    2014       9
# William  2010       4
(3)重置索引
使用reset_index()可以恢复默认的整数行索引(从0开始),原来的索引会作为名为index的新列保留下来。
import numpy as np
import pandas as pd

# Build a DataFrame with an explicit row index 1..4; the last row has a
# missing Language value.
info = pd.DataFrame(
    [
        ('William', 'C'),
        ('Smith', 'Java'),
        ('Parker', 'Python'),
        ('Phill', np.nan),
    ],
    index=[1, 2, 3, 4],
    columns=('name', 'Language'),
)
print(info)
print("*" * 20)
# reset_index returns a new DataFrame with a fresh 0-based index and the
# previous index kept as an "index" column.
print(info.reset_index())
# 输出结果
#       name Language
# 1  William        C
# 2    Smith     Java
# 3   Parker   Python
# 4    Phill      NaN
# ********************
#    index     name Language
# 0      1  William        C
# 1      2    Smith     Java
# 2      3   Parker   Python
# 3      4    Phill      NaN