"""Clean and pre-process the auto-mpg dataset and save the result to CSV.

Steps, in order:
1. Load 'auto-mpg.csv' and print a preview, the column dtypes and the
   per-column missing-value counts.
2. Drop rows with missing values, coerce 'horsepower' to a numeric dtype
   and drop the rows whose value could not be converted.
3. Standardize the continuous numeric feature columns.
4. Select the feature columns and the 'mpg' target, and split them into
   training and test sets (80% / 20%).
5. Write the selected features plus 'mpg' to '2.1.1_cleaned_data.csv'.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset and show its first five rows.
data = pd.read_csv('auto-mpg.csv')
print("数据集的前五行:")
print(data.head())

# Show the data type of every column.
print(data.dtypes)

# Report the number of missing values per column, then drop every row that
# contains one.
print("\n检查缺失值:")
print(data.isnull().sum())
# .copy() makes the filtered frame independent, so the column assignment
# below does not raise pandas' SettingWithCopyWarning.
data = data.dropna().copy()

# Convert 'horsepower' to a numeric dtype. errors='coerce' turns entries that
# cannot be parsed into NaN, and those rows are dropped right after.
data['horsepower'] = pd.to_numeric(data['horsepower'], errors='coerce')
data = data.dropna(subset=['horsepower']).copy()

# Show the dtype of the converted 'horsepower' column.
print(data.horsepower.dtypes)

# Re-check missing values after cleaning.
print("\n检查清洗后的缺失值:")
print(data.isnull().sum())

# Standardize the continuous numeric columns in place.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split below; confirm this is acceptable for the exercise.
numerical_features = ['displacement', 'horsepower', 'weight', 'acceleration']
scaler = StandardScaler()
data[numerical_features] = scaler.fit_transform(data[numerical_features])

# Select the feature columns (independent variables) and the target variable.
selected_features = [
    'cylinders',
    'displacement',
    'horsepower',
    'weight',
    'acceleration',
    'model year',
    'origin',
]
X = data[selected_features]
y = data['mpg']

# Split into training and test sets (80% training); random_state fixes the
# shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Combine the features and the target variable into a single data frame.
cleaned_data = X.copy()
cleaned_data['mpg'] = y

# Save the cleaned and processed data without writing the index column.
cleaned_data.to_csv('2.1.1_cleaned_data.csv', index=False)

# Print a message indicating that the file has been saved.
print("\n清洗后的数据已保存到 2.1.1_cleaned_data.csv")