1.1.2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# 读取数据集 2分
data = pd.read_csv('sensor_data.csv')
# 1. 传感器数据统计
# 对传感器类型进行分组,并计算每个组的数据数量和平均值 3分
sensor_stats = data.groupby('SensorType')['Value'].agg(['count','mean'])
# 输出结果
print("传感器数据数量和平均值:")
print(sensor_stats)

传感器数据数量和平均值:
count mean
SensorType
Humidity 2065 48.978302
Light 1950 49.439011
SoilMoisture 1951 49.325119
SoilPH 2029 49.865460
Temperature 2005 49.847153

# 2. 按位置统计温度和湿度数据
# 筛选出温度和湿度数据,然后按位置和传感器类型分组,计算每个组的平均值 2分
location_stats = data[data['SensorType'].isin(['Humidity','Temperature'])].groupby(['Location','SensorType'])['Value'].mean().unstack()
# 输出结果
print("每个位置的温度和湿度数据平均值:")
print(location_stats)

每个位置的温度和湿度数据平均值:
SensorType Humidity Temperature
Location
Field1 49.036008 50.482617
Field2 47.379411 49.874282
Field3 49.583266 48.761804
Field4 49.955498 50.241608

# 3. 数据清洗和异常值处理
# 标记异常值 3分
data['is_abnormal'] = np.where(
    ((data['SensorType'] == 'Temperature' ) & ((data['Value'] < -10) | (data['Value'] > 50))) |
    ((data['SensorType'] == 'Humidity') & ((data['Value'] < 0) | (data['Value'] > 100))),
    True, False
)
# 输出异常值数量 2分
print("异常值数量:", data['is_abnormal'].sum())
# 填补缺失值
# 使用前向填充和后向填充的方法填补缺失值 4分
data['Value'].fillna(method='ffill', inplace=True)
data['Value'].fillna(method='bfill', inplace=True)
# 保存清洗后的数据
# 删除用于标记异常值的列,并将清洗后的数据保存到新的CSV文件中 4分
cleaned_data = data.drop(columns=['is_abnormal'])
cleaned_data.to_csv('cleaned_sensor_data.csv', index=False)
print("数据清洗完成,已保存为 'cleaned_sensor_data.csv'")

异常值数量: 1011

/var/folders/rq/lzp3p28j4wsfzc34n6x5rdlh0000gn/T/ipykernel_57555/14547590.py:12: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.

data['Value'].fillna(method='ffill', inplace=True)
/var/folders/rq/lzp3p28j4wsfzc34n6x5rdlh0000gn/T/ipykernel_57555/14547590.py:12: FutureWarning: Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.
data['Value'].fillna(method='ffill', inplace=True)
/var/folders/rq/lzp3p28j4wsfzc34n6x5rdlh0000gn/T/ipykernel_57555/14547590.py:13: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.

data['Value'].fillna(method='bfill', inplace=True)
/var/folders/rq/lzp3p28j4wsfzc34n6x5rdlh0000gn/T/ipykernel_57555/14547590.py:13: FutureWarning: Series.fillna with 'method' is deprecated and will raise in a future version. Use obj.ffill() or obj.bfill() instead.
data['Value'].fillna(method='bfill', inplace=True)

数据清洗完成,已保存为 'cleaned_sensor_data.csv'