123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- from datetime import datetime
- import statistics
- import numpy as np
- import pandas as pd
- import matplotlib.pyplot as plt
- import seaborn as sns
- import warnings
- warnings.simplefilter("ignore")
###############
# Filtering & Sampling Params
###############
# Module-level rolling stacks and error counters for temperature and humidity.
# (processDataset declares local variables with these same names and works on
# those instead.)
dht22_temp_stack = []
dht22_temp_stack_errors = 0
dht22_hum_stack = []
dht22_hum_stack_errors = 0
# Maximum number of samples intended to be kept in each rolling stack.
dht22_stack_size = 10
# Width of the acceptance band around the stack mean, in standard deviations.
dht22_std_deviation = 3
# Error count at which processSensorValue flushes a stack; the same value is
# also used there as the bootstrap threshold for the stack length.
dht22_error_count_stack_flush = 3
# Temperature unit; getTemperature converts to Fahrenheit only when this is 'F'.
dht22mqtt_temp_unit = 'C'
- ###############
- # Polling & Processing functions
- ###############
def getTemperatureJitter(temperature):
    """Return a (low, high) pair of readings 0.3 below and 0.3 above the input.

    Both values are passed through getTemperature, so they are expressed in
    the configured temperature unit.
    """
    lower = getTemperature(temperature - 0.3)
    upper = getTemperature(temperature + 0.3)
    return lower, upper
def getTemperature(temperature):
    """Return the reading in the unit selected by dht22mqtt_temp_unit.

    The value is converted with the Celsius-to-Fahrenheit formula when the
    unit is 'F'; for any other unit it is returned unchanged.
    """
    if dht22mqtt_temp_unit != 'F':
        return temperature
    return temperature * (9 / 5) + 32
def getHumidity(humidity):
    """Return the humidity reading as-is; no conversion is applied."""
    reading = humidity
    return reading
def processSensorValue(stack, error, value, value_type):
    """Classify one sensor reading against a rolling stack of recent values.

    Args:
        stack: list of previously accepted readings. It is appended to in
            place, except when it is flushed (a fresh list is then used).
        error: current error counter for this stack.
        value: the new reading to evaluate.
        value_type: 'temperature' enables jittered bootstrap values; any
            other string (e.g. 'humidity') disables them.

    Returns:
        A ``(stack, error, outlier)`` tuple. ``outlier`` is ``None`` while the
        stack is still bootstrapping, ``False`` when the reading lies strictly
        inside mean +/- dht22_std_deviation * population-stddev of the stack,
        and ``True`` otherwise.
    """
    # flush stack on accumulation of errors
    if error >= dht22_error_count_stack_flush:
        stack = []
        error = 0

    # init stack: while it is short, accept readings without classifying them
    if len(stack) <= dht22_error_count_stack_flush:
        if value not in stack:
            stack.append(value)
        # use jitter for bootstrap temperature stack
        # (presumably so the bootstrap stack has a non-zero spread - confirm)
        if value_type == 'temperature':
            low, high = getTemperatureJitter(value)
            stack.append(low)
            stack.append(high)
        return stack, error, None

    # get statistics
    std = statistics.pstdev(stack)
    mean = statistics.mean(stack)

    # compute if outlier or not (strict inequalities: a zero spread rejects
    # every value, including one equal to the mean)
    if mean - std * dht22_std_deviation < value < mean + std * dht22_std_deviation:
        outlier = False
        # only distinct values are stored
        if value not in stack:
            stack.append(value)
        error = 0
    else:
        outlier = True
        error += 1

    # remove oldest elements until the stack respects the configured size
    # (previously a hard-coded 10 that ignored dht22_stack_size)
    while len(stack) > dht22_stack_size:
        stack.pop(0)
    return stack, error, outlier
- ###############
- # Dataset processing
- ###############
def timestampToSeconds(timestamp_begin, timestamp):
    """Return the number of seconds elapsed from timestamp_begin to timestamp.

    Both arguments are POSIX timestamps (seconds since the epoch). The result
    is a float and is negative when ``timestamp`` precedes ``timestamp_begin``.
    """
    # Local import: the file only imports `datetime` from the datetime module.
    from datetime import timezone

    # Use timezone-aware UTC datetimes. Naive local-time datetimes would make
    # the difference wrong by the DST offset whenever the two instants
    # straddle a daylight-saving transition.
    begin = datetime.fromtimestamp(timestamp_begin, tz=timezone.utc)
    end = datetime.fromtimestamp(timestamp, tz=timezone.utc)
    return (end - begin).total_seconds()
def generatePlots(dataset, data_type):
    """Draw a 5x5 grid of scatter plots of ``data_type`` at decreasing sampling rates.

    Each subplot uses every ``reduce_rate``-th row of ``dataset`` (1 for the
    first subplot, increasing by one per subplot), runs it through
    processDataset and colours the points by the resulting 'type' column.
    Rows flagged with reset == 'True' are marked with a thin vertical line.

    Args:
        dataset: DataFrame with 'timestamp', 'temperature' and 'humidity'
            columns.
        data_type: name of the column plotted on the y axis.

    Note:
        Draws into the module-level ``axes`` array, which must already hold a
        5x5 grid of matplotlib axes when this function is called.
    """
    plot_rows = 5
    plot_columns = 5
    reduce_rate = 1
    for r in range(plot_rows):
        for c in range(plot_columns):
            ax = axes[r, c]
            # Explicit copy: processDataset adds and writes columns, which
            # must not happen on a view of the caller's frame.
            temp_dataset = dataset.iloc[::reduce_rate, :].copy()
            # NOTE(review): this is mean(timestamp) / row count - confirm it
            # is the intended estimate of the sampling interval.
            freq = dataset['timestamp'].mean()/len(temp_dataset.index)
            print('generating '+data_type+' plot from data with sampling frequency s='+str(freq)+'...')
            temp_dataset = processDataset(temp_dataset)
            ax.set_title(data_type + ' at sampling frequency '+str(round(freq, 2))+' (s)')
            sns.scatterplot(ax=ax, data=temp_dataset, x='timestamp', y=data_type, hue='type', s=10)
            # visualize stack flushes on THIS subplot (plt.axvline would draw
            # on pyplot's current axes, not on axes[r, c])
            resets = temp_dataset[temp_dataset['reset'] == 'True']
            for reset_timestamp in resets['timestamp']:
                ax.axvline(x=reset_timestamp, color='k', alpha=1, linewidth=0.3)
            reduce_rate += 1
def processDataset(dataset):
    """Run the outlier filter over every row and annotate the frame in place.

    Temperature and humidity are each fed through processSensorValue with
    their own rolling stack and error counter, starting empty. The following
    columns are written on ``dataset``:

    * 'temperature_outlier' / 'humidity_outlier': the outlier flag returned
      by processSensorValue (None while a stack is bootstrapping).
    * 'type': 'both outlier', 'temperature outlier', 'humidity outlier' or
      'accurate' (bootstrap rows count as 'accurate' because None is falsy).
    * 'reset': 'True' on rows where either error counter has reached the
      flush threshold, '' otherwise.

    Args:
        dataset: DataFrame with 'temperature' and 'humidity' columns.

    Returns:
        The same DataFrame object, with the columns above added.
    """
    temp_stack = []
    temp_errors = 0
    hum_stack = []
    hum_errors = 0

    dataset.loc[:, 'type'] = ''
    dataset.loc[:, 'reset'] = ''

    for key, row in dataset.iterrows():
        temp_stack, temp_errors, temperature_outlier = processSensorValue(
            temp_stack, temp_errors, row['temperature'], 'temperature')
        hum_stack, hum_errors, humidity_outlier = processSensorValue(
            hum_stack, hum_errors, row['humidity'], 'humidity')

        dataset.at[key, 'temperature_outlier'] = temperature_outlier
        dataset.at[key, 'humidity_outlier'] = humidity_outlier

        # record outlier detection source
        if temperature_outlier and humidity_outlier:
            dataset.at[key, 'type'] = 'both outlier'
        elif temperature_outlier:
            dataset.at[key, 'type'] = 'temperature outlier'
        elif humidity_outlier:
            dataset.at[key, 'type'] = 'humidity outlier'
        else:
            dataset.at[key, 'type'] = 'accurate'

        # record reset pivots: use the same threshold processSensorValue uses
        # to flush a stack, instead of a separate hard-coded 3
        if (temp_errors >= dht22_error_count_stack_flush
                or hum_errors >= dht22_error_count_stack_flush):
            dataset.at[key, 'reset'] = 'True'
    return dataset
# Input recording and output locations.
dataset_dir = 'datasets/'
plots_dir = 'plots/'
filename = '2021-01-30T20-08-36Z_recording'

# Load the recording and turn its timestamps into seconds elapsed since the
# first row.
csv_path = dataset_dir + filename + '.csv'
dataset = pd.read_csv(csv_path)
dataset['timestamp'] = np.vectorize(timestampToSeconds)(
    dataset['timestamp'][0],
    dataset['timestamp'],
)
print('formatted timestamps into seconds...')

# generatePlots draws into the module-level `axes`, so a fresh grid is bound
# to that name before each call.
fig, axes = plt.subplots(5, 5, figsize=(50, 25))
generatePlots(dataset, 'temperature')
temperature_png = plots_dir + filename + '_temperature.png'
plt.savefig(temperature_png)
plt.clf()

fig, axes = plt.subplots(5, 5, sharex=True, figsize=(50, 25))
generatePlots(dataset, 'humidity')
humidity_png = plots_dir + filename + '_humidity.png'
plt.savefig(humidity_png)
|