# dht22mqtt_visualize.py
from datetime import datetime, timezone
import statistics
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

warnings.simplefilter("ignore")
  9. ###############
  10. # Filtering & Sampling Params
  11. ###############
  12. dht22_temp_stack = []
  13. dht22_temp_stack_errors = 0
  14. dht22_hum_stack = []
  15. dht22_hum_stack_errors = 0
  16. dht22_stack_size = 10
  17. dht22_std_deviation = 3
  18. dht22_error_count_stack_flush = 3
  19. dht22mqtt_temp_unit = 'C'
###############
# Polling & Processing functions
###############
  23. def getTemperatureJitter(temperature):
  24. return getTemperature(temperature-0.3), getTemperature(temperature+0.3)
  25. def getTemperature(temperature):
  26. if(dht22mqtt_temp_unit == 'F'):
  27. temperature = temperature * (9 / 5) + 32
  28. return temperature
  29. def getHumidity(humidity):
  30. return humidity
  31. def processSensorValue(stack, error, value, value_type):
  32. # flush stack on accumulation of errors
  33. if(error >= dht22_error_count_stack_flush):
  34. stack = []
  35. error = 0
  36. # init stack
  37. if(len(stack) <= dht22_error_count_stack_flush):
  38. if(value not in stack):
  39. stack.append(value)
  40. # use jitter for bootstrap temperature stack
  41. if(value_type == 'temperature'):
  42. low, high = getTemperatureJitter(value)
  43. stack.append(low)
  44. stack.append(high)
  45. return stack, error, None
  46. # get statistics
  47. std = statistics.pstdev(stack)
  48. mean = statistics.mean(stack)
  49. # compute if outlier or not
  50. if(mean-std*dht22_std_deviation < value < mean+std*dht22_std_deviation):
  51. outlier = False
  52. if(value not in stack):
  53. stack.append(value)
  54. error = 0
  55. else:
  56. outlier = True
  57. error += 1
  58. # remove oldest element from stack
  59. if(len(stack) > 10):
  60. stack.pop(0)
  61. return stack, error, outlier
###############
# Dataset processing
###############
  65. def timestampToSeconds(timestamp_begin, timestamp):
  66. b = datetime.fromtimestamp(timestamp_begin)
  67. e = datetime.fromtimestamp(timestamp)
  68. return (e-b).total_seconds()
  69. def generatePlots(dataset, data_type):
  70. plot_rows = 5
  71. plot_columns = 5
  72. reduce_rate = 1
  73. for r in np.arange(plot_rows):
  74. for c in np.arange(plot_columns):
  75. temp_dataset = dataset.iloc[::reduce_rate, :]
  76. freq = dataset['timestamp'].mean()/len(temp_dataset.index)
  77. print('generating '+data_type+' plot from data with sampling frequency s='+str(freq)+'...')
  78. temp_dataset = processDataset(temp_dataset)
  79. axes[r, c].set_title(data_type + ' at sampling frequency '+str(round(freq, 2))+' (s)')
  80. sns.scatterplot(ax=axes[r, c], data=temp_dataset, x='timestamp', y=data_type, hue='type', s=10)
  81. # visualize stack flushes
  82. resets = temp_dataset[temp_dataset['reset'] == 'True']
  83. for key, row in resets.iterrows():
  84. plt.axvline(x=row['timestamp'], color='k', alpha=1, linewidth=0.3)
  85. reduce_rate += 1
  86. def processDataset(dataset):
  87. dht22_temp_stack = []
  88. dht22_temp_stack_errors = 0
  89. dht22_hum_stack = []
  90. dht22_hum_stack_errors = 0
  91. dataset.loc[:, 'type'] = ''
  92. dataset.loc[:, 'reset'] = ''
  93. for key, row in dataset.iterrows():
  94. temperature = row['temperature']
  95. humidity = row['humidity']
  96. temp_data = processSensorValue(dht22_temp_stack,
  97. dht22_temp_stack_errors,
  98. temperature,
  99. 'temperature')
  100. dht22_temp_stack = temp_data[0]
  101. dht22_temp_stack_errors = temp_data[1]
  102. temperature_outlier = temp_data[2]
  103. hum_data = processSensorValue(dht22_hum_stack,
  104. dht22_hum_stack_errors,
  105. humidity,
  106. 'humidity')
  107. dht22_hum_stack = hum_data[0]
  108. dht22_hum_stack_errors = hum_data[1]
  109. humidity_outlier = hum_data[2]
  110. dataset.at[key, 'temperature_outlier'] = temperature_outlier
  111. dataset.at[key, 'humidity_outlier'] = humidity_outlier
  112. # record outlier detection source
  113. if(temperature_outlier and humidity_outlier):
  114. dataset.at[key, 'type'] = 'both outlier'
  115. elif(temperature_outlier):
  116. dataset.at[key, 'type'] = 'temperature outlier'
  117. elif(humidity_outlier):
  118. dataset.at[key, 'type'] = 'humidity outlier'
  119. else:
  120. dataset.at[key, 'type'] = 'accurate'
  121. # record reset pivots
  122. if(dht22_temp_stack_errors >= 3):
  123. dataset.at[key, 'reset'] = 'True'
  124. if(dht22_hum_stack_errors >= 3):
  125. dataset.at[key, 'reset'] = 'True'
  126. return dataset
  127. dataset_dir = 'datasets/'
  128. plots_dir = 'plots/'
  129. filename = '2021-01-30T20-08-36Z_recording'
  130. dataset = pd.read_csv(dataset_dir+filename+'.csv')
  131. dataset['timestamp'] = np.vectorize(timestampToSeconds)(dataset['timestamp'][0], dataset['timestamp'])
  132. print('formatted timestamps into seconds...')
  133. fig, axes = plt.subplots(5, 5, figsize=(50, 25))
  134. generatePlots(dataset, 'temperature')
  135. plt.savefig(plots_dir+filename+'_temperature.png')
  136. plt.clf()
  137. fig, axes = plt.subplots(5, 5, sharex=True, figsize=(50, 25))
  138. generatePlots(dataset, 'humidity')
  139. plt.savefig(plots_dir+filename+'_humidity.png')