# dht22mqtt_visualize.py
  1. from datetime import datetime
  2. import statistics
  3. import numpy as np
  4. import pandas as pd
  5. import matplotlib.pyplot as plt
  6. import seaborn as sns
###############
# Filtering & Sampling Params
###############
# Module-level sample stacks and error counters for the temperature and
# humidity readings.
dht22_temp_stack = []
dht22_temp_stack_errors = 0
dht22_hum_stack = []
dht22_hum_stack_errors = 0
# Nominal number of samples kept in a stack.
dht22_stack_size = 10
# Half-width of the acceptance band around the stack mean, expressed in
# population standard deviations; processSensorValue treats values outside
# the band as outliers.
dht22_std_deviation = 3
# Number of consecutive outliers after which processSensorValue flushes a
# stack; the same value is also its bootstrap length threshold.
dht22_error_count_stack_flush = 3
# Output temperature unit: 'F' makes getTemperature apply the
# Celsius-to-Fahrenheit formula, any other value leaves the reading as is.
dht22mqtt_temp_unit = 'C'
  18. ###############
  19. # Polling & Processing functions
  20. ###############
  21. def getTemperatureJitter(temperature):
  22. return getTemperature(temperature-0.3), getTemperature(temperature+0.3)
  23. def getTemperature(temperature):
  24. if(dht22mqtt_temp_unit == 'F'):
  25. temperature = temperature * (9 / 5) + 32
  26. return temperature
  27. def getHumidity(humidity):
  28. return humidity
  29. def processSensorValue(stack, error, value, value_type):
  30. # flush stack on accumulation of errors
  31. if(error >= dht22_error_count_stack_flush):
  32. stack = []
  33. error = 0
  34. # init stack
  35. if(len(stack) <= dht22_error_count_stack_flush):
  36. if(value not in stack):
  37. stack.append(value)
  38. # use jitter for bootstrap temperature stack
  39. if(value_type == 'temperature'):
  40. low, high = getTemperatureJitter(value)
  41. stack.append(low)
  42. stack.append(high)
  43. return stack, error, None
  44. # get statistics
  45. std = statistics.pstdev(stack)
  46. mean = statistics.mean(stack)
  47. # compute if outlier or not
  48. if(mean-std*dht22_std_deviation < value < mean+std*dht22_std_deviation):
  49. outlier = False
  50. if(value not in stack):
  51. stack.append(value)
  52. error = 0
  53. else:
  54. outlier = True
  55. error += 1
  56. # remove oldest element from stack
  57. if(len(stack) > 10):
  58. stack.pop(0)
  59. return stack, error, outlier
  60. ###############
  61. # Dataset processing
  62. ###############
  63. def timestampToSeconds(timestamp_begin, timestamp):
  64. b = datetime.fromtimestamp(timestamp_begin/1000)
  65. e = datetime.fromtimestamp(timestamp/1000)
  66. return (e-b).total_seconds()
  67. def generatePlots(dataset, data_type):
  68. plot_rows = 3
  69. plot_columns = 4
  70. reduce_rate = 1
  71. for r in np.arange(plot_rows):
  72. for c in np.arange(plot_columns):
  73. temp_dataset = dataset.iloc[::reduce_rate, :]
  74. freq = dataset['timestamp'].mean()/len(temp_dataset.index)
  75. print('generating plot at frequency s='+str(freq)+'...')
  76. temp_dataset = processDataset(temp_dataset)
  77. axes[r, c].set_title(data_type + ' at sampling frequency '+str(round(freq, 2))+' (s)')
  78. sns.scatterplot(ax=axes[r, c], data=temp_dataset, x='timestamp', y=data_type, hue='type', s=10)
  79. # visualize stack flushes
  80. resets = temp_dataset[temp_dataset['reset'] == 'True']
  81. for key, row in resets.iterrows():
  82. plt.axvline(x=row['timestamp'], color='k', alpha=1, linewidth=0.3)
  83. reduce_rate += 1
  84. def processDataset(dataset):
  85. dht22_temp_stack = []
  86. dht22_temp_stack_errors = 0
  87. dht22_hum_stack = []
  88. dht22_hum_stack_errors = 0
  89. dataset.loc[:, 'type'] = ''
  90. dataset.loc[:, 'reset'] = ''
  91. for key, row in dataset.iterrows():
  92. temperature = row['temperature']
  93. humidity = row['humidity']
  94. temp_data = processSensorValue(dht22_temp_stack,
  95. dht22_temp_stack_errors,
  96. temperature,
  97. 'temperature')
  98. dht22_temp_stack = temp_data[0]
  99. dht22_temp_stack_errors = temp_data[1]
  100. temperature_outlier = temp_data[2]
  101. hum_data = processSensorValue(dht22_hum_stack,
  102. dht22_hum_stack_errors,
  103. humidity,
  104. 'humidity')
  105. dht22_hum_stack = hum_data[0]
  106. dht22_hum_stack_errors = hum_data[1]
  107. humidity_outlier = hum_data[2]
  108. dataset.at[key, 'temperature_outlier'] = temperature_outlier
  109. dataset.at[key, 'humidity_outlier'] = humidity_outlier
  110. # record outlier detection source
  111. if(temperature_outlier and humidity_outlier):
  112. dataset.at[key, 'type'] = 'both outlier'
  113. elif(temperature_outlier):
  114. dataset.at[key, 'type'] = 'temperature outlier'
  115. elif(humidity_outlier):
  116. dataset.at[key, 'type'] = 'humidity outlier'
  117. else:
  118. dataset.at[key, 'type'] = 'accurate'
  119. # record reset pivots
  120. if(dht22_temp_stack_errors >= 3):
  121. dataset.at[key, 'reset'] = 'True'
  122. if(dht22_hum_stack_errors >= 3):
  123. dataset.at[key, 'reset'] = 'True'
  124. return dataset
  125. dataset_dir = 'datasets/'
  126. plots_dir = 'plots/'
  127. dataset = pd.read_csv(dataset_dir+'dataset.csv')
  128. dataset['timestamp'] = np.vectorize(timestampToSeconds)(dataset['timestamp'][0], dataset['timestamp'])
  129. print('formatted timestamps into seconds...')
  130. fig, axes = plt.subplots(3, 4, figsize=(50, 25))
  131. generatePlots(dataset, 'temperature')
  132. plt.savefig(plots_dir+'temperature.png')
  133. plt.clf()
  134. fig, axes = plt.subplots(3, 4, sharex=True, figsize=(50, 25))
  135. generatePlots(dataset, 'humidity')
  136. plt.savefig(plots_dir+'humidity.png')