version  0.0.1
Defines the C++ API for MsPASS
VectorStatistics.h
1 #ifndef _VECTORSTATISTICS_H_
2 #define _VECTORSTATISTICS_H_
3 #include <vector>
4 #include <algorithm>
5 #include "mspass/utility/MsPASSError.h"
6 namespace mspass
7 {
8 namespace utility{
9 
15 template <class T> class VectorStatistics
16 {
17 public:
23  VectorStatistics(std::vector<T> din);
25  VectorStatistics(T *din,int n);
27  T median();
29  T mean();
31  T q1_4();
33  T q3_4();
35  T interquartile();
37  T mad(T center);
39  T ssq();
41  T upper_bound();
43  T lower_bound();
45  T range();
48  T quantile(size_t n);
49 private:
50  std::vector<T> d;
51 };
52 
53 template <class T> VectorStatistics<T>::VectorStatistics(std::vector<T> din)
54 {
55  if(din.size()<=1) throw mspass::utility::MsPASSError(std::string("VectorStatistics constructor: ")
56  + "input vector has insufficient data to compute statistics",
57  mspass::utility::ErrorSeverity::Invalid);
58  d=din;
59  std::sort(d.begin(),d.end());
60 }
61 template <class T> VectorStatistics<T>::VectorStatistics(T *din,int n)
62 {
63  if(n<=1) throw mspass::utility::MsPASSError(std::string("VectorStatistics constructor: ")
64  + "input vector has insufficient data to compute statistics",
65  mspass::utility::ErrorSeverity::Invalid);
66  d.reserve(n);
67  for(int i=0;i<n;++i) d.push_back(din[i]);
68  std::sort(d.begin(),d.end());
69 }
70 template <class T> T VectorStatistics<T>::median()
71 {
72  int count=d.size();
73  int medposition=count/2;
74  if(count%2)
75  return(d[medposition]);
76  else
77  return( (d[medposition]+d[medposition-1])/2 );
78 }
79 template <class T> T VectorStatistics<T>::mean()
80 {
81  T result;
82  result=0;
83  for(int i=0;i<d.size();++i)
84  {
85  result += d[i];
86  }
87  return (result/d.size());
88 }
89 template <class T> T VectorStatistics<T>::q1_4()
90 {
91  int n=d.size();
92  double result;
93  if(n<4)
94  return(d[0]);
95  else
96  {
97  int nover4=(n-1)/4;
98  switch(n%4)
99  {
100  case(0):
101  result=static_cast<double>(d[nover4]);
102  break;
103  case(1):
104  result=0.75*static_cast<double>(d[nover4]) + 0.25*static_cast<double>(d[nover4+1]);
105  break;
106  case(2):
107  result=static_cast<double>(d[nover4]) + static_cast<double>(d[nover4+1]);
108  result /= 2.0;
109  break;
110  case(3):
111  result=0.25*static_cast<double>(d[nover4]) + 0.75*static_cast<double>(d[nover4+1]);
112  }
113  }
114  return(static_cast<T>(result));
115 }
116 template <class T> T VectorStatistics<T>::q3_4()
117 {
118  int n=d.size();
119  double result;
120  if(n<4)
121  return(d[n-1]);
122  else
123  {
124  int n3_4=3*(n-1)/4;
125  switch(n%4)
126  {
127  case(0):
128  result=static_cast<double>(d[n3_4]);
129  break;
130  case(1):
131  result=0.75*static_cast<double>(d[n3_4]) + 0.25*static_cast<double>(d[n3_4+1]);
132  break;
133  case(2):
134  result=static_cast<double>(d[n3_4]) + static_cast<double>(d[n3_4+1]);
135  result /= 2.0;
136  break;
137  case(3):
138  result=0.25*static_cast<double>(d[n3_4]) + 0.75*static_cast<double>(d[n3_4+1]);
139  }
140  }
141  return(static_cast<T>(result));
142 }
144 {
145  T result;
146  T d1_4,d3_4;
147  d1_4=this->q1_4();
148  d3_4=this->q3_4();
149  return(d3_4 - d1_4);
150 }
151 template <class T> T VectorStatistics<T>::mad(T center)
152 {
153  std::vector<T> absdiff;
154  int n=d.size();
155  int i;
156  for(i=0;i<n;++i)
157  {
158  T diff;
159  diff=d[i]-center;
160  if(diff<0) diff=-diff;
161  absdiff.push_back(diff);
162  }
163  VectorStatistics<T> result(absdiff);
164  return(result.median());
165 }
166 template <class T> T VectorStatistics<T>::ssq()
167 {
168  T result;
169  int i;
170  for(i=0;i<d.size();++i) result=d[i]*d[i];
171  return(result);
172 }
173 template <class T> T VectorStatistics<T>::upper_bound()
174 {
175  return(d[d.size()-1]);
176 }
177 template <class T> T VectorStatistics<T>::lower_bound()
178 {
179  return(d[0]);
180 }
181 template <class T> T VectorStatistics<T>::range()
182 {
183  T result;
184  result=this->upper_bound();
185  result-=this->lower_bound();
186  return(result);
187 }
188 template <class T> T VectorStatistics<T>::quantile(size_t n)
189 {
190  if(n>=d.size())
191  {
192  std::stringstream ss;
193  ss << "VectorStatistics::quantile method: asked for 1-quantile number "
194  << n<<" but data vecotor length is only "<<d.size()<<std::endl;
195  throw mspass::utility::MsPASSError(ss.str(),mspass::utility::ErrorSeverity::Invalid);
196  }
197 }
198 } // end utility namespace
199 } /* End mspass namespace encapsulation */
200 #endif
Base class for error object thrown by MsPASS library routines.
Definition: MsPASSError.h:40
Generic object to compute common robust statistics from a vector container of data.
Definition: VectorStatistics.h:16
T median()
Definition: VectorStatistics.h:70
T q3_4()
Definition: VectorStatistics.h:116
T ssq()
Definition: VectorStatistics.h:166
T q1_4()
Definition: VectorStatistics.h:89
T quantile(size_t n)
Definition: VectorStatistics.h:188
T mad(T center)
Definition: VectorStatistics.h:151
T interquartile()
Definition: VectorStatistics.h:143
T range()
Definition: VectorStatistics.h:181
T upper_bound()
Definition: VectorStatistics.h:173
T lower_bound()
Definition: VectorStatistics.h:177
VectorStatistics(std::vector< T > din)
Definition: VectorStatistics.h:53
T mean()
Definition: VectorStatistics.h:79