Ilwis-Objects  1.0
GIS and Remote Sensing framework for data access and processing
 All Classes Functions Enumerations Pages
containerstatistics.h
1 #ifndef ContainerStatistics_H
2 #define ContainerStatistics_H
3 
4 #include <vector>
5 #include <algorithm>
6 #include <iostream>
7 #include <boost/accumulators/accumulators.hpp>
8 #include <boost/accumulators/statistics/stats.hpp>
9 #include <boost/accumulators/statistics/mean.hpp>
10 #include <boost/accumulators/statistics/median.hpp>
11 #include <boost/accumulators/statistics/max.hpp>
12 #include <boost/accumulators/statistics/min.hpp>
13 #include <boost/accumulators/statistics/count.hpp>
14 #include <boost/accumulators/statistics/sum.hpp>
15 
16 using namespace boost::accumulators;
17 
18 namespace Ilwis{
19 
20 template<typename DataType> class KERNELSHARED_EXPORT ContainerStatistics
21 {
22 public:
23  typedef accumulator_set<DataType, stats<tag::mean,tag::min, tag::max, tag::count>> Basic;
24  typedef accumulator_set<DataType, stats<tag::median>> Median;
25 
26  struct HistogramBin{
27  HistogramBin(DataType limit=0, quint64 count=0) : _limit(limit), _count(count) {}
28 
29  DataType _limit;
30  quint64 _count;
31  };
32 
33  enum PropertySets{pBASIC=0, pMIN=1, pMAX=2, pDISTANCE=4, pDELTA=8,pNETTOCOUNT=16, pCOUNT=32, pSUM=64,
34  pMEAN=128, pMEDIAN=256, pPREDOMINANT=512, pSTDEV=1024, pHISTOGRAM=2048, pLAST=4096, pALL=4294967296};
35 
37  _sigDigits = shUNDEF;
38  _markers.resize(index(pLAST));
39  DataType undefined = undef<DataType>();
40  std::fill(_markers.begin(), _markers.end(), undefined);
41  }
42 
43  double operator[](PropertySets method) const{
44  if ( method == 0)
45  return rUNDEF;
46 
47  quint32 ind = index(method);
48  if ( ind < _markers.size()) {
49  if ( hasType(method, pMEAN | pMIN | pMAX | pMEDIAN))
50  return (DataType)_markers[ind];
51 
52  return _markers[ind] ;
53  }
54  return rUNDEF;
55  }
56 
57  std::vector<HistogramBin> histogram() {
58  return _bins;
59  }
60 
61 
62  double prop(PropertySets method) const{
63  return operator[](method);
64  }
65 
66  quint16 significantDigits() const {
67  return _sigDigits;
68  }
69 
70 
71  void findSignificantDigits(double distance) {
72  if ( distance == 0)
73  _sigDigits = 0;
74  else{
75  double d = prop(pMAX) - prop(pMIN);
76  int lenBase = log10(std::abs(d)) / 2;
77  QString num = QString::number(distance,'g',10);
78  quint32 len = num.size();
79  for(int i=num.size() - 2; i >=0; ++i){
80  QChar n = num[i];
81  if ( n == '.')
82  break;
83  if ( n != '0'){
84  len = i;
85  break;
86  }
87  }
88  _sigDigits = -log10(distance) + std::max( 3 - lenBase,1);
89  _sigDigits = std::min(len, _sigDigits);
90  }
91  }
92  void binCount(quint32 value) {
93  _binCount = value;
94  }
95 
96 
97 
98  template<typename IterType> bool calculate(const IterType& begin, const IterType& end, PropertySets mode=pBASIC){
99  Basic basicMarkers;
100  Median median;
101  quint64 count = 0;
102  DataType undefined = undef<DataType>();
103  double sigDigits = 0;
104  double rest = 0;
105  std::for_each(begin, end, [&] (const DataType& sample){
106  count++;
107  if ( sample != undefined) {
108  rest = fabs(sample - (qint64)sample);
109  sigDigits = std::max(sigDigits, rest - sigDigits);
110  basicMarkers(sample);
111  if ( hasType(mode, pMEDIAN)) {
112  median(sample);
113  }
114  }
115 
116  });
117  bool isUndefined = boost::accumulators::count(basicMarkers) == 0;
118  std::fill(_markers.begin(), _markers.end(), rUNDEF);
119  if( !isUndefined) {
120  _markers[index(pMIN)] = boost::accumulators::min(basicMarkers);
121  _markers[index(pMAX)] = boost::accumulators::max(basicMarkers);
122  _markers[index(pDISTANCE)] = std::abs(prop(pMAX) - prop(pMIN));
123  _markers[index(pDELTA)] = prop(pMAX) - prop(pMIN);
124  _markers[index(pNETTOCOUNT)] = boost::accumulators::count(basicMarkers);
125  _markers[index(pCOUNT)] = count;
126  _markers[index(pSUM)] = boost::accumulators::sum(basicMarkers);
127  _markers[index(pMEAN)] = boost::accumulators::mean(basicMarkers);
128  _markers[index(pMEDIAN)] = boost::accumulators::median(median);
129  findSignificantDigits(sigDigits);
130 
131  if ( mode & pSTDEV) {
132  _markers[index(pSTDEV)] = calcStdDev(begin, end, undefined);
133  }
134  if ( mode & pHISTOGRAM) {
135 
136  double ncount = prop(pNETTOCOUNT);
137  if ( ncount > 1) {
138  if (_binCount == iUNDEF ){
139  if ( prop(pSTDEV) == rUNDEF) {
140  _markers[index(pSTDEV)] = calcStdDev(begin, end, undefined);
141  }
142  if ( _markers[index(pSTDEV)] != rUNDEF ) {
143  double h = 3.5 * _markers[index(pSTDEV)] / pow(ncount, 0.3333);
144  _binCount = prop(pDISTANCE) / h;
145  }
146  }
147  }
148 
149  _bins.resize(_binCount);
150  double delta = prop(pDELTA);
151  for(int i=0; i < _binCount; ++i ) {
152  _bins[i] = HistogramBin(i * ( delta / _binCount));
153  }
154  std::for_each(begin, end, [&] (const DataType& sample){
155  quint16 index = getOffsetFactorFor(sample);
156  _bins[index]._count++;
157  });
158  }
159  }
160 
161  return true;
162  }
163 
164  bool isValid() const {
165  return prop(pMAX) != rUNDEF;
166  }
167 
168  double stretchLinear(double input, int stretchRange) const {
169  if ( input == rUNDEF)
170  return rUNDEF;
171 
172  double stretchFactor = stretchRange / prop(pDELTA);
173  quint16 index = getOffsetFactorFor(input);
174  return _bins[index]._limit * stretchFactor;
175  }
176 
185  std::pair<double,double> stretchLimits(double percent) const {
186  if (percent == rUNDEF) {
187  return std::pair<double,double>(prop(pMIN),prop(pMAX));
188  }
189  double delta = prop(pDELTA);
190  double downsizeBy = percent * delta / 100;
191  double newLower = prop(pMIN) + downsizeBy;
192  double newUpper = prop(pMAX) - downsizeBy;
193  return std::pair<double,double>(newLower,newUpper);
194  }
195 
196 private:
197  std::vector<double> _markers;
198 
199  quint32 _sigDigits;
200  std::vector<HistogramBin> _bins;
201  quint32 _binCount=iUNDEF;
202 
203  quint32 index(PropertySets method) const {
204  if ( method == 0)
205  return 0;
206  return (quint32)(log(method) / log(2) + 0.2);
207  }
208 
209  template<typename IterType> double calcStdDev(const IterType& begin, const IterType& end, DataType undefined) {
210  double ncount = prop(pNETTOCOUNT);
211  if ( ncount < 2)
212  return rUNDEF;
213  double acc = 0;
214  double mean = prop(pMEAN);
215  std::for_each(begin, end, [&] (const DataType& sample){
216  if ( sample != undefined)
217  acc += (sample - mean) * (sample - mean);
218  });
219  return sqrt(acc / (ncount-1));
220 
221  }
222 
223 
224  quint16 getOffsetFactorFor(const DataType& sample) const {
225  //double rmax = prop(pMAX);
226  //return _bins.size() * (double)(rmax - sample) / prop(pDELTA);
227  double rmin = prop(pMIN);
228  return _bins.size() * (double)(sample - rmin) / prop(pDELTA);
229  }
230 
231  double getBinWidth() const {
232  if (_bins.size() > 1) {
233  return _bins[1]._limit;
234  } else {
235  return _bins[0]._limit;
236  }
237  }
238 
239 
240 };
241 typedef ContainerStatistics<double> NumericStatistics;
242 }
243 
244 #endif // ContainerStatistics_H