Engauge Digitizer  2
GridClassifier.cpp
1 /******************************************************************************************************
2  * (C) 2014 markummitchell@github.com. This file is part of Engauge Digitizer, which is released *
3  * under GNU General Public License version 2 (GPLv2) or (at your option) any later version. See file *
4  * LICENSE or go to gnu.org/licenses for details. Distribution requires prior written permission. *
5  ******************************************************************************************************/
6 
7 #include "ColorFilter.h"
8 #include "Correlation.h"
9 #include "DocumentModelCoords.h"
10 #include "EngaugeAssert.h"
11 #include "gnuplot.h"
12 #include "GridClassifier.h"
13 #include <iostream>
14 #include "Logger.h"
15 #include <QDebug>
16 #include <QFile>
17 #include <QImage>
18 #include <qmath.h>
19 #include "QtToString.h"
20 #include "Transformation.h"
21 
22 int GridClassifier::NUM_PIXELS_PER_HISTOGRAM_BINS = 1;
23 double GridClassifier::PEAK_HALF_WIDTH = 4;
24 int GridClassifier::MIN_STEP_PIXELS = qFloor (4 * GridClassifier::PEAK_HALF_WIDTH); // Step includes down ramp, flat part, up ramp
25 const QString GNUPLOT_DELIMITER ("\t");
26 
27 // We set up the picket fence with binStart arbitrarily set close to zero. Peak is
28 // not exactly at zero since we want to include the left side of the first peak.
29 int GridClassifier::BIN_START_UNSHIFTED = qFloor (GridClassifier::PEAK_HALF_WIDTH);
30 
31 using namespace std;
32 
34 {
35 }
36 
37 int GridClassifier::binFromCoordinate (double coord,
38  double coordMin,
39  double coordMax) const
40 {
41  ENGAUGE_ASSERT (coordMin < coordMax);
42  ENGAUGE_ASSERT (coordMin <= coord);
43  ENGAUGE_ASSERT (coord <= coordMax);
44 
45  int bin = qFloor (0.5 + (m_numHistogramBins - 1.0) * (coord - coordMin) / (coordMax - coordMin));
46 
47  return bin;
48 }
49 
50 void GridClassifier::classify (bool isGnuplot,
51  const QPixmap &originalPixmap,
52  const Transformation &transformation,
53  int &countX,
54  double &startX,
55  double &stepX,
56  int &countY,
57  double &startY,
58  double &stepY)
59 {
60  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::classify";
61 
62  QImage image = originalPixmap.toImage ();
63 
64  m_numHistogramBins = image.width() / NUM_PIXELS_PER_HISTOGRAM_BINS;
65  ENGAUGE_ASSERT (m_numHistogramBins > 1);
66 
67  double xMin, xMax, yMin, yMax;
68  double binStartX, binStepX, binStartY, binStepY;
69 
70  m_binsX = new double [unsigned (m_numHistogramBins)];
71  m_binsY = new double [unsigned (m_numHistogramBins)];
72 
73  computeGraphCoordinateLimits (image,
74  transformation,
75  xMin,
76  xMax,
77  yMin,
78  yMax);
79  initializeHistogramBins ();
80  populateHistogramBins (image,
81  transformation,
82  xMin,
83  xMax,
84  yMin,
85  yMax);
86  searchStartStepSpace (isGnuplot,
87  m_binsX,
88  "x",
89  xMin,
90  xMax,
91  startX,
92  stepX,
93  binStartX,
94  binStepX);
95  searchStartStepSpace (isGnuplot,
96  m_binsY,
97  "y",
98  yMin,
99  yMax,
100  startY,
101  stepY,
102  binStartY,
103  binStepY);
104  searchCountSpace (m_binsX,
105  binStartX,
106  binStepX,
107  countX);
108  searchCountSpace (m_binsY,
109  binStartY,
110  binStepY,
111  countY);
112 
113  delete [] m_binsX;
114  delete [] m_binsY;
115 }
116 
117 void GridClassifier::computeGraphCoordinateLimits (const QImage &image,
118  const Transformation &transformation,
119  double &xMin,
120  double &xMax,
121  double &yMin,
122  double &yMax)
123 {
124  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::computeGraphCoordinateLimits";
125 
126  // Project every pixel onto the x axis, and onto the y axis. One set of histogram bins will be
127  // set up along each of the axes. The range of bins will encompass every pixel in the image, and no more.
128 
129  QPointF posGraphTL, posGraphTR, posGraphBL, posGraphBR;
130  transformation.transformScreenToRawGraph (QPointF (0, 0) , posGraphTL);
131  transformation.transformScreenToRawGraph (QPointF (image.width(), 0) , posGraphTR);
132  transformation.transformScreenToRawGraph (QPointF (0, image.height()) , posGraphBL);
133  transformation.transformScreenToRawGraph (QPointF (image.width(), image.height()), posGraphBR);
134 
135  // Compute x and y ranges for setting up the histogram bins
136  if (transformation.modelCoords().coordsType() == COORDS_TYPE_CARTESIAN) {
137 
138  // For affine cartesian coordinates, we only need to look at the screen corners
139  xMin = qMin (qMin (qMin (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
140  xMax = qMax (qMax (qMax (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
141  yMin = qMin (qMin (qMin (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
142  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
143 
144  } else {
145 
146  // For affine polar coordinates, easiest approach is to assume the full circle
147  xMin = 0.0;
148  xMax = transformation.modelCoords().thetaPeriod();
149  yMin = transformation.modelCoords().originRadius();
150  yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
151 
152  }
153 
154  ENGAUGE_ASSERT (xMin < xMax);
155  ENGAUGE_ASSERT (yMin < yMax);
156 }
157 
158 double GridClassifier::coordinateFromBin (int bin,
159  double coordMin,
160  double coordMax) const
161 {
162  ENGAUGE_ASSERT (1 < m_numHistogramBins);
163  ENGAUGE_ASSERT (coordMin < coordMax);
164 
165  return coordMin + (coordMax - coordMin) * double (bin) / (double (m_numHistogramBins) - 1.0);
166 }
167 
168 void GridClassifier::copyVectorToVector (const double from [],
169  double to []) const
170 {
171  for (int bin = 0; bin < m_numHistogramBins; bin++) {
172  to [bin] = from [bin];
173  }
174 }
175 
176 void GridClassifier::dumpGnuplotCoordinate (const QString &coordinateLabel,
177  double corr,
178  const double *bins,
179  double coordinateMin,
180  double coordinateMax,
181  int binStart,
182  int binStep) const
183 {
184  QString filename = QString ("gridclassifier_%1_corr%2_startMax%3_stepMax%4.gnuplot")
185  .arg (coordinateLabel)
186  .arg (corr, 8, 'f', 3, '0')
187  .arg (binStart)
188  .arg (binStep);
189 
190  cout << GNUPLOT_FILE_MESSAGE.toLatin1().data() << filename.toLatin1().data() << "\n";
191 
192  QFile fileDump (filename);
193  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
194  QTextStream strDump (&fileDump);
195 
196  int bin;
197 
198  // For consistent scaling, get the max bin count
199  int binCountMax = 0;
200  for (bin = 0; bin < m_numHistogramBins; bin++) {
201  if (bins [bin] > binCountMax) {
202  binCountMax = qMax (signed (binCountMax),
203  signed (bins [bin]));
204  }
205  }
206 
207  // Get picket fence
208  double *picketFence = new double [unsigned (m_numHistogramBins)];
209  loadPicketFence (picketFence,
210  binStart,
211  binStep,
212  0,
213  false);
214 
215  // Header
216  strDump << "bin"
217  << GNUPLOT_DELIMITER << "coordinate"
218  << GNUPLOT_DELIMITER << "binCount"
219  << GNUPLOT_DELIMITER << "startStep"
220  << GNUPLOT_DELIMITER << "picketFence" << "\n";
221 
222  // Data, with one row per coordinate value
223  for (bin = 0; bin < m_numHistogramBins; bin++) {
224 
225  double coordinate = coordinateFromBin (bin,
226  coordinateMin,
227  coordinateMax);
228  double startStepValue (((bin - binStart) % binStep == 0) ? 1 : 0);
229  strDump << bin
230  << GNUPLOT_DELIMITER << coordinate
231  << GNUPLOT_DELIMITER << bins [bin]
232  << GNUPLOT_DELIMITER << binCountMax * startStepValue
233  << GNUPLOT_DELIMITER << binCountMax * picketFence [bin] << "\n";
234  }
235 
236  delete [] picketFence;
237 }
238 
239 void GridClassifier::dumpGnuplotCorrelations (const QString &coordinateLabel,
240  double valueMin,
241  double valueMax,
242  const double signalA [],
243  const double signalB [],
244  const double correlations [])
245 {
246  QString filename = QString ("gridclassifier_%1_correlations.gnuplot")
247  .arg (coordinateLabel);
248 
249  cout << GNUPLOT_FILE_MESSAGE.toLatin1().data() << filename.toLatin1().data() << "\n";
250 
251  QFile fileDump (filename);
252  fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
253  QTextStream strDump (&fileDump);
254 
255  int bin;
256 
257  // Compute max values so curves can be normalized to the same heights
258  double signalAMax = 1, signalBMax = 1, correlationsMax = 1;
259  for (bin = 0; bin < m_numHistogramBins; bin++) {
260  if (bin == 0 || signalA [bin] > signalAMax) {
261  signalAMax = signalA [bin];
262  }
263  if (bin == 0 || signalB [bin] > signalBMax) {
264  signalBMax = signalB [bin];
265  }
266  if (bin == 0 || correlations [bin] > correlationsMax) {
267  correlationsMax = correlations [bin];
268  }
269  }
270 
271  // Prevent divide by zero error
272  if (qAbs (signalAMax) <= 0) {
273  signalAMax = 1.0;
274  }
275  if (qAbs (signalBMax) <= 0) {
276  signalBMax = 1.0;
277  }
278 
279  // Output normalized curves
280  for (int bin = 0; bin < m_numHistogramBins; bin++) {
281 
282  strDump << coordinateFromBin (bin,
283  valueMin,
284  valueMax)
285  << GNUPLOT_DELIMITER << signalA [bin] / signalAMax
286  << GNUPLOT_DELIMITER << signalB [bin] / signalBMax
287  << GNUPLOT_DELIMITER << correlations [bin] / correlationsMax << "\n";
288  }
289 }
290 
291 void GridClassifier::initializeHistogramBins ()
292 {
293  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::initializeHistogramBins";
294 
295  for (int bin = 0; bin < m_numHistogramBins; bin++) {
296  m_binsX [bin] = 0;
297  m_binsY [bin] = 0;
298  }
299 }
300 
301 void GridClassifier::loadPicketFence (double picketFence [],
302  int binStart,
303  int binStep,
304  int count,
305  bool isCount) const
306 {
307  const double PEAK_HEIGHT = 1.0; // Arbitrary height, since normalization will counteract any change to this value
308 
309  // Count argument is optional. Note that binStart already excludes PEAK_HALF_WIDTH bins at start,
310  // and we also exclude PEAK_HALF_WIDTH bins at the end
311  ENGAUGE_ASSERT (binStart >= PEAK_HALF_WIDTH);
312  ENGAUGE_ASSERT (binStep != 0);
313  if (!isCount) {
314  count = qFloor (1 + (m_numHistogramBins - binStart - PEAK_HALF_WIDTH) / binStep);
315  }
316 
317  // Bins that we need to worry about
318  int binStartMinusHalfWidth = qFloor (binStart - PEAK_HALF_WIDTH);
319  int binStopPlusHalfWidth = qFloor ((binStart + (count - 1) * binStep) + PEAK_HALF_WIDTH);
320 
321  // To normalize, we compute the area under the picket fence. Constraint is
322  // areaUnnormalized + NUM_HISTOGRAM_BINS * normalizationOffset = 0
323  double areaUnnormalized = count * PEAK_HEIGHT * PEAK_HALF_WIDTH;
324  double normalizationOffset = -1.0 * areaUnnormalized / m_numHistogramBins;
325 
326  for (int bin = 0; bin < m_numHistogramBins; bin++) {
327 
328  // Outside of the peaks, bin is small negative so correlation with unit function is zero. In other
329  // words, the function is normalized
330  picketFence [bin] = normalizationOffset;
331 
332  if ((binStartMinusHalfWidth <= bin) &&
333  (bin <= binStopPlusHalfWidth)) {
334 
335  // Closest peak
336  int ordinalClosestPeak = qFloor ((bin - binStart + binStep / 2) / binStep);
337  int binClosestPeak = binStart + ordinalClosestPeak * binStep;
338 
339  // Distance from closest peak is used to define an isosceles triangle
340  int distanceToClosestPeak = qAbs (bin - binClosestPeak);
341 
342  if (distanceToClosestPeak < PEAK_HALF_WIDTH) {
343 
344  // Map 0 to PEAK_HALF_WIDTH to 1 to 0
345  picketFence [bin] = 1.0 - double (distanceToClosestPeak) / PEAK_HALF_WIDTH + normalizationOffset;
346 
347  }
348  }
349  }
350 }
351 
352 void GridClassifier::populateHistogramBins (const QImage &image,
353  const Transformation &transformation,
354  double xMin,
355  double xMax,
356  double yMin,
357  double yMax)
358 {
359  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::populateHistogramBins";
360 
361  ColorFilter filter;
362  QRgb rgbBackground = filter.marginColor (&image);
363 
364  for (int x = 0; x < image.width(); x++) {
365  for (int y = 0; y < image.height(); y++) {
366 
367  QColor pixel = image.pixel (x, y);
368 
369  // Skip pixels with background color
370  if (!filter.colorCompare (rgbBackground,
371  pixel.rgb ())) {
372 
373  // Add this pixel to histograms
374  QPointF posGraph;
375  transformation.transformScreenToRawGraph (QPointF (x, y), posGraph);
376 
377  if (transformation.modelCoords().coordsType() == COORDS_TYPE_POLAR) {
378 
379  // If out of the 0 to period range, the theta value must shifted by the period to get into that range
380  while (posGraph.x() < xMin) {
381  posGraph.setX (posGraph.x() + transformation.modelCoords().thetaPeriod());
382  }
383  while (posGraph.x() > xMax) {
384  posGraph.setX (posGraph.x() - transformation.modelCoords().thetaPeriod());
385  }
386  }
387 
388  int binX = binFromCoordinate (posGraph.x(), xMin, xMax);
389  int binY = binFromCoordinate (posGraph.y(), yMin, yMax);
390 
391  ENGAUGE_ASSERT (0 <= binX);
392  ENGAUGE_ASSERT (0 <= binY);
393  ENGAUGE_ASSERT (binX < m_numHistogramBins);
394  ENGAUGE_ASSERT (binY < m_numHistogramBins);
395 
396  // Roundoff error in log scaling may let bin go just outside legal range
397  binX = qMin (binX, m_numHistogramBins - 1);
398  binY = qMin (binY, m_numHistogramBins - 1);
399 
400  ++m_binsX [binX];
401  ++m_binsY [binY];
402  }
403  }
404  }
405 }
406 
407 void GridClassifier::searchCountSpace (double bins [],
408  double binStart,
409  double binStep,
410  int &countMax)
411 {
412  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchCountSpace"
413  << " start=" << binStart
414  << " step=" << binStep;
415 
416  // Loop though the space of possible counts
417  Correlation correlation (m_numHistogramBins);
418  double *picketFence = new double [unsigned (m_numHistogramBins)];
419  double corr, corrMax = 0;
420  bool isFirst = true;
421  int countStop = qFloor (1 + (m_numHistogramBins - binStart) / binStep);
422  for (int count = 2; count <= countStop; count++) {
423 
424  loadPicketFence (picketFence,
425  qFloor (binStart),
426  qFloor (binStep),
427  count,
428  true);
429 
430  correlation.correlateWithoutShift (m_numHistogramBins,
431  bins,
432  picketFence,
433  corr);
434  if (isFirst || (corr > corrMax)) {
435  countMax = count;
436  corrMax = corr;
437  }
438 
439  isFirst = false;
440  }
441 
442  delete [] picketFence;
443 }
444 
445 void GridClassifier::searchStartStepSpace (bool isGnuplot,
446  double bins [],
447  const QString &coordinateLabel,
448  double valueMin,
449  double valueMax,
450  double &start,
451  double &step,
452  double &binStartMax,
453  double &binStepMax)
454 {
455  LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchStartStepSpace";
456 
457  // Correlations are tracked for logging
458  double *signalA = new double [unsigned (m_numHistogramBins)];
459  double *signalB = new double [unsigned (m_numHistogramBins)];
460  double *correlations = new double [unsigned (m_numHistogramBins)];
461  double *correlationsMax = new double [unsigned (m_numHistogramBins)];
462 
463  // Loop though the space of possible gridlines using the independent variables (start,step).
464  Correlation correlation (m_numHistogramBins);
465  double *picketFence = new double [unsigned (m_numHistogramBins)];
466  int binStart;
467  double corr = 0, corrMax = 0;
468  bool isFirst = true;
469 
470  // We do not explicitly search(=loop) through binStart here, since Correlation::correlateWithShift will take
471  // care of that for us
472 
473  // Step search starts out small, and stops at value that gives count substantially greater than 2. Freakishly small
474  // images need to have MIN_STEP_PIXELS overridden so the loop iterates at least once
475  binStartMax = BIN_START_UNSHIFTED + 1; // In case search below ever fails
476  binStepMax = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); // In case search below ever fails
477  for (int binStep = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); binStep < m_numHistogramBins / 4; binStep++) {
478 
479  loadPicketFence (picketFence,
480  BIN_START_UNSHIFTED,
481  binStep,
482  qFloor (PEAK_HALF_WIDTH),
483  false);
484 
485  correlation.correlateWithShift (m_numHistogramBins,
486  bins,
487  picketFence,
488  binStart,
489  corr,
490  correlations);
491  if (isFirst || (corr > corrMax)) {
492 
493  int binStartMaxNext = binStart + BIN_START_UNSHIFTED + 1; // Compensate for the shift performed inside loadPicketFence
494 
495  // Make sure binStartMax never goes out of bounds
496  if (binStartMaxNext < m_numHistogramBins) {
497 
498  binStartMax = binStartMaxNext;
499  binStepMax = binStep;
500  corrMax = corr;
501  copyVectorToVector (bins, signalA);
502  copyVectorToVector (picketFence, signalB);
503  copyVectorToVector (correlations, correlationsMax);
504 
505  // Output a gnuplot file. We should see the correlation values consistently increasing
506  if (isGnuplot) {
507 
508  dumpGnuplotCoordinate(coordinateLabel,
509  corr,
510  bins,
511  valueMin,
512  valueMax,
513  binStart,
514  binStep);
515  }
516  }
517  }
518 
519  isFirst = false;
520  }
521 
522  // Convert from bins back to graph coordinates
523  start = coordinateFromBin (qFloor (binStartMax),
524  valueMin,
525  valueMax);
526  if (binStartMax + binStepMax < m_numHistogramBins) {
527 
528  // Normal case where a reasonable step value is being calculated
529  double next = coordinateFromBin (qFloor (binStartMax + binStepMax),
530  valueMin,
531  valueMax);
532  step = next - start;
533  } else {
534 
535  // Pathological case where step jumps to outside the legal range. We bring the step back into range
536  double next = coordinateFromBin (m_numHistogramBins - 1,
537  valueMin,
538  valueMax);
539  step = next - start;
540  }
541 
542  if (isGnuplot) {
543  dumpGnuplotCorrelations (coordinateLabel,
544  valueMin,
545  valueMax,
546  signalA,
547  signalB,
548  correlationsMax);
549  }
550 
551  delete [] signalA;
552  delete [] signalB;
553  delete [] correlations;
554  delete [] correlationsMax;
555  delete [] picketFence;
556 }
QRgb marginColor(const QImage *image) const
Identify the margin color of the image, which is defined as the most common color in the four margins...
Definition: ColorFilter.cpp:78
bool colorCompare(QRgb rgb1, QRgb rgb2) const
See if the two color values are close enough to be considered to be the same.
Definition: ColorFilter.cpp:30
double thetaPeriod() const
Return the period of the theta value for polar coordinates, consistent with CoordThetaUnits.
Fast cross correlation between two functions.
Definition: Correlation.h:14
Class for filtering image to remove unimportant information.
Definition: ColorFilter.h:20
DocumentModelCoords modelCoords() const
Get method for DocumentModelCoords.
Affine transformation between screen and graph coordinates, based on digitized axis points.
GridClassifier()
Single constructor.
CoordsType coordsType() const
Get method for coordinates type.
void transformScreenToRawGraph(const QPointF &coordScreen, QPointF &coordGraph) const
Transform from cartesian pixel screen coordinates to cartesian/polar graph coordinates.
double originRadius() const
Get method for origin radius in polar mode.
void classify(bool isGnuplot, const QPixmap &originalPixmap, const Transformation &transformation, int &countX, double &startX, double &stepX, int &countY, double &startY, double &stepY)
Classify the specified image, and return the most probably x and y grid settings.