001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.math.stat.ranking;
019    
020    import java.util.ArrayList;
021    import java.util.Arrays;
022    import java.util.Iterator;
023    import java.util.List;
024    
025    import org.apache.commons.math.random.RandomData;
026    import org.apache.commons.math.random.RandomDataImpl;
027    import org.apache.commons.math.random.RandomGenerator;
028    
029    
030    /**
031     * <p> Ranking based on the natural ordering on doubles.</p>
032     * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
033     * are handled using the selected {@link TiesStrategy}. 
034     * Configuration settings are supplied in optional constructor arguments.
035     * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
036     * respectively. When using {@link TiesStrategy#RANDOM}, a 
037     * {@link RandomGenerator} may be supplied as a constructor argument.</p>
038     * <p>Examples:
039     * <table border="1" cellpadding="3">
040     * <tr><th colspan="3">
041     * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
042     * </th></tr>
043     * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
044     * <th><code>rank(data)</code></th>
045     * <tr>
046     * <td>default (NaNs maximal)</td>
047     * <td>default (ties averaged)</td>
048     * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
049     * <tr>
050     * <td>default (NaNs maximal)</td>
051     * <td>MINIMUM</td>
052     * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
053     * <tr>
054     * <td>MINIMAL</td>
055     * <td>default (ties averaged)</td>
056     * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
057     * <tr>
058     * <td>REMOVED</td>
059     * <td>SEQUENTIAL</td>
060     * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
061     * <tr>
062     * <td>MINIMAL</td>
063     * <td>MAXIMUM</td>
064     * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
065     * 
066     * @since 2.0
067     * @version $Revision: 799857 $ $Date: 2009-08-01 09:07:12 -0400 (Sat, 01 Aug 2009) $
068     */
069    public class NaturalRanking implements RankingAlgorithm {
070       
071        /** NaN strategy - defaults to NaNs maximal */
072        private final NaNStrategy nanStrategy;
073        
074        /** Ties strategy - defaults to ties averaged */
075        private final TiesStrategy tiesStrategy;
076        
077        /** Source of random data - used only when ties strategy is RANDOM */
078        private final RandomData randomData;
079        
080        /** default NaN strategy */
081        public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
082        
083        /** default ties strategy */
084        public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
085        
086        /**
087         * Create a NaturalRanking with default strategies for handling ties and NaNs.
088         */
089        public NaturalRanking() {
090            super();
091            tiesStrategy = DEFAULT_TIES_STRATEGY;
092            nanStrategy = DEFAULT_NAN_STRATEGY;
093            randomData = null;
094        }
095    
096        /**
097         * Create a NaturalRanking with the given TiesStrategy.
098         * 
099         * @param tiesStrategy the TiesStrategy to use
100         */
101        public NaturalRanking(TiesStrategy tiesStrategy) {
102            super();
103            this.tiesStrategy = tiesStrategy;
104            nanStrategy = DEFAULT_NAN_STRATEGY;
105            randomData = new RandomDataImpl();
106        }
107    
108        /**
109         * Create a NaturalRanking with the given NaNStrategy.
110         * 
111         * @param nanStrategy the NaNStrategy to use
112         */
113        public NaturalRanking(NaNStrategy nanStrategy) {
114            super();
115            this.nanStrategy = nanStrategy;
116            tiesStrategy = DEFAULT_TIES_STRATEGY;
117            randomData = null; 
118        }
119    
120        /**
121         * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
122         * 
123         * @param nanStrategy NaNStrategy to use
124         * @param tiesStrategy TiesStrategy to use
125         */
126        public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
127            super();
128            this.nanStrategy = nanStrategy;
129            this.tiesStrategy = tiesStrategy;
130            randomData = new RandomDataImpl();
131        }
132        
133        /**
134         * Create a NaturalRanking with TiesStrategy.RANDOM and the given
135         * RandomGenerator as the source of random data.
136         * 
137         * @param randomGenerator source of random data
138         */
139        public NaturalRanking(RandomGenerator randomGenerator) {
140            super();
141            this.tiesStrategy = TiesStrategy.RANDOM;
142            nanStrategy = DEFAULT_NAN_STRATEGY;
143            randomData = new RandomDataImpl(randomGenerator);
144        }
145    
146    
147        /**
148         * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
149         * and the given source of random data.
150         * 
151         * @param nanStrategy NaNStrategy to use
152         * @param randomGenerator source of random data
153         */
154        public NaturalRanking(NaNStrategy nanStrategy,
155                RandomGenerator randomGenerator) {
156            super();
157            this.nanStrategy = nanStrategy;
158            this.tiesStrategy = TiesStrategy.RANDOM;
159            randomData = new RandomDataImpl(randomGenerator);
160        }
161        
162        /**
163         * Return the NaNStrategy
164         * 
165         * @return returns the NaNStrategy
166         */
167        public NaNStrategy getNanStrategy() {
168            return nanStrategy;
169        }
170    
171        /**
172         * Return the TiesStrategy
173         * 
174         * @return the TiesStrategy
175         */
176        public TiesStrategy getTiesStrategy() {
177            return tiesStrategy;
178        }
179    
180        /**
181         * Rank <code>data</code> using the natural ordering on Doubles, with
182         * NaN values handled according to <code>nanStrategy</code> and ties
183         * resolved using <code>tiesStrategy.</code>
184         * 
185         * @param data array to be ranked
186         * @return array of ranks
187         */
188        public double[] rank(double[] data) {
189            
190            // Array recording initial positions of data to be ranked
191            IntDoublePair[] ranks = new IntDoublePair[data.length];  
192            for (int i = 0; i < data.length; i++) {
193                ranks[i] = new IntDoublePair(data[i], i);
194            }
195            
196            // Recode, remove or record positions of NaNs
197            List<Integer> nanPositions = null;
198            switch (nanStrategy) {
199                case MAXIMAL: // Replace NaNs with +INFs
200                    recodeNaNs(ranks, Double.POSITIVE_INFINITY);
201                    break;
202                case MINIMAL: // Replace NaNs with -INFs
203                    recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
204                    break;
205                case REMOVED: // Drop NaNs from data
206                    ranks = removeNaNs(ranks);
207                    break;
208                case FIXED:   // Record positions of NaNs
209                    nanPositions = getNanPositions(ranks);
210                    break;
211            }
212            
213            // Sort the IntDoublePairs
214            Arrays.sort(ranks);
215            
216            // Walk the sorted array, filling output array using sorted positions,
217            // resolving ties as we go
218            double[] out = new double[ranks.length];
219            int pos = 1;  // position in sorted array 
220            out[ranks[0].getPosition()] = pos;
221            List<Integer> tiesTrace = new ArrayList<Integer>();
222            tiesTrace.add(ranks[0].getPosition());
223            for (int i = 1; i < ranks.length; i++) {
224                if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
225                    // tie sequence has ended (or had length 1)
226                    pos = i + 1;
227                    if (tiesTrace.size() > 1) {  // if seq is nontrivial, resolve
228                        resolveTie(out, tiesTrace);
229                    }
230                    tiesTrace = new ArrayList<Integer>();
231                    tiesTrace.add(ranks[i].getPosition());
232                } else {
233                    // tie sequence continues
234                    tiesTrace.add(ranks[i].getPosition());
235                }
236                out[ranks[i].getPosition()] = pos;
237            }
238            if (tiesTrace.size() > 1) {  // handle tie sequence at end
239                resolveTie(out, tiesTrace);
240            }
241            if (nanStrategy == NaNStrategy.FIXED) {
242                restoreNaNs(out, nanPositions);
243            }
244            return out;
245        }
246        
247        /**
248         * Returns an array that is a copy of the input array with IntDoublePairs
249         * having NaN values removed.
250         * 
251         * @param ranks input array
252         * @return array with NaN-valued entries removed
253         */
254        private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
255            if (!containsNaNs(ranks)) {
256                return ranks;
257            }
258            IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
259            int j = 0;
260            for (int i = 0; i < ranks.length; i++) {
261                if (Double.isNaN(ranks[i].getValue())) {
262                    // drop, but adjust original ranks of later elements
263                    for (int k = i + 1; k < ranks.length; k++) {
264                        ranks[k] = new IntDoublePair(
265                                ranks[k].getValue(), ranks[k].getPosition() - 1);
266                    }
267                } else {
268                    outRanks[j] = new IntDoublePair(
269                            ranks[i].getValue(), ranks[i].getPosition());
270                    j++;
271                }
272            }
273            IntDoublePair[] returnRanks = new IntDoublePair[j];
274            System.arraycopy(outRanks, 0, returnRanks, 0, j);
275            return returnRanks;
276        }
277    
278        /**
279         * Recodes NaN values to the given value. 
280         * 
281         * @param ranks array to recode
282         * @param value the value to replace NaNs with
283         */
284        private void recodeNaNs(IntDoublePair[] ranks, double value) {
285            for (int i = 0; i < ranks.length; i++) {
286                if (Double.isNaN(ranks[i].getValue())) {
287                    ranks[i] = new IntDoublePair(
288                            value, ranks[i].getPosition());
289                }
290            }
291        }
292        
293        /**
294         * Checks for presence of NaNs in <code>ranks.</code>
295         * 
296         * @param ranks array to be searched for NaNs
297         * @return true iff ranks contains one or more NaNs
298         */
299        private boolean containsNaNs(IntDoublePair[] ranks) {
300            for (int i = 0; i < ranks.length; i++) {
301                if (Double.isNaN(ranks[i].getValue())) {
302                    return true;
303                }
304            }
305            return false;
306        }
307        
308        /**
309         * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
310         * The input <code>ranks</code> array is expected to take the same value
311         * for all indices in <code>tiesTrace</code>.  The common value is recoded
312         * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
313         * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
314         * The same array and trace with tiesStrategy AVERAGE will come out
315         * <5,8,3,6,3,7,1,3>.
316         * 
317         * @param ranks array of ranks 
318         * @param tiesTrace list of indices where <code>ranks</code> is constant
319         * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j] 
320         * </code>
321         */
322        private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
323            
324            // constant value of ranks over tiesTrace
325            final double c = ranks[tiesTrace.get(0)];
326            
327            // length of sequence of tied ranks
328            final int length = tiesTrace.size();
329            
330            switch (tiesStrategy) {
331                case  AVERAGE:  // Replace ranks with average
332                    fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
333                    break;
334                case MAXIMUM:   // Replace ranks with maximum values
335                    fill(ranks, tiesTrace, c + length - 1);
336                    break;
337                case MINIMUM:   // Replace ties with minimum
338                    fill(ranks, tiesTrace, c);
339                    break;
340                case RANDOM:    // Fill with random integral values in [c, c + length - 1]
341                    Iterator<Integer> iterator = tiesTrace.iterator();
342                    long f = Math.round(c);
343                    while (iterator.hasNext()) {
344                        ranks[iterator.next()] = 
345                            randomData.nextLong(f, f + length - 1);
346                    }
347                    break;
348                case SEQUENTIAL:  // Fill sequentially from c to c + length - 1
349                    // walk and fill
350                    iterator = tiesTrace.iterator();
351                    f = Math.round(c);
352                    int i = 0;
353                    while (iterator.hasNext()) {
354                        ranks[iterator.next()] = f + i++;
355                    }
356                    break;
357            }   
358        }
359        
360        /**
361         * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
362         * 
363         * @param data array to modify
364         * @param tiesTrace list of index values to set
365         * @param value value to set
366         */
367        private void fill(double[] data, List<Integer> tiesTrace, double value) {
368            Iterator<Integer> iterator = tiesTrace.iterator();
369            while (iterator.hasNext()) {
370                data[iterator.next()] = value;
371            }
372        }
373        
374        /**
375         * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
376         * 
377         * @param ranks array to modify
378         * @param nanPositions list of index values to set to <code>Double.NaN</code>
379         */
380        private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
381            if (nanPositions.size() == 0) {
382                return;
383            }
384            Iterator<Integer> iterator = nanPositions.iterator();
385            while (iterator.hasNext()) {
386                ranks[iterator.next().intValue()] = Double.NaN;  
387            }
388            
389        }
390        
391        /**
392         * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
393         * 
394         * @param ranks array to search for <code>NaNs</code>
395         * @return list of indexes i such that <code>ranks[i] = NaN</code>
396         */
397        private List<Integer> getNanPositions(IntDoublePair[] ranks) {
398            ArrayList<Integer> out = new ArrayList<Integer>();
399            for (int i = 0; i < ranks.length; i++) {
400                if (Double.isNaN(ranks[i].getValue())) {
401                    out.add(Integer.valueOf(i));
402                }
403            }
404            return out;     
405        }
406        
407        /**
408         * Represents the position of a double value in an ordering.
409         * Comparable interface is implemented so Arrays.sort can be used
410         * to sort an array of IntDoublePairs by value.  Note that the
411         * implicitly defined natural ordering is NOT consistent with equals.
412         */
413        private static class IntDoublePair implements Comparable<IntDoublePair>  {
414    
415            /** Value of the pair */
416            final private double value;
417    
418            /** Original position of the pair */
419            final private int position;
420    
421            /**
422             * Construct an IntDoublePair with the given value and position.
423             * @param value the value of the pair
424             * @param position the original position
425             */
426            public IntDoublePair(double value, int position) {
427                this.value = value;
428                this.position = position;
429            }
430    
431            /**
432             * Compare this IntDoublePair to another pair.
433             * Only the <strong>values</strong> are compared.
434             * 
435             * @param other the other pair to compare this to
436             * @return result of <code>Double.compare(value, other.value)</code>
437             */
438            public int compareTo(IntDoublePair other) {
439                return Double.compare(value, other.value);
440            }
441    
442            /**
443             * Returns the value of the pair.
444             * @return value
445             */
446            public double getValue() {
447                return value;
448            }
449    
450            /**
451             * Returns the original position of the pair.
452             * @return position
453             */
454            public int getPosition() {
455                return position;
456            }
457        }
458    }