00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2002) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // Redistribution and use in source and binary forms, with or without 00011 // modification, are permitted provided that the following conditions are 00012 // met: 00013 // 00014 // 1. Redistributions of source code must retain the above copyright 00015 // notice, this list of conditions and the following disclaimer. 00016 // 00017 // 2. Redistributions in binary form must reproduce the above copyright 00018 // notice, this list of conditions and the following disclaimer in the 00019 // documentation and/or other materials provided with the distribution. 00020 // 00021 // 3. Neither the name of the Corporation nor the names of the 00022 // contributors may be used to endorse or promote products derived from 00023 // this software without specific prior written permission. 00024 // 00025 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY 00026 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00027 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 00028 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE 00029 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 00030 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 00031 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00032 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00033 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00034 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00035 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00036 // 00037 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00038 // 00039 // *********************************************************************** 00040 //@HEADER 00041 */ 00042 00043 #include "Ifpack_config.h" 00044 00045 /* Modified by Edmond Chow, to sort integers and carry along an array of 00046 doubles */ 00047 #if 0 /* test program */ 00048 00049 #include <stdlib.h> 00050 #include <stdio.h> 00051 00052 void ifpack_multilist_sort (int *const pbase, double *const daux, size_t total_elems); 00053 #define QSORTLEN 20 00054 00055 int main() 00056 { 00057 int h[QSORTLEN]; 00058 double d[QSORTLEN]; 00059 int i; 00060 00061 for (i=0; i<QSORTLEN; i++) 00062 { 00063 h[i] = rand() >> 20; 00064 d[i] = (double) -h[i]; 00065 } 00066 00067 printf("before sorting\n"); 00068 for (i=0; i<QSORTLEN; i++) 00069 printf("%d %f\n", h[i], d[i]); 00070 00071 ifpack_multilist_sort(h, d, QSORTLEN); 00072 00073 printf("after sorting\n"); 00074 for (i=0; i<QSORTLEN; i++) 00075 printf("%d %f\n", h[i], d[i]); 00076 00077 return 0; 00078 } 00079 #endif /* test program */ 00080 00081 /* Copyright (C) 1991, 1992, 1996, 1997 Free Software Foundation, Inc. 00082 This file is part of the GNU C Library. 00083 Written by Douglas C. Schmidt (schmidt@ics.uci.edu). 00084 00085 The GNU C Library is free software; you can redistribute it and/or 00086 modify it under the terms of the GNU Library General Public License as 00087 published by the Free Software Foundation; either version 2 of the 00088 License, or (at your option) any later version. 00089 00090 The GNU C Library is distributed in the hope that it will be useful, 00091 but WITHOUT ANY WARRANTY; without even the implied warranty of 00092 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00093 Library General Public License for more details. 00094 00095 You should have received a copy of the GNU Library General Public 00096 License along with the GNU C Library; see the file COPYING.LIB. If not, 00097 write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, 00098 Boston, MA 02111-1307, USA. */ 00099 00100 #include <string.h> 00101 00102 /* Swap integers pointed to by a and b, and corresponding doubles */ 00103 #define SWAP(a, b) \ 00104 do { \ 00105 itemp = *a; \ 00106 *a = *b; \ 00107 *b = itemp; \ 00108 dtemp = daux[a-pbase]; \ 00109 daux[a-pbase] = daux[b-pbase]; \ 00110 daux[b-pbase] = dtemp; \ 00111 } while (0) 00112 00113 /* Discontinue quicksort algorithm when partition gets below this size. 00114 This particular magic number was chosen to work best on a Sun 4/260. */ 00115 #define MAX_THRESH 4 00116 00117 /* Stack node declarations used to store unfulfilled partition obligations. */ 00118 typedef struct 00119 { 00120 int *lo; 00121 int *hi; 00122 } stack_node; 00123 00124 /* The next 4 #defines implement a very fast in-line stack abstraction. */ 00125 #define STACK_SIZE (8 * sizeof(unsigned long int)) 00126 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) 00127 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) 00128 #define STACK_NOT_EMPTY (stack < top) 00129 00130 00131 /* Order size using quicksort. This implementation incorporates 00132 four optimizations discussed in Sedgewick: 00133 00134 1. Non-recursive, using an explicit stack of pointer that store the 00135 next array partition to sort. To save time, this maximum amount 00136 of space required to store an array of MAX_INT is allocated on the 00137 stack. Assuming a 32-bit integer, this needs only 32 * 00138 sizeof(stack_node) == 136 bits. Pretty cheap, actually. 00139 00140 2. Chose the pivot element using a median-of-three decision tree. 00141 This reduces the probability of selecting a bad pivot value and 00142 eliminates certain extraneous comparisons. 00143 00144 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving 00145 insertion sort to order the MAX_THRESH items within each partition. 00146 This is a big win, since insertion sort is faster for small, mostly 00147 sorted array segments. 00148 00149 4. The larger of the two sub-partitions is always pushed onto the 00150 stack first, with the algorithm then concentrating on the 00151 smaller partition. This *guarantees* no more than log (n) 00152 stack size is needed (actually O(1) in this case)! */ 00153 00154 void ifpack_multilist_sort (int *const pbase, double *const daux, size_t total_elems) 00155 { 00156 int itemp; 00157 double dtemp; 00158 const size_t size = 1; 00159 register int *base_ptr = (int *) pbase; 00160 00161 /* Allocating SIZE bytes for a pivot buffer facilitates a better 00162 algorithm below since we can do comparisons directly on the pivot. */ 00163 int pivot_buffer[1]; 00164 const size_t max_thresh = MAX_THRESH * size; 00165 00166 /* edmond: return if total_elems less than zero */ 00167 if (total_elems <= 0) 00168 /* Avoid lossage with unsigned arithmetic below. */ 00169 return; 00170 00171 if (total_elems > MAX_THRESH) 00172 { 00173 int *lo = base_ptr; 00174 int *hi = &lo[size * (total_elems - 1)]; 00175 /* Largest size needed for 32-bit int!!! */ 00176 stack_node stack[STACK_SIZE]; 00177 stack_node *top = stack + 1; 00178 00179 while (STACK_NOT_EMPTY) 00180 { 00181 int *left_ptr; 00182 int *right_ptr; 00183 00184 int *pivot = pivot_buffer; 00185 00186 /* Select median value from among LO, MID, and HI. Rearrange 00187 LO and HI so the three values are sorted. This lowers the 00188 probability of picking a pathological pivot value and 00189 skips a comparison for both the LEFT_PTR and RIGHT_PTR. */ 00190 00191 int *mid = lo + size * ((hi - lo) / size >> 1); 00192 00193 if (*mid - *lo < 0) 00194 SWAP (mid, lo); 00195 if (*hi - *mid < 0) 00196 SWAP (mid, hi); 00197 else 00198 goto jump_over; 00199 if (*mid - *lo < 0) 00200 SWAP (mid, lo); 00201 jump_over:; 00202 *pivot = *mid; 00203 pivot = pivot_buffer; 00204 00205 left_ptr = lo + size; 00206 right_ptr = hi - size; 00207 00208 /* Here's the famous ``collapse the walls'' section of quicksort. 00209 Gotta like those tight inner loops! They are the main reason 00210 that this algorithm runs much faster than others. */ 00211 do 00212 { 00213 while (*left_ptr - *pivot < 0) 00214 left_ptr += size; 00215 00216 while (*pivot - *right_ptr < 0) 00217 right_ptr -= size; 00218 00219 if (left_ptr < right_ptr) 00220 { 00221 SWAP (left_ptr, right_ptr); 00222 left_ptr += size; 00223 right_ptr -= size; 00224 } 00225 else if (left_ptr == right_ptr) 00226 { 00227 left_ptr += size; 00228 right_ptr -= size; 00229 break; 00230 } 00231 } 00232 while (left_ptr <= right_ptr); 00233 00234 /* Set up pointers for next iteration. First determine whether 00235 left and right partitions are below the threshold size. If so, 00236 ignore one or both. Otherwise, push the larger partition's 00237 bounds on the stack and continue sorting the smaller one. */ 00238 00239 if ((size_t) (right_ptr - lo) <= max_thresh) 00240 { 00241 if ((size_t) (hi - left_ptr) <= max_thresh) 00242 /* Ignore both small partitions. */ 00243 POP (lo, hi); 00244 else 00245 /* Ignore small left partition. */ 00246 lo = left_ptr; 00247 } 00248 else if ((size_t) (hi - left_ptr) <= max_thresh) 00249 /* Ignore small right partition. */ 00250 hi = right_ptr; 00251 else if ((right_ptr - lo) > (hi - left_ptr)) 00252 { 00253 /* Push larger left partition indices. */ 00254 PUSH (lo, right_ptr); 00255 lo = left_ptr; 00256 } 00257 else 00258 { 00259 /* Push larger right partition indices. */ 00260 PUSH (left_ptr, hi); 00261 hi = right_ptr; 00262 } 00263 } 00264 } 00265 00266 /* Once the BASE_PTR array is partially sorted by quicksort the rest 00267 is completely sorted using insertion sort, since this is efficient 00268 for partitions below MAX_THRESH size. BASE_PTR points to the beginning 00269 of the array to sort, and END_PTR points at the very last element in 00270 the array (*not* one beyond it!). */ 00271 00272 #define min(x, y) ((x) < (y) ? (x) : (y)) 00273 00274 { 00275 int *const end_ptr = &base_ptr[size * (total_elems - 1)]; 00276 int *tmp_ptr = base_ptr; 00277 int *thresh = min(end_ptr, base_ptr + max_thresh); 00278 register int *run_ptr; 00279 00280 /* Find smallest element in first threshold and place it at the 00281 array's beginning. This is the smallest array element, 00282 and the operation speeds up insertion sort's inner loop. */ 00283 00284 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) 00285 if (*run_ptr - *tmp_ptr < 0) 00286 tmp_ptr = run_ptr; 00287 00288 if (tmp_ptr != base_ptr) 00289 SWAP (tmp_ptr, base_ptr); 00290 00291 /* Insertion sort, running from left-hand-side up to right-hand-side. */ 00292 00293 run_ptr = base_ptr + size; 00294 while ((run_ptr += size) <= end_ptr) 00295 { 00296 tmp_ptr = run_ptr - size; 00297 while (*run_ptr - *tmp_ptr < 0) 00298 tmp_ptr -= size; 00299 00300 tmp_ptr += size; 00301 if (tmp_ptr != run_ptr) 00302 { 00303 int *trav; 00304 00305 trav = run_ptr + size; 00306 while (--trav >= run_ptr) 00307 { 00308 int c = *trav; 00309 double c2 = daux[trav-pbase]; 00310 00311 int *hi, *lo; 00312 00313 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) 00314 { 00315 *hi = *lo; 00316 daux[hi-pbase] = daux[lo-pbase]; 00317 } 00318 *hi = c; 00319 daux[hi-pbase] = c2; 00320 } 00321 } 00322 } 00323 } 00324 }
1.7.6.1