00001 /*@HEADER 00002 // *********************************************************************** 00003 // 00004 // Ifpack: Object-Oriented Algebraic Preconditioner Package 00005 // Copyright (2002) Sandia Corporation 00006 // 00007 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive 00008 // license for use of this work by or on behalf of the U.S. Government. 00009 // 00010 // This library is free software; you can redistribute it and/or modify 00011 // it under the terms of the GNU Lesser General Public License as 00012 // published by the Free Software Foundation; either version 2.1 of the 00013 // License, or (at your option) any later version. 00014 // 00015 // This library is distributed in the hope that it will be useful, but 00016 // WITHOUT ANY WARRANTY; without even the implied warranty of 00017 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 // Lesser General Public License for more details. 00019 // 00020 // You should have received a copy of the GNU Lesser General Public 00021 // License along with this library; if not, write to the Free Software 00022 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 00023 // USA 00024 // Questions? Contact Michael A. Heroux (maherou@sandia.gov) 00025 // 00026 // *********************************************************************** 00027 //@HEADER 00028 */ 00029 00030 #include "Ifpack_config.h" 00031 00032 /* Modified by Edmond Chow, to sort integers and carry along an array of 00033 doubles */ 00034 #if 0 /* test program */ 00035 00036 #include <stdlib.h> 00037 #include <stdio.h> 00038 00039 void ifpack_multilist_sort (int *const pbase, double *const daux, size_t total_elems); 00040 #define QSORTLEN 20 00041 00042 int main() 00043 { 00044 int h[QSORTLEN]; 00045 double d[QSORTLEN]; 00046 int i; 00047 00048 for (i=0; i<QSORTLEN; i++) 00049 { 00050 h[i] = rand() >> 20; 00051 d[i] = (double) -h[i]; 00052 } 00053 00054 printf("before sorting\n"); 00055 for (i=0; i<QSORTLEN; i++) 00056 printf("%d %f\n", h[i], d[i]); 00057 00058 ifpack_multilist_sort(h, d, QSORTLEN); 00059 00060 printf("after sorting\n"); 00061 for (i=0; i<QSORTLEN; i++) 00062 printf("%d %f\n", h[i], d[i]); 00063 00064 return 0; 00065 } 00066 #endif /* test program */ 00067 00068 /* Copyright (C) 1991, 1992, 1996, 1997 Free Software Foundation, Inc. 00069 This file is part of the GNU C Library. 00070 Written by Douglas C. Schmidt (schmidt@ics.uci.edu). 00071 00072 The GNU C Library is free software; you can redistribute it and/or 00073 modify it under the terms of the GNU Library General Public License as 00074 published by the Free Software Foundation; either version 2 of the 00075 License, or (at your option) any later version. 00076 00077 The GNU C Library is distributed in the hope that it will be useful, 00078 but WITHOUT ANY WARRANTY; without even the implied warranty of 00079 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00080 Library General Public License for more details. 00081 00082 You should have received a copy of the GNU Library General Public 00083 License along with the GNU C Library; see the file COPYING.LIB. If not, 00084 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 00085 Boston, MA 02111-1307, USA. */ 00086 00087 #include <string.h> 00088 00089 /* Swap integers pointed to by a and b, and corresponding doubles */ 00090 #define SWAP(a, b) \ 00091 do { \ 00092 itemp = *a; \ 00093 *a = *b; \ 00094 *b = itemp; \ 00095 dtemp = daux[a-pbase]; \ 00096 daux[a-pbase] = daux[b-pbase]; \ 00097 daux[b-pbase] = dtemp; \ 00098 } while (0) 00099 00100 /* Discontinue quicksort algorithm when partition gets below this size. 00101 This particular magic number was chosen to work best on a Sun 4/260. */ 00102 #define MAX_THRESH 4 00103 00104 /* Stack node declarations used to store unfulfilled partition obligations. */ 00105 typedef struct 00106 { 00107 int *lo; 00108 int *hi; 00109 } stack_node; 00110 00111 /* The next 4 #defines implement a very fast in-line stack abstraction. */ 00112 #define STACK_SIZE (8 * sizeof(unsigned long int)) 00113 #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) 00114 #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) 00115 #define STACK_NOT_EMPTY (stack < top) 00116 00117 00118 /* Order size using quicksort. This implementation incorporates 00119 four optimizations discussed in Sedgewick: 00120 00121 1. Non-recursive, using an explicit stack of pointer that store the 00122 next array partition to sort. To save time, this maximum amount 00123 of space required to store an array of MAX_INT is allocated on the 00124 stack. Assuming a 32-bit integer, this needs only 32 * 00125 sizeof(stack_node) == 136 bits. Pretty cheap, actually. 00126 00127 2. Chose the pivot element using a median-of-three decision tree. 00128 This reduces the probability of selecting a bad pivot value and 00129 eliminates certain extraneous comparisons. 00130 00131 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving 00132 insertion sort to order the MAX_THRESH items within each partition. 00133 This is a big win, since insertion sort is faster for small, mostly 00134 sorted array segments. 00135 00136 4. The larger of the two sub-partitions is always pushed onto the 00137 stack first, with the algorithm then concentrating on the 00138 smaller partition. This *guarantees* no more than log (n) 00139 stack size is needed (actually O(1) in this case)! */ 00140 00141 void ifpack_multilist_sort (int *const pbase, double *const daux, size_t total_elems) 00142 { 00143 int itemp; 00144 double dtemp; 00145 const size_t size = 1; 00146 register int *base_ptr = (int *) pbase; 00147 00148 /* Allocating SIZE bytes for a pivot buffer facilitates a better 00149 algorithm below since we can do comparisons directly on the pivot. */ 00150 int pivot_buffer[1]; 00151 const size_t max_thresh = MAX_THRESH * size; 00152 00153 /* edmond: return if total_elems less than zero */ 00154 if (total_elems <= 0) 00155 /* Avoid lossage with unsigned arithmetic below. */ 00156 return; 00157 00158 if (total_elems > MAX_THRESH) 00159 { 00160 int *lo = base_ptr; 00161 int *hi = &lo[size * (total_elems - 1)]; 00162 /* Largest size needed for 32-bit int!!! */ 00163 stack_node stack[STACK_SIZE]; 00164 stack_node *top = stack + 1; 00165 00166 while (STACK_NOT_EMPTY) 00167 { 00168 int *left_ptr; 00169 int *right_ptr; 00170 00171 int *pivot = pivot_buffer; 00172 00173 /* Select median value from among LO, MID, and HI. Rearrange 00174 LO and HI so the three values are sorted. This lowers the 00175 probability of picking a pathological pivot value and 00176 skips a comparison for both the LEFT_PTR and RIGHT_PTR. */ 00177 00178 int *mid = lo + size * ((hi - lo) / size >> 1); 00179 00180 if (*mid - *lo < 0) 00181 SWAP (mid, lo); 00182 if (*hi - *mid < 0) 00183 SWAP (mid, hi); 00184 else 00185 goto jump_over; 00186 if (*mid - *lo < 0) 00187 SWAP (mid, lo); 00188 jump_over:; 00189 *pivot = *mid; 00190 pivot = pivot_buffer; 00191 00192 left_ptr = lo + size; 00193 right_ptr = hi - size; 00194 00195 /* Here's the famous ``collapse the walls'' section of quicksort. 00196 Gotta like those tight inner loops! They are the main reason 00197 that this algorithm runs much faster than others. */ 00198 do 00199 { 00200 while (*left_ptr - *pivot < 0) 00201 left_ptr += size; 00202 00203 while (*pivot - *right_ptr < 0) 00204 right_ptr -= size; 00205 00206 if (left_ptr < right_ptr) 00207 { 00208 SWAP (left_ptr, right_ptr); 00209 left_ptr += size; 00210 right_ptr -= size; 00211 } 00212 else if (left_ptr == right_ptr) 00213 { 00214 left_ptr += size; 00215 right_ptr -= size; 00216 break; 00217 } 00218 } 00219 while (left_ptr <= right_ptr); 00220 00221 /* Set up pointers for next iteration. First determine whether 00222 left and right partitions are below the threshold size. If so, 00223 ignore one or both. Otherwise, push the larger partition's 00224 bounds on the stack and continue sorting the smaller one. */ 00225 00226 if ((size_t) (right_ptr - lo) <= max_thresh) 00227 { 00228 if ((size_t) (hi - left_ptr) <= max_thresh) 00229 /* Ignore both small partitions. */ 00230 POP (lo, hi); 00231 else 00232 /* Ignore small left partition. */ 00233 lo = left_ptr; 00234 } 00235 else if ((size_t) (hi - left_ptr) <= max_thresh) 00236 /* Ignore small right partition. */ 00237 hi = right_ptr; 00238 else if ((right_ptr - lo) > (hi - left_ptr)) 00239 { 00240 /* Push larger left partition indices. */ 00241 PUSH (lo, right_ptr); 00242 lo = left_ptr; 00243 } 00244 else 00245 { 00246 /* Push larger right partition indices. */ 00247 PUSH (left_ptr, hi); 00248 hi = right_ptr; 00249 } 00250 } 00251 } 00252 00253 /* Once the BASE_PTR array is partially sorted by quicksort the rest 00254 is completely sorted using insertion sort, since this is efficient 00255 for partitions below MAX_THRESH size. BASE_PTR points to the beginning 00256 of the array to sort, and END_PTR points at the very last element in 00257 the array (*not* one beyond it!). */ 00258 00259 #define min(x, y) ((x) < (y) ? (x) : (y)) 00260 00261 { 00262 int *const end_ptr = &base_ptr[size * (total_elems - 1)]; 00263 int *tmp_ptr = base_ptr; 00264 int *thresh = min(end_ptr, base_ptr + max_thresh); 00265 register int *run_ptr; 00266 00267 /* Find smallest element in first threshold and place it at the 00268 array's beginning. This is the smallest array element, 00269 and the operation speeds up insertion sort's inner loop. */ 00270 00271 for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) 00272 if (*run_ptr - *tmp_ptr < 0) 00273 tmp_ptr = run_ptr; 00274 00275 if (tmp_ptr != base_ptr) 00276 SWAP (tmp_ptr, base_ptr); 00277 00278 /* Insertion sort, running from left-hand-side up to right-hand-side. */ 00279 00280 run_ptr = base_ptr + size; 00281 while ((run_ptr += size) <= end_ptr) 00282 { 00283 tmp_ptr = run_ptr - size; 00284 while (*run_ptr - *tmp_ptr < 0) 00285 tmp_ptr -= size; 00286 00287 tmp_ptr += size; 00288 if (tmp_ptr != run_ptr) 00289 { 00290 int *trav; 00291 00292 trav = run_ptr + size; 00293 while (--trav >= run_ptr) 00294 { 00295 int c = *trav; 00296 double c2 = daux[trav-pbase]; 00297 00298 int *hi, *lo; 00299 00300 for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) 00301 { 00302 *hi = *lo; 00303 daux[hi-pbase] = daux[lo-pbase]; 00304 } 00305 *hi = c; 00306 daux[hi-pbase] = c2; 00307 } 00308 } 00309 } 00310 } 00311 }
1.7.6.1