00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043 #include "Euclid_dh.h"
00044 #include "krylov_dh.h"
00045 #include "Mem_dh.h"
00046 #include "Parser_dh.h"
00047 #include "Mat_dh.h"
00048
00049
00050 #undef __FUNC__
00051 #define __FUNC__ "bicgstab_euclid"
00052 void
00053 bicgstab_euclid (Mat_dh A, Euclid_dh ctx, double *x, double *b, int *itsOUT)
00054 {
00055 START_FUNC_DH int its, m = ctx->m;
00056 bool monitor;
00057 int maxIts = ctx->maxIts;
00058 double atol = ctx->atol, rtol = ctx->rtol;
00059
00060
00061 double alpha, alpha_1,
00062 beta_1,
00063 widget, widget_1, rho_1, rho_2, s_norm, eps, exit_a, b_iprod, r_iprod;
00064
00065
00066 double *t, *s, *s_hat, *v, *p, *p_hat, *r, *r_hat;
00067
00068 monitor = Parser_dhHasSwitch (parser_dh, "-monitor");
00069
00070
00071 t = (double *) MALLOC_DH (m * sizeof (double));
00072 s = (double *) MALLOC_DH (m * sizeof (double));
00073 s_hat = (double *) MALLOC_DH (m * sizeof (double));
00074 v = (double *) MALLOC_DH (m * sizeof (double));
00075 p = (double *) MALLOC_DH (m * sizeof (double));
00076 p_hat = (double *) MALLOC_DH (m * sizeof (double));
00077 r = (double *) MALLOC_DH (m * sizeof (double));
00078 r_hat = (double *) MALLOC_DH (m * sizeof (double));
00079
00080
00081 Mat_dhMatVec (A, x, s);
00082 CopyVec (m, b, r);
00083 Axpy (m, -1.0, s, r);
00084 CopyVec (m, r, r_hat);
00085
00086
00087 b_iprod = InnerProd (m, b, b);
00088 CHECK_V_ERROR;
00089 exit_a = atol * atol * b_iprod;
00090 CHECK_V_ERROR;
00091 eps = rtol * rtol * b_iprod;
00092
00093 its = 0;
00094 while (1)
00095 {
00096 ++its;
00097 rho_1 = InnerProd (m, r_hat, r);
00098 if (rho_1 == 0)
00099 {
00100 SET_V_ERROR ("(r_hat . r) = 0; method fails");
00101 }
00102
00103 if (its == 1)
00104 {
00105 CopyVec (m, r, p);
00106 CHECK_V_ERROR;
00107 }
00108 else
00109 {
00110 beta_1 = (rho_1 / rho_2) * (alpha_1 / widget_1);
00111
00112
00113 Axpy (m, -widget_1, v, p);
00114 CHECK_V_ERROR;
00115 ScaleVec (m, beta_1, p);
00116 CHECK_V_ERROR;
00117 Axpy (m, 1.0, r, p);
00118 CHECK_V_ERROR;
00119 }
00120
00121
00122 Euclid_dhApply (ctx, p, p_hat);
00123 CHECK_V_ERROR;
00124
00125
00126 Mat_dhMatVec (A, p_hat, v);
00127 CHECK_V_ERROR;
00128
00129
00130 {
00131 double tmp = InnerProd (m, r_hat, v);
00132 CHECK_V_ERROR;
00133 alpha = rho_1 / tmp;
00134 }
00135
00136
00137 CopyVec (m, r, s);
00138 CHECK_V_ERROR;
00139 Axpy (m, -alpha, v, s);
00140 CHECK_V_ERROR;
00141
00142
00143
00144
00145
00146 s_norm = InnerProd (m, s, s);
00147 if (s_norm < exit_a)
00148 {
00149 SET_INFO ("reached absolute stopping criteria");
00150 break;
00151 }
00152
00153
00154 Euclid_dhApply (ctx, s, s_hat);
00155 CHECK_V_ERROR;
00156
00157
00158 Mat_dhMatVec (A, s_hat, t);
00159 CHECK_V_ERROR;
00160
00161
00162 {
00163 double tmp1, tmp2;
00164 tmp1 = InnerProd (m, t, s);
00165 CHECK_V_ERROR;
00166 tmp2 = InnerProd (m, t, t);
00167 CHECK_V_ERROR;
00168 widget = tmp1 / tmp2;
00169 }
00170
00171
00172 Axpy (m, alpha, p_hat, x);
00173 CHECK_V_ERROR;
00174 Axpy (m, widget, s_hat, x);
00175 CHECK_V_ERROR;
00176
00177
00178 CopyVec (m, s, r);
00179 CHECK_V_ERROR;
00180 Axpy (m, -widget, t, r);
00181 CHECK_V_ERROR;
00182
00183
00184
00185
00186 r_iprod = InnerProd (m, r, r);
00187 CHECK_V_ERROR;
00188 if (r_iprod < eps)
00189 {
00190 SET_INFO ("stipulated residual reduction achieved");
00191 break;
00192 }
00193
00194
00195 if (monitor && myid_dh == 0)
00196 {
00197 fprintf (stderr, "[it = %i] %e\n", its, sqrt (r_iprod / b_iprod));
00198 }
00199
00200
00201 rho_2 = rho_1;
00202 widget_1 = widget;
00203 alpha_1 = alpha;
00204
00205 if (its >= maxIts)
00206 {
00207 its = -its;
00208 break;
00209 }
00210 }
00211
00212 *itsOUT = its;
00213
00214 FREE_DH (t);
00215 FREE_DH (s);
00216 FREE_DH (s_hat);
00217 FREE_DH (v);
00218 FREE_DH (p);
00219 FREE_DH (p_hat);
00220 FREE_DH (r);
00221 FREE_DH (r_hat);
00222 END_FUNC_DH}
00223
00224 #undef __FUNC__
00225 #define __FUNC__ "cg_euclid"
00226 void
00227 cg_euclid (Mat_dh A, Euclid_dh ctx, double *x, double *b, int *itsOUT)
00228 {
00229 START_FUNC_DH int its, m = A->m;
00230 double *p, *r, *s;
00231 double alpha, beta, gamma, gamma_old, eps, bi_prod, i_prod;
00232 bool monitor;
00233 int maxIts = ctx->maxIts;
00234
00235 double rtol = ctx->rtol;
00236
00237 monitor = Parser_dhHasSwitch (parser_dh, "-monitor");
00238
00239
00240
00241 bi_prod = InnerProd (m, b, b);
00242 CHECK_V_ERROR;
00243 eps = (rtol * rtol) * bi_prod;
00244
00245 p = (double *) MALLOC_DH (m * sizeof (double));
00246 s = (double *) MALLOC_DH (m * sizeof (double));
00247 r = (double *) MALLOC_DH (m * sizeof (double));
00248
00249
00250 Mat_dhMatVec (A, x, r);
00251 CHECK_V_ERROR;
00252 ScaleVec (m, -1.0, r);
00253 CHECK_V_ERROR;
00254 Axpy (m, 1.0, b, r);
00255 CHECK_V_ERROR;
00256
00257
00258 Euclid_dhApply (ctx, r, p);
00259 CHECK_V_ERROR;
00260
00261
00262 gamma = InnerProd (m, r, p);
00263 CHECK_V_ERROR;
00264
00265 its = 0;
00266 while (1)
00267 {
00268 ++its;
00269
00270
00271 Mat_dhMatVec (A, p, s);
00272 CHECK_V_ERROR;
00273
00274
00275 {
00276 double tmp = InnerProd (m, s, p);
00277 CHECK_V_ERROR;
00278 alpha = gamma / tmp;
00279 gamma_old = gamma;
00280 }
00281
00282
00283 Axpy (m, alpha, p, x);
00284 CHECK_V_ERROR;
00285
00286
00287 Axpy (m, -alpha, s, r);
00288 CHECK_V_ERROR;
00289
00290
00291 Euclid_dhApply (ctx, r, s);
00292 CHECK_V_ERROR;
00293
00294
00295 gamma = InnerProd (m, r, s);
00296 CHECK_V_ERROR;
00297
00298
00299 i_prod = InnerProd (m, r, r);
00300 CHECK_V_ERROR;
00301
00302 if (monitor && myid_dh == 0)
00303 {
00304 fprintf (stderr, "iter = %i rel. resid. norm: %e\n", its,
00305 sqrt (i_prod / bi_prod));
00306 }
00307
00308
00309 if (i_prod < eps)
00310 break;
00311
00312
00313 beta = gamma / gamma_old;
00314
00315
00316 ScaleVec (m, beta, p);
00317 CHECK_V_ERROR;
00318 Axpy (m, 1.0, s, p);
00319 CHECK_V_ERROR;
00320
00321 if (its >= maxIts)
00322 {
00323 its = -its;
00324 break;
00325 }
00326 }
00327
00328 *itsOUT = its;
00329
00330 FREE_DH (p);
00331 FREE_DH (s);
00332 FREE_DH (r);
00333 END_FUNC_DH}