25 void casadi_dense_lsqr_sym_ortho(T1 a, T1 b, T1* cs, T1* sn, T1* rho) {
35 }
else if (fabs(b)>fabs(a)) {
37 *sn = sign(b)/sqrt(1+tau*tau);
42 *cs = sign(a)/sqrt(1+tau*tau);
70 int casadi_dense_lsqr_single_solve(
const T1* A, T1* x, casadi_int tr,
const casadi_int ncol,
71 const casadi_int nrow, T1* w) {
73 T1 damp, atol, btol, conlim, ctol, anorm, acond, dampsq, ddnorm, res2, xnorm, xxnorm, z;
74 T1 cs2, sn2, alpha, beta, rhobar, phibar, bnorm, rnorm, arnorm, rhobar1, cs1, sn1, psi;
75 T1 cs, sn, rho, theta, phi, tau, t1, t2, n2dk, delta, gambar, rhs, zbar, gamma, res1;
76 T1 r1sq, r1norm, test1, test2, test3, rtol;
77 casadi_int iter_lim, itn, istop;
78 T1 *u, *v, *xx, *ww, *dk;
93 if (conlim > 0) ctol = 1/conlim;
105 u = w; w+= m; casadi_copy(x, m, u);
106 v = w; w+= n; casadi_clear(v, n);
107 xx = w; w+= n; casadi_clear(xx, n);
108 ww = w; w+= n; casadi_clear(v, n);
112 beta = casadi_norm_2(m, u);
115 for (i=0;i<m;++i) u[i]*=1/beta;
116 casadi_mv_dense(A, nrow, ncol, u, v, !tr);
118 alpha = casadi_norm_2(n, v);
122 for (i=0;i<n;++i) v[i]*=1/alpha;
123 casadi_copy(v, n, ww);
130 arnorm = alpha * beta;
132 while (itn<iter_lim) {
134 for (i=0;i<m;++i) u[i]*=-alpha;
136 casadi_mv_dense(A, nrow, ncol, u, v, !tr);
137 beta = casadi_norm_2(m, u);
140 for (i=0;i<m;++i) u[i]*=1/beta;
141 anorm = sqrt(anorm*anorm + alpha*alpha+beta*beta+damp*damp);
142 for (i=0;i<n;++i) v[i]*=-beta;
144 casadi_mv_dense(A, nrow, ncol, u, v, !tr);
145 alpha = casadi_norm_2(n, v);
146 if (alpha>0)
for (i=0;i<n;++i) v[i]*=1/alpha;
149 rhobar1 = sqrt(rhobar*rhobar+damp*damp);
151 cs1 = rhobar / rhobar1;
152 sn1 = damp / rhobar1;
156 casadi_dense_lsqr_sym_ortho(rhobar1, beta, &cs, &sn, &rho);
159 rhobar = -cs * alpha;
167 for (i=0;i<n;++i) dk[i]=ww[i]/rho;
169 for (i=0; i<n; ++i) xx[i] += t1*ww[i];
170 for (i=0; i<n; ++i) ww[i] = v[i] + t2*ww[i];
172 n2dk = casadi_norm_2(n, dk);
177 rhs = phi - delta * z;
179 xnorm = sqrt(xxnorm + zbar*zbar);
180 gamma = sqrt(gambar*gambar + theta*theta);
181 cs2 = gambar / gamma;
186 acond = anorm * sqrt(ddnorm);
187 res1 = phibar*phibar;
189 rnorm = sqrt(res1+res2);
190 arnorm = alpha*fabs(tau);
192 r1sq = rnorm*rnorm - dampsq * xxnorm;
193 r1norm = sqrt(fabs(r1sq));
194 if (r1sq < 0) r1norm = -r1norm;
196 test1 = rnorm / bnorm;
197 test2 = arnorm / (anorm * rnorm);
199 t1 = test1 / (1 + anorm * xnorm / bnorm);
200 rtol = btol + atol * anorm * xnorm / bnorm;
202 if (itn >= iter_lim) istop = 7;
203 if (1 + test3 <= 1) istop = 6;
204 if (1 + test2 <= 1) istop = 5;
205 if (1 + t1 <= 1) istop = 4;
207 if (test3 <= ctol) istop = 3;
208 if (test2 <= atol) istop = 2;
209 if (test1 <= rtol) istop = 1;
211 if (istop != 0)
break;
214 casadi_copy(xx, m, x);
238 template<
typename T1>
239 int casadi_dense_lsqr_solve(
const T1* A, T1* x, casadi_int nrhs, casadi_int tr,
240 const casadi_int ncol,
const casadi_int nrow, T1* w) {
242 for (i=0; i<nrhs;++i) {
244 if (casadi_dense_lsqr_single_solve(A, x+i*nrow, tr, ncol, nrow, w))
return 1;