25 void casadi_lsqr_sym_ortho(T1 a, T1 b, T1* cs, T1* sn, T1* rho) {
35 }
else if (fabs(b)>fabs(a)) {
37 *sn = sign(b)/sqrt(1+tau*tau);
42 *cs = sign(a)/sqrt(1+tau*tau);
70 int casadi_lsqr_single_solve(
const T1* A, T1* x, casadi_int tr,
const casadi_int* sp, T1* w) {
72 T1 damp, atol, btol, conlim, ctol, anorm, acond, dampsq, ddnorm, res2, xnorm, xxnorm, z;
73 T1 cs2, sn2, alpha, beta, rhobar, phibar, bnorm, rnorm, arnorm, rhobar1, cs1, sn1, psi;
74 T1 cs, sn, rho, theta, phi, tau, t1, t2, n2dk, delta, gambar, rhs, zbar, gamma, res1;
75 T1 r1sq, r1norm, test1, test2, test3, rtol;
76 casadi_int iter_lim, itn, istop;
77 T1 *u, *v, *xx, *ww, *dk;
92 if (conlim > 0) ctol = 1/conlim;
104 u = w; w+= m; casadi_copy(x, m, u);
105 v = w; w+= n; casadi_clear(v, n);
106 xx = w; w+= n; casadi_clear(xx, n);
107 ww = w; w+= n; casadi_clear(v, n);
111 beta = casadi_norm_2(m, u);
114 for (i=0;i<m;++i) u[i]*=1/beta;
115 casadi_mv(A, sp, u, v, !tr);
116 alpha = casadi_norm_2(n, v);
120 for (i=0;i<n;++i) v[i]*=1/alpha;
121 casadi_copy(v, n, ww);
128 arnorm = alpha * beta;
135 while (itn<iter_lim) {
137 for (i=0;i<m;++i) u[i]*=-alpha;
138 casadi_mv(A, sp, v, u, tr);
139 beta = casadi_norm_2(m, u);
142 for (i=0;i<m;++i) u[i]*=1/beta;
143 anorm = sqrt(anorm*anorm + alpha*alpha+beta*beta+damp*damp);
144 for (i=0;i<n;++i) v[i]*=-beta;
145 casadi_mv(A, sp, u, v, !tr);
146 alpha = casadi_norm_2(n, v);
147 if (alpha>0)
for (i=0;i<n;++i) v[i]*=1/alpha;
150 rhobar1 = sqrt(rhobar*rhobar+damp*damp);
152 cs1 = rhobar / rhobar1;
153 sn1 = damp / rhobar1;
157 casadi_lsqr_sym_ortho(rhobar1, beta, &cs, &sn, &rho);
160 rhobar = -cs * alpha;
168 for (i=0;i<n;++i) dk[i]=ww[i]/rho;
170 for (i=0; i<n; ++i) xx[i] += t1*ww[i];
171 for (i=0; i<n; ++i) ww[i] = v[i] + t2*ww[i];
173 n2dk = casadi_norm_2(n, dk);
178 rhs = phi - delta * z;
180 xnorm = sqrt(xxnorm + zbar*zbar);
181 gamma = sqrt(gambar*gambar + theta*theta);
182 cs2 = gambar / gamma;
187 acond = anorm * sqrt(ddnorm);
188 res1 = phibar*phibar;
190 rnorm = sqrt(res1+res2);
191 arnorm = alpha*fabs(tau);
193 r1sq = rnorm*rnorm - dampsq * xxnorm;
194 r1norm = sqrt(fabs(r1sq));
195 if (r1sq < 0) r1norm = -r1norm;
197 test1 = rnorm / bnorm;
198 test2 = arnorm / (anorm * rnorm);
200 t1 = test1 / (1 + anorm * xnorm / bnorm);
201 rtol = btol + atol * anorm * xnorm / bnorm;
203 if (itn >= iter_lim) istop = 7;
204 if (1 + test3 <= 1) istop = 6;
205 if (1 + test2 <= 1) istop = 5;
206 if (1 + t1 <= 1) istop = 4;
208 if (test3 <= ctol) istop = 3;
209 if (test2 <= atol) istop = 2;
210 if (test1 <= rtol) istop = 1;
212 if (istop != 0)
break;
215 casadi_copy(xx, m, x);
239 template<
typename T1>
240 int casadi_lsqr_solve(
const T1* A, T1* x, casadi_int nrhs, casadi_int tr,
241 const casadi_int* sp, T1* w) {
243 for (i=0; i<nrhs;++i) {
244 if (casadi_lsqr_single_solve(A, x+i*sp[1], tr, sp, w))
return 1;