sx_function.cpp
1  /*
2  * This file is part of CasADi.
3  *
4  * CasADi -- A symbolic framework for dynamic optimization.
5  * Copyright (C) 2010-2023 Joel Andersson, Joris Gillis, Moritz Diehl,
6  * KU Leuven. All rights reserved.
7  * Copyright (C) 2011-2014 Greg Horn
8  *
9  * CasADi is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 3 of the License, or (at your option) any later version.
13  *
14  * CasADi is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with CasADi; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  *
23  */
24 
25 
26 #include "sx_function.hpp"
27 #include <limits>
28 #include <stack>
29 #include <deque>
30 #include <sstream>
31 #include <iomanip>
32 #include <bitset>
33 #include "sx_node.hpp"
34 #include "output_sx.hpp"
35 #include "call_sx.hpp"
36 #include "casadi_common.hpp"
37 #include "sparsity_internal.hpp"
38 #include "casadi_interrupt.hpp"
39 #include "serializing_stream.hpp"
40 #include "global_options.hpp"
41 
42 namespace casadi {
43 
45  n_dep = f.nnz_in(); n_res = f.nnz_out();
46  dep.resize(n_dep); res.resize(n_res, -1);
47  f_n_in = f.n_in(); f_n_out = f.n_out();
48  f_nnz_in.resize(f_n_in); f_nnz_out.resize(f_n_out);
49  for (casadi_int i=0;i<f_n_in;++i) f_nnz_in[i] = f.nnz_in(i);
50  for (casadi_int i=0;i<f_n_out;++i) f_nnz_out[i] = f.nnz_out(i);
51  copy_elision_arg.resize(f_n_in, -1);
52  copy_elision_offset.resize(f_n_in, -1);
53  }
54 
55  SXFunction::SXFunction(const std::string& name,
56  const std::vector<SX >& inputv,
57  const std::vector<SX >& outputv,
58  const std::vector<std::string>& name_in,
59  const std::vector<std::string>& name_out)
60  : XFunction<SXFunction, SX, SXNode>(name, inputv, outputv, name_in, name_out) {
61 
62  // Default (persistent) options
63  just_in_time_opencl_ = false;
64  just_in_time_sparsity_ = false;
65  print_instructions_ = false;
66  }
67 
69  clear_mem();
70  }
71 
  int SXFunction::eval(const double** arg, double** res,
      casadi_int* iw, double* w, void* mem) const {
    if (verbose_) casadi_message(name_ + "::eval");
    setup(mem, arg, res, iw, w);

    // Make sure no free parameters
    if (!free_vars_.empty()) {
      std::stringstream ss;
      disp(ss, false);
      casadi_error("Cannot evaluate \"" + ss.str() + "\" since variables "
        + str(free_vars_) + " are free.");
    }

    // NOTE: The implementation of this function is very delicate. Small changes in the
    // class structure can cause large performance losses. For this reason,
    // the preprocessor macros are used below

    // The print_instructions_ check is hoisted out of the loop so the common
    // (non-printing) path pays nothing per operation; the two loops below are
    // identical except for the print_arg/print_res calls.
    if (print_instructions_) {
      int k = 0;
      // Evaluate the algorithm
      for (auto&& e : algorithm_) {
        print_arg(uout(), k, e, w);
        switch (e.op) {
          // Unary and binary built-in operations, expanded by the preprocessor
          CASADI_MATH_FUN_BUILTIN(w[e.i1], w[e.i2], w[e.i0])

          case OP_CONST: w[e.i0] = e.d; break;
          // A null arg/res pointer means the input/output is absent
          case OP_INPUT: w[e.i0] = arg[e.i1]==nullptr ? 0 : arg[e.i1][e.i2]; break;
          case OP_OUTPUT: if (res[e.i0]!=nullptr) res[e.i0][e.i2] = w[e.i1]; break;
          case OP_CALL:
            call_fwd(e, arg, res, iw, w);
            break;
          default:
            casadi_error("Unknown operation" + str(e.op));
        }
        print_res(uout(), k, e, w);
        k++;
      }
    } else {
      // Evaluate the algorithm
      for (auto&& e : algorithm_) {
        switch (e.op) {
          CASADI_MATH_FUN_BUILTIN(w[e.i1], w[e.i2], w[e.i0])

          case OP_CONST: w[e.i0] = e.d; break;
          case OP_INPUT: w[e.i0] = arg[e.i1]==nullptr ? 0 : arg[e.i1][e.i2]; break;
          case OP_OUTPUT: if (res[e.i0]!=nullptr) res[e.i0][e.i2] = w[e.i1]; break;
          case OP_CALL:
            call_fwd(e, arg, res, iw, w);
            break;
          default:
            casadi_error("Unknown operation" + str(e.op));
        }
      }
    }
    return 0;
  }
128 
129  bool SXFunction::is_smooth() const {
130  // Go through all nodes and check if any node is non-smooth
131  for (auto&& a : algorithm_) {
132  if (!operation_checker<SmoothChecker>(a.op)) {
133  return false;
134  }
135  }
136  return true;
137  }
  // Render a single algorithm instruction in human-readable form,
  // e.g. "@5 = sq(@2)" or "output[0][2] = @7"
  std::string SXFunction::print(const ScalarAtomic& a) const {
    std::stringstream stream;
    if (a.op==OP_OUTPUT) {
      // Output instruction: i0 is the output index, i2 the nonzero, i1 the source
      stream << "output[" << a.i0 << "][" << a.i2 << "] = @" << a.i1;
    } else if (a.op==OP_CALL) {
      // Function call: render "[outputs] = fname([inputs])"
      const ExtendedAlgEl& m = call_.el.at(a.i1);
      stream << "[";
      casadi_int k = 0;
      for (casadi_int i=0; i<m.f.n_out(); ++i) {
        if (m.f.nnz_out(i)>1) stream << "[";
        for (casadi_int j=0; j<m.f.nnz_out(i); ++j) {
          int el = m.res[k++];
          // A negative entry means this output nonzero is unused
          if (el>=0) {
            stream << "@" << el;
          } else {
            stream << "NULL";
          }
          if (j<m.f.nnz_out(i)-1) stream << ",";
        }
        if (m.f.nnz_out(i)>1) stream << "]";
        if (i<m.f.n_out()-1) stream << ",";
      }
      stream << "] = ";
      stream << m.f.name() << "(";
      k = 0;
      for (casadi_int i=0; i<m.f.n_in(); ++i) {
        // An empty input is shown as "0x0"
        if (m.f.nnz_in(i)==0) stream << "0x0";
        if (m.f.nnz_in(i)>1) stream << "[";
        for (casadi_int j=0; j<m.f.nnz_in(i); ++j) {
          stream << "@" << m.dep[k++];
          if (j<m.f.nnz_in(i)-1) stream << ",";
        }
        if (m.f.nnz_in(i)>1) stream << "]";
        if (i<m.f.n_in()-1) stream << ",";
      }
      stream << ")";
    } else {
      // All remaining instructions define a single work vector entry i0
      stream << "@" << a.i0 << " = ";
      if (a.op==OP_INPUT) {
        stream << "input[" << a.i1 << "][" << a.i2 << "]";
      } else {
        if (a.op==OP_CONST) {
          stream << a.d;
        } else if (a.op==OP_PARAMETER) {
          stream << free_vars_[a.i1];
        } else {
          // Unary or binary operation: pre/sep/post provide the operator syntax
          casadi_int ndep = casadi_math<double>::ndeps(a.op);
          stream << casadi_math<double>::pre(a.op);
          for (casadi_int c=0; c<ndep; ++c) {
            if (c==0) {
              stream << "@" << a.i1;
            } else {
              stream << casadi_math<double>::sep(a.op);
              stream << "@" << a.i2;
            }

          }
          stream << casadi_math<double>::post(a.op);
        }
      }
    }
    return stream.str();
  }
201 
  // Print the full algorithm, one instruction per line, as part of disp()
  void SXFunction::disp_more(std::ostream &stream) const {
    stream << "Algorithm:";

    // Normal, interpreted output
    for (auto&& a : algorithm_) {
      stream << std::endl;
      stream << print(a);
      stream << ";";
    }
  }
213 
214  size_t SXFunction::codegen_sz_w(const CodeGenerator& g) const {
215  if (!g.avoid_stack()) return call_.sz_w+call_.sz_w_arg+call_.sz_w_res;
216  return sz_w();
217  }
218 
220 
221  // Make sure that there are no free variables
222  if (!free_vars_.empty()) {
223  casadi_error("Code generation of '" + name_ + "' is not possible since variables "
224  + str(free_vars_) + " are free.");
225  }
226 
227  // Generate code for the call nodes
228  for (auto&& m : call_.el) {
229  g.add_dependency(m.f);
230  }
231  }
232 
233 
234  void SXFunction::print_arg(std::ostream &stream, casadi_int k, const ScalarAtomic& el,
235  const double* w) const {
236  if (el.op==OP_INPUT || el.op==OP_OUTPUT || el.op==OP_CONST) return;
237  stream << name_ << ":" << k << ": " << print(el) << " inputs:" << std::endl;
238 
239  // Default dependencies
240  const int* dep = &el.i1;
241  casadi_int ndeps = casadi_math<double>::ndeps(el.op);
242 
243  // Call node overrides these defaults
244  if (el.op==OP_CALL) {
245  const ExtendedAlgEl& e = call_.el.at(el.i1);
246  ndeps = e.n_dep;
247  dep = get_ptr(e.dep);
248  stream << "[";
249  for (size_t i = 0; i < ndeps; ++i) {
250  if (i>0) stream << ", ";
251  if (print_canonical_) {
252  print_canonical(stream, w[dep[i]]);
253  } else {
254  DM::print_scalar(stream, w[dep[i]]);
255  }
256  }
257  stream << "]";
258  stream << std::endl;
259  return;
260  }
261 
262  for (size_t i = 0; i < ndeps; ++i) {
263  stream << i << ": ";
264  if (print_canonical_) {
265  print_canonical(stream, w[dep[i]]);
266  } else {
267  DM::print_scalar(stream, w[dep[i]]);
268  }
269  stream << std::endl;
270  }
271  }
272 
273  void SXFunction::print_arg(CodeGenerator& g, casadi_int k, const ScalarAtomic& el) const {
274  if (el.op==OP_INPUT || el.op==OP_OUTPUT || el.op==OP_CONST) return;
275  g << g.printf(name_ + ":" + str(k) + ": " + print(el) + " inputs:\\n") << "\n";
276  if (el.op==OP_CALL) {
277  const ExtendedAlgEl& m = call_.el[el.i1];
278  g << g.print_vector(m.f.nnz_in(), "arg[" + str(n_in_) + "]");
279  g << g.printf("\\n");
280  } else {
281  casadi_int ndeps = casadi_math<double>::ndeps(el.op);
282  if (ndeps==1) {
283  g << g.printf("0: %.16e\\n", g.sx_work(el.i1));
284  } else if (ndeps==2) {
285  g << g.printf("0: %.16e\\n1: %.16e\\n", g.sx_work(el.i1), g.sx_work(el.i2));
286  }
287  }
288  g << "\n";
289  }
290 
291  void SXFunction::print_res(CodeGenerator& g, casadi_int k, const ScalarAtomic& el) const {
292  if (el.op==OP_INPUT || el.op==OP_OUTPUT) return;
293  g << g.printf(name_ + ":" + str(k) + ": " + print(el) + " outputs:\\n") << "\n";
294  if (el.op==OP_CALL) {
295  const ExtendedAlgEl& m = call_.el[el.i1];
296  g << g.print_vector(m.f.nnz_out(), "w+" + str(m.f.nnz_in()));
297  g << g.printf("\\n");
298  } else {
299  g << g.printf("0: %.16e\\n", g.sx_work(el.i0));
300  }
301  g << "\n";
302  }
303 
304  void SXFunction::print_res(std::ostream &stream, casadi_int k, const ScalarAtomic& el,
305  const double* w) const {
306  if (el.op==OP_INPUT || el.op==OP_OUTPUT) return;
307  stream << name_ << ":" << k << ": " << print(el) << " outputs:" << std::endl;
308 
309  // Default outputs
310  const int* res = &el.i0;
311  casadi_int nres = 1;
312 
313  // Call node overrides these defaults
314  if (el.op==OP_CALL) {
315  const ExtendedAlgEl& e = call_.el.at(el.i1);
316  nres = e.n_res;
317  res = get_ptr(e.res);
318  stream << "[";
319  for (size_t i = 0; i < nres; ++i) {
320  if (i>0) stream << ", ";
321  if (print_canonical_) {
322  print_canonical(stream, w[res[i]]);
323  } else {
324  DM::print_scalar(stream, w[res[i]]);
325  }
326  }
327  stream << "]";
328  stream << std::endl;
329  return;
330  }
331 
332  for (size_t i = 0; i < nres; ++i) {
333  stream << i << ": ";
334  if (print_canonical_) {
335  print_canonical(stream, w[res[i]]);
336  } else {
337  DM::print_scalar(stream, w[res[i]]);
338  }
339  stream << std::endl;
340  }
341 
342  }
343 
346 
347  casadi_int cnt = 0;
348  // Run the algorithm
349  for (auto&& a : algorithm_) {
350  if (a.op==OP_OUTPUT) {
351  g << "if (res[" << a.i0 << "]!=0) "
352  << g.res(a.i0) << "[" << a.i2 << "]=" << g.sx_work(a.i1) << ";\n";
353  } else if (a.op==OP_CALL) {
354  const ExtendedAlgEl& m = call_.el[a.i1];
355 
356  casadi_int worksize = g.avoid_stack() ? worksize_ : 0;
357 
358  // Collect input arguments
359  casadi_int offset = worksize;
360  for (casadi_int i=0; i<m.f_n_in; ++i) {
361  if (m.copy_elision_arg[i]>=0) {
362  g << "arg[" << n_in_+i << "] = "
363  << "arg[" + str(m.copy_elision_arg[i]) << "]? "
364  << "arg[" + str(m.copy_elision_arg[i]) << "] + "
365  << str(m.copy_elision_offset[i]) << " : 0;\n";
366  } else {
367  if (m.f_nnz_in[i]==0) {
368  g << "arg[" << n_in_+i << "]=" << 0 << ";\n";
369  } else {
370  g << "arg[" << n_in_+i << "]=" << "w+" + str(offset) << ";\n";
371  }
372  }
373  offset += m.f_nnz_in[i];
374  }
375 
376 
377  casadi_int out_offset = offset;
378 
379  // Collect output arguments
380  for (casadi_int i=0; i<m.f_n_out; ++i) {
381  g << "res[" << n_out_+i << "]=" << "w+" + str(offset) << ";\n";
382  offset += m.f_nnz_out[i];
383  }
384  casadi_int k=0;
385  for (casadi_int i=0; i<m.f_n_in; ++i) {
386  if (m.copy_elision_arg[i]==-1) {
387  for (casadi_int j=0; j<m.f_nnz_in[i]; ++j) {
388  g << "w["+str(k+worksize) + "] = " << g.sx_work(m.dep[k]) << ";\n";
389  k++;
390  }
391  } else {
392  k+=m.f_nnz_in[i];
393  }
394  }
395  if (print_instructions_) print_arg(g, cnt, a);
396  std::string flag =
397  g(m.f, "arg+"+str(n_in_), "res+"+str(n_out_), "iw", "w+" + str(offset));
398  // Call function
399  g << "if (" << flag << ") return 1;\n";
400  if (print_instructions_) print_res(g, cnt, a);
401  for (casadi_int i=0;i<m.n_res;++i) {
402  if (m.res[i]>=0) {
403  g << g.sx_work(m.res[i]) << " = ";
404  g << "w[" + str(i+out_offset) + "];\n";
405  }
406  }
407  } else if (a.op==OP_INPUT) {
408  if (!copy_elision_[cnt]) {
409  g << g.sx_work(a.i0) << "="
410  << g.arg(a.i1) << "? " << g.arg(a.i1) << "[" << a.i2 << "] : 0;\n";
411  }
412  } else {
413  if (print_instructions_) print_arg(g, cnt, a);
414 
415  // Where to store the result
416  g << g.sx_work(a.i0) << "=";
417 
418  // What to store
419  if (a.op==OP_CONST) {
420  g << g.constant(a.d);
421  } else {
422  casadi_int ndep = casadi_math<double>::ndeps(a.op);
423  casadi_assert_dev(ndep>0);
424  if (ndep==1) g << g.print_op(a.op, g.sx_work(a.i1));
425  if (ndep==2) g << g.print_op(a.op, g.sx_work(a.i1), g.sx_work(a.i2));
426  }
427 
428  g << ";\n";
429 
430  if (print_instructions_) print_res(g, cnt, a);
431  }
432  cnt++;
433  }
434  }
435 
438  {{"default_in",
440  "Default input values"}},
441  {"just_in_time_sparsity",
442  {OT_BOOL,
443  "Propagate sparsity patterns using just-in-time "
444  "compilation to a CPU or GPU using OpenCL"}},
445  {"just_in_time_opencl",
446  {OT_BOOL,
447  "Just-in-time compilation for numeric evaluation using OpenCL (experimental)"}},
448  {"live_variables",
449  {OT_BOOL,
450  "Reuse variables in the work vector"}},
451  {"cse",
452  {OT_BOOL,
453  "Perform common subexpression elimination (complexity is N*log(N) in graph size)"}},
454  {"allow_free",
455  {OT_BOOL,
456  "Allow construction with free variables (Default: false)"}},
457  {"allow_duplicate_io_names",
458  {OT_BOOL,
459  "Allow construction with duplicate io names (Default: false)"}},
460  {"print_instructions",
461  {OT_BOOL,
462  "Print each operation during evaluation. Influenced by print_canonical."}}
463  }
464  };
465 
466  Dict SXFunction::generate_options(const std::string& target) const {
468  //opts["default_in"] = default_in_;
469  opts["live_variables"] = live_variables_;
470  opts["just_in_time_sparsity"] = just_in_time_sparsity_;
471  opts["just_in_time_opencl"] = just_in_time_opencl_;
472  return opts;
473  }
474 
475  void SXFunction::init(const Dict& opts) {
476  // Call the init function of the base class
478  if (verbose_) casadi_message(name_ + "::init");
479 
480  // Default (temporary) options
481  live_variables_ = true;
482 
483  bool cse_opt = false;
484  bool allow_free = false;
485 
486  // Read options
487  for (auto&& op : opts) {
488  if (op.first=="default_in") {
489  default_in_ = op.second;
490  } else if (op.first=="live_variables") {
491  live_variables_ = op.second;
492  } else if (op.first=="just_in_time_opencl") {
493  just_in_time_opencl_ = op.second;
494  } else if (op.first=="just_in_time_sparsity") {
495  just_in_time_sparsity_ = op.second;
496  } else if (op.first=="cse") {
497  cse_opt = op.second;
498  } else if (op.first=="allow_free") {
499  allow_free = op.second;
500  } else if (op.first=="print_instructions") {
501  print_instructions_ = op.second;
502  }
503  }
504 
505  // Perform common subexpression elimination
506  // This must be done before the lock, to avoid deadlocks
507  if (cse_opt) out_ = cse(out_);
508 
509  // Check/set default inputs
510  if (default_in_.empty()) {
511  default_in_.resize(n_in_, 0);
512  } else {
513  casadi_assert(default_in_.size()==n_in_,
514  "Option 'default_in' has incorrect length");
515  }
516 
517 #ifdef CASADI_WITH_THREADSAFE_SYMBOLICS
518  std::lock_guard<std::mutex> lock(SX::get_mutex_temp());
519 #endif // CASADI_WITH_THREADSAFE_SYMBOLICS
520 
521  // Stack used to sort the computational graph
522  std::stack<SXNode*> s;
523 
524  // All nodes
525  std::vector<SXNode*> nodes;
526 
527  // Add the list of nodes
528  casadi_int ind=0;
529  for (auto it = out_.begin(); it != out_.end(); ++it, ++ind) {
530  casadi_int nz=0;
531  for (auto itc = (*it)->begin(); itc != (*it)->end(); ++itc, ++nz) {
532  // Add outputs to the list
533  s.push(itc->get());
534  sort_depth_first(s, nodes);
535 
536  // A null pointer means an output instruction
537  nodes.push_back(static_cast<SXNode*>(nullptr));
538  }
539  }
540 
541  casadi_assert(nodes.size() <= std::numeric_limits<int>::max(), "Integer overflow");
542  // Set the temporary variables to be the corresponding place in the sorted graph
543  for (casadi_int i=0; i<nodes.size(); ++i) {
544  if (nodes[i]) {
545  nodes[i]->temp = static_cast<int>(i);
546  }
547  }
548 
549  // Sort the nodes by type
550  constants_.clear();
551  operations_.clear();
552  for (std::vector<SXNode*>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
553  SXNode* t = *it;
554  if (t) {
555  if (t->is_constant())
556  constants_.push_back(SXElem::create(t));
557  else if (!t->is_symbolic() && t->op()>=0)
558  operations_.push_back(SXElem::create(t));
559  }
560  }
561 
562  // Input instructions
563  std::vector<std::pair<int, SXNode*> > symb_loc;
564 
565  // Current output and nonzero, start with the first one
566  int curr_oind, curr_nz=0;
567  casadi_assert(out_.size() <= std::numeric_limits<int>::max(), "Integer overflow");
568  for (curr_oind=0; curr_oind<out_.size(); ++curr_oind) {
569  if (out_[curr_oind].nnz()!=0) {
570  break;
571  }
572  }
573 
574  // Count the number of times each node is used
575  std::vector<casadi_int> refcount(nodes.size(), 0);
576 
577  // Get the sequence of instructions for the virtual machine
578  algorithm_.resize(0);
579  algorithm_.reserve(nodes.size());
580 
581  // Mapping of node index (cfr. temp) to algorithm index
582  std::vector<int> alg_index;
583  alg_index.reserve(nodes.size());
584 
585  for (std::vector<SXNode*>::iterator it=nodes.begin(); it!=nodes.end(); ++it) {
586  // Current node
587  SXNode* n = *it;
588 
589  // New element in the algorithm
590  AlgEl ae;
591 
592  // Get operation
593  ae.op = n==nullptr ? static_cast<int>(OP_OUTPUT) : static_cast<int>(n->op());
594 
595  // Default dependencies
596  int* dep = &ae.i1;
597  casadi_int ndeps = ae.op == -1 ? 1 : casadi_math<double>::ndeps(ae.op);
598 
599  // Get instruction
600  switch (ae.op) {
601  case OP_CONST: // constant
602  ae.d = n->to_double();
603  ae.i0 = n->temp;
604  break;
605  case OP_PARAMETER: // a parameter or input
606  symb_loc.push_back(std::make_pair(algorithm_.size(), n));
607  ae.i0 = n->temp;
608  ae.d = 0; // value not used, but set here to avoid uninitialized data in serialization
609  break;
610  case OP_OUTPUT: // output instruction
611  ae.i0 = curr_oind;
612  ae.i1 = out_[curr_oind]->at(curr_nz)->temp;
613  ae.i2 = curr_nz;
614 
615  // Go to the next nonzero
616  casadi_assert(curr_nz < std::numeric_limits<int>::max(), "Integer overflow");
617  curr_nz++;
618  if (curr_nz>=out_[curr_oind].nnz()) {
619  curr_nz=0;
620  casadi_assert(curr_oind < std::numeric_limits<int>::max(), "Integer overflow");
621  curr_oind++;
622  for (; curr_oind<out_.size(); ++curr_oind) {
623  if (out_[curr_oind].nnz()!=0) {
624  break;
625  }
626  }
627  }
628  break;
629  case OP_CALL: // Call node
630  {
631  ae.i0 = n->temp;
632 
633  // Index into ExtentedAlgEl collection
634  ae.i1 = call_.el.size();
635 
636  // Create ExtentedAlgEl instance
637  // This allocates space for dep and res
638  const Function& f = static_cast<const CallSX*>(n)->f_;
639  call_.el.emplace_back(f);
640 
641  // Make sure we have enough space to evaluate the Function call,
642  // noting that we wil only ever evaluate one call at a time.
643  call_.sz_arg = std::max(call_.sz_arg, f.sz_arg());
644  call_.sz_res = std::max(call_.sz_res, f.sz_res());
645  call_.sz_iw = std::max(call_.sz_iw, f.sz_iw());
646  call_.sz_w = std::max(call_.sz_w, f.sz_w());
647  call_.sz_w_arg = std::max(call_.sz_w_arg, static_cast<size_t>(f.nnz_in()));
648  call_.sz_w_res = std::max(call_.sz_w_res, static_cast<size_t>(f.nnz_out()));
649 
650  // Set the dependency pointer to the (uninitialised) slots of the ExtendedAlgEl
651  ExtendedAlgEl& m = call_.el.at(ae.i1);
652  dep = get_ptr(m.dep);
653  ndeps = m.n_dep;
654 
655  // Populate the dependency slots with node ids.
656  for (casadi_int i=0; i<ndeps; ++i) {
657  dep[i] = n->dep(i).get()->temp;
658  }
659  }
660  break;
661  case -1: // Output extraction node
662  {
663  dep = &algorithm_.at(alg_index.at(n->dep(0).get()->temp)).i1;
664  int oind = static_cast<OutputSX*>(n)->oind_;
665  casadi_assert(call_.el.at(dep[0]).res.at(oind)==-1, "Duplicate");
666  call_.el.at(dep[0]).res.at(oind) = n->temp;
667  }
668  break;
669  default: // Unary or binary operation
670  ae.i0 = n->temp;
671  ae.i1 = n->dep(0).get()->temp;
672  ae.i2 = n->dep(1).get()->temp;
673  }
674 
675  // Increase count of dependencies
676  for (casadi_int c=0; c<ndeps; ++c) {
677  refcount.at(dep[c])++;
678  }
679 
680  // Amend node index to algorithm index mapping
681  alg_index.push_back(algorithm_.size());
682 
683  // Add to algorithm
684  if (ae.op>=0) algorithm_.push_back(ae);
685 
686  }
687 
688  // Place in the work vector for each of the nodes in the tree (overwrites the reference counter)
689  std::vector<int> place(nodes.size());
690 
691  // Stack with unused elements in the work vector
692  std::stack<int> unused;
693 
694  // Work vector size
695  int worksize = 0;
696 
697  // Find a place in the work vector for the operation
698  for (auto&& a : algorithm_) {
699 
700  // Default dependencies
701  int* dep = &a.i1;
702  casadi_int ndeps = casadi_math<double>::ndeps(a.op);
703 
704  // Default outputs
705  int* res = &a.i0;
706  casadi_int nres = 1;
707 
708  // Call node overrides these defaults
709  if (a.op==OP_CALL) {
710  ExtendedAlgEl& e = call_.el.at(a.i1);
711  ndeps = e.n_dep;
712  dep = get_ptr(e.dep);
713  nres = e.n_res;
714  res = get_ptr(e.res);
715  }
716 
717  // decrease reference count of children
718  // reverse order so that the first argument will end up at the top of the stack
719  for (casadi_int c=ndeps-1; c>=0; --c) {
720  casadi_int ch_ind = dep[c];
721  casadi_int remaining = --refcount.at(ch_ind);
722  if (remaining==0) unused.push(place[ch_ind]);
723  }
724 
725  // Find a place to store the variable
726  if (a.op!=OP_OUTPUT) {
727  for (casadi_int c=0; c<nres; ++c) {
728  if (res[c]<0) continue;
729  if (live_variables_ && !unused.empty()) {
730  // Try to reuse a variable from the stack if possible (last in, first out)
731  res[c] = place[res[c]] = unused.top();
732  unused.pop();
733  } else {
734  // Allocate a new variable
735  res[c] = place[res[c]] = worksize++;
736  }
737  }
738  }
739 
740  // Save the location of the children
741  for (casadi_int c=0; c<ndeps; ++c) {
742  dep[c] = place[dep[c]];
743  }
744 
745  // If binary, make sure that the second argument is the same as the first one
746  // (in order to treat all operations as binary) NOTE: ugly
747  if (ndeps==1 && a.op!=OP_OUTPUT) {
748  a.i2 = a.i1;
749  }
750  }
751 
752  worksize_ = worksize;
753 
754  if (verbose_) {
755  if (live_variables_) {
756  casadi_message("Using live variables: work array is " + str(worksize_)
757  + " instead of " + str(nodes.size()));
758  } else {
759  casadi_message("Live variables disabled.");
760  }
761  }
762 
763  // Allocate work vectors (symbolic/numeric)
765 
766  alloc_arg(call_.sz_arg, true);
767  alloc_res(call_.sz_res, true);
768  alloc_iw(call_.sz_iw, true);
770 
771  // Reset the temporary variables
772  for (casadi_int i=0; i<nodes.size(); ++i) {
773  if (nodes[i]) {
774  nodes[i]->temp = 0;
775  }
776  }
777 
778  // Now mark each input's place in the algorithm
779  for (auto it=symb_loc.begin(); it!=symb_loc.end(); ++it) {
780  it->second->temp = it->first+1;
781  }
782 
783  // Add input instructions
784  casadi_assert(in_.size() <= std::numeric_limits<int>::max(), "Integer overflow");
785  for (int ind=0; ind<in_.size(); ++ind) {
786  int nz=0;
787  for (auto itc = in_[ind]->begin(); itc != in_[ind]->end(); ++itc, ++nz) {
788  int i = itc->get_temp()-1;
789  if (i>=0) {
790  // Mark as input
791  algorithm_[i].op = OP_INPUT;
792 
793  // Location of the input
794  algorithm_[i].i1 = ind;
795  algorithm_[i].i2 = nz;
796 
797  // Mark input as read
798  itc->set_temp(0);
799  }
800  }
801  }
802 
803  // Locate free variables
804  free_vars_.clear();
805  for (std::vector<std::pair<int, SXNode*> >::const_iterator it=symb_loc.begin();
806  it!=symb_loc.end(); ++it) {
807  if (it->second->temp!=0) {
808  // Store the index into free_vars
809  algorithm_[it->first].i1 = free_vars_.size();
810 
811  // Save to list of free parameters
812  free_vars_.push_back(SXElem::create(it->second));
813 
814  // Remove marker
815  it->second->temp=0;
816  }
817  }
818 
819  if (!allow_free && has_free()) {
820  casadi_error(name_ + "::init: Initialization failed since variables [" +
821  join(get_free(), ", ") + "] are free. These symbols occur in the output expressions "
822  "but you forgot to declare these as inputs. "
823  "Set option 'allow_free' to allow free variables.");
824  }
825 
827 
828  // Initialize just-in-time compilation for numeric evaluation using OpenCL
829  if (just_in_time_opencl_) {
830  casadi_error("OpenCL is not supported in this version of CasADi");
831  }
832 
833  // Initialize just-in-time compilation for sparsity propagation using OpenCL
835  casadi_error("OpenCL is not supported in this version of CasADi");
836  }
837 
838  // Print
839  if (verbose_) casadi_message(str(algorithm_.size()) + " elementary operations");
840  }
841 
844  copy_elision_.resize(algorithm_.size(), false);
845  return;
846  }
847  // Perform copy elision (codegen-only)
848  // Remove nodes that only serve to compose CALL inputs
849 
850  // For work vector elements, store the arg source (-1 for no trivial source)
851  std::vector<int> arg_i(worksize_, -1);
852  std::vector<int> nz_i(worksize_, -1);
853 
854  // Which algel corresponds to this source?
855  std::vector<casadi_int> alg_i(worksize_, -1);
856 
857  // Is this algel to be elided?
858  copy_elision_.resize(algorithm_.size(), false);
859 
860  casadi_int k=0;
861  for (auto&& e : algorithm_) {
862  switch (e.op) {
863  case OP_INPUT:
864  // Make source association
865  arg_i[e.i0] = e.i1;
866  nz_i[e.i0] = e.i2;
867  alg_i[e.i0] = k;
868  copy_elision_[k] = true;
869  break;
870  case OP_OUTPUT:
871  if (arg_i[e.i1]>=0) {
872  copy_elision_[alg_i[e.i1]] = false;
873  }
874  break;
875  case OP_CALL:
876  {
877  auto& m = call_.el[e.i1];
878 
879  // Inspect input arguments
880  casadi_int offset_input = 0;
881  for (casadi_int i=0; i<m.f_n_in; ++i) {
882  // Pattern match results
883  casadi_int arg = -1;
884  casadi_int offset = -1;
885  for (casadi_int j=0; j<m.f_nnz_in[i]; ++j) {
886  casadi_int k = offset_input+j;
887  if (j==0) {
888  arg = arg_i[m.dep[k]];
889  offset = nz_i[m.dep[k]];
890  }
891  if (arg_i[m.dep[k]]==-1) {
892  arg = -1;
893  // Pattern match failed
894  break;
895  }
896  if (nz_i[m.dep[k]]!=offset+j) {
897  arg = -1;
898  // Pattern match failed
899  break;
900  }
901  }
902 
903  // If we cannot perform elision
904  if (arg==-1) {
905  // We need copies for all nonzeros of input i
906  for (casadi_int j=0; j<m.f_nnz_in[i]; ++j) {
907  casadi_int k = offset_input+j;
908  if (arg_i[m.dep[k]]>=0) {
909  copy_elision_[alg_i[m.dep[k]]] = false;
910  }
911  }
912  }
913  // Store pattern match results
914  m.copy_elision_arg[i] = arg;
915  m.copy_elision_offset[i] = offset;
916 
917  offset += m.f_nnz_in[i];
918  offset_input += m.f_nnz_in[i];
919  }
920 
921  // Remove source association of all outputs
922  for (casadi_int i=0; i<m.n_res; ++i) {
923  if (m.res[i]>=0) {
924  arg_i[m.res[i]] = -1;
925  }
926  }
927  }
928  break;
929  case OP_CONST:
930  case OP_PARAMETER:
931  // Remove source association
932  arg_i[e.i0] = -1;
933  break;
934  default:
935  if (arg_i[e.i1]>=0) {
936  copy_elision_[alg_i[e.i1]] = false;
937  }
938  if (!casadi_math<double>::is_unary(e.op)) {
939  if (arg_i[e.i2]>=0) {
940  copy_elision_[alg_i[e.i2]] = false;
941  }
942  }
943  // Remove source association
944  arg_i[e.i0] = -1;
945  }
946  k++;
947  }
948  }
949 
951  std::vector<SXElem> ret(algorithm_.size(), casadi_limits<SXElem>::nan);
952 
953  std::vector<SXElem>::iterator it=ret.begin();
954 
955  // Iterator to the binary operations
956  std::vector<SXElem>::const_iterator b_it = operations_.begin();
957 
958  // Iterator to stack of constants
959  std::vector<SXElem>::const_iterator c_it = constants_.begin();
960 
961  // Iterator to free variables
962  std::vector<SXElem>::const_iterator p_it = free_vars_.begin();
963 
964  // Evaluate algorithm
965  if (verbose_) casadi_message("Evaluating algorithm forward");
966  for (auto&& a : algorithm_) {
967  switch (a.op) {
968  case OP_INPUT:
969  case OP_OUTPUT:
970  it++;
971  break;
972  case OP_CONST:
973  *it++ = *c_it++;
974  break;
975  case OP_PARAMETER:
976  *it++ = *p_it++;
977  break;
978  default:
979  *it++ = *b_it++;
980  }
981  }
982  casadi_assert(it==ret.end(), "Dimension mismatch");
983  return ret;
984  }
985 
987  eval_sx(const SXElem** arg, SXElem** res, casadi_int* iw, SXElem* w, void* mem,
988  bool always_inline, bool never_inline) const {
989 
990  always_inline = always_inline || always_inline_;
991  never_inline = never_inline || never_inline_;
992 
993  // non-inlining call is implemented in the base-class
994  if (!should_inline(true, always_inline, never_inline)) {
995  return FunctionInternal::eval_sx(arg, res, iw, w, mem, false, true);
996  }
997 
998  if (verbose_) casadi_message(name_ + "::eval_sx");
999 
1000  // Iterator to the binary operations
1001  std::vector<SXElem>::const_iterator b_it=operations_.begin();
1002 
1003  // Iterator to stack of constants
1004  std::vector<SXElem>::const_iterator c_it = constants_.begin();
1005 
1006  // Iterator to free variables
1007  std::vector<SXElem>::const_iterator p_it = free_vars_.begin();
1008 
1009  // Evaluate algorithm
1010  if (verbose_) casadi_message("Evaluating algorithm forward");
1011  for (auto&& a : algorithm_) {
1012  switch (a.op) {
1013  case OP_INPUT:
1014  w[a.i0] = arg[a.i1]==nullptr ? 0 : arg[a.i1][a.i2];
1015  break;
1016  case OP_OUTPUT:
1017  if (res[a.i0]!=nullptr) res[a.i0][a.i2] = w[a.i1];
1018  break;
1019  case OP_CONST:
1020  w[a.i0] = *c_it++;
1021  break;
1022  case OP_PARAMETER:
1023  w[a.i0] = *p_it++; break;
1024  case OP_CALL:
1025  {
1026  const ExtendedAlgEl& m = call_.el.at(a.i1);
1027  const SXElem& orig = *b_it++;
1028  std::vector<SXElem> deps(m.n_dep);
1029  bool identical = true;
1030 
1031  std::vector<SXElem> ret;
1032  for (casadi_int i=0;i<m.n_dep;++i) {
1033  identical &= SXElem::is_equal(w[m.dep.at(i)], orig->dep(i), 2);
1034  }
1035  if (identical) {
1036  ret = OutputSX::split(orig, m.n_res);
1037  } else {
1038  for (casadi_int i=0;i<m.n_dep;++i) deps[i] = w[m.dep[i]];
1039  ret = SXElem::call(m.f, deps);
1040  }
1041  for (casadi_int i=0;i<m.n_res;++i) {
1042  if (m.res[i]>=0) w[m.res[i]] = ret[i];
1043  }
1044  }
1045  break;
1046  default:
1047  {
1048  // Evaluate the function to a temporary value
1049  // (as it might overwrite the children in the work vector)
1050  SXElem f;
1051  switch (a.op) {
1052  CASADI_MATH_FUN_BUILTIN(w[a.i1], w[a.i2], f)
1053  }
1054 
1055  // If this new expression is identical to the expression used
1056  // to define the algorithm, then reuse
1057  const casadi_int depth = 2; // NOTE: a higher depth could possibly give more savings
1058  f.assignIfDuplicate(*b_it++, depth);
1059 
1060  // Finally save the function value
1061  w[a.i0] = f;
1062  }
1063  }
1064  }
1065  return 0;
1066  }
1067 
1068  void SXFunction::eval_mx(const MXVector& arg, MXVector& res,
1069  bool always_inline, bool never_inline) const {
1070  always_inline = always_inline || always_inline_;
1071  never_inline = never_inline || never_inline_;
1072 
1073  // non-inlining call is implemented in the base-class
1074  if (!always_inline) {
1075  return FunctionInternal::eval_mx(arg, res, false, true);
1076  }
1077 
1078  if (verbose_) casadi_message(name_ + "::eval_mx");
1079 
1080  // Iterator to stack of constants
1081  std::vector<SXElem>::const_iterator c_it = constants_.begin();
1082 
1083  casadi_assert(!has_free(),
1084  "Free variables not supported in inlining call to SXFunction::eval_mx");
1085 
1086  // Resize the number of outputs
1087  casadi_assert(arg.size()==n_in_, "Wrong number of input arguments");
1088  res.resize(out_.size());
1089 
1090  // Symbolic work, non-differentiated
1091  std::vector<MX> w(sz_w());
1092  if (verbose_) casadi_message("Allocated work vector");
1093 
1094  // Split up inputs analogous to symbolic primitives
1095  std::vector<std::vector<MX> > arg_split(in_.size());
1096  for (casadi_int i=0; i<in_.size(); ++i) {
1097  // Get nonzeros of argument
1098  std::vector<MX> orig = arg[i].get_nonzeros();
1099 
1100  // Project to needed sparsity
1101  std::vector<MX> target(sparsity_in_[i].nnz(), 0);
1102  std::vector<MX> w(arg[i].size1());
1103  casadi_project(get_ptr(orig), arg[i].sparsity(),
1104  get_ptr(target), sparsity_in_[i], get_ptr(w));
1105 
1106  // Store
1107  arg_split[i] = target;
1108  }
1109 
1110  // Allocate storage for split outputs
1111  std::vector<std::vector<MX> > res_split(out_.size());
1112  for (casadi_int i=0; i<out_.size(); ++i) res_split[i].resize(nnz_out(i));
1113 
1114  // Evaluate algorithm
1115  if (verbose_) casadi_message("Evaluating algorithm forward");
1116  for (auto&& a : algorithm_) {
1117  switch (a.op) {
1118  case OP_INPUT:
1119  w[a.i0] = arg_split[a.i1][a.i2];
1120  break;
1121  case OP_OUTPUT:
1122  res_split[a.i0][a.i2] = w[a.i1];
1123  break;
1124  case OP_CONST:
1125  w[a.i0] = static_cast<double>(*c_it++);
1126  break;
1127  case OP_CALL:
1128  {
1129  const ExtendedAlgEl& m = call_.el.at(a.i1);
1130  std::vector<MX> deps(m.n_dep);
1131  std::vector<MX> args;
1132 
1133  casadi_int k = 0;
1134  // Construct matrix-valued function arguments
1135  for (casadi_int i=0;i<m.f_n_in;++i) {
1136  std::vector<MX> arg;
1137  for (casadi_int j=0;j<m.f_nnz_in[i];++j) {
1138  arg.push_back(w[m.dep[k++]]);
1139  }
1140  args.push_back(sparsity_cast(vertcat(arg), m.f.sparsity_in(i)));
1141  }
1142 
1143 
1144  std::vector<MX> ret = m.f(args);
1145  std::vector<MX> res;
1146 
1147  // Break apart matriv-valued outputs into scalar components
1148  for (casadi_int i=0;i<m.f_n_out;++i) {
1149  std::vector<MX> nz = ret[i].get_nonzeros();
1150  res.insert(res.end(), nz.begin(), nz.end());
1151  }
1152 
1153  // Store into work vector
1154  for (casadi_int i=0;i<m.n_res;++i) {
1155  if (m.res[i]>=0) w[m.res[i]] = res[i];
1156  }
1157  }
1158  break;
1159  default:
1160  // Evaluate the function to a temporary value
1161  // (as it might overwrite the children in the work vector)
1162  MX f;
1163  switch (a.op) {
1164  CASADI_MATH_FUN_BUILTIN(w[a.i1], w[a.i2], f)
1165  }
1166 
1167  // Finally save the function value
1168  w[a.i0] = f;
1169  }
1170  }
1171 
1172  // Join split outputs
1173  for (casadi_int i=0; i<res.size(); ++i) {
1174  res[i] = sparsity_cast(vertcat(res_split[i]), sparsity_out_[i]);
1175  }
1176  }
1177 
1178  bool SXFunction::should_inline(bool with_sx, bool always_inline, bool never_inline) const {
1179  // If inlining has been specified
1180  casadi_assert(!(always_inline && never_inline),
1181  "Inconsistent options for " + definition());
1182  casadi_assert(!(never_inline && has_free()),
1183  "Must inline " + definition());
1184  if (always_inline) return true;
1185  if (never_inline) return false;
1186  // Functions with free variables must be inlined
1187  if (has_free()) return true;
1188  // Inlining by default
1189  return true;
1190  }
1191 
1192  void SXFunction::ad_forward(const std::vector<std::vector<SX> >& fseed,
1193  std::vector<std::vector<SX> >& fsens) const {
1194  if (verbose_) casadi_message(name_ + "::ad_forward");
1195 
1196  // Number of forward seeds
1197  casadi_int nfwd = fseed.size();
1198  fsens.resize(nfwd);
1199 
1200  // Quick return if possible
1201  if (nfwd==0) return;
1202 
1203  // Check if seeds need to have dimensions corrected
1204  casadi_int npar = 1;
1205  for (auto&& r : fseed) {
1206  if (!matching_arg(r, npar)) {
1207  casadi_assert_dev(npar==1);
1208  return ad_forward(replace_fseed(fseed, npar), fsens);
1209  }
1210  }
1211 
1212  // Make sure seeds have matching sparsity patterns
1213  for (auto it=fseed.begin(); it!=fseed.end(); ++it) {
1214  casadi_assert_dev(it->size()==n_in_);
1215  for (casadi_int i=0; i<n_in_; ++i) {
1216  if (it->at(i).sparsity()!=sparsity_in_[i]) {
1217  // Correct sparsity
1218  std::vector<std::vector<SX> > fseed2(fseed);
1219  for (auto&& r : fseed2) {
1220  for (casadi_int i=0; i<n_in_; ++i) r[i] = project(r[i], sparsity_in_[i]);
1221  }
1222  return ad_forward(fseed2, fsens);
1223  }
1224  }
1225  }
1226 
1227  // Allocate results
1228  for (casadi_int d=0; d<nfwd; ++d) {
1229  fsens[d].resize(n_out_);
1230  for (casadi_int i=0; i<fsens[d].size(); ++i)
1231  if (fsens[d][i].sparsity()!=sparsity_out_[i])
1232  fsens[d][i] = SX::zeros(sparsity_out_[i]);
1233  }
1234 
1235  // Iterator to the binary operations
1236  std::vector<SXElem>::const_iterator b_it=operations_.begin();
1237 
1238  // Tape
1239  std::vector<TapeEl<SXElem> > s_pdwork(operations_.size());
1240  std::vector<TapeEl<SXElem> >::iterator it1 = s_pdwork.begin();
1241 
1242  // Evaluate algorithm
1243  if (verbose_) casadi_message("Evaluating algorithm forward");
1244  for (auto&& e : algorithm_) {
1245  switch (e.op) {
1246  case OP_INPUT:
1247  case OP_OUTPUT:
1248  case OP_CONST:
1249  case OP_PARAMETER:
1250  break;
1251  default:
1252  {
1253  const SXElem& f=*b_it++;
1254  switch (e.op) {
1255  CASADI_MATH_DER_BUILTIN(f->dep(0), f->dep(1), f, it1++->d)
1256  case OP_CALL:
1257  it1++->d[0] = f;
1258  }
1259  }
1260  }
1261  }
1262 
1263  // Work vector
1264  std::vector<SXElem> w(worksize_);
1265 
1266  // Calculate forward sensitivities
1267  if (verbose_) casadi_message("Calculating forward derivatives");
1268  for (casadi_int dir=0; dir<nfwd; ++dir) {
1269  std::vector<TapeEl<SXElem> >::const_iterator it2 = s_pdwork.begin();
1270  for (auto&& a : algorithm_) {
1271  switch (a.op) {
1272  case OP_INPUT:
1273  w[a.i0] = fseed[dir][a.i1].nonzeros()[a.i2]; break;
1274  case OP_OUTPUT:
1275  fsens[dir][a.i0].nonzeros()[a.i2] = w[a.i1]; break;
1276  case OP_CONST:
1277  case OP_PARAMETER:
1278  w[a.i0] = 0;
1279  break;
1280  case OP_IF_ELSE_ZERO:
1281  w[a.i0] = if_else_zero(it2++->d[1], w[a.i2]);
1282  break;
1283  case OP_CALL:
1284  {
1285  auto& m = call_.el.at(a.i1);
1286  CallSX* call_node = static_cast<CallSX*>(it2->d[0].get());
1287 
1288  // Construct forward sensitivity function
1289  Function ff = m.f.forward(1);
1290 
1291  // Symbolic inputs to forward sensitivity function
1292  std::vector<SXElem> deps;
1293  deps.reserve(2*m.n_dep);
1294 
1295  // Set nominal inputs from node
1296  casadi_int offset = 0;
1297  for (casadi_int i=0;i<m.f_n_in;++i) {
1298  casadi_int nnz = ff.nnz_in(i);
1299  casadi_assert(nnz==0 || nnz==m.f.nnz_in(i), "Not implemented");
1300  for (casadi_int j=0;j<nnz;++j) {
1301  deps.push_back(call_node->dep(offset+j));
1302  }
1303  offset += m.f_nnz_in[i];
1304  }
1305 
1306  // Do not set nominal outputs
1307  offset = 0;
1308  for (casadi_int i=0;i<m.f_n_out;++i) {
1309  casadi_int nnz = ff.nnz_in(i+m.f_n_in);
1310  casadi_assert(nnz==0 || nnz==m.f.nnz_out(i), "Not implemented");
1311  for (casadi_int j=0;j<nnz;++j) {
1312  deps.push_back(call_node->get_output(offset+j));
1313  }
1314  offset += m.f_nnz_out[i];
1315  }
1316 
1317  // Read in forward seeds from work vector
1318  offset = 0;
1319  for (casadi_int i=0;i<m.f_n_in;++i) {
1320  casadi_int nnz = ff.nnz_in(i+m.f_n_in+m.f_n_out);
1321  // nnz=0 occurs for is_diff_in[i] false
1322  casadi_assert(nnz==0 || nnz==m.f.nnz_in(i), "Not implemented");
1323  if (nnz) {
1324  for (casadi_int j=0;j<nnz;++j) {
1325  deps.push_back(w[m.dep[offset+j]]);
1326  }
1327  }
1328  offset += m.f_nnz_in[i];
1329  }
1330 
1331  // Call forward sensitivity function
1332  std::vector<SXElem> ret = SXElem::call(ff, deps);
1333 
1334  // Retrieve sensitivities
1335  offset = 0;
1336  casadi_int k = 0;
1337  for (casadi_int i=0;i<m.f_n_out;++i) {
1338  casadi_int nnz = ff.nnz_out(i);
1339  // nnz=0 occurs for is_diff_out[i] false
1340  casadi_assert(nnz==0 || nnz==m.f_nnz_out[i], "Not implemented");
1341  if (nnz) {
1342  for (casadi_int j=0;j<nnz;++j) {
1343  if (m.res[offset+j]>=0) w[m.res[offset+j]] = ret[k];
1344  k++;
1345  }
1346  }
1347  offset += m.f_nnz_out[i];
1348  }
1349  }
1350  it2++;
1351  break;
1352  CASADI_MATH_BINARY_BUILTIN // Binary operation
1353  w[a.i0] = it2->d[0] * w[a.i1] + it2->d[1] * w[a.i2];
1354  it2++;
1355  break;
1356  default: // Unary operation
1357  w[a.i0] = it2->d[0] * w[a.i1];
1358  it2++;
1359  }
1360  }
1361  }
1362  }
1363 
1364  void SXFunction::ad_reverse(const std::vector<std::vector<SX> >& aseed,
1365  std::vector<std::vector<SX> >& asens) const {
1366  if (verbose_) casadi_message(name_ + "::ad_reverse");
1367 
1368  // number of adjoint seeds
1369  casadi_int nadj = aseed.size();
1370  asens.resize(nadj);
1371 
1372  // Quick return if possible
1373  if (nadj==0) return;
1374 
1375  // Check if seeds need to have dimensions corrected
1376  casadi_int npar = 1;
1377  for (auto&& r : aseed) {
1378  if (!matching_res(r, npar)) {
1379  casadi_assert_dev(npar==1);
1380  return ad_reverse(replace_aseed(aseed, npar), asens);
1381  }
1382  }
1383 
1384  // Make sure matching sparsity of fseed
1385  bool matching_sparsity = true;
1386  for (casadi_int d=0; d<nadj; ++d) {
1387  casadi_assert_dev(aseed[d].size()==n_out_);
1388  for (casadi_int i=0; matching_sparsity && i<n_out_; ++i)
1389  matching_sparsity = aseed[d][i].sparsity()==sparsity_out_[i];
1390  }
1391 
1392  // Correct sparsity if needed
1393  if (!matching_sparsity) {
1394  std::vector<std::vector<SX> > aseed2(aseed);
1395  for (casadi_int d=0; d<nadj; ++d)
1396  for (casadi_int i=0; i<n_out_; ++i)
1397  if (aseed2[d][i].sparsity()!=sparsity_out_[i])
1398  aseed2[d][i] = project(aseed2[d][i], sparsity_out_[i]);
1399  return ad_reverse(aseed2, asens);
1400  }
1401 
1402  // Allocate results if needed
1403  for (casadi_int d=0; d<nadj; ++d) {
1404  asens[d].resize(n_in_);
1405  for (casadi_int i=0; i<asens[d].size(); ++i) {
1406  if (asens[d][i].sparsity()!=sparsity_in_[i]) {
1407  asens[d][i] = SX::zeros(sparsity_in_[i]);
1408  } else {
1409  std::fill(asens[d][i]->begin(), asens[d][i]->end(), 0);
1410  }
1411  }
1412  }
1413 
1414  // Iterator to the binary operations
1415  std::vector<SXElem>::const_iterator b_it=operations_.begin();
1416 
1417  // Tape
1418  std::vector<TapeEl<SXElem> > s_pdwork(operations_.size());
1419  std::vector<TapeEl<SXElem> >::iterator it1 = s_pdwork.begin();
1420 
1421  // Evaluate algorithm
1422  if (verbose_) casadi_message("Evaluating algorithm forward");
1423  for (auto&& a : algorithm_) {
1424  switch (a.op) {
1425  case OP_INPUT:
1426  case OP_OUTPUT:
1427  case OP_CONST:
1428  case OP_PARAMETER:
1429  break;
1430  default:
1431  {
1432  const SXElem& f=*b_it++;
1433  switch (a.op) {
1434  CASADI_MATH_DER_BUILTIN(f->dep(0), f->dep(1), f, it1++->d)
1435  case OP_CALL:
1436  it1++->d[0] = f;
1437  }
1438  }
1439  }
1440  }
1441 
1442  // Calculate adjoint sensitivities
1443  if (verbose_) casadi_message("Calculating adjoint derivatives");
1444 
1445  // Work vector
1446  std::vector<SXElem> w(worksize_, 0);
1447 
1448  for (casadi_int dir=0; dir<nadj; ++dir) {
1449  auto it2 = s_pdwork.rbegin();
1450  for (auto it = algorithm_.rbegin(); it!=algorithm_.rend(); ++it) {
1451  SXElem seed;
1452  switch (it->op) {
1453  case OP_INPUT:
1454  asens[dir][it->i1].nonzeros()[it->i2] = w[it->i0];
1455  w[it->i0] = 0;
1456  break;
1457  case OP_OUTPUT:
1458  w[it->i1] += aseed[dir][it->i0].nonzeros()[it->i2];
1459  break;
1460  case OP_CONST:
1461  case OP_PARAMETER:
1462  w[it->i0] = 0;
1463  break;
1464  case OP_IF_ELSE_ZERO:
1465  seed = w[it->i0];
1466  w[it->i0] = 0;
1467  w[it->i2] += if_else_zero(it2++->d[1], seed);
1468  break;
1469  case OP_CALL:
1470  {
1471  auto& m = call_.el.at(it->i1);
1472  CallSX* call_node = static_cast<CallSX*>(it2->d[0].get());
1473 
1474  // Construct reverse sensitivity function
1475  Function fr = m.f.reverse(1);
1476 
1477  // Symbolic inputs to reverse sensitivity function
1478  std::vector<SXElem> deps;
1479  deps.reserve(m.n_dep+m.n_res);
1480 
1481  // Set nominal inputs from node
1482  casadi_int offset = 0;
1483  for (casadi_int i=0;i<m.f_n_in;++i) {
1484  casadi_int nnz = fr.nnz_in(i);
1485  casadi_assert(nnz==0 || nnz==m.f.nnz_in(i), "Not implemented");
1486  for (casadi_int j=0;j<nnz;++j) {
1487  deps.push_back(call_node->dep(offset+j));
1488  }
1489  offset += m.f_nnz_in[i];
1490  }
1491 
1492  // Do not set nominal outputs
1493  offset = 0;
1494  for (casadi_int i=0;i<m.f_n_out;++i) {
1495  casadi_int nnz = fr.nnz_in(i+m.f_n_in);
1496  casadi_assert(nnz==0 || nnz==m.f.nnz_out(i), "Not implemented");
1497  for (casadi_int j=0;j<nnz;++j) {
1498  deps.push_back(call_node->get_output(offset+j));
1499  }
1500  offset += m.f_nnz_out[i];
1501  }
1502 
1503  // Read in reverse seeds from work vector
1504  offset = 0;
1505  for (casadi_int i=0;i<m.f_n_out;++i) {
1506  casadi_int nnz = fr.nnz_in(i+m.f_n_in+m.f_n_out);
1507  // nnz=0 occurs for is_diff_out[i] false
1508  casadi_assert(nnz==0 || nnz==m.f.nnz_out(i), "Not implemented");
1509  if (nnz) {
1510  for (casadi_int j=0;j<nnz;++j) {
1511  deps.push_back((m.res[offset+j]>=0) ? w[m.res[offset+j]] : 0);
1512  }
1513  }
1514  offset += m.f.nnz_out(i);
1515  }
1516 
1517  // Call reverse sensitivity function
1518  std::vector<SXElem> ret = SXElem::call(fr, deps);
1519 
1520  // Clear out reverse seeds
1521  for (casadi_int i=0;i<m.n_res;++i) {
1522  if (m.res[i]>=0) w[m.res[i]] = 0;
1523  }
1524 
1525  // Store reverse sensitivities into work vector
1526  offset = 0;
1527  casadi_int k = 0;
1528  for (casadi_int i=0;i<m.f_n_in;++i) {
1529  casadi_int nnz = fr.nnz_out(i);
1530  // nnz=0 occurs for is_diff_in[i] false
1531  casadi_assert(nnz==0 || nnz==m.f_nnz_in[i], "Not implemented");
1532  if (nnz) {
1533  for (casadi_int j=0;j<nnz;++j) {
1534  w[m.dep[offset+j]] += ret[k++];
1535  }
1536  }
1537  offset += m.f_nnz_in[i];
1538  }
1539  }
1540  it2++;
1541  break;
1542  CASADI_MATH_BINARY_BUILTIN // Binary operation
1543  seed = w[it->i0];
1544  w[it->i0] = 0;
1545  w[it->i1] += it2->d[0] * seed;
1546  w[it->i2] += it2++->d[1] * seed;
1547  break;
1548  default: // Unary operation
1549  seed = w[it->i0];
1550  w[it->i0] = 0;
1551  w[it->i1] += it2++->d[0] * seed;
1552  }
1553  }
1554  }
1555  }
1556 
1557  template<typename T, typename CT>
1559  CT*** call_arg, T*** call_res, casadi_int** call_iw, T** call_w, T** nz_in, T** nz_out) const {
1560  *call_arg += n_in_;
1561  *call_res += n_out_;
1562  *nz_in = *call_w + worksize_;
1563  *nz_out = *call_w + worksize_ + call_.sz_w_arg;
1564  *call_w = *call_w + worksize_ + call_.sz_w_arg + call_.sz_w_res;
1565 
1566  // Set up call_arg to point to nz_in
1567  T* ptr_w = *nz_in;
1568  for (casadi_int i=0;i<m.f_n_in;++i) {
1569  (*call_arg)[i] = ptr_w;
1570  ptr_w+=m.f_nnz_in[i];
1571  }
1572 
1573  // Set up call_res to point to nz_out
1574  ptr_w = *nz_out;
1575  for (casadi_int i=0;i<m.f_n_out;++i) {
1576  (*call_res)[i] = ptr_w;
1577  ptr_w+=m.f_nnz_out[i];
1578  }
1579  }
1580 
1581  template<typename T>
1582  void SXFunction::call_fwd(const AlgEl& e, const T** arg, T** res, casadi_int* iw, T* w) const {
1583  auto& m = call_.el[e.i1];
1584  const T** call_arg = arg;
1585  T** call_res = res;
1586  casadi_int* call_iw = iw;
1587  T* call_w = w;
1588  T* nz_in;
1589  T* nz_out;
1590 
1591  call_setup(m, &call_arg, &call_res, &call_iw, &call_w, &nz_in, &nz_out);
1592 
1593  // Populate nz_in from work vector
1594  for (casadi_int i=0;i<m.n_dep;++i) {
1595  nz_in[i] = w[m.dep[i]];
1596  }
1597  // Perform call nz_in -> nz_out
1598  m.f(call_arg, call_res, call_iw, call_w);
1599 
1600  // Store nz_out results back in workvector
1601  for (casadi_int i=0;i<m.n_res;++i) {
1602  // Only if the result is actually needed
1603  if (m.res[i]>=0) {
1604  w[m.res[i]] = nz_out[i];
1605  }
1606  }
1607  }
1608 
1609 
1610  template<typename T>
1611  void SXFunction::call_rev(const AlgEl& e, T** arg, T** res, casadi_int* iw, T* w) const {
1612  auto& m = call_.el[e.i1];
1613  bvec_t** call_arg = arg;
1614  bvec_t** call_res = res;
1615  casadi_int* call_iw = iw;
1616  bvec_t* call_w = w;
1617  bvec_t* nz_in;
1618  bvec_t* nz_out;
1619 
1620  call_setup(m, &call_arg, &call_res, &call_iw, &call_w, &nz_in, &nz_out);
1621 
1622  std::fill_n(nz_in, m.n_dep, 0);
1623 
1624  // Read in reverse seeds nz_out from work vector
1625  for (casadi_int i=0;i<m.n_res;++i) {
1626  nz_out[i] = (m.res[i]>=0) ? w[m.res[i]] : 0;
1627  }
1628 
1629  // Perform reverse mode call nz_out -> nz_in
1630  m.f.rev(call_arg, call_res, call_iw, call_w);
1631 
1632  // Clear out reverse seeds
1633  for (casadi_int i=0;i<m.n_res;++i) {
1634  if (m.res[i]>=0) w[m.res[i]] = 0;
1635  }
1636 
1637  // Store reverse sensitivities into work vector
1638  for (casadi_int i=0;i<m.n_dep;++i) {
1639  w[m.dep[i]] |= nz_in[i];
1640  }
1641  }
1642 
1644  sp_forward(const bvec_t** arg, bvec_t** res, casadi_int* iw, bvec_t* w, void* mem) const {
1645  // Fall back when forward mode not allowed
1646  if (sp_weight()==1 || sp_weight()==-1)
1647  return FunctionInternal::sp_forward(arg, res, iw, w, mem);
1648  // Propagate sparsity forward
1649  for (auto&& e : algorithm_) {
1650  switch (e.op) {
1651  case OP_CONST:
1652  case OP_PARAMETER:
1653  w[e.i0] = 0; break;
1654  case OP_INPUT:
1655  w[e.i0] = arg[e.i1]==nullptr ? 0 : arg[e.i1][e.i2];
1656  break;
1657  case OP_OUTPUT:
1658  if (res[e.i0]!=nullptr) res[e.i0][e.i2] = w[e.i1];
1659  break;
1660  case OP_CALL:
1661  call_fwd(e, arg, res, iw, w);
1662  break;
1663  default: // Unary or binary operation
1664  w[e.i0] = w[e.i1] | w[e.i2]; break;
1665  }
1666  }
1667  return 0;
1668  }
1669 
1671  casadi_int* iw, bvec_t* w, void* mem) const {
1672  // Fall back when reverse mode not allowed
1673  if (sp_weight()==0 || sp_weight()==-1)
1674  return FunctionInternal::sp_reverse(arg, res, iw, w, mem);
1675  std::fill_n(w, sz_w(), 0);
1676 
1677  // Propagate sparsity backward
1678  for (auto it=algorithm_.rbegin(); it!=algorithm_.rend(); ++it) {
1679  // Temp seed
1680  bvec_t seed;
1681 
1682  // Propagate seeds
1683  switch (it->op) {
1684  case OP_CONST:
1685  case OP_PARAMETER:
1686  w[it->i0] = 0;
1687  break;
1688  case OP_INPUT:
1689  if (arg[it->i1]!=nullptr) arg[it->i1][it->i2] |= w[it->i0];
1690  w[it->i0] = 0;
1691  break;
1692  case OP_OUTPUT:
1693  if (res[it->i0]!=nullptr) {
1694  w[it->i1] |= res[it->i0][it->i2];
1695  res[it->i0][it->i2] = 0;
1696  }
1697  break;
1698  case OP_CALL:
1699  call_rev(*it, arg, res, iw, w);
1700  break;
1701  default: // Unary or binary operation
1702  seed = w[it->i0];
1703  w[it->i0] = 0;
1704  w[it->i1] |= seed;
1705  w[it->i2] |= seed;
1706  }
1707  }
1708  return 0;
1709  }
1710 
1711  const SX SXFunction::sx_in(casadi_int ind) const {
1712  return in_.at(ind);
1713  }
1714 
1715  const std::vector<SX> SXFunction::sx_in() const {
1716  return in_;
1717  }
1718 
1719  bool SXFunction::is_a(const std::string& type, bool recursive) const {
1720  return type=="SXFunction" || (recursive && XFunction<SXFunction,
1721  SX, SXNode>::is_a(type, recursive));
1722  }
1723 
1724  void SXFunction::export_code_body(const std::string& lang,
1725  std::ostream &ss, const Dict& options) const {
1726 
1727  // Default values for options
1728  casadi_int indent_level = 0;
1729 
1730  // Read options
1731  for (auto&& op : options) {
1732  if (op.first=="indent_level") {
1733  indent_level = op.second;
1734  } else {
1735  casadi_error("Unknown option '" + op.first + "'.");
1736  }
1737  }
1738 
1739  // Construct indent string
1740  std::string indent;
1741  for (casadi_int i=0;i<indent_level;++i) {
1742  indent += " ";
1743  }
1744 
1745  // Non-cell aliases for inputs
1746  for (casadi_int i=0;i<n_in_;++i) {
1747  ss << indent << "argin_" << i << " = nonzeros_gen(varargin{" << i+1 << "});" << std::endl;
1748  }
1749 
1750  Function f = shared_from_this<Function>();
1751 
1752  for (casadi_int k=0;k<f.n_instructions();++k) {
1753  // Get operation
1754  casadi_int op = static_cast<casadi_int>(f.instruction_id(k));
1755  // Get input positions into workvector
1756  std::vector<casadi_int> o = f.instruction_output(k);
1757  // Get output positions into workvector
1758  std::vector<casadi_int> i = f.instruction_input(k);
1759  switch (op) {
1760  case OP_INPUT:
1761  {
1762  ss << indent << "w" << o[0] << " = " << "argin_" << i[0] << "(" << i[1]+1 << ");";
1763  ss << std::endl;
1764  }
1765  break;
1766  case OP_OUTPUT:
1767  {
1768  ss << indent << "argout_" << o[0] << "{" << o[1]+1 << "} = w" << i[0] << ";";
1769  ss << std::endl;
1770  }
1771  break;
1772  case OP_CONST:
1773  {
1774  std::ios_base::fmtflags fmtfl = ss.flags();
1775  ss << indent << "w" << o[0] << " = ";
1776  ss << std::scientific << std::setprecision(std::numeric_limits<double>::digits10 + 1);
1777  ss << f.instruction_constant(k) << ";" << std::endl;
1778  ss.flags(fmtfl);
1779  }
1780  break;
1781  case OP_SQ:
1782  {
1783  ss << indent << "w" << o[0] << " = " << "w" << i[0] << "^2;" << std::endl;
1784  }
1785  break;
1786  case OP_FABS:
1787  {
1788  ss << indent << "w" << o[0] << " = abs(" << "w" << i[0] << ");" << std::endl;
1789  }
1790  break;
1791  case OP_POW:
1792  case OP_CONSTPOW:
1793  ss << indent << "w" << o[0] << " = " << "w" << i[0] << ".^w" << i[1] << ";" << std::endl;
1794  break;
1795  case OP_NOT:
1796  ss << indent << "w" << o[0] << " = ~" << "w" << i[0] << ";" << std::endl;
1797  break;
1798  case OP_OR:
1799  ss << indent << "w" << o[0] << " = w" << i[0] << " | w" << i[1] << ";" << std::endl;
1800  break;
1801  case OP_AND:
1802  ss << indent << "w" << o[0] << " = w" << i[0] << " & w" << i[1] << ";" << std::endl;
1803  break;
1804  case OP_NE:
1805  ss << indent << "w" << o[0] << " = w" << i[0] << " ~= w" << i[1] << ";" << std::endl;
1806  break;
1807  case OP_IF_ELSE_ZERO:
1808  ss << indent << "w" << o[0] << " = ";
1809  ss << "if_else_zero_gen(w" << i[0] << ", w" << i[1] << ");" << std::endl;
1810  break;
1811  default:
1813  ss << indent << "w" << o[0] << " = " << casadi::casadi_math<double>::print(op,
1814  "w"+std::to_string(i[0]), "w"+std::to_string(i[1])) << ";" << std::endl;
1815  } else {
1816  ss << indent << "w" << o[0] << " = " << casadi::casadi_math<double>::print(op,
1817  "w"+std::to_string(i[0])) << ";" << std::endl;
1818  }
1819  }
1820  }
1821 
1822  }
1823 
1825  XFunction<SXFunction, SX, SXNode>(s) {
1826  int version = s.version("SXFunction", 1, 3);
1827  size_t n_instructions;
1828  s.unpack("SXFunction::n_instr", n_instructions);
1829 
1830  s.unpack("SXFunction::worksize", worksize_);
1831  s.unpack("SXFunction::free_vars", free_vars_);
1832  s.unpack("SXFunction::operations", operations_);
1833  s.unpack("SXFunction::constants", constants_);
1834  s.unpack("SXFunction::default_in", default_in_);
1835 
1836  if (version>=2) {
1837 
1838  s.unpack("SXFunction::call_sz_arg", call_.sz_arg);
1839  s.unpack("SXFunction::call_sz_res", call_.sz_res);
1840  s.unpack("SXFunction::call_sz_iw", call_.sz_iw);
1841  s.unpack("SXFunction::call_sz_w", call_.sz_w);
1842  s.unpack("SXFunction::call_sz_arg", call_.sz_w_arg);
1843  s.unpack("SXFunction::call_sz_res", call_.sz_w_res);
1844 
1845  size_t el_size;
1846  s.unpack("SXFunction::call_el_size", el_size);
1847  call_.el.reserve(el_size);
1848 
1849  // Loop over nodes
1850  for (casadi_int k=0;k<el_size;++k) {
1851  Function f;
1852  s.unpack("SXFunction::call_el_f", f);
1853  call_.el.emplace_back(f);
1854  auto& e = call_.el[k];
1855  s.unpack("SXFunction::call_el_dep", e.dep);
1856  s.unpack("SXFunction::call_el_res", e.res);
1857  s.unpack("SXFunction::call_el_copy_elision_arg", e.copy_elision_arg);
1858  s.unpack("SXFunction::call_el_copy_elision_offset", e.copy_elision_offset);
1859  }
1860 
1861  s.unpack("SXFunction::copy_elision", copy_elision_);
1862 
1863  } else {
1864  call_.sz_arg = 0;
1865  call_.sz_res = 0;
1866  call_.sz_iw = 0;
1867  call_.sz_w = 0;
1868  call_.sz_w_arg = 0;
1869  call_.sz_w_res = 0;
1870  call_.el.clear();
1871  copy_elision_.resize(n_instructions, false);
1872  }
1873 
1874  algorithm_.resize(n_instructions);
1875  for (casadi_int k=0;k<n_instructions;++k) {
1876  AlgEl& e = algorithm_[k];
1877  s.unpack("SXFunction::ScalarAtomic::op", e.op);
1878  s.unpack("SXFunction::ScalarAtomic::i0", e.i0);
1879  s.unpack("SXFunction::ScalarAtomic::i1", e.i1);
1880  s.unpack("SXFunction::ScalarAtomic::i2", e.i2);
1881  }
1882 
1883  // Default (persistent) options
1884  just_in_time_opencl_ = false;
1885  just_in_time_sparsity_ = false;
1886 
1887  s.unpack("SXFunction::live_variables", live_variables_);
1888  if (version>=3) {
1889  s.unpack("SXFunction::print_instructions", print_instructions_);
1890  } else {
1891  print_instructions_ = false;
1892  }
1893 
1895  }
1896 
1899  s.version("SXFunction", 3);
1900  s.pack("SXFunction::n_instr", algorithm_.size());
1901 
1902  s.pack("SXFunction::worksize", worksize_);
1903  s.pack("SXFunction::free_vars", free_vars_);
1904  s.pack("SXFunction::operations", operations_);
1905  s.pack("SXFunction::constants", constants_);
1906  s.pack("SXFunction::default_in", default_in_);
1907 
1908  s.pack("SXFunction::call_sz_arg", call_.sz_arg);
1909  s.pack("SXFunction::call_sz_res", call_.sz_res);
1910  s.pack("SXFunction::call_sz_iw", call_.sz_iw);
1911  s.pack("SXFunction::call_sz_w", call_.sz_w);
1912  s.pack("SXFunction::call_sz_arg", call_.sz_w_arg);
1913  s.pack("SXFunction::call_sz_res", call_.sz_w_res);
1914 
1915  s.pack("SXFunction::call_el_size", call_.el.size());
1916  // Loop over ExtendedALgEl elements
1917  for (const auto& n : call_.el) {
1918  s.pack("SXFunction::call_el_f", n.f);
1919  s.pack("SXFunction::call_el_dep", n.dep);
1920  s.pack("SXFunction::call_el_res", n.res);
1921  s.pack("SXFunction::call_el_copy_elision_arg", n.copy_elision_arg);
1922  s.pack("SXFunction::call_el_copy_elision_offset", n.copy_elision_offset);
1923  }
1924 
1925  s.pack("SXFunction::copy_elision", copy_elision_);
1926 
1927  // Loop over algorithm
1928  for (const auto& e : algorithm_) {
1929  s.pack("SXFunction::ScalarAtomic::op", e.op);
1930  s.pack("SXFunction::ScalarAtomic::i0", e.i0);
1931  s.pack("SXFunction::ScalarAtomic::i1", e.i1);
1932  s.pack("SXFunction::ScalarAtomic::i2", e.i2);
1933  }
1934 
1935  s.pack("SXFunction::live_variables", live_variables_);
1936  s.pack("SXFunction::print_instructions", print_instructions_);
1937 
1939  }
1940 
1942  return new SXFunction(s);
1943  }
1944 
1945  void SXFunction::find(std::map<FunctionInternal*, Function>& all_fun,
1946  casadi_int max_depth) const {
1947  for (auto&& e : algorithm_) {
1948  if (e.op == OP_CALL) {
1949  const ExtendedAlgEl& m = call_.el.at(e.i1);
1950  add_embedded(all_fun, m.f, max_depth);
1951  }
1952  }
1953  }
1954 
1955  std::vector<SX> SXFunction::order(const std::vector<SX>& expr) {
1956 #ifdef CASADI_WITH_THREADSAFE_SYMBOLICS
1957  std::lock_guard<std::mutex> lock(SX::get_mutex_temp());
1958 #endif // CASADI_WITH_THREADSAFE_SYMBOLICS
1959  // Stack used to sort the computational graph
1960  std::stack<SXNode*> s;
1961 
1962  // All nodes
1963  std::vector<SXNode*> nodes;
1964 
1965  // Add the list of nodes
1966  casadi_int ind=0;
1967  for (auto it = expr.begin(); it != expr.end(); ++it, ++ind) {
1968  casadi_int nz=0;
1969  for (auto itc = (*it)->begin(); itc != (*it)->end(); ++itc, ++nz) {
1970  // Add outputs to the list
1971  s.push(itc->get());
1973  }
1974  }
1975 
1976  // Clear temporary markers
1977  for (casadi_int i=0; i<nodes.size(); ++i) {
1978  nodes[i]->temp = 0;
1979  }
1980 
1981  std::vector<SX> ret(nodes.size());
1982  for (casadi_int i=0; i<nodes.size(); ++i) {
1983  ret[i] = SXElem::create(nodes[i]);
1984  }
1985 
1986  return ret;
1987  }
1988 
1989 } // namespace casadi
SXElem get_output(casadi_int oind) const override
Get an output.
Definition: call_sx.hpp:106
const SXElem & dep(casadi_int i) const override
get the reference of a dependency
Definition: call_sx.hpp:119
Helper class for C code generation.
std::string add_dependency(const Function &f)
Add a function dependency.
std::string arg(casadi_int i) const
Refer to argument.
void reserve_work(casadi_int n)
Reserve a maximum size of work elements, used for padding of index.
std::string constant(const std::vector< casadi_int > &v)
Represent an array constant; adding it when new.
std::string printf(const std::string &str, const std::vector< std::string > &arg=std::vector< std::string >())
Printf.
std::string print_op(casadi_int op, const std::string &a0)
Print an operation to a c file.
void print_vector(std::ostream &s, const std::string &name, const std::vector< casadi_int > &v)
Print casadi_int vector to a c file.
std::string res(casadi_int i) const
Refer to resuly.
bool avoid_stack() const
Avoid stack?
std::string sx_work(casadi_int i)
Declare a work vector element.
Helper class for Serialization.
void unpack(Sparsity &e)
Reconstruct an object from the input stream.
void version(const std::string &name, int v)
void alloc_iw(size_t sz_iw, bool persistent=false)
Ensure required length of iw field.
std::vector< Sparsity > sparsity_in_
Input and output sparsity.
std::vector< std::vector< M > > replace_fseed(const std::vector< std::vector< M >> &fseed, casadi_int npar) const
Replace 0-by-0 forward seeds.
void alloc_res(size_t sz_res, bool persistent=false)
Ensure required length of res field.
std::string definition() const
Get function signature: name:(inputs)->(outputs)
void alloc_arg(size_t sz_arg, bool persistent=false)
Ensure required length of arg field.
static void print_canonical(std::ostream &stream, const Sparsity &sp, const double *nz)
Print canonical representation of a numeric matrix.
virtual double sp_weight() const
Weighting factor for chosing forward/reverse mode,.
size_t n_in_
Number of inputs and outputs.
virtual void eval_mx(const MXVector &arg, MXVector &res, bool always_inline, bool never_inline) const
Evaluate with symbolic matrices.
virtual int eval_sx(const SXElem **arg, SXElem **res, casadi_int *iw, SXElem *w, void *mem, bool always_inline, bool never_inline) const
Evaluate with symbolic scalars.
bool matching_arg(const std::vector< M > &arg, casadi_int &npar) const
Check if input arguments that needs to be replaced.
virtual int sp_forward(const bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const
Propagate sparsity forward.
std::vector< double > nz_in(const std::vector< DM > &arg) const
Convert from/to flat vector of input/output nonzeros.
static const Options options_
Options.
std::vector< Sparsity > sparsity_out_
bool matching_res(const std::vector< M > &arg, casadi_int &npar) const
Check if output arguments that needs to be replaced.
void disp(std::ostream &stream, bool more) const override
Display object.
size_t sz_w() const
Get required length of w field.
virtual int sp_reverse(bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const
Propagate sparsity backwards.
void alloc_w(size_t sz_w, bool persistent=false)
Ensure required length of w field.
std::vector< double > nz_out(const std::vector< DM > &res) const
Convert from/to flat vector of input/output nonzeros.
casadi_int nnz_out() const
Number of input/output nonzeros.
void setup(void *mem, const double **arg, double **res, casadi_int *iw, double *w) const
Set the (persistent and temporary) work vectors.
Dict generate_options(const std::string &target) const override
Reconstruct options dict.
void add_embedded(std::map< FunctionInternal *, Function > &all_fun, const Function &dep, casadi_int max_depth) const
std::vector< std::vector< M > > replace_aseed(const std::vector< std::vector< M >> &aseed, casadi_int npar) const
Replace 0-by-0 reverse seeds.
Function object.
Definition: function.hpp:60
Function forward(casadi_int nfwd) const
Get a function that calculates nfwd forward derivatives.
Definition: function.cpp:1135
casadi_int nnz_out() const
Get number of output nonzeros.
Definition: function.cpp:855
casadi_int n_instructions() const
Number of instructions in the algorithm (SXFunction/MXFunction)
Definition: function.cpp:1717
size_t sz_res() const
Get required length of res field.
Definition: function.cpp:1085
const std::string & name() const
Name of the function.
Definition: function.cpp:1315
Function reverse(casadi_int nadj) const
Get a function that calculates nadj adjoint derivatives.
Definition: function.cpp:1143
std::vector< casadi_int > instruction_input(casadi_int k) const
Locations in the work vector for the inputs of the instruction.
Definition: function.cpp:1749
const Sparsity & sparsity_in(casadi_int ind) const
Get sparsity of a given input.
Definition: function.cpp:1015
std::vector< casadi_int > instruction_output(casadi_int k) const
Location in the work vector for the output of the instruction.
Definition: function.cpp:1765
size_t sz_iw() const
Get required length of iw field.
Definition: function.cpp:1087
casadi_int n_out() const
Get the number of function outputs.
Definition: function.cpp:823
casadi_int n_in() const
Get the number of function inputs.
Definition: function.cpp:819
size_t sz_w() const
Get required length of w field.
Definition: function.cpp:1089
size_t sz_arg() const
Get required length of arg field.
Definition: function.cpp:1083
casadi_int nnz_in() const
Get number of input nonzeros.
Definition: function.cpp:851
double instruction_constant(casadi_int k) const
Get the floating point output argument of an instruction (SXFunction)
Definition: function.cpp:1757
casadi_int instruction_id(casadi_int k) const
Identifier index of the instruction (SXFunction/MXFunction)
Definition: function.cpp:1741
static Matrix< Scalar > zeros(casadi_int nrow=1, casadi_int ncol=1)
Create a dense matrix or a matrix with specified sparsity with all entries zero.
static casadi_int copy_elision_min_size
static void check()
Raises an error if an interrupt was captured.
MX - Matrix expression.
Definition: mx.hpp:92
Sparse matrix class. SX and DM are specializations.
Definition: matrix_decl.hpp:99
void print_scalar(std::ostream &stream) const
Print scalar.
static std::vector< SXElem > split(const SXElem &e, casadi_int n)
Definition: output_sx.hpp:139
Base class for FunctionInternal and LinsolInternal.
bool verbose_
Verbose printout.
void clear_mem()
Clear all memory (called from destructor)
The basic scalar symbolic class of CasADi.
Definition: sx_elem.hpp:75
void assignIfDuplicate(const SXElem &scalar, casadi_int depth=1)
Assign to another expression, if a duplicate.
Definition: sx_elem.cpp:111
static std::vector< SXElem > call(const Function &f, const std::vector< SXElem > &deps)
Definition: sx_elem.cpp:403
static SXElem create(SXNode *node)
Definition: sx_elem.cpp:62
SXNode * get() const
Get a pointer to the node.
Definition: sx_elem.cpp:174
static bool is_equal(const SXElem &x, const SXElem &y, casadi_int depth=0)
Check equality up to a given depth.
Definition: sx_elem.cpp:529
Internal node class for SXFunction.
Definition: sx_function.hpp:54
std::vector< SXElem > operations_
The expressions corresponding to each binary operation.
SXFunction(const std::string &name, const std::vector< Matrix< SXElem > > &inputv, const std::vector< Matrix< SXElem > > &outputv, const std::vector< std::string > &name_in, const std::vector< std::string > &name_out)
Constructor.
void eval_mx(const MXVector &arg, MXVector &res, bool always_inline, bool never_inline) const override
Evaluate symbolically, MX type.
SX instructions_sx() const override
get SX expression associated with instructions
void ad_reverse(const std::vector< std::vector< SX > > &aseed, std::vector< std::vector< SX > > &asens) const
Calculate reverse mode directional derivatives.
std::string print(const ScalarAtomic &a) const
bool should_inline(bool with_sx, bool always_inline, bool never_inline) const override
void codegen_declarations(CodeGenerator &g) const override
Generate code for the declarations of the C function.
void print_res(std::ostream &stream, casadi_int k, const ScalarAtomic &el, const double *w) const
const std::vector< SX > sx_in() const override
Get function input(s) and output(s)
void init(const Dict &opts) override
Initialize.
void export_code_body(const std::string &lang, std::ostream &stream, const Dict &options) const override
Export function in a specific language.
void serialize_body(SerializingStream &s) const override
Serialize an object without type information.
void init_copy_elision()
Part of initialization responsible for preparing copy elision.
static const Options options_
Options.
std::vector< bool > copy_elision_
Copy elision per algel.
bool is_smooth() const
Check if smooth.
int sp_forward(const bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const override
Propagate sparsity forward.
size_t codegen_sz_w(const CodeGenerator &g) const override
Get the size of the work vector, for codegen.
void call_rev(const AlgEl &e, T **arg, T **res, casadi_int *iw, T *w) const
bool has_free() const override
Does the function have free variables.
void call_fwd(const AlgEl &e, const T **arg, T **res, casadi_int *iw, T *w) const
void find(std::map< FunctionInternal *, Function > &all_fun, casadi_int max_depth) const override
std::vector< SXElem > constants_
The expressions corresponding to each constant.
int sp_reverse(bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const override
Propagate sparsity backwards.
bool just_in_time_opencl_
With just-in-time compilation using OpenCL.
struct casadi::SXFunction::CallInfo call_
void codegen_body(CodeGenerator &g) const override
Generate code for the body of the C function.
static std::vector< SX > order(const std::vector< SX > &expr)
void print_arg(std::ostream &stream, casadi_int k, const ScalarAtomic &el, const double *w) const
int eval_sx(const SXElem **arg, SXElem **res, casadi_int *iw, SXElem *w, void *mem, bool always_inline, bool never_inline) const override
evaluate symbolically while also propagating directional derivatives
std::vector< std::string > get_free() const override
Print free variables.
std::vector< AlgEl > algorithm_
all binary nodes of the tree in the order of execution
int eval(const double **arg, double **res, casadi_int *iw, double *w, void *mem) const override
Evaluate numerically, work vectors given.
Definition: sx_function.cpp:72
std::vector< SXElem > free_vars_
Free variables.
bool is_a(const std::string &type, bool recursive) const override
Check if the function is of a particular type.
std::vector< double > default_in_
Default input values.
void disp_more(std::ostream &stream) const override
Print the algorithm.
static ProtoFunction * deserialize(DeserializingStream &s)
Deserialize without type information.
~SXFunction() override
Destructor.
Definition: sx_function.cpp:68
Dict generate_options(const std::string &target="clone") const override
Reconstruct options dict.
bool print_instructions_
Print each operation during evaluation.
void call_setup(const ExtendedAlgEl &m, CT ***call_arg, T ***call_res, casadi_int **call_iw, T **call_w, T **nz_in, T **nz_out) const
bool live_variables_
Live variables?
void ad_forward(const std::vector< std::vector< SX > > &fseed, std::vector< std::vector< SX > > &fsens) const
Calculate forward mode directional derivatives.
casadi_int n_instructions() const override
Get the number of atomic operations.
bool just_in_time_sparsity_
With just-in-time compilation for the sparsity propagation.
Internal node class for SX.
Definition: sx_node.hpp:49
virtual const SXElem & dep(casadi_int i) const
get the reference of a child
Definition: sx_node.cpp:80
virtual double to_double() const
Get value of a constant node.
Definition: sx_node.cpp:56
virtual bool is_symbolic() const
check properties of a node
Definition: sx_node.hpp:71
virtual casadi_int op() const =0
get the operation
virtual bool is_constant() const
check properties of a node
Definition: sx_node.hpp:69
Helper class for Serialization.
void version(const std::string &name, int v)
void pack(const Sparsity &e)
Serializes an object to the output stream.
Internal node class for the base class of SXFunction and MXFunction.
Definition: x_function.hpp:57
std::vector< Matrix< SXElem > > out_
Outputs of the function (needed for symbolic calculations)
Definition: x_function.hpp:262
void delayed_deserialize_members(DeserializingStream &s)
Definition: x_function.hpp:299
void init(const Dict &opts) override
Initialize.
Definition: x_function.hpp:319
std::vector< Matrix< SXElem > > in_
Inputs of the function (needed for symbolic calculations)
Definition: x_function.hpp:257
void delayed_serialize_members(SerializingStream &s) const
Helper functions to avoid recursion limit.
Definition: x_function.hpp:305
void serialize_body(SerializingStream &s) const override
Serialize an object without type information.
Definition: x_function.hpp:311
static void sort_depth_first(std::stack< SXNode * > &s, std::vector< SXNode * > &nodes)
Topological sorting of the nodes based on Depth-First Search (DFS)
Definition: x_function.hpp:395
casadi_limits class
The casadi namespace.
Definition: archiver.cpp:28
std::string join(const std::vector< std::string > &l, const std::string &delim)
double if_else_zero(double x, double y)
Conditional assignment.
Definition: calculus.hpp:289
unsigned long long bvec_t
void casadi_project(const T1 *x, const casadi_int *sp_x, T1 *y, const casadi_int *sp_y, T1 *w)
Sparse copy: y <- x, w work vector (length >= number of rows)
std::vector< MX > MXVector
Definition: mx.hpp:1006
@ OT_DOUBLEVECTOR
Matrix< SXElem > SX
Definition: sx_fwd.hpp:32
std::string str(const T &v)
String representation, any type.
GenericType::Dict Dict
C++ equivalent of Python's dict or MATLAB's struct.
T * get_ptr(std::vector< T > &v)
Get a pointer to the data contained in the vector.
std::ostream & uout()
@ OP_NE
Definition: calculus.hpp:70
@ OP_IF_ELSE_ZERO
Definition: calculus.hpp:71
@ OP_AND
Definition: calculus.hpp:70
@ OP_OUTPUT
Definition: calculus.hpp:82
@ OP_CONST
Definition: calculus.hpp:79
@ OP_OR
Definition: calculus.hpp:70
@ OP_INPUT
Definition: calculus.hpp:82
@ OP_POW
Definition: calculus.hpp:66
@ OP_PARAMETER
Definition: calculus.hpp:85
@ OP_FABS
Definition: calculus.hpp:71
@ OP_CALL
Definition: calculus.hpp:88
@ OP_CONSTPOW
Definition: calculus.hpp:66
@ OP_NOT
Definition: calculus.hpp:70
@ OP_SQ
Definition: calculus.hpp:67
Options metadata for a class.
Definition: options.hpp:40
std::vector< ExtendedAlgEl > el
std::vector< int > copy_elision_offset
std::vector< int > copy_elision_arg
ExtendedAlgEl(const Function &fun)
Definition: sx_function.cpp:44
An atomic operation for the SXElem virtual machine.
Definition: sx_function.hpp:37
int i0
Operator index.
Definition: sx_function.hpp:39
Easy access to all the functions for a particular type.
Definition: calculus.hpp:1125
static casadi_int ndeps(unsigned char op)
Number of dependencies.
Definition: calculus.hpp:1623
static std::string print(unsigned char op, const std::string &x, const std::string &y)
Print.
Definition: calculus.hpp:1641