// sx_function.cpp
1  /*
2  * This file is part of CasADi.
3  *
4  * CasADi -- A symbolic framework for dynamic optimization.
5  * Copyright (C) 2010-2023 Joel Andersson, Joris Gillis, Moritz Diehl,
6  * KU Leuven. All rights reserved.
7  * Copyright (C) 2011-2014 Greg Horn
8  *
9  * CasADi is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 3 of the License, or (at your option) any later version.
13  *
14  * CasADi is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with CasADi; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  *
23  */
24 
25 
26 #include "sx_function.hpp"
27 #include <limits>
28 #include <stack>
29 #include <deque>
30 #include <sstream>
31 #include <iomanip>
32 #include <bitset>
33 #include "sx_node.hpp"
34 #include "output_sx.hpp"
35 #include "call_sx.hpp"
36 #include "casadi_common.hpp"
37 #include "sparsity_internal.hpp"
38 #include "casadi_interrupt.hpp"
39 #include "serializing_stream.hpp"
40 #include "global_options.hpp"
41 
42 namespace casadi {
43 
45  n_dep = f.nnz_in(); n_res = f.nnz_out();
46  dep.resize(n_dep); res.resize(n_res, -1);
47  f_n_in = f.n_in(); f_n_out = f.n_out();
48  f_nnz_in.resize(f_n_in); f_nnz_out.resize(f_n_out);
49  for (casadi_int i=0;i<f_n_in;++i) f_nnz_in[i] = f.nnz_in(i);
50  for (casadi_int i=0;i<f_n_out;++i) f_nnz_out[i] = f.nnz_out(i);
51  copy_elision_arg.resize(f_n_in, -1);
52  copy_elision_offset.resize(f_n_in, -1);
53  }
54 
55  SXFunction::SXFunction(const std::string& name,
56  const std::vector<SX >& inputv,
57  const std::vector<SX >& outputv,
58  const std::vector<std::string>& name_in,
59  const std::vector<std::string>& name_out)
60  : XFunction<SXFunction, SX, SXNode>(name, inputv, outputv, name_in, name_out) {
61 
62  // Default (persistent) options
63  just_in_time_opencl_ = false;
64  just_in_time_sparsity_ = false;
65  }
66 
68  clear_mem();
69  }
70 
71  int SXFunction::eval(const double** arg, double** res,
72  casadi_int* iw, double* w, void* mem) const {
73  if (verbose_) casadi_message(name_ + "::eval");
74  setup(mem, arg, res, iw, w);
75 
76  // Make sure no free parameters
77  if (!free_vars_.empty()) {
78  std::stringstream ss;
79  disp(ss, false);
80  casadi_error("Cannot evaluate \"" + ss.str() + "\" since variables "
81  + str(free_vars_) + " are free.");
82  }
83 
84  // NOTE: The implementation of this function is very delicate. Small changes in the
85  // class structure can cause large performance losses. For this reason,
86  // the preprocessor macros are used below
87 
88  // Evaluate the algorithm
89  for (auto&& e : algorithm_) {
90  switch (e.op) {
91  CASADI_MATH_FUN_BUILTIN(w[e.i1], w[e.i2], w[e.i0])
92 
93  case OP_CONST: w[e.i0] = e.d; break;
94  case OP_INPUT: w[e.i0] = arg[e.i1]==nullptr ? 0 : arg[e.i1][e.i2]; break;
95  case OP_OUTPUT: if (res[e.i0]!=nullptr) res[e.i0][e.i2] = w[e.i1]; break;
96  case OP_CALL:
97  call_fwd(e, arg, res, iw, w);
98  break;
99  default:
100  casadi_error("Unknown operation" + str(e.op));
101  }
102  }
103  return 0;
104  }
105 
106  bool SXFunction::is_smooth() const {
107  // Go through all nodes and check if any node is non-smooth
108  for (auto&& a : algorithm_) {
109  if (!operation_checker<SmoothChecker>(a.op)) {
110  return false;
111  }
112  }
113  return true;
114  }
115 
  // Print a human-readable listing of the algorithm, one instruction per line.
  // Work-vector entries are printed as "@k"; format must stay stable as it is
  // also quoted in error messages (see eval()).
  void SXFunction::disp_more(std::ostream &stream) const {
    stream << "Algorithm:";

    // Iterator to free variables
    std::vector<SXElem>::const_iterator p_it = free_vars_.begin();

    // Normal, interpreted output
    for (auto&& a : algorithm_) {
      stream << std::endl;
      if (a.op==OP_OUTPUT) {
        stream << "output[" << a.i0 << "][" << a.i2 << "] = @" << a.i1;
      } else if (a.op==OP_CALL) {
        // Embedded function call: print "[outputs] = name(inputs)"
        const ExtendedAlgEl& m = call_.el.at(a.i1);
        stream << "[";
        // k runs over the flattened output nonzeros of the call
        casadi_int k = 0;
        for (casadi_int i=0; i<m.f.n_out(); ++i) {
          if (m.f.nnz_out(i)>1) stream << "[";
          for (casadi_int j=0; j<m.f.nnz_out(i); ++j) {
            int el = m.res[k++];
            if (el>=0) {
              stream << "@" << el;
            } else {
              // Negative index: this output nonzero is not used anywhere
              stream << "NULL";
            }
            if (j<m.f.nnz_out(i)-1) stream << ",";
          }
          if (m.f.nnz_out(i)>1) stream << "]";
          if (i<m.f.n_out()-1) stream << ",";
        }
        stream << "] = ";
        stream << m.f.name() << "(";
        // k now runs over the flattened input nonzeros of the call
        k = 0;
        for (casadi_int i=0; i<m.f.n_in(); ++i) {
          // Empty input argument printed as a 0x0 matrix
          if (m.f.nnz_in(i)==0) stream << "0x0";
          if (m.f.nnz_in(i)>1) stream << "[";
          for (casadi_int j=0; j<m.f.nnz_in(i); ++j) {
            stream << "@" << m.dep[k++];
            if (j<m.f.nnz_in(i)-1) stream << ",";
          }
          if (m.f.nnz_in(i)>1) stream << "]";
          if (i<m.f.n_in()-1) stream << ",";
        }
        stream << ")";
      } else {
        stream << "@" << a.i0 << " = ";
        if (a.op==OP_INPUT) {
          stream << "input[" << a.i1 << "][" << a.i2 << "]";
        } else {
          if (a.op==OP_CONST) {
            stream << a.d;
          } else if (a.op==OP_PARAMETER) {
            // Free variables are printed in the order they were located
            stream << *p_it++;
          } else {
            // Generic unary/binary operation: pre/sep/post come from the
            // operation's printing convention
            casadi_int ndep = casadi_math<double>::ndeps(a.op);
            stream << casadi_math<double>::pre(a.op);
            for (casadi_int c=0; c<ndep; ++c) {
              if (c==0) {
                stream << "@" << a.i1;
              } else {
                stream << casadi_math<double>::sep(a.op);
                stream << "@" << a.i2;
              }

            }
            stream << casadi_math<double>::post(a.op);
          }
        }
      }
      stream << ";";
    }
  }
188 
189  size_t SXFunction::codegen_sz_w(const CodeGenerator& g) const {
190  if (!g.avoid_stack()) return call_.sz_w+call_.sz_w_arg+call_.sz_w_res;
191  return sz_w();
192  }
193 
195 
196  // Make sure that there are no free variables
197  if (!free_vars_.empty()) {
198  casadi_error("Code generation of '" + name_ + "' is not possible since variables "
199  + str(free_vars_) + " are free.");
200  }
201 
202  // Generate code for the call nodes
203  for (auto&& m : call_.el) {
204  g.add_dependency(m.f);
205  }
206  }
207 
210 
211  casadi_int k=0;
212  // Run the algorithm
213  for (auto&& a : algorithm_) {
214  if (a.op==OP_OUTPUT) {
215  g << "if (res[" << a.i0 << "]!=0) "
216  << g.res(a.i0) << "[" << a.i2 << "]=" << g.sx_work(a.i1) << ";\n";
217  } else if (a.op==OP_CALL) {
218  const ExtendedAlgEl& m = call_.el[a.i1];
219 
220  casadi_int worksize = g.avoid_stack() ? worksize_ : 0;
221 
222  // Collect input arguments
223  casadi_int offset = worksize;
224  for (casadi_int i=0; i<m.f_n_in; ++i) {
225  if (m.copy_elision_arg[i]>=0) {
226  g << "arg[" << n_in_+i << "] = "
227  << "arg[" + str(m.copy_elision_arg[i]) << "]? "
228  << "arg[" + str(m.copy_elision_arg[i]) << "] + "
229  << str(m.copy_elision_offset[i]) << " : 0;\n";
230  } else {
231  if (m.f_nnz_in[i]==0) {
232  g << "arg[" << n_in_+i << "]=" << 0 << ";\n";
233  } else {
234  g << "arg[" << n_in_+i << "]=" << "w+" + str(offset) << ";\n";
235  }
236  }
237  offset += m.f_nnz_in[i];
238  }
239 
240 
241  casadi_int out_offset = offset;
242 
243  // Collect output arguments
244  for (casadi_int i=0; i<m.f_n_out; ++i) {
245  g << "res[" << n_out_+i << "]=" << "w+" + str(offset) << ";\n";
246  offset += m.f_nnz_out[i];
247  }
248  casadi_int k=0;
249  for (casadi_int i=0; i<m.f_n_in; ++i) {
250  if (m.copy_elision_arg[i]==-1) {
251  for (casadi_int j=0; j<m.f_nnz_in[i]; ++j) {
252  g << "w["+str(k+worksize) + "] = " << g.sx_work(m.dep[k]) << ";\n";
253  k++;
254  }
255  } else {
256  k+=m.f_nnz_in[i];
257  }
258  }
259  std::string flag =
260  g(m.f, "arg+"+str(n_in_), "res+"+str(n_out_), "iw", "w+" + str(offset));
261  // Call function
262  g << "if (" << flag << ") return 1;\n";
263  for (casadi_int i=0;i<m.n_res;++i) {
264  if (m.res[i]>=0) {
265  g << g.sx_work(m.res[i]) << " = ";
266  g << "w[" + str(i+out_offset) + "];\n";
267  }
268  }
269  } else if (a.op==OP_INPUT) {
270  if (!copy_elision_[k]) {
271  g << g.sx_work(a.i0) << "="
272  << g.arg(a.i1) << "? " << g.arg(a.i1) << "[" << a.i2 << "] : 0;\n";
273  }
274  } else {
275 
276  // Where to store the result
277  g << g.sx_work(a.i0) << "=";
278 
279  // What to store
280  if (a.op==OP_CONST) {
281  g << g.constant(a.d);
282  } else {
283  casadi_int ndep = casadi_math<double>::ndeps(a.op);
284  casadi_assert_dev(ndep>0);
285  if (ndep==1) g << g.print_op(a.op, g.sx_work(a.i1));
286  if (ndep==2) g << g.print_op(a.op, g.sx_work(a.i1), g.sx_work(a.i2));
287  }
288  g << ";\n";
289  }
290  k++;
291  }
292  }
293 
296  {{"default_in",
298  "Default input values"}},
299  {"just_in_time_sparsity",
300  {OT_BOOL,
301  "Propagate sparsity patterns using just-in-time "
302  "compilation to a CPU or GPU using OpenCL"}},
303  {"just_in_time_opencl",
304  {OT_BOOL,
305  "Just-in-time compilation for numeric evaluation using OpenCL (experimental)"}},
306  {"live_variables",
307  {OT_BOOL,
308  "Reuse variables in the work vector"}},
309  {"cse",
310  {OT_BOOL,
311  "Perform common subexpression elimination (complexity is N*log(N) in graph size)"}},
312  {"allow_free",
313  {OT_BOOL,
314  "Allow construction with free variables (Default: false)"}},
315  {"allow_duplicate_io_names",
316  {OT_BOOL,
317  "Allow construction with duplicate io names (Default: false)"}}
318  }
319  };
320 
321  Dict SXFunction::generate_options(const std::string& target) const {
323  //opts["default_in"] = default_in_;
324  opts["live_variables"] = live_variables_;
325  opts["just_in_time_sparsity"] = just_in_time_sparsity_;
326  opts["just_in_time_opencl"] = just_in_time_opencl_;
327  return opts;
328  }
329 
330  void SXFunction::init(const Dict& opts) {
331  // Call the init function of the base class
333  if (verbose_) casadi_message(name_ + "::init");
334 
335  // Default (temporary) options
336  live_variables_ = true;
337 
338  bool cse_opt = false;
339  bool allow_free = false;
340 
341  // Read options
342  for (auto&& op : opts) {
343  if (op.first=="default_in") {
344  default_in_ = op.second;
345  } else if (op.first=="live_variables") {
346  live_variables_ = op.second;
347  } else if (op.first=="just_in_time_opencl") {
348  just_in_time_opencl_ = op.second;
349  } else if (op.first=="just_in_time_sparsity") {
350  just_in_time_sparsity_ = op.second;
351  } else if (op.first=="cse") {
352  cse_opt = op.second;
353  } else if (op.first=="allow_free") {
354  allow_free = op.second;
355  }
356  }
357 
358  // Perform common subexpression elimination
359  // This must be done before the lock, to avoid deadlocks
360  if (cse_opt) out_ = cse(out_);
361 
362  // Check/set default inputs
363  if (default_in_.empty()) {
364  default_in_.resize(n_in_, 0);
365  } else {
366  casadi_assert(default_in_.size()==n_in_,
367  "Option 'default_in' has incorrect length");
368  }
369 
370 #ifdef CASADI_WITH_THREADSAFE_SYMBOLICS
371  std::lock_guard<std::mutex> lock(SX::get_mutex_temp());
372 #endif // CASADI_WITH_THREADSAFE_SYMBOLICS
373 
374  // Stack used to sort the computational graph
375  std::stack<SXNode*> s;
376 
377  // All nodes
378  std::vector<SXNode*> nodes;
379 
380  // Add the list of nodes
381  casadi_int ind=0;
382  for (auto it = out_.begin(); it != out_.end(); ++it, ++ind) {
383  casadi_int nz=0;
384  for (auto itc = (*it)->begin(); itc != (*it)->end(); ++itc, ++nz) {
385  // Add outputs to the list
386  s.push(itc->get());
387  sort_depth_first(s, nodes);
388 
389  // A null pointer means an output instruction
390  nodes.push_back(static_cast<SXNode*>(nullptr));
391  }
392  }
393 
394  casadi_assert(nodes.size() <= std::numeric_limits<int>::max(), "Integer overflow");
395  // Set the temporary variables to be the corresponding place in the sorted graph
396  for (casadi_int i=0; i<nodes.size(); ++i) {
397  if (nodes[i]) {
398  nodes[i]->temp = static_cast<int>(i);
399  }
400  }
401 
402  // Sort the nodes by type
403  constants_.clear();
404  operations_.clear();
405  for (std::vector<SXNode*>::iterator it = nodes.begin(); it != nodes.end(); ++it) {
406  SXNode* t = *it;
407  if (t) {
408  if (t->is_constant())
409  constants_.push_back(SXElem::create(t));
410  else if (!t->is_symbolic() && t->op()>=0)
411  operations_.push_back(SXElem::create(t));
412  }
413  }
414 
415  // Input instructions
416  std::vector<std::pair<int, SXNode*> > symb_loc;
417 
418  // Current output and nonzero, start with the first one
419  int curr_oind, curr_nz=0;
420  casadi_assert(out_.size() <= std::numeric_limits<int>::max(), "Integer overflow");
421  for (curr_oind=0; curr_oind<out_.size(); ++curr_oind) {
422  if (out_[curr_oind].nnz()!=0) {
423  break;
424  }
425  }
426 
427  // Count the number of times each node is used
428  std::vector<casadi_int> refcount(nodes.size(), 0);
429 
430  // Get the sequence of instructions for the virtual machine
431  algorithm_.resize(0);
432  algorithm_.reserve(nodes.size());
433 
434  // Mapping of node index (cfr. temp) to algorithm index
435  std::vector<int> alg_index;
436  alg_index.reserve(nodes.size());
437 
438  for (std::vector<SXNode*>::iterator it=nodes.begin(); it!=nodes.end(); ++it) {
439  // Current node
440  SXNode* n = *it;
441 
442  // New element in the algorithm
443  AlgEl ae;
444 
445  // Get operation
446  ae.op = n==nullptr ? static_cast<int>(OP_OUTPUT) : static_cast<int>(n->op());
447 
448  // Default dependencies
449  int* dep = &ae.i1;
450  casadi_int ndeps = ae.op == -1 ? 1 : casadi_math<double>::ndeps(ae.op);
451 
452  // Get instruction
453  switch (ae.op) {
454  case OP_CONST: // constant
455  ae.d = n->to_double();
456  ae.i0 = n->temp;
457  break;
458  case OP_PARAMETER: // a parameter or input
459  symb_loc.push_back(std::make_pair(algorithm_.size(), n));
460  ae.i0 = n->temp;
461  ae.d = 0; // value not used, but set here to avoid uninitialized data in serialization
462  break;
463  case OP_OUTPUT: // output instruction
464  ae.i0 = curr_oind;
465  ae.i1 = out_[curr_oind]->at(curr_nz)->temp;
466  ae.i2 = curr_nz;
467 
468  // Go to the next nonzero
469  casadi_assert(curr_nz < std::numeric_limits<int>::max(), "Integer overflow");
470  curr_nz++;
471  if (curr_nz>=out_[curr_oind].nnz()) {
472  curr_nz=0;
473  casadi_assert(curr_oind < std::numeric_limits<int>::max(), "Integer overflow");
474  curr_oind++;
475  for (; curr_oind<out_.size(); ++curr_oind) {
476  if (out_[curr_oind].nnz()!=0) {
477  break;
478  }
479  }
480  }
481  break;
482  case OP_CALL: // Call node
483  {
484  ae.i0 = n->temp;
485 
486  // Index into ExtentedAlgEl collection
487  ae.i1 = call_.el.size();
488 
489  // Create ExtentedAlgEl instance
490  // This allocates space for dep and res
491  const Function& f = static_cast<const CallSX*>(n)->f_;
492  call_.el.emplace_back(f);
493 
494  // Make sure we have enough space to evaluate the Function call,
495  // noting that we wil only ever evaluate one call at a time.
496  call_.sz_arg = std::max(call_.sz_arg, f.sz_arg());
497  call_.sz_res = std::max(call_.sz_res, f.sz_res());
498  call_.sz_iw = std::max(call_.sz_iw, f.sz_iw());
499  call_.sz_w = std::max(call_.sz_w, f.sz_w());
500  call_.sz_w_arg = std::max(call_.sz_w_arg, static_cast<size_t>(f.nnz_in()));
501  call_.sz_w_res = std::max(call_.sz_w_res, static_cast<size_t>(f.nnz_out()));
502 
503  // Set the dependency pointer to the (uninitialised) slots of the ExtendedAlgEl
504  ExtendedAlgEl& m = call_.el.at(ae.i1);
505  dep = get_ptr(m.dep);
506  ndeps = m.n_dep;
507 
508  // Populate the dependency slots with node ids.
509  for (casadi_int i=0; i<ndeps; ++i) {
510  dep[i] = n->dep(i).get()->temp;
511  }
512  }
513  break;
514  case -1: // Output extraction node
515  {
516  dep = &algorithm_.at(alg_index.at(n->dep(0).get()->temp)).i1;
517  int oind = static_cast<OutputSX*>(n)->oind_;
518  casadi_assert(call_.el.at(dep[0]).res.at(oind)==-1, "Duplicate");
519  call_.el.at(dep[0]).res.at(oind) = n->temp;
520  }
521  break;
522  default: // Unary or binary operation
523  ae.i0 = n->temp;
524  ae.i1 = n->dep(0).get()->temp;
525  ae.i2 = n->dep(1).get()->temp;
526  }
527 
528  // Increase count of dependencies
529  for (casadi_int c=0; c<ndeps; ++c) {
530  refcount.at(dep[c])++;
531  }
532 
533  // Amend node index to algorithm index mapping
534  alg_index.push_back(algorithm_.size());
535 
536  // Add to algorithm
537  if (ae.op>=0) algorithm_.push_back(ae);
538 
539  }
540 
541  // Place in the work vector for each of the nodes in the tree (overwrites the reference counter)
542  std::vector<int> place(nodes.size());
543 
544  // Stack with unused elements in the work vector
545  std::stack<int> unused;
546 
547  // Work vector size
548  int worksize = 0;
549 
550  // Find a place in the work vector for the operation
551  for (auto&& a : algorithm_) {
552 
553  // Default dependencies
554  int* dep = &a.i1;
555  casadi_int ndeps = casadi_math<double>::ndeps(a.op);
556 
557  // Default outputs
558  int* res = &a.i0;
559  casadi_int nres = 1;
560 
561  // Call node overrides these defaults
562  if (a.op==OP_CALL) {
563  ExtendedAlgEl& e = call_.el.at(a.i1);
564  ndeps = e.n_dep;
565  dep = get_ptr(e.dep);
566  nres = e.n_res;
567  res = get_ptr(e.res);
568  }
569 
570  // decrease reference count of children
571  // reverse order so that the first argument will end up at the top of the stack
572  for (casadi_int c=ndeps-1; c>=0; --c) {
573  casadi_int ch_ind = dep[c];
574  casadi_int remaining = --refcount.at(ch_ind);
575  if (remaining==0) unused.push(place[ch_ind]);
576  }
577 
578  // Find a place to store the variable
579  if (a.op!=OP_OUTPUT) {
580  for (casadi_int c=0; c<nres; ++c) {
581  if (res[c]<0) continue;
582  if (live_variables_ && !unused.empty()) {
583  // Try to reuse a variable from the stack if possible (last in, first out)
584  res[c] = place[res[c]] = unused.top();
585  unused.pop();
586  } else {
587  // Allocate a new variable
588  res[c] = place[res[c]] = worksize++;
589  }
590  }
591  }
592 
593  // Save the location of the children
594  for (casadi_int c=0; c<ndeps; ++c) {
595  dep[c] = place[dep[c]];
596  }
597 
598  // If binary, make sure that the second argument is the same as the first one
599  // (in order to treat all operations as binary) NOTE: ugly
600  if (ndeps==1 && a.op!=OP_OUTPUT) {
601  a.i2 = a.i1;
602  }
603  }
604 
605  worksize_ = worksize;
606 
607  if (verbose_) {
608  if (live_variables_) {
609  casadi_message("Using live variables: work array is " + str(worksize_)
610  + " instead of " + str(nodes.size()));
611  } else {
612  casadi_message("Live variables disabled.");
613  }
614  }
615 
616  // Allocate work vectors (symbolic/numeric)
618 
619  alloc_arg(call_.sz_arg, true);
620  alloc_res(call_.sz_res, true);
621  alloc_iw(call_.sz_iw, true);
623 
624  // Reset the temporary variables
625  for (casadi_int i=0; i<nodes.size(); ++i) {
626  if (nodes[i]) {
627  nodes[i]->temp = 0;
628  }
629  }
630 
631  // Now mark each input's place in the algorithm
632  for (auto it=symb_loc.begin(); it!=symb_loc.end(); ++it) {
633  it->second->temp = it->first+1;
634  }
635 
636  // Add input instructions
637  casadi_assert(in_.size() <= std::numeric_limits<int>::max(), "Integer overflow");
638  for (int ind=0; ind<in_.size(); ++ind) {
639  int nz=0;
640  for (auto itc = in_[ind]->begin(); itc != in_[ind]->end(); ++itc, ++nz) {
641  int i = itc->get_temp()-1;
642  if (i>=0) {
643  // Mark as input
644  algorithm_[i].op = OP_INPUT;
645 
646  // Location of the input
647  algorithm_[i].i1 = ind;
648  algorithm_[i].i2 = nz;
649 
650  // Mark input as read
651  itc->set_temp(0);
652  }
653  }
654  }
655 
656  // Locate free variables
657  free_vars_.clear();
658  for (std::vector<std::pair<int, SXNode*> >::const_iterator it=symb_loc.begin();
659  it!=symb_loc.end(); ++it) {
660  if (it->second->temp!=0) {
661  // Save to list of free parameters
662  free_vars_.push_back(SXElem::create(it->second));
663 
664  // Remove marker
665  it->second->temp=0;
666  }
667  }
668 
669  if (!allow_free && has_free()) {
670  casadi_error(name_ + "::init: Initialization failed since variables [" +
671  join(get_free(), ", ") + "] are free. These symbols occur in the output expressions "
672  "but you forgot to declare these as inputs. "
673  "Set option 'allow_free' to allow free variables.");
674  }
675 
677 
678  // Initialize just-in-time compilation for numeric evaluation using OpenCL
679  if (just_in_time_opencl_) {
680  casadi_error("OpenCL is not supported in this version of CasADi");
681  }
682 
683  // Initialize just-in-time compilation for sparsity propagation using OpenCL
685  casadi_error("OpenCL is not supported in this version of CasADi");
686  }
687 
688  // Print
689  if (verbose_) casadi_message(str(algorithm_.size()) + " elementary operations");
690  }
691 
694  copy_elision_.resize(algorithm_.size(), false);
695  return;
696  }
697  // Perform copy elision (codegen-only)
698  // Remove nodes that only serve to compose CALL inputs
699 
700  // For work vector elements, store the arg source (-1 for no trivial source)
701  std::vector<int> arg_i(worksize_, -1);
702  std::vector<int> nz_i(worksize_, -1);
703 
704  // Which algel corresponds to this source?
705  std::vector<casadi_int> alg_i(worksize_, -1);
706 
707  // Is this algel to be elided?
708  copy_elision_.resize(algorithm_.size(), false);
709 
710  casadi_int k=0;
711  for (auto&& e : algorithm_) {
712  switch (e.op) {
713  case OP_INPUT:
714  // Make source association
715  arg_i[e.i0] = e.i1;
716  nz_i[e.i0] = e.i2;
717  alg_i[e.i0] = k;
718  copy_elision_[k] = true;
719  break;
720  case OP_OUTPUT:
721  if (arg_i[e.i1]>=0) {
722  copy_elision_[alg_i[e.i1]] = false;
723  }
724  break;
725  case OP_CALL:
726  {
727  auto& m = call_.el[e.i1];
728 
729  // Inspect input arguments
730  casadi_int offset_input = 0;
731  for (casadi_int i=0; i<m.f_n_in; ++i) {
732  // Pattern match results
733  casadi_int arg = -1;
734  casadi_int offset = -1;
735  for (casadi_int j=0; j<m.f_nnz_in[i]; ++j) {
736  casadi_int k = offset_input+j;
737  if (j==0) {
738  arg = arg_i[m.dep[k]];
739  offset = nz_i[m.dep[k]];
740  }
741  if (arg_i[m.dep[k]]==-1) {
742  arg = -1;
743  // Pattern match failed
744  break;
745  }
746  if (nz_i[m.dep[k]]!=offset+j) {
747  arg = -1;
748  // Pattern match failed
749  break;
750  }
751  }
752 
753  // If we cannot perform elision
754  if (arg==-1) {
755  // We need copies for all nonzeros of input i
756  for (casadi_int j=0; j<m.f_nnz_in[i]; ++j) {
757  casadi_int k = offset_input+j;
758  if (arg_i[m.dep[k]]>=0) {
759  copy_elision_[alg_i[m.dep[k]]] = false;
760  }
761  }
762  }
763  // Store pattern match results
764  m.copy_elision_arg[i] = arg;
765  m.copy_elision_offset[i] = offset;
766 
767  offset += m.f_nnz_in[i];
768  offset_input += m.f_nnz_in[i];
769  }
770 
771  // Remove source association of all outputs
772  for (casadi_int i=0; i<m.n_res; ++i) {
773  if (m.res[i]>=0) {
774  arg_i[m.res[i]] = -1;
775  }
776  }
777  }
778  break;
779  case OP_CONST:
780  case OP_PARAMETER:
781  // Remove source association
782  arg_i[e.i0] = -1;
783  break;
784  default:
785  if (arg_i[e.i1]>=0) {
786  copy_elision_[alg_i[e.i1]] = false;
787  }
788  if (!casadi_math<double>::is_unary(e.op)) {
789  if (arg_i[e.i2]>=0) {
790  copy_elision_[alg_i[e.i2]] = false;
791  }
792  }
793  // Remove source association
794  arg_i[e.i0] = -1;
795  }
796  k++;
797  }
798  }
799 
801  std::vector<SXElem> ret(algorithm_.size(), casadi_limits<SXElem>::nan);
802 
803  std::vector<SXElem>::iterator it=ret.begin();
804 
805  // Iterator to the binary operations
806  std::vector<SXElem>::const_iterator b_it = operations_.begin();
807 
808  // Iterator to stack of constants
809  std::vector<SXElem>::const_iterator c_it = constants_.begin();
810 
811  // Iterator to free variables
812  std::vector<SXElem>::const_iterator p_it = free_vars_.begin();
813 
814  // Evaluate algorithm
815  if (verbose_) casadi_message("Evaluating algorithm forward");
816  for (auto&& a : algorithm_) {
817  switch (a.op) {
818  case OP_INPUT:
819  case OP_OUTPUT:
820  it++;
821  break;
822  case OP_CONST:
823  *it++ = *c_it++;
824  break;
825  case OP_PARAMETER:
826  *it++ = *p_it++;
827  break;
828  default:
829  *it++ = *b_it++;
830  }
831  }
832  casadi_assert(it==ret.end(), "Dimension mismatch");
833  return ret;
834  }
835 
837  eval_sx(const SXElem** arg, SXElem** res, casadi_int* iw, SXElem* w, void* mem,
838  bool always_inline, bool never_inline) const {
839 
840  always_inline = always_inline || always_inline_;
841  never_inline = never_inline || never_inline_;
842 
843  // non-inlining call is implemented in the base-class
844  if (!should_inline(true, always_inline, never_inline)) {
845  return FunctionInternal::eval_sx(arg, res, iw, w, mem, false, true);
846  }
847 
848  if (verbose_) casadi_message(name_ + "::eval_sx");
849 
850  // Iterator to the binary operations
851  std::vector<SXElem>::const_iterator b_it=operations_.begin();
852 
853  // Iterator to stack of constants
854  std::vector<SXElem>::const_iterator c_it = constants_.begin();
855 
856  // Iterator to free variables
857  std::vector<SXElem>::const_iterator p_it = free_vars_.begin();
858 
859  // Evaluate algorithm
860  if (verbose_) casadi_message("Evaluating algorithm forward");
861  for (auto&& a : algorithm_) {
862  switch (a.op) {
863  case OP_INPUT:
864  w[a.i0] = arg[a.i1]==nullptr ? 0 : arg[a.i1][a.i2];
865  break;
866  case OP_OUTPUT:
867  if (res[a.i0]!=nullptr) res[a.i0][a.i2] = w[a.i1];
868  break;
869  case OP_CONST:
870  w[a.i0] = *c_it++;
871  break;
872  case OP_PARAMETER:
873  w[a.i0] = *p_it++; break;
874  case OP_CALL:
875  {
876  const ExtendedAlgEl& m = call_.el.at(a.i1);
877  const SXElem& orig = *b_it++;
878  std::vector<SXElem> deps(m.n_dep);
879  bool identical = true;
880 
881  std::vector<SXElem> ret;
882  for (casadi_int i=0;i<m.n_dep;++i) {
883  identical &= SXElem::is_equal(w[m.dep.at(i)], orig->dep(i), 2);
884  }
885  if (identical) {
886  ret = OutputSX::split(orig, m.n_res);
887  } else {
888  for (casadi_int i=0;i<m.n_dep;++i) deps[i] = w[m.dep[i]];
889  ret = SXElem::call(m.f, deps);
890  }
891  for (casadi_int i=0;i<m.n_res;++i) {
892  if (m.res[i]>=0) w[m.res[i]] = ret[i];
893  }
894  }
895  break;
896  default:
897  {
898  // Evaluate the function to a temporary value
899  // (as it might overwrite the children in the work vector)
900  SXElem f;
901  switch (a.op) {
902  CASADI_MATH_FUN_BUILTIN(w[a.i1], w[a.i2], f)
903  }
904 
905  // If this new expression is identical to the expression used
906  // to define the algorithm, then reuse
907  const casadi_int depth = 2; // NOTE: a higher depth could possibly give more savings
908  f.assignIfDuplicate(*b_it++, depth);
909 
910  // Finally save the function value
911  w[a.i0] = f;
912  }
913  }
914  }
915  return 0;
916  }
917 
  // Inline the SX algorithm into an MX expression graph.
  //
  // \param arg  MX input expressions (projected to the declared input sparsities)
  // \param res  MX output expressions, resized and overwritten here
  // \param always_inline / never_inline  caller overrides, combined with the
  //        persistent always_inline_/never_inline_ members
  void SXFunction::eval_mx(const MXVector& arg, MXVector& res,
      bool always_inline, bool never_inline) const {
    always_inline = always_inline || always_inline_;
    never_inline = never_inline || never_inline_;

    // non-inlining call is implemented in the base-class
    if (!always_inline) {
      return FunctionInternal::eval_mx(arg, res, false, true);
    }

    if (verbose_) casadi_message(name_ + "::eval_mx");

    // Iterator to stack of constants
    std::vector<SXElem>::const_iterator c_it = constants_.begin();

    casadi_assert(!has_free(),
      "Free variables not supported in inlining call to SXFunction::eval_mx");

    // Resize the number of outputs
    casadi_assert(arg.size()==n_in_, "Wrong number of input arguments");
    res.resize(out_.size());

    // Symbolic work, non-differentiated
    std::vector<MX> w(sz_w());
    if (verbose_) casadi_message("Allocated work vector");

    // Split up inputs analogous to symbolic primitives
    std::vector<std::vector<MX> > arg_split(in_.size());
    for (casadi_int i=0; i<in_.size(); ++i) {
      // Get nonzeros of argument
      std::vector<MX> orig = arg[i].get_nonzeros();

      // Project to needed sparsity
      std::vector<MX> target(sparsity_in_[i].nnz(), 0);
      // NOTE(review): local w intentionally shadows the outer work vector;
      // it is scratch space for casadi_project only
      std::vector<MX> w(arg[i].size1());
      casadi_project(get_ptr(orig), arg[i].sparsity(),
        get_ptr(target), sparsity_in_[i], get_ptr(w));

      // Store
      arg_split[i] = target;
    }

    // Allocate storage for split outputs
    std::vector<std::vector<MX> > res_split(out_.size());
    for (casadi_int i=0; i<out_.size(); ++i) res_split[i].resize(nnz_out(i));

    // Evaluate algorithm
    if (verbose_) casadi_message("Evaluating algorithm forward");
    for (auto&& a : algorithm_) {
      switch (a.op) {
        case OP_INPUT:
          w[a.i0] = arg_split[a.i1][a.i2];
          break;
        case OP_OUTPUT:
          res_split[a.i0][a.i2] = w[a.i1];
          break;
        case OP_CONST:
          w[a.i0] = static_cast<double>(*c_it++);
          break;
        case OP_CALL:
          {
            const ExtendedAlgEl& m = call_.el.at(a.i1);
            std::vector<MX> deps(m.n_dep);
            std::vector<MX> args;

            // k runs over the flattened input nonzeros of the call
            casadi_int k = 0;
            // Construct matrix-valued function arguments
            for (casadi_int i=0;i<m.f_n_in;++i) {
              // NOTE(review): local arg shadows the function parameter here
              std::vector<MX> arg;
              for (casadi_int j=0;j<m.f_nnz_in[i];++j) {
                arg.push_back(w[m.dep[k++]]);
              }
              args.push_back(sparsity_cast(vertcat(arg), m.f.sparsity_in(i)));
            }


            // Evaluate the embedded function symbolically on MX
            std::vector<MX> ret = m.f(args);
            // NOTE(review): local res shadows the output parameter here
            std::vector<MX> res;

            // Break apart matrix-valued outputs into scalar components
            for (casadi_int i=0;i<m.f_n_out;++i) {
              std::vector<MX> nz = ret[i].get_nonzeros();
              res.insert(res.end(), nz.begin(), nz.end());
            }

            // Store into work vector; negative indices mark unused outputs
            for (casadi_int i=0;i<m.n_res;++i) {
              if (m.res[i]>=0) w[m.res[i]] = res[i];
            }
          }
          break;
        default:
          // Evaluate the function to a temporary value
          // (as it might overwrite the children in the work vector)
          MX f;
          switch (a.op) {
            CASADI_MATH_FUN_BUILTIN(w[a.i1], w[a.i2], f)
          }

          // Finally save the function value
          w[a.i0] = f;
      }
    }

    // Join split outputs
    for (casadi_int i=0; i<res.size(); ++i) {
      res[i] = sparsity_cast(vertcat(res_split[i]), sparsity_out_[i]);
    }
  }
1027 
1028  bool SXFunction::should_inline(bool with_sx, bool always_inline, bool never_inline) const {
1029  // If inlining has been specified
1030  casadi_assert(!(always_inline && never_inline),
1031  "Inconsistent options for " + definition());
1032  casadi_assert(!(never_inline && has_free()),
1033  "Must inline " + definition());
1034  if (always_inline) return true;
1035  if (never_inline) return false;
1036  // Functions with free variables must be inlined
1037  if (has_free()) return true;
1038  // Inlining by default
1039  return true;
1040  }
1041 
  void SXFunction::ad_forward(const std::vector<std::vector<SX> >& fseed,
      std::vector<std::vector<SX> >& fsens) const {
    // Forward-mode AD: propagate the directional seeds in 'fseed' through
    // the recorded algorithm, writing sensitivities into 'fsens'.
    // One seed/sensitivity vector pair per forward direction.
    if (verbose_) casadi_message(name_ + "::ad_forward");

    // Number of forward seeds
    casadi_int nfwd = fseed.size();
    fsens.resize(nfwd);

    // Quick return if possible
    if (nfwd==0) return;

    // Check if seeds need to have dimensions corrected
    casadi_int npar = 1;
    for (auto&& r : fseed) {
      if (!matching_arg(r, npar)) {
        casadi_assert_dev(npar==1);
        // Replace 0-by-0 seeds by correctly sized zeros and restart
        return ad_forward(replace_fseed(fseed, npar), fsens);
      }
    }

    // Make sure seeds have matching sparsity patterns
    for (auto it=fseed.begin(); it!=fseed.end(); ++it) {
      casadi_assert_dev(it->size()==n_in_);
      for (casadi_int i=0; i<n_in_; ++i) {
        if (it->at(i).sparsity()!=sparsity_in_[i]) {
          // Correct sparsity: project all seeds onto the nominal input
          // sparsity patterns, then restart
          std::vector<std::vector<SX> > fseed2(fseed);
          for (auto&& r : fseed2) {
            for (casadi_int i=0; i<n_in_; ++i) r[i] = project(r[i], sparsity_in_[i]);
          }
          return ad_forward(fseed2, fsens);
        }
      }
    }

    // Allocate results with the nominal output sparsity patterns
    for (casadi_int d=0; d<nfwd; ++d) {
      fsens[d].resize(n_out_);
      for (casadi_int i=0; i<fsens[d].size(); ++i)
        if (fsens[d][i].sparsity()!=sparsity_out_[i])
          fsens[d][i] = SX::zeros(sparsity_out_[i]);
    }

    // Iterator to the binary operations
    std::vector<SXElem>::const_iterator b_it=operations_.begin();

    // Tape: local partial derivatives of each recorded operation
    std::vector<TapeEl<SXElem> > s_pdwork(operations_.size());
    std::vector<TapeEl<SXElem> >::iterator it1 = s_pdwork.begin();

    // Evaluate algorithm: fill the tape with partial derivatives
    if (verbose_) casadi_message("Evaluating algorithm forward");
    for (auto&& e : algorithm_) {
      switch (e.op) {
      case OP_INPUT:
      case OP_OUTPUT:
      case OP_CONST:
      case OP_PARAMETER:
        break;
      default:
        {
          const SXElem& f=*b_it++;
          switch (e.op) {
            // Macro expands to one case label per builtin op, storing the
            // partials w.r.t. both dependencies into the tape element
            CASADI_MATH_DER_BUILTIN(f->dep(0), f->dep(1), f, it1++->d)
            case OP_CALL:
              // For calls keep the call expression itself; derivatives are
              // obtained from the function's forward() below
              it1++->d[0] = f;
          }
        }
      }
    }

    // Work vector
    std::vector<SXElem> w(worksize_);

    // Calculate forward sensitivities, one direction at a time
    if (verbose_) casadi_message("Calculating forward derivatives");
    for (casadi_int dir=0; dir<nfwd; ++dir) {
      std::vector<TapeEl<SXElem> >::const_iterator it2 = s_pdwork.begin();
      for (auto&& a : algorithm_) {
        switch (a.op) {
        case OP_INPUT:
          w[a.i0] = fseed[dir][a.i1].nonzeros()[a.i2]; break;
        case OP_OUTPUT:
          fsens[dir][a.i0].nonzeros()[a.i2] = w[a.i1]; break;
        case OP_CONST:
        case OP_PARAMETER:
          // Constants and parameters carry no sensitivity
          w[a.i0] = 0;
          break;
        case OP_IF_ELSE_ZERO:
          w[a.i0] = if_else_zero(it2++->d[1], w[a.i2]);
          break;
        case OP_CALL:
          {
            auto& m = call_.el.at(a.i1);
            CallSX* call_node = static_cast<CallSX*>(it2->d[0].get());

            // Construct forward sensitivity function
            Function ff = m.f.forward(1);

            // Symbolic inputs to forward sensitivity function
            std::vector<SXElem> deps;
            deps.reserve(2*m.n_dep);

            // Set nominal inputs from node
            casadi_int offset = 0;
            for (casadi_int i=0;i<m.f_n_in;++i) {
              casadi_int nnz = ff.nnz_in(i);
              casadi_assert(nnz==0 || nnz==m.f.nnz_in(i), "Not implemented");
              for (casadi_int j=0;j<nnz;++j) {
                deps.push_back(call_node->dep(offset+j));
              }
              offset += m.f_nnz_in[i];
            }

            // Do not set nominal outputs
            offset = 0;
            for (casadi_int i=0;i<m.f_n_out;++i) {
              casadi_int nnz = ff.nnz_in(i+m.f_n_in);
              casadi_assert(nnz==0 || nnz==m.f.nnz_out(i), "Not implemented");
              for (casadi_int j=0;j<nnz;++j) {
                deps.push_back(call_node->get_output(offset+j));
              }
              offset += m.f_nnz_out[i];
            }

            // Read in forward seeds from work vector
            offset = 0;
            for (casadi_int i=0;i<m.f_n_in;++i) {
              casadi_int nnz = ff.nnz_in(i+m.f_n_in+m.f_n_out);
              // nnz=0 occurs for is_diff_in[i] false
              casadi_assert(nnz==0 || nnz==m.f.nnz_in(i), "Not implemented");
              if (nnz) {
                for (casadi_int j=0;j<nnz;++j) {
                  deps.push_back(w[m.dep[offset+j]]);
                }
              }
              offset += m.f_nnz_in[i];
            }

            // Call forward sensitivity function
            std::vector<SXElem> ret = SXElem::call(ff, deps);

            // Retrieve sensitivities; only store results that are needed
            offset = 0;
            casadi_int k = 0;
            for (casadi_int i=0;i<m.f_n_out;++i) {
              casadi_int nnz = ff.nnz_out(i);
              // nnz=0 occurs for is_diff_out[i] false
              casadi_assert(nnz==0 || nnz==m.f_nnz_out[i], "Not implemented");
              if (nnz) {
                for (casadi_int j=0;j<nnz;++j) {
                  if (m.res[offset+j]>=0) w[m.res[offset+j]] = ret[k];
                  k++;
                }
              }
              offset += m.f_nnz_out[i];
            }
          }
          it2++;
          break;
        CASADI_MATH_BINARY_BUILTIN // Binary operation
          // Chain rule with the tape's two partial derivatives
          w[a.i0] = it2->d[0] * w[a.i1] + it2->d[1] * w[a.i2];
          it2++;
          break;
        default: // Unary operation
          w[a.i0] = it2->d[0] * w[a.i1];
          it2++;
        }
      }
    }
  }
1213 
1214  void SXFunction::ad_reverse(const std::vector<std::vector<SX> >& aseed,
1215  std::vector<std::vector<SX> >& asens) const {
1216  if (verbose_) casadi_message(name_ + "::ad_reverse");
1217 
1218  // number of adjoint seeds
1219  casadi_int nadj = aseed.size();
1220  asens.resize(nadj);
1221 
1222  // Quick return if possible
1223  if (nadj==0) return;
1224 
1225  // Check if seeds need to have dimensions corrected
1226  casadi_int npar = 1;
1227  for (auto&& r : aseed) {
1228  if (!matching_res(r, npar)) {
1229  casadi_assert_dev(npar==1);
1230  return ad_reverse(replace_aseed(aseed, npar), asens);
1231  }
1232  }
1233 
1234  // Make sure matching sparsity of fseed
1235  bool matching_sparsity = true;
1236  for (casadi_int d=0; d<nadj; ++d) {
1237  casadi_assert_dev(aseed[d].size()==n_out_);
1238  for (casadi_int i=0; matching_sparsity && i<n_out_; ++i)
1239  matching_sparsity = aseed[d][i].sparsity()==sparsity_out_[i];
1240  }
1241 
1242  // Correct sparsity if needed
1243  if (!matching_sparsity) {
1244  std::vector<std::vector<SX> > aseed2(aseed);
1245  for (casadi_int d=0; d<nadj; ++d)
1246  for (casadi_int i=0; i<n_out_; ++i)
1247  if (aseed2[d][i].sparsity()!=sparsity_out_[i])
1248  aseed2[d][i] = project(aseed2[d][i], sparsity_out_[i]);
1249  return ad_reverse(aseed2, asens);
1250  }
1251 
1252  // Allocate results if needed
1253  for (casadi_int d=0; d<nadj; ++d) {
1254  asens[d].resize(n_in_);
1255  for (casadi_int i=0; i<asens[d].size(); ++i) {
1256  if (asens[d][i].sparsity()!=sparsity_in_[i]) {
1257  asens[d][i] = SX::zeros(sparsity_in_[i]);
1258  } else {
1259  std::fill(asens[d][i]->begin(), asens[d][i]->end(), 0);
1260  }
1261  }
1262  }
1263 
1264  // Iterator to the binary operations
1265  std::vector<SXElem>::const_iterator b_it=operations_.begin();
1266 
1267  // Tape
1268  std::vector<TapeEl<SXElem> > s_pdwork(operations_.size());
1269  std::vector<TapeEl<SXElem> >::iterator it1 = s_pdwork.begin();
1270 
1271  // Evaluate algorithm
1272  if (verbose_) casadi_message("Evaluating algorithm forward");
1273  for (auto&& a : algorithm_) {
1274  switch (a.op) {
1275  case OP_INPUT:
1276  case OP_OUTPUT:
1277  case OP_CONST:
1278  case OP_PARAMETER:
1279  break;
1280  default:
1281  {
1282  const SXElem& f=*b_it++;
1283  switch (a.op) {
1284  CASADI_MATH_DER_BUILTIN(f->dep(0), f->dep(1), f, it1++->d)
1285  case OP_CALL:
1286  it1++->d[0] = f;
1287  }
1288  }
1289  }
1290  }
1291 
1292  // Calculate adjoint sensitivities
1293  if (verbose_) casadi_message("Calculating adjoint derivatives");
1294 
1295  // Work vector
1296  std::vector<SXElem> w(worksize_, 0);
1297 
1298  for (casadi_int dir=0; dir<nadj; ++dir) {
1299  auto it2 = s_pdwork.rbegin();
1300  for (auto it = algorithm_.rbegin(); it!=algorithm_.rend(); ++it) {
1301  SXElem seed;
1302  switch (it->op) {
1303  case OP_INPUT:
1304  asens[dir][it->i1].nonzeros()[it->i2] = w[it->i0];
1305  w[it->i0] = 0;
1306  break;
1307  case OP_OUTPUT:
1308  w[it->i1] += aseed[dir][it->i0].nonzeros()[it->i2];
1309  break;
1310  case OP_CONST:
1311  case OP_PARAMETER:
1312  w[it->i0] = 0;
1313  break;
1314  case OP_IF_ELSE_ZERO:
1315  seed = w[it->i0];
1316  w[it->i0] = 0;
1317  w[it->i2] += if_else_zero(it2++->d[1], seed);
1318  break;
1319  case OP_CALL:
1320  {
1321  auto& m = call_.el.at(it->i1);
1322  CallSX* call_node = static_cast<CallSX*>(it2->d[0].get());
1323 
1324  // Construct reverse sensitivity function
1325  Function fr = m.f.reverse(1);
1326 
1327  // Symbolic inputs to reverse sensitivity function
1328  std::vector<SXElem> deps;
1329  deps.reserve(m.n_dep+m.n_res);
1330 
1331  // Set nominal inputs from node
1332  casadi_int offset = 0;
1333  for (casadi_int i=0;i<m.f_n_in;++i) {
1334  casadi_int nnz = fr.nnz_in(i);
1335  casadi_assert(nnz==0 || nnz==m.f.nnz_in(i), "Not implemented");
1336  for (casadi_int j=0;j<nnz;++j) {
1337  deps.push_back(call_node->dep(offset+j));
1338  }
1339  offset += m.f_nnz_in[i];
1340  }
1341 
1342  // Do not set nominal outputs
1343  offset = 0;
1344  for (casadi_int i=0;i<m.f_n_out;++i) {
1345  casadi_int nnz = fr.nnz_in(i+m.f_n_in);
1346  casadi_assert(nnz==0 || nnz==m.f.nnz_out(i), "Not implemented");
1347  for (casadi_int j=0;j<nnz;++j) {
1348  deps.push_back(call_node->get_output(offset+j));
1349  }
1350  offset += m.f_nnz_out[i];
1351  }
1352 
1353  // Read in reverse seeds from work vector
1354  offset = 0;
1355  for (casadi_int i=0;i<m.f_n_out;++i) {
1356  casadi_int nnz = fr.nnz_in(i+m.f_n_in+m.f_n_out);
1357  // nnz=0 occurs for is_diff_out[i] false
1358  casadi_assert(nnz==0 || nnz==m.f.nnz_out(i), "Not implemented");
1359  if (nnz) {
1360  for (casadi_int j=0;j<nnz;++j) {
1361  deps.push_back((m.res[offset+j]>=0) ? w[m.res[offset+j]] : 0);
1362  }
1363  }
1364  offset += m.f.nnz_out(i);
1365  }
1366 
1367  // Call reverse sensitivity function
1368  std::vector<SXElem> ret = SXElem::call(fr, deps);
1369 
1370  // Clear out reverse seeds
1371  for (casadi_int i=0;i<m.n_res;++i) {
1372  if (m.res[i]>=0) w[m.res[i]] = 0;
1373  }
1374 
1375  // Store reverse sensitivities into work vector
1376  offset = 0;
1377  casadi_int k = 0;
1378  for (casadi_int i=0;i<m.f_n_in;++i) {
1379  casadi_int nnz = fr.nnz_out(i);
1380  // nnz=0 occurs for is_diff_in[i] false
1381  casadi_assert(nnz==0 || nnz==m.f_nnz_in[i], "Not implemented");
1382  if (nnz) {
1383  for (casadi_int j=0;j<nnz;++j) {
1384  w[m.dep[offset+j]] += ret[k++];
1385  }
1386  }
1387  offset += m.f_nnz_in[i];
1388  }
1389  }
1390  it2++;
1391  break;
1392  CASADI_MATH_BINARY_BUILTIN // Binary operation
1393  seed = w[it->i0];
1394  w[it->i0] = 0;
1395  w[it->i1] += it2->d[0] * seed;
1396  w[it->i2] += it2++->d[1] * seed;
1397  break;
1398  default: // Unary operation
1399  seed = w[it->i0];
1400  w[it->i0] = 0;
1401  w[it->i1] += it2++->d[0] * seed;
1402  }
1403  }
1404  }
1405  }
1406 
1407  template<typename T, typename CT>
1409  CT*** call_arg, T*** call_res, casadi_int** call_iw, T** call_w, T** nz_in, T** nz_out) const {
1410  *call_arg += n_in_;
1411  *call_res += n_out_;
1412  *nz_in = *call_w + worksize_;
1413  *nz_out = *call_w + worksize_ + call_.sz_w_arg;
1414  *call_w = *call_w + worksize_ + call_.sz_w_arg + call_.sz_w_res;
1415 
1416  // Set up call_arg to point to nz_in
1417  T* ptr_w = *nz_in;
1418  for (casadi_int i=0;i<m.f_n_in;++i) {
1419  (*call_arg)[i] = ptr_w;
1420  ptr_w+=m.f_nnz_in[i];
1421  }
1422 
1423  // Set up call_res to point to nz_out
1424  ptr_w = *nz_out;
1425  for (casadi_int i=0;i<m.f_n_out;++i) {
1426  (*call_res)[i] = ptr_w;
1427  ptr_w+=m.f_nnz_out[i];
1428  }
1429  }
1430 
1431  template<typename T>
1432  void SXFunction::call_fwd(const AlgEl& e, const T** arg, T** res, casadi_int* iw, T* w) const {
1433  auto& m = call_.el[e.i1];
1434  const T** call_arg = arg;
1435  T** call_res = res;
1436  casadi_int* call_iw = iw;
1437  T* call_w = w;
1438  T* nz_in;
1439  T* nz_out;
1440 
1441  call_setup(m, &call_arg, &call_res, &call_iw, &call_w, &nz_in, &nz_out);
1442 
1443  // Populate nz_in from work vector
1444  for (casadi_int i=0;i<m.n_dep;++i) {
1445  nz_in[i] = w[m.dep[i]];
1446  }
1447  // Perform call nz_in -> nz_out
1448  m.f(call_arg, call_res, call_iw, call_w);
1449 
1450  // Store nz_out results back in workvector
1451  for (casadi_int i=0;i<m.n_res;++i) {
1452  // Only if the result is actually needed
1453  if (m.res[i]>=0) {
1454  w[m.res[i]] = nz_out[i];
1455  }
1456  }
1457  }
1458 
1459 
1460  template<typename T>
1461  void SXFunction::call_rev(const AlgEl& e, T** arg, T** res, casadi_int* iw, T* w) const {
1462  auto& m = call_.el[e.i1];
1463  bvec_t** call_arg = arg;
1464  bvec_t** call_res = res;
1465  casadi_int* call_iw = iw;
1466  bvec_t* call_w = w;
1467  bvec_t* nz_in;
1468  bvec_t* nz_out;
1469 
1470  call_setup(m, &call_arg, &call_res, &call_iw, &call_w, &nz_in, &nz_out);
1471 
1472  std::fill_n(nz_in, m.n_dep, 0);
1473 
1474  // Read in reverse seeds nz_out from work vector
1475  for (casadi_int i=0;i<m.n_res;++i) {
1476  nz_out[i] = (m.res[i]>=0) ? w[m.res[i]] : 0;
1477  }
1478 
1479  // Perform reverse mode call nz_out -> nz_in
1480  m.f.rev(call_arg, call_res, call_iw, call_w);
1481 
1482  // Clear out reverse seeds
1483  for (casadi_int i=0;i<m.n_res;++i) {
1484  if (m.res[i]>=0) w[m.res[i]] = 0;
1485  }
1486 
1487  // Store reverse sensitivities into work vector
1488  for (casadi_int i=0;i<m.n_dep;++i) {
1489  w[m.dep[i]] |= nz_in[i];
1490  }
1491  }
1492 
1494  sp_forward(const bvec_t** arg, bvec_t** res, casadi_int* iw, bvec_t* w, void* mem) const {
1495  // Fall back when forward mode not allowed
1496  if (sp_weight()==1 || sp_weight()==-1)
1497  return FunctionInternal::sp_forward(arg, res, iw, w, mem);
1498  // Propagate sparsity forward
1499  for (auto&& e : algorithm_) {
1500  switch (e.op) {
1501  case OP_CONST:
1502  case OP_PARAMETER:
1503  w[e.i0] = 0; break;
1504  case OP_INPUT:
1505  w[e.i0] = arg[e.i1]==nullptr ? 0 : arg[e.i1][e.i2];
1506  break;
1507  case OP_OUTPUT:
1508  if (res[e.i0]!=nullptr) res[e.i0][e.i2] = w[e.i1];
1509  break;
1510  case OP_CALL:
1511  call_fwd(e, arg, res, iw, w);
1512  break;
1513  default: // Unary or binary operation
1514  w[e.i0] = w[e.i1] | w[e.i2]; break;
1515  }
1516  }
1517  return 0;
1518  }
1519 
1521  casadi_int* iw, bvec_t* w, void* mem) const {
1522  // Fall back when reverse mode not allowed
1523  if (sp_weight()==0 || sp_weight()==-1)
1524  return FunctionInternal::sp_reverse(arg, res, iw, w, mem);
1525  std::fill_n(w, sz_w(), 0);
1526 
1527  // Propagate sparsity backward
1528  for (auto it=algorithm_.rbegin(); it!=algorithm_.rend(); ++it) {
1529  // Temp seed
1530  bvec_t seed;
1531 
1532  // Propagate seeds
1533  switch (it->op) {
1534  case OP_CONST:
1535  case OP_PARAMETER:
1536  w[it->i0] = 0;
1537  break;
1538  case OP_INPUT:
1539  if (arg[it->i1]!=nullptr) arg[it->i1][it->i2] |= w[it->i0];
1540  w[it->i0] = 0;
1541  break;
1542  case OP_OUTPUT:
1543  if (res[it->i0]!=nullptr) {
1544  w[it->i1] |= res[it->i0][it->i2];
1545  res[it->i0][it->i2] = 0;
1546  }
1547  break;
1548  case OP_CALL:
1549  call_rev(*it, arg, res, iw, w);
1550  break;
1551  default: // Unary or binary operation
1552  seed = w[it->i0];
1553  w[it->i0] = 0;
1554  w[it->i1] |= seed;
1555  w[it->i2] |= seed;
1556  }
1557  }
1558  return 0;
1559  }
1560 
1561  const SX SXFunction::sx_in(casadi_int ind) const {
1562  return in_.at(ind);
1563  }
1564 
1565  const std::vector<SX> SXFunction::sx_in() const {
1566  return in_;
1567  }
1568 
1569  bool SXFunction::is_a(const std::string& type, bool recursive) const {
1570  return type=="SXFunction" || (recursive && XFunction<SXFunction,
1571  SX, SXNode>::is_a(type, recursive));
1572  }
1573 
1574  void SXFunction::export_code_body(const std::string& lang,
1575  std::ostream &ss, const Dict& options) const {
1576 
1577  // Default values for options
1578  casadi_int indent_level = 0;
1579 
1580  // Read options
1581  for (auto&& op : options) {
1582  if (op.first=="indent_level") {
1583  indent_level = op.second;
1584  } else {
1585  casadi_error("Unknown option '" + op.first + "'.");
1586  }
1587  }
1588 
1589  // Construct indent string
1590  std::string indent;
1591  for (casadi_int i=0;i<indent_level;++i) {
1592  indent += " ";
1593  }
1594 
1595  // Non-cell aliases for inputs
1596  for (casadi_int i=0;i<n_in_;++i) {
1597  ss << indent << "argin_" << i << " = nonzeros_gen(varargin{" << i+1 << "});" << std::endl;
1598  }
1599 
1600  Function f = shared_from_this<Function>();
1601 
1602  for (casadi_int k=0;k<f.n_instructions();++k) {
1603  // Get operation
1604  casadi_int op = static_cast<casadi_int>(f.instruction_id(k));
1605  // Get input positions into workvector
1606  std::vector<casadi_int> o = f.instruction_output(k);
1607  // Get output positions into workvector
1608  std::vector<casadi_int> i = f.instruction_input(k);
1609  switch (op) {
1610  case OP_INPUT:
1611  {
1612  ss << indent << "w" << o[0] << " = " << "argin_" << i[0] << "(" << i[1]+1 << ");";
1613  ss << std::endl;
1614  }
1615  break;
1616  case OP_OUTPUT:
1617  {
1618  ss << indent << "argout_" << o[0] << "{" << o[1]+1 << "} = w" << i[0] << ";";
1619  ss << std::endl;
1620  }
1621  break;
1622  case OP_CONST:
1623  {
1624  std::ios_base::fmtflags fmtfl = ss.flags();
1625  ss << indent << "w" << o[0] << " = ";
1626  ss << std::scientific << std::setprecision(std::numeric_limits<double>::digits10 + 1);
1627  ss << f.instruction_constant(k) << ";" << std::endl;
1628  ss.flags(fmtfl);
1629  }
1630  break;
1631  case OP_SQ:
1632  {
1633  ss << indent << "w" << o[0] << " = " << "w" << i[0] << "^2;" << std::endl;
1634  }
1635  break;
1636  case OP_FABS:
1637  {
1638  ss << indent << "w" << o[0] << " = abs(" << "w" << i[0] << ");" << std::endl;
1639  }
1640  break;
1641  case OP_POW:
1642  case OP_CONSTPOW:
1643  ss << indent << "w" << o[0] << " = " << "w" << i[0] << ".^w" << i[1] << ";" << std::endl;
1644  break;
1645  case OP_NOT:
1646  ss << indent << "w" << o[0] << " = ~" << "w" << i[0] << ";" << std::endl;
1647  break;
1648  case OP_OR:
1649  ss << indent << "w" << o[0] << " = w" << i[0] << " | w" << i[1] << ";" << std::endl;
1650  break;
1651  case OP_AND:
1652  ss << indent << "w" << o[0] << " = w" << i[0] << " & w" << i[1] << ";" << std::endl;
1653  break;
1654  case OP_NE:
1655  ss << indent << "w" << o[0] << " = w" << i[0] << " ~= w" << i[1] << ";" << std::endl;
1656  break;
1657  case OP_IF_ELSE_ZERO:
1658  ss << indent << "w" << o[0] << " = ";
1659  ss << "if_else_zero_gen(w" << i[0] << ", w" << i[1] << ");" << std::endl;
1660  break;
1661  default:
1663  ss << indent << "w" << o[0] << " = " << casadi::casadi_math<double>::print(op,
1664  "w"+std::to_string(i[0]), "w"+std::to_string(i[1])) << ";" << std::endl;
1665  } else {
1666  ss << indent << "w" << o[0] << " = " << casadi::casadi_math<double>::print(op,
1667  "w"+std::to_string(i[0])) << ";" << std::endl;
1668  }
1669  }
1670  }
1671 
1672  }
1673 
  // Deserializing constructor: rebuild an SXFunction from a stream written
  // by serialize_body. Supports serialization format versions 1 and 2
  // (version 2 added the embedded-call bookkeeping and copy elision).
  // NOTE(review): the line declaring this constructor,
  // SXFunction::SXFunction(DeserializingStream& s) :, is missing from this
  // listing; only the base-class initializer is visible below.
  XFunction<SXFunction, SX, SXNode>(s) {
    int version = s.version("SXFunction", 1, 2);
    size_t n_instructions;
    s.unpack("SXFunction::n_instr", n_instructions);

    s.unpack("SXFunction::worksize", worksize_);
    s.unpack("SXFunction::free_vars", free_vars_);
    s.unpack("SXFunction::operations", operations_);
    s.unpack("SXFunction::constants", constants_);
    s.unpack("SXFunction::default_in", default_in_);

    if (version>=2) {

      // NOTE(review): the keys "call_sz_arg"/"call_sz_res" are reused below
      // for sz_w_arg/sz_w_res. This mirrors serialize_body (same order), so
      // data round-trips, but distinct keys would be clearer -- confirm
      // against the serializer before changing.
      s.unpack("SXFunction::call_sz_arg", call_.sz_arg);
      s.unpack("SXFunction::call_sz_res", call_.sz_res);
      s.unpack("SXFunction::call_sz_iw", call_.sz_iw);
      s.unpack("SXFunction::call_sz_w", call_.sz_w);
      s.unpack("SXFunction::call_sz_arg", call_.sz_w_arg);
      s.unpack("SXFunction::call_sz_res", call_.sz_w_res);

      size_t el_size;
      s.unpack("SXFunction::call_el_size", el_size);
      call_.el.reserve(el_size);

      // Loop over nodes
      for (casadi_int k=0;k<el_size;++k) {
        Function f;
        s.unpack("SXFunction::call_el_f", f);
        call_.el.emplace_back(f);
        auto& e = call_.el[k];
        s.unpack("SXFunction::call_el_dep", e.dep);
        s.unpack("SXFunction::call_el_res", e.res);
        s.unpack("SXFunction::call_el_copy_elision_arg", e.copy_elision_arg);
        s.unpack("SXFunction::call_el_copy_elision_offset", e.copy_elision_offset);
      }

      s.unpack("SXFunction::copy_elision", copy_elision_);

    } else {
      // Version-1 streams predate embedded calls: default-initialize
      call_.sz_arg = 0;
      call_.sz_res = 0;
      call_.sz_iw = 0;
      call_.sz_w = 0;
      call_.sz_w_arg = 0;
      call_.sz_w_res = 0;
      call_.el.clear();
      copy_elision_.resize(n_instructions, false);
    }

    // Reconstruct the algorithm (one AlgEl per instruction)
    algorithm_.resize(n_instructions);
    for (casadi_int k=0;k<n_instructions;++k) {
      AlgEl& e = algorithm_[k];
      s.unpack("SXFunction::ScalarAtomic::op", e.op);
      s.unpack("SXFunction::ScalarAtomic::i0", e.i0);
      s.unpack("SXFunction::ScalarAtomic::i1", e.i1);
      s.unpack("SXFunction::ScalarAtomic::i2", e.i2);
    }

    // Default (persistent) options
    just_in_time_opencl_ = false;
    just_in_time_sparsity_ = false;

    s.unpack("SXFunction::live_variables", live_variables_);

    // NOTE(review): one line is missing from this listing here -- verify
    // against the upstream source.
  }
1741 
    // Body of SXFunction::serialize_body (the declaring lines are missing
    // from this listing): write this function's state in format version 2,
    // the exact counterpart of the deserializing constructor above.
    s.version("SXFunction", 2);
    s.pack("SXFunction::n_instr", algorithm_.size());

    s.pack("SXFunction::worksize", worksize_);
    s.pack("SXFunction::free_vars", free_vars_);
    s.pack("SXFunction::operations", operations_);
    s.pack("SXFunction::constants", constants_);
    s.pack("SXFunction::default_in", default_in_);

    // NOTE(review): keys "call_sz_arg"/"call_sz_res" are reused below for
    // sz_w_arg/sz_w_res; round-trips because the deserializer unpacks in
    // the same order, but distinct keys would be clearer -- confirm before
    // changing (would break stream compatibility).
    s.pack("SXFunction::call_sz_arg", call_.sz_arg);
    s.pack("SXFunction::call_sz_res", call_.sz_res);
    s.pack("SXFunction::call_sz_iw", call_.sz_iw);
    s.pack("SXFunction::call_sz_w", call_.sz_w);
    s.pack("SXFunction::call_sz_arg", call_.sz_w_arg);
    s.pack("SXFunction::call_sz_res", call_.sz_w_res);

    s.pack("SXFunction::call_el_size", call_.el.size());
    // Loop over ExtendedALgEl elements
    for (const auto& n : call_.el) {
      s.pack("SXFunction::call_el_f", n.f);
      s.pack("SXFunction::call_el_dep", n.dep);
      s.pack("SXFunction::call_el_res", n.res);
      s.pack("SXFunction::call_el_copy_elision_arg", n.copy_elision_arg);
      s.pack("SXFunction::call_el_copy_elision_offset", n.copy_elision_offset);
    }

    s.pack("SXFunction::copy_elision", copy_elision_);

    // Loop over algorithm
    for (const auto& e : algorithm_) {
      s.pack("SXFunction::ScalarAtomic::op", e.op);
      s.pack("SXFunction::ScalarAtomic::i0", e.i0);
      s.pack("SXFunction::ScalarAtomic::i1", e.i1);
      s.pack("SXFunction::ScalarAtomic::i2", e.i2);
    }

    s.pack("SXFunction::live_variables", live_variables_);

    // NOTE(review): one line is missing from this listing here -- verify
    // against the upstream source.
  }
1784 
1786  return new SXFunction(s);
1787  }
1788 
1789  void SXFunction::find(std::map<FunctionInternal*, Function>& all_fun,
1790  casadi_int max_depth) const {
1791  for (auto&& e : algorithm_) {
1792  if (e.op == OP_CALL) {
1793  const ExtendedAlgEl& m = call_.el.at(e.i1);
1794  add_embedded(all_fun, m.f, max_depth);
1795  }
1796  }
1797  }
1798 
  std::vector<SX> SXFunction::order(const std::vector<SX>& expr) {
#ifdef CASADI_WITH_THREADSAFE_SYMBOLICS
    // Protect the global node 'temp' markers against concurrent use
    std::lock_guard<std::mutex> lock(SX::get_mutex_temp());
#endif // CASADI_WITH_THREADSAFE_SYMBOLICS
    // Stack used to sort the computational graph
    std::stack<SXNode*> s;

    // All nodes
    std::vector<SXNode*> nodes;

    // Add the list of nodes
    casadi_int ind=0;
    for (auto it = expr.begin(); it != expr.end(); ++it, ++ind) {
      casadi_int nz=0;
      for (auto itc = (*it)->begin(); itc != (*it)->end(); ++itc, ++nz) {
        // Add outputs to the list
        s.push(itc->get());
        // NOTE(review): a line is missing from this listing here -- likely
        // the depth-first traversal that populates 'nodes' (e.g.
        // sort_depth_first(s, nodes)); verify against the upstream source.
      }
    }

    // Clear temporary markers (set during the traversal above)
    for (casadi_int i=0; i<nodes.size(); ++i) {
      nodes[i]->temp = 0;
    }

    // Wrap the ordered nodes as scalar SX expressions
    std::vector<SX> ret(nodes.size());
    for (casadi_int i=0; i<nodes.size(); ++i) {
      ret[i] = SXElem::create(nodes[i]);
    }

    return ret;
  }
1832 
1833 } // namespace casadi
SXElem get_output(casadi_int oind) const override
Get an output.
Definition: call_sx.hpp:106
const SXElem & dep(casadi_int i) const override
get the reference of a dependency
Definition: call_sx.hpp:119
Helper class for C code generation.
std::string add_dependency(const Function &f)
Add a function dependency.
std::string arg(casadi_int i) const
Refer to argument.
void reserve_work(casadi_int n)
Reserve a maximum size of work elements, used for padding of index.
std::string constant(const std::vector< casadi_int > &v)
Represent an array constant; adding it when new.
std::string print_op(casadi_int op, const std::string &a0)
Print an operation to a c file.
std::string res(casadi_int i) const
Refer to resuly.
bool avoid_stack() const
Avoid stack?
std::string sx_work(casadi_int i)
Declare a work vector element.
Helper class for Serialization.
void unpack(Sparsity &e)
Reconstruct an object from the input stream.
void version(const std::string &name, int v)
void alloc_iw(size_t sz_iw, bool persistent=false)
Ensure required length of iw field.
std::vector< Sparsity > sparsity_in_
Input and output sparsity.
std::vector< std::vector< M > > replace_fseed(const std::vector< std::vector< M >> &fseed, casadi_int npar) const
Replace 0-by-0 forward seeds.
void alloc_res(size_t sz_res, bool persistent=false)
Ensure required length of res field.
std::string definition() const
Get function signature: name:(inputs)->(outputs)
void alloc_arg(size_t sz_arg, bool persistent=false)
Ensure required length of arg field.
virtual double sp_weight() const
Weighting factor for chosing forward/reverse mode,.
size_t n_in_
Number of inputs and outputs.
virtual void eval_mx(const MXVector &arg, MXVector &res, bool always_inline, bool never_inline) const
Evaluate with symbolic matrices.
virtual int eval_sx(const SXElem **arg, SXElem **res, casadi_int *iw, SXElem *w, void *mem, bool always_inline, bool never_inline) const
Evaluate with symbolic scalars.
bool matching_arg(const std::vector< M > &arg, casadi_int &npar) const
Check if input arguments that needs to be replaced.
virtual int sp_forward(const bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const
Propagate sparsity forward.
std::vector< double > nz_in(const std::vector< DM > &arg) const
Convert from/to flat vector of input/output nonzeros.
static const Options options_
Options.
std::vector< Sparsity > sparsity_out_
bool matching_res(const std::vector< M > &arg, casadi_int &npar) const
Check if output arguments that needs to be replaced.
void disp(std::ostream &stream, bool more) const override
Display object.
size_t sz_w() const
Get required length of w field.
virtual int sp_reverse(bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const
Propagate sparsity backwards.
void alloc_w(size_t sz_w, bool persistent=false)
Ensure required length of w field.
std::vector< double > nz_out(const std::vector< DM > &res) const
Convert from/to flat vector of input/output nonzeros.
casadi_int nnz_out() const
Number of input/output nonzeros.
void setup(void *mem, const double **arg, double **res, casadi_int *iw, double *w) const
Set the (persistent and temporary) work vectors.
Dict generate_options(const std::string &target) const override
Reconstruct options dict.
void add_embedded(std::map< FunctionInternal *, Function > &all_fun, const Function &dep, casadi_int max_depth) const
std::vector< std::vector< M > > replace_aseed(const std::vector< std::vector< M >> &aseed, casadi_int npar) const
Replace 0-by-0 reverse seeds.
Function object.
Definition: function.hpp:60
Function forward(casadi_int nfwd) const
Get a function that calculates nfwd forward derivatives.
Definition: function.cpp:1135
casadi_int nnz_out() const
Get number of output nonzeros.
Definition: function.cpp:855
casadi_int n_instructions() const
Number of instruction in the algorithm (SXFunction/MXFunction)
Definition: function.cpp:1709
size_t sz_res() const
Get required length of res field.
Definition: function.cpp:1085
const std::string & name() const
Name of the function.
Definition: function.cpp:1307
Function reverse(casadi_int nadj) const
Get a function that calculates nadj adjoint derivatives.
Definition: function.cpp:1143
std::vector< casadi_int > instruction_input(casadi_int k) const
Locations in the work vector for the inputs of the instruction.
Definition: function.cpp:1741
const Sparsity & sparsity_in(casadi_int ind) const
Get sparsity of a given input.
Definition: function.cpp:1015
std::vector< casadi_int > instruction_output(casadi_int k) const
Location in the work vector for the output of the instruction.
Definition: function.cpp:1757
size_t sz_iw() const
Get required length of iw field.
Definition: function.cpp:1087
casadi_int n_out() const
Get the number of function outputs.
Definition: function.cpp:823
casadi_int n_in() const
Get the number of function inputs.
Definition: function.cpp:819
size_t sz_w() const
Get required length of w field.
Definition: function.cpp:1089
size_t sz_arg() const
Get required length of arg field.
Definition: function.cpp:1083
casadi_int nnz_in() const
Get number of input nonzeros.
Definition: function.cpp:851
double instruction_constant(casadi_int k) const
Get the floating point output argument of an instruction (SXFunction)
Definition: function.cpp:1749
casadi_int instruction_id(casadi_int k) const
Identifier index of the instruction (SXFunction/MXFunction)
Definition: function.cpp:1733
static Matrix< Scalar > zeros(casadi_int nrow=1, casadi_int ncol=1)
Create a dense matrix or a matrix with specified sparsity with all entries zero.
static casadi_int copy_elision_min_size
static void check()
Raises an error if an interrupt was captured.
MX - Matrix expression.
Definition: mx.hpp:92
Sparse matrix class. SX and DM are specializations.
Definition: matrix_decl.hpp:99
static std::vector< SXElem > split(const SXElem &e, casadi_int n)
Definition: output_sx.hpp:139
Base class for FunctionInternal and LinsolInternal.
bool verbose_
Verbose printout.
void clear_mem()
Clear all memory (called from destructor)
The basic scalar symbolic class of CasADi.
Definition: sx_elem.hpp:75
void assignIfDuplicate(const SXElem &scalar, casadi_int depth=1)
Assign to another expression, if a duplicate.
Definition: sx_elem.cpp:111
static std::vector< SXElem > call(const Function &f, const std::vector< SXElem > &deps)
Definition: sx_elem.cpp:403
static SXElem create(SXNode *node)
Definition: sx_elem.cpp:62
SXNode * get() const
Get a pointer to the node.
Definition: sx_elem.cpp:174
static bool is_equal(const SXElem &x, const SXElem &y, casadi_int depth=0)
Check equality up to a given depth.
Definition: sx_elem.cpp:529
Internal node class for SXFunction.
Definition: sx_function.hpp:54
std::vector< SXElem > operations_
The expressions corresponding to each binary operation.
SXFunction(const std::string &name, const std::vector< Matrix< SXElem > > &inputv, const std::vector< Matrix< SXElem > > &outputv, const std::vector< std::string > &name_in, const std::vector< std::string > &name_out)
Constructor.
void eval_mx(const MXVector &arg, MXVector &res, bool always_inline, bool never_inline) const override
Evaluate symbolically, MX type.
SX instructions_sx() const override
get SX expression associated with instructions
void ad_reverse(const std::vector< std::vector< SX > > &aseed, std::vector< std::vector< SX > > &asens) const
Calculate reverse mode directional derivatives.
bool should_inline(bool with_sx, bool always_inline, bool never_inline) const override
void codegen_declarations(CodeGenerator &g) const override
Generate code for the declarations of the C function.
const std::vector< SX > sx_in() const override
Get function input(s) and output(s)
void init(const Dict &opts) override
Initialize.
void export_code_body(const std::string &lang, std::ostream &stream, const Dict &options) const override
Export function in a specific language.
void serialize_body(SerializingStream &s) const override
Serialize an object without type information.
void init_copy_elision()
Part of initialize responsible of prepaprign copy elision.
static const Options options_
Options.
std::vector< bool > copy_elision_
Copy elision per algel.
bool is_smooth() const
Check if smooth.
int sp_forward(const bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const override
Propagate sparsity forward.
size_t codegen_sz_w(const CodeGenerator &g) const override
Get the size of the work vector, for codegen.
void call_rev(const AlgEl &e, T **arg, T **res, casadi_int *iw, T *w) const
bool has_free() const override
Does the function have free variables.
void call_fwd(const AlgEl &e, const T **arg, T **res, casadi_int *iw, T *w) const
void find(std::map< FunctionInternal *, Function > &all_fun, casadi_int max_depth) const override
std::vector< SXElem > constants_
The expressions corresponding to each constant.
int sp_reverse(bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const override
Propagate sparsity backwards.
bool just_in_time_opencl_
With just-in-time compilation using OpenCL.
struct casadi::SXFunction::CallInfo call_
void codegen_body(CodeGenerator &g) const override
Generate code for the body of the C function.
static std::vector< SX > order(const std::vector< SX > &expr)
int eval_sx(const SXElem **arg, SXElem **res, casadi_int *iw, SXElem *w, void *mem, bool always_inline, bool never_inline) const override
evaluate symbolically while also propagating directional derivatives
std::vector< std::string > get_free() const override
Print free variables.
std::vector< AlgEl > algorithm_
all binary nodes of the tree in the order of execution
int eval(const double **arg, double **res, casadi_int *iw, double *w, void *mem) const override
Evaluate numerically, work vectors given.
Definition: sx_function.cpp:71
std::vector< SXElem > free_vars_
Free variables.
bool is_a(const std::string &type, bool recursive) const override
Check if the function is of a particular type.
std::vector< double > default_in_
Default input values.
void disp_more(std::ostream &stream) const override
Print the algorithm.
static ProtoFunction * deserialize(DeserializingStream &s)
Deserialize without type information.
~SXFunction() override
Destructor.
Definition: sx_function.cpp:67
Dict generate_options(const std::string &target="clone") const override
Reconstruct options dict.
void call_setup(const ExtendedAlgEl &m, CT ***call_arg, T ***call_res, casadi_int **call_iw, T **call_w, T **nz_in, T **nz_out) const
bool live_variables_
Live variables?
void ad_forward(const std::vector< std::vector< SX > > &fseed, std::vector< std::vector< SX > > &fsens) const
Calculate forward mode directional derivatives.
casadi_int n_instructions() const override
Get the number of atomic operations.
bool just_in_time_sparsity_
With just-in-time compilation for the sparsity propagation.
Internal node class for SX.
Definition: sx_node.hpp:49
virtual const SXElem & dep(casadi_int i) const
get the reference of a child
Definition: sx_node.cpp:80
virtual double to_double() const
Get value of a constant node.
Definition: sx_node.cpp:56
virtual bool is_symbolic() const
check properties of a node
Definition: sx_node.hpp:71
virtual casadi_int op() const =0
get the operation
virtual bool is_constant() const
check properties of a node
Definition: sx_node.hpp:69
Helper class for Serialization.
void version(const std::string &name, int v)
void pack(const Sparsity &e)
Serializes an object to the output stream.
Internal node class for the base class of SXFunction and MXFunction.
Definition: x_function.hpp:57
std::vector< Matrix< SXElem > > out_
Outputs of the function (needed for symbolic calculations)
Definition: x_function.hpp:262
void delayed_deserialize_members(DeserializingStream &s)
Definition: x_function.hpp:299
void init(const Dict &opts) override
Initialize.
Definition: x_function.hpp:319
std::vector< Matrix< SXElem > > in_
Inputs of the function (needed for symbolic calculations)
Definition: x_function.hpp:257
void delayed_serialize_members(SerializingStream &s) const
Helper functions to avoid recursion limit.
Definition: x_function.hpp:305
void serialize_body(SerializingStream &s) const override
Serialize an object without type information.
Definition: x_function.hpp:311
static void sort_depth_first(std::stack< SXNode * > &s, std::vector< SXNode * > &nodes)
Topological sorting of the nodes based on Depth-First Search (DFS)
Definition: x_function.hpp:395
casadi_limits class
The casadi namespace.
Definition: archiver.cpp:28
std::string join(const std::vector< std::string > &l, const std::string &delim)
double if_else_zero(double x, double y)
Conditional assignment.
Definition: calculus.hpp:289
unsigned long long bvec_t
void casadi_project(const T1 *x, const casadi_int *sp_x, T1 *y, const casadi_int *sp_y, T1 *w)
Sparse copy: y <- x, w work vector (length >= number of rows)
std::vector< MX > MXVector
Definition: mx.hpp:1006
@ OT_DOUBLEVECTOR
Matrix< SXElem > SX
Definition: sx_fwd.hpp:32
std::string str(const T &v)
String representation, any type.
GenericType::Dict Dict
C++ equivalent of Python's dict or MATLAB's struct.
T * get_ptr(std::vector< T > &v)
Get a pointer to the data contained in the vector.
@ OP_NE
Definition: calculus.hpp:70
@ OP_IF_ELSE_ZERO
Definition: calculus.hpp:71
@ OP_AND
Definition: calculus.hpp:70
@ OP_OUTPUT
Definition: calculus.hpp:82
@ OP_CONST
Definition: calculus.hpp:79
@ OP_OR
Definition: calculus.hpp:70
@ OP_INPUT
Definition: calculus.hpp:82
@ OP_POW
Definition: calculus.hpp:66
@ OP_PARAMETER
Definition: calculus.hpp:85
@ OP_FABS
Definition: calculus.hpp:71
@ OP_CALL
Definition: calculus.hpp:88
@ OP_CONSTPOW
Definition: calculus.hpp:66
@ OP_NOT
Definition: calculus.hpp:70
@ OP_SQ
Definition: calculus.hpp:67
Options metadata for a class.
Definition: options.hpp:40
std::vector< ExtendedAlgEl > el
std::vector< int > copy_elision_offset
std::vector< int > copy_elision_arg
ExtendedAlgEl(const Function &fun)
Definition: sx_function.cpp:44
An atomic operation for the SXElem virtual machine.
Definition: sx_function.hpp:37
int i0
Operator index.
Definition: sx_function.hpp:39
Easy access to all the functions for a particular type.
Definition: calculus.hpp:1125
static casadi_int ndeps(unsigned char op)
Number of dependencies.
Definition: calculus.hpp:1623
static std::string print(unsigned char op, const std::string &x, const std::string &y)
Print.
Definition: calculus.hpp:1641