mapsum.cpp
1 /*
2  * This file is part of CasADi.
3  *
4  * CasADi -- A symbolic framework for dynamic optimization.
5  * Copyright (C) 2010-2023 Joel Andersson, Joris Gillis, Moritz Diehl,
6  * KU Leuven. All rights reserved.
7  * Copyright (C) 2011-2014 Greg Horn
8  *
9  * CasADi is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 3 of the License, or (at your option) any later version.
13  *
14  * CasADi is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with CasADi; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  *
23  */
24 
25 
26 #include "mapsum.hpp"
27 #include "serializing_stream.hpp"
28 
29 namespace casadi {
30 
31  Function MapSum::create(const std::string& name, const std::string& parallelization,
32  const Function& f, casadi_int n,
33  const std::vector<bool>& reduce_in,
34  const std::vector<bool>& reduce_out,
35  const Dict& opts) {
36  if (reduce_out.empty()) return create(name, parallelization, f, n,
37  reduce_in, std::vector<bool>(f.n_out(), false));
38  casadi_assert(reduce_in.size()==f.n_in(), "Dimension mismatch");
39  casadi_assert(reduce_out.size()==f.n_out(), "Dimension mismatch");
40 
41  if (parallelization == "serial") {
42  std::string suffix = str(reduce_in)+str(reduce_out);
43  Function ret;
44  if (!f->incache(name, ret, suffix)) {
45  // Create new serial map
46  ret = Function::create(new MapSum(name, f, n, reduce_in, reduce_out), opts);
47  casadi_assert_dev(ret.name()==name);
48  // Save in cache
49  f->tocache_if_missing(ret, suffix);
50  }
51  return ret.wrap_as_needed(opts);
52  } else {
53  casadi_error("Unknown parallelization: " + parallelization);
54  }
55  }
56 
57  MapSum::MapSum(const std::string& name, const Function& f, casadi_int n,
58  const std::vector<bool>& reduce_in,
59  const std::vector<bool>& reduce_out)
60  : FunctionInternal(name), f_(f), n_(n), reduce_in_(reduce_in), reduce_out_(reduce_out) {
61  casadi_assert_dev(reduce_in.size()==f.n_in());
62  casadi_assert_dev(reduce_out.size()==f.n_out());
63  }
64 
67  s.pack("MapSum::f", f_);
68  s.pack("MapSum::n", n_);
69  s.pack("MapSum::reduce_in", reduce_in_);
70  s.pack("MapSum::reduce_out", reduce_out_);
71  }
72 
75  s.pack("MapSum::class_name", class_name());
76  }
77 
79  s.unpack("MapSum::f", f_);
80  s.unpack("MapSum::n", n_);
81  s.unpack("MapSum::reduce_in", reduce_in_);
82  s.unpack("MapSum::reduce_out", reduce_out_);
83  }
84 
86  std::string class_name;
87  s.unpack("MapSum::class_name", class_name);
88  if (class_name=="MapSum") {
89  return new MapSum(s);
90  } else {
91  casadi_error("class name '" + class_name + "' unknown.");
92  }
93  }
94 
96  clear_mem();
97  }
98 
99  std::vector<std::string> MapSum::get_function() const {
100  return {"f"};
101  }
102 
103  const Function& MapSum::get_function(const std::string &name) const {
104  casadi_assert(has_function(name),
105  "No function \"" + name + "\" in " + name_ + ". " +
106  "Available functions: " + join(get_function()) + ".");
107  return f_;
108  }
109 
110  bool MapSum::has_function(const std::string& fname) const {
111  return fname=="f";
112  }
113 
114  void MapSum::init(const Dict& opts) {
117 
118  // Call the initialization method of the base class
120 
121  // Allocate sufficient memory for serial evaluation
122  alloc_arg(f_.sz_arg());
123  alloc_res(f_.sz_res());
124  alloc_w(f_.sz_w(), true);
125  alloc_iw(f_.sz_iw());
126 
127  // Allocate scratch space for dummping result of reduced outputs
128  for (casadi_int j=0;j<n_out_;++j) {
129  if (reduce_out_[j]) alloc_w(f_.nnz_out(j), true);
130  }
131  }
132 
133  template<typename T1>
134  void casadi_add(casadi_int n, const T1* x, T1* y) {
135  casadi_int i;
136  if (!x || !y) return;
137  for (i=0; i<n; ++i) *y++ += *x++;
138  }
139 
140  template<>
141  void casadi_add(casadi_int n, const bvec_t* x, bvec_t* y) {
142  casadi_int i;
143  if (!x || !y) return;
144  for (i=0; i<n; ++i) *y++ |= *x++;
145  }
146 
147  template<typename T>
148  int MapSum::eval_gen(const T** arg, T** res, casadi_int* iw, T* w, int mem) const {
149  const T** arg1 = arg+n_in_;
150  std::copy_n(arg, n_in_, arg1);
151  T** res1 = res+n_out_;
152 
153  T* w_scratch = w + f_.sz_w();
154  for (casadi_int j=0;j<n_out_;++j) {
155  if (res[j] && reduce_out_[j]) {
156  casadi_clear(res[j], f_.nnz_out(j)); // clear sums
157  res1[j] = w_scratch; // Make the function dump result in scratch space
158  w_scratch += f_.nnz_out(j);
159  } else {
160  res1[j] = res[j];
161  }
162  }
163  for (casadi_int i=0; i<n_; ++i) {
164  if (f_(arg1, res1, iw, w, mem)) return 1;
165  for (casadi_int j=0; j<n_in_; ++j) {
166  if (arg1[j] && !reduce_in_[j]) arg1[j] += f_.nnz_in(j);
167  }
168  for (casadi_int j=0; j<n_out_; ++j) {
169  if (res1[j]) {
170  if (reduce_out_[j]) {
171  casadi_add(f_.nnz_out(j), res1[j], res[j]); // Perform sum
172  } else {
173  res1[j] += f_.nnz_out(j);
174  }
175  }
176  }
177  }
178  return 0;
179  }
180 
181  int MapSum::eval_sx(const SXElem** arg, SXElem** res,
182  casadi_int* iw, SXElem* w, void* mem,
183  bool always_inline, bool never_inline) const {
184  return eval_gen(arg, res, iw, w);
185  }
186 
187  int MapSum::sp_forward(const bvec_t** arg, bvec_t** res,
188  casadi_int* iw, bvec_t* w, void* mem) const {
189  return eval_gen(arg, res, iw, w);
190  }
191 
192  int MapSum::sp_reverse(bvec_t** arg, bvec_t** res, casadi_int* iw, bvec_t* w, void* mem) const {
193  // Note: f_.rev(arg,res,iw,w)
194  // has a side effect of clearing res
195  // Reduced outputs should not be cleared;
196  // they must influence each iteration
197 
198  // Store reduced res in scratch space
199  bvec_t* w_scratch = w + f_.sz_w();
200  for (casadi_int j=0;j<n_out_;++j) {
201  if (res[j] && reduce_out_[j]) {
202  casadi_copy(res[j], f_.nnz_out(j), w_scratch);
203  w_scratch += f_.nnz_out(j);
204  }
205  }
206  bvec_t** arg1 = arg+n_in_;
207  std::copy_n(arg, n_in_, arg1);
208  bvec_t** res1 = res+n_out_;
209  std::copy_n(res, n_out_, res1);
210  for (casadi_int i=0; i<n_; ++i) {
211  // Restore res1[j] from scratch space
212  w_scratch = w + f_.sz_w();
213  for (casadi_int j=0;j<n_out_;++j) {
214  if (res[j] && reduce_out_[j]) {
215  casadi_copy(w_scratch, f_.nnz_out(j), res1[j]);
216  w_scratch += f_.nnz_out(j);
217  }
218  }
219  if (f_.rev(arg1, res1, iw, w)) return 1;
220  for (casadi_int j=0; j<n_in_; ++j) {
221  if (arg1[j] && !reduce_in_[j]) arg1[j] += f_.nnz_in(j);
222  }
223  for (casadi_int j=0; j<n_out_; ++j) {
224  if (res1[j] && !reduce_out_[j]) res1[j] += f_.nnz_out(j);
225  }
226  }
227  return 0;
228  }
229 
231  g.add_dependency(f_);
232  }
233 
236  g.local("i", "casadi_int");
237  g.local("arg1", "const casadi_real*", "*");
238  g.local("res1", "casadi_real*", "*");
239  g.local("w_scratch", "casadi_real*", "*");
240  // Input buffer
241  g << "arg1 = arg+" << n_in_ << ";\n"
242  << "for (i=0; i<" << n_in_ << "; ++i) arg1[i]=arg[i];\n";
243  // Output buffer
244  g << "res1 = res+" << n_out_ << ";\n";
245  g << "w_scratch = w+" << f_.sz_w() << ";\n";
246  for (casadi_int j=0;j<n_out_;++j) {
247  if (reduce_out_[j]) {
248  g << "if (res[" << j << "]) {\n";
249  g << "casadi_clear(res[" << j << "], " << f_.nnz_out(j) << ");\n";
250  g << "res1[" << j << "] = w_scratch;\n";
251  g << "w_scratch+=" << f_.nnz_out(j) << ";\n";
252  g << "} else {\n";
253  g << "res1[" << j << "] = res[" << j << "];\n";
254  g << "}\n";
255  } else {
256  g << "res1[" << j << "] = res[" << j << "];\n";
257  }
258  }
259 
260  g << "for (i=0; i<" << n_ << "; ++i) {\n";
261  // Evaluate
262  g << "if (" << g(f_, "arg1", "res1", "iw", "w") << ") return 1;\n";
263  // Update input buffers
264  for (casadi_int j=0; j<n_in_; ++j) {
265  if (!reduce_in_[j] && f_.nnz_in(j)) {
266  g << "if (arg1[" << j << "]) arg1[" << j << "]+=" << f_.nnz_in(j) << ";\n";
267  }
268  }
269  // Update output buffers
270  for (casadi_int j=0; j<n_out_; ++j) {
271  if (reduce_out_[j]) {
272  g << "if (res1[" << j << "]) ";
273  g << g.axpy(f_.nnz_out(j), "1.0", "res1[" + str(j) + "]", "res[" + str(j) + "]") << "\n";
274  } else {
275  if (f_.nnz_out(j)) {
276  g << "if (res1[" << j << "]) ";
277  g << "res1[" << j << "]+=" << f_.nnz_out(j) << ";\n";
278  }
279  }
280  }
281  g << "}\n";
282  }
283 
285  ::get_forward(casadi_int nfwd, const std::string& name,
286  const std::vector<std::string>& inames,
287  const std::vector<std::string>& onames,
288  const Dict& opts) const {
289  // Generate map of derivative
290  Function df = f_.forward(nfwd);
291 
292  for (casadi_int i=0;i<n_out_;++i) {
293  if (reduce_out_[i]) casadi_assert(df.nnz_in(n_in_+i)==0, "Case not implemented");
294  }
295 
296  std::vector<bool> reduce_in = join(reduce_in_, reduce_out_, reduce_in_);
297  Function dm = MapSum::create("mapsum" + str(n_) + "_" + df.name(), parallelization(),
298  df, n_, reduce_in, reduce_out_);
299 
300  // Input expressions
301  std::vector<MX> arg = dm.mx_in();
302 
303  // Need to reorder sensitivity inputs
304  std::vector<MX> res = arg;
305  std::vector<MX>::iterator it=res.begin()+n_in_+n_out_;
306  std::vector<casadi_int> ind;
307  for (casadi_int i=0; i<n_in_; ++i, ++it) {
308  if (reduce_in_[i]) continue;
309  casadi_int sz = f_.size2_in(i);
310  ind.clear();
311  for (casadi_int k=0; k<n_; ++k) {
312  for (casadi_int d=0; d<nfwd; ++d) {
313  for (casadi_int j=0; j<sz; ++j) {
314  ind.push_back((d*n_ + k)*sz + j);
315  }
316  }
317  }
318  *it = (*it)(Slice(), ind); // NOLINT
319  }
320 
321  // Get output expressions
322  res = dm(res);
323 
324  // Reorder sensitivity outputs
325  it = res.begin();
326  for (casadi_int i=0; i<n_out_; ++i, ++it) {
327  if (reduce_out_[i]) continue;
328  casadi_int sz = f_.size2_out(i);
329  ind.clear();
330  for (casadi_int d=0; d<nfwd; ++d) {
331  for (casadi_int k=0; k<n_; ++k) {
332  for (casadi_int j=0; j<sz; ++j) {
333  ind.push_back((k*nfwd + d)*sz + j);
334  }
335  }
336  }
337  *it = (*it)(Slice(), ind); // NOLINT
338  }
339 
340  // Construct return function
341  Dict custom_opts = opts;
342  custom_opts["always_inline"] = true;
343  custom_opts["allow_duplicate_io_names"] = true;
344  return Function(name, arg, res, inames, onames, custom_opts);
345  }
346 
348  ::get_reverse(casadi_int nadj, const std::string& name,
349  const std::vector<std::string>& inames,
350  const std::vector<std::string>& onames,
351  const Dict& opts) const {
352  // Generate map of derivative
353  Function df = f_.reverse(nadj);
354 
355  for (casadi_int i=0;i<n_out_;++i) {
356  if (reduce_out_[i]) casadi_assert(df.nnz_in(n_in_+i)==0, "Case not implemented");
357  }
358 
359  std::vector<bool> reduce_in = join(reduce_in_, reduce_out_, reduce_out_);
360  Function dm = MapSum::create("mapsum" + str(n_) + "_" + df.name(), parallelization(),
361  df, n_, reduce_in, reduce_in_);
362 
363  // Input expressions
364  std::vector<MX> arg = dm.mx_in();
365 
366  // Need to reorder sensitivity inputs
367  std::vector<MX> res = arg;
368  std::vector<MX>::iterator it=res.begin()+n_in_+n_out_;
369  std::vector<casadi_int> ind;
370  for (casadi_int i=0; i<n_out_; ++i, ++it) {
371  if (reduce_out_[i]) continue;
372  casadi_int sz = f_.size2_out(i);
373  ind.clear();
374  for (casadi_int k=0; k<n_; ++k) {
375  for (casadi_int d=0; d<nadj; ++d) {
376  for (casadi_int j=0; j<sz; ++j) {
377  ind.push_back((d*n_ + k)*sz + j);
378  }
379  }
380  }
381  *it = (*it)(Slice(), ind); // NOLINT
382  }
383 
384  // Get output expressions
385  res = dm(res);
386 
387  // Reorder sensitivity outputs
388  it = res.begin();
389  for (casadi_int i=0; i<n_in_; ++i, ++it) {
390  if (reduce_in_[i]) continue;
391  casadi_int sz = f_.size2_in(i);
392  ind.clear();
393  for (casadi_int d=0; d<nadj; ++d) {
394  for (casadi_int k=0; k<n_; ++k) {
395  for (casadi_int j=0; j<sz; ++j) {
396  ind.push_back((k*nadj + d)*sz + j);
397  }
398  }
399  }
400  *it = (*it)(Slice(), ind); // NOLINT
401  }
402 
403  // Construct return function
404  Dict custom_opts = opts;
405  custom_opts["always_inline"] = true;
406  custom_opts["allow_duplicate_io_names"] = true;
407  return Function(name, arg, res, inames, onames, custom_opts);
408  }
409 
410  int MapSum::eval(const double** arg, double** res, casadi_int* iw, double* w, void* mem) const {
411  // This checkout/release dance is an optimization.
412  // Could also use the thread-safe variant f_(arg1, res1, iw, w)
413  // in Map::eval_gen
414  setup(mem, arg, res, iw, w);
416  return eval_gen(arg, res, iw, w, m);
417  }
418 
419 } // namespace casadi
Helper class for C code generation.
std::string axpy(casadi_int n, const std::string &a, const std::string &x, const std::string &y)
Codegen axpy: y += a*x.
std::string add_dependency(const Function &f)
Add a function dependency.
void local(const std::string &name, const std::string &type, const std::string &ref="")
Declare a local variable.
void add_auxiliary(Auxiliary f, const std::vector< std::string > &inst={"casadi_real"})
Add a built-in auxiliary function.
Helper class for Serialization.
void unpack(Sparsity &e)
Reconstruct an object from the input stream.
Internal class for Function.
void alloc_iw(size_t sz_iw, bool persistent=false)
Ensure required length of iw field.
void init(const Dict &opts) override
Initialize.
void tocache_if_missing(Function &f, const std::string &suffix="") const
Save function to cache, only if missing.
std::vector< bool > is_diff_out_
void serialize_body(SerializingStream &s) const override
Serialize an object without type information.
void alloc_res(size_t sz_res, bool persistent=false)
Ensure required length of res field.
void alloc_arg(size_t sz_arg, bool persistent=false)
Ensure required length of arg field.
bool incache(const std::string &fname, Function &f, const std::string &suffix="") const
Get function in cache.
size_t n_in_
Number of inputs and outputs.
void serialize_type(SerializingStream &s) const override
Serialize type information.
void alloc_w(size_t sz_w, bool persistent=false)
Ensure required length of w field.
void setup(void *mem, const double **arg, double **res, casadi_int *iw, double *w) const
Set the (persistent and temporary) work vectors.
std::vector< bool > is_diff_in_
Are inputs and outputs differentiable?
Function object.
Definition: function.hpp:60
Function forward(casadi_int nfwd) const
Get a function that calculates nfwd forward derivatives.
Definition: function.cpp:1135
casadi_int nnz_out() const
Get number of output nonzeros.
Definition: function.cpp:855
size_t sz_res() const
Get required length of res field.
Definition: function.cpp:1085
const MX mx_in(casadi_int ind) const
Get symbolic primitives equivalent to the input expressions.
Definition: function.cpp:1584
const std::string & name() const
Name of the function.
Definition: function.cpp:1307
Function reverse(casadi_int nadj) const
Get a function that calculates nadj adjoint derivatives.
Definition: function.cpp:1143
static Function create(FunctionInternal *node)
Create from node.
Definition: function.cpp:336
bool is_diff_out(casadi_int ind) const
Get differentiability of inputs/output.
Definition: function.cpp:1055
int rev(bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, int mem=0) const
Propagate sparsity backward.
Definition: function.cpp:1100
size_t sz_iw() const
Get required length of iw field.
Definition: function.cpp:1087
casadi_int n_out() const
Get the number of function outputs.
Definition: function.cpp:823
casadi_int n_in() const
Get the number of function inputs.
Definition: function.cpp:819
bool is_diff_in(casadi_int ind) const
Get differentiability of inputs/output.
Definition: function.cpp:1047
size_t sz_w() const
Get required length of w field.
Definition: function.cpp:1089
size_t sz_arg() const
Get required length of arg field.
Definition: function.cpp:1083
Function wrap_as_needed(const Dict &opts) const
Wrap in a Function with options.
Definition: function.cpp:1920
casadi_int nnz_in() const
Get number of input nonzeros.
Definition: function.cpp:851
MapSum(DeserializingStream &s)
Deserializing constructor.
Definition: mapsum.cpp:78
void serialize_body(SerializingStream &s) const override
Serialize an object without type information.
Definition: mapsum.cpp:65
int eval_gen(const T **arg, T **res, casadi_int *iw, T *w, int mem=0) const
Evaluate or propagate sparsities.
Definition: mapsum.cpp:148
int sp_reverse(bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const override
Propagate sparsity backwards.
Definition: mapsum.cpp:192
~MapSum() override
Destructor.
Definition: mapsum.cpp:95
void serialize_type(SerializingStream &s) const override
Serialize type information.
Definition: mapsum.cpp:73
int sp_forward(const bvec_t **arg, bvec_t **res, casadi_int *iw, bvec_t *w, void *mem) const override
Propagate sparsity forward.
Definition: mapsum.cpp:187
Function f_
Definition: mapsum.hpp:221
std::string class_name() const override
Get type name.
Definition: mapsum.hpp:66
static Function create(const std::string &name, const std::string &parallelization, const Function &f, casadi_int n, const std::vector< bool > &reduce_in, const std::vector< bool > &reduce_out, const Dict &opts=Dict())
Definition: mapsum.cpp:31
virtual std::string parallelization() const
Type of parallellization.
Definition: mapsum.hpp:111
casadi_int n_
Definition: mapsum.hpp:224
void init(const Dict &opts) override
Initialize.
Definition: mapsum.cpp:114
static ProtoFunction * deserialize(DeserializingStream &s)
Deserialize with type disambiguation.
Definition: mapsum.cpp:85
std::vector< bool > reduce_in_
Definition: mapsum.hpp:227
void codegen_declarations(CodeGenerator &g) const override
Generate code for the declarations of the C function.
Definition: mapsum.cpp:230
virtual std::vector< std::string > get_function() const override
Definition: mapsum.cpp:99
std::vector< bool > reduce_out_
Definition: mapsum.hpp:230
int eval(const double **arg, double **res, casadi_int *iw, double *w, void *mem) const override
Evaluate the function numerically.
Definition: mapsum.cpp:410
int eval_sx(const SXElem **arg, SXElem **res, casadi_int *iw, SXElem *w, void *mem, bool always_inline, bool never_inline) const override
evaluate symbolically while also propagating directional derivatives
Definition: mapsum.cpp:181
void codegen_body(CodeGenerator &g) const override
Generate code for the body of the C function.
Definition: mapsum.cpp:234
Function get_reverse(casadi_int nadj, const std::string &name, const std::vector< std::string > &inames, const std::vector< std::string > &onames, const Dict &opts) const override
Generate a function that calculates nadj adjoint derivatives.
Definition: mapsum.cpp:348
bool has_function(const std::string &fname) const override
Definition: mapsum.cpp:110
Function get_forward(casadi_int nfwd, const std::string &name, const std::vector< std::string > &inames, const std::vector< std::string > &onames, const Dict &opts) const override
Generate a function that calculates nfwd forward derivatives.
Definition: mapsum.cpp:285
Base class for FunctionInternal and LinsolInternal.
void clear_mem()
Clear all memory (called from destructor)
The basic scalar symbolic class of CasADi.
Definition: sx_elem.hpp:75
Helper class for Serialization.
void pack(const Sparsity &e)
Serializes an object to the output stream.
Class representing a Slice.
Definition: slice.hpp:48
The casadi namespace.
Definition: archiver.cpp:28
std::string join(const std::vector< std::string > &l, const std::string &delim)
unsigned long long bvec_t
void casadi_copy(const T1 *x, casadi_int n, T1 *y)
COPY: y <-x.
std::string str(const T &v)
String representation, any type.
void casadi_add(casadi_int n, const T1 *x, T1 *y)
Definition: mapsum.cpp:134
GenericType::Dict Dict
C++ equivalent of Python's dict or MATLAB's struct.
void casadi_clear(T1 *x, casadi_int n)
CLEAR: x <- 0.