RecFilter
recfilter.h
Go to the documentation of this file.
1 #ifndef _RECURSIVE_FILTER_H_
2 #define _RECURSIVE_FILTER_H_
3 
4 #include <iomanip>
5 #include <vector>
6 #include <string>
7 #include <stdexcept>
8 #include <cstdio>
9 #include <algorithm>
10 
11 #include <Halide.h>
12 
13 // Forward declarations of internal structures
14 
15 struct FilterInfo;
16 struct RecFilterContents;
17 class RecFilterFunc;
18 class RecFilterSchedule;
19 class RecFilter;
20 class RecFilterDim;
22 class RecFilterRefVar;
23 class RecFilterRefExpr;
24 
25 class FuncTag;
26 class VarTag;
27 
28 enum VariableTag : int;
29 enum FunctionTag : int;
30 
31 // ----------------------------------------------------------------------------
32 
33 /**@name Dimensions for defining a recursive filter */
34 // {@
35 
36 // /** Filter dimension for channels */
37 // class RecFilterChannel {
38 // private:
39 // Halide::Var v; ///< variable for the dimension
40 // int e; ///< number of channels
41 //
42 // public:
43 // /** Empty constructor */
44 // RecFilterChannel(void) {}
45 //
46 // /** Constructor
47 // * @param var_name name of dimension
48 // * @param num_channels number of channels
49 // */
50 // RecFilterChannel(std::string var_name, int num_channels):
51 // v(var_name), e(num_channels) {}
52 //
53 // /** Convert into Halide::Var for interoperability with Halide code */
54 // Halide::Var var(void) const { return v; }
55 //
56 // /** Number of channels traversed by this dimension */
57 // int num_channels(void) const { return e; }
58 //
59 // /** Express as Halide::Expr so that it can be used to index other Halide
60 // * functions and buffers */
61 // operator Halide::Expr(void) {
62 // return Halide::Internal::Variable::make(Halide::Int(32), v.name());
63 // }
64 // };
65 
66 
67 /** Filter dimension with variable name and width of image in the dimension */
68 class RecFilterDim {
69 private:
70  Halide::Var v; ///< variable for the dimension
71  int e; ///< size of input/output buffer in the dimension
72 
73 public:
74  /** Empty constructor */
75  RecFilterDim(void) {}
76 
77  /** Constructor
78  * @param var_name name of dimension
79  * @param var_extent size of dimension, i.e. image width or height
80  */
81  RecFilterDim(std::string var_name, int var_extent):
82  v(var_name), e(var_extent) {}
83 
84  /** Convert into Halide::Var for interoperability with Halide code */
85  Halide::Var var(void) const { return v; }
86 
87  /** Size of input/output buffer indexed by this dimension */
88  int num_pixels(void) const { return e; }
89 
90  /** Express as Halide::Expr so that it can be used to index other Halide
91  * functions and buffers */
92  operator Halide::Expr(void) {
93  return Halide::Internal::Variable::make(Halide::Int(32), v.name());
94  }
95 };
96 
97 /** Filter dimension augmented with causality */
99 private:
100  RecFilterDim r; ///< variable for the filter dimension
101  bool c; ///< causality
102 
103 public:
104  /** Empty constructor */
106 
107  /** Constructor
108  * @param rec_var RecFilterDim object
109  * @param causal causality of the dimension
110  */
112  r(rec_var), c(causal) {}
113 
114  /** Convert into Halide::Var for interoperability with Halide code */
115  Halide::Var var(void) const { return r.var(); }
116 
117  /** Size of input/output buffer indexed by this dimension */
118  int num_pixels(void) const { return r.num_pixels(); }
119 
120  /** Causality of the dimension */
121  bool causal(void) const { return c; }
122 
123  /** Express as Halide::Expr so that it can be used to index other Halide
124  * functions and buffers */
125  operator Halide::Expr(void) {
126  return Halide::Internal::Variable::make(Halide::Int(32), r.var().name());
127  }
128 };
129 // @}
130 
131 /**@name Operators to indicate causal and anticausal scans in a particular filter dimension */
132 // {@
133 /** Operator to create causal scan indication, +x indicates causal scan where
134  * x is a RecFilterDim object */
136 
137 /** Operator to create anticausal scan indication, -x indicates anticausal scan where
138  * x is a RecFilterDim object */
140 // @}
141 
142 
143 // ----------------------------------------------------------------------------
144 
145 /** Recursive filter class */
146 class RecFilter {
147 private:
148 
149  /** Maximum threads to launch per CUDA warp, global constant required for GPU targets */
150  static int max_threads_per_cuda_warp;
151 
152  /** Vectorization width, global constant required for CPU targets */
153  static int vectorization_width;
154 
155  /** Data members of the recursive filter */
156  Halide::Internal::IntrusivePtr<RecFilterContents> contents;
157 
158  /** Get the recursive filter function by name */
159  RecFilterFunc& internal_function(std::string func_name);
160 
161  /** Get all recursive filter funcs that have the given tag */
162  std::vector<std::string> internal_functions(FuncTag ftag);
163 
164  /** Inline all calls to a given function
165  *
166  * Preconditions:
167  * - function must not have any update definitions
168  *
169  * Side effects: function is completely removed from the filter's dependency graph
170  *
171  * \param[in] func_name name of function to inline
172  */
173  void inline_func(std::string func_name);
174 
175  /** Finalize the filter; triggers automatic function transformations and cleanup */
176  void finalize(void);
177 
178  /** Perform chores before realizing: compile the filter if not already done, upload
179  * buffers to device and allocate buffers for realization
180  *
181  * \returns realization object that contains allocated buffers
182  */
183  Halide::Realization create_realization(void);
184 
185 public:
186 
187  /** Empty constructor */
188  RecFilter(std::string name="");
189 
190  /** Standard assignment operator */
191  RecFilter& operator=(const RecFilter &r);
192 
193  /** Name of the filter */
194  std::string name(void) const;
195 
196  /** @name Recursive filter initialization
197  * These functions allow functional programming like syntax to initialize
198  * a recursive filter
199  * \code
200  * R(x) = some_expression_involving_x // 1D filter
201  * R(x,y) = some_expression_involving_x_y // 2D filter
202  * R(x,y,z) = some_expression_involving_x_y_z // 3D filter
203  * R({x,y..}) = some_expression_for_involving_x_y.. // nD filter
204  * \endcode
205  */
206  // {@
210  RecFilterRefVar operator()(std::vector<RecFilterDim> x);
211  // @}
212 
213  /** @name Recursive filter result expression
214  * These functions return an expression that represents the final result of
215  * the filter
216  * \code
217  * R(x) // pixel x for a 1D filter
218  * R(x,y) // pixel (x,y) for a 2D filter
219  * R(x,y,z) // pixel (x,y,z) for a 3D filter
220  * R({x,y..}) // pixel (x,y..) for a nD filter
221  * \endcode
222  */
223  // {@
224  RecFilterRefExpr operator()(Halide::Var x);
225  RecFilterRefExpr operator()(Halide::Var x, Halide::Var y);
226  RecFilterRefExpr operator()(Halide::Var x, Halide::Var y, Halide::Var z);
227  RecFilterRefExpr operator()(std::vector<Halide::Var> x);
228  RecFilterRefExpr operator()(Halide::Expr x);
229  RecFilterRefExpr operator()(Halide::Expr x, Halide::Expr y);
230  RecFilterRefExpr operator()(Halide::Expr x, Halide::Expr y, Halide::Expr z);
231  RecFilterRefExpr operator()(std::vector<Halide::Expr> x);
232  // @}
233 
234  /** Add a pure definition to the recursive filter
235  * \param pure_args list of pure args
236  * \param pure_def list of expressions to initialize the filter
237  */
238  void define(std::vector<RecFilterDim> pure_args, std::vector<Halide::Expr> pure_def);
239 
240  /**@name Compile and run */
241  // {@
242 
243  /** Get the compilation target, inferred from HL_JIT_TARGET */
244  Halide::Target target(void);
245 
246  /** Apply output domains bounds; this is performed implicitly for tiled
247  * filters, but it must be called by the application for non-tiled filters */
248  void apply_bounds(void);
249 
250  /** Trigger JIT compilation for specified hardware-platform target; dumps the generated
251  * codegen in human readable HTML format if filename is specified */
252  void compile_jit(std::string filename="");
253 
254  /** Compute the filter
255  * \returns Realization object that contains all the buffers
256  */
257  Halide::Realization realize(void);
258 
259  /** Profile the filter
260  * \param iterations number of profiling iterations
261  * \returns computation time in milliseconds
262  */
263  float profile(int iterations);
264  // @}
265 
266 
267  /** @name Routines to add filters
268  *
269  * @brief Add a causal or anticausal scan to the recursive filter with given
270  * feedback and feed forward coefficients
271  *
272  * \param x filter dimension
273  * \param coeff 1 feedforward and n feedback coeffs (n = filter order)
274  *
275  * Preconditions:
276  * - first argument must be of the form +x, -x or x where x is a RecFilterDim object
277  */
278  // {@
279  void add_filter(RecFilterDim x, std::vector<float> coeff);
280  void add_filter(RecFilterDimAndCausality x, std::vector<float> coeff);
281  // @}
282 
283  /** @name Image boundary conditions
284  * Clamp image border to the last pixel in all boundaries, default border is 0
285  */
286  // {@
287  void set_clamped_image_border(void);
288  // @}
289 
290  /** Cast the recfilter as a Halide::Func; this returns the function that holds
291  * the final result of this filter; useful for extracting the result of this
292  * function to use as input to other Halide Func
293  */
294  Halide::Func as_func(void);
295 
296  /**
297  * Extract the constituent function by name, useful for debugging:
298  * - realize only a particular stage for correctness or profiling
299  * - debugging/testing schedules by using Halide's scheduling primitives directly
300  * on the function instead of the high level collective scheduling
301  */
302  Halide::Func func(std::string func_name);
303 
304 
305  /**@name Tiling routines
306  * @brief Tile a list of dimensions into their respective tile widths specified as
307  * variable-tile width pairs.
308  *
309  * Preconditions:
310  * - dimension with specified variable name must exist
311  * - image width must be a multiple of tile width for each dimension
312  */
313  // {@
314  void split_all_dimensions(int tx);
315  void split(RecFilterDim x, int tx);
316  void split(RecFilterDim x, int tx, RecFilterDim y, int ty);
317  void split(RecFilterDim x, int tx, RecFilterDim y, int ty, RecFilterDim z, int tz);
318  void split(std::map<std::string, int> dims);
319  // @}
320 
321 
322  /** @name Cascading API */
323  // {@
324 
325  /**
326  *
327  * Cascade the filter into multiple filters using a list of lists of scans,
328  * producing a list of recursive filters, each of which computes the corresponding
329  * list of scans in an overlapped fashion
330  *
331  * Preconditions:
332  * - filter must not be tiled
333  * - list of list of scans spans all the scans of the original filter
334  * - no scan is repeated in the list of list of scans
335  * - the relative order of scans with respect to causality remains preserved
336  *
337  * \param a list of scans for first filter
338  * \param b list of scans for second filter
339  *
340  * \return two cascaded filters
341  */
342  std::vector<RecFilter> cascade(std::vector<int> a, std::vector<int> b);
343 
344  /**
345  * Cascade the filter into multiple filters using a list of lists of scans,
346  * producing a list of recursive filters, each of which computes the corresponding
347  * list of scans in an overlapped fashion
348  *
349  * Preconditions:
350  * - filter must not be tiled
351  * - list of list of scans spans all the scans of the original filter
352  * - no scan is repeated in the list of list of scans
353  * - the relative order of scans with respect to causality remains preserved
354  *
355  * \param scan list of list of scans, each inner list becomes a separate filter
356  *
357  * \return list of cascaded filters
358  */
359  std::vector<RecFilter> cascade(std::vector<std::vector<int> > scan);
360 
361  /** Computing all causal scans in all dimensions in an overlapped fashion
362  * and all anticausal scans in an overlapped fashion and cascade the two groups
363  *
364  * Preconditions:
365  * - filter must not be tiled
366  *
367  * \returns list of cascaded filters
368  */
369  std::vector<RecFilter> cascade_by_causality(void);
370 
371  /** Compute all scans in the same dimension in an overlapped fashion and cascade
372  * different dimensions
373  *
374  * Preconditions:
375  * - filter must not be tiled
376  *
377  * \returns list of cascaded filters
378  */
379  std::vector<RecFilter> cascade_by_dimension(void);
380 
381  /** Overlap a given filter with the current filter creating a higher
382  * order filter
383  *
384  * Preconditions:
385  * - filter must not be tiled
386  * - given filter must have same number of dimensions in the same order
387  * - each scan of each dimension of given filter must have same causality
388  *
389  * \todo This function is only partially tested and may be unstable
390  *
391  * \param fA filter to be overlapped with current filter
392  * \param name name of the overlapped filter (optional)
393  *
394  * \returns overlapped computation of all scans in both filters
395  */
396  RecFilter overlap_to_higher_order_filter(RecFilter fA, std::string name="O");
397  // @}
398 
399  /**@name Collective scheduling handles */
400  // {@
401 
402  /** Extract a handle to schedule intra-tile functions of the tiled filter
403  * \param id 0 for all intra tile functions, 1 for nD intra-tile functions, otherwise 1D intra-tile functions
404  */
406 
407  /** Extract a handle to schedule inter-tile functions of the tiled filter */
409 
410  /** Extract a handle to schedule non-tiled filter */
412 
413  /** Set final result of filter to be computed at an external recursive
414  * filter, useful for merging the filter with external stages; the filter
415  * must not depend upon the external function
416  *
417  * \param external function inside which the filter must be computed
418  */
419  void compute_at(RecFilter external);
420 
421  /** Set final result of filter to be computed at an external Func,
422  * useful for merging the filter with external stages; the filter
423  * must not depend upon the external function
424  *
425  * \param external function inside which the filter must be computed
426  * \param granularity variable where this filter's result should be computed
427  */
428  void compute_at(Halide::Func external, Halide::Var granularity);
429  // @}
430 
431  /**@name Automatic scheduling for GPU targets */
432  // {@
433  /** Automatic GPU schedule for non-tiled filter
434  * \param tile_width tiling factor to split non-tiled dimensions into CUDA blocks and CUDA tiles
435  */
436  void gpu_auto_full_schedule(int tile_width=32);
437 
438  /** Automatic GPU schedule for tiled or non-tiled recursive filter;
439  * calls RecFilter::gpu_auto_full_schedule(),
440  * RecFilter::gpu_auto_intra_schedule() and
441  * RecFilter::gpu_auto_inter_schedule().
442  * \param tile_width tiling factor to split non-tiled full dimensions into CUDA blocks and CUDA tiles (only used if filter is not tiled)
443  */
444  void gpu_auto_schedule(int tile_width=32);
445 
446  /** Automatic GPU schedule for inter-tile functions of tiled filter */
447  void gpu_auto_inter_schedule(void);
448 
449  /** Automatic GPU schedule for intra-tile functions of tiled filter
450  * \param id 0 for all intra tile functions, 1 for nD intra-tile functions, otherwise 1D intra-tile functions
451  */
452  void gpu_auto_intra_schedule(int id);
453  // @}
454 
455  /**@name Automatic scheduling for CPU targets */
456  // {@
457 
458  /** Automatic CPU schedule for tiled or non-tiled recursive filter;
459  * calls RecFilter::cpu_auto_full_schedule(),
460  * RecFilter::cpu_auto_intra_schedule() and
461  * RecFilter::cpu_auto_inter_schedule().
462  */
463  void cpu_auto_schedule(void);
464 
465  /** Automatic CPU schedule for non-tiled filter */
466  void cpu_auto_full_schedule(void);
467 
468  /** Automatic CPU schedule for inter-tile functions of tiled filter */
469  void cpu_auto_inter_schedule(void);
470 
471  /** Automatic CPU schedule for intra-tile functions of tiled filter */
472  void cpu_auto_intra_schedule(void);
473  // @}
474 
475  /** @name Generic handles to write schedules for dimensions of internal functions */
476  // {@
477  VarTag full (int i=-1);
478  VarTag inner (int i=-1);
479  VarTag outer (int i=-1);
480  VarTag tail (void);
481  VarTag full_scan (void);
482  VarTag inner_scan (void);
483  VarTag outer_scan (void);
484  VarTag inner_channels(void);
485  VarTag outer_channels(void);
486  // @}
487 
488  /**@name Print Halide code for the recursive filter */
489  // {@
490  std::string print_functions(void) const;
491  std::string print_synopsis (void) const;
492  std::string print_schedule (void) const;
493  std::string print_hl_code (void) const;
494  // @}
495 
496  /**@name Global constants for scheduling */
497  // {@
498 
499  /** Set the maximum threads to launch per CUDA warp, must be called before scheduling the RecFilter object */
500  static void set_max_threads_per_cuda_warp(int v);
501 
502  /** Set the vectorization width, must be called before scheduling the RecFilter object */
503  static void set_vectorization_width(int v);
504  // @}
505 
506 protected:
507  /** Allow scheduler access to internal functions; only needed to append the
508  * scheduling commands to each RecFilterFunc::schedule for debugging purposes */
509  friend class RecFilterSchedule;
510 };
511 
512 // -----------------------------------------------------------------------------
513 
514 /** Handle to schedule internal Halide functions that constitute the
515  * recursive filter */
517 private:
518  RecFilter recfilter;
519  std::vector<std::string> func_list;
520 
521  std::map<int,std::vector<Halide::VarOrRVar> > var_list_by_tag(RecFilterFunc f, VarTag vtag);
522  std::map<int,Halide::VarOrRVar> var_by_tag(RecFilterFunc f, VarTag vtag);
523 
524  bool contains_vars_with_tag(VarTag vtag);
525 
526 protected:
527  bool empty(void);
528  friend class RecFilter;
529 
530 public:
531  RecFilterSchedule(RecFilter& r, std::vector<std::string> fl);
532 
535 
537  RecFilterSchedule& split(VarTag v, int factor);
538  RecFilterSchedule& split(VarTag v, int factor, VarTag vin);
539  RecFilterSchedule& split(VarTag v, int factor, VarTag vin, VarTag vout);
540 
541  RecFilterSchedule& reorder(std::vector<VarTag> x);
548 
549  RecFilterSchedule& storage_layout (VarTag innermost, VarTag outermost);
550  RecFilterSchedule& reorder_storage(std::vector<VarTag> x);
555 
556  RecFilterSchedule& unroll (VarTag v, int factor=0);
557  RecFilterSchedule& parallel (VarTag v, int factor=0);
558  RecFilterSchedule& vectorize (VarTag v, int factor=0);
562 
566 };
567 
568 
569 // ----------------------------------------------------------------------------
570 
571 /** Create an expression that can be used to initialize a pixel.
572  * This class allows functional programming like syntax to initialize a filter R:
573  * \code
574  * R(x) = some_expression_involving_x // 1D filter
575  * R(x,y) = some_expression_involving_x_y // 2D filter
576  * R(x,y,z) = some_expression_involving_x_y_z // 3D filter
577  * R({x,y..}) = some_expression_for_involving_x_y.. // nD filter
578  * \endcode
579  */
581 private:
582  RecFilter rf;
583  std::vector<RecFilterDim> args;
584 
585 public:
586  RecFilterRefVar(RecFilter r, std::vector<RecFilterDim> a);
587 
588  /** Use this as the left-hand-side of a definition */
589  void operator=(Halide::Expr pure_def);
590 
591  /** Use this as the left-hand-side of a definition for a Func with multiple outputs */
592  void operator=(const Halide::Tuple &pure_def);
593 
594  /** Use this as the left-hand-side of a definition */
595  void operator=(Halide::FuncRefVar pure_def);
596 
597  /** Use this as the left-hand-side of a definition */
598  void operator=(Halide::FuncRefExpr pure_def);
599 
600  /** Use this as the left-hand-side of a definition for a Func with multiple outputs */
601  void operator=(std::vector<Halide::Expr> pure_def);
602 
603  /** Use this RecFilterRefVar as a call to the internal recfilter output */
604  operator Halide::Expr(void);
605 
606  /** Use this RecFilterRefVar as a call to one of the output buffers of
607  * the internal recfilter */
608  Halide::Expr operator[](int);
609 };
610 
611 /** Constructing an Expr from the final result of a recursive filter. This class
612  * allows using \c R(x,y) as pixel \c (x,y) of the final result of the filter */
614 private:
615  RecFilter rf;
616  std::vector<Halide::Expr> args;
617 
618 public:
619  RecFilterRefExpr(RecFilter r, std::vector<Halide::Expr> a);
620 
621  /** Use this RecFilterRefExpr as a call to the internal recfilter output */
622  operator Halide::Expr(void);
623 
624  /** Use this RecFilterRefExpr as a call to one of the output buffers of
625  * the internal recfilter */
626  Halide::Expr operator[](int);
627 };
628 
629 // -----------------------------------------------------------------------------
630 
631 /** Scheduling tags for RecFilter function dimensions
632  * \todo add documentation for members
633  * \todo simplify this API
634  */
635 class VarTag {
636 public:
637  VarTag(void);
638  VarTag(const VarTag &t);
639  VarTag(const VariableTag &t);
640  VarTag(const VarTag &t, int i);
641  VarTag(const VariableTag &t, int i);
642  VarTag(int i);
643 
644  VarTag& operator=(const VarTag &t);
645  VarTag& operator=(const VariableTag &t);
646 
647  int as_integer(void) const;
648  VarTag split_var (void) const;
649  int count (void) const;
650  bool has_count (void) const;
651  int check (const VariableTag &t) const;
652  bool same_except_count(const VarTag &t) const;
653 
654 private:
655  VariableTag tag;
656 };
657 
658 // -----------------------------------------------------------------------------
659 
660 /** @name Printing utils for recursive filter, Halide functions and schedules */
661 // {@
662 std::ostream &operator<<(std::ostream &s, const RecFilter &r);
663 std::ostream &operator<<(std::ostream &s, const RecFilterFunc &f);
664 std::ostream &operator<<(std::ostream &s, const RecFilterDim &f);
665 std::ostream &operator<<(std::ostream &s, const Halide::Func &f);
666 std::ostream &operator<<(std::ostream &s, const Halide::Internal::Function &f);
667 // @}
668 
669 // ----------------------------------------------------------------------------
670 
/** Command line arg parser: the constructor takes the program's argument
 * list and the parsed settings are exposed as public data members */
class Arguments {
public:
    /** Parse command line args from number of args and list of args */
    Arguments(int argc, char** argv);

    /// image width
    int width;
    /// max image width
    int max_width;
    /// min image width
    int min_width;
    /// block size
    int block;
    /// profiling iterations
    int iterations;
    /// skip check Halide result against reference solution
    bool nocheck;
    /// do not use automatic scheduling
    bool noschedule;
};
685 
686 // ----------------------------------------------------------------------------
687 
688 // Random image generation and printing utils
689 
690 /** Generate an image of a given size with random entries */
691 template<typename T>
692 Halide::Image<T> generate_random_image(size_t w, size_t h=0, size_t c=0, size_t d=0) {
693  Halide::Image<T> image;
694 
695  int MIN_ELEMENT = 1;
696  int MAX_ELEMENT = 1;
697 
698  if (w && h && c && d) {
699  image = Halide::Image<T>(w,h,c,d);
700  } else if (w && h && c) {
701  image = Halide::Image<T>(w,h,c);
702  } else if (w && h) {
703  image = Halide::Image<T>(w,h);
704  } else if (w) {
705  image = Halide::Image<T>(w);
706  }
707 
708  if (image.dimensions() == 1) {
709  for (size_t x=0; x<w; x++) {
710  image(x) = T(MIN_ELEMENT + (rand() % MAX_ELEMENT));
711  }
712  }
713  else if (image.dimensions() == 2) {
714  for (size_t y=0; y<h; y++) {
715  for (size_t x=0; x<w; x++) {
716  image(x,y) = T(MIN_ELEMENT + (rand() % MAX_ELEMENT));
717  }
718  }
719  }
720  else if (image.dimensions() == 3) {
721  for (size_t z=0; z<c; z++) {
722  for (size_t y=0; y<h; y++) {
723  for (size_t x=0; x<w; x++) {
724  image(x,y,z) = T(MIN_ELEMENT + (rand() % MAX_ELEMENT));
725  }
726  }
727  }
728  }
729  else if (image.dimensions() == 4) {
730  for (size_t t=0; t<d; t++) {
731  for (size_t z=0; z<c; z++) {
732  for (size_t y=0; y<h; y++) {
733  for (size_t x=0; x<w; x++) {
734  image(x,y,z,t) = T(MIN_ELEMENT + (rand() % MAX_ELEMENT));
735  }
736  }
737  }
738  }
739  }
740  return image;
741 }
742 
743 
744 /** Print an image */
745 template<typename T>
746 std::ostream &operator<<(std::ostream &s, Halide::Image<T> image) {
747  int precision = 4;
748  if (image.dimensions() == 1) {
749  for (size_t x=image.min(0); x<image.min(0)+image.extent(0); x++) {
750  s << std::setw(precision) << image(x) << " ";
751  }
752  s << "\n";
753  }
754  else if (image.dimensions() == 2) {
755  for (size_t y=image.min(1); y<image.min(1)+image.extent(1); y++) {
756  for (size_t x=image.min(0); x<image.min(0)+image.extent(0); x++) {
757  s << std::setw(precision) << float(image(x,y)) << " ";
758  }
759  s << "\n";
760  }
761  }
762  else if (image.dimensions() == 3) {
763  for (size_t z=image.min(2); z<image.min(2)+image.extent(2); z++) {
764  for (size_t y=image.min(1); y<image.min(1)+image.extent(1); y++) {
765  for (size_t x=image.min(0); x<image.min(0)+image.extent(0); x++) {
766  s << std::setw(precision) << float(image(x,y,z)) << " ";
767  }
768  s << "\n";
769  }
770  s << "--\n";
771  }
772  }
773  else if (image.dimensions() == 4) {
774  for (size_t w=image.min(3); w<image.min(3)+image.extent(3); w++) {
775  for (size_t z=image.min(2); z<image.min(2)+image.extent(2); z++) {
776  for (size_t y=image.min(1); y<image.min(1)+image.extent(1); y++) {
777  for (size_t x=image.min(0); x<image.min(0)+image.extent(0); x++) {
778  s << std::setw(precision) << float(image(x,y,z,w)) << " ";
779  }
780  s << "\n";
781  }
782  s << "--\n";
783  }
784  s << "--\n";
785  }
786  }
787  return s;
788 }
789 
790 // ----------------------------------------------------------------------------
791 
792 /** Compare ref and Halide solutions and print the mean square error */
793 template <typename T>
794 class CheckResult {
795 public:
796  float max_diff; ///< max diff percentage
797  float mean_diff; ///< mean diff percentage
798  Halide::Image<T> ref; ///< reference solution
799  Halide::Image<T> out; ///< Halide solution
800  Halide::Image<float> diff; ///< pixel wise diff
801 
802  CheckResult(Halide::Image<T> r, Halide::Image<T> o)
803  : max_diff(0.0), mean_diff(0.0), ref(r), out(o)
804  {
805  assert(r.width() == o.width());
806  assert(r.height() == o.height());
807  assert(r.channels()== o.channels());
808 
809  int width = r.width();
810  int height = r.height();
811  int channels= r.channels();
812 
813  diff = Halide::Image<float>(width, height, channels);
814 
815  for (int z=0; z<channels; z++) {
816  for (int y=0; y<height; y++) {
817  for (int x=0; x<width; x++) {
818  diff(x,y,z) = r(x,y,z) - o(x,y,z);
819  float re = 100.0 * std::abs(diff(x,y,z)) / (r(x,y,z) + 1e-9);
820  mean_diff += re;
821  max_diff = std::max(re, max_diff);
822  }
823  }
824  }
825  mean_diff /= float(width*height*channels);
826  }
827 };
828 
829 /** Compare ref and Halide solutions and print the verbose difference */
830 template <typename T>
831 class CheckResultVerbose : public CheckResult<T> {
832 public:
833  CheckResultVerbose(Halide::Image<T> r, Halide::Image<T> o) :
834  CheckResult<T>(r,o) {}
835 };
836 
837 
838 /** Print the synopsis of checking error */
839 template<typename T>
840 std::ostream &operator<<(std::ostream &s, const CheckResult<T> &v) {
841  s << "Max relative error = " << v.max_diff << " % \n";
842  s << "Mean relative error = " << v.mean_diff << " % \n\n";
843  return s;
844 }
845 
846 /** Print the result and synopsis of checking error */
847 template<typename T>
848 std::ostream &operator<<(std::ostream &s, const CheckResultVerbose<T> &v) {
849  s << "Reference" << "\n" << v.ref << "\n";
850  s << "Halide output" << "\n" << v.out << "\n";
851  s << "Difference " << "\n" << v.diff << "\n";
852  s << "Max relative error = " << v.max_diff << " % \n";
853  s << "Mean relative error = " << v.mean_diff << " % \n\n";
854  return s;
855 }
856 
857 
858 #endif // _RECURSIVE_FILTER_H_
Halide::Target target(void)
Get the compilation target, inferred from HL_JIT_TARGET.
RecFilterSchedule & split(VarTag v, int factor)
RecFilterSchedule & compute_locally(void)
VarTag inner_channels(void)
VarTag split_var(void) const
int as_integer(void) const
RecFilterSchedule & compute_globally(void)
Constructing an Expr from the final result of a recursive filter.
Definition: recfilter.h:613
bool empty(void)
Halide::Image< T > out
Halide solution.
Definition: recfilter.h:799
void gpu_auto_schedule(int tile_width=32)
Automatic GPU schedule for tiled or non-tiled recursive filter; calls RecFilter::gpu_auto_full_schedu...
std::string print_schedule(void) const
void split(RecFilterDim x, int tx)
RecFilterSchedule full_schedule(void)
Extract a handle to schedule non-tiled filter.
Halide::Func func(std::string func_name)
Extract the constituent function by name, useful for debugging:
Halide::Expr operator[](int)
Use this RecFilterRefVar as a call to the one of the output buffers of the internal recfilter...
std::string name(void) const
Name of the filter.
RecFilterDimAndCausality(void)
Empty constructor.
Definition: recfilter.h:105
VarTag full(int i=-1)
std::string print_synopsis(void) const
std::string print_functions(void) const
VarTag inner_scan(void)
RecFilterSchedule intra_schedule(int id=0)
Extract a handle to schedule intra-tile functions of the tiled filter.
int num_pixels(void) const
Size of input/output buffer indexed by this dimension.
Definition: recfilter.h:118
RecFilterSchedule & storage_layout(VarTag innermost, VarTag outermost)
RecFilterSchedule & gpu_blocks(VarTag v1)
RecFilter & operator=(const RecFilter &r)
Standard assignment operator.
Recursive filter function with scheduling interface.
VarTag outer_channels(void)
Recursive filter class.
Definition: recfilter.h:146
float max_diff
max diff percentage
Definition: recfilter.h:796
RecFilterSchedule & reorder_storage(std::vector< VarTag > x)
Halide::Func as_func(void)
Cast the recfilter as a Halide::Func; this returns the function that holds the final result of this f...
Command line arg parser.
Definition: recfilter.h:672
bool noschedule
do not use automatic scheduling
Definition: recfilter.h:680
VarTag outer(int i=-1)
void compile_jit(std::string filename="")
Trigger JIT compilation for specified hardware-platform target; dumps the generated codegen in human ...
int iterations
profiling iterations
Definition: recfilter.h:678
Halide::Realization realize(void)
Compute the filter.
int min_width
min image width
Definition: recfilter.h:676
void cpu_auto_schedule(void)
Automatic CPU schedule for tiled or non-tiled recursive filter; calls RecFilter::cpu_auto_full_schedu...
RecFilterSchedule(RecFilter &r, std::vector< std::string > fl)
void cpu_auto_inter_schedule(void)
Automatic CPU schedule for inter-tile functions of tiled filter.
std::vector< RecFilter > cascade_by_dimension(void)
Compute all scans in the same dimension in an overlapped fashion and cascade different dimensions...
RecFilterSchedule & fuse(VarTag v1, VarTag v2)
void apply_bounds(void)
Apply output domains bounds; this is performed implicitly for tiled filters, but it must be called by...
bool has_count(void) const
void gpu_auto_inter_schedule(void)
Automatic GPU schedule for inter-tile functions of tiled filter.
RecFilterDim(void)
Empty constructor.
Definition: recfilter.h:75
RecFilterDim(std::string var_name, int var_extent)
Constructor.
Definition: recfilter.h:81
void set_clamped_image_border(void)
VarTag inner(int i=-1)
CheckResultVerbose(Halide::Image< T > r, Halide::Image< T > o)
Definition: recfilter.h:833
VarTag tail(void)
Info about scans in a particular dimension.
Compare ref and Halide solutions and print the verbose difference.
Definition: recfilter.h:831
Halide::Image< float > diff
pixel wise diff
Definition: recfilter.h:800
std::ostream & operator<<(std::ostream &s, const RecFilter &r)
int block
block size
Definition: recfilter.h:677
Halide::Image< T > generate_random_image(size_t w, size_t h=0, size_t c=0, size_t d=0)
Generate an image of a given size with random entries.
Definition: recfilter.h:692
void operator=(Halide::Expr pure_def)
Use this as the left-hand-side of a definition.
int max_width
max image width
Definition: recfilter.h:675
Scheduling tags for RecFilter function dimensions.
Definition: recfilter.h:635
static void set_max_threads_per_cuda_warp(int v)
Set the maximum threads to launch per CUDA warp, must be called before scheduling the RecFilter objec...
Data members of the recursive filter.
Arguments(int argc, char **argv)
Parse command line args from number of args and list of args.
bool same_except_count(const VarTag &t) const
void add_filter(RecFilterDim x, std::vector< float > coeff)
RecFilterSchedule & unroll(VarTag v, int factor=0)
VarTag full_scan(void)
RecFilterSchedule & reorder(std::vector< VarTag > x)
int width
image width
Definition: recfilter.h:674
Scheduling tags for Functions.
int count(void) const
VarTag outer_scan(void)
RecFilterRefVar operator()(RecFilterDim x)
RecFilterDimAndCausality operator+(RecFilterDim x)
Operator to create causal scan indication, +x indicates causal scan where x is a RecFilterDim object...
void define(std::vector< RecFilterDim > pure_args, std::vector< Halide::Expr > pure_def)
Add a pure definition to the recursive filter.
int check(const VariableTag &t) const
void cpu_auto_full_schedule(void)
Automatic CPU schedule for non-tiled filter.
std::vector< RecFilter > cascade_by_causality(void)
Compute all causal scans in all dimensions in an overlapped fashion and all anticausal scans in an ...
Halide::Image< T > ref
reference solution
Definition: recfilter.h:798
RecFilter(std::string name="")
Empty constructor.
RecFilter overlap_to_higher_order_filter(RecFilter fA, std::string name="O")
Overlap a given filter with the current filter creating a higher order filter.
RecFilterDimAndCausality(RecFilterDim rec_var, bool causal)
Constructor.
Definition: recfilter.h:111
CheckResult(Halide::Image< T > r, Halide::Image< T > o)
Definition: recfilter.h:802
Filter dimension augmented with causality.
Definition: recfilter.h:98
VarTag(void)
Create an expression that can be used to initialize a pixel.
Definition: recfilter.h:580
RecFilterRefVar(RecFilter r, std::vector< RecFilterDim > a)
Handle to schedule internal Halide functions that constitute the recursive filter.
Definition: recfilter.h:516
void gpu_auto_intra_schedule(int id)
Automatic GPU schedule for intra-tile functions of tiled filter.
RecFilterSchedule inter_schedule(void)
Extract a handle to schedule inter-tile functions of the tiled filter.
void compute_at(RecFilter external)
Set final result of filter to be computed at an external recursive filter, useful for merging the fil...
void gpu_auto_full_schedule(int tile_width=32)
Automatic GPU schedule for non-tiled filter and return a handle for additional scheduling.
float mean_diff
mean diff percentage
Definition: recfilter.h:797
bool nocheck
skip check Halide result against reference solution
Definition: recfilter.h:679
Halide::Expr operator[](int)
Use this RecFilterRefVar as a call to one of the output buffers of the internal recfilter...
std::string print_hl_code(void) const
RecFilterSchedule & vectorize(VarTag v, int factor=0)
void split_all_dimensions(int tx)
float profile(int iterations)
Profile the filter.
RecFilterRefExpr(RecFilter r, std::vector< Halide::Expr > a)
bool causal(void) const
Causality of the dimension.
Definition: recfilter.h:121
Halide::Var var(void) const
Convert into Halide::Var for interoperability with Halide code.
Definition: recfilter.h:85
RecFilterSchedule & gpu_threads(VarTag v1)
RecFilterSchedule & parallel(VarTag v, int factor=0)
Filter dimension with variable name and width of image in the dimension.
Definition: recfilter.h:68
static void set_vectorization_width(int v)
Set the vectorization width, must be called before scheduling the RecFilter object.
int num_pixels(void) const
Size of input/output buffer indexed by this dimension.
Definition: recfilter.h:88
VarTag & operator=(const VarTag &t)
RecFilterDimAndCausality operator-(RecFilterDim x)
Operator to create anticausal scan indication, -x indicates anticausal scan where x is a RecFilterDim obj...
std::vector< RecFilter > cascade(std::vector< int > a, std::vector< int > b)
Cascade the filter to produce multiple filters using list of list of scans and producing a list of re...
void cpu_auto_intra_schedule(void)
Automatic CPU schedule for intra-tile functions of tiled filter.
Halide::Var var(void) const
Convert into Halide::Var for interoperability with Halide code.
Definition: recfilter.h:115
Compare ref and Halide solutions and print the mean square error.
Definition: recfilter.h:794