cu-matrix.h
Go to the documentation of this file.
1 // cudamatrix/cu-matrix.h
2 
3 // Copyright 2009-2012 Karel Vesely
4 // 2013 Johns Hopkins University (author: Daniel Povey)
5 // 2013 Hainan Xu
6 // 2013 Xiaohui Zhang
7 // 2013-2015 Guoguo Chen
8 // 2017 Shiyin Kang
9 // 2019 Yiwen Shao
10 
11 // See ../../COPYING for clarification regarding multiple authors
12 //
13 // Licensed under the Apache License, Version 2.0 (the "License");
14 // you may not use this file except in compliance with the License.
15 // You may obtain a copy of the License at
16 //
17 // http://www.apache.org/licenses/LICENSE-2.0
18 //
19 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
21 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
22 // MERCHANTABLITY OR NON-INFRINGEMENT.
23 // See the Apache 2 License for the specific language governing permissions and
24 // limitations under the License.
25 
26 
27 
28 #ifndef KALDI_CUDAMATRIX_CU_MATRIX_H_
29 #define KALDI_CUDAMATRIX_CU_MATRIX_H_
30 
31 #include <sstream>
32 #include <vector>
33 
35 #include "cudamatrix/cu-common.h"
36 #include "cudamatrix/cu-value.h"
37 #include "matrix/matrix-common.h"
38 #include "matrix/kaldi-matrix.h"
39 #include "cudamatrix/cu-array.h"
40 #include "cudamatrix/cu-math.h"
41 #include "cudamatrix/cu-rand.h"
43 
44 namespace kaldi {
45 
46 template<typename Real>
47 Real TraceMatMat(const CuMatrixBase<Real> &A, const CuMatrixBase<Real> &B,
49 
54 template<typename Real>
55 void AddMatMatBatched(const Real alpha, std::vector<CuSubMatrix<Real>* > &C,
56  const std::vector<CuSubMatrix<Real>* > &A,
57  MatrixTransposeType transA,
58  const std::vector<CuSubMatrix<Real>* > &B,
59  MatrixTransposeType transB,
60  const Real beta);
61 
69 /*
70 template<typename Real>
71 struct MatrixElement {
72  int row;
73  int column;
74  Real weight;
75 };
76 // */
77 
78 template<typename Real>
79 class CuMatrixBase {
80  public:
81  friend class CuMatrixBase<float>;
82  friend class CuMatrixBase<double>;
83  friend class CuVectorBase<float>;
84  friend class CuVectorBase<double>;
85  friend class VectorBase<Real>;
86  friend class CuSpMatrix<Real>;
87  friend class CuTpMatrix<float>;
88  friend class CuTpMatrix<double>;
89  friend class CuVectorBase<Real>;
90  friend class CuSubMatrix<Real>;
91  friend class CuRand<Real>;
92  friend class CuSubVector<Real>;
93  friend class CuBlockMatrix<Real>;
94  friend class CuSparseMatrix<float>;
95  friend class CuSparseMatrix<double>;
96  friend class CuSparseMatrix<Real>;
97 
102  void CopyCols(const CuMatrixBase<Real> &src,
103  const CuArrayBase<MatrixIndexT> &indexes);
104 
105 
110  void AddCols(const CuMatrixBase<Real> &src,
111  const CuArrayBase<MatrixIndexT> &indices);
112 
116  void CopyRows(const CuMatrixBase<Real> &src,
117  const CuArrayBase<MatrixIndexT> &indexes);
118 
125  void CopyRows(const CuArrayBase<const Real*> &src);
126 
134  void CopyToRows(const CuArrayBase<Real*> &dst) const;
135 
139  void AddRows(Real alpha,
140  const CuMatrixBase<Real> &src,
141  const CuArrayBase<MatrixIndexT> &indexes);
142 
143 
148  void MulRows(const CuMatrixBase<Real> &src,
149  const CuArrayBase<MatrixIndexT> &indexes);
150 
151 
155  void AddRows(Real alpha,
156  const CuArrayBase<const Real*> &src);
157 
158 
163  void AddToRows(Real alpha,
164  const CuArrayBase<MatrixIndexT> &indexes,
165  CuMatrixBase<Real> *dst) const;
166 
167 
175  void AddToRows(Real alpha, const CuArrayBase<Real*> &dst) const;
176 
177 
181  void SumColumnRanges(const CuMatrixBase<Real> &src,
182  const CuArrayBase<Int32Pair> &indexes);
183 
184 
190  void AddRowRanges(const CuMatrixBase<Real> &src,
191  const CuArrayBase<Int32Pair> &indexes);
192 
193 
194  friend Real TraceMatMat<Real>(const CuMatrixBase<Real> &A,
195  const CuMatrixBase<Real> &B,
196  MatrixTransposeType trans);
197 
198  friend Real TraceMatSmat<Real>(const CuMatrixBase<Real> &A,
199  const CuSparseMatrix<Real> &B,
200  MatrixTransposeType trans);
201 
202  friend void AddMatMatBatched<Real>(const Real alpha,
203  std::vector<CuSubMatrix<Real>* > &C,
204  const std::vector<CuSubMatrix<Real>* > &A,
205  MatrixTransposeType transA,
206  const std::vector<CuSubMatrix<Real>* > &B,
207  MatrixTransposeType transB,
208  const Real beta);
209 
212  void AddToDiag(Real value);
213 
215  MatrixIndexT NumRows() const { return num_rows_; }
216  MatrixIndexT NumCols() const { return num_cols_; }
217  MatrixIndexT Stride() const { return stride_; }
218 
219  // MatrixDim is a struct containing "rows", "cols" and "stride",
220  // that is an argument of most CUDA kernels.
221  ::MatrixDim Dim() const {
223  return d;
224  }
225 
226  Real FrobeniusNorm() const { return sqrt(TraceMatMat(*this, *this, kTrans)); }
227 
228  bool IsUnit(Real tol = 0.001) const;
229 
231  bool ApproxEqual(const CuMatrixBase<Real> &other, float tol = 0.01) const;
232 
234  MatrixIndexT SizeInBytes() const { return num_rows_*stride_*sizeof(Real); }
235 
236  // Copy functions. These do not resize.
237  template<typename OtherReal>
238  void CopyFromMat(const MatrixBase<OtherReal> &src,
239  MatrixTransposeType trans = kNoTrans);
240 
241 
242  void CopyFromGeneralMat(const GeneralMatrix &src,
243  MatrixTransposeType trans = kNoTrans);
244 
245  void CopyFromMat(const MatrixBase<Real> &src,
246  MatrixTransposeType trans = kNoTrans);
247 
248  void CopyFromSp(const CuSpMatrix<Real> &M);
249 
250  template<typename OtherReal>
251  void CopyFromTp(const CuTpMatrix<OtherReal> &M,
252  MatrixTransposeType trans = kNoTrans);
253 
254  // This function will copy from source rows (start_range, end_range]
255  // if the range is outside of the clamped region then the clamped
256  // row will be replicated across the out of range areas
258  int32_t start_range, int32_t end_range,
259  int32_t clamp_low, int32_t clamp_high);
260 
261  template<typename OtherReal>
262  void CopyFromMat(const CuMatrixBase<OtherReal> &M,
263  MatrixTransposeType trans = kNoTrans);
264 
265  template<typename OtherReal>
267  MatrixTransposeType trans = kNoTrans) const;
268 
273  void CopyRowsFromVec(const CuVectorBase<Real> &v);
274 
276  void CopyRowsFromVec(const VectorBase<Real> &v);
277 
281  void CopyColsFromVec(const CuVectorBase<Real> &v);
282 
284  void CopyColFromVec(const CuVectorBase<Real> &v, const MatrixIndexT col);
285 
288  void Sigmoid(const CuMatrixBase<Real> &src);
289 
293  void Heaviside(const CuMatrixBase<Real> &src);
294 
295  void Exp(const CuMatrixBase<Real> &src);
296 
297  void Log(const CuMatrixBase<Real> &src);
298 
299  void Pow(const CuMatrixBase<Real> &src, Real power);
300 
307  void PowAbs(const CuMatrixBase<Real> &src, Real power, bool include_sign=false);
308 
309  void Floor(const CuMatrixBase<Real> &src, Real floor_val);
310 
311  void Ceiling(const CuMatrixBase<Real> &src, Real ceiling_val);
312 
317  void ExpLimited(const CuMatrixBase<Real> &src, Real lower_limit, Real upper_limit);
318 
322  void ExpSpecial(const CuMatrixBase<Real> &src);
323 
328  void SoftMaxPerRow(const CuMatrixBase<Real> &src);
329 
334  void LogSoftMaxPerRow(const CuMatrixBase<Real> &src);
335 
336 
340  void SoftHinge(const CuMatrixBase<Real> &src);
341 
346  void GroupPnorm(const CuMatrixBase<Real> &src, Real pow);
347 
350  void DiffGroupPnorm(const CuMatrixBase<Real> &in_value,
351  const CuMatrixBase<Real> &out_value,
352  const CuMatrixBase<Real> &out_deriv, Real power);
353 
358  void GroupMax(const CuMatrixBase<Real> &src);
359 
367  void GroupMaxDeriv(const CuMatrixBase<Real> &input,
368  const CuMatrixBase<Real> &output);
369 
372  void ParametricRelu(const CuMatrixBase<Real> &src,
373  const CuVectorBase<Real> &alpha,
374  const CuVectorBase<Real> &beta);
375 
379  void DiffParametricRelu(const CuMatrixBase<Real> &value,
380  const CuMatrixBase<Real> &diff,
381  const CuVectorBase<Real> &alpha,
382  const CuVectorBase<Real> &beta);
383 
386  void Tanh(const CuMatrixBase<Real> &src);
387 
390  void DiffSigmoid(const CuMatrixBase<Real> &value,
391  const CuMatrixBase<Real> &diff);
392 
395  void DiffTanh(const CuMatrixBase<Real> &value,
396  const CuMatrixBase<Real> &diff);
397 
403  void DiffSoftmaxPerRow(const CuMatrixBase<Real> &value,
404  const CuMatrixBase<Real> &diff);
405 
411  void DiffLogSoftmaxPerRow(const CuMatrixBase<Real> &out_value,
412  const CuMatrixBase<Real> &out_deriv);
413 
423  void DiffXent(const CuArrayBase<int32> &tgt,
424  CuVector<Real> *log_post_tgt);
425 
430  void Cholesky(CuMatrixBase<Real> *inv_cholesky = NULL);
431 
432 
436  void SymInvertPosDef();
437 
438  inline void ApplyPow(Real power) {
439  this -> Pow(*this, power);
440  };
441 
442 
443  inline void ApplyPowAbs(Real power, bool include_sign=false) {
444  this -> PowAbs(*this, power, include_sign);
445  };
446 
447  inline void ApplyHeaviside() {
448  this -> Heaviside(*this);
449  };
450 
451  inline void ApplyFloor(Real floor_val) {
452  this -> Floor(*this, floor_val);
453  };
454 
455  inline void ApplyCeiling(Real ceiling_val) {
456  this -> Ceiling(*this, ceiling_val);
457  };
458 
459  inline void ApplyExp() {
460  this -> Exp(*this);
461  };
462 
463 
464  inline void ApplyExpLimited(Real lower_limit, Real upper_limit) {
465  this -> ExpLimited(*this, lower_limit, upper_limit);
466  };
467 
468  inline void ApplyExpSpecial() {
469  this -> ExpSpecial(*this);
470  };
471 
472  inline void ApplySoftMaxPerRow() {
473  this -> SoftMaxPerRow(*this);
474  };
475 
476  inline void ApplyLogSoftMaxPerRow() {
477  this -> LogSoftMaxPerRow(*this);
478  };
479 
480  inline void ApplyLog() {
481  this -> Log(*this);
482  };
483 
486  void FindRowMaxId(CuArray<int32> *id) const;
487 
489  void SetZero();
490  void Set(Real value);
491  void Add(Real value);
493  void SetZeroAboveDiag();
494  void Scale(Real value);
495 
497  void MulElements(const CuMatrixBase<Real> &A);
499  void DivElements(const CuMatrixBase<Real> &A);
501  void Max(const CuMatrixBase<Real> &A);
503  void Min(const CuMatrixBase<Real> &A);
505  void MulColsVec(const CuVectorBase<Real> &scale);
507  void MulRowsVec(const CuVectorBase<Real> &scale);
509  void MulRowsGroupMat(const CuMatrixBase<Real> &src);
511  void DivRowsVec(const CuVectorBase<Real> &div);
513  void InvertElements();
515  void AddMat(Real alpha, const CuMatrixBase<Real> &A,
516  MatrixTransposeType trans = kNoTrans);
517 
519  void AddSmat(Real alpha, const CuSparseMatrix<Real> &A,
520  MatrixTransposeType trans = kNoTrans);
521 
527  void AddSmatMat(Real alpha, const CuSparseMatrix<Real> &A,
528  MatrixTransposeType transA, const CuMatrixBase<Real> &B,
529  Real beta);
530 
535  void AddMatSmat(Real alpha, const CuMatrixBase<Real> &A,
536  const CuSparseMatrix<Real> &B, MatrixTransposeType transB,
537  Real beta);
538 
539 
547  void AddToElements(Real alpha, const CuArrayBase<int32> &elements);
548 
549 
566  void AddMatBlocks(Real alpha, const CuMatrixBase<Real> &A,
567  MatrixTransposeType trans = kNoTrans);
568 
570  void AddVecToCols(Real alpha, const CuVectorBase<Real> &col, Real beta = 1.0);
572  void AddVecToRows(Real alpha, const CuVectorBase<Real> &row, Real beta = 1.0);
574  void AddMatMat(Real alpha, const CuMatrixBase<Real> &A, MatrixTransposeType transA,
575  const CuMatrixBase<Real> &B, MatrixTransposeType transB, Real beta);
577  void AddVecVec(Real alpha, const CuVectorBase<Real> &x, const CuVectorBase<Real> &y);
580  void SetMatMatDivMat(const CuMatrixBase<Real> &A, const CuMatrixBase<Real> &B, const CuMatrixBase<Real> &C);
581 
585  void SymAddMat2(const Real alpha, const CuMatrixBase<Real> &M,
586  MatrixTransposeType transA, Real beta);
587 
588 
591  void AddMatBlock(Real alpha, const CuMatrixBase<Real> &A, MatrixTransposeType transA,
592  const CuBlockMatrix<Real> &B, MatrixTransposeType transB, Real beta);
593 
596  void AddDiagVecMat(const Real alpha, const CuVectorBase<Real> &v,
597  const CuMatrixBase<Real> &M, MatrixTransposeType transM,
598  Real beta = 1.0);
599 
600  // *this = beta * *this + alpha * M * diag(v) [or M^T].
601  // The same as adding M but scaling each column M_j by v(j).
602  void AddMatDiagVec(const Real alpha,
603  const CuMatrixBase<Real> &M, MatrixTransposeType transM,
605  Real beta = 1.0);
606 
608  void AddMatMatElements(const Real alpha,
609  const CuMatrixBase<Real>& A,
610  const CuMatrixBase<Real>& B,
611  const Real beta);
612 
614  void AddMatSp(const Real alpha,
615  const CuMatrixBase<Real> &A, MatrixTransposeType transA,
616  const CuSpMatrix<Real> &B,
617  const Real beta) {
618  CuMatrix<Real> M(B);
619  return AddMatMat(alpha, A, transA, M, kNoTrans, beta);
620  }
621 
623  void AddSpMat(const Real alpha,
624  const CuSpMatrix<Real> &A,
625  const CuMatrixBase<Real> &B, MatrixTransposeType transB,
626  const Real beta) {
627  CuMatrix<Real> M(A);
628  return AddMatMat(alpha, M, kNoTrans, B, transB, beta);
629  }
630 
632  void AddTpMat(const Real alpha,
633  const CuTpMatrix<Real> &A, MatrixTransposeType transA,
634  const CuMatrixBase<Real> &B, MatrixTransposeType transB,
635  const Real beta) {
636  CuMatrix<Real> M(A);
637  return AddMatMat(alpha, M, transA, B, transB, beta);
638  }
639 
641  void AddMatTp(const Real alpha,
642  const CuMatrixBase<Real> &A, MatrixTransposeType transA,
643  const CuTpMatrix<Real> &B, MatrixTransposeType transB,
644  const Real beta) {
645  CuMatrix<Real> M(B);
646  return AddMatMat(alpha, A, transA, M, transB, beta);
647  }
648 
649  void CopyFromBlock(const CuBlockMatrix<Real> &B,
650  MatrixTransposeType trans = kNoTrans);
651  void CopyLowerToUpper();
652  void CopyUpperToLower();
653  inline CuSubMatrix<Real> Range(const MatrixIndexT row_offset,
654  const MatrixIndexT num_rows,
655  const MatrixIndexT col_offset,
656  const MatrixIndexT num_cols) const {
657  return CuSubMatrix<Real>(*this, row_offset, num_rows,
658  col_offset, num_cols);
659  }
660  inline CuSubMatrix<Real> RowRange(const MatrixIndexT row_offset,
661  const MatrixIndexT num_rows) const {
662  return CuSubMatrix<Real>(*this, row_offset, num_rows,
663  0, num_cols_);
664  }
665  inline CuSubMatrix<Real> ColRange(const MatrixIndexT col_offset,
666  const MatrixIndexT num_cols) const {
667  return CuSubMatrix<Real>(*this, 0, num_rows_, col_offset, num_cols);
668  }
669 
670  inline const CuSubVector<Real> Row(MatrixIndexT i) const {
671  KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
672  static_cast<UnsignedMatrixIndexT>(num_rows_));
673  return CuSubVector<Real>(data_ + (i * stride_), NumCols());
674  }
675 
677  KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
678  static_cast<UnsignedMatrixIndexT>(num_rows_));
679  return CuSubVector<Real>(data_ + (i * stride_), NumCols());
680  }
681 
683  KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
684  static_cast<UnsignedMatrixIndexT>(num_rows_) &&
685  static_cast<UnsignedMatrixIndexT>(c) <
686  static_cast<UnsignedMatrixIndexT>(num_cols_));
687  return CuValue<Real>(data_ + r * stride_ + c);
688  }
689 
690  inline Real operator() (MatrixIndexT r, MatrixIndexT c) const {
691  KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
692  static_cast<UnsignedMatrixIndexT>(num_rows_) &&
693  static_cast<UnsignedMatrixIndexT>(c) <
694  static_cast<UnsignedMatrixIndexT>(num_cols_));
695  return CuValue<Real>(data_ + r * stride_ + c); // will be casted to Real.
696  }
697 
698  Real Sum() const;
699  Real Max() const;
700  Real Min() const;
701 
703  Real Trace(bool check_square = true) const;
704 
705  void SetRandn();
706 
707  void SetRandUniform();
708 
709  void Write(std::ostream &os, bool binary) const;
710 
711  // This function adds a list of MatrixElements (scaled by alpha) to the
712  // corresponding locations of (*this).
713  void AddElements(Real alpha, const std::vector<MatrixElement<Real> >& input);
714 
715  // For each i, with indexes[i] = (j, k), does (*this)(j, k) += input[i].
716  // Requires, but does not check, that the vector of indexes does not contain
717  // repeated elements, 'input' is the start of an array of length equal to
718  // indexes.Dim(), which is located on GPU memory if we are using the GPU.
719  void AddElements(Real alpha, const CuArrayBase<Int32Pair> &indexes,
720  const Real *input);
721 
722  // This function requires that 'output' is a host array and is allocated with size
723  // of indexes.size(), and for each element of 'indexes' it interprets it as
724  // a (row, column) index into *this, and puts (*this)(row, column) into
725  // the corresponding element of 'output'.
726  void Lookup(const std::vector<Int32Pair> &indexes,
727  Real *output) const;
728 
729  // CUDA version of Lookup, would be called internally by the above function.
730  void Lookup(const CuArrayBase<Int32Pair> &indexes,
731  Real *output) const;
732 
733  // Creates binary mask with per-element equality predicates of *this, mat.
734  // Output stored to 'mask', values : 1.0 = equal, 0.0 = not-equal.
735  void EqualElementMask(const CuMatrixBase<Real> &mat, CuMatrix<Real> *mask) const;
736 
737 
740  inline const Real* RowData(MatrixIndexT r) const { return data_ + r * stride_; }
743  inline Real* RowData(MatrixIndexT r) { return data_ + r * stride_; }
746  inline const Real *Data() const { return data_; }
749  inline Real *Data() { return data_; }
750 
751  // The following two functions should only be called if we did not compile
752  // with CUDA or could not get a CUDA card; in that case the contents are
753  // interpreted the same as a regular matrix. DON'T USE THESE UNLESS YOU KNOW
754  // WHAT YOU ARE DOING!
755  inline const MatrixBase<Real> &Mat() const {
756  return *(reinterpret_cast<const MatrixBase<Real>* >(this));
757  }
758  inline MatrixBase<Real> &Mat() {
759  return *(reinterpret_cast<MatrixBase<Real>* >(this));
760  }
761 
762  protected:
763 
764  // The constructors are protected to prevent the user creating an instance of
765  // this class (you should create a child class CuMatrix or CuSubMatrix).
766 
767  CuMatrixBase(): data_(NULL), num_cols_(0), num_rows_(0), stride_(0) { }
768 
771  CuMatrixBase(Real *data,
772  MatrixIndexT num_rows,
773  MatrixIndexT num_cols,
774  MatrixIndexT stride):
775  data_(data), num_cols_(num_cols), num_rows_(num_rows), stride_(stride) { }
776 
777  Real *data_;
778  // Note: it might seem a bit backwards that we have the number of columns
781  // first here; it's necessary because we need the data to be laid out the same
782  // as for MatrixBase so the Mat() function call will work. We don't want to
783  // change the layout of MatrixBase at this point, or there will be crashes if
784  // people don't thoroughly recompile.
788 
789  private:
791 }; // class CuMatrixBase
792 
795 template<typename Real>
796 class CuMatrix: public CuMatrixBase<Real> {
797  public:
798 
799  CuMatrix() { }
800 
803  MatrixResizeType resize_type = kSetZero,
804  MatrixStrideType stride_type = kDefaultStride) {
805  Resize(rows, cols, resize_type, stride_type);
806  }
807 
808  // Note: we had to remove the "explicit" keyword due
809  // to problems with STL vectors of CuMatrixBase.
810  CuMatrix(const CuMatrix<Real> &other,
811  MatrixTransposeType trans = kNoTrans);
812 
813  explicit CuMatrix(const CuBlockMatrix<Real> &other,
814  MatrixTransposeType trans = kNoTrans);
815 
816  explicit CuMatrix(const CuMatrixBase<Real> &other,
817  MatrixTransposeType trans = kNoTrans);
818 
819  template<typename OtherReal>
820  explicit CuMatrix(const MatrixBase<OtherReal> &other,
821  MatrixTransposeType trans = kNoTrans);
822 
824  explicit CuMatrix(const CuSpMatrix<Real> &M) : CuMatrixBase<Real>() {
825  Resize(M.NumRows(), M.NumRows(), kUndefined);
826  this->CopyFromSp(M);
827  }
828 
830  template <typename OtherReal>
831  explicit CuMatrix(const CuTpMatrix<OtherReal> & M,
832  MatrixTransposeType trans = kNoTrans) : CuMatrixBase<Real>() {
833  Resize(M.NumCols(), M.NumRows(), kUndefined);
834  this->CopyFromTp(M, trans);
835  }
836 
838  template<typename OtherReal>
839  explicit CuMatrix(const CuMatrixBase<OtherReal> &M,
840  MatrixTransposeType trans = kNoTrans);
841 
842  CuMatrix<Real> &operator = (const CuMatrixBase<Real> &other) {
843  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
844  this->CopyFromMat(other);
845  return *this;
846  }
847 
848  CuMatrix<Real> &operator = (const CuMatrix<Real> &other) {
849  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
850  this->CopyFromMat(other);
851  return *this;
852  }
853 
854  CuMatrix<Real> &operator = (const MatrixBase<Real> &other) {
855  this->Resize(other.NumRows(), other.NumCols(), kUndefined);
856  this->CopyFromMat(other);
857  return *this;
858  }
859 
860  void Transpose();
861 
863  void Resize(MatrixIndexT rows, MatrixIndexT cols,
864  MatrixResizeType resize_type = kSetZero,
865  MatrixStrideType stride_type = kDefaultStride);
866 
867  void Swap(Matrix<Real> *mat);
868  void Swap(CuMatrix<Real> *mat);
869 
870  template<typename OtherReal>
871  void Swap(CuMatrix<OtherReal> *mat);
872 
874  void Read(std::istream &is, bool binary);
875 
877  ~CuMatrix() { Destroy(); }
878 
879  inline const Matrix<Real> &Mat() const {
880  return *(reinterpret_cast<const Matrix<Real>* >(this));
881  }
882  inline Matrix<Real> &Mat() {
883  return *(reinterpret_cast<Matrix<Real>* >(this));
884  }
885 
896  void CompObjfAndDeriv(const std::vector<MatrixElement<Real> > &elements,
897  const CuMatrix<Real> &A,
898  Real *tot_objf,
899  Real *tot_weight);
900 
901  private:
902  void Destroy();
903 };
904 
905 
907 template<typename Real>
908 class CuSubMatrix: public CuMatrixBase<Real> {
909  public:
910  inline CuSubMatrix(const CuMatrixBase<Real> &mat,
911  const MatrixIndexT row_offset,
912  const MatrixIndexT num_rows,
913  const MatrixIndexT col_offset,
914  const MatrixIndexT num_cols);
915 
916  // This constructor should be used with caution; it can be used for
917  // constructing 'fake' submatrices if you want to play with
918  // the stride. 'data' should point to GPU data if you're using the
919  // GPU.
920  inline CuSubMatrix(const Real *data,
921  const MatrixIndexT num_rows,
922  const MatrixIndexT num_cols,
923  const MatrixIndexT stride);
924 
927  inline CuSubMatrix<Real> (const CuSubMatrix &other):
928  CuMatrixBase<Real> (other.data_, other.num_rows_, other.num_cols_,
929  other.stride_) {}
930  private:
932  CuSubMatrix<Real> &operator = (const CuSubMatrix<Real> &other);
933 };
934 
935 
936 template<typename Real>
938  const CuMatrixBase<Real> &B, Real tol = 0.01) {
939  return A.ApproxEqual(B, tol);
940 }
941 
942 template<typename Real>
943 inline void AssertEqual(const CuMatrixBase<Real> &A,
944  const CuMatrixBase<Real> &B, float tol = 0.01) {
945  KALDI_ASSERT(A.ApproxEqual(B, tol));
946 }
947 
948 template<typename Real>
950  return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols());
951 }
952 
953 template<typename Real>
955  return (M.NumRows() == N.NumRows() && M.NumCols() == N.NumCols()
956  && M.Stride() == N.Stride());
957 }
958 
960 template<typename Real>
961 std::ostream &operator << (std::ostream &out, const CuMatrixBase<Real> &mat);
962 
963 
964 template<typename Real>
965 template<typename OtherReal>
967  MatrixTransposeType trans) {
968  if (trans == kNoTrans) Init(M.NumRows(), M.NumCols(), kDefaultStride);
969  else Init(M.NumCols(), M.NumRows(), kDefaultStride);
970  M.CopyToMat(this, trans);
971 }
972 
973 template<typename Real>
974 template<typename OtherReal>
976  MatrixTransposeType trans) {
977  cu.CopyToMat(this, trans);
978 }
979 
980 
981 } // namespace
982 
983 
985 
986 #endif
const MatrixBase< Real > & Mat() const
Definition: cu-matrix.h:755
CuSubVector< Real > Row(MatrixIndexT i)
Definition: cu-matrix.h:676
MatrixBase< Real > & Mat()
Definition: cu-matrix.h:758
void CopyFromMat(const MatrixBase< OtherReal > &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:344
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
MatrixIndexT Stride() const
Definition: cu-matrix.h:217
Real * Data()
Return data pointer.
Definition: cu-matrix.h:749
Matrix()
Empty constructor.
void ApplyCeiling(Real ceiling_val)
Definition: cu-matrix.h:455
This class is a wrapper that enables you to store a matrix in one of three forms: either as a Matrix<...
void SoftHinge(const CuMatrixBase< Real > &src)
Apply the function y = log(1 + exp(x)), to each element.
Definition: cu-matrix.cc:1555
void ApplyPow(Real power)
Definition: cu-matrix.h:438
Real Trace(bool check_square=true) const
Return the trace. If check_square = true, will crash if matrix is not square.
Definition: cu-matrix.cc:3075
MatrixResizeType
Definition: matrix-common.h:37
void GroupMax(const CuMatrixBase< Real > &src)
Apply the function y(i) = max_{j = i*G}^{(i+1)*G-1} x_j, where G = x.NumCols() / y.NumCols() must be an integer.
Definition: cu-matrix.cc:1617
void Write(std::ostream &os, bool binary) const
Definition: cu-matrix.cc:502
const CuSubVector< Real > Row(MatrixIndexT i) const
Definition: cu-matrix.h:670
void ExpLimited(const CuMatrixBase< Real > &src, Real lower_limit, Real upper_limit)
This is equivalent to running: Floor(src, lower_limit); Ceiling(src, upper_limit); Exp(src) ...
Definition: cu-matrix.cc:2541
void AddSmatMat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, Real beta)
(*this) = alpha * op(A) * B + beta * (*this), where A is sparse.
Definition: cu-matrix.cc:1024
void CopyToMat(MatrixBase< OtherReal > *dst, MatrixTransposeType trans=kNoTrans) const
Definition: cu-matrix.cc:447
void AddToElements(Real alpha, const CuArrayBase< int32 > &elements)
This is a rather special purpose function; we might generalize it later by adding a transpose-type op...
Definition: cu-matrix.cc:3344
MatrixIndexT NumRows() const
void AddMatTp(const Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuTpMatrix< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*A*B.
Definition: cu-matrix.h:641
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
Base class which provides matrix operations not involving resizing or allocation. ...
Definition: kaldi-matrix.h:49
Structure containing size of the matrix plus stride.
Definition: cu-matrixdim.h:46
CuMatrix(const CuSpMatrix< Real > &M)
Copy constructor taking SpMatrix...
Definition: cu-matrix.h:824
void AddRows(Real alpha, const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Does for each row r, this.Row(r) += alpha * src.row(indexes[r]).
Definition: cu-matrix.cc:2766
void AddRowRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, do (*this)(r, c) += src(j, c), where j ranges from ind...
Definition: cu-matrix.cc:2931
CuMatrix(MatrixIndexT rows, MatrixIndexT cols, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Constructor with memory initialisation.
Definition: cu-matrix.h:802
void AddElements(Real alpha, const std::vector< MatrixElement< Real > > &input)
Definition: cu-matrix.cc:3277
void AddMatDiagVec(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transM, CuVectorBase< Real > &v, Real beta=1.0)
Definition: cu-matrix.cc:1415
Real Sum() const
Definition: cu-matrix.cc:3012
void CopyRangeFromMatClamped(const CuMatrixBase< Real > &src, int32_t start_range, int32_t end_range, int32_t clamp_low, int32_t clamp_high)
Definition: cu-matrix.cc:419
CuSubMatrix< Real > Range(const MatrixIndexT row_offset, const MatrixIndexT num_rows, const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:653
CuMatrix(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy constructor taking TpMatrix...
Definition: cu-matrix.h:831
void ApplyFloor(Real floor_val)
Definition: cu-matrix.h:451
void Log(const CuMatrixBase< Real > &src)
Definition: cu-matrix.cc:2477
void AddMatBlock(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuBlockMatrix< Real > &B, MatrixTransposeType transB, Real beta)
This function is like AddMatMat but for where the second argument is of type CuBlockMatrix (a block-d...
Definition: cu-matrix.cc:3205
Matrix< Real > & Mat()
Definition: cu-matrix.h:882
void AddVecToCols(Real alpha, const CuVectorBase< Real > &col, Real beta=1.0)
(for each column c of *this), c = alpha * col + beta * c
Definition: cu-matrix.cc:1232
void Ceiling(const CuMatrixBase< Real > &src, Real ceiling_val)
Definition: cu-matrix.cc:2601
The class CuBlockMatrix holds a vector of objects of type CuMatrix, say, M_1, M_2, ...
void AddMatSp(const Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuSpMatrix< Real > &B, const Real beta)
this <-- beta*this + alpha*A*B
Definition: cu-matrix.h:614
void AddSmat(Real alpha, const CuSparseMatrix< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A.
Definition: cu-matrix.cc:985
void AddToDiag(Real value)
Adds "value" to the diagonal elements of the matrix.
Definition: cu-matrix.cc:604
void AddMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
*this += alpha * A
Definition: cu-matrix.cc:954
void DivRowsVec(const CuVectorBase< Real > &div)
Divide the i'th row by div[i].
Definition: cu-matrix.cc:899
A class for storing matrices.
Definition: kaldi-matrix.h:823
void AddMatMatElements(const Real alpha, const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const Real beta)
*this = beta * *this + alpha * A .* B (.* element by element multiplication)
Definition: cu-matrix.cc:1447
This class represents a matrix that's stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void CopyRowsFromVec(const CuVectorBase< Real > &v)
This function has two modes of operation.
Definition: cu-matrix.cc:2301
Real * data_
GPU data pointer (or regular matrix data pointer, ...).
Definition: cu-matrix.h:777
~CuMatrix()
Destructor.
Definition: cu-matrix.h:877
void CopyFromMat(const MatrixBase< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Copy given matrix. (no resize is done).
MatrixIndexT SizeInBytes() const
Get size of matrix in bytes.
Definition: cu-matrix.h:234
void AddCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indices)
Add column indices[r] of src to column r.
Definition: cu-matrix.cc:2701
void AddTpMat(const Real alpha, const CuTpMatrix< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*A*B.
Definition: cu-matrix.h:632
MatrixIndexT stride_
Definition: cu-matrix.h:787
void InvertElements()
invert the matrix by elements.
Definition: cu-matrix.cc:932
bool ApproxEqual(const CuMatrixBase< Real > &other, float tol=0.01) const
True if ((*this)-other).FrobeniusNorm() <= tol * this->FrobeniusNorm()
Definition: cu-matrix.cc:2137
void CopyColFromVec(const CuVectorBase< Real > &v, const MatrixIndexT col)
Copy vector into specific column of matrix.
Definition: cu-matrix.cc:2414
bool IsUnit(Real tol=0.001) const
Definition: cu-matrix.cc:629
The following class is used to simulate non-const references to Real, e.g.
Definition: cu-value.h:34
void Floor(const CuMatrixBase< Real > &src, Real floor_val)
Definition: cu-matrix.cc:2582
void Lookup(const std::vector< Int32Pair > &indexes, Real *output) const
Definition: cu-matrix.cc:3370
bool SameDim(const MatrixBase< Real > &M, const MatrixBase< Real > &N)
void AddMatBlocks(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType trans=kNoTrans)
This function is like AddMat (it does *this += alpha * src), except that it supports cases where *thi...
Definition: cu-matrix.cc:1119
void Scale(Real value)
Definition: cu-matrix.cc:644
void SymInvertPosDef()
Inversion for positive definite symmetric matrices.
Definition: cu-matrix.cc:2111
void Pow(const CuMatrixBase< Real > &src, Real power)
Definition: cu-matrix.cc:2500
void AddMatMatBatched(const Real alpha, std::vector< CuSubMatrix< Real > * > &C, const std::vector< CuSubMatrix< Real > * > &A, MatrixTransposeType transA, const std::vector< CuSubMatrix< Real > * > &B, MatrixTransposeType transB, const Real beta)
Does multiple matrix multiplications, executing them in parallel using cuBLAS's gemmBatched if we are...
Definition: cu-matrix.cc:2207
void ApplyLogSoftMaxPerRow()
Definition: cu-matrix.h:476
int32 MatrixIndexT
Definition: matrix-common.h:98
void AddVecToRows(Real alpha, const CuVectorBase< Real > &row, Real beta=1.0)
(for each row r of *this), r = alpha * row + beta * r
Definition: cu-matrix.cc:1261
void ApplyPowAbs(Real power, bool include_sign=false)
Definition: cu-matrix.h:443
void CopyFromSp(const CuSpMatrix< Real > &M)
Definition: cu-matrix.cc:360
void Sigmoid(const CuMatrixBase< Real > &src)
Set each element to the sigmoid of the corresponding element of "src": element by element...
Definition: cu-matrix.cc:1534
void Add(Real value)
Definition: cu-matrix.cc:582
void DiffXent(const CuArrayBase< int32 > &tgt, CuVector< Real > *log_post_tgt)
Differentiate the block [softmax+cross-entropy] : dE/da = posterior_mat - target_mat, 'E' is error function, 'a' is activation on softmax input.
Definition: cu-matrix.cc:1957
void AddToRows(Real alpha, const CuArrayBase< MatrixIndexT > &indexes, CuMatrixBase< Real > *dst) const
For each row i of *this, adds this->Row(i) to dst->Row(indexes(i)) if indexes(i) >= 0...
Definition: cu-matrix.cc:2869
void SetZero()
Math operations, some calling kernels.
Definition: cu-matrix.cc:509
void SoftMaxPerRow(const CuMatrixBase< Real > &src)
Softmax nonlinearity Y = Softmax(X) : Yij = e^Xij / sum_k(e^Xik), done to each row, with attention to avoiding overflow or underflow.
Definition: cu-matrix.cc:1717
void MulRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Does for each row r, this.Row(r) *= src.row(indexes[r]), where '*=' is elementwise multiplica...
Definition: cu-matrix.cc:2790
void MulElements(const CuMatrixBase< Real > &A)
Multiply two matrices elementwise: C = C .* A.
Definition: cu-matrix.cc:667
void CopyRows(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies row r from row indexes[r] of src.
Definition: cu-matrix.cc:2678
void CopyFromBlock(const CuBlockMatrix< Real > &B, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:161
MatrixStrideType
Definition: matrix-common.h:44
void SymAddMat2(const Real alpha, const CuMatrixBase< Real > &M, MatrixTransposeType transA, Real beta)
*this = beta * *this + alpha * M M^T, for symmetric matrices.
Definition: cu-matrix.cc:1353
void CopyColsFromVec(const CuVectorBase< Real > &v)
Copies vector into matrix, column-by-column.
Definition: cu-matrix.cc:2376
void GroupPnorm(const CuMatrixBase< Real > &src, Real pow)
Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j ^ (power)) ^ (1 / power) where G = x...
Definition: cu-matrix.cc:1576
#define KALDI_PARANOID_ASSERT(cond)
Definition: kaldi-error.h:206
void AddMatMat(Real alpha, const CuMatrixBase< Real > &A, MatrixTransposeType transA, const CuMatrixBase< Real > &B, MatrixTransposeType transB, Real beta)
C = alpha * A(^T)*B(^T) + beta * C.
Definition: cu-matrix.cc:1291
void Cholesky(CuMatrixBase< Real > *inv_cholesky=NULL)
This function sets *this to the Cholesky factor of *this (i.e.
Definition: cu-matrix.cc:1987
Real TraceMatMat(const MatrixBase< Real > &A, const MatrixBase< Real > &B, MatrixTransposeType trans)
We need to declare this here as it will be a friend function.
This class is used for a piece of a CuMatrix.
Definition: matrix-common.h:70
void DivElements(const CuMatrixBase< Real > &A)
Divide two matrices elementwise: C = C ./ A.
Definition: cu-matrix.cc:691
KALDI_DISALLOW_COPY_AND_ASSIGN(CuMatrixBase)
void DiffSoftmaxPerRow(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the softmax function.
Definition: cu-matrix.cc:1868
void GroupMaxDeriv(const CuMatrixBase< Real > &input, const CuMatrixBase< Real > &output)
Calculate derivatives for the GroupMax function above, where "input" is the input to the GroupMax fun...
Definition: cu-matrix.cc:874
CuSubMatrix< Real > RowRange(const MatrixIndexT row_offset, const MatrixIndexT num_rows) const
Definition: cu-matrix.h:660
void DiffTanh(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the tanh function.
Definition: cu-matrix.cc:1809
Real * RowData(MatrixIndexT r)
Get raw row pointer.
Definition: cu-matrix.h:743
void CopyFromGeneralMat(const GeneralMatrix &src, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:3096
void FindRowMaxId(CuArray< int32 > *id) const
Find the id of the maximal element for each row (resizes the 'id' array to the appropriate size)...
Definition: cu-matrix.cc:1829
void Heaviside(const CuMatrixBase< Real > &src)
Set each element to the Heaviside function of the corresponding element of "src", which we define as ...
Definition: cu-matrix.cc:2435
void DiffSigmoid(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff)
Differentiate backward through the sigmoid function.
Definition: cu-matrix.cc:1764
void MulColsVec(const CuVectorBase< Real > &scale)
scale i'th column by scale[i]
Definition: cu-matrix.cc:765
const Real * Data() const
Return data pointer (const).
Definition: cu-matrix.h:746
void SumColumnRanges(const CuMatrixBase< Real > &src, const CuArrayBase< Int32Pair > &indexes)
For each row r of this and for each column c, sets (*this)(r, c) to the sum src(r, j), where j ranges from indexes[c].first through indexes[c].second - 1.
Definition: cu-matrix.cc:2893
CuSubMatrix< Real > ColRange(const MatrixIndexT col_offset, const MatrixIndexT num_cols) const
Definition: cu-matrix.h:665
CuMatrixBase(Real *data, MatrixIndexT num_rows, MatrixIndexT num_cols, MatrixIndexT stride)
This constructor takes the #rows, #cols and stride; it's called from the constructor of CuSubMatrix...
Definition: cu-matrix.h:771
void ApplyExpSpecial()
Definition: cu-matrix.h:468
Matrix for CUDA computing.
Definition: matrix-common.h:69
void ApplyExpLimited(Real lower_limit, Real upper_limit)
Definition: cu-matrix.h:464
MatrixIndexT NumCols() const
Definition: cu-matrix.h:216
void ApplySoftMaxPerRow()
Definition: cu-matrix.h:472
void DiffLogSoftmaxPerRow(const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv)
Differentiate backward through the log softmax function.
Definition: cu-matrix.cc:1903
void DiffGroupPnorm(const CuMatrixBase< Real > &in_value, const CuMatrixBase< Real > &out_value, const CuMatrixBase< Real > &out_deriv, Real power)
Differentiate backward through the GroupPnorm function.
Definition: cu-matrix.cc:841
CuValue< Real > operator()(MatrixIndexT r, MatrixIndexT c)
Definition: cu-matrix.h:682
Class CuArrayBase, CuSubArray and CuArray are analogues of classes CuVectorBase, CuSubVector and CuVe...
Definition: cu-array.h:44
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
friend Real TraceMatMat(const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, MatrixTransposeType trans)
Definition: cu-matrix.cc:2145
void CopyFromTp(const CuTpMatrix< OtherReal > &M, MatrixTransposeType trans=kNoTrans)
Definition: cu-matrix.cc:280
MatrixTransposeType
Definition: matrix-common.h:32
static void AssertEqual(float a, float b, float relative_tolerance=0.001)
assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
Definition: kaldi-math.h:276
void MulRowsGroupMat(const CuMatrixBase< Real > &src)
divide each row into src.NumCols() groups, and then scale i'th row's jth group of elements by src[i...
Definition: cu-matrix.cc:816
Real Min() const
Definition: cu-matrix.cc:3054
MatrixIndexT num_cols_
Definition: cu-matrix.h:785
::MatrixDim Dim() const
Definition: cu-matrix.h:221
Real FrobeniusNorm() const
Definition: cu-matrix.h:226
void CopyCols(const CuMatrixBase< Real > &src, const CuArrayBase< MatrixIndexT > &indexes)
Copies column r from column indexes[r] of src.
Definition: cu-matrix.cc:2656
void PowAbs(const CuMatrixBase< Real > &src, Real power, bool include_sign=false)
Apply power to the absolute value of each element.
Definition: cu-matrix.cc:2521
void CopyToRows(const CuArrayBase< Real *> &dst) const
For each row r of this matrix, copies it to the array of floats at the location given by dst[r]...
Definition: cu-matrix.cc:2744
bool SameDimAndStride(const CuMatrixBase< Real > &M, const CuMatrixBase< Real > &N)
Definition: cu-matrix.h:954
void AddSpMat(const Real alpha, const CuSpMatrix< Real > &A, const CuMatrixBase< Real > &B, MatrixTransposeType transB, const Real beta)
this <-- beta*this + alpha*SpA*B
Definition: cu-matrix.h:623
void LogSoftMaxPerRow(const CuMatrixBase< Real > &src)
LogSoftmax nonlinearity Y = LogSoftmax(X) : Yij = Xij - log(sum_k(e^Xik)), done to each row...
Definition: cu-matrix.cc:1740
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
MatrixIndexT NumCols() const
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void ExpSpecial(const CuMatrixBase< Real > &src)
For each element x of the matrix, set it to (x < 0 ? exp(x) : x + 1).
Definition: cu-matrix.cc:2563
void SetMatMatDivMat(const CuMatrixBase< Real > &A, const CuMatrixBase< Real > &B, const CuMatrixBase< Real > &C)
*this = a * b / c (by element; when c = 0, *this = a) *this can be an alias of a, b or c safely and g...
Definition: cu-matrix.cc:1206
const Matrix< Real > & Mat() const
Definition: cu-matrix.h:879
void Tanh(const CuMatrixBase< Real > &src)
Compute the hyperbolic tangent (tanh) function; element by element, *this = tanh(src).
Definition: cu-matrix.cc:1786
Real Max() const
Definition: cu-matrix.cc:3033
void ParametricRelu(const CuMatrixBase< Real > &src, const CuVectorBase< Real > &alpha, const CuVectorBase< Real > &beta)
Compute the parametric rectified linear unit function; element by element, *this = src * (src > 0 ...
Definition: cu-matrix.cc:1467
void Set(Real value)
Definition: cu-matrix.cc:531
void DiffParametricRelu(const CuMatrixBase< Real > &value, const CuMatrixBase< Real > &diff, const CuVectorBase< Real > &alpha, const CuVectorBase< Real > &beta)
Differentiate backward through the parametric relu function.
Definition: cu-matrix.cc:1501
void MulRowsVec(const CuVectorBase< Real > &scale)
scale i'th row by scale[i]
Definition: cu-matrix.cc:792
void EqualElementMask(const CuMatrixBase< Real > &mat, CuMatrix< Real > *mask) const
Definition: cu-matrix.cc:3429
void AddMatSmat(Real alpha, const CuMatrixBase< Real > &A, const CuSparseMatrix< Real > &B, MatrixTransposeType transB, Real beta)
(*this) = alpha * A * op(B) + beta * (*this), where B is sparse and op(B) is either B or trans(B) dep...
Definition: cu-matrix.cc:1080
void Exp(const CuMatrixBase< Real > &src)
Definition: cu-matrix.cc:2456
void AddVecVec(Real alpha, const CuVectorBase< Real > &x, const CuVectorBase< Real > &y)
A = alpha * x * y^T + A .
Definition: cu-matrix.cc:1329
void SetZeroAboveDiag()
Zeroes all elements for which col > row.
Definition: cu-matrix.cc:554
Vector for CUDA computing.
Definition: matrix-common.h:72
void AddDiagVecMat(const Real alpha, const CuVectorBase< Real > &v, const CuMatrixBase< Real > &M, MatrixTransposeType transM, Real beta=1.0)
*this = beta * *this + alpha * diag(v) * M [or M^T].
Definition: cu-matrix.cc:1382
const Real * RowData(MatrixIndexT r) const
Get raw row pointer (const).
Definition: cu-matrix.h:740
MatrixIndexT num_rows_
Definition: cu-matrix.h:786