nnet3-xvector-compute-batched.cc
Go to the documentation of this file.
1 // nnet3bin/nnet3-xvector-compute-batched.cc
2 
3 // Copyright 2019 Daniel Povey
4 // 2017 Johns Hopkins University (author: Daniel Povey)
5 // 2017 Johns Hopkins University (author: Daniel Garcia-Romero)
6 // 2017 David Snyder
7 
8 // See ../../COPYING for clarification regarding multiple authors
9 //
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 //
14 // http://www.apache.org/licenses/LICENSE-2.0
15 //
16 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
18 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
19 // MERCHANTABLITY OR NON-INFRINGEMENT.
20 // See the Apache 2 License for the specific language governing permissions and
21 // limitations under the License.
22 
23 
24 #include "base/kaldi-common.h"
25 #include "util/common-utils.h"
27 #include "base/timer.h"
28 #include "nnet3/nnet-utils.h"
29 
30 namespace kaldi {
31 namespace nnet3 {
32 
33 
35  int32 chunk_size { 150 };
36  int32 batch_size { 32 };
37  bool pad_input { true };
41 
42 
43  void Register(OptionsItf *po) {
44  po->Register("chunk-size", &chunk_size,
45  "Size of chunk, in input frames. Includes the nnet "
46  "context, so the number of chunks will be more than "
47  "total-input-frames / chunk-size.");
48  po->Register("batch-size", &batch_size,
49  "Size of the batches of chunks that we compute at once. ");
50  po->Register("pad-input", &pad_input,
51  "If true, for utterances shorter than `chunk-size` frames "
52  "we will pad with repeats of the last frame.");
53  compute_config.Register(po);
54  optimize_config.Register(po);
55  compiler_config.Register(po);
56  }
57 };
58 
59 
72 void DivideIntoPieces(int32 a, int32 b, std::vector<int32> *pieces) {
73  KALDI_ASSERT(b > 0);
74  pieces->clear();
75  pieces->reserve(b);
76  int32 a_sign = 1;
77  // Make sure a is positive before division, because the behavior of division
78  // with negative operands is not fully defined in C.
79  if (a < 0) {
80  a_sign = -1;
81  a *= -1;
82  }
83  int32 piece_size1 = a / b,
84  piece_size2 = piece_size1 + 1,
85  remainder = a % b;
86  int32 num_pieces_of_size1 = b - remainder,
87  num_pieces_of_size2 = remainder;
88  KALDI_ASSERT(a == num_pieces_of_size1 * piece_size1 +
89  num_pieces_of_size2 * piece_size2);
90 
91  for (int32 i = 0; i < num_pieces_of_size1; i++)
92  pieces->push_back(piece_size1 * a_sign);
93  for (int32 i = 0; i < num_pieces_of_size2; i++)
94  pieces->push_back(piece_size2 * a_sign);
95 }
96 
97 
98 
100  public:
112  const Nnet &nnet,
113  int32 total_context);
114 
119  void AcceptUtterance(const std::string &utt,
120  const Matrix<BaseFloat> &input);
121 
122 
126  bool XvectorReady() const;
127 
139  void OutputXvector(std::string *utt,
140  Vector<BaseFloat> *xvector);
141 
142 
149  void Flush();
150 
151 
152  private:
153 
154  struct XvectorTask {
155  std::string utt_id;
160  };
161 
162 
197  void SplitUtteranceIntoChunks(int32 num_frames,
198  std::vector<int32> *start_frames);
199 
203  XvectorTask* CreateTask(const std::string &utt, int32 num_chunks);
204 
205 
211  void ComputeOneBatch();
212 
226  void AddChunkToBatch(XvectorTask *task,
227  const Matrix<BaseFloat> &input,
228  int32 chunk_start);
229 
232  const Nnet &nnet_;
233 
236 
245 
246 
248  std::shared_ptr<const NnetComputation> computation_;
249 
250 
256 
262  std::vector<XvectorTask*> tasks_this_batch_;
263 
264  // results_head_ is the first element in the singly linked list of
265  // already-computed xvectors, or NULL if that list is empty. Note:
266  // utterances that are ready will appear here first; new utterances
267  // get added to the tail.
269  // results_tail_ is the last element in the singly linked list of
270  // already-computed xvectors, or NULL if the list is empty.
272 };
273 
276  const std::string &utt, int32 num_chunks) {
277  XvectorTask *task = new XvectorTask;
278  task->utt_id = utt;
279  task->num_chunks = num_chunks;
280  task->num_chunks_finished = 0;
281  task->xvector.Resize(xvector_dim_);
282  task->tail = NULL;
283  if (results_tail_) {
284  results_tail_->tail = task;
285  results_tail_ = task;
286  } else { // List was previously empty.
287  results_head_ = task;
288  results_tail_ = task;
289  }
290  return task;
291 }
292 
294  const BatchedXvectorComputerOptions &opts,
295  const Nnet &nnet,
296  int32 total_context):
297  opts_(opts),
298  total_context_(total_context),
299  nnet_(nnet),
300  position_in_batch_(0),
301  results_head_(NULL),
302  results_tail_(NULL) {
303 
305 
306  feature_dim_ = nnet.InputDim("input");
307  xvector_dim_ = nnet.OutputDim("output");
308  // Zero input_feats_ in case there is only one batch, to avoid
309  // NaN's being generated due to undefined data.
311  feature_dim_);
312 
313  CachingOptimizingCompiler compiler(nnet, opts.optimize_config,
314  opts.compiler_config);
315 
316  { // This block creates computation_.
317  ComputationRequest request;
318  request.need_model_derivative = false;
319  request.store_component_stats = false;
320  request.inputs.resize(1);
321  IoSpecification &input(request.inputs[0]);
322  input.name = "input";
323  input.has_deriv = false;
324  input.indexes.resize(opts_.batch_size * opts_.chunk_size);
325  // Note: the sequences are interleaved in the input; this will save an extra
326  // copy since it corresponds to how nnet3 stores things by default. (Makes
327  // TDNNs easier to implement.)
328  for (int32 n = 0; n < opts_.batch_size; n++) {
329  for (int32 t = 0; t < opts_.chunk_size; t++) {
330  Index index;
331  index.n = n;
332  index.t = t;
333  // index.x is 0 by default.
334  input.indexes[n + opts_.batch_size * t] = index;
335  }
336  }
337  IoSpecification output;
338  output.name = "output";
339  output.has_deriv = false;
340  output.indexes.resize(opts_.batch_size);
341  for (int32 n = 0; n < opts_.batch_size; n++){
342  Index index;
343  index.n = n;
344  index.t = 0;
345  output.indexes[n] = index;
346  }
347  request.outputs.push_back(output);
348  computation_ = compiler.Compile(request);
349  }
350 }
351 
353  XvectorTask *task,
354  const Matrix<BaseFloat> &input,
355  int32 chunk_start) {
357  KALDI_ASSERT(n >= 0 && n < opts_.batch_size);
358  tasks_this_batch_[n] = task;
359  int32 T = opts_.chunk_size,
360  num_input_frames = input.NumRows();
362  if (input.NumCols() != feature_dim_) {
363  KALDI_ERR << "Feature dimension mismatch: neural net expected "
364  << feature_dim_ << ", got " << input.NumCols();
365  }
366  for (int32 t = 0; t < T; t++) {
368  int32 src_t = t + chunk_start;
369  if (src_t >= num_input_frames) {
371  src_t = num_input_frames - 1; // Pad with repeats of the last frame.
372  }
373  SubVector<BaseFloat> src(input, src_t);
374  dest.CopyFromVec(src);
375  }
376 }
377 
379  if (results_head_ == NULL)
380  return false;
383 }
384 
386  Vector<BaseFloat> *xvector) {
388  *utt = results_head_->utt_id;
389  xvector->Swap(&(results_head_->xvector));
390  XvectorTask *new_tail = results_head_->tail;
391  delete results_head_;
392  results_head_ = new_tail;
393  if (new_tail == NULL)
394  results_tail_ = NULL;
395 }
396 
398  if (position_in_batch_ == 0)
399  return;
400  ComputeOneBatch();
401 }
402 
403 
405 
406  CuMatrix<BaseFloat> cu_input_feats(input_feats_);
407  Nnet *nnet_to_update = NULL; // we're not doing any update.
409  nnet_, nnet_to_update);
410  computer.AcceptInput("input", &cu_input_feats);
411  computer.Run();
412  CuMatrix<BaseFloat> cu_output;
413  computer.GetOutputDestructive("output", &cu_output);
414  KALDI_ASSERT(cu_output.NumRows() == opts_.batch_size);
415  Matrix<BaseFloat> output(cu_output);
416  for (int32 n = 0; n < opts_.batch_size; n++) {
418  if (task == NULL)
419  continue; // Would only happen for the last batch.
420  task->num_chunks_finished++;
421  task->xvector.AddVec(1.0 / task->num_chunks, output.Row(n));
422  }
423  position_in_batch_ = 0;
424  std::fill(tasks_this_batch_.begin(), tasks_this_batch_.end(),
425  (XvectorTask*)NULL);
426 }
427 
429  const std::string &utt,
430  const Matrix<BaseFloat> &input) {
431  std::vector<int32> chunk_starts;
432  int32 num_frames = input.NumRows();
433  SplitUtteranceIntoChunks(num_frames, &chunk_starts);
434  int32 num_chunks = chunk_starts.size();
435  XvectorTask *task = CreateTask(utt, num_chunks);
436 
437  for (int32 i = 0; i < num_chunks; i++) {
438  AddChunkToBatch(task, input, chunk_starts[i]);
440  ComputeOneBatch();
441  }
442  }
443 }
444 
446  int32 num_frames, std::vector<int32> *start_frames) {
447  start_frames->clear();
448  if (num_frames <= opts_.chunk_size) {
449  if (num_frames == opts_.chunk_size || opts_.pad_input)
450  start_frames->push_back(0);
451  // if we leave start_frames empty, then we just won't compute anything for
452  // this file.
453  } else {
454  // these modified quantities are to account for the context effects... when
455  // the chunks overlap by exactly total_context_, the frames that get
456  // averaged by the respective chunks in their averaging layers would touch
457  // but not overlap. So the optimal separation between chunks would equal
458  // opts_.chunk_size - total_context_.
459  int32 modified_num_frames = num_frames - total_context_,
460  modified_chunk_size = opts_.chunk_size - total_context_;
461  KALDI_ASSERT(modified_num_frames > modified_chunk_size);
462  int32 num_chunks1 = modified_num_frames / modified_chunk_size,
463  num_chunks2 = num_chunks1 + 1;
464  int32 num_frames1 = num_chunks1 * modified_chunk_size,
465  num_frames2 = num_chunks2 * modified_chunk_size;
466  KALDI_ASSERT(num_frames2 > modified_chunk_size);
467  // The M and N below correspond to the M and N in the comment:
468  // M is the number of frames repeated once in the averaging, N
469  // the number of frames repeated twice. (Basically a solution
470  // of the equations: (M + 2N == num_frames2, M+N == modified_num_frames).
471  // Note: by a "frame" above, I mean a specific "t" value in
472  // the utterance.
473  int32 N = num_frames2 - modified_num_frames,
474  M = modified_num_frames - N;
475  KALDI_ASSERT(M + 2*N == num_frames2 && M + N == modified_num_frames);
476 
477  // The variances below are proportional to the variance of our
478  // estimate of the xvector under certain simplifying assumptions..
479  // they help us choose whether to have gaps between the chunks
480  // or overlaps between them.
481  BaseFloat variance1 = 1.0 / num_frames1, // the 1/M mentioned above.
482  variance2 = (M + 4.0*N) / ((M + 2.0*N)*(M + 2.0*N));
483  if (variance1 <= variance2) {
484  // We'll choose the smaller number of chunks. There may be gaps.
485  // Counting the positions at the ends, there are num_chunks+1 positions
486  // where there might be gaps.
487  // Note: "total_gap" is >= 0, it's the positive of the sum of the
488  // sizes of those gaps.
489  int32 num_chunks = num_chunks1,
490  num_gaps = num_chunks + 1,
491  total_gap = modified_num_frames - num_chunks * modified_chunk_size;
492  KALDI_ASSERT(0 <= total_gap && total_gap < modified_chunk_size);
493  std::vector<int32> gap_sizes; // elements will be >= 0.
494  DivideIntoPieces(total_gap, num_gaps, &gap_sizes);
495  int32 pos = gap_sizes[0];
496  for (int32 i = 0; i < num_chunks; i++) {
497  start_frames->push_back(pos);
498  pos += modified_chunk_size + gap_sizes[i + 1];
499  }
500  KALDI_ASSERT(pos == modified_num_frames);
501  } else {
502  int32 num_chunks = num_chunks2,
503  num_overlaps = num_chunks - 1,
504  total_overlap = modified_num_frames - num_chunks * modified_chunk_size;
505  KALDI_ASSERT( -modified_chunk_size < total_overlap && total_overlap <= 0 );
506  std::vector<int32> overlap_sizes; // elements will be <= 0.
507  DivideIntoPieces(total_overlap, num_overlaps, &overlap_sizes);
508  int32 pos = 0;
509  for (int32 i = 0; i < num_chunks; i++) {
510  start_frames->push_back(pos);
511  pos += modified_chunk_size;
512  if (i < num_overlaps)
513  pos += overlap_sizes[i];
514  }
515  KALDI_ASSERT(pos == modified_num_frames);
516  }
517  }
518 }
519 
520 
521 } // namespace nnet3
522 } // namespace kaldi
523 
524 int main(int argc, char *argv[]) {
525  try {
526  using namespace kaldi;
527  using namespace kaldi::nnet3;
528  typedef kaldi::int32 int32;
529  typedef kaldi::int64 int64;
530 
531  const char *usage =
532  "Propagate features through an xvector neural network model and write\n"
533  "the output vectors. \"Xvector\" is our term for a vector or\n"
534  "embedding which is the output of a particular type of neural network\n"
535  "architecture found in speaker recognition. This architecture\n"
536  "consists of several layers that operate on frames, a statistics\n"
537  "pooling layer that aggregates over the frame-level representations\n"
538  "and possibly additional layers that operate on segment-level\n"
539  "representations. The xvectors are generally extracted from an\n"
540  "output layer after the statistics pooling layer. By default, one\n"
541  "xvector is extracted directly from the set of features for each\n"
542  "utterance. Optionally, xvectors are extracted from chunks of input\n"
543  "features and averaged, to produce a single vector.\n"
544  "\n"
545  "Usage: nnet3-xvector-compute [options] <raw-nnet-in> "
546  "<features-rspecifier> <vector-wspecifier>\n"
547  "e.g.: nnet3-xvector-compute final.raw scp:feats.scp "
548  "ark:nnet_prediction.ark\n"
549  "See also: nnet3-compute\n";
550 
551  ParseOptions po(usage);
552  Timer timer;
553 
555 
556  std::string use_gpu = "no";
557 
558  opts.Register(&po);
559 
560  po.Register("use-gpu", &use_gpu,
561  "yes|no|optional|wait, only has effect if compiled with CUDA");
562 
563 #if HAVE_CUDA==1
564  CuDevice::RegisterDeviceOptions(&po);
565 #endif
566  po.Read(argc, argv);
567 
568  if (po.NumArgs() != 3) {
569  po.PrintUsage();
570  exit(1);
571  }
572 
573 #if HAVE_CUDA==1
574  CuDevice::Instantiate().SelectGpuId(use_gpu);
575 #endif
576 
577  std::string nnet_rxfilename = po.GetArg(1),
578  feature_rspecifier = po.GetArg(2),
579  vector_wspecifier = po.GetArg(3);
580 
581  Nnet nnet;
582  ReadKaldiObject(nnet_rxfilename, &nnet);
583  SetBatchnormTestMode(true, &nnet);
584  SetDropoutTestMode(true, &nnet);
586 
587  int32 total_context;
588  {
589  int32 left_context, right_context;
590  // Compute left_context, right_context as the 'real' left/right context
591  // of the network; they'll tell us how many frames on the chunk boundaries
592  // won't really participate in the statistics averaging.
593  // SetRequireDirectInput() modifies how the StatisticsPoolingComponent
594  // treats its dependences, so we'll get the 'real' left/right context.
595  SetRequireDirectInput(true, &nnet);
596  ComputeSimpleNnetContext(nnet, &left_context, &right_context);
597  KALDI_LOG << "Left/right context is " << left_context << ", "
598  << right_context;
599  SetRequireDirectInput(false, &nnet);
600  total_context = left_context + right_context;
601  }
602 
603  BatchedXvectorComputer computer(opts, nnet, total_context);
604  BaseFloatVectorWriter vector_writer(vector_wspecifier);
605 
606  int32 num_utts_read = 0, num_xvectors_written = 0;
607  int64 frame_count = 0;
608 
609  SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
610 
611  for (; !feature_reader.Done(); feature_reader.Next()) {
612  std::string utt = feature_reader.Key();
613  const Matrix<BaseFloat> &features (feature_reader.Value());
614  if (features.NumRows() == 0) {
615  KALDI_WARN << "Zero-length utterance: " << utt;
616  continue;
617  }
618 
619  frame_count += features.NumRows();
620 
621  computer.AcceptUtterance(utt, features);
622  num_utts_read++;
623 
624  while (computer.XvectorReady()) {
625  std::string utt;
626  Vector<BaseFloat> xvector;
627  computer.OutputXvector(&utt, &xvector);
628  vector_writer.Write(utt, xvector);
629  num_xvectors_written++;
630  }
631  }
632 
633  computer.Flush();
634  while (computer.XvectorReady()) {
635  std::string utt;
636  Vector<BaseFloat> xvector;
637  computer.OutputXvector(&utt, &xvector);
638  vector_writer.Write(utt, xvector);
639  num_xvectors_written++;
640  }
641 
642 
643 #if HAVE_CUDA==1
644  CuDevice::Instantiate().PrintProfile();
645 #endif
646  double elapsed = timer.Elapsed();
647  KALDI_LOG << "Time taken "<< elapsed
648  << "s: real-time factor assuming 100 frames/sec is "
649  << (elapsed*100.0/frame_count);
650  KALDI_LOG << "Read " << num_utts_read << " utterances, wrote "
651  << num_xvectors_written << " xvectors.";
652 
653  // Note: the following rule does something reasonable even if there are 0, 1
654  // or 2 utterances read.
655  if (num_xvectors_written > num_utts_read / 2)
656  return 0;
657  else
658  return 1;
659  } catch(const std::exception &e) {
660  std::cerr << e.what();
661  return -1;
662  }
663 }
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
void Register(OptionsItf *opts)
Definition: nnet-optimize.h:84
int32 InputDim(const std::string &input_name) const
Definition: nnet-nnet.cc:669
void CollapseModel(const CollapseModelConfig &config, Nnet *nnet)
This function modifies the neural net for efficiency, in a way that suitable to be done in test time...
Definition: nnet-utils.cc:2100
bool store_component_stats
you should set need_component_stats to true if you need the average-activation and average-derivative...
void Flush()
Calling this will force any partial minibatch to be computed, so that any utterances that have previo...
bool need_model_derivative
if need_model_derivative is true, then we&#39;ll be doing either model training or model-derivative compu...
MatrixIndexT NumCols() const
Returns number of columns (or zero for empty matrix).
Definition: kaldi-matrix.h:67
void DivideIntoPieces(int32 a, int32 b, std::vector< int32 > *pieces)
This function divides the number &#39;a&#39; into &#39;b&#39; pieces, such that the sum of the pieces equals &#39;a&#39; and ...
void AddChunkToBatch(XvectorTask *task, const Matrix< BaseFloat > &input, int32 chunk_start)
Adds a new chunk to a batch we are preparing.
void PrintUsage(bool print_command_line=false)
Prints the usage documentation [provided in the constructor].
This class enables you to do the compilation and optimization in one call, and also ensures that if t...
std::shared_ptr< const NnetComputation > computation_
The compiled computation (will be the same for every batch).
Matrix< BaseFloat > input_feats_
Staging area for the input features prior to copying them to GPU.
XvectorTask * CreateTask(const std::string &utt, int32 num_chunks)
This adds a newly created XvectorTask at the tail of the singly linked list whose (head...
void SetBatchnormTestMode(bool test_mode, Nnet *nnet)
This function affects only components of type BatchNormComponent.
Definition: nnet-utils.cc:564
A templated class for writing objects to an archive or script file; see The Table concept...
Definition: kaldi-table.h:368
kaldi::int32 int32
std::vector< IoSpecification > inputs
This class represents a matrix that&#39;s stored on the GPU if we have one, and in memory if not...
Definition: matrix-common.h:71
void Write(const std::string &key, const T &value) const
int32 OutputDim(const std::string &output_name) const
Definition: nnet-nnet.cc:677
void Register(const std::string &name, bool *ptr, const std::string &doc)
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
struct Index is intended to represent the various indexes by which we number the rows of the matrices...
Definition: nnet-common.h:44
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
This file contains some miscellaneous functions dealing with class Nnet.
void SetDropoutTestMode(bool test_mode, Nnet *nnet)
This function affects components of child-classes of RandomComponent.
Definition: nnet-utils.cc:573
void AcceptInput(const std::string &node_name, CuMatrix< BaseFloat > *input)
e.g.
void CopyFromVec(const VectorBase< Real > &v)
Copy data from another vector (must match own size).
void OutputXvector(std::string *utt, Vector< BaseFloat > *xvector)
This function, which must only be called if XvectorReady() has just returned true, outputs an xvector for an utterance.
void ComputeOneBatch()
Does the nnet computation for one batch and distributes the computed x-vectors (of chunks) appropriat...
void AcceptUtterance(const std::string &utt, const Matrix< BaseFloat > &input)
Accepts an utterance to process into an xvector, and, if one or more batches become full...
The class ParseOptions is for parsing command-line options; see Parsing command-line options for more...
Definition: parse-options.h:36
std::vector< XvectorTask * > tasks_this_batch_
tasks_this_batch_ is of dimension opts_.batch_size.
void ComputeSimpleNnetContext(const Nnet &nnet, int32 *left_context, int32 *right_context)
ComputeSimpleNnetContext computes the left-context and right-context of a nnet.
Definition: nnet-utils.cc:146
void Swap(Vector< Real > *other)
Swaps the contents of *this and *other. Shallow swap.
struct rnnlm::@11::@12 n
A templated class for reading objects sequentially from an archive or script file; see The Table conc...
Definition: kaldi-table.h:287
int Read(int argc, const char *const *argv)
Parses the command line options and fills the ParseOptions-registered variables.
#define KALDI_ERR
Definition: kaldi-error.h:147
BatchedXvectorComputer(const BatchedXvectorComputerOptions &opts, const Nnet &nnet, int32 total_context)
#define KALDI_WARN
Definition: kaldi-error.h:150
std::string GetArg(int param) const
Returns one of the positional parameters; 1-based indexing for argc/argv compatibility.
const BatchedXvectorComputerOptions & opts_
void Register(OptionsItf *opts)
Definition: nnet-compute.h:42
std::shared_ptr< const NnetComputation > Compile(const ComputationRequest &request)
Does the compilation and returns a const pointer to the result, which is owned by this class...
int NumArgs() const
Number of positional parameters (c.f. argc-1).
std::vector< Index > indexes
A class representing a vector.
Definition: kaldi-vector.h:406
class NnetComputer is responsible for executing the computation described in the "computation" object...
Definition: nnet-compute.h:59
int32 position_in_batch_
position_in_batch_ is the number of chunks that we have filled in in the input_feats_ matrix and task...
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
std::vector< IoSpecification > outputs
MatrixIndexT NumRows() const
Returns number of rows (or zero for empty matrix).
Definition: kaldi-matrix.h:64
void SetRequireDirectInput(bool b, Nnet *nnet)
Calls the corresponding function in any component of type StatisticsPoolingComponent; used as a way t...
Definition: nnet-utils.cc:303
void Resize(const MatrixIndexT r, const MatrixIndexT c, MatrixResizeType resize_type=kSetZero, MatrixStrideType stride_type=kDefaultStride)
Sets matrix to a specified size (zero is OK as long as both r and c are zero).
void GetOutputDestructive(const std::string &output_name, CuMatrix< BaseFloat > *output)
MatrixIndexT NumRows() const
Dimensions.
Definition: cu-matrix.h:215
#define KALDI_LOG
Definition: kaldi-error.h:153
double Elapsed() const
Returns time in seconds.
Definition: timer.h:74
Represents a non-allocating general vector which can be defined as a sub-vector of higher-level vecto...
Definition: kaldi-vector.h:501
void SplitUtteranceIntoChunks(int32 num_frames, std::vector< int32 > *start_frames)
This decides how to split the utterance into chunks.
int main(int argc, char *argv[])
bool XvectorReady() const
Returns true if at least one xvector is pending output (i.e.
Config class for the CollapseModel function.
Definition: nnet-utils.h:240
void Run()
This does either the forward or backward computation, depending when it is called (in a typical compu...