online-nnet2-feature-pipeline.cc
Go to the documentation of this file.
1 // online2/online-nnet2-feature-pipeline.cc
2 
3 // Copyright 2013 Johns Hopkins University (author: Daniel Povey)
4 
5 // See ../../COPYING for clarification regarding multiple authors
6 //
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 //
11 // http://www.apache.org/licenses/LICENSE-2.0
12 //
13 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
15 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
16 // MERCHANTABLITY OR NON-INFRINGEMENT.
17 // See the Apache 2 License for the specific language governing permissions and
18 // limitations under the License.
19 
21 #include "transform/cmvn.h"
22 
23 namespace kaldi {
24 
26  const OnlineNnet2FeaturePipelineConfig &config):
27  silence_weighting_config(config.silence_weighting_config) {
28  if (config.feature_type == "mfcc" || config.feature_type == "plp" ||
29  config.feature_type == "fbank") {
30  feature_type = config.feature_type;
31  } else {
32  KALDI_ERR << "Invalid feature type: " << config.feature_type << ". "
33  << "Supported feature types: mfcc, plp, fbank.";
34  }
35 
36  if (config.mfcc_config != "") {
38  if (feature_type != "mfcc")
39  KALDI_WARN << "--mfcc-config option has no effect "
40  << "since feature type is set to " << feature_type << ".";
41  } // else use the defaults.
42 
43  if (config.plp_config != "") {
45  if (feature_type != "plp")
46  KALDI_WARN << "--plp-config option has no effect "
47  << "since feature type is set to " << feature_type << ".";
48  } // else use the defaults.
49 
50  if (config.fbank_config != "") {
52  if (feature_type != "fbank")
53  KALDI_WARN << "--fbank-config option has no effect "
54  << "since feature type is set to " << feature_type << ".";
55  } // else use the defaults.
56 
57  add_pitch = config.add_pitch;
58 
59  if (config.online_pitch_config != "") {
61  &pitch_opts,
63  if (!add_pitch)
64  KALDI_WARN << "--online-pitch-config option has no effect "
65  << "since you did not supply --add-pitch option.";
66  } // else use the defaults.
67 
68  use_cmvn = (config.cmvn_config != "");
69  if (use_cmvn) {
73  KALDI_ERR << "--global-cmvn-stats option is required "
74  << " when --cmvn-config is specified.";
75  }
76 
77  if (config.ivector_extraction_config != "") {
78  use_ivectors = true;
79  OnlineIvectorExtractionConfig ivector_extraction_opts;
81  &ivector_extraction_opts);
82  ivector_extractor_info.Init(ivector_extraction_opts);
83  } else {
84  use_ivectors = false;
85  }
86 }
87 
88 
91  const OnlineNnet2FeaturePipelineInfo &info):
92  info_(info), base_feature_(NULL),
93  pitch_(NULL), pitch_feature_(NULL),
94  cmvn_feature_(NULL),
95  feature_plus_optional_pitch_(NULL),
96  feature_plus_optional_cmvn_(NULL),
97  ivector_feature_(NULL),
98  nnet3_feature_(NULL),
99  final_feature_(NULL) {
100 
101  if (info_.feature_type == "mfcc") {
103  } else if (info_.feature_type == "plp") {
105  } else if (info_.feature_type == "fbank") {
107  } else {
108  KALDI_ERR << "Code error: invalid feature type " << info_.feature_type;
109  }
110 
111  if (info_.add_pitch) {
114  pitch_);
117  } else {
119  }
120 
121  if (info_.use_cmvn) {
124  OnlineCmvnState initial_state(global_cmvn_stats_);
125  cmvn_feature_ = new OnlineCmvn(info_.cmvn_opts, initial_state,
128  } else {
130  }
131 
132  if (info_.use_ivectors) {
134  // Note: the i-vector extractor OnlineIvectorFeature gets 'base_feature_'
135  // without cmvn (the online cmvn is applied inside the class)
137  base_feature_);
140  } else {
143  }
144  dim_ = final_feature_->Dim();
145 }
147 
148 
150 
152  return final_feature_->IsLastFrame(frame);
153 }
154 
156  return final_feature_->NumFramesReady();
157 }
158 
160  VectorBase<BaseFloat> *feat) {
161  return final_feature_->GetFrame(frame, feat);
162 }
163 
165  const std::vector<std::pair<int32, BaseFloat> > &delta_weights) {
166  IvectorFeature()->UpdateFrameWeights(delta_weights);
167 }
168 
170  const OnlineIvectorExtractorAdaptationState &adaptation_state) {
171  if (info_.use_ivectors) {
172  ivector_feature_->SetAdaptationState(adaptation_state);
173  }
174  // else silently do nothing, as there is nothing to do.
175 }
176 
178  OnlineIvectorExtractorAdaptationState *adaptation_state) const {
179  if (info_.use_ivectors) {
180  ivector_feature_->GetAdaptationState(adaptation_state);
181  }
182  // else silently do nothing, as there is nothing to do.
183 }
184 
186  const OnlineCmvnState &cmvn_state) {
187  if (NULL != cmvn_feature_)
188  cmvn_feature_->SetState(cmvn_state);
189 }
190 
192  OnlineCmvnState *cmvn_state) {
193  if (NULL != cmvn_feature_) {
194  int32 frame = cmvn_feature_->NumFramesReady() - 1;
195  // the following call will crash if no frames are ready.
196  cmvn_feature_->GetState(frame, cmvn_state);
197  }
198 }
199 
200 
202  // Note: the delete command only deletes pointers that are non-NULL. Not all
203  // of the pointers below will be non-NULL.
204  // Some of the online-feature pointers are just copies of other pointers,
205  // and we do have to avoid deleting them in those cases.
207  delete final_feature_;
208  delete ivector_feature_;
209  delete cmvn_feature_;
212  delete pitch_feature_;
213  delete pitch_;
214  delete base_feature_;
215 }
216 
218  BaseFloat sampling_rate,
219  const VectorBase<BaseFloat> &waveform) {
220  base_feature_->AcceptWaveform(sampling_rate, waveform);
221  if (pitch_)
222  pitch_->AcceptWaveform(sampling_rate, waveform);
223 }
224 
227  if (pitch_)
229 }
230 
232  if (feature_type == "mfcc") {
233  return mfcc_opts.frame_opts.frame_shift_ms / 1000.0f;
234  } else if (feature_type == "fbank") {
235  return fbank_opts.frame_opts.frame_shift_ms / 1000.0f;
236  } else if (feature_type == "plp") {
237  return plp_opts.frame_opts.frame_shift_ms / 1000.0f;
238  } else {
239  KALDI_ERR << "Unknown feature type " << feature_type;
240  return 0.0;
241  }
242 }
243 
244 
245 } // namespace kaldi
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
int32 dim_
we cache the feature dimension, to save time when calling Dim().
bool use_cmvn
Options for pitch post-processing.
void ReadConfigFromFile(const std::string &config_filename, C *c)
This template is provided for convenience in reading config classes from files; this is not the stand...
OnlineGenericBaseFeature< PlpComputer > OnlinePlp
bool add_pitch
Options for filterbank computation, if feature_type == "fbank".
FbankOptions fbank_opts
Options for PLP computation, if feature_type == "plp".
This configuration class is to set up OnlineNnet2FeaturePipelineInfo, which in turn is the configurat...
virtual int32 Dim() const
Member functions from OnlineFeatureInterface:
OnlineGenericBaseFeature< MfccComputer > OnlineMfcc
void UpdateFrameWeights(const std::vector< std::pair< int32, BaseFloat > > &delta_weights)
If you are downweighting silence, you can call OnlineSilenceWeighting::GetDeltaWeights and supply the...
OnlineGenericBaseFeature< FbankComputer > OnlineFbank
OnlineFeatureInterface * feature_plus_optional_pitch_
Global CMVN stats.
virtual void InputFinished()
InputFinished() tells the class you won't be providing any more waveform.
void GetAdaptationState(OnlineIvectorExtractorAdaptationState *adaptation_state) const
Get the adaptation state; you may want to call this before destroying this object, to get adaptation state that can be used to improve decoding of later utterances of this speaker.
OnlineFeatureInterface * nnet3_feature_
iVector feature, if used.
This class does an online version of the cepstral mean and [optionally] variance, but note that this ...
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
This would be called from the application, when you get more wave data.
ProcessPitchOptions pitch_process_opts
Options for pitch extraction, if done.
virtual bool IsLastFrame(int32 frame) const
Returns true if this is the last frame.
This class stores the adaptation state from the online iVector extractor, which can help you to initi...
This online-feature class implements post processing of pitch features.
OnlineCmvn * cmvn_feature_
Processed pitch, if pitch used.
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)=0
Gets the feature vector for this frame.
void GetAdaptationState(OnlineIvectorExtractorAdaptationState *adaptation_state) const
Get the adaptation state; you may want to call this before destroying this object, to get adaptation state that can be used to improve decoding of later utterances of this speaker.
OnlineFeatureInterface * final_feature_
final_feature_ is feature_plus_optional_cmvn_ appended (OnlineAppendFeature) with ivector_feature_...
virtual void GetFrame(int32 frame, VectorBase< BaseFloat > *feat)
Gets the feature vector for this frame.
void InputFinished()
If you call InputFinished(), it tells the class you won't be providing any more waveform.
kaldi::int32 int32
OnlineIvectorFeature * IvectorFeature()
This function returns the iVector-extracting part of the feature pipeline (or NULL if iVectors are no...
virtual int32 NumFramesReady() const
returns the feature dimension.
This file contains a different version of the feature-extraction pipeline in online-feature-pipeline...
This class is responsible for storing configuration variables, objects and options for OnlineNnet2Fea...
void ReadKaldiObject(const std::string &filename, Matrix< float > *m)
Definition: kaldi-io.cc:832
void GetState(int32 cur_frame, OnlineCmvnState *cmvn_state)
void SetAdaptationState(const OnlineIvectorExtractorAdaptationState &adaptation_state)
Set the adaptation state to a particular value, e.g.
PlpOptions plp_opts
options for MFCC computation, if feature_type == "mfcc"
void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)
Accept more data to process.
This online-feature class implements combination of two feature streams (such as pitch, plp) into one stream.
void SetState(const OnlineCmvnState &cmvn_state)
const OnlineNnet2FeaturePipelineInfo & info_
Struct OnlineCmvnState stores the state of CMVN adaptation between utterances (but not the state of t...
OnlineFeatureInterface * feature_plus_optional_cmvn_
feature_plus_optional_cmvn_ is the feature_plus_optional_pitch_ transformed with OnlineCmvn if cmvn i...
virtual bool IsLastFrame(int32 frame) const =0
Returns true if this is the last frame.
#define KALDI_ERR
Definition: kaldi-error.h:147
#define KALDI_WARN
Definition: kaldi-error.h:150
void Init(const OnlineIvectorExtractionConfig &config)
OnlineNnet2FeaturePipeline(const OnlineNnet2FeaturePipelineInfo &info)
Constructor from the "info" object.
void GetCmvnState(OnlineCmvnState *cmvn_state)
void SetAdaptationState(const OnlineIvectorExtractorAdaptationState &adaptation_state)
Set the adaptation state to a particular value, e.g.
void SetCmvnState(const OnlineCmvnState &cmvn_state)
Set the CMVN state to a particular value.
void UpdateFrameWeights(const std::vector< std::pair< int32, BaseFloat > > &delta_weights)
std::string global_cmvn_stats_rxfilename
Options for online cmvn, read from config file.
Matrix< double > global_cmvn_stats_
LDA matrix, if supplied.
void ReadConfigsFromFile(const std::string &conf, C1 *c1, C2 *c2)
This variant of the template ReadConfigFromFile is for if you need to read two config classes from th...
#define KALDI_ASSERT(cond)
Definition: kaldi-error.h:185
virtual void AcceptWaveform(BaseFloat sampling_rate, const VectorBase< BaseFloat > &waveform)=0
This would be called from the application, when you get more wave data.
This class includes configuration variables relating to the online iVector extraction, but not including configuration for the "base feature", i.e.
virtual void InputFinished()=0
InputFinished() tells the class you won't be providing any more waveform.
virtual int32 NumFramesReady() const
returns the feature dimension.
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
MfccOptions mfcc_opts
"mfcc" or "plp" or "fbank"
OnlineProcessPitch * pitch_feature_
Raw pitch, if used.
bool use_ivectors
Filename used for reading global cmvn stats in OnlineCmvn.
virtual int32 NumFramesReady() const =0
returns the feature dimension.
virtual int32 Dim() const =0
OnlineIvectorFeature is an online feature-extraction class that's responsible for extracting iVectors...
OnlinePitchFeature * pitch_
MFCC/PLP/filterbank.