feature-window.h
Go to the documentation of this file.
1 // feat/feature-window.h
2 
3 // Copyright 2009-2011 Karel Vesely; Petr Motlicek; Saarland University
4 // 2014-2016 Johns Hopkins University (author: Daniel Povey)
5 
6 // See ../../COPYING for clarification regarding multiple authors
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 //
12 // http://www.apache.org/licenses/LICENSE-2.0
13 //
14 // THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 // KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 // WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 // MERCHANTABILITY OR NON-INFRINGEMENT.
18 // See the Apache 2 License for the specific language governing permissions and
19 // limitations under the License.
20 
21 #ifndef KALDI_FEAT_FEATURE_WINDOW_H_
22 #define KALDI_FEAT_FEATURE_WINDOW_H_
23 
24 #include <map>
25 #include <string>
26 
27 #include "matrix/matrix-lib.h"
28 #include "util/common-utils.h"
29 #include "base/kaldi-error.h"
30 
31 namespace kaldi {
34 
37  BaseFloat frame_shift_ms; // in milliseconds.
38  BaseFloat frame_length_ms; // in milliseconds.
39  BaseFloat dither; // Amount of dithering, 0.0 means no dither.
40  BaseFloat preemph_coeff; // Preemphasis coefficient.
41  bool remove_dc_offset; // Subtract mean of wave before FFT.
42  std::string window_type; // e.g. Hamming window
43  // May be "hamming", "rectangular", "povey", "hanning", "sine", "blackman"
44  // "povey" is a window I made to be similar to Hamming but to go to zero at the
45  // edges, it's pow((0.5 - 0.5*cos(n/N*2*pi)), 0.85)
46  // I just don't think the Hamming window makes sense as a windowing function.
49  bool snip_edges;
54  samp_freq(16000),
55  frame_shift_ms(10.0),
56  frame_length_ms(25.0),
57  dither(1.0),
58  preemph_coeff(0.97),
59  remove_dc_offset(true),
60  window_type("povey"),
61  round_to_power_of_two(true),
62  blackman_coeff(0.42),
63  snip_edges(true),
64  allow_downsample(false),
65  allow_upsample(false),
66  max_feature_vectors(-1)
67  { }
68 
// Registers every frame-extraction option on the supplied options interface,
// binding each option name to the member of this object that stores its value.
// @param [in,out] opts  Options interface; its Register() is called once per option.
// The --window-type help text lists "blackman" (single 'n'), the same spelling
// used by the window_type member comment and by the --blackman-coeff option.
69  void Register(OptionsItf *opts) {
70  opts->Register("sample-frequency", &samp_freq,
71  "Waveform data sample frequency (must match the waveform file, "
72  "if specified there)");
73  opts->Register("frame-length", &frame_length_ms, "Frame length in milliseconds");
74  opts->Register("frame-shift", &frame_shift_ms, "Frame shift in milliseconds");
75  opts->Register("preemphasis-coefficient", &preemph_coeff,
76  "Coefficient for use in signal preemphasis");
77  opts->Register("remove-dc-offset", &remove_dc_offset,
78  "Subtract mean from waveform on each frame");
79  opts->Register("dither", &dither, "Dithering constant (0.0 means no dither). "
80  "If you turn this off, you should set the --energy-floor "
81  "option, e.g. to 1.0 or 0.1");
82  opts->Register("window-type", &window_type, "Type of window "
83  "(\"hamming\"|\"hanning\"|\"povey\"|\"rectangular\""
84  "|\"sine\"|\"blackman\")");
85  opts->Register("blackman-coeff", &blackman_coeff,
86  "Constant coefficient for generalized Blackman window.");
87  opts->Register("round-to-power-of-two", &round_to_power_of_two,
88  "If true, round window size to power of two by zero-padding "
89  "input to FFT.");
90  opts->Register("snip-edges", &snip_edges,
91  "If true, end effects will be handled by outputting only frames that "
92  "completely fit in the file, and the number of frames depends on the "
93  "frame-length. If false, the number of frames depends only on the "
94  "frame-shift, and we reflect the data at the ends.");
95  opts->Register("allow-downsample", &allow_downsample,
96  "If true, allow the input waveform to have a higher frequency than "
97  "the specified --sample-frequency (and we'll downsample).");
98  opts->Register("max-feature-vectors", &max_feature_vectors,
99  "Memory optimization. If larger than 0, periodically remove feature "
100  "vectors so that only this number of the latest feature vectors is "
101  "retained.");
102  opts->Register("allow-upsample", &allow_upsample,
103  "If true, allow the input waveform to have a lower frequency than "
104  "the specified --sample-frequency (and we'll upsample).");
105  }
// Returns the frame shift as a whole number of samples: frame_shift_ms
// (milliseconds) multiplied by 0.001 and by samp_freq.
106  int32 WindowShift() const {
107  return static_cast<int32>(samp_freq * 0.001 * frame_shift_ms);  // the cast discards any fractional sample
108  }
// Returns the frame length as a whole number of samples: frame_length_ms
// (milliseconds) multiplied by 0.001 and by samp_freq. This is the size before
// any rounding up to a power of two.
109  int32 WindowSize() const {
110  return static_cast<int32>(samp_freq * 0.001 * frame_length_ms);  // the cast discards any fractional sample
111  }
113  return (round_to_power_of_two ? RoundUpToNearestPowerOfTwo(WindowSize()) :
114  WindowSize());
115  }
116 };
117 
118 
121  explicit FeatureWindowFunction(const FrameExtractionOptions &opts);
123  window(other.window) { }
125 };
126 
127 
// Returns the number of frames that can be extracted from a waveform of
// 'num_samples' samples under the framing options 'opts'. Per the --snip-edges
// help text, with snip-edges=true only frames that fit completely are counted
// (so the count depends on the frame length); with snip-edges=false the count
// depends only on the frame shift.
// NOTE(review): the effect of 'flush' (default true) is not visible in this
// listing -- confirm against the implementation.
144 int32 NumFrames(int64 num_samples,
145  const FrameExtractionOptions &opts,
146  bool flush = true);
147 
148 /*
149  This function returns the index of the first sample of the frame indexed
150  'frame'. If snip-edges=true, it just returns frame * opts.WindowShift(); if
151  snip-edges=false, the formula is a little more complicated and the result may
152  be negative.
     The result is an int64 sample index; opts.WindowShift() is the frame shift
     converted from milliseconds to a whole number of samples.
153 */
154 int64 FirstSampleOfFrame(int32 frame,
155  const FrameExtractionOptions &opts);
156 
157 
158 
// Applies dithering to 'waveform'; 'dither_value' is the amount of dithering
// (the options describe 0.0 as meaning no dither).
// NOTE(review): presumably modifies *waveform in place, since it is passed as a
// non-const pointer -- confirm against the implementation.
159 void Dither(VectorBase<BaseFloat> *waveform, BaseFloat dither_value);
160 
162 
// Performs the windowing steps on a frame that has already been extracted into
// 'window', using the settings in 'opts' and the precomputed 'window_function'.
// ExtractWindow() is documented as including all the processing done here.
// NOTE(review): 'log_energy_pre_window' is optional (defaults to NULL);
// presumably, when non-NULL, it receives the log-energy of the signal before
// pre-emphasis and windowing, as documented for ExtractWindow() -- confirm.
180 void ProcessWindow(const FrameExtractionOptions &opts,
181  const FeatureWindowFunction &window_function,
182  VectorBase<BaseFloat> *window,
183  BaseFloat *log_energy_pre_window = NULL);
184 
185 
186 /*
187  ExtractWindow() extracts a windowed frame of waveform (possibly with a
188  power-of-two, padded size, depending on the config), including all the
189  processing done by ProcessWindow().
190 
191  @param [in] sample_offset If 'wave' is not the entire waveform, but
192  part of it to the left has been discarded, then the
193  number of samples prior to 'wave' that we have
194  already discarded. Set this to zero if you are
195  processing the entire waveform in one piece, or
196  if you get 'no matching function' compilation
197  errors when updating the code.
198  @param [in] wave The waveform
199  @param [in] f The frame index to be extracted, with
200  0 <= f < NumFrames(sample_offset + wave.Dim(), opts, true)
201  @param [in] opts The options class to be used
202  @param [in] window_function The windowing function, as derived from the
203  options class.
204  @param [out] window The windowed, possibly-padded waveform to be
205  extracted. Will be resized as needed.
206  @param [out] log_energy_pre_window If non-NULL, the log-energy of
207  the signal prior to pre-emphasis and multiplying by
208  the windowing function will be written to here.
209 */
210 void ExtractWindow(int64 sample_offset,
211  const VectorBase<BaseFloat> &wave,
212  int32 f,
213  const FrameExtractionOptions &opts,
214  const FeatureWindowFunction &window_function,
215  Vector<BaseFloat> *window,
216  BaseFloat *log_energy_pre_window = NULL);
217 
218 
220 } // namespace kaldi
221 
222 
223 #endif // KALDI_FEAT_FEATURE_WINDOW_H_
This code computes Goodness of Pronunciation (GOP) and extracts phone-level pronunciation feature for...
Definition: chain.dox:20
Vector< BaseFloat > window
kaldi::int32 int32
int32 RoundUpToNearestPowerOfTwo(int32 n)
Definition: kaldi-math.cc:32
virtual void Register(const std::string &name, bool *ptr, const std::string &doc)=0
void ExtractWindow(int64 sample_offset, const VectorBase< BaseFloat > &wave, int32 f, const FrameExtractionOptions &opts, const FeatureWindowFunction &window_function, Vector< BaseFloat > *window, BaseFloat *log_energy_pre_window)
int64 FirstSampleOfFrame(int32 frame, const FrameExtractionOptions &opts)
int32 NumFrames(int64 num_samples, const FrameExtractionOptions &opts, bool flush)
This function returns the number of frames that we can extract from a wave file with the given number...
void ProcessWindow(const FrameExtractionOptions &opts, const FeatureWindowFunction &window_function, VectorBase< BaseFloat > *window, BaseFloat *log_energy_pre_window)
This function does all the windowing steps after actually extracting the windowed signal: depending o...
void Register(OptionsItf *opts)
void Preemphasize(VectorBase< BaseFloat > *waveform, BaseFloat preemph_coeff)
A class representing a vector.
Definition: kaldi-vector.h:406
Provides a vector abstraction class.
Definition: kaldi-vector.h:41
void Dither(VectorBase< BaseFloat > *waveform, BaseFloat dither_value)
FeatureWindowFunction(const FeatureWindowFunction &other)