# vggish/vggish_params.py

# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Global parameters for the VGGish model.

See vggish_slim.py for more information.
"""

# ---------------------------------------------------------------------------
# Architecture.
# ---------------------------------------------------------------------------
# Number of frames in an input mel-spectrogram patch.
NUM_FRAMES = 96
# Number of frequency bands in an input mel-spectrogram patch.
NUM_BANDS = 64
# Size of the embedding layer.
EMBEDDING_SIZE = 128

# ---------------------------------------------------------------------------
# Feature and example generation.
# ---------------------------------------------------------------------------
SAMPLE_RATE = 16000
STFT_WINDOW_LENGTH_SECONDS = 0.025
STFT_HOP_LENGTH_SECONDS = 0.01
# The mel bin count is bound to the band count of the input patch.
NUM_MEL_BINS = NUM_BANDS
MEL_MIN_HZ = 125
MEL_MAX_HZ = 7500
# Offset used for the stabilized log of the input mel-spectrogram.
LOG_OFFSET = 0.01
# Each example contains 96 frames of 10 ms; the hop equals the window, so
# consecutive examples have zero overlap.
EXAMPLE_WINDOW_SECONDS = 0.96
EXAMPLE_HOP_SECONDS = 0.96

# Embedding postprocessing parameters.
PCA_MEANS_NAME = 'pca_means'
PCA_EIGEN_VECTORS_NAME = 'pca_eigen_vectors'
# Minimum and maximum QUANTIZE_* values, assigned as a pair.
QUANTIZE_MIN_VAL, QUANTIZE_MAX_VAL = -2.0, 2.0

# Training hyperparameters.
# Standard deviation used to initialize weights.
INIT_STDDEV = 0.01
# Adam optimizer settings: learning rate, then epsilon.
LEARNING_RATE, ADAM_EPSILON = 1e-4, 1e-8

# Op, tensor, and feature names.
INPUT_OP_NAME = 'vggish/input_features'
OUTPUT_OP_NAME = 'vggish/embedding'
# Each tensor name is the corresponding op name with ':0' appended.
INPUT_TENSOR_NAME = '{}:0'.format(INPUT_OP_NAME)
OUTPUT_TENSOR_NAME = '{}:0'.format(OUTPUT_OP_NAME)
AUDIO_EMBEDDING_FEATURE_NAME = 'audio_embedding'
Refactor Signed-off-by: Jael Gu <mengjia.gu@zilliz.com> 3 years ago			`# Copyright 2017 The TensorFlow Authors All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`# ==============================================================================`

			`"""Global parameters for the VGGish model.`

			`See vggish_slim.py for more information.`
			`"""`

			`# Architectural constants.`
			`NUM_FRAMES = 96 # Frames in input mel-spectrogram patch.`
			`NUM_BANDS = 64 # Frequency bands in input mel-spectrogram patch.`
			`EMBEDDING_SIZE = 128 # Size of embedding layer.`

			`# Hyperparameters used in feature and example generation.`
			`SAMPLE_RATE = 16000`
			`STFT_WINDOW_LENGTH_SECONDS = 0.025`
			`STFT_HOP_LENGTH_SECONDS = 0.010`
			`NUM_MEL_BINS = NUM_BANDS`
			`MEL_MIN_HZ = 125`
			`MEL_MAX_HZ = 7500`
			`LOG_OFFSET = 0.01 # Offset used for stabilized log of input mel-spectrogram.`
			`EXAMPLE_WINDOW_SECONDS = 0.96 # Each example contains 96 10ms frames`
			`EXAMPLE_HOP_SECONDS = 0.96 # with zero overlap.`

			`# Parameters used for embedding postprocessing.`
			`PCA_EIGEN_VECTORS_NAME = 'pca_eigen_vectors'`
			`PCA_MEANS_NAME = 'pca_means'`
			`QUANTIZE_MIN_VAL = -2.0`
			`QUANTIZE_MAX_VAL = +2.0`

			`# Hyperparameters used in training.`
			`INIT_STDDEV = 0.01 # Standard deviation used to initialize weights.`
			`LEARNING_RATE = 1e-4 # Learning rate for the Adam optimizer.`
			`ADAM_EPSILON = 1e-8 # Epsilon for the Adam optimizer.`

			`# Names of ops, tensors, and features.`
			`INPUT_OP_NAME = 'vggish/input_features'`
			`INPUT_TENSOR_NAME = INPUT_OP_NAME + ':0'`
			`OUTPUT_OP_NAME = 'vggish/embedding'`
			`OUTPUT_TENSOR_NAME = OUTPUT_OP_NAME + ':0'`
			`AUDIO_EMBEDDING_FEATURE_NAME = 'audio_embedding'`