clip-caption-reward
Configuration file (YAML, 64 lines, 1.3 KiB, last updated 2 years ago):
caption_model: transformer
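# "Noam" LR schedule (Vaswani et al., 2017):
# lr = d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)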
noamopt: true
noamopt_warmup: 20000
label_smoothing: 0.0
input_json: data/FineCapEval.json
input_label_h5: none
input_fc_dir: data/FineCapEval_clip_RN50_fc
input_att_dir: data/FineCapEval_clip_RN50_att
input_clipscore_vis_dir: data/FineCapEval_clipscore_vis
seq_per_img: 5
batch_size: 160
learning_rate: 0.0005

checkpoint_path: ./save/clipRN50_clips/clipRN50_clips

use_multi_rewards: false
use_grammar: false
use_grammar_baseline: false
# clip_load_path: '/scratch-space/retrieval/save/clip_negative_text/clip_negative_text-epoch=10.ckpt'

# Notice: because I'm too lazy, I reuse the RNN option names to set the
# transformer hyperparameters:
# N = num_layers
# d_model = input_encoding_size
# d_ff = rnn_size

# will be ignored
num_layers: 6
input_encoding_size: 512
rnn_size: 2048

# Transformer config
N_enc: 6
N_dec: 6
d_model: 512
d_ff: 2048
num_att_heads: 8
dropout: 0.1

learning_rate_decay_start: 0
scheduled_sampling_start: -1
save_checkpoint_every: 3000
language_eval: 0
val_images_use: 5000
max_epochs: 15
train_sample_n: 5

REFORWARD: false
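# The keys below repeat noamopt, learning_rate, learning_rate_decay_start,
# and max_epochs from above; most YAML loaders (PyYAML included) keep the
# last occurrence of a duplicated key, so these later values take effect.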
# _BASE_: transformer.yml
reduce_on_plateau: false
noamopt: false
learning_rate: 0.000005
learning_rate_decay_start: -1

self_critical_after: 15
max_epochs: 50

verbose: false
precision: 32

# use_clipscore: true
use_clipscore: false
clipscore_reward_weight: 2.0
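
Note that noamopt, learning_rate, learning_rate_decay_start, and max_epochs each appear twice: the block under # _BASE_: transformer.yml overrides the earlier values. A minimal way to confirm which values take effect, assuming the file is saved as clipRN50_clips.yml (a guessed filename, taken from checkpoint_path) and parsed with PyYAML:

import yaml

# PyYAML keeps the last occurrence of a duplicated key, so the
# override block at the bottom of the file wins.
with open("clipRN50_clips.yml") as f:
    cfg = yaml.safe_load(f)

assert cfg["noamopt"] is False                # not True
assert cfg["learning_rate"] == 0.000005       # not 0.0005
assert cfg["max_epochs"] == 50                # not 15
assert cfg["learning_rate_decay_start"] == -1 # not 0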
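The "Transformer config" block is the standard base setting from "Attention Is All You Need" (6 layers per side, d_model 512, d_ff 2048, 8 heads, dropout 0.1). Purely to illustrate what those hyperparameters mean (the repo defines its own captioning transformer; this is not its model code), here are the same values plugged into PyTorch's nn.Transformer:

import torch
import torch.nn as nn

model = nn.Transformer(
    d_model=512,            # d_model (= input_encoding_size)
    nhead=8,                # num_att_heads
    num_encoder_layers=6,   # N_enc
    num_decoder_layers=6,   # N_dec
    dim_feedforward=2048,   # d_ff (= rnn_size)
    dropout=0.1,
    batch_first=True,
)

feats = torch.randn(2, 50, 512)  # image features projected to d_model
caps = torch.randn(2, 20, 512)   # embedded caption tokens
out = model(feats, caps)         # -> (2, 20, 512)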
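self_critical_after: 15 switches training to self-critical sequence training (SCST) after 15 epochs, and use_clipscore / clipscore_reward_weight control how a CLIP-based reward enters that phase. A rough sketch of the SCST update with a weighted CLIPScore-style reward; the function names and reward wiring below are hypothetical stand-ins, not the repo's actual API:

import torch

def scst_loss(sample_logprobs, sample_reward, greedy_reward):
    # SCST (Rennie et al., 2017): the greedy caption's reward is the
    # baseline, so only captions that beat greedy get reinforced.
    advantage = (sample_reward - greedy_reward).detach()
    return -(advantage * sample_logprobs).mean()

def clip_reward(clip_scores, weight=2.0):
    # Hypothetical stand-in: CLIPScore per sampled caption, scaled by
    # clipscore_reward_weight.
    return weight * clip_scores

# Toy usage with made-up numbers.
logp = torch.tensor([-12.3, -9.8], requires_grad=True)  # sampled caption log-probs
r_sample = clip_reward(torch.tensor([0.72, 0.55]))
r_greedy = clip_reward(torch.tensor([0.68, 0.60]))
scst_loss(logp, r_sample, r_greedy).backward()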