albef
copied
wxywb
2 years ago
15 changed files with 304 additions and 17 deletions
@ -0,0 +1,33 @@ |
|||
train_file: ['data/refcoco+_train.json'] |
|||
test_file: ['data/refcoco+_val.json','data/refcoco+_test.json'] |
|||
|
|||
refcoco_data: 'data' |
|||
det_file: 'data/refcoco+/dets.json' |
|||
coco_file: 'data/refcoco+/cocos.json' |
|||
|
|||
image_root: '/export/share/datasets/vision/coco/images/' |
|||
|
|||
bert_config: 'configs/config_bert.json' |
|||
|
|||
image_res: 384 |
|||
batch_size: 32 |
|||
|
|||
queue_size: 65536 |
|||
momentum: 0.995 |
|||
vision_width: 768 |
|||
embed_dim: 256 |
|||
temp: 0.07 |
|||
|
|||
alpha: 0.4 |
|||
distill: True |
|||
warm_up: True |
|||
|
|||
optimizer: {opt: adamW, lr: 1e-5, weight_decay: 0.02} |
|||
schedular: {sched: cosine, lr: 1e-5, epochs: 5, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,25 @@ |
|||
train_file: ['data/nlvr_train.json'] |
|||
val_file: ['data/nlvr_dev.json'] |
|||
test_file: ['data/nlvr_test.json'] |
|||
|
|||
image_root: '/export/share/datasets/vision/NLVR2/' |
|||
|
|||
image_res: 384 |
|||
batch_size: 16 |
|||
|
|||
bert_config: 'configs/config_bert.json' |
|||
|
|||
alpha: 0.4 |
|||
distill: True |
|||
warm_up: True |
|||
eval_ema: False |
|||
|
|||
optimizer: {opt: adamW, lr: 2e-5, weight_decay: 0.02} |
|||
schedular: {sched: cosine, lr: 2e-5, epochs: 10, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,25 @@ |
|||
train_file: ['data/coco.json', |
|||
'data/vg.json', |
|||
'data/cc3m_train.json', |
|||
'data/cc3m_val.json', |
|||
'data/sbu.json' |
|||
] |
|||
|
|||
# Each train_file is a JSON file containing a list; each item has the form {'image': img_path, 'caption': text or list_of_text}. |
|||
|
|||
bert_config: 'configs/config_bert.json' |
|||
|
|||
image_res: 256 |
|||
vision_width: 768 |
|||
embed_dim: 256 |
|||
batch_size: 64 |
|||
|
|||
optimizer: {opt: adamW, lr: 2e-5, weight_decay: 0.02} |
|||
schedular: {sched: cosine, lr: 2e-5, epochs: 1, min_lr: 1e-5, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,29 @@ |
|||
train_file: ['data/coco.json', |
|||
'data/vg.json', |
|||
'data/cc12m.json', |
|||
'data/cc3m_train.json', |
|||
'data/cc3m_val.json', |
|||
'data/sbu.json' |
|||
] |
|||
# Each train_file is a JSON file containing a list; each item has the form {'image': img_path, 'caption': text or list_of_text}. |
|||
bert_config: 'configs/config_bert.json' |
|||
|
|||
image_res: 256 |
|||
vision_width: 768 |
|||
embed_dim: 256 |
|||
batch_size: 64 |
|||
temp: 0.07 |
|||
mlm_probability: 0.15 |
|||
queue_size: 65536 |
|||
momentum: 0.995 |
|||
alpha: 0.4 |
|||
|
|||
optimizer: {opt: adamW, lr: 1e-4, weight_decay: 0.02} |
|||
schedular: {sched: cosine, lr: 1e-4, epochs: 30, min_lr: 1e-5, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 20, cooldown_epochs: 0} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,31 @@ |
|||
train_file: ['data/coco_train.json'] |
|||
val_file: 'data/coco_val.json' |
|||
test_file: 'data/coco_test.json' |
|||
image_root: '/export/share/datasets/vision/coco/images/' |
|||
|
|||
bert_config: 'configs/config_bert.json' |
|||
|
|||
image_res: 384 |
|||
batch_size_train: 32 |
|||
batch_size_test: 64 |
|||
|
|||
queue_size: 65536 |
|||
momentum: 0.995 |
|||
vision_width: 768 |
|||
embed_dim: 256 |
|||
temp: 0.07 |
|||
k_test: 256 |
|||
|
|||
alpha: 0.4 |
|||
distill: True |
|||
warm_up: True |
|||
|
|||
optimizer: {opt: adamW, lr: 1e-5, weight_decay: 0.02} |
|||
schedular: {sched: cosine, lr: 1e-5, epochs: 5, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,31 @@ |
|||
train_file: ['data/flickr30k_train.json'] |
|||
val_file: 'data/flickr30k_val.json' |
|||
test_file: 'data/flickr30k_test.json' |
|||
image_root: '/export/share/datasets/vision/flickr30k/' #flickr30k-images/ |
|||
|
|||
bert_config: 'configs/config_bert.json' |
|||
|
|||
image_res: 384 |
|||
batch_size_train: 32 |
|||
batch_size_test: 64 |
|||
|
|||
queue_size: 65536 |
|||
momentum: 0.995 |
|||
vision_width: 768 |
|||
embed_dim: 256 |
|||
temp: 0.07 |
|||
k_test: 128 |
|||
|
|||
alpha: 0.4 |
|||
distill: True |
|||
warm_up: True |
|||
|
|||
optimizer: {opt: adamW, lr: 1e-5, weight_decay: 0.02} |
|||
schedular: {sched: cosine, lr: 1e-5, epochs: 10, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,25 @@ |
|||
train_file: 'data/ve_train.json' |
|||
val_file: 'data/ve_dev.json' |
|||
test_file: 'data/ve_test.json' |
|||
|
|||
image_root: '/export/home/project/SNLI-VE/data/images' |
|||
|
|||
image_res: 384 |
|||
batch_size_train: 32 |
|||
batch_size_test: 64 |
|||
|
|||
alpha: 0.4 |
|||
distill: True |
|||
warm_up: False |
|||
|
|||
bert_config: 'configs/config_bert.json' |
|||
|
|||
optimizer: {opt: adamW, lr: 2e-5, weight_decay: 0.02} |
|||
schedular: {sched: cosine, lr: 2e-5, epochs: 5, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,32 @@ |
|||
train_file: ['data/vqa_train.json', |
|||
'data/vqa_val.json', |
|||
'data/vg_qa.json'] |
|||
|
|||
test_file: ['data/vqa_test.json'] |
|||
answer_list: 'data/answer_list.json' |
|||
|
|||
vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' #train2014/ |
|||
vg_root: '/export/share/datasets/vision/visual-genome/' #image/ |
|||
|
|||
image_res: 384 |
|||
batch_size_train: 32 |
|||
batch_size_test: 16 |
|||
k_test: 128 |
|||
|
|||
alpha: 0.4 |
|||
distill: True |
|||
warm_up: True |
|||
|
|||
eos: '[SEP]' |
|||
|
|||
bert_config: 'configs/config_bert.json' |
|||
|
|||
optimizer: {opt: adamW, lr: 2e-5, weight_decay: 0.02} |
|||
schedular: {sched: cosine, lr: 2e-5, epochs: 8, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 4, cooldown_epochs: 0} |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,21 @@ |
|||
{ |
|||
"architectures": [ |
|||
"BertForMaskedLM" |
|||
], |
|||
"attention_probs_dropout_prob": 0.1, |
|||
"hidden_act": "gelu", |
|||
"hidden_dropout_prob": 0.1, |
|||
"hidden_size": 768, |
|||
"initializer_range": 0.02, |
|||
"intermediate_size": 3072, |
|||
"layer_norm_eps": 1e-12, |
|||
"max_position_embeddings": 512, |
|||
"model_type": "bert", |
|||
"num_attention_heads": 12, |
|||
"num_hidden_layers": 12, |
|||
"pad_token_id": 0, |
|||
"type_vocab_size": 2, |
|||
"vocab_size": 30522, |
|||
"fusion_layer": 6, |
|||
"encoder_width": 768 |
|||
} |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue