albef
copied
wxywb
2 years ago
15 changed files with 304 additions and 17 deletions
@ -0,0 +1,33 @@ |
|||||
|
train_file: ['data/refcoco+_train.json'] |
||||
|
test_file: ['data/refcoco+_val.json','data/refcoco+_test.json'] |
||||
|
|
||||
|
refcoco_data: 'data' |
||||
|
det_file: 'data/refcoco+/dets.json' |
||||
|
coco_file: 'data/refcoco+/cocos.json' |
||||
|
|
||||
|
image_root: '/export/share/datasets/vision/coco/images/' |
||||
|
|
||||
|
bert_config: 'configs/config_bert.json' |
||||
|
|
||||
|
image_res: 384 |
||||
|
batch_size: 32 |
||||
|
|
||||
|
queue_size: 65536 |
||||
|
momentum: 0.995 |
||||
|
vision_width: 768 |
||||
|
embed_dim: 256 |
||||
|
temp: 0.07 |
||||
|
|
||||
|
alpha: 0.4 |
||||
|
distill: True |
||||
|
warm_up: True |
||||
|
|
||||
|
optimizer: {opt: adamW, lr: 1e-5, weight_decay: 0.02} |
||||
|
schedular: {sched: cosine, lr: 1e-5, epochs: 5, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
@ -0,0 +1,25 @@ |
|||||
|
train_file: ['data/nlvr_train.json'] |
||||
|
val_file: ['data/nlvr_dev.json'] |
||||
|
test_file: ['data/nlvr_test.json'] |
||||
|
|
||||
|
image_root: '/export/share/datasets/vision/NLVR2/' |
||||
|
|
||||
|
image_res: 384 |
||||
|
batch_size: 16 |
||||
|
|
||||
|
bert_config: 'configs/config_bert.json' |
||||
|
|
||||
|
alpha: 0.4 |
||||
|
distill: True |
||||
|
warm_up: True |
||||
|
eval_ema: False |
||||
|
|
||||
|
optimizer: {opt: adamW, lr: 2e-5, weight_decay: 0.02} |
||||
|
schedular: {sched: cosine, lr: 2e-5, epochs: 10, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
@ -0,0 +1,25 @@ |
|||||
|
train_file: ['data/coco.json', |
||||
|
'data/vg.json', |
||||
|
'data/cc3m_train.json', |
||||
|
'data/cc3m_val.json', |
||||
|
'data/sbu.json' |
||||
|
] |
||||
|
|
||||
|
# Each train_file is a JSON file containing a list; every item has the form {'image': img_path, 'caption': text or list_of_text} |
||||
|
|
||||
|
bert_config: 'configs/config_bert.json' |
||||
|
|
||||
|
image_res: 256 |
||||
|
vision_width: 768 |
||||
|
embed_dim: 256 |
||||
|
batch_size: 64 |
||||
|
|
||||
|
optimizer: {opt: adamW, lr: 2e-5, weight_decay: 0.02} |
||||
|
schedular: {sched: cosine, lr: 2e-5, epochs: 1, min_lr: 1e-5, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
@ -0,0 +1,29 @@ |
|||||
|
train_file: ['data/coco.json', |
||||
|
'data/vg.json', |
||||
|
'data/cc12m.json', |
||||
|
'data/cc3m_train.json', |
||||
|
'data/cc3m_val.json', |
||||
|
'data/sbu.json' |
||||
|
] |
||||
|
# Each train_file is a JSON file containing a list; every item has the form {'image': img_path, 'caption': text or list_of_text} |
||||
|
bert_config: 'configs/config_bert.json' |
||||
|
|
||||
|
image_res: 256 |
||||
|
vision_width: 768 |
||||
|
embed_dim: 256 |
||||
|
batch_size: 64 |
||||
|
temp: 0.07 |
||||
|
mlm_probability: 0.15 |
||||
|
queue_size: 65536 |
||||
|
momentum: 0.995 |
||||
|
alpha: 0.4 |
||||
|
|
||||
|
optimizer: {opt: adamW, lr: 1e-4, weight_decay: 0.02} |
||||
|
schedular: {sched: cosine, lr: 1e-4, epochs: 30, min_lr: 1e-5, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 20, cooldown_epochs: 0} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
@ -0,0 +1,31 @@ |
|||||
|
train_file: ['data/coco_train.json'] |
||||
|
val_file: 'data/coco_val.json' |
||||
|
test_file: 'data/coco_test.json' |
||||
|
image_root: '/export/share/datasets/vision/coco/images/' |
||||
|
|
||||
|
bert_config: 'configs/config_bert.json' |
||||
|
|
||||
|
image_res: 384 |
||||
|
batch_size_train: 32 |
||||
|
batch_size_test: 64 |
||||
|
|
||||
|
queue_size: 65536 |
||||
|
momentum: 0.995 |
||||
|
vision_width: 768 |
||||
|
embed_dim: 256 |
||||
|
temp: 0.07 |
||||
|
k_test: 256 |
||||
|
|
||||
|
alpha: 0.4 |
||||
|
distill: True |
||||
|
warm_up: True |
||||
|
|
||||
|
optimizer: {opt: adamW, lr: 1e-5, weight_decay: 0.02} |
||||
|
schedular: {sched: cosine, lr: 1e-5, epochs: 5, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
@ -0,0 +1,31 @@ |
|||||
|
train_file: ['data/flickr30k_train.json'] |
||||
|
val_file: 'data/flickr30k_val.json' |
||||
|
test_file: 'data/flickr30k_test.json' |
||||
|
image_root: '/export/share/datasets/vision/flickr30k/' #flickr30k-images/ |
||||
|
|
||||
|
bert_config: 'configs/config_bert.json' |
||||
|
|
||||
|
image_res: 384 |
||||
|
batch_size_train: 32 |
||||
|
batch_size_test: 64 |
||||
|
|
||||
|
queue_size: 65536 |
||||
|
momentum: 0.995 |
||||
|
vision_width: 768 |
||||
|
embed_dim: 256 |
||||
|
temp: 0.07 |
||||
|
k_test: 128 |
||||
|
|
||||
|
alpha: 0.4 |
||||
|
distill: True |
||||
|
warm_up: True |
||||
|
|
||||
|
optimizer: {opt: adamW, lr: 1e-5, weight_decay: 0.02} |
||||
|
schedular: {sched: cosine, lr: 1e-5, epochs: 10, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
@ -0,0 +1,25 @@ |
|||||
|
train_file: 'data/ve_train.json' |
||||
|
val_file: 'data/ve_dev.json' |
||||
|
test_file: 'data/ve_test.json' |
||||
|
|
||||
|
image_root: '/export/home/project/SNLI-VE/data/images' |
||||
|
|
||||
|
image_res: 384 |
||||
|
batch_size_train: 32 |
||||
|
batch_size_test: 64 |
||||
|
|
||||
|
alpha: 0.4 |
||||
|
distill: True |
||||
|
warm_up: False |
||||
|
|
||||
|
bert_config: 'configs/config_bert.json' |
||||
|
|
||||
|
optimizer: {opt: adamW, lr: 2e-5, weight_decay: 0.02} |
||||
|
schedular: {sched: cosine, lr: 2e-5, epochs: 5, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 1, cooldown_epochs: 0} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
@ -0,0 +1,32 @@ |
|||||
|
train_file: ['data/vqa_train.json', |
||||
|
'data/vqa_val.json', |
||||
|
'data/vg_qa.json'] |
||||
|
|
||||
|
test_file: ['data/vqa_test.json'] |
||||
|
answer_list: 'data/answer_list.json' |
||||
|
|
||||
|
vqa_root: '/export/share/datasets/vision/VQA/Images/mscoco/' #train2014/ |
||||
|
vg_root: '/export/share/datasets/vision/visual-genome/' #image/ |
||||
|
|
||||
|
image_res: 384 |
||||
|
batch_size_train: 32 |
||||
|
batch_size_test: 16 |
||||
|
k_test: 128 |
||||
|
|
||||
|
alpha: 0.4 |
||||
|
distill: True |
||||
|
warm_up: True |
||||
|
|
||||
|
eos: '[SEP]' |
||||
|
|
||||
|
bert_config: 'configs/config_bert.json' |
||||
|
|
||||
|
optimizer: {opt: adamW, lr: 2e-5, weight_decay: 0.02} |
||||
|
schedular: {sched: cosine, lr: 2e-5, epochs: 8, min_lr: 1e-6, decay_rate: 1, warmup_lr: 1e-5, warmup_epochs: 4, cooldown_epochs: 0} |
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
||||
|
|
@ -0,0 +1,21 @@ |
|||||
|
{ |
||||
|
"architectures": [ |
||||
|
"BertForMaskedLM" |
||||
|
], |
||||
|
"attention_probs_dropout_prob": 0.1, |
||||
|
"hidden_act": "gelu", |
||||
|
"hidden_dropout_prob": 0.1, |
||||
|
"hidden_size": 768, |
||||
|
"initializer_range": 0.02, |
||||
|
"intermediate_size": 3072, |
||||
|
"layer_norm_eps": 1e-12, |
||||
|
"max_position_embeddings": 512, |
||||
|
"model_type": "bert", |
||||
|
"num_attention_heads": 12, |
||||
|
"num_hidden_layers": 12, |
||||
|
"pad_token_id": 0, |
||||
|
"type_vocab_size": 2, |
||||
|
"vocab_size": 30522, |
||||
|
"fusion_layer": 6, |
||||
|
"encoder_width": 768 |
||||
|
} |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue