From 730c0aaae3ccdd8ad331f9cd97d0859d0b6c1571 Mon Sep 17 00:00:00 2001 From: ChengZi Date: Thu, 29 Dec 2022 17:59:15 +0800 Subject: [PATCH] add training readme --- README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/README.md b/README.md index e0a056a..581342a 100644 --- a/README.md +++ b/README.md @@ -397,3 +397,34 @@ print(f'Onnx-support/Total Models: {len(onnx_list)}/{len(full_list)}') ``` 2022-12-13 16:25:15,916 - 140704500614336 - auto_transformers.py-auto_transformers:68 - WARNING: The operator is initialized without specified model. Onnx-support/Total Models: 111/126 + +## Fine-tune +### Get started +We have prepared some typical [fine-tuning examples](https://github.com/towhee-io/examples/tree/main/fine_tune/6_train_language_modeling_tasks). + +Simply put, you only need to construct an op instance and pass in some configurations to train the specified task. +```python +import towhee + +bert_op = towhee.ops.text_embedding.transformers(model_name='bert-base-uncased').get_op() +data_args = { + 'dataset_name': 'wikitext', + 'dataset_config_name': 'wikitext-2-raw-v1', +} +training_args = { + 'num_train_epochs': 3, # you can add epoch number to get a better metric. + 'per_device_train_batch_size': 8, + 'per_device_eval_batch_size': 8, + 'do_train': True, + 'do_eval': True, + 'output_dir': './tmp/test-mlm', + 'overwrite_output_dir': True +} +bert_op.train(task='mlm', data_args=data_args, training_args=training_args) + +``` +For more information, refer to the [examples](https://github.com/towhee-io/examples/tree/main/fine_tune/6_train_language_modeling_tasks). + +### Dive deep and customize your training +You can change the [training script](https://towhee.io/text-embedding/transformers/src/branch/main/train_clm_with_hf_trainer.py) to suit your own needs. 
+Or you can refer to the original [Hugging Face Transformers training examples](https://github.com/huggingface/transformers/blob/main/examples/pytorch/language-modeling). \ No newline at end of file