
    Training Localized Multilingual LLMs with NVIDIA NeMo, Part 2

    Reading Time: 6 minutes

    In Part 1, we discussed how to train a monolingual tokenizer and merge it with a pretrained LLM's tokenizer to form a customized multilingual tokenizer. In this post, you learn how to integrate the customized tokenizer into the pretrained LLM and how to start a continual pretraining task in NVIDIA NeMo.

    Figure 1. Workflow for training localized multilingual LLMs

    Preparation

    Import the following libraries before you start:

    import torch 
    from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel 
    from nemo.collections.nlp.parts.megatron_trainer_builder import MegatronTrainerBuilder 
    from omegaconf import OmegaConf

    Model modification

    After merging, the combined tokenizer has a larger vocabulary than the tokenizer of the pretrained GPT-megatron-1.3B model. This means that you must extend the embedding layer of the GPT-megatron-1.3B model to accommodate the combined tokenizer (Figure 2).

    Figure 2. Workflow for modifying the model embedding layer to adopt the customized tokenizer

    The key steps include the following:

    • Extracting the weights of the original embedding layer.
    • Creating a new, larger embedding layer that accommodates the merged tokenizer's vocabulary.
    • Initializing the newly added embedding entries to zero.

    Initializing the added entries to zero, rather than to random values, means that the new tokens initially contribute nothing to the model's output. The behavior of the original model is therefore preserved at the start of continual pretraining, which keeps early training stable while the embeddings for the new tokens are learned.
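
    To see why zero-initialization is safe, here is a minimal, self-contained sketch in plain PyTorch (independent of NeMo; all sizes are hypothetical) showing that appending zero rows to an embedding table leaves lookups for the original token IDs untouched:

    import torch

    # Hypothetical sizes, for illustration only.
    old_vocab, hidden = 8, 4
    old_embed = torch.randn(old_vocab, hidden)

    # Append zero rows for 3 new tokens.
    new_embed = torch.cat((old_embed, torch.zeros(3, hidden)), dim=0)

    # Lookups for the original token IDs are bit-identical.
    ids = torch.tensor([0, 3, 7])
    assert torch.equal(old_embed[ids], new_embed[ids])

    # New token IDs map to zero vectors until they are trained.
    print(new_embed[old_vocab])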

    Loading the model and extracting the embedding layer

    Use the following code to load the GPT-megatron-1.3B.nemo model:

    #Initialization
    trainer_config = OmegaConf.load('/opt/NeMo/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml')
    trainer_config.trainer.accelerator='gpu' if torch.cuda.is_available() else 'cpu'
    trainer = MegatronTrainerBuilder(trainer_config).create_trainer()
    #load gpt-megatron-1.3b.nemo and its config
    nemo_model = MegatronGPTModel.restore_from('./path_to_1.3B_nemo_model',trainer=trainer)
    nemo_config = OmegaConf.load('./path_to_1.3B_nemo_model_config.yaml')
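
    Before editing the restored config, you can inspect the tokenizer section it carries to see which files the model currently expects (the key layout follows the standard megatron_gpt_config; exact names may vary by NeMo version):

    # Inspect the tokenizer settings carried in the model config.
    print(OmegaConf.to_yaml(nemo_config.tokenizer))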

    After the model is loaded, you can extract the weights of the embedding layer from the model's state_dict. The embedding layer weights are used later to create the new embedding layer.

    #Extract original embedding layer
    embed_weight = nemo_model.state_dict()['model.language_model.embedding.word_embeddings.weight']
    print(f"Shape of original embedding layer: {embed_weight.shape}")

    Generating the new embedding layer

    Next, calculate the difference between the new and the original vocabulary sizes to determine how many rows the new embedding layer needs. The new embedding layer keeps the original weights and appends newly initialized entries for the added tokens.

    The calculation is based on the following inputs:

    • The vocabulary size of the merged tokenizer
    • The size of the original embedding layer
    • The model.make_vocab_size_divisible_by setting in model_config.yaml

    The number of rows to add is computed from the following, where:

    • T = vocabulary size of the merged tokenizer
    • E = number of rows in the original embedding layer
    • d = model.make_vocab_size_divisible_by

    rows_to_add = ceil(T / d) * d - E

    For the best performance on GPUs, the vocabulary size of the embedding layer should be divisible by 8; NeMo enforces this through the model.make_vocab_size_divisible_by setting by padding the vocabulary up to the next allowed multiple. If the merged tokenizer's vocabulary size already meets the constraint, no extra padding rows are needed.

    Load the merged tokenizer that you trained in Part 1 and calculate how many embedding rows must be added:

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained('./path_to_new_merged_tokenizer')
    # Pad the new vocabulary up to the next multiple of
    # make_vocab_size_divisible_by, then count the rows to add.
    if len(tokenizer) % nemo_config.make_vocab_size_divisible_by != 0:
      tokenizer_diff = (int(len(tokenizer)/nemo_config.make_vocab_size_divisible_by)+1) * nemo_config.make_vocab_size_divisible_by - embed_weight.shape[0]
    else:
      tokenizer_diff = len(tokenizer) - embed_weight.shape[0]
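
    As a concrete check, here is a worked example of the same arithmetic with hypothetical numbers (a merged vocabulary of 60,260 tokens, an original embedding of 51,200 rows, and make_vocab_size_divisible_by=128 are assumptions for illustration):

    import math

    merged_vocab = 60260    # hypothetical len(tokenizer)
    original_rows = 51200   # hypothetical embed_weight.shape[0]
    divisible_by = 128      # hypothetical make_vocab_size_divisible_by

    padded_vocab = math.ceil(merged_vocab / divisible_by) * divisible_by
    rows_to_add = padded_vocab - original_rows
    print(padded_vocab, rows_to_add)  # 60288 9088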

    There are several ways to initialize the additional embedding entries. In this example, the additional entries are initialized to zero and concatenated to the original embedding weights to form the new embedding layer:

    hidden_size = embed_weight.shape[1]
    random_embed = torch.zeros((tokenizer_diff, hidden_size)).to('cuda')
    new_embed_weight = torch.cat((embed_weight, random_embed), dim=0)
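
    As an alternative to zero-initialization (not part of the original tutorial), a common heuristic is to start the new rows at the mean of the existing embeddings plus a small amount of noise, so new tokens begin near the distribution the model already knows; a minimal sketch:

    # Hypothetical alternative: mean-initialize the added rows.
    mean_embed = embed_weight.mean(dim=0, keepdim=True)
    noise = torch.randn(tokenizer_diff, hidden_size,
                        device=embed_weight.device, dtype=embed_weight.dtype) * 0.02
    alt_rows = mean_embed.expand(tokenizer_diff, -1) + noise
    alt_embed_weight = torch.cat((embed_weight, alt_rows), dim=0)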

    Modifying and outputting the new model

    In this step, you update the tokenizer paths in the model config and create the new model from it. The new model is instantiated from the modified config rather than copied directly, so it is built for the merged tokenizer's vocabulary. Its weights come from the original model, with only the embedding layer replaced by the extended one.

    First, take the state_dict of the original model and replace its original embedding weights with the newly created embedding weights.

    Finally, create a new model from the modified config, load the updated state_dict into it, and save it as a new .nemo file that is ready for continual pretraining.

    state_dict = nemo_model.state_dict()
    state_dict['model.language_model.embedding.word_embeddings.weight'] = new_embed_weight
     
    NEW_TOKENIZER_PATH = './path_to_new_merged_tokenizer'
    nemo_config['tokenizer']['vocab_file'] = f"{NEW_TOKENIZER_PATH}/vocab.json"
    nemo_config['tokenizer']['merge_file'] = f"{NEW_TOKENIZER_PATH}/merges.txt"
    nemo_config['vocab_file'] = f"{NEW_TOKENIZER_PATH}/vocab.json"
    nemo_config['merges_file'] = f"{NEW_TOKENIZER_PATH}/merges.txt"
     
    new_nemo_model = MegatronGPTModel(nemo_config,trainer)
    new_nemo_model.load_state_dict(state_dict)
    new_nemo_model.save_to('./path_to_modified_nemo_model')
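
    As a quick sanity check (optional; this reuses the trainer and the placeholder path from above), you can reload the saved model and confirm that the embedding layer now has the extended shape:

    # Reload the modified model and verify the extended vocabulary size.
    check_model = MegatronGPTModel.restore_from('./path_to_modified_nemo_model', trainer=trainer)
    new_weight = check_model.state_dict()['model.language_model.embedding.word_embeddings.weight']
    print(f"Shape of extended embedding layer: {new_weight.shape}")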

    Use the following command to run inference with the new model and spot-check its output:

    python /opt/NeMo/examples/nlp/language_modeling/megatron_gpt_eval.py \
    gpt_model_file='./path_to_modified_nemo_model' \
    prompts='ENTER YOUR PROMPT' \
    inference.greedy=True \
    inference.add_BOS=True \
    trainer.devices=1 \
    trainer.num_nodes=1 \
    tensor_model_parallel_size=-1 \
    pipeline_model_parallel_size=-1

    Data preprocessing

    Use the following code to preprocess the training, validation, and test datasets into the indexed format used for training. Run the preprocessing step three times, once each for the training, validation, and test datasets.

    The --json-keys argument specifies which key in the JSON-lines dataset holds the document text:

    python /opt/NeMo/scripts/nlp_language_modeling/preprocess_data_for_megatron.py \
    --input='./path_to_train/val/test_dataset' \
    --json-keys=text \
    --tokenizer-library=megatron \
    --vocab './path_to_merged_tokenizer_vocab_file'\
    --dataset-impl mmap \
    --tokenizer-type GPT2BPETokenizer \
    --merge-file './path_to_merged_tokenizer_merge_file' \
    --append-eod \
    --output-prefix='./path_to_output_preprocessed_dataset'
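
    For reference, the --input file is expected to be JSON lines with one document per line. Here is a minimal sketch that writes a two-line sample matching --json-keys=text (the file name and contents are hypothetical):

    import json

    docs = [{"text": "First training document."},
            {"text": "Second training document."}]

    # One JSON object per line; the "text" key matches --json-keys=text.
    with open('sample_dataset.jsonl', 'w') as f:
        for doc in docs:
            f.write(json.dumps(doc) + '\n')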

    Continual pretraining

    The default config file used for continual pretraining may describe a different model architecture than your model, which causes errors during training. The following code overwrites the matching entries in the default config with the values from the original model's config, and resets the batch size, dataset, and tokenizer settings:

    ori_conf = OmegaConf.load('./path_to_original_GPT-1.3B_model/model_config.yaml')
    conf = OmegaConf.load('/opt/NeMo/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml')
    for key in ori_conf.keys():
      conf['model'][key] = ori_conf[key]
    # Set global_batch_size based on micro_batch_size
    conf['model']["global_batch_size"] = conf['model']["micro_batch_size"] * conf.get('data_model_parallel_size',1) * conf.get('gradient_accumulation_steps',1)
    # Reset data_prefix (dataset path)
    conf['model']['data']['data_prefix'] = '???'
    # Reset tokenizer config 
     
    NEW_TOKENIZER_PATH = "./path_to_new_merged_tokenizer"
    conf['model']['tokenizer']['vocab_file'] = f"{NEW_TOKENIZER_PATH}/vocab.json"
    conf['model']['tokenizer']['merge_file'] = f"{NEW_TOKENIZER_PATH}/merges.txt"
    conf['model']['vocab_file'] = f"{NEW_TOKENIZER_PATH}/vocab.json"
    conf['model']['merges_file'] = f"{NEW_TOKENIZER_PATH}/merges.txt"
    OmegaConf.save(config=conf,f='/opt/NeMo/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml')
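
    The global_batch_size line above follows the usual Megatron relation between batch settings. With hypothetical values, the arithmetic looks like this:

    # global_batch_size = micro_batch_size * data_parallel_size * gradient_accumulation_steps
    micro_batch_size = 1     # per-GPU batch size (hypothetical)
    data_parallel_size = 4   # for example, 4 GPUs with no model parallelism (hypothetical)
    grad_accum_steps = 8     # hypothetical
    print(micro_batch_size * data_parallel_size * grad_accum_steps)  # 32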

    Run the following command to start continual pretraining. Modify the following parameters to match your hardware and setup:

    • nproc_per_node: Number of GPUs per node.
    • model.data.data_prefix: Paths to your training, validation, and test datasets; the format is shown in the DATA line of the following command.
    • exp_manager.name: Name of the output folder. Intermediate checkpoints are saved in the ./nemo_experiments/<exp_manager.name> folder.
    • trainer.devices: Number of GPUs per node.
    • trainer.num_nodes: Number of nodes.
    • trainer.val_check_interval: Interval (in steps) for performing validation checks during training.
    • trainer.max_steps: Maximum number of training steps.
    • model.tensor_model_parallel_size: Keep at 1 for the 1.3B model. Use a larger size for larger models.
    • model.pipeline_model_parallel_size: Keep at 1 for the 1.3B model. Use a larger size for larger models.
    • model.micro_batch_size: Adjust according to the GPU's vRAM.
    • model.global_batch_size: Depends on the micro_batch_size value; for more information, see the batching documentation.

    DATA = '{train:[1.0,training_data_indexed/train_text_document], validation:[training_data_indexed/val_text_document], test:[training_data_indexed/test_text_document]}'
     
    !torchrun --nproc_per_node=1 \
    /opt/NeMo/examples/nlp/language_modeling/megatron_gpt_continue_training.py \
    "model.data.data_prefix={DATA}" \
    name=megatron_gpt_ \
    exp_manager.name=megatron_gpt_1 \
    restore_from_path='./path_to_modified_nemo_model' \
    trainer.devices=1 \
    trainer.num_nodes=1 \
    trainer.precision=16 \
    trainer.val_check_interval=300 \
    trainer.max_steps=1200 \
    model.megatron_amp_O2=False \
    model.tensor_model_parallel_size=1 \
    model.pipeline_model_parallel_size=1 \
    model.micro_batch_size=1 \
    model.global_batch_size=1 \
    ++model.use_flash_attention=False \
    ++model.seq_len_interpolation_factor=null

    Model inference

    During training, intermediate model checkpoints are generated and saved in the ./nemo_experiments folder. In this folder, you can find the model checkpoint files and the matching hparams.yaml file, both of which are needed for inference.

    Use the following code to run inference with the checkpoint files:

    python /opt/NeMo/examples/nlp/language_modeling/megatron_gpt_eval.py \
    'checkpoint_dir="./path_to_checkpoint_folder"' \
    'checkpoint_name="name of checkpoint file in .ckpt format"' \
    'hparams_file="./path_to_hparams_file"' \
    prompts='ENTER YOUR PROMPT' \
    inference.greedy=True \
    inference.add_BOS=True \
    trainer.devices=1 \
    trainer.num_nodes=1 \
    tensor_model_parallel_size=-1 \
    pipeline_model_parallel_size=-1

    Table 1 compares the output generated by the original GPT-megatron-1.3B model and the GPT-megatron-1.3B-TH model after continual pretraining on Thai data, for an English and a Thai prompt. Some repetitive output is truncated.

    Prompt: The capital city of Thailand is
    GPT-megatron-1.3B.nemo output: The capital city of Thailand is from the capital city of Bangkok. The capital city of Thailand is from the capital city of Bangkok.\n\nHistory\n\nThe capital city of Thailand was originally called Chiang Mai, which means “city of the sun”. The capital city of Thailand was originally called Chiang Mai, which means “city of the sun”.
    Trained GPT-megatron-1.3B-TH.nemo output: The capital city of Thailand is by the Thai Government of Thailand\n\n[Thai text]\n(The Capital city of Thailand is by the Thai Government of Thailand\n\nOther sources\n\nCities in Chiang Rai Province\nAncient cities in Thailand\n)

    Prompt: [Thai text] (Bangkok is a popular tourist destination. The most famous tourist attractions are)
    GPT-megatron-1.3B.nemo output: [Thai text] (Bangkok is a popular tourist destination. The most famous tourist attraction is the most famous working place.)
    Trained GPT-megatron-1.3B-TH.nemo output: [Thai text] (Bangkok is a popular tourist destination. The most famous tourist attractions are: “Wat Thammathipatai” which is the temple that King Chulalongkorn built.)
    Table 1. Comparison of the outputs of the original model and the Thai-trained model

    After continual pretraining, the new model gains the ability to understand and generate Thai. However, its performance on English degrades, because the model was continually pretrained on a Thai-only dataset. To mitigate this catastrophic forgetting, blend data from the original source language into the continual pretraining dataset.
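
    In practice, one way to do this is to weight-blend the original English corpus with the new Thai corpus in model.data.data_prefix, which accepts alternating weight/prefix pairs. The 70/30 split and the file prefixes below are assumptions for illustration:

    # Hypothetical 70% Thai / 30% English blend for the training split.
    DATA = '{train:[0.7,thai_data_indexed/train_text_document,0.3,english_data_indexed/train_text_document], validation:[thai_data_indexed/val_text_document], test:[thai_data_indexed/test_text_document]}'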

    Conclusion

    In this post, you learned how to give a pretrained LLM the ability to understand and generate a new target language by customizing its tokenizer and continually pretraining the model. The full workflow covers training a monolingual tokenizer, merging it with the pretrained model's tokenizer, extending the model's embedding layer, and running continual pretraining on the target-language dataset.

    This tutorial focuses on demonstrating the end-to-end workflow with a small dataset and a short training run rather than producing an optimally tuned model. For real use cases, curate a larger, high-quality dataset in the target language and experiment with the training configuration.

    To get started, download the NeMo framework container, or download and set up the open-source /NVIDIA/NeMo library on GitHub. You can use your own curated datasets in low-resource languages to train customized tokenizers and build localized multilingual LLMs.

    To streamline this workflow further, the NVIDIA NeMo Curator and NVIDIA NeMo Customizer microservices simplify data curation and LLM customization, helping you bring solutions to market faster.

    Related resources
