• <xmp id="om0om">
  • <table id="om0om"><noscript id="om0om"></noscript></table>
  • Generative AI

    NVIDIA NIM을 활용한 다국어 LLM 배포

    Reading Time: 6 minutes

    ??? ?? ?? ??(LLM)? ???? ?? ????? ???? ???? ???? ???????? ?? ? ????? ????. ????? ??? ??? ?? ??? ???? ?? ?? ??? ????? ?? ???? ??? ????? ?? ??? ????? ???. ??? ??? LLM? ???? ?? ?????? ?? ??? ??? ??? ???? ???? ??? ? ????.?

    ????? ??? ??? ??? ??? ? ?? ???? ?????. ??? ?? ??? ???? ??????? ??? ?? ?? ??? ??? ??? ?? ??? ???? ????.?

    ?? ?? LLM? ??? ?? ? ??? ??? ???, ?? ? ??? ??? ???? ???? ? ???? ????. ?? ???? ??? ?? ??? ?????? ?? ??? ????? ??? ???? ???? ??? ??? ??? ???? LLM? ?? ??? ????? ???? ????? ?????. ????? LLM? ??? ??? ??? ????? ??? ??, ??? ?? ? ??? ???? ???? ??? ???? ??? ?? ???? ??? ???? ?????.?

    ?? Meta Llama 3 ??? ?????, “??? ?? ??? ???? ?? Llama 3 ?? ???? ??? ??? 5% ??? 30? ??? ??? ??? ??? ??? ???? ?????. ??? ??? ????? ??? ??? ??? ??? ???? ????.”? ???????.

    ? ?????? ?? ?? ??? LoRA ??? ??? 2?? ???? ?? Llama 3 NIM? ?? ???? NVIDIA NIM? ??? ? ??? ??? ???? ?? ?????.

    ??? ???? ???? Llama 3? ?? ??? ???? ?????. ???? ??? ? ??? ????? ?? ?? ???? ?????. ?? LoRA? ??? ? ??? ?? ? ?? ??? ???? ?? ?? ?? ??? ???? ??? ?????.

    ?? ?? LoRA ??? ? ??? ?? ??? ?? A? B? ?? ?? ?? ??? ??? ? ????. ??? ???? ?? ?? LLM? ???? ???? ???? GPU ??? ?? ??? ????? ??? ? ????.

    ???? ?? ??

    ? ????? ??? ????? LLM ???? ? ?? ?????? ?? ?? ??? ??? ?? ??? ?????.

    NVIDIA NIM이란 무엇인가요?

    NVIDIA AI Enterprise?? ??????? ??? AI ?? ??? ???? ??? ???? ?? ??????? ??? NVIDIA NIM? ???? ????. NVIDIA AI ?????, ???? ? ??? ??? ??? ???? AI ??? ???? ?? ?? API? ???? ????? ?? ?????? ???? ?? ??? AI ??? ?????.

    ?? 1. ?? ???, Llama3-8b-Instruct? ?? ???, ??? ??? ?? ?? ??? ?? NVIDIA NIM ????

    NIM? AI ???? ??? ???? ?? ????? API? ?????. ???? ???? ?? ?? ?? ??? ???? ??????(?? 1 ??). ? NIM? ??? ?? ?? Docker ?????? ??? GPU ???? ?? ?? NVIDIA GPU?? ???? ???? ?????. NIM? NVIDIA H100 Tensor Core GPU, NVIDIA A100 Tensor Core GPU, NVIDIA A10 Tensor Core GPU, NVIDIA L40S GPU? ?? ???? ??? ?? ?? ???? NVIDIA TensorRT-LLM? ???? ??? ??????.

    ? ??? ?????? ??? ??? ?? LoRA ??? ???? ???? ??? ???? ?? NIM? ?????.

    ????? LLM? ?? ?? ?????

    NVIDIA NIM?? ?? ??? LLM ??? ???, ?? ?? ???? ??? ? ????. ?? ??? ??? GPU? ???? Llama-3-8b-instruct NIM? ?????.

    docker run -it --rm --name=meta-llama-3-8b-instruct \
    --runtime=nvidia \
    --gpus all \
    -p 8000:8000 \
    nvcr.io/nim/meta/llama3-8b-instruct:1.0.0

    ??? ???? ?? ??? ???? ??? ??? ? ????.

    curl -X 'POST' \
    'http://0.0.0.0:8000/v1/completions' \
    -H 'accept: application/json' \
    -H 'Content-Type: application/json' \
    -d '{
      "model": "meta/llama3-8b-instruct",
      "prompt": "Once upon a time",
      "max_tokens": 64
    }'

    ?? ??? ???? ?? ??? ?????.

    docker stop meta-llama-3-8b-instruct

    ?? ??? LLM? ?? ?? ?????

    ??? ??? ?? ?? ??? ?? ???? ???? ??? ????? ????? ??? ??? ??? ??? ???? ???? ???? ????.

    ? ?? LoRA? ?? ???? ???? ?? ?? ??? ???? ??? ??? ?? ? ?? ??? ????? ?? ????? LLM? ??? ? ????.?

    ??? LLM? ???? ?? ?? ?? ?? ??? ??? ????? ???? ?? ???? ????. ?? ?? Llama 2? ?? ?? ?? LLM?? ?? ?? ????? LoRA ??? ??? ?? ? ?? ? ????. ? ?? ?? ???? ?? ??? ????? ???? ?? ?? ??? ??? ???? ???.?

    ?? LoRA? ??? ? ?? ???? ??? ?? ??? ????, ??? LoRA ??? ??? ?? ???? ?? A ? B? ?? ?? ?? ??? ???? ???? ?? ??? ???? ??? LoRA? ??????. ??? ???? ?? ?? LLM? ???? ???? GPU ??? ?? ??? ???? ????? ??? ? ????. ? ????? LoRA? ?? ??? ??? ?????.

    NVIDIA NIM? HuggingFace ?? NVIDIA NeMo? ???? ????? LoRA ???? ????, Llama 3 8B Instruct ?? ??? ??? ?? ?? ??? ??? ???? ? ??? ?????. ? ??? ????????? NIM? ?? LoRA ??? ??? ?? ??? ?????.

    ?? LoRA? ???? ?? ??? NIM ??? ???? ??? ?????.

    ??? ???????.

    1. ??? ?? git lfs? ?????.

    git lfs install

    2. Hugging Face? ??? ? ??? LoRA ??? ??? Git? ?????.

    export LOCAL_PEFT_DIRECTORY=~/loras
    mkdir $LOCAL_PEFT_DIRECTORY
    pushd $LOCAL_PEFT_DIRECTORY
    
    git clone https://huggingface.co/AdithyaSK/LLama3-Gaja-Hindi-8B-Instruct-alpha
    git clone https://huggingface.co/shibing624/llama-3-8b-instruct-262k-chinese-lora
    
    popd
    
    chmod -R 777 $LOCAL_PEFT_DIRECTORY

    LoRA ?? ??? ??

    LoRA ???? ??? ????, ??? LOCAL_PEFT_DIRECTORY ???? ?? ?? ??? LoRA ????? ???? ???. ??? ???? ???? ??? ????? ??? ?? ???? ?????. LLM? NVIDIA NIM? NeMo ? HuggingFace Transformers ?? ??? ?????.

    HuggingFace? ?? LoRA?? adapter_config.json ??? {adapter_model.safetensors, adapter_model.bin} ?? ? ??? ????? ???. NIM? ???? ?? ??? ["gate_proj", "o_proj", "up_proj", "down_proj", "k_proj", "q_proj", "v_proj"]???.

    LOCAL_PEFT_DIRECTORY? ?? ??? ?? ????? ???.

    loras
    ├── llama-3-8b-instruct-262k-chinese-lora
    │   ├── adapter_config.json
    │   └── adapter_model.safetensors
    └── LLama3-Gaja-Hindi-8B-Instruct-alpha
        ├── adapter_config.json
        └── adapter_model.safetensors

    NIM? ?? ?? LoRA ?? ??

    ?? LoRA ??? ??? ??? ??? ??? ?? ??? ??? ? ????. ?? ?? NIM? ???? ?? ????? ?? LoRA ????? ?????.

    export NIM_PEFT_SOURCE=/home/nvs/loras
    export LOCAL_PEFT_DIRECTORY=/home/nvs/loras
    export NIM_PEFT_REFRESH_INTERVAL=3600   
    export CONTAINER_NAME=meta-llama3-8b-instruct
    
    export NIM_CACHE_PATH=~/nim-cache
    mkdir -p $NIM_CACHE_PATH
    chmod -R 777 $NIM_CACHE_PATH

    NIM? ???? ?? ??? ?? ?? ?? LoRA? ??? ? ??? ??? 32? ?????. ??? LoRA? ?? ? ?? ??? 64? ??? ??????? ?? ??? ????? ?? ???? ???.

    export NIM_MAX_LORA_RANK=64

    ?? LoRA? ?? NIM? ?????. ? ??? Llama 3 ????? ??? ???? ?? ????? LOCAL_PEFT_DIRECTORY? ??? ?? LoRA ??? ?????.

    docker run -it --rm --name=$CONTAINER_NAME \
        --runtime=nvidia \
        --gpus all \
        --shm-size=16GB \
        -e NGC_API_KEY \
        -e NIM_PEFT_SOURCE \
        -e NIM_PEFT_REFRESH_INTERVAL \
        -e NIM_MAX_LORA_RANK \
        -v $NIM_CACHE_PATH:/opt/nim/.cache \
        -v $LOCAL_PEFT_DIRECTORY:$NIM_PEFT_SOURCE \
        -p 8000:8000 \
        nvcr.io/nim/meta/llama3-8b-instruct:1.0.0

    ???? ??? ??? LoRA ??? ?? ??? ??? ? ????. ?? ??? ???? ?? ??? LoRA NIM? ?????.

    curl -X GET 'http://0.0.0.0:8000/v1/models'

    ??? ?? ?? ??? ??? ? ?? ?? LoRA ??? ??? ?????.

    {
    "object":"list",
    "data":[
    {"id":
    "meta/llama3-8b-instruct","object":"model","created":1717511877,"owned_by":"system","root":"meta/llama3-8b-instruct","parent":null,"permission":[{"id":"modelperm-06017a10c1b1422cb0596baa7fec744d","object":"model_permission","created":1717511877,"allow_create_engine":false,"allow_sampling":true,"allow_logprobs":true,"allow_search_indices":false,"allow_view":true,"allow_fine_tuning":false,"organization":"*","group":null,"is_blocking":false}]},
    {"id":"llama-3-8b-instruct-262k-chinese-lora","object":"model","created":1717511877,"owned_by":"system","root":"meta/llama3-8b-instruct","parent":null,"permission":[{"id":"modelperm-ad5ce194c084490ca9f8aa5f23c4fd2f","object":"model_permission","created":1717511877,"allow_create_engine":false,"allow_sampling":true,"allow_logprobs":true,"allow_search_indices":false,"allow_view":true,"allow_fine_tuning":false,"organization":"*","group":null,"is_blocking":false}]},
    {"id":"LLama3-Gaja-Hindi-8B-Instruct-alpha","object":"model","created":1717511877,"owned_by":"system","root":"meta/llama3-8b-instruct","parent":null,"permission":[{"id":"modelperm-e11405b8a2f746f5b189de2766336eac","object":"model_permission","created":1717511877,"allow_create_engine":false,"allow_sampling":true,"allow_logprobs":true,"allow_search_indices":false,"allow_view":true,"allow_fine_tuning":false,"organization":"*","group":null,"is_blocking":false}]}
    ]
    }

    NIM? ?? ?? 

    ??? ?? ?? cURL ??? ???? LoRA ??? ??? ?????.

    curl -X 'POST' \
      'http://0.0.0.0:8000/v1/completions' \
      -H 'accept: application/json' \
      -H 'Content-Type: application/json' \
      -d '{
    "model": "llama-3-8b-instruct-262k-chinese-lora",
    "prompt": "介紹一下機器學習",
    "max_tokens": 512
    }'

    ??? ??? ????.

    {
    "id":"cmpl-92780e47ef854328a48330d6813e8a26",
    "object":"text_completion",
    "created":1717512584,
    "model":"llama-3-8b-instruct-262k-chinese-lora",
    "choices":[
    {
    "index":0,
    "text":"算法的基本概念和應用場景?\n\n機器學習算法是一類用于自動處理和分析數據的算法。這些算法可以學習從數據中提取模式、關系和預測性質。在這個回答中,我們將介紹機器學習算法的基本概念和應用場景。\n\n機器學習算法的基本概念:\n\n1. 訓練數據集:機器學習算法學習從數據集中獲取的樣本。\n2. 模型訓練:算法分析訓練數據集,學習模式和關系。\n3. 測試數據集:訓練后的模型評估性能在新的數據集上。\n4. 訓練和測試迭代:重復訓練和測試步驟,以提升算法的準確性。\n\n機器學習算法的應用場景:\n\n1. 數據挖掘:機器學習算法用于發現隱藏在數據中的模式和關系。\n2. 預測和預測分析:算法用于預測未來事件、趨勢和績效。\n3. recommender systems:機器學習算法推薦產品、服務或內容,基于用戶行為和偏好。\n4. 自然語言處理(NLP):機器學習算法用于理解、翻譯和生成人類語言。\n5. 圖像和視頻處理:算法用于圖像和視頻分類、識別和無人機。\n\n總之,機器學習算法是自動處理和分析數據的強大工具。它們可以用于各種應用場景,包括數據挖掘、預測分析和自然語言處理。通過 sürekli 進化和改進,機器學習算法繼續驅動各種 industries 和領域的創新。",
    "logprobs":null,
    "finish_reason":"stop",
    "stop_reason":null}],
    "usage":{
    "prompt_tokens":6,
    "total_tokens":370,
    "completion_tokens":364}}

    ?? ??? LoRA? ??? ?????.

    curl -X 'POST'   'http://0.0.0.0:8000/v1/completions'   -H 'accept: application/json'   -H 'Content-Type: application/json'   -d '{
    "model": "LLama3-Gaja-Hindi-8B-Instruct-alpha",
    "prompt": "??? ???? ??? ??????? ???? ?? ???? ????? ???? ???? ???? ???? ????? ????? ?? ???? ????? ?????",
    "max_tokens": 512
    }'

    ? ??? ??? ????.

    {
    "id":"cmpl-ddf3fa6e78fc4d24b0bbbc446fb57451",
    "object":"text_completion",
    "created":1717513888,
    "model":"LLama3-Gaja-Hindi-8B-Instruct-alpha",
    "choices":[
    {"index":0,
    "text":" ????? ????? ??? ???? ?????? ??????? ?? ????????? ?? ???? ??? ???? ???? ??, ????? ?? ???, ??? ?? ?? ??? ?????? ?? ????? ???? ?? ????\n????? ???? ?????, ??????, ?? ????? ?? ????? ????????? ?? ??? ?????? ???? ?? 24 ???? ?? ???????? ???? ?? ??, ?? ?????? ???? ???????????? ?? ???????? ???? ?? ???? ???? ????? ????? ??? ?? ??? ????? ???????? ???? ????, ?? ??? ??? ????? ??? ?? ???? ??????? ?? ?????? ????? ?? ???? ?? ??? ???? ??? ?? ??? ???? ????? ??? ??, ????? ??? ?? ???? ???? ?? ?? ???? ???? ???? ??? ??? ????????? ?? ???????????? ?? ?????? ?? ?? ?????? ???? ??? ??, ?? ?? ?????? ??????????? ??? ??? ??? ??????? ?? ??????? ??????, ????????? ?? ?????? ?? ???? ?????? ??????? ?? ??????? ???? ?? ??? ?????? ???? ????, ???? ???? ??? ????????? ?? ??????? ??? ??? ????? ???\n??? ???? ????????? ????? ?? ????? ???????? ???? ????? ???, ???? ???? ???? ???? ?? ?????? ???? ?? ??????? ????? ????? ???? ?? ???? ???????? ?? ???? ??? ?? ??? ??, ?? ??_epsilon.org ?? ??? ???? ?? ??? ???? ?????? trauma ?? ??? ?? ?????? ??? ?_registers (???????, ??????, ???",
    "logprobs":null,
    "finish_reason":"length",
    "stop_reason":null}],
    "usage":{"prompt_tokens":47,"total_tokens":559,"completion_tokens":512}
    }

    ??? ???? NIM? LangChain?? ??? ?? ????.

    from langchain_nvidia_ai_endpoints import ChatNVIDIA
    
    llm = ChatNVIDIA(base_url="http://0.0.0.0:8000/v1", model="llama-3-8b-instruct-262k-chinese-lora", max_tokens=1000)
    
    result = llm.invoke("介紹一下機器學習")
    print(result.content)

    ??? ??? NIM? ???? ?? ?? ??? https://docs.nvidia.com/nim?? ?????.

    ??

    NVIDIA NIM? ???? ?? LoRA ???? ???? ?? ? ???? ???? ?? ??? ?? ??? ??? ? ????. ? ??? ???? ??? ??? ?? NIM? ?? ?? ?? LoRA? ???? ? ???? ?? LoRA ???? ???? ??? ? ????.

    NVIDIA NIM? ???? ??? ?????. ???? NVIDIA API ???? ??? ???? AI ??? ????? AI ??????? ???? ??? ? ????.

    ??? ??? ?????? ???? ?????? ?? ????? ??? ????? ??? API? ?? ?? ??? ? ????. ???? NIM ?? ???? ???? ????? NVIDIA NIM? ?? ??? AI ??? ?? ??? ???? ?????.

    ?? ???

    Discuss (0)
    0

    Tags

    人人超碰97caoporen国产