• <xmp id="om0om">
  • <table id="om0om"><noscript id="om0om"></noscript></table>
  • Data Center / Cloud

    NVIDIA NIM?? ?? ??? AI ?? ????

    Reading Time: 3 minutes

    ???? ???? AI ????? ??? ????? ????? ??? ??? AI ???????? ??? ????? ??? ???? ?? ?? ??? ??? ???? ???? ???? ??? ??????.

    NVIDIA NIM? ???? ???? ?? ??(PEFT)? ?? ??? ?? ??? ??? ??? ????, ?? AI ????? ??? ?? ?? ??? ?? ??? ?? ???????? ?????.

    ?? ????? ???? ?? ??, DPO(?? ??), ?? ?? ??(SFT) ?? ?? ?? ?? ??? ??? ? ????. ? ???? PEFT? ?? ?? ??(LoRA) ???? ??, ???? ??? ?? ???? ?? ??? ???? ?? ?????. ??? ??, ??? ???? ???? ??? ???? ?? ?? ?? ????? ??? ?????? ???.

    NIM? ? ??? ??? ?????, ?? ???? ??? ??? GPU? ???? TensorRT-LLM ?? ?? ??? ???? ?????. ??, ?? ??? ?? ?? ?????? ?? ??? ?? ??? ??? ? ????.

    ? ?????? ???? ??? ?? ???? TensorRT-LLM ?? ??? ??? SFT? ??? ??? ??? NIM ???????? ??? ???? ??? ?????. ??? ?? ???? ? ?? ??? ??? ???? ?? ?? ??? ? ? ????.

    ?? ??

    ? ????? ????? 80GB? GPU ???? ???? ? ?? git-lfs? ??? NVIDIA ?? ??? ??? ?????.

    NVIDIA ?? ??? ???? NIM ???????? ??? ???? ?? NGC API ?? ?????.

    1. NVIDIA API ?????? Meta Llama 3 8B Instruct ?? ???? ?????.
    2. ??? ???? ???? ???? ??? ????.
    3. ???? ? ?? ????? ? NIM?? ??? ?????.
    4. ?? ??? API? ???? ? ?? ? ??? ?? NIM ??????? ???? ??????:
      • ??, ?? ? ??????? NIM? ??? ???? ? ?? NVIDIA ??? ???? ???.
      • NVIDIA ?????? ??? ?? ???? ??? 90? NVIDIA AI ?????? ????.

    ??? ??? ??? ??? ?? ??? ??? ? NGC API ?? ???? NIM? ??? ??? ????. ??? ??? LLM? NVIDIA NIM ?? ? ?????.

    NIM ??????? ????

    NGC CLI API ?? ??? ??? ?? ??? ?????:

    export NGC_API_KEY=<<YOUR API KEY HERE>>

    ?? ??? ???? ?? ??? ??? ????? ?? ??? ????, ????, ???? ???:

    export NIM_CACHE_PATH=/tmp/nim/.cache
    mkdir -p "$NIM_CACHE_PATH"
    chmod -R 777 "$NIM_CACHE_PATH"

    NIM?? ?? ??? ??? ?????, ???? ???? ???? TensorRT-LLM ?? ??? ????, ?? ????? SFT(?? ?? ??)? ?? ??????? ??? ???? ??? ???. ? ??????? OpenMathInstruct-2 ??? ??? ??? Meta? Llama-3.1-8B? ??????? NVIDIA OpenMath2-Llama3.1-8B ??? ?????.

    ?? ??? LLM? ???? ??? NIM?? ????? ???. ???? ??? NIM ???????? ?? ??? ??? NVIDIA API ?????? NIM ??: Run Anywhere ??? ?????.

    ? ??? ??? ???? ?? ???? ??? ? ????. ?? ?????? ?? ???? ??? ?? ?????? ???? ??? ????:

    git lfs install
    git clone https://huggingface.co/nvidia/OpenMath2-Llama3.1-8B
    export MODEL_WEIGHT_PARENT_DIRECTORY=$PWD

    ?? ???? ????? ?? ??? ??? ???????? ??? ???.

    ?? ??? ?? ????? ????

    ??? ??? ???? ??? ?? ?? ??? ?? ?? ???? ???? ?????. ?? ?? ?? ??? ??? ? ?? ?? ???? ?? ? ?????:

    • ?? ??: ?? ?? ???? ???? ?? NIM ??????? ??.
    • ???: ?? ??? ???? ???? ?? NIM ??????? ??.

    ???? ??? ? ?? ??? ?? ??? ??? NVIDIA NIM ???? ?? ???? ??? ?????.

    SFT ?? ?? ??

    ?? ??? ???? ??? ??? OpenMath2-Llama3.1-8B? TensorRT-LLM ?? ??? ?????:

    docker run -it --rm --gpus all \
        --user "$(id -u):$(id -g)" \
        --network=host \
        --shm-size=32GB \
        -e NGC_API_KEY \
        -e NIM_FT_MODEL=/opt/weights/hf/OpenMath2-Llama3.1-8B \
        -e NIM_SERVED_MODEL_NAME=OpenMath2-Llama3.1-8B \
        -v "$NIM_CACHE_PATH:/opt/nim/.cache" \
        -v "$MODEL_WEIGHT_PARENT_DIRECTORY:/opt/weights/hf" \
        nvcr.io/nim/meta/llama3_1-8b:1.3.0

    ? ??? NIM ???????? ???? ? ???? ???? ??? ?? ?????. ? ???? OpenMath2-Llama3.1-8B ??? ???? ?? NIM_FT_MODEL ?? ??? ??????.

    ?? ?? NIM? ???? ???? ?? ??? ?????. ? ??? NIM ???????? ???? ??? ????? ?? Python ?? ??? ?????:

    from openai import OpenAI
     
    client = OpenAI(
      base_url = "http://localhost:8000/v1",
      api_key = "none"
    )
     
    completion = client.chat.completions.create(
      model="OpenMath2-Llama3.1-8B",
      messages=[{"role":"user","content":"What is your name?"}],
      temperature=0.2,
      top_p=0.7,
      max_tokens=100,
      stream=True
    )
     
    for chunk in completion:
      if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")
    ??? 1. ?? ??? AI ??? ???? ??

    ??? ?? ???? ???? TensorRT-LLM ?? ????

    ???? GPU ?? ??? ??? ???? NIM ???????? ???? ? ????. ?? ??? ??? ?? ???????? ???? ???????? ???? ? ?? ???? ?????.

    export IMG_NAME="nvcr.io/nim/meta/llama-3.1-8b-instruct:1.3.0"
    docker run --rm --runtime=nvidia --gpus=all \
        -e NGC_API_KEY \
        "$IMG_NAME" list-model-profiles

    H100 GPU? ?? ???? ???? ??? ?? ???? ??? ? ????:

    • tensorrt_llm-h100-fp8-tp1-throughput
    • tensorrt_llm-h100-fp8-tp2-latency

    ??? ?? ???? ?? ?? ??? ???? ??? ???? ?????:

    docker run --rm --runtime=nvidia --gpus=all \
        --network=host \
        -e NGC_API_KEY \
        -e NIM_MODEL_PROFILE=tensorrt_llm-h100-fp8-tp2-latency \
        "$IMG_NAME"

    ?? ??? ???? NIM ???????? ?? ????? Python? ???? ??? ?? ?????:

    from openai import OpenAI
     
    client = OpenAI(
      base_url = "http://localhost:8000/v1",
      api_key = "none"
    )
     
    completion = client.chat.completions.create(
      model="meta/llama-3.1-8b-instruct",
      messages=[{"role":"user","content":"What is your name?"}],
      temperature=0.2,
      top_p=0.7,
      max_tokens=100,
      stream=True
    )
     
    for chunk in completion:
      if chunk.choices[0].delta.content is not None:
        print(chunk.choices[0].delta.content, end="")

    ??

    ?? ???? ?? PEFT ?? SFT ??? ????, NIM? ? ?? ??? ??? ??? ??? ?? ??? ?? ??? ??????. ?? ??? ???? ??? ???? TensorRT-LLM ?? ??? ?? NIM? ???? ???? AI ??? ???? ??? ? ?? ??? ???? ?????.

    ??? ??? ?? ???? ?????:

    NVIDIA ? NIM ??????? ????? ????? NVIDIA NIM ??? ?? ? ?????.

    ?? ???

    Discuss (0)
    0

    Tags

    人人超碰97caoporen国产