• <xmp id="om0om">
  • <table id="om0om"><noscript id="om0om"></noscript></table>
  • Torch-TensorRT? ?? PyTorch?? ?? ?? ?? 6? ????

    Reading Time: 5 minutes

    ?? ? ?? ?? ??? ???? NVIDIA TensorRT? PyTorch? ??? ??? Torch-TensorRT? ?? ?????. PyTorch? ??? ? ?? ??? ?? ???? ??? ? ?? ????????. TensorRT? ?????, ???? ? ????? ?????? ???? GPU ?? ????? ??? ? ?? ??? ?? SDK???. ?? ?? ??? PyTorch ???? TensorRT? ??? ? ???? ?????? ?? ?? ?? ?? ??? ??? ? ????.

    ?? 1. ??? NVIDIA ????? Torch-TensorRT? PyTorch ?? ??? ??

    Torch-TensorRT??

    Torch-TensorRT? NVIDIA GPU?? TensorRT? ?? ???? ???? PyTorch? ?????. ?? ?? ? ???? NVIDIA GPU?? ??? ?? 6? ????? ??? API? ?????.

    ? ??? ??? ??? FP16, INT8 ?? TensorRT ???? ???? ???, TensorRT? ?? ?? ???? ???? ?? ? ?? PyTorch? ????? ?????. ??? ??? NVIDIA Torch-TensorRT ???? ??? ?????.

    Torch-TensorRT? ?? ??

    Torch-TensorRT? TorchScript? ?? ???? ?????. ?? ?? ??? ?? ???? ????? ???? PyTorch? ??? ???? ??? ? ??? ???. PyTorch? ????? ??? ?? ??? ??? ?? ???? ????? TensorRT? ?? ??? ??? ????? Torch-TensorRT? ?? ?????. 

    ??? ? ???? ???? ???? ?? TorchScript ??? ???? ?? ??? ???? ? ?? TensorRT ??? ??? ? ????. Torch-TensorRT ????? ????? ?? ??? ?? ???? ?? ?? ? ??? ?????.

    • TorchScript ?? ???
    • ????
    • ????

    TorchScript ?? ???

    ? ?? ???? Torch-TensorRT? TorchScript ??? ?????, TensorRT? ? ?? ???? ??? ?? ??? ???? ??? ??????. ??? ??? ???? ??? ??? ???? ??? ??? ???? ??? ???? ???.

    ?? 2. TorchScript ??? ?? ?? ? ??

    ????

    ?? ???? Torch-TensorRT? ??? ?? TensorRT? ?? ??? ?? ???? ???? ???? TensorRT ???? ?????.

    • ?? ?? ?? ??? ???? ??? ?????.
    • Tensor ??? ???? ??? ?? ??? TensorRT ???? ?????.
    • ??? ??? TorchScripting? ?? ?? TorchScript ??? ???? ????? ???? ?????.
    ?? 3. ??? ??? ???? ?? Torch? Ops? TensorRT Ops? ??

    ??? ??? TensorRT ??? ????? ??? ?????. ?, PyTorch ??, ?? ???, TensorRT ?? ? ?? ??? ?? ???? ??? ? ????.

    ?? 4. log_sigmoid? TorchScript JIT? ???? ?? Conv2d ???? TensorRT ???? ??

    ????

    ???? ??? ???? Torch-TensorRT? ??? ?? ??? ??? ??? ??? ?????. ??? TorchScript ??? ???? TorchScript ?????? TensorRT ??? ???? ?? ??? ?????. ??? ???? ?? TorchScript ???? ?????? ??? ?? ?????.

    ?? 5. PyTorch ? TensorRT Ops ??? ??

    Torch-TensorRT ??

    Torch-TensorRT?? INT8 ? ??? ??? ?? ??? ???????.

    INT8 ??

    Torch-TensorRT? ?? ? ?? ??? ?? ???? ?? ??? ?????.

    • ???? ? ???(PTQ)
    • ??? ?? ????(QAT)

    PTQ? ?? TensorRT? ?? ???? ?? ???? ??? ???? ?? ??? ?????. IT? FP32? ???? ???? FP32? INT8 ?? ??? ?? ??? ????? INT8? ?? ??? ?????. TensorRT ??????? ????? TensorRT ???? ?? ???? ???? ??? ???? ???? ???.

    Torch-TensorRT? PyTorch? ?? ???? ???? ???? ? ?? ??? ? ????. LibTorch? ???·?? ?? ???? ????? DataLoader ? Dataset API? ?????. ?? API? C++ ? Python ?????? ?? ????? PTQ? ? ?? ??? ? ????. ??? ??? ???? ? ???(PTQ)? ?????.

    QAT? ?? TensorRT?? PyTorch? ??? ?? Ops? TensorRT? ???? ??? API? QuantizeLayer ? DequantizeLayer? ???????. aten::fake_quantize_per_*_affine? ?? ??? ????? Torch-TensorRT? ?? QuantizeLayer + DequantizeLayer? ?????. Torch-TensorRT? ???? PyTorch QAT ??? ????? ??? ????? ??? ?? ??? ??? Torch-TensorRT? ???? INT8? ??? ?? ???? ?? ??? ?????.

    ???

    NVIDIA Ampere ????? ???? ????? ???? ???? ???? NVIDIA A100 GPU? 3?? Tensor ??? ?????. ? ??? ??? ?? ?? ?? ??? ???? ??? ????? ??? ?? ???? ??? ?????.

    • TensorRT? ?? Tensor ???? ? ?? ??? ?? ?? ???? ???? ???? ??? ?????.
    • Torch-TensorRT? ???? ???? ?? ?? ???? ?? ??? ?????.

    ??: ??? ??? ?? ??? ??

    ? ?????? EfficientNet??? ??? ?? ??? ?? ?? ??? ???? PyTorch, TorchScript JIT, Torch-TensorRT? ??? ???? ???? ? ???? ???? ?????. ??? ??? Torch-TensorRT GitHub ?????? ?? ? ?? ?? ???? ?????.

    ?? ? ?? ??

    ? ??? ????? ?? ???? ?????.

    • NVIDIA GPU, ??? ???? 7 ?? ??? ??? Linux ???
    • 19.03 ?? ???? ??? Docker
    • PyTorch, Torch-TensorRT, ??? NGC ?????? ??? ?? ???? ?? Docker ????

    ??? ?? nvcr.io/nvidia/pytorch:21.11-py3 ??? ??? Docker ????? ?????.

    Docker ????? ??? bash ???? ??????? JupyterLab ????? ???? Python ??? ?????. ?? 8888? ?? JupyterLab? ???? ??? TensorRT? ?????. ?????? JupyterLab? ??? ??? ?????? ???? ???? ? ?? ??? IP ??? ?????.

    jupyter lab --allow-root --ip=0.0.0.0 --NotebookApp.token='TensorRT' --port 8888

    ?????? ?? 8888? ?? ? IP ??? ?????. ?? ????? ? ??? ?? ?? ?? Localhost:8888? ?????.

    ?????? JupyterLab? ??? ??? ?????? ???? ?? ??? Jupyter ???? ?? ? ????. ?? ????? ??? ?? ??, ???, ????? ??? PyTorch ?????? timm? ???? ??? ?????. ? ??????? EfficientNet-b0 ??? ?????.

    pip install timm

    ?? ?????? ???? EfficientNet-b0? PyTorch nn.Module ??? ????.

    import torch
    import torch_tensorrt
    import timm
    import time
    import numpy as np
    import torch.backends.cudnn as cudnn
    
    # Replace torch.hub's private fork-validation hook with a stub that accepts
    # three arguments and always returns True.
    # NOTE(review): this patches a private API, presumably to bypass a hub
    # repository check -- confirm it is still needed with the installed torch.
    torch.hub._validate_not_a_forked_repo=lambda a,b,c: True
    
    # Build the 'efficientnet_b0' model through timm, requesting pretrained weights.
    efficientnet_b0 = timm.create_model('efficientnet_b0',pretrained=True)

    ? efficientnet_b0 ??? forward ???? ??? ?? ?? Tensor? ???? ? ????? ???? ????.

    # Put the model in eval mode and move it to the GPU.
    model = efficientnet_b0.eval().to("cuda")
    # Run a single forward pass on a random batch of 128 inputs. Gradients are
    # not needed for inference, so disable autograd tracking to avoid building
    # a graph (and holding its activations) for the whole batch.
    with torch.no_grad():
        detections_batch = model(torch.randn(128, 3, 224, 224).to("cuda"))
    # Left as a bare expression so a notebook cell displays the output shape.
    detections_batch.shape

    ??? ?? 128?? ??? 1,000?? ???? ???? [128, 1000]? Tensor? ?????.

    PyTorch JIT ??? ???? Torch-TensorRT AOT ??? ???? ?? ???? ? ??? ??????? ??? ???? ???? ??? ?????.

    # Turn on the cuDNN benchmark flag before any timing runs.
    # NOTE(review): per the PyTorch docs this lets cuDNN auto-tune its
    # convolution algorithms, which suits fixed input shapes -- confirm.
    cudnn.benchmark = True
    
    def benchmark(model, input_shape=(1024, 3, 512, 512), dtype='fp32', nwarmup=50, nruns=1000, device="cuda"):
        """Time repeated forward passes of ``model`` on random input and print the results.

        Progress is printed every 10 timed runs, followed by the input shape and
        the average throughput. Nothing is returned.

        Args:
            model: Callable invoked as ``model(input_data)``.
            input_shape: Shape of the random input tensor. ``input_shape[0]`` is
                treated as the batch size when computing images/second.
            dtype: ``'fp16'`` converts the input to half precision; any other
                value leaves the input as produced by ``torch.randn``.
            nwarmup: Number of untimed warm-up calls made before measuring.
            nruns: Number of timed calls. Must be at least 1.
            device: Device on which the input tensor is created. Defaults to
                ``"cuda"``, matching the previous hard-coded behavior.

        Raises:
            ValueError: If ``nruns`` is less than 1 (there would be no timings
                to average).
        """
        if nruns < 1:
            raise ValueError("nruns must be at least 1")

        # CUDA kernels launch asynchronously, so timings are only meaningful
        # after an explicit synchronize; other devices need no such barrier.
        on_cuda = torch.device(device).type == "cuda"

        # Create the input directly on the target device instead of building it
        # on the host and copying it over.
        input_data = torch.randn(input_shape, device=device)
        if dtype == 'fp16':
            input_data = input_data.half()

        print("Warm up ...")
        with torch.no_grad():
            for _ in range(nwarmup):
                model(input_data)
        if on_cuda:
            torch.cuda.synchronize()

        print("Start timing ...")
        timings = []
        with torch.no_grad():
            for i in range(1, nruns + 1):
                # perf_counter is a monotonic, high-resolution clock; time.time()
                # can jump when the system clock is adjusted.
                start_time = time.perf_counter()
                model(input_data)
                if on_cuda:
                    torch.cuda.synchronize()
                timings.append(time.perf_counter() - start_time)
                if i % 10 == 0:
                    print('Iteration %d/%d, avg batch time %.2f ms' % (i, nruns, np.mean(timings) * 1000))

        print("Input shape:", input_data.size())
        print('Average throughput: %.2f images/second' % (input_shape[0] / np.mean(timings)))

    ?? ? ???? ?? ??? ??? ??? ?????.

    PyTorch ? TorchScript? ??? ??

    ?? PyTorch ??? ??? ???? ?? ?? 1? ?? ???? ?????.

    # Baseline: put the eager PyTorch model in eval mode on the GPU, then time it
    # with a single (1, 3, 224, 224) input over 100 timed runs.
    model = efficientnet_b0.eval().to("cuda")
    benchmark(model, input_shape=(1, 3, 224, 224), nruns=100)

    TorchScript JIT ???? ??? ??? ??? ? ????.

    # Trace the model using a random (1, 3, 224, 224) example input on the GPU.
    traced_model = torch.jit.trace(model, torch.randn((1,3,224,224)).to("cuda"))
    # Save the traced module to disk.
    torch.jit.save(traced_model, "efficientnet_b0_traced.jit.pt")
    # Time the traced module with the same input shape and run count as the
    # eager baseline.
    benchmark(traced_model, input_shape=(1, 3, 224, 224), nruns=100)

    PyTorch? TorchScript JIT? ?? ??? ?? ???? ?????.

    Torch-TensorRT? ??? ??

    Torch-TensorRT? ???? ?? ???? ??? ?????? ?? ??? ?????.

    # Compile `model` with Torch-TensorRT for a fixed (1, 3, 224, 224) input,
    # with half precision listed in the enabled precisions.
    # NOTE(review): the benchmark call that follows feeds dtype="fp16" inputs;
    # confirm the Input spec's dtype matches (it may need an explicit dtype) --
    # TODO verify against the installed Torch-TensorRT version.
    trt_model = torch_tensorrt.compile(model, 
        inputs= [torch_tensorrt.Input((1, 3, 224, 224))],
        enabled_precisions= { torch_tensorrt.dtype.half} # Run with FP16
    )

    ????? ? Torch-TensorRT ??? ??? ???????.

    # Time the Torch-TensorRT module over 100 runs, feeding half-precision
    # inputs (dtype="fp16") of shape (1, 3, 224, 224).
    benchmark(trt_model, input_shape=(1, 3, 224, 224), nruns=100, dtype="fp16")

    ???? ??

    ??? ?? ??? 1? NVIDIA A100 GPU?? ??? ?????.

    ?? 6. ?? ??? 1? NVIDIA A100 GPU?? ?? PyTorch? Torch-TensorRT? ??? ??

    ??

    ?? ? ???? ????? Torch-TensorRT? ?? ??? ?? 6? ??????. ?? PyTorch? ?? ???? ?????? ?? ??? NVIDIA GPU? ??? ? ??? ?????.

    ???? ???? ??? ?? ?????? PyTorch NGC ?????? Torch-TensorRT? ?????? ?? ?? ??? TensorRT ???? ?? PyTorch ?? ??? ?? ? ????.

    ? ???? ??? SDK? ???? ?? ???, ?? ???, ?? ??, ??, ?? ??, ???? NVIDIA ??? ???? ??? ??? ??? ??? ? ????. ?? ??? ???? NVIDIA? ?? ????? ???? ? ??? ??? ??? ?????? ???? ??? ??? ???.

    Discuss (0)
    +1

    Tags

    ?? ???

    人人超碰97caoporen国产