?? ? ?? ?? ??? ???? NVIDIA TensorRT? PyTorch? ??? ??? Torch-TensorRT? ?? ?????. PyTorch? ??? ? ?? ??? ?? ???? ??? ? ?? ????????. TensorRT? ?????, ???? ? ????? ?????? ???? GPU ?? ????? ??? ? ?? ??? ?? SDK???. ?? ?? ??? PyTorch ???? TensorRT? ??? ? ???? ?????? ?? ?? ?? ?? ??? ??? ? ????.

Torch-TensorRT??
Torch-TensorRT? NVIDIA GPU?? TensorRT? ?? ???? ???? PyTorch? ?????. ?? ?? ? ???? NVIDIA GPU?? ??? ?? 6? ????? ??? API? ?????.
? ??? ??? ??? FP16, INT8 ?? TensorRT ???? ???? ???, TensorRT? ?? ?? ???? ???? ?? ? ?? PyTorch? ????? ?????. ??? ??? NVIDIA Torch-TensorRT ???? ??? ?????.
Torch-TensorRT? ?? ??
Torch-TensorRT? TorchScript? ?? ???? ?????. ?? ?? ??? ?? ???? ????? ???? PyTorch? ??? ???? ??? ? ??? ???. PyTorch? ????? ??? ?? ??? ??? ?? ???? ????? TensorRT? ?? ??? ??? ????? Torch-TensorRT? ?? ?????.
??? ? ???? ???? ???? ?? TorchScript ??? ???? ?? ??? ???? ? ?? TensorRT ??? ??? ? ????. Torch-TensorRT ????? ????? ?? ??? ?? ???? ?? ?? ? ??? ?????.
- TorchScript ?? ???
- ????
- ????
TorchScript ?? ???
? ?? ???? Torch-TensorRT? TorchScript ??? ?????, TensorRT? ? ?? ???? ??? ?? ??? ???? ??? ??????. ??? ??? ???? ??? ??? ???? ??? ??? ???? ??? ???? ???.
????
?? ???? Torch-TensorRT? ??? ?? TensorRT? ?? ??? ?? ???? ???? ???? TensorRT ???? ?????.
- ?? ?? ?? ??? ???? ??? ?????.
- Tensor ??? ???? ??? ?? ??? TensorRT ???? ?????.
- ??? ??? TorchScripting? ?? ?? TorchScript ??? ???? ????? ???? ?????.

??? ??? TensorRT ??? ????? ??? ?????. ?, PyTorch ??, ?? ???, TensorRT ?? ? ?? ??? ?? ???? ??? ? ????.

????
???? ??? ???? Torch-TensorRT? ??? ?? ??? ??? ??? ??? ?????. ??? TorchScript ??? ???? TorchScript ?????? TensorRT ??? ???? ?? ??? ?????. ??? ???? ?? TorchScript ???? ?????? ??? ?? ?????.

Torch-TensorRT ??
Torch-TensorRT?? INT8 ? ??? ??? ?? ??? ???????.
INT8 ??
Torch-TensorRT? ?? ? ?? ??? ?? ???? ?? ??? ?????.
- ???? ? ???(PTQ)
- ??? ?? ????(QAT)
PTQ? ?? TensorRT? ?? ???? ?? ???? ??? ???? ?? ??? ?????. IT? FP32? ???? ???? FP32? INT8 ?? ??? ?? ??? ????? INT8? ?? ??? ?????. TensorRT ??????? ????? TensorRT ???? ?? ???? ???? ??? ???? ???? ???.
Torch-TensorRT? PyTorch? ?? ???? ???? ???? ? ?? ??? ? ????. LibTorch? ???·?? ?? ???? ????? DataLoader ? Dataset API? ?????. ?? API? C++ ? Python ?????? ?? ????? PTQ? ? ?? ??? ? ????. ??? ??? ???? ? ???(PTQ)? ?????.
QAT? ?? TensorRT?? PyTorch? ??? ?? Ops? TensorRT? ???? ??? API? QuantizeLayer ? DequantizeLayer? ???????. aten::fake_quantize_per_*_affine? ?? ??? ????? Torch-TensorRT? ?? QuantizeLayer + DequantizeLayer? ?????. Torch-TensorRT? ???? PyTorch QAT ??? ????? ??? ????? ??? ?? ??? ??? Torch-TensorRT? ???? INT8? ??? ?? ???? ?? ??? ?????.
???
NVIDIA Ampere ????? ???? ????? ???? ???? ???? NVIDIA A100 GPU? 3?? Tensor ??? ?????. ? ??? ??? ?? ?? ?? ??? ???? ??? ????? ??? ?? ???? ??? ?????.
- TensorRT? ?? Tensor ???? ? ?? ??? ?? ?? ???? ???? ???? ??? ?????.
- Torch-TensorRT? ???? ???? ?? ?? ???? ?? ??? ?????.
??: ??? ??? ?? ??? ??
? ?????? EfficientNet??? ??? ?? ??? ?? ?? ??? ???? PyTorch, TorchScript JIT, Torch-TensorRT? ??? ???? ???? ? ???? ???? ?????. ??? ??? Torch-TensorRT GitHub ?????? ?? ? ?? ?? ???? ?????.
?? ? ?? ??
? ??? ????? ?? ???? ?????.
- NVIDIA GPU, ??? ???? 7 ?? ??? ??? Linux ???
- 19.03 ?? ???? ??? Docker
- PyTorch, Torch-TensorRT, ??? NGC ?????? ??? ?? ???? ?? Docker ????
??? ?? nvcr.io/nvidia/pytorch:21.11-py3 ??? ??? Docker ????? ?????.
Docker ????? ??? bash ???? ??????? JupyterLab ????? ???? Python ??? ?????. ?? 8888? ?? JupyterLab? ???? ??? TensorRT? ?????. ?????? JupyterLab? ??? ??? ?????? ???? ???? ? ?? ??? IP ??? ?????.
jupyter lab --allow-root --ip=0.0.0.0 --NotebookApp.token='TensorRT' --port 8888
?????? ?? 8888? ?? ? IP ??? ?????. ?? ????? ? ??? ?? ?? ?? Localhost:8888? ?????.
?????? JupyterLab? ??? ??? ?????? ???? ?? ??? Jupyter ???? ?? ? ????. ?? ????? ??? ?? ??, ???, ????? ??? PyTorch ?????? timm? ???? ??? ?????. ? ??????? EfficientNet-b0 ??? ?????.
pip install timm
?? ?????? ???? EfficientNet-b0? PyTorch nn.Module ??? ????.
import torch
import torch_tensorrt
import timm
import time
import numpy as np
import torch.backends.cudnn as cudnn
# Replace torch.hub's private fork-validation hook with a stub that accepts
# three positional arguments and always returns True.
# NOTE(review): presumably a workaround for a hub validation failure in the
# container's PyTorch version -- confirm it is still needed.
torch.hub._validate_not_a_forked_repo = lambda a, b, c: True

# Build EfficientNet-b0 through timm, requesting pretrained weights.
efficientnet_b0 = timm.create_model(
    'efficientnet_b0',
    pretrained=True,
)
? efficientnet_b0 ??? forward ???? ??? ?? ?? Tensor? ???? ? ????? ???? ????.
# Switch the network to eval mode, move it to the GPU, and push one random
# batch of 128 images (3 x 224 x 224) through it as a smoke test.
model = efficientnet_b0.eval().to("cuda")
# Inference only: disable autograd so no graph is recorded for the batch.
with torch.no_grad():
    detections_batch = model(torch.randn(128, 3, 224, 224).to("cuda"))
# Last expression of the cell so the notebook displays the output shape.
detections_batch.shape
??? ?? 128?? ??? 1,000?? ???? ???? [128, 1000]? Tensor? ?????.
PyTorch JIT ??? ???? Torch-TensorRT AOT ??? ???? ?? ???? ? ??? ??????? ??? ???? ???? ??? ?????.
# Turn on the cuDNN autotuner flag before any timing is done.
cudnn.benchmark = True


def benchmark(model, input_shape=(1024, 3, 512, 512), dtype='fp32', nwarmup=50, nruns=1000, device="cuda"):
    """Time repeated forward passes of ``model`` on random input and print the results.

    Progress is printed every 10 timed iterations, followed by the input
    shape and the average throughput.

    Args:
        model: Callable invoked as ``model(input_data)``.
        input_shape: Shape of the random input tensor. ``input_shape[0]`` is
            used as the number of images per call when reporting throughput.
        dtype: ``'fp16'`` converts the input to half precision; any other
            value leaves the input as produced by ``torch.randn``.
        nwarmup: Number of untimed warm-up calls made before measuring.
        nruns: Number of timed calls; must be at least 1.
        device: Device the input tensor is moved to. Defaults to ``"cuda"``,
            which matches the previously hard-coded behaviour.

    Raises:
        ValueError: If ``nruns`` is less than 1 (there would be nothing to
            average).
    """
    if nruns < 1:
        raise ValueError("nruns must be at least 1")

    input_data = torch.randn(input_shape).to(device)
    if dtype == 'fp16':
        input_data = input_data.half()

    on_cuda = input_data.device.type == "cuda"

    def _sync():
        # Wait for queued GPU work so wall-clock timings cover the whole
        # forward pass; nothing to wait for when the input is not on CUDA.
        if on_cuda:
            torch.cuda.synchronize(input_data.device)

    print("Warm up ...")
    with torch.no_grad():
        for _ in range(nwarmup):
            model(input_data)
    _sync()

    print("Start timing ...")
    timings = []
    with torch.no_grad():
        for i in range(1, nruns + 1):
            # perf_counter is monotonic and high resolution, unlike time.time().
            start_time = time.perf_counter()
            model(input_data)
            _sync()
            timings.append(time.perf_counter() - start_time)
            if i % 10 == 0:
                print('Iteration %d/%d, avg batch time %.2f ms' % (i, nruns, np.mean(timings) * 1000))

    print("Input shape:", input_data.size())
    print('Average throughput: %.2f images/second' % (input_shape[0] / np.mean(timings)))
?? ? ???? ?? ??? ??? ??? ?????.
PyTorch ? TorchScript? ??? ??
?? PyTorch ??? ??? ???? ?? ?? 1? ?? ???? ?????.
# Baseline: time the eager PyTorch model on the GPU with a batch size of 1.
model = efficientnet_b0.eval().to("cuda")
benchmark(model, input_shape=(1, 3, 224, 224), nruns=100)
TorchScript JIT ???? ??? ??? ??? ? ????.
# Trace the model into TorchScript using a batch-size-1 example input on the GPU.
traced_model = torch.jit.trace(model, torch.randn((1, 3, 224, 224)).to("cuda"))
# Save the traced module to disk.
torch.jit.save(traced_model, "efficientnet_b0_traced.jit.pt")
# Time the traced module with the same settings as the eager baseline.
benchmark(traced_model, input_shape=(1, 3, 224, 224), nruns=100)
PyTorch? TorchScript JIT? ?? ??? ?? ???? ?????.
Torch-TensorRT? ??? ??
Torch-TensorRT? ???? ?? ???? ??? ?????? ?? ??? ?????.
# Compile the model with Torch-TensorRT for a fixed 1 x 3 x 224 x 224 input.
trt_model = torch_tensorrt.compile(
    model,
    inputs=[torch_tensorrt.Input((1, 3, 224, 224))],
    enabled_precisions={torch_tensorrt.dtype.half},  # Run with FP16
)
????? ? Torch-TensorRT ??? ??? ???????.
# Time the Torch-TensorRT module; dtype="fp16" makes benchmark() feed it
# half-precision input.
benchmark(
    trt_model,
    input_shape=(1, 3, 224, 224),
    dtype="fp16",
    nruns=100,
)
???? ??
??? ?? ??? 1? NVIDIA A100 GPU?? ??? ?????.

??
?? ? ???? ????? Torch-TensorRT? ?? ??? ?? 6? ??????. ?? PyTorch? ?? ???? ?????? ?? ??? NVIDIA GPU? ??? ? ??? ?????.
???? ???? ??? ?? ?????? PyTorch NGC ?????? Torch-TensorRT? ?????? ?? ?? ??? TensorRT ???? ?? PyTorch ?? ??? ?? ? ????.
? ???? ??? SDK? ???? ?? ???, ?? ???, ?? ??, ??, ?? ??, ???? NVIDIA ??? ???? ??? ??? ??? ??? ? ????. ?? ??? ???? NVIDIA? ?? ????? ???? ? ??? ??? ??? ?????? ???? ??? ??? ???.