CUDA ???? ??? ??? ??? ?? ???? ??? ? ?? ?? ???? ?????? ?? ??? ????? ?? ????. ?????? ?? ???? CUDA ?????? ??? ???? ??? ? ???, ?? ??? ???? ???? ?????.
??? ???? ??? ????? ??? ????. ?? ?????? ?? ??? ? ?? ??, ??? ???? GPU ??? ???? CPU? ???? ???.
CUDA ???? ??? ?? ??? ???? ??? ???? ???? ??? ???? ?? ??? ?? ????? ???? ??? ??? ?????. CUDA ???? ??? ??? ? ?? ?? ?? ?? ??? ?????. ?? ? ??? ?? ?????. ?? ?? ??? ??????? ????? ??? ??? ??? ? ????.
? ?????? ???? ??? ?? ??? ? ?? ??? ???? ??? ???????. ??? ??? ?? ??? ?? ???? ???? ? ?? ????? ??? ?? ??????.
???? ??? ???
?? ???? ????? ??? ??? 4??? ????.
- ??? ??
- ???? ?? ??? ???? ???
- ?? ??? ???? ?? ??? GPU? ???
- ?? ?? ??? ??
CUDA? ?? ??? ?? ???? ???? ?????? ????? ??? ? ??? ???? ??? ? ????. ??? ??, CUDA? ??? ??? ????? ???? ?? ?? ???? ??? ? ??? ??? ?? ??? ?????.
CUDA ???? ???? ???? ???? ??? ?? ?? ???? ??? ?? ???? ?????? ???. ?? ?????? ???? ????, ?? ??? ??? ?? ???? ?????, ??? ??? ?? ????? ????? ???? ???? ????? ?????? ???. ???? ???? ??? ???? ?? ??? ? ??? ???? ?? ????? ? ??? ?? ? ? ????.
// This is the signature of our scheduler kernel // The internals of this kernel will be outlined later __global__ void schedulerKernel( fileData *files, int numFiles, int *currentFile, void **currentFileData, cudaGraphExec_t zipGraph, cudaGraphExec_t lzwGraph, cudaGraphExec_t deflateGraph); void setupAndLaunchScheduler() { cudaGraph_t zipGraph, lzwGraph, deflateGraph, schedulerGraph; cudaGraphExec_t zipExec, lzwExec, deflateExec, schedulerExec; // Create the source graphs for each possible operation we want to perform // We pass the currentFileData ptr to this setup, as this ptr is how the scheduler will // indicate which file to decompress create_zip_graph(&zipGraph, currentFileData); create_lzw_graph(&lzwGraph, currentFileData); create_deflate_graph(&deflateGraph, currentFileData); // Instantiate the graphs for these operations and explicitly upload cudaGraphInstantiate(&zipExec, zipGraph, cudaGraphInstantiateFlagDeviceLaunch); cudaGraphUpload(zipExec, stream); cudaGraphInstantiate(&lzwExec, lzwGraph, cudaGraphInstantiateFlagDeviceLaunch); cudaGraphUpload(lzwExec, stream); cudaGraphInstantiate(&deflateExec, deflateGraph, cudaGraphInstantiateFlagDeviceLaunch); cudaGraphUpload(deflateExec, stream); // Create and instantiate the scheduler graph cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal); schedulerKernel<<<1, 1, 0, stream>>>(files, numFiles, currentFile, currentFileData, zipExec, lzwExec, deflateExec); cudaStreamEndCapture(stream, &schedulerGraph); cudaGraphInstantiate(&schedulerExec, schedulerGraph, cudaGraphInstantiateFlagDeviceLaunch); // Launch the scheduler graph - this will perform an implicit upload cudaGraphLaunch(schedulerExec, stream); }
???? ???? ???? ?????? ?? ??? ? ??? ?? ?????. ?, ??? cudaGraphExec_t ??? ???? ??? ??? ?? ??? ?? ????? ??? ? ??? ????.
???? ?? ??(Fire and forget launch)
???? ??? ???? ???? ?? ??? ?????. ?? ??? ???? ?? ??? ???? ?? ??(Fire and forget launch)???.
?? ? ??(Forget)? ?? ???? ???? ?? ?????. ?? ? ?? ??(Forget mode)? ?? ??? ???? ?? ???? ????? ?????. ??? ?? ?????, ???? ?? ??? ???? ?? ? ??? ?????. ??? ??? ???? ?????. CUDA? ??? ?? ? ??? ???? ?? ???? ??? ??? ??? ???? ?????. ??? ???? ??? ???? ?????.
// Compression method tag stored with each file.
enum compressionType {
    zip     = 1,
    lzw     = 2,
    deflate = 3
};

// One entry of the file table read by the scheduler kernel.
struct fileData {
    compressionType comprType;  // selects which graph is launched for this file
    void *data;                 // pointer published through *currentFileData
};

// Scheduler kernel: publishes the current file's data pointer and launches the
// graph matching that file's compression type in fire-and-forget mode.
//
//   files            table of file descriptors
//   numFiles         number of entries in `files`
//   currentFile      pointer to the index of the file to dispatch
//   currentFileData  slot that receives files[*currentFile].data
//   zipGraph, lzwGraph, deflateGraph
//                    executable graphs, one per compression type
__global__ void schedulerKernel(
    fileData *files,
    int numFiles,
    int *currentFile,
    void **currentFileData,
    cudaGraphExec_t zipGraph,
    cudaGraphExec_t lzwGraph,
    cudaGraphExec_t deflateGraph)
{
    // currentFile is a pointer, so it must be dereferenced to get the index.
    const int fileIdx = *currentFile;

    // Do not touch the table when the index is outside [0, numFiles).
    if (fileIdx < 0 || fileIdx >= numFiles) {
        return;
    }

    // Set the data ptr to the current file so the dispatched graph
    // is operating on the correct file data
    *currentFileData = files[fileIdx].data;

    switch (files[fileIdx].comprType) {
        case zip:
            cudaGraphLaunch(zipGraph, cudaStreamGraphFireAndForget);
            break;
        case lzw:
            cudaGraphLaunch(lzwGraph, cudaStreamGraphFireAndForget);
            break;
        case deflate:
            cudaGraphLaunch(deflateGraph, cudaStreamGraphFireAndForget);
            break;
        default:
            // Unknown compression type: nothing is launched.
            break;
    }
}
??? ??? ???? ??? ? ?? ???, ?? ? ??? ?? ???? ???? ???? ??? ? ??? ?? ?????. ? ???? ???? ????, ?? ???? ?? ???? ???? ??, ??? ??? ??? ??? ?? ???? ?? ???? ??(??? ?? ?)? ? ? ??? ? ?? ???? ??? ? ????. ???? ??? ??? ???? ????? ?? ?????.
?? ??
CUDA ??? GPU? ?? ?????? ?????. ?, ?? ???? ??? ??? ???? ?? ??? ????? ????? ???? ??? ????. ? ??? ????? cudaDeviceSynchronize ?? cudaStreamSynchronize ?? ??? ??? ???? CPU ????? ?????.
GPU ?? ?????? cudaDeviceSynchronize? ?? ?? ??? ?? ???? ???? ????? ???? ? ????. ?? ?? ??? ??? ????? ?? ?? ??? ???? ???.
?? ??? ?? ???? ???? ?? ???? ?? ?? ???? ???? ?????. CUDA? ?? ???? ??? ?? ??? ?? ??? ??????, ?? ??? ??? ?? ?? ? ?? ??? ?????? ?????.
?? ??? ?? ? ?? ???? ???, ?? ??? ???????? ?????. ?? ??? ???? ??? ???? ?????. ??? ??? ?? ?????, ?? ?? ?? ???? ???? ?? ??? ?? ????? ??? ????. ? ?? ??? ?? ???? ?? ??? ?????.
?? ?? ??? ???? ?? ????? ?????? ?? ???? ??? ?? ? ????. ? ?? ????? ????? ?? ????? ?? ??? ????? ??? ?????.
// Scheduler kernel with self-relaunch: dispatches the graph for the current
// file in fire-and-forget mode, advances the file index, and tail-launches the
// currently running graph again while files remain.
//
//   files            table of file descriptors
//   numFiles         number of entries in `files`
//   currentFile      pointer to the index of the file to dispatch; advanced by
//                    one on every invocation that dispatches a file
//   currentFileData  slot that receives files[*currentFile].data
//   zipGraph, lzwGraph, deflateGraph
//                    executable graphs, one per compression type
__global__ void schedulerKernel(
    fileData *files,
    int numFiles,
    int *currentFile,
    void **currentFileData,
    cudaGraphExec_t zipGraph,
    cudaGraphExec_t lzwGraph,
    cudaGraphExec_t deflateGraph)
{
    // currentFile is a pointer, so it must be dereferenced to get the index.
    const int fileIdx = *currentFile;

    // Nothing to dispatch (empty table or index already past the end).
    if (fileIdx < 0 || fileIdx >= numFiles) {
        return;
    }

    // Set the data ptr to the current file so the dispatched graph
    // is operating on the correct file data
    *currentFileData = files[fileIdx].data;

    switch (files[fileIdx].comprType) {
        case zip:
            cudaGraphLaunch(zipGraph, cudaStreamGraphFireAndForget);
            break;
        case lzw:
            cudaGraphLaunch(lzwGraph, cudaStreamGraphFireAndForget);
            break;
        case deflate:
            cudaGraphLaunch(deflateGraph, cudaStreamGraphFireAndForget);
            break;
        default:
            // Unknown compression type: nothing is launched for this file.
            break;
    }

    // Advance the stored index. The parentheses-free form `*currentFile++`
    // would increment the local pointer instead of the value it points to.
    const int nextIdx = fileIdx + 1;
    *currentFile = nextIdx;

    // If we have not finished iterating over all the files, relaunch.
    // The test uses the *next* index so the final invocation does not queue
    // a launch that would read one element past the end of `files`.
    if (nextIdx < numFiles) {
        // Query the current graph handle so we can relaunch it
        cudaGraphExec_t currentGraph = cudaGetCurrentGraphExec();
        cudaGraphLaunch(currentGraph, cudaStreamGraphTailLaunch);
    }
}
??? ??? cudaGetCurrentGraphExec? ???? ?? ?? ?? ???? ?? ??? ???? ?? ?????. ?? ?? ??? ?? ??? ?? ???? ?????.
?? ???? ?? ?? ?? ??? ???? ???? ??? ????. ?? ???? ?? ???? ???? ?, ??? ?? ? ?? ??? ???(?? ??)?? ???. ???? ???? ? ?? ??? ??(? ??? ?? ???)? ?? ??? ? ? ????. ?? ??? ???? ?????? ?? ??? ?????? ?? ???? ???. ?? ?? ??? ??? ?????? ????, ?? ?? ??? ??? ???? ??? ? ????.
????? ??? ?? ?? ??
? ??? ??? ?? ???? ???? ????? ?? 1? ??? ?????? ?? ? ??, ?? ??, ??? ??? ?? ??? ??? ?????.

? ??? ???? ? ?? ?? ??? ??? ??? ?? ?? ?? 2? ? ?? ?? ???, ??? ??? ??? ?? ??? ?????. ? ????? ?? ?? ??? ?????.
??, ?? 2?? ? ? ??? ???? ??? ?? ??? ? ???? ? ?????.

??? ??? ???? ???? ?? ?? ??? ???? ??? ?? ?? ??? ????? ?? ?? ?? ???? ?????.
??
CUDA ???? ??? ??? CUDA ?? ??? ?? ?? ??? ???? ?? ??? ??? ?????. ? ????? ?? ?? ?? ?? ??? ?? ???? ???, ?? ?? ??? ??? ??? ????.
?? ??? ????? ???? ???? ??? ?? ??? ?????. ???? ??? ??? ?? ???? CUDA Toolkit 12.0? ???????.