Simple Stream Setup

/* Initialize NvMedia (not all steps are shown) */
NvMedia2D* nvmedia2D;
NvMedia2DCreate(nvmedia2D, NULL);

/* Initialize CUDA (not all steps are shown) */
CUdevice cudaDevice;
cuDeviceGet(&cudaDevice, IGPU);
CUcontext cudaContext;
cuCtxCreate(&cudaContext, CU_CTX_MAP_HOST, dev);
cuCtxPushCurrent(&cudaContext);
CUstream cudaStream;
cuStreamCreate(&cudaStream, CU_STREAM_DEFAULT);

/* Open the NvSciBuf and NvSciSync modules used by the calls that follow */
NvSciBufModule  bufModule;
NvSciSyncModule syncModule;

NvSciBufModuleOpen(&bufModule);
NvSciSyncModuleOpen(&syncModule);

/* Create a buffer attribute list and let NvMedia 2D fill in its needs */
NvSciBufAttrList nvmediaBufAttrs;
NvSciBufAttrListCreate(bufModule,
                       &nvmediaBufAttrs);
NvMedia2DFillNvSciBufAttrList(nvmedia2D,
                              nvmediaBufAttrs);


/*
 * Set more buffer attributes using NvSciBufAttrListSetAttrs.
 * Detail skipped.
 */

/*
 * Obtain NvMedia sync requirements.
 * Two attribute lists are filled: one with NvMedia in the signaler role
 * (it signals when its write to the buffer is done) and one with NvMedia
 * in the waiter role (it waits before writing a new image).
 * As with the buffer attribute query, the NvMedia 2D handle is passed as
 * the first argument.
 */
NvSciSyncAttrList nvmediaWriteSyncAttrs;
NvSciSyncAttrList nvmediaReadSyncAttrs;

NvSciSyncAttrListCreate(syncModule, &nvmediaWriteSyncAttrs);
NvMedia2DFillNvSciSyncAttrList(nvmedia2D, nvmediaWriteSyncAttrs,
                               NVMEDIA_SIGNALER);

NvSciSyncAttrListCreate(syncModule, &nvmediaReadSyncAttrs);
NvMedia2DFillNvSciSyncAttrList(nvmedia2D, nvmediaReadSyncAttrs,
                               NVMEDIA_WAITER);

/*
 * Obtain CUDA buffer requirements.
 * The attribute list is created on the same buffer module as the NvMedia
 * list; both lists are reconciled together further down.
 */
NvSciBufAttrList cudaBufAttrs;
NvSciBufAttrListCreate(bufModule, &cudaBufAttrs);
/* NOTE(review): the next line is a placeholder in this listing, not code.
   The CUDA raw-buffer attribute values must be set on cudaBufAttrs here. */
<Fill in with CUDA raw buffer attributes>

/* Query CUDA's sync requirements: one list for signaling, one for waiting */
NvSciSyncAttrList cudaWriteSyncAttrs;
NvSciSyncAttrList cudaReadSyncAttrs;

NvSciSyncAttrListCreate(syncModule, &cudaWriteSyncAttrs);
cuDeviceGetNvSciSyncAttributes(cudaWriteSyncAttrs, cudaDevice,
                               CUDA_NVSCISYNC_ATTR_SIGNAL);

NvSciSyncAttrListCreate(syncModule, &cudaReadSyncAttrs);
cuDeviceGetNvSciSyncAttributes(cudaReadSyncAttrs, cudaDevice,
                               CUDA_NVSCISYNC_ATTR_WAIT);

/* Reconcile the NvMedia and CUDA buffer requirements, then allocate */
NvSciBufAttrList allBufAttrs[2] = { nvmediaBufAttrs, cudaBufAttrs };
NvSciBufAttrList combinedBufAttrs;
NvSciBufAttrList conflictBufAttrs;
NvSciBufObj buffer;

NvSciBufAttrListReconcile(allBufAttrs, 2,
                          &combinedBufAttrs, &conflictBufAttrs);
NvSciBufObjAlloc(combinedBufAttrs, &buffer);

/*
 * Combine sync requirements and allocate the NvMedia-to-CUDA sync object:
 * NvMedia's signaler attributes are reconciled with CUDA's waiter
 * attributes.
 */
NvSciSyncAttrList allSyncAttrs[2], conflictSyncAttrs;
allSyncAttrs[0] = nvmediaWriteSyncAttrs;
allSyncAttrs[1] = cudaReadSyncAttrs;
NvSciSyncAttrList nvmediaToCudaSyncAttrs;
/* The conflict output is the conflictSyncAttrs list declared above. */
NvSciSyncAttrListReconcile(allSyncAttrs, 2,
                           &nvmediaToCudaSyncAttrs, &conflictSyncAttrs);
NvSciSyncObj nvmediaToCudaSync;
NvSciSyncObjAlloc(nvmediaToCudaSyncAttrs, &nvmediaToCudaSync);

/* Combine sync requirements and allocate cuda 
to nvmedia sync object */
allSyncAttrs[0] = cudaWriteSyncAttrs;
allSyncAttrs[1] = nvmediaReadSyncAttrs;1
NvSciSyncAttrList cudaToNvmediaSyncAttrs;
NvSciSyncAttrListReconcile(allSyncAttrs, 2, 
&cudaToNvmediaSyncAttrs, &confictSyncAttrs);
NvSciSyncObj cudaToNvmediaSync;
NvSciSyncObjAlloc(cudaToNvmediaSyncAttrs, &cudaToNvmediaSync);

/* Register the shared buffer and both sync objects with NvMedia 2D */
NvMedia2DRegisterNvSciBufObj(nvmedia2D, buffer);
NvMedia2DRegisterNvSciSyncObj(nvmedia2D,
                              NVMEDIA_EOFSYNCOBJ,
                              nvmediaToCudaSync);
NvMedia2DRegisterNvSciSyncObj(nvmedia2D,
                              NVMEDIA_PRESYNCOBJ,
                              cudaToNvmediaSync);
 
/* Map objects into CUDA */
cudaExternalMemoryHandleDesc 
cudaMemHandleDesc;
memset(&cudaMemHandleDesc, 0, sizeof
(cudaMemHandleDesc));
cudaMemHandleDesc.type = 
cudaExternalMemoryHandleTypeNvSciBuf;
cudaMemHandleDesc.handle.nvSciBufObject = 
buffer;
cudaMemHandleDesc.size = <allocated size>;
cudaImportExternalMemory(&cudaBuffer, 
&cudaMemHandleDesc);
CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC 
cudaSemDec;
CUexternalSemaphore nvmediaToCudaSem, 
cudaToNvmediaSem;
cudaSemDesc.type = 
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC;
cudaSemDesc.handle.nvSciSyncObj = 
(void*)nvmediaToCudaSync;
cuImportExternalSemaphore(&nvmediaToCudaSem, 
&cudaSemDesc);
cudaSemDesc.type = 
CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC;
cudaSemDesc.handle.nvSciSyncObj = 
(void*)cudaToNvmediaSync;
cuImportExternalSemaphore(&cudaToNvmediaSem, 
&cudaSemDesc);

First, the buffer and sync object requirements are queried from NvMedia, the producer of the stream, and from CUDA, the consumer. These requirements are combined and used to allocate the objects, which are then mapped into NvMedia and CUDA so that they can be used for processing.

Two sync objects are required instead of one because synchronization is required in both directions. It is important that the CUDA consumer does not begin reading from the buffer until the NvMedia producer is done writing to it. It is equally important that the NvMedia producer does not begin writing a new image to the buffer until the CUDA consumer is done reading the previous image. Otherwise, the producer overwrites data that is still in use.