SampleUsage

/******************************Initialize NvSciSync Parameters******************************************/
    // Every NvSciSync handle starts out as NULL so that no handle holds an
    // indeterminate value if a setup call fails before populating it; the
    // tear-down phase passes all of these handles to their *Free functions.

    // Sync objects: syncObj1 is signalled by the CPU and waited on by cuDLA,
    // syncObj2 is signalled by cuDLA and waited on by the CPU.
    NvSciSyncObj syncObj1 = NULL;
    NvSciSyncObj syncObj2 = NULL;
    NvSciSyncModule syncModule = NULL;

    // Input arrays for reconciliation: {signaler list, waiter list} per object.
    NvSciSyncAttrList syncAttrListObj1[2] = { NULL, NULL };
    NvSciSyncAttrList syncAttrListObj2[2] = { NULL, NULL };

    // Context used by the CPU when it waits on a fence.
    NvSciSyncCpuWaitContext nvSciCtx = NULL;

    // Unreconciled attribute lists, one per participant and per object.
    NvSciSyncAttrList waiterAttrListObj1 = NULL;
    NvSciSyncAttrList signalerAttrListObj1 = NULL;
    NvSciSyncAttrList waiterAttrListObj2 = NULL;
    NvSciSyncAttrList signalerAttrListObj2 = NULL;

    // Outputs of NvSciSyncAttrListReconcile() for each object.
    NvSciSyncAttrList nvSciSyncConflictListObj1 = NULL;
    NvSciSyncAttrList nvSciSyncReconciledListObj1 = NULL;
    NvSciSyncAttrList nvSciSyncConflictListObj2 = NULL;
    NvSciSyncAttrList nvSciSyncReconciledListObj2 = NULL;
 
    // Open the NvSciSync module that owns every attribute list and object below.
    NvSciSyncModuleOpen(&syncModule);

    // NvSciSyncObj1 needs one attribute list per participant.
    NvSciSyncAttrListCreate(syncModule, &signalerAttrListObj1);
    NvSciSyncAttrListCreate(syncModule, &waiterAttrListObj1);

    // cuDLA is the waiter on NvSciSyncObj1, so it fills in the waiter list.
    cudlaGetNvSciSyncAttributes(reinterpret_cast<uint64_t*>(waiterAttrListObj1), CUDLA_NVSCISYNC_ATTR_WAIT);

    // The CPU is the signaler on NvSciSyncObj1: request CPU access with
    // signal-only permission.
    {
        bool needCpuAccess = true;
        NvSciSyncAccessPerm signalOnlyPerm = NvSciSyncAccessPerm_SignalOnly;

        NvSciSyncAttrKeyValuePair signalerAttrs[2];
        memset(signalerAttrs, 0, sizeof(signalerAttrs));

        signalerAttrs[0].attrKey = NvSciSyncAttrKey_NeedCpuAccess;
        signalerAttrs[0].len = sizeof(needCpuAccess);
        signalerAttrs[0].value = (void*) &needCpuAccess;

        signalerAttrs[1].attrKey = NvSciSyncAttrKey_RequiredPerm;
        signalerAttrs[1].len = sizeof(signalOnlyPerm);
        signalerAttrs[1].value = (void*) &signalOnlyPerm;

        NvSciSyncAttrListSetAttrs(signalerAttrListObj1, signalerAttrs, 2);
    }

    // Reconcile the signaler and waiter lists of NvSciSyncObj1 into one list.
    syncAttrListObj1[0] = signalerAttrListObj1;
    syncAttrListObj1[1] = waiterAttrListObj1;
    NvSciSyncAttrListReconcile(syncAttrListObj1,
                               2,
                               &nvSciSyncReconciledListObj1,
                               &nvSciSyncConflictListObj1);

    // Allocate NvSciSyncObj1 from the reconciled attributes.
    NvSciSyncObjAlloc(nvSciSyncReconciledListObj1, &syncObj1);

    // Wait context the CPU uses later when it waits on a fence.
    NvSciSyncCpuWaitContextAlloc(syncModule, &nvSciCtx);
    
    // NvSciSyncObj2 needs one attribute list per participant.
    NvSciSyncAttrListCreate(syncModule, &signalerAttrListObj2);
    NvSciSyncAttrListCreate(syncModule, &waiterAttrListObj2);

    // cuDLA is the signaler on NvSciSyncObj2, so it fills in the signaler list.
    cudlaGetNvSciSyncAttributes(reinterpret_cast<uint64_t*>(signalerAttrListObj2), CUDLA_NVSCISYNC_ATTR_SIGNAL);

    // The CPU is the waiter on NvSciSyncObj2: request CPU access with
    // wait-only permission.
    {
        bool needCpuAccess = true;
        NvSciSyncAccessPerm waitOnlyPerm = NvSciSyncAccessPerm_WaitOnly;

        NvSciSyncAttrKeyValuePair waiterAttrs[2];
        memset(waiterAttrs, 0, sizeof(waiterAttrs));

        waiterAttrs[0].attrKey = NvSciSyncAttrKey_NeedCpuAccess;
        waiterAttrs[0].len = sizeof(needCpuAccess);
        waiterAttrs[0].value = (void*) &needCpuAccess;

        waiterAttrs[1].attrKey = NvSciSyncAttrKey_RequiredPerm;
        waiterAttrs[1].len = sizeof(waitOnlyPerm);
        waiterAttrs[1].value = (void*) &waitOnlyPerm;

        NvSciSyncAttrListSetAttrs(waiterAttrListObj2, waiterAttrs, 2);
    }

    // Reconcile the signaler and waiter lists of NvSciSyncObj2 into one list.
    syncAttrListObj2[0] = signalerAttrListObj2;
    syncAttrListObj2[1] = waiterAttrListObj2;
    NvSciSyncAttrListReconcile(syncAttrListObj2,
                               2,
                               &nvSciSyncReconciledListObj2,
                               &nvSciSyncConflictListObj2);

    // Allocate NvSciSyncObj2 from the reconciled attributes.
    NvSciSyncObjAlloc(nvSciSyncReconciledListObj2, &syncObj2);
 
    /********************Registration of NvSciSync with cuDLA******************************/
    // A single descriptor is reused for both imports and zeroed before each use.
    cudlaExternalSemaphoreHandleDesc semaMemDesc = { 0 };

    // Import NvSciSyncObj1 into cuDLA; the registration handle lands in
    // nvSciSyncObjRegPtr1.
    uint64_t* nvSciSyncObjRegPtr1 = NULL;
    memset(&semaMemDesc, 0, sizeof(semaMemDesc));
    semaMemDesc.extSyncObject = syncObj1;
    cudlaImportExternalSemaphore(cudlaDevHandle,
                                 &semaMemDesc,
                                 &nvSciSyncObjRegPtr1,
                                 0);

    // Import NvSciSyncObj2 into cuDLA; the registration handle lands in
    // nvSciSyncObjRegPtr2.
    uint64_t* nvSciSyncObjRegPtr2 = NULL;
    memset(&semaMemDesc, 0, sizeof(semaMemDesc));
    semaMemDesc.extSyncObject = syncObj2;
    cudlaImportExternalSemaphore(cudlaDevHandle,
                                 &semaMemDesc,
                                 &nvSciSyncObjRegPtr2,
                                 0);
 
    // Wait events (cuDLA is the waiter): a single pre-fence generated from
    // NvSciSyncObj1.
    NvSciSyncFence preFence = NvSciSyncFenceInitializer;
    NvSciSyncObjGenerateFence(syncObj1, &preFence);

    cudlaWaitEvents* waitEvents = (cudlaWaitEvents *)malloc(sizeof(*waitEvents));
    waitEvents->numEvents = 1;

    CudlaFence* preFences = (CudlaFence *)malloc(sizeof(*preFences) * waitEvents->numEvents);
    preFences[0].type = CUDLA_NVSCISYNC_FENCE;
    preFences[0].fence = &preFence;
    waitEvents->preFences = preFences;

    // Signal events (cuDLA is the signaller): a single end-of-frame fence tied
    // to the registered NvSciSyncObj2.
    cudlaSignalEvents* signalEvents = (cudlaSignalEvents *)malloc(sizeof(*signalEvents));
    signalEvents->numEvents = 1;

    uint64_t** devPtrs = (uint64_t **)malloc(sizeof(*devPtrs) * signalEvents->numEvents);
    devPtrs[0] = nvSciSyncObjRegPtr2;
    signalEvents->devPtrs = devPtrs;

    NvSciSyncFence eofFence = NvSciSyncFenceInitializer;
    signalEvents->eofFences = (CudlaFence *)malloc(sizeof(*signalEvents->eofFences) * signalEvents->numEvents);
    signalEvents->eofFences[0].type = CUDLA_NVSCISYNC_FENCE;
    signalEvents->eofFences[0].fence = &eofFence;
 
    /**************************** Task Submission to DLA **************************************/
    // Describe one task that waits on waitEvents, runs the loaded module and
    // then signals signalEvents.
    cudlaTask task;
    task.moduleHandle = moduleHandle;

    // Input: DLA reads the input data from this registered memory.
    task.inputTensor = &inputBufObjRegPtr;
    task.numInputTensors = 1;

    // Output: DLA writes its results into this registered memory on completion.
    task.outputTensor = &outputBufObjRegPtr;
    task.numOutputTensors = 1;

    // Synchronization with the CPU via the NvSciSync fences built earlier.
    task.waitEvents = waitEvents;
    task.signalEvents = signalEvents;

    // Submit the single task to the DLA.
    cudlaSubmitTask(cudlaDevHandle, &task, 1, NULL, 0);
 
    /****************************CPU signals NvSciSyncObj1 (cuDLA is the waiter)***********/
    // Signalling from the CPU satisfies the pre-fence the submitted task waits on.
    NvSciSyncObjSignal(syncObj1);

    /****************************CPU waits on NvSciSyncObj2 (cuDLA is the signaller)*******/
    // Block on the end-of-frame fence attached to the task's signal events.
    // NOTE(review): a timeout of -1 presumably means "wait indefinitely" --
    // confirm against the NvSciSyncFenceWait documentation.
    NvSciSyncFenceWait(
        reinterpret_cast<NvSciSyncFence*>(signalEvents->eofFences[0].fence),
        nvSciCtx,
        -1);
    
    /****************************Tear Down phase for NvSciSync*****************************/
    // Unregister the imported NvSciSync objects from cuDLA, using the same
    // device handle (cudlaDevHandle) they were imported with.
    cudlaMemUnregister(cudlaDevHandle, nvSciSyncObjRegPtr1);
    cudlaMemUnregister(cudlaDevHandle, nvSciSyncObjRegPtr2);

    // Clear the fences before the sync objects and the module are released.
    NvSciSyncFenceClear(&preFence);
    NvSciSyncFenceClear(&eofFence);

    // Free the NvSciSync objects.
    NvSciSyncObjFree(syncObj1);
    NvSciSyncObjFree(syncObj2);

    // Free the per-participant attribute lists.
    NvSciSyncAttrListFree(signalerAttrListObj1);
    NvSciSyncAttrListFree(waiterAttrListObj1);
    NvSciSyncAttrListFree(signalerAttrListObj2);
    NvSciSyncAttrListFree(waiterAttrListObj2);

    // Free the reconciliation outputs.
    NvSciSyncAttrListFree(nvSciSyncConflictListObj1);
    NvSciSyncAttrListFree(nvSciSyncReconciledListObj1);
    NvSciSyncAttrListFree(nvSciSyncConflictListObj2);
    NvSciSyncAttrListFree(nvSciSyncReconciledListObj2);

    // Release the CPU wait context, then the module itself last.
    NvSciSyncCpuWaitContextFree(nvSciCtx);
    NvSciSyncModuleClose(syncModule);

    // Release the heap-allocated event descriptors. signalEvents->eofFences
    // must be freed before signalEvents, since it is read through that pointer.
    free(preFences);
    free(waitEvents);
    free(signalEvents->eofFences);
    free(devPtrs);
    free(signalEvents);