/****************************** Initialize NvSciSync Parameters ******************************/
// Every handle below starts out as NULL. Several of them are only written by
// a later API call (the reconciled/conflict lists by NvSciSyncAttrListReconcile,
// the sync objects by NvSciSyncObjAlloc), and the tear-down code releases all of
// them unconditionally; without an initial value a handle that was never
// populated would be released while still holding an indeterminate value.
NvSciSyncObj syncObj1 = NULL;
NvSciSyncObj syncObj2 = NULL;
NvSciSyncModule syncModule = NULL;
// Two-entry input arrays for reconciliation: [0] = signaler list, [1] = waiter list.
NvSciSyncAttrList syncAttrListObj1[2] = { NULL, NULL };
NvSciSyncAttrList syncAttrListObj2[2] = { NULL, NULL };
NvSciSyncCpuWaitContext nvSciCtx = NULL;
// Unreconciled attribute lists, one signaler/waiter pair per sync object.
NvSciSyncAttrList waiterAttrListObj1 = NULL;
NvSciSyncAttrList signalerAttrListObj1 = NULL;
NvSciSyncAttrList waiterAttrListObj2 = NULL;
NvSciSyncAttrList signalerAttrListObj2 = NULL;
// Outputs of reconciliation. NOTE(review): the NvSciSync documentation
// describes the conflict list as an output for the failure case only, so it
// may legitimately stay NULL - confirm against the API reference.
NvSciSyncAttrList nvSciSyncConflictListObj1 = NULL;
NvSciSyncAttrList nvSciSyncReconciledListObj1 = NULL;
NvSciSyncAttrList nvSciSyncConflictListObj2 = NULL;
NvSciSyncAttrList nvSciSyncReconciledListObj2 = NULL;
// Open the NvSciSync module that every attribute list and the CPU wait context
// below are created from.
// NOTE(review): the status of this call is not inspected here.
NvSciSyncModuleOpen(&syncModule);
// ---- NvSciSyncObj1: the CPU is the signaler, cuDLA is the waiter ----
// Create one empty attribute list per participant.
NvSciSyncAttrListCreate(syncModule, &signalerAttrListObj1);
NvSciSyncAttrListCreate(syncModule, &waiterAttrListObj1);
// Let cuDLA populate the waiter-side attribute list.
cudlaGetNvSciSyncAttributes(reinterpret_cast<uint64_t*>(waiterAttrListObj1), CUDLA_NVSCISYNC_ATTR_WAIT);
// The CPU signaler's attributes are filled in by hand: CPU access is needed
// and signal-only permission is requested.
{
    bool needCpuAccess = true;
    NvSciSyncAccessPerm signalPerm = NvSciSyncAccessPerm_SignalOnly;

    NvSciSyncAttrKeyValuePair signalerAttrs[2];
    memset(signalerAttrs, 0, sizeof(signalerAttrs));

    NvSciSyncAttrKeyValuePair* cpuAccessAttr = &signalerAttrs[0];
    cpuAccessAttr->attrKey = NvSciSyncAttrKey_NeedCpuAccess;
    cpuAccessAttr->value = &needCpuAccess;
    cpuAccessAttr->len = sizeof(needCpuAccess);

    NvSciSyncAttrKeyValuePair* permAttr = &signalerAttrs[1];
    permAttr->attrKey = NvSciSyncAttrKey_RequiredPerm;
    permAttr->value = &signalPerm;
    permAttr->len = sizeof(signalPerm);

    // The pointed-to values only need to outlive this call, which is why they
    // can be locals of this scope.
    NvSciSyncAttrListSetAttrs(signalerAttrListObj1, signalerAttrs, 2);
}
// Reconcile the signaler and waiter lists into a single list for NvSciSyncObj1.
syncAttrListObj1[0] = signalerAttrListObj1;
syncAttrListObj1[1] = waiterAttrListObj1;
NvSciSyncAttrListReconcile(syncAttrListObj1, 2, &nvSciSyncReconciledListObj1, &nvSciSyncConflictListObj1);
// Allocate NvSciSyncObj1 from the reconciled list.
// NOTE(review): none of the calls in this section have their status checked,
// so a failed reconciliation is not detected before the allocation.
NvSciSyncObjAlloc(nvSciSyncReconciledListObj1, &syncObj1);
// CPU wait context, used later when the CPU waits on a fence.
NvSciSyncCpuWaitContextAlloc(syncModule, &nvSciCtx);
// ---- NvSciSyncObj2: cuDLA is the signaler, the CPU is the waiter ----
// Create one empty attribute list per participant.
NvSciSyncAttrListCreate(syncModule, &signalerAttrListObj2);
NvSciSyncAttrListCreate(syncModule, &waiterAttrListObj2);
// Let cuDLA populate the signaler-side attribute list.
cudlaGetNvSciSyncAttributes(reinterpret_cast<uint64_t*>(signalerAttrListObj2),CUDLA_NVSCISYNC_ATTR_SIGNAL);
// The CPU waiter's attributes are filled in by hand: CPU access is needed
// and wait-only permission is requested.
{
    bool needCpuAccess = true;
    NvSciSyncAccessPerm waitPerm = NvSciSyncAccessPerm_WaitOnly;

    NvSciSyncAttrKeyValuePair waiterAttrs[2];
    memset(waiterAttrs, 0, sizeof(waiterAttrs));

    NvSciSyncAttrKeyValuePair* cpuAccessAttr = &waiterAttrs[0];
    cpuAccessAttr->attrKey = NvSciSyncAttrKey_NeedCpuAccess;
    cpuAccessAttr->value = &needCpuAccess;
    cpuAccessAttr->len = sizeof(needCpuAccess);

    NvSciSyncAttrKeyValuePair* permAttr = &waiterAttrs[1];
    permAttr->attrKey = NvSciSyncAttrKey_RequiredPerm;
    permAttr->value = &waitPerm;
    permAttr->len = sizeof(waitPerm);

    // The pointed-to values only need to outlive this call, which is why they
    // can be locals of this scope.
    NvSciSyncAttrListSetAttrs(waiterAttrListObj2, waiterAttrs, 2);
}
// Reconcile the signaler and waiter lists into a single list for NvSciSyncObj2.
syncAttrListObj2[0] = signalerAttrListObj2;
syncAttrListObj2[1] = waiterAttrListObj2;
NvSciSyncAttrListReconcile(syncAttrListObj2, 2, &nvSciSyncReconciledListObj2, &nvSciSyncConflictListObj2);
// Allocate NvSciSyncObj2 from the reconciled list.
// NOTE(review): none of the calls in this section have their status checked,
// so a failed reconciliation is not detected before the allocation.
NvSciSyncObjAlloc(nvSciSyncReconciledListObj2, &syncObj2);
/******************** Registration of NvSciSync with cuDLA ******************************/
// A single descriptor is reused for both imports. It starts out all-zero and
// only extSyncObject is filled in before each import.
cudlaExternalSemaphoreHandleDesc semaMemDesc = { 0 };

// Import NvSciSyncObj1 into cuDLA; the registered pointer comes back in
// nvSciSyncObjRegPtr1.
uint64_t* nvSciSyncObjRegPtr1 = NULL;
semaMemDesc.extSyncObject = syncObj1;
cudlaImportExternalSemaphore(cudlaDevHandle, &semaMemDesc, &nvSciSyncObjRegPtr1, 0);

// Wipe the descriptor, then import NvSciSyncObj2 the same way; the registered
// pointer comes back in nvSciSyncObjRegPtr2.
uint64_t* nvSciSyncObjRegPtr2 = NULL;
memset(&semaMemDesc, 0, sizeof(semaMemDesc));
semaMemDesc.extSyncObject = syncObj2;
cudlaImportExternalSemaphore(cudlaDevHandle, &semaMemDesc, &nvSciSyncObjRegPtr2, 0);
// NOTE(review): the status of both imports is not inspected here.
// ---- Wait events: cuDLA is the waiter ----
// Generate a fence on NvSciSyncObj1; it is handed to the task as its single
// pre-fence.
NvSciSyncFence preFence = NvSciSyncFenceInitializer;
NvSciSyncObjGenerateFence(syncObj1, &preFence);
// NOTE(review): the malloc results in this section are used without a NULL check.
cudlaWaitEvents* waitEvents = (cudlaWaitEvents *)malloc(sizeof(*waitEvents));
waitEvents->numEvents = 1;
CudlaFence* preFences = (CudlaFence *)malloc(waitEvents->numEvents * sizeof(*preFences));
preFences[0].type = CUDLA_NVSCISYNC_FENCE;
preFences[0].fence = &preFence;
waitEvents->preFences = preFences;

// ---- Signal events: cuDLA is the signaler ----
cudlaSignalEvents* signalEvents = (cudlaSignalEvents *)malloc(sizeof(*signalEvents));
signalEvents->numEvents = 1;
// The registered pointer for NvSciSyncObj2 identifies the object to signal.
uint64_t** devPtrs = (uint64_t **)malloc(signalEvents->numEvents * sizeof(*devPtrs));
devPtrs[0] = nvSciSyncObjRegPtr2;
signalEvents->devPtrs = devPtrs;
// eofFence starts out as an empty fence; its address is stored in the signal
// events so the same fence object can be waited on after submission.
NvSciSyncFence eofFence = NvSciSyncFenceInitializer;
signalEvents->eofFences = (CudlaFence *)malloc(signalEvents->numEvents * sizeof(*signalEvents->eofFences));
signalEvents->eofFences[0].type = CUDLA_NVSCISYNC_FENCE;
signalEvents->eofFences[0].fence = &eofFence;
/**************************** Task Submission to DLA **************************************/
// Describe a single task with one input tensor, one output tensor and the
// wait/signal events prepared earlier, then submit it to the DLA.
cudlaTask task;
task.moduleHandle = moduleHandle;
// Input: the DLA reads the input data from this memory.
task.numInputTensors = 1;
task.inputTensor = &inputBufObjRegPtr;
// Output: the DLA writes its results into this memory when the task completes.
task.numOutputTensors = 1;
task.outputTensor = &outputBufObjRegPtr;
// Synchronisation: wait on the pre-fence, signal the end-of-frame fence.
task.waitEvents = waitEvents;
task.signalEvents = signalEvents;
// NOTE(review): the status of the submission is not inspected here.
cudlaSubmitTask(cudlaDevHandle, &task, 1, NULL, 0);

/**************************** Signalling of NvSciSyncObj1 from CPU **********************/
// The submitted task waits on a fence of NvSciSyncObj1; signalling the object
// from the CPU is what releases it.
NvSciSyncObjSignal(syncObj1);

/**************************** CPU waits on NvSciSyncObj2 (cuDLA is the signaler) ********/
// Wait on the end-of-frame fence stored in the signal events.
// NOTE(review): a timeout of -1 is presumably "wait indefinitely" - confirm
// against the NvSciSyncFenceWait documentation.
NvSciSyncFenceWait(reinterpret_cast<NvSciSyncFence*>(signalEvents->eofFences[0].fence), nvSciCtx, -1);
/****************************Tear Down phase for NvSciSync*****************************/
// Unregister nvSciSync from cuDLA
cudlaMemUnregister(devHandle, nvSciSyncObjRegPtr1);
cudlaMemUnregister(devHandle, nvSciSyncObjRegPtr2);
// Free NvSciSync
vSciSyncObjFree(syncObj1);
NvSciSyncObjFree(syncObj2);
NvSciSyncAttrListFree(signalerAttrListObj1);
NvSciSyncAttrListFree(waiterAttrListObj1);
NvSciSyncAttrListFree(signalerAttrListObj2);
NvSciSyncAttrListFree(waiterAttrListObj2);
NvSciSyncAttrListFree(nvSciSyncConflictListObj1);
NvSciSyncAttrListFree(nvSciSyncReconciledListObj1);
NvSciSyncAttrListFree(nvSciSyncConflictListObj2);
NvSciSyncAttrListFree(nvSciSyncReconciledListObj2);
NvSciSyncCpuWaitContextFree(nvSciCtx);
NvSciSyncModuleClose(syncModule);
free(waitEvents);
free(preFences);
free(signalEvents->eofFences);
free(signalEvents);
free(devPtrs);
NvSciSyncFenceClear(&preFence);
NvSciSyncFenceClear(&eofFence);