Hi, I am vectorizing some code using the MIC intrinsics, but I am getting a segmentation fault on the MIC (offload error: process on the device 0 was terminated by signal 11 (SIGSEGV)). Could someone please tell me where I am making a mistake? It seems like a simple error, but I am stuck here.
Normal code:
for(curIdx = 0; curIdx < index; curIdx++)
{
outreal[cidxarray[curIdx]] += inrealvalue*valuearray[curIdx];
}
Vectorized code:
// Vectorized form of the scalar loop: handles the first indexSize elements
// 16 at a time (one 512-bit register holds 16 32-bit lanes). The last
// `remain` elements are not touched by this loop.
int remain = index %16;
int indexSize = index-remain;
// Put inrealvalue into every lane of v1 so it can multiply 16 values at once.
v1 = _mm512_set1_ps(inrealvalue);
// Note: this `int curIdx` is scoped to the for statement only.
for(int curIdx = 0; curIdx < indexSize; curIdx = curIdx+16)
{
// NOTE(review): _mm512_load_epi32 and _mm512_load_ps are the ALIGNED load
// forms; per the Intel intrinsics documentation the address must be 64-byte
// aligned. curIdx advances by 16 elements (64 bytes, assuming 4-byte
// elements), so cidxarray+curIdx and valuearray+curIdx are aligned only if
// the array base addresses are. The allocation of both arrays is not visible
// here -- confirm they are 64-byte aligned on the device. A misaligned
// address is a likely candidate for the reported SIGSEGV.
v1i = _mm512_load_epi32(cidxarray+curIdx);
v2 = _mm512_load_ps((void*)(valuearray+curIdx));
v3 = _mm512_mul_ps(v1,v2); // v3 = inrealvalue * valuearray[curIdx .. curIdx+15]
// Scale is sizeof(float), so each index in v1i is an element index into
// outreal. NOTE(review): an index outside outreal would also fault here --
// verify the contents of cidxarray against the size of outreal.
v4 = _mm512_i32gather_ps(v1i,(void*)outreal,sizeof(float)); // v4 = current outreal values at the 16 indices
v5 = _mm512_add_ps(v3,v4); // v5 = old outreal value + product, per lane
// NOTE(review): if the same index appears more than once within one group of
// 16, every such lane gathered the same old value and only one lane's sum
// survives the scatter, so the result differs from the scalar loop. Confirm
// indices are unique within each group of 16, or handle duplicates separately.
_mm512_i32scatter_ps((void*)outreal,v1i,v5,sizeof(float)); // write the 16 sums back to outreal
}
for(curIdx = indexSize; curIdx < index; curIdx++)
{
outreal[cidxarray[curIdx]] += inrealvalue*valuearray[curIdx];
}
Thanks
sivaramakrishna