Starbuck
2009-02-26 09:54:42 UTC
hi,
this code compiled with VC6 but not with VC8 nor VC9 (see below)
not a champion at x86 assembly,
I guess changing movq with mov
may do same thing,
am I right ?
1>.\ResizeFrame.cpp(1607) : error C2415: improper operand type
movq [edi], xmm0
1>.\ResizeFrame.cpp(1614) : error C2415: improper operand type
movq [edi], xmm0
1>.\ResizeFrame.cpp(1646) : error C2415: improper operand type
movq [edi], xmm0
//----------------------------------------------------------------
// memsetP4: fill cbSize bytes at pDest with the low byte of byte_val
// and return pDest.
//
// Target: 32-bit x86, MSVC inline assembler (Intel syntax), SSE2.
//
// Work split for cbSize >= 32:
//   lead  : cbLead (0..31) bytes up to the next 32-byte boundary,
//           as cbLeadLead (0..7) odd bytes + cbLeadMain in 8-byte stores
//   main  : cbMain, a multiple of 64 bytes, written with movntdq
//   trail : cbTrail (0..63) bytes,
//           as cbTrailMain in 8-byte stores + cbTrailTrail (0..7) bytes
// Buffers shorter than 32 bytes are filled by rep stosb alone.
//
// Build fix: VC8/VC9 reject "movq [edi], xmm0" with error C2415
// because the memory operand carries no size. "qword ptr" states the
// 64-bit store explicitly. Plain "mov" is NOT a substitute: it cannot
// take an XMM register as an operand.
//----------------------------------------------------------------
void* memsetP4(void* pDest, int byte_val, size_t cbSize)
{
    size_t cbLeadMain, cbLeadLead, cbMain, cbTrailMain, cbTrailTrail;

    __asm
    {
        mov         ecx, cbSize                 // ecx = cbSize
        cmp         ecx, 32
        mov         edi, pDest                  // mov does not disturb the flags set by cmp
        jb          l_memset                    // cbSize < 32: plain byte fill

        // Replicate the low byte of byte_val into the low 8 bytes of xmm0.
        movd        xmm0, byte_val
        punpcklbw   xmm0, xmm0                  // low word = fill byte twice
        pshuflw     xmm0, xmm0, _MM_SHUFFLE(0,0,0,0)   // copy low word to words 0..3

        // cbLead = ((((long)pDest) + 31) & ~31) - (long)pDest;
        lea         eax, [edi + 31]
        and         eax, ~31
        sub         eax, edi                    // eax = cbLead
        sub         ecx, eax                    // ecx = cbSize - cbLead

        // cbLeadMain = cbLead & ~7;
        mov         edx, eax                    // edx = cbLead
        and         eax, ~7                     // eax = cbLeadMain
        mov         cbLeadMain, eax

        // cbLeadLead = cbLead - cbLeadMain;
        sub         edx, eax                    // edx = cbLeadLead
        mov         cbLeadLead, edx
        // cbLead <= 31 and cbSize >= 32 here, so ecx is still >= 1.

        // cbTrail = remaining & 63; cbMain = remaining - cbTrail;
        mov         eax, ecx                    // eax = remaining
        and         ecx, 63                     // ecx = cbTrail
        sub         eax, ecx                    // eax = cbMain
        mov         cbMain, eax

        // cbTrailMain = cbTrail & ~7;
        mov         edx, ecx                    // edx = cbTrail
        and         ecx, ~7                     // ecx = cbTrailMain
        mov         cbTrailMain, ecx

        // cbTrailTrail = cbTrail - cbTrailMain;
        sub         edx, ecx                    // edx = cbTrailTrail
        mov         cbTrailTrail, edx

        // ---- main part starts ------------------------------------
        mov         ecx, cbLeadLead
        mov         eax, cbLeadMain
        test        ecx, ecx                    // cbLeadLead? (original note: this jump can be removed)
        jz          l_leadloop
        // One 8-byte store covers the 1..7 odd bytes; edi then advances
        // by cbLeadLead only, so the following stores overlap the excess.
        movq        qword ptr [edi], xmm0
        add         edi, ecx

    l_leadloop:
        sub         eax, 8
        jl          l_startmainloop             // cbLeadMain consumed
        movq        qword ptr [edi], xmm0
        add         edi, 8
        jmp short   l_leadloop

    l_startmainloop:
        mov         ecx, cbMain
        test        ecx, ecx                    // cbMain?
        jz          l_starttrailloop

        punpcklqdq  xmm0, xmm0                  // widen the pattern to all 16 bytes

    l_alignedmainloop:
        movntdq     [edi +  0], xmm0
        movntdq     [edi + 16], xmm0
        movntdq     [edi + 32], xmm0
        movntdq     [edi + 48], xmm0
        add         edi, 64
        sub         ecx, 64
        ja          l_alignedmainloop           // loop until ecx reaches 0

        sfence                                  // only needed after movntdq

    l_starttrailloop:
        mov         eax, cbTrailMain
        mov         ecx, cbTrailTrail           // remove if the cbTrailTrail test below is removed

    l_trailloop:
        sub         eax, 8
        jl          l_trailtrail                // cbTrailMain consumed
        movq        qword ptr [edi], xmm0
        add         edi, 8
        jmp short   l_trailloop

    l_trailtrail:
        test        ecx, ecx                    // cbTrailTrail? (original note: this test can be removed)
        jz          l_finish
        // fall through to l_memset

    l_memset:
        // ecx = byte count (cbSize on the short path, cbTrailTrail otherwise).
        // NOTE(review): rep stosb counts upward only if the direction flag
        // is clear - assumed here, confirm for callers that set it.
        mov         eax, byte_val               // al = fill byte
        rep stosb

    l_finish:
        // emms is not required: no MMX register is used above.
    }

    return pDest;
}
this code compiled with VC6 but not with VC8 nor VC9 (see below)
not a champion at x86 assembly,
I guess changing movq with mov
may do same thing,
am I right ?
1>.\ResizeFrame.cpp(1607) : error C2415: improper operand type
movq [edi], xmm0
1>.\ResizeFrame.cpp(1614) : error C2415: improper operand type
movq [edi], xmm0
1>.\ResizeFrame.cpp(1646) : error C2415: improper operand type
movq [edi], xmm0
//----------------------------------------------------------------
// memsetP4: fill cbSize bytes at pDest with the low byte of byte_val
// and return pDest.
//
// Target: 32-bit x86, MSVC inline assembler (Intel syntax), SSE2.
//
// Work split for cbSize >= 32:
//   lead  : cbLead (0..31) bytes up to the next 32-byte boundary,
//           as cbLeadLead (0..7) odd bytes + cbLeadMain in 8-byte stores
//   main  : cbMain, a multiple of 64 bytes, written with movntdq
//   trail : cbTrail (0..63) bytes,
//           as cbTrailMain in 8-byte stores + cbTrailTrail (0..7) bytes
// Buffers shorter than 32 bytes are filled by rep stosb alone.
//
// Build fix: VC8/VC9 reject "movq [edi], xmm0" with error C2415
// because the memory operand carries no size. "qword ptr" states the
// 64-bit store explicitly. Plain "mov" is NOT a substitute: it cannot
// take an XMM register as an operand.
//----------------------------------------------------------------
void* memsetP4(void* pDest, int byte_val, size_t cbSize)
{
    size_t cbLeadMain, cbLeadLead, cbMain, cbTrailMain, cbTrailTrail;

    __asm
    {
        mov         ecx, cbSize                 // ecx = cbSize
        cmp         ecx, 32
        mov         edi, pDest                  // mov does not disturb the flags set by cmp
        jb          l_memset                    // cbSize < 32: plain byte fill

        // Replicate the low byte of byte_val into the low 8 bytes of xmm0.
        movd        xmm0, byte_val
        punpcklbw   xmm0, xmm0                  // low word = fill byte twice
        pshuflw     xmm0, xmm0, _MM_SHUFFLE(0,0,0,0)   // copy low word to words 0..3

        // cbLead = ((((long)pDest) + 31) & ~31) - (long)pDest;
        lea         eax, [edi + 31]
        and         eax, ~31
        sub         eax, edi                    // eax = cbLead
        sub         ecx, eax                    // ecx = cbSize - cbLead

        // cbLeadMain = cbLead & ~7;
        mov         edx, eax                    // edx = cbLead
        and         eax, ~7                     // eax = cbLeadMain
        mov         cbLeadMain, eax

        // cbLeadLead = cbLead - cbLeadMain;
        sub         edx, eax                    // edx = cbLeadLead
        mov         cbLeadLead, edx
        // cbLead <= 31 and cbSize >= 32 here, so ecx is still >= 1.

        // cbTrail = remaining & 63; cbMain = remaining - cbTrail;
        mov         eax, ecx                    // eax = remaining
        and         ecx, 63                     // ecx = cbTrail
        sub         eax, ecx                    // eax = cbMain
        mov         cbMain, eax

        // cbTrailMain = cbTrail & ~7;
        mov         edx, ecx                    // edx = cbTrail
        and         ecx, ~7                     // ecx = cbTrailMain
        mov         cbTrailMain, ecx

        // cbTrailTrail = cbTrail - cbTrailMain;
        sub         edx, ecx                    // edx = cbTrailTrail
        mov         cbTrailTrail, edx

        // ---- main part starts ------------------------------------
        mov         ecx, cbLeadLead
        mov         eax, cbLeadMain
        test        ecx, ecx                    // cbLeadLead? (original note: this jump can be removed)
        jz          l_leadloop
        // One 8-byte store covers the 1..7 odd bytes; edi then advances
        // by cbLeadLead only, so the following stores overlap the excess.
        movq        qword ptr [edi], xmm0
        add         edi, ecx

    l_leadloop:
        sub         eax, 8
        jl          l_startmainloop             // cbLeadMain consumed
        movq        qword ptr [edi], xmm0
        add         edi, 8
        jmp short   l_leadloop

    l_startmainloop:
        mov         ecx, cbMain
        test        ecx, ecx                    // cbMain?
        jz          l_starttrailloop

        punpcklqdq  xmm0, xmm0                  // widen the pattern to all 16 bytes

    l_alignedmainloop:
        movntdq     [edi +  0], xmm0
        movntdq     [edi + 16], xmm0
        movntdq     [edi + 32], xmm0
        movntdq     [edi + 48], xmm0
        add         edi, 64
        sub         ecx, 64
        ja          l_alignedmainloop           // loop until ecx reaches 0

        sfence                                  // only needed after movntdq

    l_starttrailloop:
        mov         eax, cbTrailMain
        mov         ecx, cbTrailTrail           // remove if the cbTrailTrail test below is removed

    l_trailloop:
        sub         eax, 8
        jl          l_trailtrail                // cbTrailMain consumed
        movq        qword ptr [edi], xmm0
        add         edi, 8
        jmp short   l_trailloop

    l_trailtrail:
        test        ecx, ecx                    // cbTrailTrail? (original note: this test can be removed)
        jz          l_finish
        // fall through to l_memset

    l_memset:
        // ecx = byte count (cbSize on the short path, cbTrailTrail otherwise).
        // NOTE(review): rep stosb counts upward only if the direction flag
        // is clear - assumed here, confirm for callers that set it.
        mov         eax, byte_val               // al = fill byte
        rep stosb

    l_finish:
        // emms is not required: no MMX register is used above.
    }

    return pDest;
}