This question is about why MSVC seems to miss a very basic optimization that gcc handles without trouble. I'm wondering whether I missed something here and what the reason could be.
The following code snippet:
#include <vector>
// Element type stored in the vectors that for_1 and for_2 iterate over:
// a plain aggregate with an int member followed by a double member.
struct X {
int x;  // the only member the two loops write to
double y;  // not accessed by either loop
};
// Assigns 1 to the `x` member of every element of the vector, using a
// range-based for loop. By the language definition of range-based for, the
// begin and end iterators are obtained once, before the first iteration.
//
// Parameter `x`: the vector whose elements are modified in place.
void for_1(std::vector<X>& x) {
// `y` is a reference to the current element (an X); it is unrelated to
// the member X::y despite the name.
for (auto& y : x) {
y.x = 1;
}
}
// Assigns 1 to the `x` member of every element of the vector, using an
// explicit iterator loop.
//
// Parameter `x`: the vector whose elements are modified in place.
void for_2(std::vector<X>& x) {
// As written, the loop condition calls x.end() on every iteration rather
// than storing the end iterator in a local before the loop starts.
for (auto it = x.begin(); it != x.end(); ++ it) {
it->x = 1;
}
}
When compiled with MSVC with optimization on (-O2), the two functions compile to different assembly (note the additional mov in for_1; for full output, see godbolt):
x$ = 8
void for_2(std::vector<X,std::allocator<X> > &) PROC ; for_2, COMDAT
mov rax, QWORD PTR [rcx]
cmp rax, QWORD PTR [rcx+8]
je SHORT $LN3@for_2
npad 7
$LL4@for_2:
mov DWORD PTR [rax], 1
add rax, 16
cmp rax, QWORD PTR [rcx+8]
jne SHORT $LL4@for_2
$LN3@for_2:
ret 0
void for_2(std::vector<X,std::allocator<X> > &) ENDP ; for_2
x$ = 8
void for_1(std::vector<X,std::allocator<X> > &) PROC ; for_1, COMDAT
mov rdx, QWORD PTR [rcx+8]
mov rax, QWORD PTR [rcx]
cmp rax, rdx
je SHORT $LN3@for_1
npad 4
$LL4@for_1:
mov DWORD PTR [rax], 1
add rax, 16
cmp rax, rdx
jne SHORT $LL4@for_1
$LN3@for_1:
ret 0
void for_1(std::vector<X,std::allocator<X> > &) ENDP ; for_1
...
But with gcc 9.2 (-O3), the output is the same:
for_1(std::vector<X, std::allocator<X> >&):
mov rax, QWORD PTR [rdi]
mov rdx, QWORD PTR [rdi+8]
cmp rax, rdx
je .L1
.L3:
mov DWORD PTR [rax], 1
add rax, 16
cmp rdx, rax
jne .L3
.L1:
ret
for_2(std::vector<X, std::allocator<X> >&):
mov rdx, QWORD PTR [rdi+8]
mov rax, QWORD PTR [rdi]
cmp rax, rdx
je .L6
.L8:
mov DWORD PTR [rax], 1
add rax, 16
cmp rdx, rax
jne .L8
.L6:
ret
To me, it seems pretty rudimentary to optimize both functions to the same assembly, but MSVC for some reason fails here. Why is this? Am I missing something?
Aucun commentaire:
Enregistrer un commentaire