Quantcast
Channel: Intel® Software - Intel® Fortran Compiler for Linux* and macOS*
Viewing all articles
Browse latest Browse all 2746

ifort picks unaligned version of mov even if data is aligned

$
0
0

Hi,

Consider the following code and the generated assembly from ifort 14 (with -xCORE-AVX2 and -O2).

Assuming that the ..B1.9 block is the peel loop, why does the compiler still use unaligned mov instructions (vmovups) in the vectorized loop body (..B1.12)?

 

          !> Multiply acc element-wise by z, in place, over the first n elements.
          !!
          !! acc : array updated in place; acc(i) becomes acc(i) * z(i) for i = 1..n
          !! z   : array of multipliers (read only)
          !! n   : number of leading elements to process; the loop body
          !!       does not execute when n < 1
          subroutine aligntest (acc,z,n)
             implicit none
             ! Assumed-size dummies are kept so that callers relying on an
             ! implicit interface continue to work unchanged.
             real, dimension(*), intent(inout) :: acc
             real, dimension(*), intent(in)    :: z
             integer, intent(in) :: n
             integer :: i

             do i = 1, n
                acc(i) = acc(i) * z(i)
             end do

          end subroutine aligntest

 

# NOTE(review): scalar loop. Each iteration loads one single-precision
# element from (%rdi,%rcx,4), multiplies it by the element at (%rsi,%rcx,4),
# stores the product back to (%rdi,%rcx,4), and advances %rcx by 1 until it
# reaches %r8. Presumably this is the alignment peel loop -- confirm against
# the full listing.
..B1.9:                         # Preds ..B1.7 ..B1.9
        vmovss    (%rdi,%rcx,4), %xmm0                          #9.26
        vmulss    (%rsi,%rcx,4), %xmm0, %xmm1                   #9.17
        vmovss    %xmm1, (%rdi,%rcx,4)                          #9.17
        incq      %rcx                                          #8.14
        cmpq      %r8, %rcx                                     #8.14
        jb        ..B1.9        # Prob 82%                      #8.14
                                # LOE rax rdx rcx rbx rbp rsi rdi r8 r12 r13 r14 r15
# NOTE(review): vector loop. Each iteration processes 16 floats as two
# 256-bit (ymm) halves at byte offsets 0 and 32, advancing %r8 by 16 until it
# reaches %rdx. The loads from and stores to the %rdi-based array use the
# unaligned form vmovups; the %rsi-based array is read directly as the memory
# operand of vmulps.
..B1.12:                        # Preds ..B1.7 ..B1.9 ..B1.12
        vmovups   (%rdi,%r8,4), %ymm0                           #9.26
        vmovups   32(%rdi,%r8,4), %ymm2                         #9.26
        vmulps    (%rsi,%r8,4), %ymm0, %ymm1                    #9.17
        vmulps    32(%rsi,%r8,4), %ymm2, %ymm3                  #9.17
        vmovups   %ymm1, (%rdi,%r8,4)                           #9.17
        vmovups   %ymm3, 32(%rdi,%r8,4)                         #9.17
        addq      $16, %r8                                      #8.14
        cmpq      %rdx, %r8                                     #8.14
        jb        ..B1.12       # Prob 82%                      #8.14

 

 


Viewing all articles
Browse latest Browse all 2746

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>