1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
|
#include <dvec.h>
#include <emmintrin.h>
/**
 * Element-wise addition of two matrices of `short`, specialised for SSE2.
 *
 * The result matrix is constructed from mat1's `_tx` / `_ty` members. When
 * both operands report the same LireNx() / LireNy() values, the elements are
 * summed eight at a time through 128-bit registers (unaligned loads and
 * stores), and whatever is left over (fewer than eight elements) is summed
 * one element at a time.
 *
 * @param mat1 left operand
 * @param mat2 right operand
 * @return the sum matrix; if the reported dimensions differ, the result is
 *         returned exactly as constructed, with no element written here.
 *
 * NOTE(review): the pointer walk assumes `mat` exposes one contiguous run of
 * LireNx() * LireNy() shorts starting at index 0 -- confirm against Matrix.
 */
template < >
Matrix<short> OpeSSE<short>::addition(Matrix<short> & mat1, Matrix<short> & mat2)
{
    Matrix<short> resMat(mat1._tx, mat1._ty);

    const int lhsNx = mat1.LireNx();
    const int lhsNy = mat1.LireNy();
    const int rhsNx = mat2.LireNx();
    const int rhsNy = mat2.LireNy();

    // Size mismatch: nothing is added, hand back the matrix untouched.
    if (lhsNx != rhsNx || lhsNy != rhsNy) {
        return resMat;
    }

    const int total = lhsNx * lhsNy;

    // View the three element buffers as streams of 128-bit blocks.
    __m128i *dst = reinterpret_cast<__m128i *>(&resMat.mat[0]);
    const __m128i *lhs = reinterpret_cast<const __m128i *>(&mat1.mat[0]);
    const __m128i *rhs = reinterpret_cast<const __m128i *>(&mat2.mat[0]);

    // Vector part: runs only while a full group of eight shorts remains.
    int idx = 0;
    while (idx < total - 7) {
        Is16vec8 a(_mm_loadu_si128(lhs));
        Is16vec8 b(_mm_loadu_si128(rhs));
        Is16vec8 sum = a + b;
        _mm_storeu_si128(dst, sum);

        ++lhs;
        ++rhs;
        ++dst;
        idx += 8;
    }

    // Scalar tail: picks up where the vector loop stopped.
    for (; idx < total; ++idx) {
        resMat.mat[idx] = mat1.mat[idx] + mat2.mat[idx];
    }

    return resMat;
}
Partager