Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(655587)

Side by Side Diff: libspeexdsp/resample_sse.h

Issue 1548: speexdsp: add initial check-asm for inner_product_*
Patch Set: Created 1 year, 9 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View unified diff | Download patch
« no previous file with comments | « libspeexdsp/resample_neon.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* Copyright (C) 2007-2008 Jean-Marc Valin 1 /* Copyright (C) 2007-2008 Jean-Marc Valin
2 * Copyright (C) 2008 Thorvald Natvig 2 * Copyright (C) 2008 Thorvald Natvig
3 */ 3 */
4 /** 4 /**
5 @file resample_sse.h 5 @file resample_sse.h
6 @brief Resampler functions (SSE version) 6 @brief Resampler functions (SSE version)
7 */ 7 */
8 /* 8 /*
9 Redistribution and use in source and binary forms, with or without 9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions 10 modification, are permitted provided that the following conditions
(...skipping 17 matching lines...) Expand all
28 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
29 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
30 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
31 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
32 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36 36
37 #include <xmmintrin.h> 37 #include <xmmintrin.h>
38
39 #if defined(CHECK_ASM)
40 #include "resample_checkasm.h"
41 #endif
38 42
39 #define OVERRIDE_INNER_PRODUCT_SINGLE 43 #define OVERRIDE_INNER_PRODUCT_SINGLE
40 static inline float inner_product_single(const float *a, const float *b, unsigned int len) 44 static inline float inner_product_single(const float *a, const float *b, unsigned int len)
41 { 45 {
42 int i; 46 int i;
43 float ret; 47 float ret;
48 #if defined(CHECK_ASM)
49 float expected;
50 #endif
44 __m128 sum = _mm_setzero_ps(); 51 __m128 sum = _mm_setzero_ps();
45 for (i=0;i<len;i+=8) 52 for (i=0;i<len;i+=8)
46 { 53 {
47 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i))); 54 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)));
48 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4))); 55 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)));
49 } 56 }
50 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); 57 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
51 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); 58 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
52 _mm_store_ss(&ret, sum); 59 _mm_store_ss(&ret, sum);
60 #if defined(CHECK_ASM)
61 expected = 0.0;
62 RESAMPLE_CHECK_INNER_PRODUCT(a, b, len, ret, expected, "%f");
63 #endif
53 return ret; 64 return ret;
54 } 65 }
55 66
56 #define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE 67 #define OVERRIDE_INTERPOLATE_PRODUCT_SINGLE
57 static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { 68 static inline float interpolate_product_single(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
58 int i; 69 int i;
59 float ret; 70 float ret;
60 __m128 sum = _mm_setzero_ps(); 71 __m128 sum = _mm_setzero_ps();
61 __m128 f = _mm_loadu_ps(frac); 72 __m128 f = _mm_loadu_ps(frac);
62 for(i=0;i<len;i+=2) 73 for(i=0;i<len;i+=2)
63 { 74 {
64 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample))); 75 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i), _mm_loadu_ps(b+i*oversample)));
65 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample))); 76 sum = _mm_add_ps(sum, _mm_mul_ps(_mm_load1_ps(a+i+1), _mm_loadu_ps(b+(i+1)*oversample)));
66 } 77 }
67 sum = _mm_mul_ps(f, sum); 78 sum = _mm_mul_ps(f, sum);
68 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum)); 79 sum = _mm_add_ps(sum, _mm_movehl_ps(sum, sum));
69 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55)); 80 sum = _mm_add_ss(sum, _mm_shuffle_ps(sum, sum, 0x55));
70 _mm_store_ss(&ret, sum); 81 _mm_store_ss(&ret, sum);
71 return ret; 82 return ret;
72 } 83 }
73 84
74 #ifdef _USE_SSE2 85 #ifdef _USE_SSE2
75 #include <emmintrin.h> 86 #include <emmintrin.h>
76 #define OVERRIDE_INNER_PRODUCT_DOUBLE 87 #define OVERRIDE_INNER_PRODUCT_DOUBLE
77 88
78 static inline double inner_product_double(const float *a, const float *b, unsigned int len) 89 static inline double inner_product_double(const float *a, const float *b, unsigned int len)
79 { 90 {
80 int i; 91 int i;
81 double ret; 92 double ret;
93 #if defined(CHECK_ASM)
94 double expected;
95 #endif
82 __m128d sum = _mm_setzero_pd(); 96 __m128d sum = _mm_setzero_pd();
83 __m128 t; 97 __m128 t;
84 for (i=0;i<len;i+=8) 98 for (i=0;i<len;i+=8)
85 { 99 {
86 t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i)); 100 t = _mm_mul_ps(_mm_loadu_ps(a+i), _mm_loadu_ps(b+i));
87 sum = _mm_add_pd(sum, _mm_cvtps_pd(t)); 101 sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
88 sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t))); 102 sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
89 103
90 t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4)); 104 t = _mm_mul_ps(_mm_loadu_ps(a+i+4), _mm_loadu_ps(b+i+4));
91 sum = _mm_add_pd(sum, _mm_cvtps_pd(t)); 105 sum = _mm_add_pd(sum, _mm_cvtps_pd(t));
92 sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t))); 106 sum = _mm_add_pd(sum, _mm_cvtps_pd(_mm_movehl_ps(t, t)));
93 } 107 }
94 sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum)); 108 sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
95 _mm_store_sd(&ret, sum); 109 _mm_store_sd(&ret, sum);
110 #if defined(CHECK_ASM)
111 expected = 0.0;
112 RESAMPLE_CHECK_INNER_PRODUCT(a, b, len, ret, expected, "%f");
113 #endif
96 return ret; 114 return ret;
97 } 115 }
98 116
99 #define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE 117 #define OVERRIDE_INTERPOLATE_PRODUCT_DOUBLE
100 static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) { 118 static inline double interpolate_product_double(const float *a, const float *b, unsigned int len, const spx_uint32_t oversample, float *frac) {
101 int i; 119 int i;
102 double ret; 120 double ret;
103 __m128d sum; 121 __m128d sum;
104 __m128d sum1 = _mm_setzero_pd(); 122 __m128d sum1 = _mm_setzero_pd();
105 __m128d sum2 = _mm_setzero_pd(); 123 __m128d sum2 = _mm_setzero_pd();
(...skipping 13 matching lines...) Expand all
119 } 137 }
120 sum1 = _mm_mul_pd(f1, sum1); 138 sum1 = _mm_mul_pd(f1, sum1);
121 sum2 = _mm_mul_pd(f2, sum2); 139 sum2 = _mm_mul_pd(f2, sum2);
122 sum = _mm_add_pd(sum1, sum2); 140 sum = _mm_add_pd(sum1, sum2);
123 sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum)); 141 sum = _mm_add_sd(sum, _mm_unpackhi_pd(sum, sum));
124 _mm_store_sd(&ret, sum); 142 _mm_store_sd(&ret, sum);
125 return ret; 143 return ret;
126 } 144 }
127 145
128 #endif 146 #endif
OLD NEW
« no previous file with comments | « libspeexdsp/resample_neon.h ('k') | no next file » | no next file with comments »

RSS Feeds Recent Issues | This issue
This is Rietveld