| 2004 | | same0 = _mm_mul_pd(same0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 3>(), x))); |
| 2005 | | same1 = _mm_mul_pd(same1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 3>(), x))); |
| 2006 | | same2 = _mm_mul_pd(same2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 3>(), x))); |
| 2007 | | same3 = _mm_mul_pd(same3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 3>(), x))); |
| 2008 | | |
| 2009 | | __m128d temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 2>(), x))); |
| 2010 | | __m128d temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 2>(), x))); |
| 2011 | | __m128d temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 2>(), x))); |
| 2012 | | __m128d temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 2>(), x))); |
| 2013 | | |
| 2014 | | same0 = _mm_add_pd(same0, temp1); |
| 2015 | | same1 = _mm_add_pd(same1, temp2); |
| 2016 | | same2 = _mm_add_pd(same2, temp3); |
| 2017 | | same3 = _mm_add_pd(same3, temp4); |
| 2018 | | |
| 2019 | | temp1 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 4>(), x))); |
| 2020 | | temp2 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 4>(), x))); |
| 2021 | | temp3 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 4>(), x))); |
| 2022 | | temp4 = _mm_mul_pd(neig4, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 4>(), x))); |
| 2023 | | |
| 2024 | | same0 = _mm_add_pd(same0, temp1); |
| 2025 | | same1 = _mm_add_pd(same1, temp2); |
| 2026 | | same2 = _mm_add_pd(same2, temp3); |
| 2027 | | same3 = _mm_add_pd(same3, temp4); |
| 2028 | | |
| 2029 | | neig0 = _mm_load_pd(&hood[FC<0, 0, -1>()].srcB(x)); |
| 2030 | | neig1 = _mm_load_pd(&hood[FC<2, 0, -1>()].srcB(x)); |
| 2031 | | neig2 = _mm_load_pd(&hood[FC<4, 0, -1>()].srcB(x)); |
| 2032 | | neig3 = _mm_load_pd(&hood[FC<6, 0, -1>()].srcB(x)); |
| 2033 | | |
| 2034 | | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 0>(), x))); |
| 2035 | | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 0>(), x))); |
| 2036 | | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 0>(), x))); |
| 2037 | | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 0>(), x))); |
| 2038 | | |
| 2039 | | same0 = _mm_add_pd(same0, temp1); |
| 2040 | | same1 = _mm_add_pd(same1, temp2); |
| 2041 | | same2 = _mm_add_pd(same2, temp3); |
| 2042 | | same3 = _mm_add_pd(same3, temp4); |
| 2043 | | |
| 2044 | | neig0 = _mm_load_pd(&hood[FC<0, -1, 0>()].srcB(x)); |
| 2045 | | neig1 = _mm_load_pd(&hood[FC<2, -1, 0>()].srcB(x)); |
| 2046 | | neig2 = _mm_load_pd(&hood[FC<4, -1, 0>()].srcB(x)); |
| 2047 | | neig3 = _mm_load_pd(&hood[FC<6, -1, 0>()].srcB(x)); |
| 2048 | | |
| 2049 | | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 1>(), x))); |
| 2050 | | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 1>(), x))); |
| 2051 | | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 1>(), x))); |
| 2052 | | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 1>(), x))); |
| 2053 | | |
| 2054 | | same0 = _mm_add_pd(same0, temp1); |
| 2055 | | same1 = _mm_add_pd(same1, temp2); |
| 2056 | | same2 = _mm_add_pd(same2, temp3); |
| 2057 | | same3 = _mm_add_pd(same3, temp4); |
| 2058 | | |
| 2059 | | neig0 = _mm_load_pd(&hood[FC<0, 1, 0>()].srcB(x)); |
| 2060 | | neig1 = _mm_load_pd(&hood[FC<2, 1, 0>()].srcB(x)); |
| 2061 | | neig2 = _mm_load_pd(&hood[FC<4, 1, 0>()].srcB(x)); |
| 2062 | | neig3 = _mm_load_pd(&hood[FC<6, 1, 0>()].srcB(x)); |
| 2063 | | |
| 2064 | | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 5>(), x))); |
| 2065 | | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 5>(), x))); |
| 2066 | | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 5>(), x))); |
| 2067 | | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 5>(), x))); |
| 2068 | | |
| 2069 | | same0 = _mm_add_pd(same0, temp1); |
| 2070 | | same1 = _mm_add_pd(same1, temp2); |
| 2071 | | same2 = _mm_add_pd(same2, temp3); |
| 2072 | | same3 = _mm_add_pd(same3, temp4); |
| 2073 | | |
| 2074 | | //xxxxxxxxxxxxx |
| 2075 | | neig0 = _mm_load_pd(&hood[FC<0, 0, 1>()].srcB(x)); |
| 2076 | | neig1 = _mm_load_pd(&hood[FC<2, 0, 1>()].srcB(x)); |
| 2077 | | neig2 = _mm_load_pd(&hood[FC<4, 0, 1>()].srcB(x)); |
| 2078 | | neig3 = _mm_load_pd(&hood[FC<6, 0, 1>()].srcB(x)); |
| 2079 | | |
| 2080 | | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 6>(), x))); |
| 2081 | | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 6>(), x))); |
| 2082 | | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 6>(), x))); |
| 2083 | | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 6>(), x))); |
| 2084 | | |
| 2085 | | same0 = _mm_add_pd(same0, temp1); |
| 2086 | | same1 = _mm_add_pd(same1, temp2); |
| 2087 | | same2 = _mm_add_pd(same2, temp3); |
| 2088 | | same3 = _mm_add_pd(same3, temp4); |
| 2089 | | |
| 2090 | | //xxxxxxxxxxxxx |
| 2091 | | neig0 = _mm_load_pd(&hood[FC<0, -1, -1>()].srcB(x)); |
| 2092 | | neig1 = _mm_load_pd(&hood[FC<2, -1, -1>()].srcB(x)); |
| 2093 | | neig2 = _mm_load_pd(&hood[FC<4, -1, -1>()].srcB(x)); |
| 2094 | | neig3 = _mm_load_pd(&hood[FC<6, -1, -1>()].srcB(x)); |
| 2095 | | |
| 2096 | | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 7>(), x))); |
| 2097 | | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 7>(), x))); |
| 2098 | | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 7>(), x))); |
| 2099 | | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 7>(), x))); |
| 2100 | | |
| 2101 | | same0 = _mm_add_pd(same0, temp1); |
| 2102 | | same1 = _mm_add_pd(same1, temp2); |
| 2103 | | same2 = _mm_add_pd(same2, temp3); |
| 2104 | | same3 = _mm_add_pd(same3, temp4); |
| 2105 | | |
| 2106 | | //xxxxxxxxxxxxx |
| 2107 | | neig0 = _mm_load_pd(&hood[FC<0, 1, -1>()].srcB(x)); |
| 2108 | | neig1 = _mm_load_pd(&hood[FC<2, 1, -1>()].srcB(x)); |
| 2109 | | neig2 = _mm_load_pd(&hood[FC<4, 1, -1>()].srcB(x)); |
| 2110 | | neig3 = _mm_load_pd(&hood[FC<6, 1, -1>()].srcB(x)); |
| 2111 | | |
| 2112 | | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 8>(), x))); |
| 2113 | | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 8>(), x))); |
| 2114 | | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 8>(), x))); |
| 2115 | | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 8>(), x))); |
| 2116 | | |
| 2117 | | same0 = _mm_add_pd(same0, temp1); |
| 2118 | | same1 = _mm_add_pd(same1, temp2); |
| 2119 | | same2 = _mm_add_pd(same2, temp3); |
| 2120 | | same3 = _mm_add_pd(same3, temp4); |
| 2121 | | |
| 2122 | | //xxxxxxxxxxxxx |
| 2123 | | neig0 = _mm_load_pd(&hood[FC<0, -1, 1>()].srcB(x)); |
| 2124 | | neig1 = _mm_load_pd(&hood[FC<2, -1, 1>()].srcB(x)); |
| 2125 | | neig2 = _mm_load_pd(&hood[FC<4, -1, 1>()].srcB(x)); |
| 2126 | | neig3 = _mm_load_pd(&hood[FC<6, -1, 1>()].srcB(x)); |
| 2127 | | |
| 2128 | | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC< 9>(), x))); |
| 2129 | | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC< 9>(), x))); |
| 2130 | | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC< 9>(), x))); |
| 2131 | | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC< 9>(), x))); |
| 2132 | | |
| 2133 | | same0 = _mm_add_pd(same0, temp1); |
| 2134 | | same1 = _mm_add_pd(same1, temp2); |
| 2135 | | same2 = _mm_add_pd(same2, temp3); |
| 2136 | | same3 = _mm_add_pd(same3, temp4); |
| 2137 | | |
| 2138 | | //xxxxxxxxxxxxx |
| 2139 | | neig0 = _mm_load_pd(&hood[FC<0, 1, 1>()].srcB(x)); |
| 2140 | | neig1 = _mm_load_pd(&hood[FC<2, 1, 1>()].srcB(x)); |
| 2141 | | neig2 = _mm_load_pd(&hood[FC<4, 1, 1>()].srcB(x)); |
| 2142 | | neig3 = _mm_load_pd(&hood[FC<6, 1, 1>()].srcB(x)); |
| 2143 | | |
| 2144 | | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC<10>(), x))); |
| 2145 | | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC<10>(), x))); |
| 2146 | | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC<10>(), x))); |
| 2147 | | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC<10>(), x))); |
| 2148 | | |
| 2149 | | same0 = _mm_add_pd(same0, temp1); |
| 2150 | | same1 = _mm_add_pd(same1, temp2); |
| 2151 | | same2 = _mm_add_pd(same2, temp3); |
| 2152 | | same3 = _mm_add_pd(same3, temp4); |
| 2153 | | |
| 2154 | | //xxxxxxxxxxxxx |
| 2155 | | neig0 = _mm_load_pd(&hood[FC<0, -1, -1>()].coeffB(FC<11>(), x)); |
| 2156 | | neig1 = _mm_load_pd(&hood[FC<2, -1, -1>()].coeffB(FC<11>(), x)); |
| 2157 | | neig2 = _mm_load_pd(&hood[FC<4, -1, -1>()].coeffB(FC<11>(), x)); |
| 2158 | | neig3 = _mm_load_pd(&hood[FC<6, -1, -1>()].coeffB(FC<11>(), x)); |
| 2159 | | |
| 2160 | | same0 = _mm_add_pd(same0, neig0); |
| 2161 | | same1 = _mm_add_pd(same1, neig1); |
| 2162 | | same2 = _mm_add_pd(same2, neig2); |
| 2163 | | same3 = _mm_add_pd(same3, neig3); |
| 2164 | | |
| 2165 | | //xxxxxxxxxxxxx |
| 2166 | | neig0 = _mm_load_pd(&hood[FC<0, 0, -1>()].coeffB(FC<11>(), x)); |
| 2167 | | neig1 = _mm_load_pd(&hood[FC<2, 0, -1>()].coeffB(FC<11>(), x)); |
| 2168 | | neig2 = _mm_load_pd(&hood[FC<4, 0, -1>()].coeffB(FC<11>(), x)); |
| 2169 | | neig3 = _mm_load_pd(&hood[FC<6, 0, -1>()].coeffB(FC<11>(), x)); |
| 2170 | | |
| 2171 | | same0 = _mm_add_pd(same0, neig0); |
| 2172 | | same1 = _mm_add_pd(same1, neig1); |
| 2173 | | same2 = _mm_add_pd(same2, neig2); |
| 2174 | | same3 = _mm_add_pd(same3, neig3); |
| 2175 | | |
| 2176 | | //xxxxxxxxxxxxx |
| 2177 | | neig0 = _mm_load_pd(&hood[FC<0, 1, -1>()].coeffB(FC<11>(), x)); |
| 2178 | | neig1 = _mm_load_pd(&hood[FC<2, 1, -1>()].coeffB(FC<11>(), x)); |
| 2179 | | neig2 = _mm_load_pd(&hood[FC<4, 1, -1>()].coeffB(FC<11>(), x)); |
| 2180 | | neig3 = _mm_load_pd(&hood[FC<6, 1, -1>()].coeffB(FC<11>(), x)); |
| 2181 | | |
| 2182 | | same0 = _mm_add_pd(same0, neig0); |
| 2183 | | same1 = _mm_add_pd(same1, neig1); |
| 2184 | | same2 = _mm_add_pd(same2, neig2); |
| 2185 | | same3 = _mm_add_pd(same3, neig3); |
| 2186 | | |
| 2187 | | //xxxxxxxxxxxxx |
| 2188 | | neig0 = _mm_load_pd(&hood[FC<0, -1, 0>()].coeffB(FC<11>(), x)); |
| 2189 | | neig1 = _mm_load_pd(&hood[FC<2, -1, 0>()].coeffB(FC<11>(), x)); |
| 2190 | | neig2 = _mm_load_pd(&hood[FC<4, -1, 0>()].coeffB(FC<11>(), x)); |
| 2191 | | neig3 = _mm_load_pd(&hood[FC<6, -1, 0>()].coeffB(FC<11>(), x)); |
| 2192 | | |
| 2193 | | same0 = _mm_add_pd(same0, neig0); |
| 2194 | | same1 = _mm_add_pd(same1, neig1); |
| 2195 | | same2 = _mm_add_pd(same2, neig2); |
| 2196 | | same3 = _mm_add_pd(same3, neig3); |
| 2197 | | |
| 2198 | | //xxxxxxxxxxxxx |
| 2199 | | neig0 = _mm_load_pd(&hood[FC<0, 1, 0>()].coeffB(FC<11>(), x)); |
| 2200 | | neig1 = _mm_load_pd(&hood[FC<2, 1, 0>()].coeffB(FC<11>(), x)); |
| 2201 | | neig2 = _mm_load_pd(&hood[FC<4, 1, 0>()].coeffB(FC<11>(), x)); |
| 2202 | | neig3 = _mm_load_pd(&hood[FC<6, 1, 0>()].coeffB(FC<11>(), x)); |
| 2203 | | |
| 2204 | | same0 = _mm_add_pd(same0, neig0); |
| 2205 | | same1 = _mm_add_pd(same1, neig1); |
| 2206 | | same2 = _mm_add_pd(same2, neig2); |
| 2207 | | same3 = _mm_add_pd(same3, neig3); |
| 2208 | | |
| 2209 | | //xxxxxxxxxxxxx |
| 2210 | | neig0 = _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC<11>(), x)); |
| 2211 | | neig1 = _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC<11>(), x)); |
| 2212 | | neig2 = _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC<11>(), x)); |
| 2213 | | neig3 = _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC<11>(), x)); |
| 2214 | | |
| 2215 | | same0 = _mm_add_pd(same0, neig0); |
| 2216 | | same1 = _mm_add_pd(same1, neig1); |
| 2217 | | same2 = _mm_add_pd(same2, neig2); |
| 2218 | | same3 = _mm_add_pd(same3, neig3); |
| 2219 | | |
| 2220 | | //xxxxxxxxxxxxx |
| 2221 | | neig0 = _mm_load_pd(&hood[FC<0, -1, 1>()].coeffB(FC<11>(), x)); |
| 2222 | | neig1 = _mm_load_pd(&hood[FC<2, -1, 1>()].coeffB(FC<11>(), x)); |
| 2223 | | neig2 = _mm_load_pd(&hood[FC<4, -1, 1>()].coeffB(FC<11>(), x)); |
| 2224 | | neig3 = _mm_load_pd(&hood[FC<6, -1, 1>()].coeffB(FC<11>(), x)); |
| 2225 | | |
| 2226 | | same0 = _mm_add_pd(same0, neig0); |
| 2227 | | same1 = _mm_add_pd(same1, neig1); |
| 2228 | | same2 = _mm_add_pd(same2, neig2); |
| 2229 | | same3 = _mm_add_pd(same3, neig3); |
| 2230 | | |
| 2231 | | //xxxxxxxxxxxxx |
| 2232 | | neig0 = _mm_load_pd(&hood[FC<0, 0, 1>()].coeffB(FC<11>(), x)); |
| 2233 | | neig1 = _mm_load_pd(&hood[FC<2, 0, 1>()].coeffB(FC<11>(), x)); |
| 2234 | | neig2 = _mm_load_pd(&hood[FC<4, 0, 1>()].coeffB(FC<11>(), x)); |
| 2235 | | neig3 = _mm_load_pd(&hood[FC<6, 0, 1>()].coeffB(FC<11>(), x)); |
| 2236 | | |
| 2237 | | same0 = _mm_add_pd(same0, neig0); |
| 2238 | | same1 = _mm_add_pd(same1, neig1); |
| 2239 | | same2 = _mm_add_pd(same2, neig2); |
| 2240 | | same3 = _mm_add_pd(same3, neig3); |
| 2241 | | |
| 2242 | | //xxxxxxxxxxxxx |
| 2243 | | neig0 = _mm_load_pd(&hood[FC<0, 1, 1>()].coeffB(FC<11>(), x)); |
| 2244 | | neig1 = _mm_load_pd(&hood[FC<2, 1, 1>()].coeffB(FC<11>(), x)); |
| 2245 | | neig2 = _mm_load_pd(&hood[FC<4, 1, 1>()].coeffB(FC<11>(), x)); |
| 2246 | | neig3 = _mm_load_pd(&hood[FC<6, 1, 1>()].coeffB(FC<11>(), x)); |
| 2247 | | |
| 2248 | | same0 = _mm_add_pd(same0, neig0); |
| 2249 | | same1 = _mm_add_pd(same1, neig1); |
| 2250 | | same2 = _mm_add_pd(same2, neig2); |
| 2251 | | same3 = _mm_add_pd(same3, neig3); |
| 2252 | | |
| 2253 | | //xxxxxxxxxxxxx |
| 2254 | | neig0 = _mm_load_pd(&hood[FC<0, -1, -1>()].coeffB(FC<12>(), x)); |
| 2255 | | neig1 = _mm_load_pd(&hood[FC<2, -1, -1>()].coeffB(FC<12>(), x)); |
| 2256 | | neig2 = _mm_load_pd(&hood[FC<4, -1, -1>()].coeffB(FC<12>(), x)); |
| 2257 | | neig3 = _mm_load_pd(&hood[FC<6, -1, -1>()].coeffB(FC<12>(), x)); |
| 2258 | | |
| 2259 | | same0 = _mm_add_pd(same0, neig0); |
| 2260 | | same1 = _mm_add_pd(same1, neig1); |
| 2261 | | same2 = _mm_add_pd(same2, neig2); |
| 2262 | | same3 = _mm_add_pd(same3, neig3); |
| 2263 | | |
| 2264 | | //xxxxxxxxxxxxx |
| 2265 | | neig0 = _mm_load_pd(&hood[FC<0, 0, -1>()].coeffB(FC<12>(), x)); |
| 2266 | | neig1 = _mm_load_pd(&hood[FC<2, 0, -1>()].coeffB(FC<12>(), x)); |
| 2267 | | neig2 = _mm_load_pd(&hood[FC<4, 0, -1>()].coeffB(FC<12>(), x)); |
| 2268 | | neig3 = _mm_load_pd(&hood[FC<6, 0, -1>()].coeffB(FC<12>(), x)); |
| 2269 | | |
| 2270 | | same0 = _mm_add_pd(same0, neig0); |
| 2271 | | same1 = _mm_add_pd(same1, neig1); |
| 2272 | | same2 = _mm_add_pd(same2, neig2); |
| 2273 | | same3 = _mm_add_pd(same3, neig3); |
| 2274 | | |
| 2275 | | //xxxxxxxxxxxxx |
| 2276 | | neig0 = _mm_load_pd(&hood[FC<0, 1, -1>()].coeffB(FC<12>(), x)); |
| 2277 | | neig1 = _mm_load_pd(&hood[FC<2, 1, -1>()].coeffB(FC<12>(), x)); |
| 2278 | | neig2 = _mm_load_pd(&hood[FC<4, 1, -1>()].coeffB(FC<12>(), x)); |
| 2279 | | neig3 = _mm_load_pd(&hood[FC<6, 1, -1>()].coeffB(FC<12>(), x)); |
| 2280 | | |
| 2281 | | same0 = _mm_add_pd(same0, neig0); |
| 2282 | | same1 = _mm_add_pd(same1, neig1); |
| 2283 | | same2 = _mm_add_pd(same2, neig2); |
| 2284 | | same3 = _mm_add_pd(same3, neig3); |
| 2285 | | |
| 2286 | | //xxxxxxxxxxxxx |
| 2287 | | neig0 = _mm_load_pd(&hood[FC<0, -1, 0>()].coeffB(FC<12>(), x)); |
| 2288 | | neig1 = _mm_load_pd(&hood[FC<2, -1, 0>()].coeffB(FC<12>(), x)); |
| 2289 | | neig2 = _mm_load_pd(&hood[FC<4, -1, 0>()].coeffB(FC<12>(), x)); |
| 2290 | | neig3 = _mm_load_pd(&hood[FC<6, -1, 0>()].coeffB(FC<12>(), x)); |
| 2291 | | |
| 2292 | | same0 = _mm_add_pd(same0, neig0); |
| 2293 | | same1 = _mm_add_pd(same1, neig1); |
| 2294 | | same2 = _mm_add_pd(same2, neig2); |
| 2295 | | same3 = _mm_add_pd(same3, neig3); |
| 2296 | | |
| 2297 | | //xxxxxxxxxxxxx |
| 2298 | | neig0 = _mm_load_pd(&hood[FC<0, 1, 0>()].coeffB(FC<12>(), x)); |
| 2299 | | neig1 = _mm_load_pd(&hood[FC<2, 1, 0>()].coeffB(FC<12>(), x)); |
| 2300 | | neig2 = _mm_load_pd(&hood[FC<4, 1, 0>()].coeffB(FC<12>(), x)); |
| 2301 | | neig3 = _mm_load_pd(&hood[FC<6, 1, 0>()].coeffB(FC<12>(), x)); |
| 2302 | | |
| 2303 | | same0 = _mm_add_pd(same0, neig0); |
| 2304 | | same1 = _mm_add_pd(same1, neig1); |
| 2305 | | same2 = _mm_add_pd(same2, neig2); |
| 2306 | | same3 = _mm_add_pd(same3, neig3); |
| 2307 | | |
| 2308 | | //xxxxxxxxxxxxx |
| 2309 | | neig0 = _mm_load_pd(&hood[FC<0, 0, 0>()].coeffB(FC<12>(), x)); |
| 2310 | | neig1 = _mm_load_pd(&hood[FC<2, 0, 0>()].coeffB(FC<12>(), x)); |
| 2311 | | neig2 = _mm_load_pd(&hood[FC<4, 0, 0>()].coeffB(FC<12>(), x)); |
| 2312 | | neig3 = _mm_load_pd(&hood[FC<6, 0, 0>()].coeffB(FC<12>(), x)); |
| 2313 | | |
| 2314 | | same0 = _mm_add_pd(same0, neig0); |
| 2315 | | same1 = _mm_add_pd(same1, neig1); |
| 2316 | | same2 = _mm_add_pd(same2, neig2); |
| 2317 | | same3 = _mm_add_pd(same3, neig3); |
| 2318 | | |
| 2319 | | //xxxxxxxxxxxxx |
| 2320 | | neig0 = _mm_load_pd(&hood[FC<0, -1, 1>()].coeffB(FC<12>(), x)); |
| 2321 | | neig1 = _mm_load_pd(&hood[FC<2, -1, 1>()].coeffB(FC<12>(), x)); |
| 2322 | | neig2 = _mm_load_pd(&hood[FC<4, -1, 1>()].coeffB(FC<12>(), x)); |
| 2323 | | neig3 = _mm_load_pd(&hood[FC<6, -1, 1>()].coeffB(FC<12>(), x)); |
| 2324 | | |
| 2325 | | same0 = _mm_add_pd(same0, neig0); |
| 2326 | | same1 = _mm_add_pd(same1, neig1); |
| 2327 | | same2 = _mm_add_pd(same2, neig2); |
| 2328 | | same3 = _mm_add_pd(same3, neig3); |
| 2329 | | |
| 2330 | | //xxxxxxxxxxxxx |
| 2331 | | neig0 = _mm_load_pd(&hood[FC<0, 0, 1>()].coeffB(FC<12>(), x)); |
| 2332 | | neig1 = _mm_load_pd(&hood[FC<2, 0, 1>()].coeffB(FC<12>(), x)); |
| 2333 | | neig2 = _mm_load_pd(&hood[FC<4, 0, 1>()].coeffB(FC<12>(), x)); |
| 2334 | | neig3 = _mm_load_pd(&hood[FC<6, 0, 1>()].coeffB(FC<12>(), x)); |
| 2335 | | |
| 2336 | | same0 = _mm_add_pd(same0, neig0); |
| 2337 | | same1 = _mm_add_pd(same1, neig1); |
| 2338 | | same2 = _mm_add_pd(same2, neig2); |
| 2339 | | same3 = _mm_add_pd(same3, neig3); |
| 2340 | | |
| 2341 | | //xxxxxxxxxxxxx |
| 2342 | | neig0 = _mm_load_pd(&hood[FC<0, 1, 1>()].coeffB(FC<12>(), x)); |
| 2343 | | neig1 = _mm_load_pd(&hood[FC<2, 1, 1>()].coeffB(FC<12>(), x)); |
| 2344 | | neig2 = _mm_load_pd(&hood[FC<4, 1, 1>()].coeffB(FC<12>(), x)); |
| 2345 | | neig3 = _mm_load_pd(&hood[FC<6, 1, 1>()].coeffB(FC<12>(), x)); |
| | 2056 | same0 = _mm_mul_pd(same0, _mm_load_pd(&hood(0, 0, 0).coeff(C03, x))); |
| | 2057 | same1 = _mm_mul_pd(same1, _mm_load_pd(&hood(2, 0, 0).coeff(C03, x))); |
| | 2058 | same2 = _mm_mul_pd(same2, _mm_load_pd(&hood(4, 0, 0).coeff(C03, x))); |
| | 2059 | same3 = _mm_mul_pd(same3, _mm_load_pd(&hood(6, 0, 0).coeff(C03, x))); |
| | 2060 | |
| | 2061 | __m128d temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C02, x))); |
| | 2062 | __m128d temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C02, x))); |
| | 2063 | __m128d temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C02, x))); |
| | 2064 | __m128d temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C02, x))); |
| | 2065 | |
| | 2066 | same0 = _mm_add_pd(same0, temp1); |
| | 2067 | same1 = _mm_add_pd(same1, temp2); |
| | 2068 | same2 = _mm_add_pd(same2, temp3); |
| | 2069 | same3 = _mm_add_pd(same3, temp4); |
| | 2070 | |
| | 2071 | temp1 = _mm_mul_pd(neig1, _mm_load_pd(&hood(0, 0, 0).coeff(C04, x))); |
| | 2072 | temp2 = _mm_mul_pd(neig2, _mm_load_pd(&hood(2, 0, 0).coeff(C04, x))); |
| | 2073 | temp3 = _mm_mul_pd(neig3, _mm_load_pd(&hood(4, 0, 0).coeff(C04, x))); |
| | 2074 | temp4 = _mm_mul_pd(neig4, _mm_load_pd(&hood(6, 0, 0).coeff(C04, x))); |
| | 2075 | |
| | 2076 | same0 = _mm_add_pd(same0, temp1); |
| | 2077 | same1 = _mm_add_pd(same1, temp2); |
| | 2078 | same2 = _mm_add_pd(same2, temp3); |
| | 2079 | same3 = _mm_add_pd(same3, temp4); |
| | 2080 | |
| | 2081 | neig0 = _mm_load_pd(&hood(0, 0, -1).src(x)); |
| | 2082 | neig1 = _mm_load_pd(&hood(2, 0, -1).src(x)); |
| | 2083 | neig2 = _mm_load_pd(&hood(4, 0, -1).src(x)); |
| | 2084 | neig3 = _mm_load_pd(&hood(6, 0, -1).src(x)); |
| | 2085 | |
| | 2086 | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C00, x))); |
| | 2087 | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C00, x))); |
| | 2088 | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C00, x))); |
| | 2089 | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C00, x))); |
| | 2090 | |
| | 2091 | same0 = _mm_add_pd(same0, temp1); |
| | 2092 | same1 = _mm_add_pd(same1, temp2); |
| | 2093 | same2 = _mm_add_pd(same2, temp3); |
| | 2094 | same3 = _mm_add_pd(same3, temp4); |
| | 2095 | |
| | 2096 | neig0 = _mm_load_pd(&hood(0, -1, 0).src(x)); |
| | 2097 | neig1 = _mm_load_pd(&hood(2, -1, 0).src(x)); |
| | 2098 | neig2 = _mm_load_pd(&hood(4, -1, 0).src(x)); |
| | 2099 | neig3 = _mm_load_pd(&hood(6, -1, 0).src(x)); |
| | 2100 | |
| | 2101 | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C01, x))); |
| | 2102 | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C01, x))); |
| | 2103 | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C01, x))); |
| | 2104 | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C01, x))); |
| | 2105 | |
| | 2106 | same0 = _mm_add_pd(same0, temp1); |
| | 2107 | same1 = _mm_add_pd(same1, temp2); |
| | 2108 | same2 = _mm_add_pd(same2, temp3); |
| | 2109 | same3 = _mm_add_pd(same3, temp4); |
| | 2110 | |
| | 2111 | neig0 = _mm_load_pd(&hood(0, 1, 0).src(x)); |
| | 2112 | neig1 = _mm_load_pd(&hood(2, 1, 0).src(x)); |
| | 2113 | neig2 = _mm_load_pd(&hood(4, 1, 0).src(x)); |
| | 2114 | neig3 = _mm_load_pd(&hood(6, 1, 0).src(x)); |
| | 2115 | |
| | 2116 | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C05, x))); |
| | 2117 | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C05, x))); |
| | 2118 | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C05, x))); |
| | 2119 | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C05, x))); |
| | 2120 | |
| | 2121 | same0 = _mm_add_pd(same0, temp1); |
| | 2122 | same1 = _mm_add_pd(same1, temp2); |
| | 2123 | same2 = _mm_add_pd(same2, temp3); |
| | 2124 | same3 = _mm_add_pd(same3, temp4); |
| | 2125 | |
| | 2126 | //xxxxxxxxxxxxx |
| | 2127 | neig0 = _mm_load_pd(&hood(0, 0, 1).src(x)); |
| | 2128 | neig1 = _mm_load_pd(&hood(2, 0, 1).src(x)); |
| | 2129 | neig2 = _mm_load_pd(&hood(4, 0, 1).src(x)); |
| | 2130 | neig3 = _mm_load_pd(&hood(6, 0, 1).src(x)); |
| | 2131 | |
| | 2132 | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C06, x))); |
| | 2133 | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C06, x))); |
| | 2134 | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C06, x))); |
| | 2135 | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C06, x))); |
| | 2136 | |
| | 2137 | same0 = _mm_add_pd(same0, temp1); |
| | 2138 | same1 = _mm_add_pd(same1, temp2); |
| | 2139 | same2 = _mm_add_pd(same2, temp3); |
| | 2140 | same3 = _mm_add_pd(same3, temp4); |
| | 2141 | |
| | 2142 | //xxxxxxxxxxxxx |
| | 2143 | neig0 = _mm_load_pd(&hood(0, -1, -1).src(x)); |
| | 2144 | neig1 = _mm_load_pd(&hood(2, -1, -1).src(x)); |
| | 2145 | neig2 = _mm_load_pd(&hood(4, -1, -1).src(x)); |
| | 2146 | neig3 = _mm_load_pd(&hood(6, -1, -1).src(x)); |
| | 2147 | |
| | 2148 | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C07, x))); |
| | 2149 | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C07, x))); |
| | 2150 | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C07, x))); |
| | 2151 | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C07, x))); |
| | 2152 | |
| | 2153 | same0 = _mm_add_pd(same0, temp1); |
| | 2154 | same1 = _mm_add_pd(same1, temp2); |
| | 2155 | same2 = _mm_add_pd(same2, temp3); |
| | 2156 | same3 = _mm_add_pd(same3, temp4); |
| | 2157 | |
| | 2158 | //xxxxxxxxxxxxx |
| | 2159 | neig0 = _mm_load_pd(&hood(0, 1, -1).src(x)); |
| | 2160 | neig1 = _mm_load_pd(&hood(2, 1, -1).src(x)); |
| | 2161 | neig2 = _mm_load_pd(&hood(4, 1, -1).src(x)); |
| | 2162 | neig3 = _mm_load_pd(&hood(6, 1, -1).src(x)); |
| | 2163 | |
| | 2164 | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C08, x))); |
| | 2165 | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C08, x))); |
| | 2166 | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C08, x))); |
| | 2167 | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C08, x))); |
| | 2168 | |
| | 2169 | same0 = _mm_add_pd(same0, temp1); |
| | 2170 | same1 = _mm_add_pd(same1, temp2); |
| | 2171 | same2 = _mm_add_pd(same2, temp3); |
| | 2172 | same3 = _mm_add_pd(same3, temp4); |
| | 2173 | |
| | 2174 | //xxxxxxxxxxxxx |
| | 2175 | neig0 = _mm_load_pd(&hood(0, -1, 1).src(x)); |
| | 2176 | neig1 = _mm_load_pd(&hood(2, -1, 1).src(x)); |
| | 2177 | neig2 = _mm_load_pd(&hood(4, -1, 1).src(x)); |
| | 2178 | neig3 = _mm_load_pd(&hood(6, -1, 1).src(x)); |
| | 2179 | |
| | 2180 | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C09, x))); |
| | 2181 | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C09, x))); |
| | 2182 | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C09, x))); |
| | 2183 | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C09, x))); |
| | 2184 | |
| | 2185 | same0 = _mm_add_pd(same0, temp1); |
| | 2186 | same1 = _mm_add_pd(same1, temp2); |
| | 2187 | same2 = _mm_add_pd(same2, temp3); |
| | 2188 | same3 = _mm_add_pd(same3, temp4); |
| | 2189 | |
| | 2190 | //xxxxxxxxxxxxx |
| | 2191 | neig0 = _mm_load_pd(&hood(0, 1, 1).src(x)); |
| | 2192 | neig1 = _mm_load_pd(&hood(2, 1, 1).src(x)); |
| | 2193 | neig2 = _mm_load_pd(&hood(4, 1, 1).src(x)); |
| | 2194 | neig3 = _mm_load_pd(&hood(6, 1, 1).src(x)); |
| | 2195 | |
| | 2196 | temp1 = _mm_mul_pd(neig0, _mm_load_pd(&hood(0, 0, 0).coeff(C10, x))); |
| | 2197 | temp2 = _mm_mul_pd(neig1, _mm_load_pd(&hood(2, 0, 0).coeff(C10, x))); |
| | 2198 | temp3 = _mm_mul_pd(neig2, _mm_load_pd(&hood(4, 0, 0).coeff(C10, x))); |
| | 2199 | temp4 = _mm_mul_pd(neig3, _mm_load_pd(&hood(6, 0, 0).coeff(C10, x))); |
| | 2200 | |
| | 2201 | same0 = _mm_add_pd(same0, temp1); |
| | 2202 | same1 = _mm_add_pd(same1, temp2); |
| | 2203 | same2 = _mm_add_pd(same2, temp3); |
| | 2204 | same3 = _mm_add_pd(same3, temp4); |
| | 2205 | |
| | 2206 | //xxxxxxxxxxxxx |
| | 2207 | neig0 = _mm_load_pd(&hood(0, -1, -1).coeff(C11, x)); |
| | 2208 | neig1 = _mm_load_pd(&hood(2, -1, -1).coeff(C11, x)); |
| | 2209 | neig2 = _mm_load_pd(&hood(4, -1, -1).coeff(C11, x)); |
| | 2210 | neig3 = _mm_load_pd(&hood(6, -1, -1).coeff(C11, x)); |
| | 2211 | |
| | 2212 | same0 = _mm_add_pd(same0, neig0); |
| | 2213 | same1 = _mm_add_pd(same1, neig1); |
| | 2214 | same2 = _mm_add_pd(same2, neig2); |
| | 2215 | same3 = _mm_add_pd(same3, neig3); |
| | 2216 | |
| | 2217 | //xxxxxxxxxxxxx |
| | 2218 | neig0 = _mm_load_pd(&hood(0, 0, -1).coeff(C11, x)); |
| | 2219 | neig1 = _mm_load_pd(&hood(2, 0, -1).coeff(C11, x)); |
| | 2220 | neig2 = _mm_load_pd(&hood(4, 0, -1).coeff(C11, x)); |
| | 2221 | neig3 = _mm_load_pd(&hood(6, 0, -1).coeff(C11, x)); |
| | 2222 | |
| | 2223 | same0 = _mm_add_pd(same0, neig0); |
| | 2224 | same1 = _mm_add_pd(same1, neig1); |
| | 2225 | same2 = _mm_add_pd(same2, neig2); |
| | 2226 | same3 = _mm_add_pd(same3, neig3); |
| | 2227 | |
| | 2228 | //xxxxxxxxxxxxx |
| | 2229 | neig0 = _mm_load_pd(&hood(0, 1, -1).coeff(C11, x)); |
| | 2230 | neig1 = _mm_load_pd(&hood(2, 1, -1).coeff(C11, x)); |
| | 2231 | neig2 = _mm_load_pd(&hood(4, 1, -1).coeff(C11, x)); |
| | 2232 | neig3 = _mm_load_pd(&hood(6, 1, -1).coeff(C11, x)); |
| | 2233 | |
| | 2234 | same0 = _mm_add_pd(same0, neig0); |
| | 2235 | same1 = _mm_add_pd(same1, neig1); |
| | 2236 | same2 = _mm_add_pd(same2, neig2); |
| | 2237 | same3 = _mm_add_pd(same3, neig3); |
| | 2238 | |
| | 2239 | //xxxxxxxxxxxxx |
| | 2240 | neig0 = _mm_load_pd(&hood(0, -1, 0).coeff(C11, x)); |
| | 2241 | neig1 = _mm_load_pd(&hood(2, -1, 0).coeff(C11, x)); |
| | 2242 | neig2 = _mm_load_pd(&hood(4, -1, 0).coeff(C11, x)); |
| | 2243 | neig3 = _mm_load_pd(&hood(6, -1, 0).coeff(C11, x)); |
| | 2244 | |
| | 2245 | same0 = _mm_add_pd(same0, neig0); |
| | 2246 | same1 = _mm_add_pd(same1, neig1); |
| | 2247 | same2 = _mm_add_pd(same2, neig2); |
| | 2248 | same3 = _mm_add_pd(same3, neig3); |
| | 2249 | |
| | 2250 | //xxxxxxxxxxxxx |
| | 2251 | neig0 = _mm_load_pd(&hood(0, 1, 0).coeff(C11, x)); |
| | 2252 | neig1 = _mm_load_pd(&hood(2, 1, 0).coeff(C11, x)); |
| | 2253 | neig2 = _mm_load_pd(&hood(4, 1, 0).coeff(C11, x)); |
| | 2254 | neig3 = _mm_load_pd(&hood(6, 1, 0).coeff(C11, x)); |
| | 2255 | |
| | 2256 | same0 = _mm_add_pd(same0, neig0); |
| | 2257 | same1 = _mm_add_pd(same1, neig1); |
| | 2258 | same2 = _mm_add_pd(same2, neig2); |
| | 2259 | same3 = _mm_add_pd(same3, neig3); |
| | 2260 | |
| | 2261 | //xxxxxxxxxxxxx |
| | 2262 | neig0 = _mm_load_pd(&hood(0, 0, 0).coeff(C11, x)); |
| | 2263 | neig1 = _mm_load_pd(&hood(2, 0, 0).coeff(C11, x)); |
| | 2264 | neig2 = _mm_load_pd(&hood(4, 0, 0).coeff(C11, x)); |
| | 2265 | neig3 = _mm_load_pd(&hood(6, 0, 0).coeff(C11, x)); |
| | 2266 | |
| | 2267 | same0 = _mm_add_pd(same0, neig0); |
| | 2268 | same1 = _mm_add_pd(same1, neig1); |
| | 2269 | same2 = _mm_add_pd(same2, neig2); |
| | 2270 | same3 = _mm_add_pd(same3, neig3); |
| | 2271 | |
| | 2272 | //xxxxxxxxxxxxx |
| | 2273 | neig0 = _mm_load_pd(&hood(0, -1, 1).coeff(C11, x)); |
| | 2274 | neig1 = _mm_load_pd(&hood(2, -1, 1).coeff(C11, x)); |
| | 2275 | neig2 = _mm_load_pd(&hood(4, -1, 1).coeff(C11, x)); |
| | 2276 | neig3 = _mm_load_pd(&hood(6, -1, 1).coeff(C11, x)); |
| | 2277 | |
| | 2278 | same0 = _mm_add_pd(same0, neig0); |
| | 2279 | same1 = _mm_add_pd(same1, neig1); |
| | 2280 | same2 = _mm_add_pd(same2, neig2); |
| | 2281 | same3 = _mm_add_pd(same3, neig3); |
| | 2282 | |
| | 2283 | //xxxxxxxxxxxxx |
| | 2284 | neig0 = _mm_load_pd(&hood(0, 0, 1).coeff(C11, x)); |
| | 2285 | neig1 = _mm_load_pd(&hood(2, 0, 1).coeff(C11, x)); |
| | 2286 | neig2 = _mm_load_pd(&hood(4, 0, 1).coeff(C11, x)); |
| | 2287 | neig3 = _mm_load_pd(&hood(6, 0, 1).coeff(C11, x)); |
| | 2288 | |
| | 2289 | same0 = _mm_add_pd(same0, neig0); |
| | 2290 | same1 = _mm_add_pd(same1, neig1); |
| | 2291 | same2 = _mm_add_pd(same2, neig2); |
| | 2292 | same3 = _mm_add_pd(same3, neig3); |
| | 2293 | |
| | 2294 | //xxxxxxxxxxxxx |
| | 2295 | neig0 = _mm_load_pd(&hood(0, 1, 1).coeff(C11, x)); |
| | 2296 | neig1 = _mm_load_pd(&hood(2, 1, 1).coeff(C11, x)); |
| | 2297 | neig2 = _mm_load_pd(&hood(4, 1, 1).coeff(C11, x)); |
| | 2298 | neig3 = _mm_load_pd(&hood(6, 1, 1).coeff(C11, x)); |
| | 2299 | |
| | 2300 | same0 = _mm_add_pd(same0, neig0); |
| | 2301 | same1 = _mm_add_pd(same1, neig1); |
| | 2302 | same2 = _mm_add_pd(same2, neig2); |
| | 2303 | same3 = _mm_add_pd(same3, neig3); |
| | 2304 | |
| | 2305 | //xxxxxxxxxxxxx |
| | 2306 | neig0 = _mm_load_pd(&hood(0, -1, -1).coeff(C12, x)); |
| | 2307 | neig1 = _mm_load_pd(&hood(2, -1, -1).coeff(C12, x)); |
| | 2308 | neig2 = _mm_load_pd(&hood(4, -1, -1).coeff(C12, x)); |
| | 2309 | neig3 = _mm_load_pd(&hood(6, -1, -1).coeff(C12, x)); |
| | 2310 | |
| | 2311 | same0 = _mm_add_pd(same0, neig0); |
| | 2312 | same1 = _mm_add_pd(same1, neig1); |
| | 2313 | same2 = _mm_add_pd(same2, neig2); |
| | 2314 | same3 = _mm_add_pd(same3, neig3); |
| | 2315 | |
| | 2316 | //xxxxxxxxxxxxx |
| | 2317 | neig0 = _mm_load_pd(&hood(0, 0, -1).coeff(C12, x)); |
| | 2318 | neig1 = _mm_load_pd(&hood(2, 0, -1).coeff(C12, x)); |
| | 2319 | neig2 = _mm_load_pd(&hood(4, 0, -1).coeff(C12, x)); |
| | 2320 | neig3 = _mm_load_pd(&hood(6, 0, -1).coeff(C12, x)); |
| | 2321 | |
| | 2322 | same0 = _mm_add_pd(same0, neig0); |
| | 2323 | same1 = _mm_add_pd(same1, neig1); |
| | 2324 | same2 = _mm_add_pd(same2, neig2); |
| | 2325 | same3 = _mm_add_pd(same3, neig3); |
| | 2326 | |
| | 2327 | //xxxxxxxxxxxxx |
| | 2328 | neig0 = _mm_load_pd(&hood(0, 1, -1).coeff(C12, x)); |
| | 2329 | neig1 = _mm_load_pd(&hood(2, 1, -1).coeff(C12, x)); |
| | 2330 | neig2 = _mm_load_pd(&hood(4, 1, -1).coeff(C12, x)); |
| | 2331 | neig3 = _mm_load_pd(&hood(6, 1, -1).coeff(C12, x)); |
| | 2332 | |
| | 2333 | same0 = _mm_add_pd(same0, neig0); |
| | 2334 | same1 = _mm_add_pd(same1, neig1); |
| | 2335 | same2 = _mm_add_pd(same2, neig2); |
| | 2336 | same3 = _mm_add_pd(same3, neig3); |
| | 2337 | |
| | 2338 | //xxxxxxxxxxxxx |
| | 2339 | neig0 = _mm_load_pd(&hood(0, -1, 0).coeff(C12, x)); |
| | 2340 | neig1 = _mm_load_pd(&hood(2, -1, 0).coeff(C12, x)); |
| | 2341 | neig2 = _mm_load_pd(&hood(4, -1, 0).coeff(C12, x)); |
| | 2342 | neig3 = _mm_load_pd(&hood(6, -1, 0).coeff(C12, x)); |
| | 2343 | |
| | 2344 | same0 = _mm_add_pd(same0, neig0); |
| | 2345 | same1 = _mm_add_pd(same1, neig1); |
| | 2346 | same2 = _mm_add_pd(same2, neig2); |
| | 2347 | same3 = _mm_add_pd(same3, neig3); |
| | 2348 | |
| | 2349 | //xxxxxxxxxxxxx |
| | 2350 | neig0 = _mm_load_pd(&hood(0, 1, 0).coeff(C12, x)); |
| | 2351 | neig1 = _mm_load_pd(&hood(2, 1, 0).coeff(C12, x)); |
| | 2352 | neig2 = _mm_load_pd(&hood(4, 1, 0).coeff(C12, x)); |
| | 2353 | neig3 = _mm_load_pd(&hood(6, 1, 0).coeff(C12, x)); |
| | 2354 | |
| | 2355 | same0 = _mm_add_pd(same0, neig0); |
| | 2356 | same1 = _mm_add_pd(same1, neig1); |
| | 2357 | same2 = _mm_add_pd(same2, neig2); |
| | 2358 | same3 = _mm_add_pd(same3, neig3); |
| | 2359 | |
| | 2360 | //xxxxxxxxxxxxx |
| | 2361 | neig0 = _mm_load_pd(&hood(0, 0, 0).coeff(C12, x)); |
| | 2362 | neig1 = _mm_load_pd(&hood(2, 0, 0).coeff(C12, x)); |
| | 2363 | neig2 = _mm_load_pd(&hood(4, 0, 0).coeff(C12, x)); |
| | 2364 | neig3 = _mm_load_pd(&hood(6, 0, 0).coeff(C12, x)); |
| | 2365 | |
| | 2366 | same0 = _mm_add_pd(same0, neig0); |
| | 2367 | same1 = _mm_add_pd(same1, neig1); |
| | 2368 | same2 = _mm_add_pd(same2, neig2); |
| | 2369 | same3 = _mm_add_pd(same3, neig3); |
| | 2370 | |
| | 2371 | //xxxxxxxxxxxxx |
| | 2372 | neig0 = _mm_load_pd(&hood(0, -1, 1).coeff(C12, x)); |
| | 2373 | neig1 = _mm_load_pd(&hood(2, -1, 1).coeff(C12, x)); |
| | 2374 | neig2 = _mm_load_pd(&hood(4, -1, 1).coeff(C12, x)); |
| | 2375 | neig3 = _mm_load_pd(&hood(6, -1, 1).coeff(C12, x)); |
| | 2376 | |
| | 2377 | same0 = _mm_add_pd(same0, neig0); |
| | 2378 | same1 = _mm_add_pd(same1, neig1); |
| | 2379 | same2 = _mm_add_pd(same2, neig2); |
| | 2380 | same3 = _mm_add_pd(same3, neig3); |
| | 2381 | |
| | 2382 | //xxxxxxxxxxxxx |
| | 2383 | neig0 = _mm_load_pd(&hood(0, 0, 1).coeff(C12, x)); |
| | 2384 | neig1 = _mm_load_pd(&hood(2, 0, 1).coeff(C12, x)); |
| | 2385 | neig2 = _mm_load_pd(&hood(4, 0, 1).coeff(C12, x)); |
| | 2386 | neig3 = _mm_load_pd(&hood(6, 0, 1).coeff(C12, x)); |
| | 2387 | |
| | 2388 | same0 = _mm_add_pd(same0, neig0); |
| | 2389 | same1 = _mm_add_pd(same1, neig1); |
| | 2390 | same2 = _mm_add_pd(same2, neig2); |
| | 2391 | same3 = _mm_add_pd(same3, neig3); |
| | 2392 | |
| | 2393 | //xxxxxxxxxxxxx |
| | 2394 | neig0 = _mm_load_pd(&hood(0, 1, 1).coeff(C12, x)); |
| | 2395 | neig1 = _mm_load_pd(&hood(2, 1, 1).coeff(C12, x)); |
| | 2396 | neig2 = _mm_load_pd(&hood(4, 1, 1).coeff(C12, x)); |
| | 2397 | neig3 = _mm_load_pd(&hood(6, 1, 1).coeff(C12, x)); |