| | 83 | } |
| | 84 | |
| | 85 | double temp; |
| | 86 | }; |
| | 87 | |
| | 88 | class JacobiCellStraightforward |
| | 89 | { |
| | 90 | public: |
| | 91 | typedef Stencils::VonNeumann<3, 1> Stencil; |
| | 92 | typedef Topologies::Torus<3>::Topology Topology; |
| | 93 | |
| | 94 | class API : public CellAPITraits::Fixed, public CellAPITraits::Line |
| | 95 | {}; |
| | 96 | |
| | 97 | JacobiCellStraightforward(double t = 0) : |
| | 98 | temp(t) |
| | 99 | {} |
| | 100 | |
/**
 * Reports the nano-step count of this cell type.
 *
 * @return always 1.
 *
 * NOTE(review): presumably the framework uses this as the number of
 * sub-steps per simulation step -- confirm against the simulator's cell API.
 */
static int nanoSteps()
{
    const int stepsPerUpdate = 1;
    return stepsPerUpdate;
}
| | 105 | |
| | 106 | template<typename NEIGHBORHOOD> |
| | 107 | void update(const NEIGHBORHOOD& hood, int /* nanoStep */) |
| | 108 | { |
| | 109 | temp = (hood[FixedCoord<0, 0, -1>()].temp + |
| | 110 | hood[FixedCoord<0, -1, 0>()].temp + |
| | 111 | hood[FixedCoord<1, 0, 0>()].temp + |
| | 112 | hood[FixedCoord<0, 0, 0>()].temp + |
| | 113 | hood[FixedCoord<1, 0, 0>()].temp + |
| | 114 | hood[FixedCoord<0, 1, 0>()].temp + |
| | 115 | hood[FixedCoord<0, 0, 1>()].temp) * (1.0 / 7.0); |
| | 116 | } |
| | 117 | |
| | 118 | template<typename NEIGHBORHOOD> |
| | 119 | static void updateLine(JacobiCellStraightforward *target, long *x, long endX, const NEIGHBORHOOD& hood, int /* nanoStep */) |
| | 120 | { |
| | 121 | if (((*x) % 2) == 1) { |
| | 122 | target[*x].update(hood, 0); |
| | 123 | ++(*x); |
| | 124 | } |
| | 125 | |
| | 126 | __m128d oneSeventh = _mm_set_pd(1.0/7.0, 1.0/7.0); |
| | 127 | |
| | 128 | for (; (*x) < (endX - 8); (*x) += 8) { |
| | 129 | __m128d accu0 = _mm_load_pd(&hood[FixedCoord< 0, 0, 0>()].temp); |
| | 130 | __m128d accu1 = _mm_load_pd(&hood[FixedCoord< 2, 0, 0>()].temp); |
| | 131 | __m128d accu2 = _mm_load_pd(&hood[FixedCoord< 4, 0, 0>()].temp); |
| | 132 | __m128d accu3 = _mm_load_pd(&hood[FixedCoord< 6, 0, 0>()].temp); |
| | 133 | |
| | 134 | __m128d buff0 = _mm_loadu_pd(&hood[FixedCoord<-1, 0, 0>()].temp); |
| | 135 | __m128d buff1 = _mm_loadu_pd(&hood[FixedCoord< 1, 0, 0>()].temp); |
| | 136 | __m128d buff2 = _mm_loadu_pd(&hood[FixedCoord< 3, 0, 0>()].temp); |
| | 137 | __m128d buff3 = _mm_loadu_pd(&hood[FixedCoord< 5, 0, 0>()].temp); |
| | 138 | accu0 = _mm_add_pd(accu0, buff0); |
| | 139 | accu1 = _mm_add_pd(accu1, buff1); |
| | 140 | accu2 = _mm_add_pd(accu2, buff2); |
| | 141 | accu3 = _mm_add_pd(accu3, buff3); |
| | 142 | |
| | 143 | buff0 = _mm_loadu_pd(&hood[FixedCoord< 1, 0, 0>()].temp); |
| | 144 | buff1 = _mm_loadu_pd(&hood[FixedCoord< 3, 0, 0>()].temp); |
| | 145 | buff2 = _mm_loadu_pd(&hood[FixedCoord< 5, 0, 0>()].temp); |
| | 146 | buff3 = _mm_loadu_pd(&hood[FixedCoord< 7, 0, 0>()].temp); |
| | 147 | accu0 = _mm_add_pd(accu0, buff0); |
| | 148 | accu1 = _mm_add_pd(accu1, buff1); |
| | 149 | accu2 = _mm_add_pd(accu2, buff2); |
| | 150 | accu3 = _mm_add_pd(accu3, buff3); |
| | 151 | |
| | 152 | buff0 = _mm_load_pd(&hood[FixedCoord< 0, -1, 0>()].temp); |
| | 153 | buff1 = _mm_load_pd(&hood[FixedCoord< 2, -1, 0>()].temp); |
| | 154 | buff2 = _mm_load_pd(&hood[FixedCoord< 4, -1, 0>()].temp); |
| | 155 | buff3 = _mm_load_pd(&hood[FixedCoord< 6, -1, 0>()].temp); |
| | 156 | accu0 = _mm_add_pd(accu0, buff0); |
| | 157 | accu1 = _mm_add_pd(accu1, buff1); |
| | 158 | accu2 = _mm_add_pd(accu2, buff2); |
| | 159 | accu3 = _mm_add_pd(accu3, buff3); |
| | 160 | |
| | 161 | buff0 = _mm_load_pd(&hood[FixedCoord< 0, 1, 0>()].temp); |
| | 162 | buff1 = _mm_load_pd(&hood[FixedCoord< 2, 1, 0>()].temp); |
| | 163 | buff2 = _mm_load_pd(&hood[FixedCoord< 4, 1, 0>()].temp); |
| | 164 | buff3 = _mm_load_pd(&hood[FixedCoord< 6, 1, 0>()].temp); |
| | 165 | accu0 = _mm_add_pd(accu0, buff0); |
| | 166 | accu1 = _mm_add_pd(accu1, buff1); |
| | 167 | accu2 = _mm_add_pd(accu2, buff2); |
| | 168 | accu3 = _mm_add_pd(accu3, buff3); |
| | 169 | |
| | 170 | buff0 = _mm_load_pd(&hood[FixedCoord< 0, 0, -1>()].temp); |
| | 171 | buff1 = _mm_load_pd(&hood[FixedCoord< 2, 0, -1>()].temp); |
| | 172 | buff2 = _mm_load_pd(&hood[FixedCoord< 4, 0, -1>()].temp); |
| | 173 | buff3 = _mm_load_pd(&hood[FixedCoord< 6, 0, -1>()].temp); |
| | 174 | accu0 = _mm_add_pd(accu0, buff0); |
| | 175 | accu1 = _mm_add_pd(accu1, buff1); |
| | 176 | accu2 = _mm_add_pd(accu2, buff2); |
| | 177 | accu3 = _mm_add_pd(accu3, buff3); |
| | 178 | |
| | 179 | buff0 = _mm_load_pd(&hood[FixedCoord< 0, 0, 1>()].temp); |
| | 180 | buff1 = _mm_load_pd(&hood[FixedCoord< 2, 0, 1>()].temp); |
| | 181 | buff2 = _mm_load_pd(&hood[FixedCoord< 4, 0, 1>()].temp); |
| | 182 | buff3 = _mm_load_pd(&hood[FixedCoord< 6, 0, 1>()].temp); |
| | 183 | accu0 = _mm_add_pd(accu0, buff0); |
| | 184 | accu1 = _mm_add_pd(accu1, buff1); |
| | 185 | accu2 = _mm_add_pd(accu2, buff2); |
| | 186 | accu3 = _mm_add_pd(accu3, buff3); |
| | 187 | |
| | 188 | accu0 = _mm_mul_pd(accu0, oneSeventh); |
| | 189 | accu1 = _mm_mul_pd(accu1, oneSeventh); |
| | 190 | accu2 = _mm_mul_pd(accu2, oneSeventh); |
| | 191 | accu3 = _mm_mul_pd(accu3, oneSeventh); |
| | 192 | |
| | 193 | _mm_store_pd(&target[*x + 0].temp, accu0); |
| | 194 | _mm_store_pd(&target[*x + 2].temp, accu1); |
| | 195 | _mm_store_pd(&target[*x + 4].temp, accu2); |
| | 196 | _mm_store_pd(&target[*x + 6].temp, accu3); |
| | 197 | } |
| | 198 | |
| | 199 | for (; *x < endX; ++(*x)) { |
| | 200 | target[*x].update(hood, 0); |
| | 201 | } |
| | 202 | |
| | 203 | |