4namespace muda::details::eigen
13__device__ __forceinline__
void svd3x3(
float a11,
47 constexpr auto gone = 1065353216;
48 constexpr auto gsine_pi_over_eight = 1053028117;
49 constexpr auto gcosine_pi_over_eight = 1064076127;
50 constexpr float gone_half = 0.5;
51 constexpr float gsmall_number = 1.e-12;
52 constexpr float gtiny_number = 1.e-20;
53 constexpr float gfour_gamma_squared = 5.8284273147583007813;
55 un Sa11, Sa21, Sa31, Sa12, Sa22, Sa32, Sa13, Sa23, Sa33;
56 un Su11, Su21, Su31, Su12, Su22, Su32, Su13, Su23, Su33;
57 un Sv11, Sv21, Sv31, Sv12, Sv22, Sv32, Sv13, Sv23, Sv33;
59 un Stmp1, Stmp2, Stmp3, Stmp4, Stmp5;
60 un Ss11, Ss21, Ss31, Ss22, Ss32, Ss33;
61 un Sqvs, Sqvvx, Sqvvy, Sqvvz;
77 Ss11.f = Sa11.f * Sa11.f;
78 Stmp1.f = Sa21.f * Sa21.f;
79 Ss11.f = __fadd_rn(Stmp1.f, Ss11.f);
80 Stmp1.f = Sa31.f * Sa31.f;
81 Ss11.f = __fadd_rn(Stmp1.f, Ss11.f);
83 Ss21.f = Sa12.f * Sa11.f;
84 Stmp1.f = Sa22.f * Sa21.f;
85 Ss21.f = __fadd_rn(Stmp1.f, Ss21.f);
86 Stmp1.f = Sa32.f * Sa31.f;
87 Ss21.f = __fadd_rn(Stmp1.f, Ss21.f);
89 Ss31.f = Sa13.f * Sa11.f;
90 Stmp1.f = Sa23.f * Sa21.f;
91 Ss31.f = __fadd_rn(Stmp1.f, Ss31.f);
92 Stmp1.f = Sa33.f * Sa31.f;
93 Ss31.f = __fadd_rn(Stmp1.f, Ss31.f);
95 Ss22.f = Sa12.f * Sa12.f;
96 Stmp1.f = Sa22.f * Sa22.f;
97 Ss22.f = __fadd_rn(Stmp1.f, Ss22.f);
98 Stmp1.f = Sa32.f * Sa32.f;
99 Ss22.f = __fadd_rn(Stmp1.f, Ss22.f);
101 Ss32.f = Sa13.f * Sa12.f;
102 Stmp1.f = Sa23.f * Sa22.f;
103 Ss32.f = __fadd_rn(Stmp1.f, Ss32.f);
104 Stmp1.f = Sa33.f * Sa32.f;
105 Ss32.f = __fadd_rn(Stmp1.f, Ss32.f);
107 Ss33.f = Sa13.f * Sa13.f;
108 Stmp1.f = Sa23.f * Sa23.f;
109 Ss33.f = __fadd_rn(Stmp1.f, Ss33.f);
110 Stmp1.f = Sa33.f * Sa33.f;
111 Ss33.f = __fadd_rn(Stmp1.f, Ss33.f);
121 for(
int i = 0; i < 4; i++)
123 Ssh.f = Ss21.f * 0.5f;
124 Stmp5.f = __fsub_rn(Ss11.f, Ss22.f);
126 Stmp2.f = Ssh.f * Ssh.f;
127 Stmp1.ui = (Stmp2.f >= gtiny_number) ? 0xffffffff : 0;
128 Ssh.ui = Stmp1.ui & Ssh.ui;
129 Sch.ui = Stmp1.ui & Stmp5.ui;
130 Stmp2.ui = ~Stmp1.ui & gone;
131 Sch.ui = Sch.ui | Stmp2.ui;
133 Stmp1.f = Ssh.f * Ssh.f;
134 Stmp2.f = Sch.f * Sch.f;
135 Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f);
136 Stmp4.f = __frsqrt_rn(Stmp3.f);
138 Ssh.f = Stmp4.f * Ssh.f;
139 Sch.f = Stmp4.f * Sch.f;
140 Stmp1.f = gfour_gamma_squared * Stmp1.f;
141 Stmp1.ui = (Stmp2.f <= Stmp1.f) ? 0xffffffff : 0;
143 Stmp2.ui = gsine_pi_over_eight & Stmp1.ui;
144 Ssh.ui = ~Stmp1.ui & Ssh.ui;
145 Ssh.ui = Ssh.ui | Stmp2.ui;
146 Stmp2.ui = gcosine_pi_over_eight & Stmp1.ui;
147 Sch.ui = ~Stmp1.ui & Sch.ui;
148 Sch.ui = Sch.ui | Stmp2.ui;
150 Stmp1.f = Ssh.f * Ssh.f;
151 Stmp2.f = Sch.f * Sch.f;
152 Sc.f = __fsub_rn(Stmp2.f, Stmp1.f);
153 Ss.f = Sch.f * Ssh.f;
154 Ss.f = __fadd_rn(Ss.f, Ss.f);
156#ifdef DEBUG_JACOBI_CONJUGATE
157 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.f, Sc.f, Ssh.f, Sch.f);
163 Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f);
164 Ss33.f = Ss33.f * Stmp3.f;
165 Ss31.f = Ss31.f * Stmp3.f;
166 Ss32.f = Ss32.f * Stmp3.f;
167 Ss33.f = Ss33.f * Stmp3.f;
169 Stmp1.f = Ss.f * Ss31.f;
170 Stmp2.f = Ss.f * Ss32.f;
171 Ss31.f = Sc.f * Ss31.f;
172 Ss32.f = Sc.f * Ss32.f;
173 Ss31.f = __fadd_rn(Stmp2.f, Ss31.f);
174 Ss32.f = __fsub_rn(Ss32.f, Stmp1.f);
176 Stmp2.f = Ss.f * Ss.f;
177 Stmp1.f = Ss22.f * Stmp2.f;
178 Stmp3.f = Ss11.f * Stmp2.f;
179 Stmp4.f = Sc.f * Sc.f;
180 Ss11.f = Ss11.f * Stmp4.f;
181 Ss22.f = Ss22.f * Stmp4.f;
182 Ss11.f = __fadd_rn(Ss11.f, Stmp1.f);
183 Ss22.f = __fadd_rn(Ss22.f, Stmp3.f);
184 Stmp4.f = __fsub_rn(Stmp4.f, Stmp2.f);
185 Stmp2.f = __fadd_rn(Ss21.f, Ss21.f);
186 Ss21.f = Ss21.f * Stmp4.f;
187 Stmp4.f = Sc.f * Ss.f;
188 Stmp2.f = Stmp2.f * Stmp4.f;
189 Stmp5.f = Stmp5.f * Stmp4.f;
190 Ss11.f = __fadd_rn(Ss11.f, Stmp2.f);
191 Ss21.f = __fsub_rn(Ss21.f, Stmp5.f);
192 Ss22.f = __fsub_rn(Ss22.f, Stmp2.f);
194#ifdef DEBUG_JACOBI_CONJUGATE
195 printf(
"%.20g\n", Ss11.f);
196 printf(
"%.20g %.20g\n", Ss21.f, Ss22.f);
197 printf(
"%.20g %.20g %.20g\n", Ss31.f, Ss32.f, Ss33.f);
204 Stmp1.f = Ssh.f * Sqvvx.f;
205 Stmp2.f = Ssh.f * Sqvvy.f;
206 Stmp3.f = Ssh.f * Sqvvz.f;
207 Ssh.f = Ssh.f * Sqvs.f;
209 Sqvs.f = Sch.f * Sqvs.f;
210 Sqvvx.f = Sch.f * Sqvvx.f;
211 Sqvvy.f = Sch.f * Sqvvy.f;
212 Sqvvz.f = Sch.f * Sqvvz.f;
214 Sqvvz.f = __fadd_rn(Sqvvz.f, Ssh.f);
215 Sqvs.f = __fsub_rn(Sqvs.f, Stmp3.f);
216 Sqvvx.f = __fadd_rn(Sqvvx.f, Stmp2.f);
217 Sqvvy.f = __fsub_rn(Sqvvy.f, Stmp1.f);
219#ifdef DEBUG_JACOBI_CONJUGATE
220 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.f, Sqvvy.f, Sqvvz.f, Sqvs.f);
226 Ssh.f = Ss32.f * 0.5f;
227 Stmp5.f = __fsub_rn(Ss22.f, Ss33.f);
229 Stmp2.f = Ssh.f * Ssh.f;
230 Stmp1.ui = (Stmp2.f >= gtiny_number) ? 0xffffffff : 0;
231 Ssh.ui = Stmp1.ui & Ssh.ui;
232 Sch.ui = Stmp1.ui & Stmp5.ui;
233 Stmp2.ui = ~Stmp1.ui & gone;
234 Sch.ui = Sch.ui | Stmp2.ui;
236 Stmp1.f = Ssh.f * Ssh.f;
237 Stmp2.f = Sch.f * Sch.f;
238 Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f);
239 Stmp4.f = __frsqrt_rn(Stmp3.f);
241 Ssh.f = Stmp4.f * Ssh.f;
242 Sch.f = Stmp4.f * Sch.f;
243 Stmp1.f = gfour_gamma_squared * Stmp1.f;
244 Stmp1.ui = (Stmp2.f <= Stmp1.f) ? 0xffffffff : 0;
246 Stmp2.ui = gsine_pi_over_eight & Stmp1.ui;
247 Ssh.ui = ~Stmp1.ui & Ssh.ui;
248 Ssh.ui = Ssh.ui | Stmp2.ui;
249 Stmp2.ui = gcosine_pi_over_eight & Stmp1.ui;
250 Sch.ui = ~Stmp1.ui & Sch.ui;
251 Sch.ui = Sch.ui | Stmp2.ui;
253 Stmp1.f = Ssh.f * Ssh.f;
254 Stmp2.f = Sch.f * Sch.f;
255 Sc.f = __fsub_rn(Stmp2.f, Stmp1.f);
256 Ss.f = Sch.f * Ssh.f;
257 Ss.f = __fadd_rn(Ss.f, Ss.f);
259#ifdef DEBUG_JACOBI_CONJUGATE
260 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.f, Sc.f, Ssh.f, Sch.f);
267 Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f);
268 Ss11.f = Ss11.f * Stmp3.f;
269 Ss21.f = Ss21.f * Stmp3.f;
270 Ss31.f = Ss31.f * Stmp3.f;
271 Ss11.f = Ss11.f * Stmp3.f;
273 Stmp1.f = Ss.f * Ss21.f;
274 Stmp2.f = Ss.f * Ss31.f;
275 Ss21.f = Sc.f * Ss21.f;
276 Ss31.f = Sc.f * Ss31.f;
277 Ss21.f = __fadd_rn(Stmp2.f, Ss21.f);
278 Ss31.f = __fsub_rn(Ss31.f, Stmp1.f);
280 Stmp2.f = Ss.f * Ss.f;
281 Stmp1.f = Ss33.f * Stmp2.f;
282 Stmp3.f = Ss22.f * Stmp2.f;
283 Stmp4.f = Sc.f * Sc.f;
284 Ss22.f = Ss22.f * Stmp4.f;
285 Ss33.f = Ss33.f * Stmp4.f;
286 Ss22.f = __fadd_rn(Ss22.f, Stmp1.f);
287 Ss33.f = __fadd_rn(Ss33.f, Stmp3.f);
288 Stmp4.f = __fsub_rn(Stmp4.f, Stmp2.f);
289 Stmp2.f = __fadd_rn(Ss32.f, Ss32.f);
290 Ss32.f = Ss32.f * Stmp4.f;
291 Stmp4.f = Sc.f * Ss.f;
292 Stmp2.f = Stmp2.f * Stmp4.f;
293 Stmp5.f = Stmp5.f * Stmp4.f;
294 Ss22.f = __fadd_rn(Ss22.f, Stmp2.f);
295 Ss32.f = __fsub_rn(Ss32.f, Stmp5.f);
296 Ss33.f = __fsub_rn(Ss33.f, Stmp2.f);
298#ifdef DEBUG_JACOBI_CONJUGATE
299 printf(
"%.20g\n", Ss11.f);
300 printf(
"%.20g %.20g\n", Ss21.f, Ss22.f);
301 printf(
"%.20g %.20g %.20g\n", Ss31.f, Ss32.f, Ss33.f);
308 Stmp1.f = Ssh.f * Sqvvx.f;
309 Stmp2.f = Ssh.f * Sqvvy.f;
310 Stmp3.f = Ssh.f * Sqvvz.f;
311 Ssh.f = Ssh.f * Sqvs.f;
313 Sqvs.f = Sch.f * Sqvs.f;
314 Sqvvx.f = Sch.f * Sqvvx.f;
315 Sqvvy.f = Sch.f * Sqvvy.f;
316 Sqvvz.f = Sch.f * Sqvvz.f;
318 Sqvvx.f = __fadd_rn(Sqvvx.f, Ssh.f);
319 Sqvs.f = __fsub_rn(Sqvs.f, Stmp1.f);
320 Sqvvy.f = __fadd_rn(Sqvvy.f, Stmp3.f);
321 Sqvvz.f = __fsub_rn(Sqvvz.f, Stmp2.f);
323#ifdef DEBUG_JACOBI_CONJUGATE
324 printf(
"GPU q %.20g %.20g %.20g %.20g\n", Sqvvx.f, Sqvvy.f, Sqvvz.f, Sqvs.f);
331 Ssh.f = Ss31.f * 0.5f;
332 Stmp5.f = __fsub_rn(Ss33.f, Ss11.f);
334 Stmp2.f = Ssh.f * Ssh.f;
335 Stmp1.ui = (Stmp2.f >= gtiny_number) ? 0xffffffff : 0;
336 Ssh.ui = Stmp1.ui & Ssh.ui;
337 Sch.ui = Stmp1.ui & Stmp5.ui;
338 Stmp2.ui = ~Stmp1.ui & gone;
339 Sch.ui = Sch.ui | Stmp2.ui;
341 Stmp1.f = Ssh.f * Ssh.f;
342 Stmp2.f = Sch.f * Sch.f;
343 Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f);
344 Stmp4.f = __frsqrt_rn(Stmp3.f);
346 Ssh.f = Stmp4.f * Ssh.f;
347 Sch.f = Stmp4.f * Sch.f;
348 Stmp1.f = gfour_gamma_squared * Stmp1.f;
349 Stmp1.ui = (Stmp2.f <= Stmp1.f) ? 0xffffffff : 0;
351 Stmp2.ui = gsine_pi_over_eight & Stmp1.ui;
352 Ssh.ui = ~Stmp1.ui & Ssh.ui;
353 Ssh.ui = Ssh.ui | Stmp2.ui;
354 Stmp2.ui = gcosine_pi_over_eight & Stmp1.ui;
355 Sch.ui = ~Stmp1.ui & Sch.ui;
356 Sch.ui = Sch.ui | Stmp2.ui;
358 Stmp1.f = Ssh.f * Ssh.f;
359 Stmp2.f = Sch.f * Sch.f;
360 Sc.f = __fsub_rn(Stmp2.f, Stmp1.f);
361 Ss.f = Sch.f * Ssh.f;
362 Ss.f = __fadd_rn(Ss.f, Ss.f);
364#ifdef DEBUG_JACOBI_CONJUGATE
365 printf(
"GPU s %.20g, c %.20g, sh %.20g, ch %.20g\n", Ss.f, Sc.f, Ssh.f, Sch.f);
372 Stmp3.f = __fadd_rn(Stmp1.f, Stmp2.f);
373 Ss22.f = Ss22.f * Stmp3.f;
374 Ss32.f = Ss32.f * Stmp3.f;
375 Ss21.f = Ss21.f * Stmp3.f;
376 Ss22.f = Ss22.f * Stmp3.f;
378 Stmp1.f = Ss.f * Ss32.f;
379 Stmp2.f = Ss.f * Ss21.f;
380 Ss32.f = Sc.f * Ss32.f;
381 Ss21.f = Sc.f * Ss21.f;
382 Ss32.f = __fadd_rn(Stmp2.f, Ss32.f);
383 Ss21.f = __fsub_rn(Ss21.f, Stmp1.f);
385 Stmp2.f = Ss.f * Ss.f;
386 Stmp1.f = Ss11.f * Stmp2.f;
387 Stmp3.f = Ss33.f * Stmp2.f;
388 Stmp4.f = Sc.f * Sc.f;
389 Ss33.f = Ss33.f * Stmp4.f;
390 Ss11.f = Ss11.f * Stmp4.f;
391 Ss33.f = __fadd_rn(Ss33.f, Stmp1.f);
392 Ss11.f = __fadd_rn(Ss11.f, Stmp3.f);
393 Stmp4.f = __fsub_rn(Stmp4.f, Stmp2.f);
394 Stmp2.f = __fadd_rn(Ss31.f, Ss31.f);
395 Ss31.f = Ss31.f * Stmp4.f;
396 Stmp4.f = Sc.f * Ss.f;
397 Stmp2.f = Stmp2.f * Stmp4.f;
398 Stmp5.f = Stmp5.f * Stmp4.f;
399 Ss33.f = __fadd_rn(Ss33.f, Stmp2.f);
400 Ss31.f = __fsub_rn(Ss31.f, Stmp5.f);
401 Ss11.f = __fsub_rn(Ss11.f, Stmp2.f);
403#ifdef DEBUG_JACOBI_CONJUGATE
404 printf(
"%.20g\n", Ss11.f);
405 printf(
"%.20g %.20g\n", Ss21.f, Ss22.f);
406 printf(
"%.20g %.20g %.20g\n", Ss31.f, Ss32.f, Ss33.f);
413 Stmp1.f = Ssh.f * Sqvvx.f;
414 Stmp2.f = Ssh.f * Sqvvy.f;
415 Stmp3.f = Ssh.f * Sqvvz.f;
416 Ssh.f = Ssh.f * Sqvs.f;
418 Sqvs.f = Sch.f * Sqvs.f;
419 Sqvvx.f = Sch.f * Sqvvx.f;
420 Sqvvy.f = Sch.f * Sqvvy.f;
421 Sqvvz.f = Sch.f * Sqvvz.f;
423 Sqvvy.f = __fadd_rn(Sqvvy.f, Ssh.f);
424 Sqvs.f = __fsub_rn(Sqvs.f, Stmp2.f);
425 Sqvvz.f = __fadd_rn(Sqvvz.f, Stmp1.f);
426 Sqvvx.f = __fsub_rn(Sqvvx.f, Stmp3.f);
434 Stmp2.f = Sqvs.f * Sqvs.f;
435 Stmp1.f = Sqvvx.f * Sqvvx.f;
436 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
437 Stmp1.f = Sqvvy.f * Sqvvy.f;
438 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
439 Stmp1.f = Sqvvz.f * Sqvvz.f;
440 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
442 Stmp1.f = __frsqrt_rn(Stmp2.f);
443 Stmp4.f = Stmp1.f * 0.5f;
444 Stmp3.f = Stmp1.f * Stmp4.f;
445 Stmp3.f = Stmp1.f * Stmp3.f;
446 Stmp3.f = Stmp2.f * Stmp3.f;
447 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
448 Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f);
450 Sqvs.f = Sqvs.f * Stmp1.f;
451 Sqvvx.f = Sqvvx.f * Stmp1.f;
452 Sqvvy.f = Sqvvy.f * Stmp1.f;
453 Sqvvz.f = Sqvvz.f * Stmp1.f;
459 Stmp1.f = Sqvvx.f * Sqvvx.f;
460 Stmp2.f = Sqvvy.f * Sqvvy.f;
461 Stmp3.f = Sqvvz.f * Sqvvz.f;
462 Sv11.f = Sqvs.f * Sqvs.f;
463 Sv22.f = __fsub_rn(Sv11.f, Stmp1.f);
464 Sv33.f = __fsub_rn(Sv22.f, Stmp2.f);
465 Sv33.f = __fadd_rn(Sv33.f, Stmp3.f);
466 Sv22.f = __fadd_rn(Sv22.f, Stmp2.f);
467 Sv22.f = __fsub_rn(Sv22.f, Stmp3.f);
468 Sv11.f = __fadd_rn(Sv11.f, Stmp1.f);
469 Sv11.f = __fsub_rn(Sv11.f, Stmp2.f);
470 Sv11.f = __fsub_rn(Sv11.f, Stmp3.f);
471 Stmp1.f = __fadd_rn(Sqvvx.f, Sqvvx.f);
472 Stmp2.f = __fadd_rn(Sqvvy.f, Sqvvy.f);
473 Stmp3.f = __fadd_rn(Sqvvz.f, Sqvvz.f);
474 Sv32.f = Sqvs.f * Stmp1.f;
475 Sv13.f = Sqvs.f * Stmp2.f;
476 Sv21.f = Sqvs.f * Stmp3.f;
477 Stmp1.f = Sqvvy.f * Stmp1.f;
478 Stmp2.f = Sqvvz.f * Stmp2.f;
479 Stmp3.f = Sqvvx.f * Stmp3.f;
480 Sv12.f = __fsub_rn(Stmp1.f, Sv21.f);
481 Sv23.f = __fsub_rn(Stmp2.f, Sv32.f);
482 Sv31.f = __fsub_rn(Stmp3.f, Sv13.f);
483 Sv21.f = __fadd_rn(Stmp1.f, Sv21.f);
484 Sv32.f = __fadd_rn(Stmp2.f, Sv32.f);
485 Sv13.f = __fadd_rn(Stmp3.f, Sv13.f);
493 Sa12.f = Sv12.f * Sa11.f;
494 Sa13.f = Sv13.f * Sa11.f;
495 Sa11.f = Sv11.f * Sa11.f;
496 Stmp1.f = Sv21.f * Stmp2.f;
497 Sa11.f = __fadd_rn(Sa11.f, Stmp1.f);
498 Stmp1.f = Sv31.f * Stmp3.f;
499 Sa11.f = __fadd_rn(Sa11.f, Stmp1.f);
500 Stmp1.f = Sv22.f * Stmp2.f;
501 Sa12.f = __fadd_rn(Sa12.f, Stmp1.f);
502 Stmp1.f = Sv32.f * Stmp3.f;
503 Sa12.f = __fadd_rn(Sa12.f, Stmp1.f);
504 Stmp1.f = Sv23.f * Stmp2.f;
505 Sa13.f = __fadd_rn(Sa13.f, Stmp1.f);
506 Stmp1.f = Sv33.f * Stmp3.f;
507 Sa13.f = __fadd_rn(Sa13.f, Stmp1.f);
511 Sa22.f = Sv12.f * Sa21.f;
512 Sa23.f = Sv13.f * Sa21.f;
513 Sa21.f = Sv11.f * Sa21.f;
514 Stmp1.f = Sv21.f * Stmp2.f;
515 Sa21.f = __fadd_rn(Sa21.f, Stmp1.f);
516 Stmp1.f = Sv31.f * Stmp3.f;
517 Sa21.f = __fadd_rn(Sa21.f, Stmp1.f);
518 Stmp1.f = Sv22.f * Stmp2.f;
519 Sa22.f = __fadd_rn(Sa22.f, Stmp1.f);
520 Stmp1.f = Sv32.f * Stmp3.f;
521 Sa22.f = __fadd_rn(Sa22.f, Stmp1.f);
522 Stmp1.f = Sv23.f * Stmp2.f;
523 Sa23.f = __fadd_rn(Sa23.f, Stmp1.f);
524 Stmp1.f = Sv33.f * Stmp3.f;
525 Sa23.f = __fadd_rn(Sa23.f, Stmp1.f);
529 Sa32.f = Sv12.f * Sa31.f;
530 Sa33.f = Sv13.f * Sa31.f;
531 Sa31.f = Sv11.f * Sa31.f;
532 Stmp1.f = Sv21.f * Stmp2.f;
533 Sa31.f = __fadd_rn(Sa31.f, Stmp1.f);
534 Stmp1.f = Sv31.f * Stmp3.f;
535 Sa31.f = __fadd_rn(Sa31.f, Stmp1.f);
536 Stmp1.f = Sv22.f * Stmp2.f;
537 Sa32.f = __fadd_rn(Sa32.f, Stmp1.f);
538 Stmp1.f = Sv32.f * Stmp3.f;
539 Sa32.f = __fadd_rn(Sa32.f, Stmp1.f);
540 Stmp1.f = Sv23.f * Stmp2.f;
541 Sa33.f = __fadd_rn(Sa33.f, Stmp1.f);
542 Stmp1.f = Sv33.f * Stmp3.f;
543 Sa33.f = __fadd_rn(Sa33.f, Stmp1.f);
549 Stmp1.f = Sa11.f * Sa11.f;
550 Stmp4.f = Sa21.f * Sa21.f;
551 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
552 Stmp4.f = Sa31.f * Sa31.f;
553 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
555 Stmp2.f = Sa12.f * Sa12.f;
556 Stmp4.f = Sa22.f * Sa22.f;
557 Stmp2.f = __fadd_rn(Stmp2.f, Stmp4.f);
558 Stmp4.f = Sa32.f * Sa32.f;
559 Stmp2.f = __fadd_rn(Stmp2.f, Stmp4.f);
561 Stmp3.f = Sa13.f * Sa13.f;
562 Stmp4.f = Sa23.f * Sa23.f;
563 Stmp3.f = __fadd_rn(Stmp3.f, Stmp4.f);
564 Stmp4.f = Sa33.f * Sa33.f;
565 Stmp3.f = __fadd_rn(Stmp3.f, Stmp4.f);
569 Stmp4.ui = (Stmp1.f < Stmp2.f) ? 0xffffffff : 0;
570 Stmp5.ui = Sa11.ui ^ Sa12.ui;
571 Stmp5.ui = Stmp5.ui & Stmp4.ui;
572 Sa11.ui = Sa11.ui ^ Stmp5.ui;
573 Sa12.ui = Sa12.ui ^ Stmp5.ui;
575 Stmp5.ui = Sa21.ui ^ Sa22.ui;
576 Stmp5.ui = Stmp5.ui & Stmp4.ui;
577 Sa21.ui = Sa21.ui ^ Stmp5.ui;
578 Sa22.ui = Sa22.ui ^ Stmp5.ui;
580 Stmp5.ui = Sa31.ui ^ Sa32.ui;
581 Stmp5.ui = Stmp5.ui & Stmp4.ui;
582 Sa31.ui = Sa31.ui ^ Stmp5.ui;
583 Sa32.ui = Sa32.ui ^ Stmp5.ui;
585 Stmp5.ui = Sv11.ui ^ Sv12.ui;
586 Stmp5.ui = Stmp5.ui & Stmp4.ui;
587 Sv11.ui = Sv11.ui ^ Stmp5.ui;
588 Sv12.ui = Sv12.ui ^ Stmp5.ui;
590 Stmp5.ui = Sv21.ui ^ Sv22.ui;
591 Stmp5.ui = Stmp5.ui & Stmp4.ui;
592 Sv21.ui = Sv21.ui ^ Stmp5.ui;
593 Sv22.ui = Sv22.ui ^ Stmp5.ui;
595 Stmp5.ui = Sv31.ui ^ Sv32.ui;
596 Stmp5.ui = Stmp5.ui & Stmp4.ui;
597 Sv31.ui = Sv31.ui ^ Stmp5.ui;
598 Sv32.ui = Sv32.ui ^ Stmp5.ui;
600 Stmp5.ui = Stmp1.ui ^ Stmp2.ui;
601 Stmp5.ui = Stmp5.ui & Stmp4.ui;
602 Stmp1.ui = Stmp1.ui ^ Stmp5.ui;
603 Stmp2.ui = Stmp2.ui ^ Stmp5.ui;
608 Stmp5.ui = Stmp5.ui & Stmp4.ui;
610 Stmp4.f = __fadd_rn(Stmp4.f, Stmp5.f);
612 Sa12.f = Sa12.f * Stmp4.f;
613 Sa22.f = Sa22.f * Stmp4.f;
614 Sa32.f = Sa32.f * Stmp4.f;
616 Sv12.f = Sv12.f * Stmp4.f;
617 Sv22.f = Sv22.f * Stmp4.f;
618 Sv32.f = Sv32.f * Stmp4.f;
622 Stmp4.ui = (Stmp1.f < Stmp3.f) ? 0xffffffff : 0;
623 Stmp5.ui = Sa11.ui ^ Sa13.ui;
624 Stmp5.ui = Stmp5.ui & Stmp4.ui;
625 Sa11.ui = Sa11.ui ^ Stmp5.ui;
626 Sa13.ui = Sa13.ui ^ Stmp5.ui;
628 Stmp5.ui = Sa21.ui ^ Sa23.ui;
629 Stmp5.ui = Stmp5.ui & Stmp4.ui;
630 Sa21.ui = Sa21.ui ^ Stmp5.ui;
631 Sa23.ui = Sa23.ui ^ Stmp5.ui;
633 Stmp5.ui = Sa31.ui ^ Sa33.ui;
634 Stmp5.ui = Stmp5.ui & Stmp4.ui;
635 Sa31.ui = Sa31.ui ^ Stmp5.ui;
636 Sa33.ui = Sa33.ui ^ Stmp5.ui;
638 Stmp5.ui = Sv11.ui ^ Sv13.ui;
639 Stmp5.ui = Stmp5.ui & Stmp4.ui;
640 Sv11.ui = Sv11.ui ^ Stmp5.ui;
641 Sv13.ui = Sv13.ui ^ Stmp5.ui;
643 Stmp5.ui = Sv21.ui ^ Sv23.ui;
644 Stmp5.ui = Stmp5.ui & Stmp4.ui;
645 Sv21.ui = Sv21.ui ^ Stmp5.ui;
646 Sv23.ui = Sv23.ui ^ Stmp5.ui;
648 Stmp5.ui = Sv31.ui ^ Sv33.ui;
649 Stmp5.ui = Stmp5.ui & Stmp4.ui;
650 Sv31.ui = Sv31.ui ^ Stmp5.ui;
651 Sv33.ui = Sv33.ui ^ Stmp5.ui;
653 Stmp5.ui = Stmp1.ui ^ Stmp3.ui;
654 Stmp5.ui = Stmp5.ui & Stmp4.ui;
655 Stmp1.ui = Stmp1.ui ^ Stmp5.ui;
656 Stmp3.ui = Stmp3.ui ^ Stmp5.ui;
661 Stmp5.ui = Stmp5.ui & Stmp4.ui;
663 Stmp4.f = __fadd_rn(Stmp4.f, Stmp5.f);
665 Sa11.f = Sa11.f * Stmp4.f;
666 Sa21.f = Sa21.f * Stmp4.f;
667 Sa31.f = Sa31.f * Stmp4.f;
669 Sv11.f = Sv11.f * Stmp4.f;
670 Sv21.f = Sv21.f * Stmp4.f;
671 Sv31.f = Sv31.f * Stmp4.f;
675 Stmp4.ui = (Stmp2.f < Stmp3.f) ? 0xffffffff : 0;
676 Stmp5.ui = Sa12.ui ^ Sa13.ui;
677 Stmp5.ui = Stmp5.ui & Stmp4.ui;
678 Sa12.ui = Sa12.ui ^ Stmp5.ui;
679 Sa13.ui = Sa13.ui ^ Stmp5.ui;
681 Stmp5.ui = Sa22.ui ^ Sa23.ui;
682 Stmp5.ui = Stmp5.ui & Stmp4.ui;
683 Sa22.ui = Sa22.ui ^ Stmp5.ui;
684 Sa23.ui = Sa23.ui ^ Stmp5.ui;
686 Stmp5.ui = Sa32.ui ^ Sa33.ui;
687 Stmp5.ui = Stmp5.ui & Stmp4.ui;
688 Sa32.ui = Sa32.ui ^ Stmp5.ui;
689 Sa33.ui = Sa33.ui ^ Stmp5.ui;
691 Stmp5.ui = Sv12.ui ^ Sv13.ui;
692 Stmp5.ui = Stmp5.ui & Stmp4.ui;
693 Sv12.ui = Sv12.ui ^ Stmp5.ui;
694 Sv13.ui = Sv13.ui ^ Stmp5.ui;
696 Stmp5.ui = Sv22.ui ^ Sv23.ui;
697 Stmp5.ui = Stmp5.ui & Stmp4.ui;
698 Sv22.ui = Sv22.ui ^ Stmp5.ui;
699 Sv23.ui = Sv23.ui ^ Stmp5.ui;
701 Stmp5.ui = Sv32.ui ^ Sv33.ui;
702 Stmp5.ui = Stmp5.ui & Stmp4.ui;
703 Sv32.ui = Sv32.ui ^ Stmp5.ui;
704 Sv33.ui = Sv33.ui ^ Stmp5.ui;
706 Stmp5.ui = Stmp2.ui ^ Stmp3.ui;
707 Stmp5.ui = Stmp5.ui & Stmp4.ui;
708 Stmp2.ui = Stmp2.ui ^ Stmp5.ui;
709 Stmp3.ui = Stmp3.ui ^ Stmp5.ui;
714 Stmp5.ui = Stmp5.ui & Stmp4.ui;
716 Stmp4.f = __fadd_rn(Stmp4.f, Stmp5.f);
718 Sa13.f = Sa13.f * Stmp4.f;
719 Sa23.f = Sa23.f * Stmp4.f;
720 Sa33.f = Sa33.f * Stmp4.f;
722 Sv13.f = Sv13.f * Stmp4.f;
723 Sv23.f = Sv23.f * Stmp4.f;
724 Sv33.f = Sv33.f * Stmp4.f;
740 Ssh.f = Sa21.f * Sa21.f;
741 Ssh.ui = (Ssh.f >= gsmall_number) ? 0xffffffff : 0;
742 Ssh.ui = Ssh.ui & Sa21.ui;
745 Sch.f = __fsub_rn(Stmp5.f, Sa11.f);
746 Sch.f = ::max(Sch.f, Sa11.f);
747 Sch.f = ::max(Sch.f, gsmall_number);
748 Stmp5.ui = (Sa11.f >= Stmp5.f) ? 0xffffffff : 0;
750 Stmp1.f = Sch.f * Sch.f;
751 Stmp2.f = Ssh.f * Ssh.f;
752 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
753 Stmp1.f = __frsqrt_rn(Stmp2.f);
755 Stmp4.f = Stmp1.f * 0.5f;
756 Stmp3.f = Stmp1.f * Stmp4.f;
757 Stmp3.f = Stmp1.f * Stmp3.f;
758 Stmp3.f = Stmp2.f * Stmp3.f;
759 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
760 Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f);
761 Stmp1.f = Stmp1.f * Stmp2.f;
763 Sch.f = __fadd_rn(Sch.f, Stmp1.f);
765 Stmp1.ui = ~Stmp5.ui & Ssh.ui;
766 Stmp2.ui = ~Stmp5.ui & Sch.ui;
767 Sch.ui = Stmp5.ui & Sch.ui;
768 Ssh.ui = Stmp5.ui & Ssh.ui;
769 Sch.ui = Sch.ui | Stmp1.ui;
770 Ssh.ui = Ssh.ui | Stmp2.ui;
772 Stmp1.f = Sch.f * Sch.f;
773 Stmp2.f = Ssh.f * Ssh.f;
774 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
775 Stmp1.f = __frsqrt_rn(Stmp2.f);
777 Stmp4.f = Stmp1.f * 0.5f;
778 Stmp3.f = Stmp1.f * Stmp4.f;
779 Stmp3.f = Stmp1.f * Stmp3.f;
780 Stmp3.f = Stmp2.f * Stmp3.f;
781 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
782 Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f);
784 Sch.f = Sch.f * Stmp1.f;
785 Ssh.f = Ssh.f * Stmp1.f;
787 Sc.f = Sch.f * Sch.f;
788 Ss.f = Ssh.f * Ssh.f;
789 Sc.f = __fsub_rn(Sc.f, Ss.f);
790 Ss.f = Ssh.f * Sch.f;
791 Ss.f = __fadd_rn(Ss.f, Ss.f);
797 Stmp1.f = Ss.f * Sa11.f;
798 Stmp2.f = Ss.f * Sa21.f;
799 Sa11.f = Sc.f * Sa11.f;
800 Sa21.f = Sc.f * Sa21.f;
801 Sa11.f = __fadd_rn(Sa11.f, Stmp2.f);
802 Sa21.f = __fsub_rn(Sa21.f, Stmp1.f);
804 Stmp1.f = Ss.f * Sa12.f;
805 Stmp2.f = Ss.f * Sa22.f;
806 Sa12.f = Sc.f * Sa12.f;
807 Sa22.f = Sc.f * Sa22.f;
808 Sa12.f = __fadd_rn(Sa12.f, Stmp2.f);
809 Sa22.f = __fsub_rn(Sa22.f, Stmp1.f);
811 Stmp1.f = Ss.f * Sa13.f;
812 Stmp2.f = Ss.f * Sa23.f;
813 Sa13.f = Sc.f * Sa13.f;
814 Sa23.f = Sc.f * Sa23.f;
815 Sa13.f = __fadd_rn(Sa13.f, Stmp2.f);
816 Sa23.f = __fsub_rn(Sa23.f, Stmp1.f);
822 Stmp1.f = Ss.f * Su11.f;
823 Stmp2.f = Ss.f * Su12.f;
824 Su11.f = Sc.f * Su11.f;
825 Su12.f = Sc.f * Su12.f;
826 Su11.f = __fadd_rn(Su11.f, Stmp2.f);
827 Su12.f = __fsub_rn(Su12.f, Stmp1.f);
829 Stmp1.f = Ss.f * Su21.f;
830 Stmp2.f = Ss.f * Su22.f;
831 Su21.f = Sc.f * Su21.f;
832 Su22.f = Sc.f * Su22.f;
833 Su21.f = __fadd_rn(Su21.f, Stmp2.f);
834 Su22.f = __fsub_rn(Su22.f, Stmp1.f);
836 Stmp1.f = Ss.f * Su31.f;
837 Stmp2.f = Ss.f * Su32.f;
838 Su31.f = Sc.f * Su31.f;
839 Su32.f = Sc.f * Su32.f;
840 Su31.f = __fadd_rn(Su31.f, Stmp2.f);
841 Su32.f = __fsub_rn(Su32.f, Stmp1.f);
845 Ssh.f = Sa31.f * Sa31.f;
846 Ssh.ui = (Ssh.f >= gsmall_number) ? 0xffffffff : 0;
847 Ssh.ui = Ssh.ui & Sa31.ui;
850 Sch.f = __fsub_rn(Stmp5.f, Sa11.f);
851 Sch.f = ::max(Sch.f, Sa11.f);
852 Sch.f = ::max(Sch.f, gsmall_number);
853 Stmp5.ui = (Sa11.f >= Stmp5.f) ? 0xffffffff : 0;
855 Stmp1.f = Sch.f * Sch.f;
856 Stmp2.f = Ssh.f * Ssh.f;
857 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
858 Stmp1.f = __frsqrt_rn(Stmp2.f);
860 Stmp4.f = Stmp1.f * 0.5;
861 Stmp3.f = Stmp1.f * Stmp4.f;
862 Stmp3.f = Stmp1.f * Stmp3.f;
863 Stmp3.f = Stmp2.f * Stmp3.f;
864 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
865 Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f);
866 Stmp1.f = Stmp1.f * Stmp2.f;
868 Sch.f = __fadd_rn(Sch.f, Stmp1.f);
870 Stmp1.ui = ~Stmp5.ui & Ssh.ui;
871 Stmp2.ui = ~Stmp5.ui & Sch.ui;
872 Sch.ui = Stmp5.ui & Sch.ui;
873 Ssh.ui = Stmp5.ui & Ssh.ui;
874 Sch.ui = Sch.ui | Stmp1.ui;
875 Ssh.ui = Ssh.ui | Stmp2.ui;
877 Stmp1.f = Sch.f * Sch.f;
878 Stmp2.f = Ssh.f * Ssh.f;
879 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
880 Stmp1.f = __frsqrt_rn(Stmp2.f);
882 Stmp4.f = Stmp1.f * 0.5f;
883 Stmp3.f = Stmp1.f * Stmp4.f;
884 Stmp3.f = Stmp1.f * Stmp3.f;
885 Stmp3.f = Stmp2.f * Stmp3.f;
886 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
887 Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f);
889 Sch.f = Sch.f * Stmp1.f;
890 Ssh.f = Ssh.f * Stmp1.f;
892 Sc.f = Sch.f * Sch.f;
893 Ss.f = Ssh.f * Ssh.f;
894 Sc.f = __fsub_rn(Sc.f, Ss.f);
895 Ss.f = Ssh.f * Sch.f;
896 Ss.f = __fadd_rn(Ss.f, Ss.f);
902 Stmp1.f = Ss.f * Sa11.f;
903 Stmp2.f = Ss.f * Sa31.f;
904 Sa11.f = Sc.f * Sa11.f;
905 Sa31.f = Sc.f * Sa31.f;
906 Sa11.f = __fadd_rn(Sa11.f, Stmp2.f);
907 Sa31.f = __fsub_rn(Sa31.f, Stmp1.f);
909 Stmp1.f = Ss.f * Sa12.f;
910 Stmp2.f = Ss.f * Sa32.f;
911 Sa12.f = Sc.f * Sa12.f;
912 Sa32.f = Sc.f * Sa32.f;
913 Sa12.f = __fadd_rn(Sa12.f, Stmp2.f);
914 Sa32.f = __fsub_rn(Sa32.f, Stmp1.f);
916 Stmp1.f = Ss.f * Sa13.f;
917 Stmp2.f = Ss.f * Sa33.f;
918 Sa13.f = Sc.f * Sa13.f;
919 Sa33.f = Sc.f * Sa33.f;
920 Sa13.f = __fadd_rn(Sa13.f, Stmp2.f);
921 Sa33.f = __fsub_rn(Sa33.f, Stmp1.f);
927 Stmp1.f = Ss.f * Su11.f;
928 Stmp2.f = Ss.f * Su13.f;
929 Su11.f = Sc.f * Su11.f;
930 Su13.f = Sc.f * Su13.f;
931 Su11.f = __fadd_rn(Su11.f, Stmp2.f);
932 Su13.f = __fsub_rn(Su13.f, Stmp1.f);
934 Stmp1.f = Ss.f * Su21.f;
935 Stmp2.f = Ss.f * Su23.f;
936 Su21.f = Sc.f * Su21.f;
937 Su23.f = Sc.f * Su23.f;
938 Su21.f = __fadd_rn(Su21.f, Stmp2.f);
939 Su23.f = __fsub_rn(Su23.f, Stmp1.f);
941 Stmp1.f = Ss.f * Su31.f;
942 Stmp2.f = Ss.f * Su33.f;
943 Su31.f = Sc.f * Su31.f;
944 Su33.f = Sc.f * Su33.f;
945 Su31.f = __fadd_rn(Su31.f, Stmp2.f);
946 Su33.f = __fsub_rn(Su33.f, Stmp1.f);
950 Ssh.f = Sa32.f * Sa32.f;
951 Ssh.ui = (Ssh.f >= gsmall_number) ? 0xffffffff : 0;
952 Ssh.ui = Ssh.ui & Sa32.ui;
955 Sch.f = __fsub_rn(Stmp5.f, Sa22.f);
956 Sch.f = ::max(Sch.f, Sa22.f);
957 Sch.f = ::max(Sch.f, gsmall_number);
958 Stmp5.ui = (Sa22.f >= Stmp5.f) ? 0xffffffff : 0;
960 Stmp1.f = Sch.f * Sch.f;
961 Stmp2.f = Ssh.f * Ssh.f;
962 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
963 Stmp1.f = __frsqrt_rn(Stmp2.f);
965 Stmp4.f = Stmp1.f * 0.5f;
966 Stmp3.f = Stmp1.f * Stmp4.f;
967 Stmp3.f = Stmp1.f * Stmp3.f;
968 Stmp3.f = Stmp2.f * Stmp3.f;
969 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
970 Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f);
971 Stmp1.f = Stmp1.f * Stmp2.f;
973 Sch.f = __fadd_rn(Sch.f, Stmp1.f);
975 Stmp1.ui = ~Stmp5.ui & Ssh.ui;
976 Stmp2.ui = ~Stmp5.ui & Sch.ui;
977 Sch.ui = Stmp5.ui & Sch.ui;
978 Ssh.ui = Stmp5.ui & Ssh.ui;
979 Sch.ui = Sch.ui | Stmp1.ui;
980 Ssh.ui = Ssh.ui | Stmp2.ui;
982 Stmp1.f = Sch.f * Sch.f;
983 Stmp2.f = Ssh.f * Ssh.f;
984 Stmp2.f = __fadd_rn(Stmp1.f, Stmp2.f);
985 Stmp1.f = __frsqrt_rn(Stmp2.f);
987 Stmp4.f = Stmp1.f * 0.5f;
988 Stmp3.f = Stmp1.f * Stmp4.f;
989 Stmp3.f = Stmp1.f * Stmp3.f;
990 Stmp3.f = Stmp2.f * Stmp3.f;
991 Stmp1.f = __fadd_rn(Stmp1.f, Stmp4.f);
992 Stmp1.f = __fsub_rn(Stmp1.f, Stmp3.f);
994 Sch.f = Sch.f * Stmp1.f;
995 Ssh.f = Ssh.f * Stmp1.f;
997 Sc.f = Sch.f * Sch.f;
998 Ss.f = Ssh.f * Ssh.f;
999 Sc.f = __fsub_rn(Sc.f, Ss.f);
1000 Ss.f = Ssh.f * Sch.f;
1001 Ss.f = __fadd_rn(Ss.f, Ss.f);
1007 Stmp1.f = Ss.f * Sa21.f;
1008 Stmp2.f = Ss.f * Sa31.f;
1009 Sa21.f = Sc.f * Sa21.f;
1010 Sa31.f = Sc.f * Sa31.f;
1011 Sa21.f = __fadd_rn(Sa21.f, Stmp2.f);
1012 Sa31.f = __fsub_rn(Sa31.f, Stmp1.f);
1014 Stmp1.f = Ss.f * Sa22.f;
1015 Stmp2.f = Ss.f * Sa32.f;
1016 Sa22.f = Sc.f * Sa22.f;
1017 Sa32.f = Sc.f * Sa32.f;
1018 Sa22.f = __fadd_rn(Sa22.f, Stmp2.f);
1019 Sa32.f = __fsub_rn(Sa32.f, Stmp1.f);
1021 Stmp1.f = Ss.f * Sa23.f;
1022 Stmp2.f = Ss.f * Sa33.f;
1023 Sa23.f = Sc.f * Sa23.f;
1024 Sa33.f = Sc.f * Sa33.f;
1025 Sa23.f = __fadd_rn(Sa23.f, Stmp2.f);
1026 Sa33.f = __fsub_rn(Sa33.f, Stmp1.f);
1032 Stmp1.f = Ss.f * Su12.f;
1033 Stmp2.f = Ss.f * Su13.f;
1034 Su12.f = Sc.f * Su12.f;
1035 Su13.f = Sc.f * Su13.f;
1036 Su12.f = __fadd_rn(Su12.f, Stmp2.f);
1037 Su13.f = __fsub_rn(Su13.f, Stmp1.f);
1039 Stmp1.f = Ss.f * Su22.f;
1040 Stmp2.f = Ss.f * Su23.f;
1041 Su22.f = Sc.f * Su22.f;
1042 Su23.f = Sc.f * Su23.f;
1043 Su22.f = __fadd_rn(Su22.f, Stmp2.f);
1044 Su23.f = __fsub_rn(Su23.f, Stmp1.f);
1046 Stmp1.f = Ss.f * Su32.f;
1047 Stmp2.f = Ss.f * Su33.f;
1048 Su32.f = Sc.f * Su32.f;
1049 Su33.f = Sc.f * Su33.f;
1050 Su32.f = __fadd_rn(Su32.f, Stmp2.f);
1051 Su33.f = __fsub_rn(Su33.f, Stmp1.f);