3 "// Generated by LLVM NVPTX Back-End\n"
10 " // .globl yadif_uchar\n"
12 ".visible .entry yadif_uchar(\n"
13 " .param .u64 yadif_uchar_param_0,\n"
14 " .param .u64 yadif_uchar_param_1,\n"
15 " .param .u64 yadif_uchar_param_2,\n"
16 " .param .u64 yadif_uchar_param_3,\n"
17 " .param .u32 yadif_uchar_param_4,\n"
18 " .param .u32 yadif_uchar_param_5,\n"
19 " .param .u32 yadif_uchar_param_6,\n"
20 " .param .u32 yadif_uchar_param_7,\n"
21 " .param .u32 yadif_uchar_param_8,\n"
22 " .param .u32 yadif_uchar_param_9,\n"
23 " .param .u32 yadif_uchar_param_10,\n"
24 " .param .u8 yadif_uchar_param_11\n"
27 " .reg .pred %p<13>;\n"
28 " .reg .b16 %rs<7>;\n"
29 " .reg .f32 %f<54>;\n"
30 " .reg .b32 %r<280>;\n"
31 " .reg .b64 %rd<33>;\n"
33 " // begin inline asm\n"
34 " mov.u32 %r62, %ctaid.x;\n"
35 " // end inline asm\n"
36 " // begin inline asm\n"
37 " mov.u32 %r63, %ctaid.y;\n"
38 " // end inline asm\n"
39 " ld.param.u32 %r68, [yadif_uchar_param_4];\n"
40 " // begin inline asm\n"
41 " mov.u32 %r64, %ntid.x;\n"
42 " // end inline asm\n"
43 " ld.param.u32 %r69, [yadif_uchar_param_5];\n"
44 " // begin inline asm\n"
45 " mov.u32 %r65, %ntid.y;\n"
46 " // end inline asm\n"
47 " // begin inline asm\n"
48 " mov.u32 %r66, %tid.x;\n"
49 " // end inline asm\n"
50 " // begin inline asm\n"
51 " mov.u32 %r67, %tid.y;\n"
52 " // end inline asm\n"
53 " mad.lo.s32 %r1, %r64, %r62, %r66;\n"
54 " mad.lo.s32 %r2, %r65, %r63, %r67;\n"
55 " setp.ge.s32 %p2, %r1, %r68;\n"
56 " setp.ge.s32 %p3, %r2, %r69;\n"
57 " or.pred %p4, %p2, %p3;\n"
58 " @%p4 bra LBB0_11;\n"
59 " ld.param.u32 %r60, [yadif_uchar_param_9];\n"
60 " ld.param.u32 %r59, [yadif_uchar_param_6];\n"
61 " ld.param.u64 %rd3, [yadif_uchar_param_2];\n"
62 " ld.param.u64 %rd5, [yadif_uchar_param_0];\n"
63 " cvta.to.global.u64 %rd1, %rd5;\n"
64 " shr.u32 %r70, %r2, 31;\n"
65 " add.s32 %r71, %r2, %r70;\n"
66 " and.b32 %r72, %r71, -2;\n"
67 " sub.s32 %r73, %r2, %r72;\n"
68 " setp.ne.s32 %p5, %r73, %r60;\n"
69 " @%p5 bra LBB0_3; // taken when (y % 2) != yadif_uchar_param_9\n"
70 " cvt.rn.f32.s32 %f52, %r1;\n"
71 " cvt.rn.f32.s32 %f53, %r2;\n"
72 " // begin inline asm\n"
73 " tex.2d.v4.u32.f32 {%r279, %r271, %r272, %r273}, [%rd3, {%f52, %f53}];\n"
74 " // end inline asm\n"
75 " bra.uni LBB0_10; // keep the fetched texel in %r279; do not fall through into the filter path\n"
77 " ld.param.u8 %rs3, [yadif_uchar_param_11];\n"
78 " and.b16 %rs4, %rs3, 1;\n"
79 " add.s32 %r130, %r1, -3;\n"
80 " cvt.rn.f32.s32 %f4, %r130;\n"
81 " add.s32 %r131, %r2, -1;\n"
82 " cvt.rn.f32.s32 %f5, %r131;\n"
83 " // begin inline asm\n"
84 " tex.2d.v4.u32.f32 {%r74, %r75, %r76, %r77}, [%rd3, {%f4, %f5}];\n"
85 " // end inline asm\n"
86 " add.s32 %r132, %r1, -2;\n"
87 " cvt.rn.f32.s32 %f6, %r132;\n"
88 " // begin inline asm\n"
89 " tex.2d.v4.u32.f32 {%r78, %r79, %r80, %r81}, [%rd3, {%f6, %f5}];\n"
90 " // end inline asm\n"
91 " add.s32 %r133, %r1, -1;\n"
92 " cvt.rn.f32.s32 %f8, %r133;\n"
93 " // begin inline asm\n"
94 " tex.2d.v4.u32.f32 {%r82, %r83, %r84, %r85}, [%rd3, {%f8, %f5}];\n"
95 " // end inline asm\n"
96 " cvt.rn.f32.s32 %f10, %r1;\n"
97 " // begin inline asm\n"
98 " tex.2d.v4.u32.f32 {%r86, %r87, %r88, %r89}, [%rd3, {%f10, %f5}];\n"
99 " // end inline asm\n"
100 " add.s32 %r134, %r1, 1;\n"
101 " cvt.rn.f32.s32 %f12, %r134;\n"
102 " // begin inline asm\n"
103 " tex.2d.v4.u32.f32 {%r90, %r91, %r92, %r93}, [%rd3, {%f12, %f5}];\n"
104 " // end inline asm\n"
105 " add.s32 %r135, %r1, 2;\n"
106 " cvt.rn.f32.s32 %f14, %r135;\n"
107 " // begin inline asm\n"
108 " tex.2d.v4.u32.f32 {%r94, %r95, %r96, %r97}, [%rd3, {%f14, %f5}];\n"
109 " // end inline asm\n"
110 " add.s32 %r136, %r1, 3;\n"
111 " cvt.rn.f32.s32 %f16, %r136;\n"
112 " // begin inline asm\n"
113 " tex.2d.v4.u32.f32 {%r98, %r99, %r100, %r101}, [%rd3, {%f16, %f5}];\n"
114 " // end inline asm\n"
115 " add.s32 %r137, %r2, 1;\n"
116 " cvt.rn.f32.s32 %f19, %r137;\n"
117 " // begin inline asm\n"
118 " tex.2d.v4.u32.f32 {%r102, %r103, %r104, %r105}, [%rd3, {%f4, %f19}];\n"
119 " // end inline asm\n"
120 " // begin inline asm\n"
121 " tex.2d.v4.u32.f32 {%r106, %r107, %r108, %r109}, [%rd3, {%f6, %f19}];\n"
122 " // end inline asm\n"
123 " // begin inline asm\n"
124 " tex.2d.v4.u32.f32 {%r110, %r111, %r112, %r113}, [%rd3, {%f8, %f19}];\n"
125 " // end inline asm\n"
126 " // begin inline asm\n"
127 " tex.2d.v4.u32.f32 {%r114, %r115, %r116, %r117}, [%rd3, {%f10, %f19}];\n"
128 " // end inline asm\n"
129 " // begin inline asm\n"
130 " tex.2d.v4.u32.f32 {%r118, %r119, %r120, %r121}, [%rd3, {%f12, %f19}];\n"
131 " // end inline asm\n"
132 " // begin inline asm\n"
133 " tex.2d.v4.u32.f32 {%r122, %r123, %r124, %r125}, [%rd3, {%f14, %f19}];\n"
134 " // end inline asm\n"
135 " // begin inline asm\n"
136 " tex.2d.v4.u32.f32 {%r126, %r127, %r128, %r129}, [%rd3, {%f16, %f19}];\n"
137 " // end inline asm\n"
138 " and.b32 %r16, %r86, 255;\n"
139 " and.b32 %r17, %r114, 255;\n"
140 " add.s32 %r277, %r17, %r16;\n"
141 " and.b32 %r19, %r82, 255;\n"
142 " and.b32 %r20, %r110, 255;\n"
143 " sub.s32 %r138, %r19, %r20;\n"
144 " abs.s32 %r139, %r138;\n"
145 " sub.s32 %r140, %r16, %r17;\n"
146 " abs.s32 %r141, %r140;\n"
147 " add.s32 %r142, %r141, %r139;\n"
148 " and.b32 %r21, %r90, 255;\n"
149 " and.b32 %r22, %r118, 255;\n"
150 " sub.s32 %r143, %r21, %r22;\n"
151 " abs.s32 %r144, %r143;\n"
152 " add.s32 %r276, %r142, %r144;\n"
153 " and.b32 %r24, %r78, 255;\n"
154 " sub.s32 %r145, %r24, %r17;\n"
155 " abs.s32 %r146, %r145;\n"
156 " sub.s32 %r147, %r19, %r22;\n"
157 " abs.s32 %r148, %r147;\n"
158 " add.s32 %r149, %r148, %r146;\n"
159 " and.b32 %r25, %r122, 255;\n"
160 " sub.s32 %r150, %r16, %r25;\n"
161 " abs.s32 %r151, %r150;\n"
162 " add.s32 %r26, %r149, %r151;\n"
163 " setp.ge.s32 %p6, %r26, %r276;\n"
164 " @%p6 bra LBB0_5;\n"
165 " add.s32 %r152, %r22, %r19;\n"
166 " and.b32 %r153, %r74, 255;\n"
167 " sub.s32 %r154, %r153, %r22;\n"
168 " abs.s32 %r155, %r154;\n"
169 " sub.s32 %r156, %r24, %r25;\n"
170 " abs.s32 %r157, %r156;\n"
171 " add.s32 %r158, %r157, %r155;\n"
172 " and.b32 %r159, %r126, 255;\n"
173 " sub.s32 %r160, %r19, %r159;\n"
174 " abs.s32 %r161, %r160;\n"
175 " add.s32 %r162, %r158, %r161;\n"
176 " setp.lt.s32 %p7, %r162, %r26;\n"
177 " add.s32 %r163, %r25, %r24;\n"
178 " selp.b32 %r277, %r163, %r152, %p7;\n"
179 " min.s32 %r276, %r162, %r26;\n"
181 " setp.eq.b16 %p1, %rs4, 1;\n"
182 " ld.param.u32 %r61, [yadif_uchar_param_10];\n"
183 " ld.param.u64 %rd28, [yadif_uchar_param_3];\n"
184 " ld.param.u64 %rd20, [yadif_uchar_param_1];\n"
185 " and.b32 %r31, %r106, 255;\n"
186 " sub.s32 %r164, %r16, %r31;\n"
187 " abs.s32 %r165, %r164;\n"
188 " sub.s32 %r166, %r21, %r20;\n"
189 " abs.s32 %r167, %r166;\n"
190 " add.s32 %r168, %r167, %r165;\n"
191 " and.b32 %r32, %r94, 255;\n"
192 " sub.s32 %r169, %r32, %r17;\n"
193 " abs.s32 %r170, %r169;\n"
194 " add.s32 %r33, %r168, %r170;\n"
195 " setp.ge.s32 %p8, %r33, %r276;\n"
196 " @%p8 bra LBB0_7;\n"
197 " add.s32 %r171, %r20, %r21;\n"
198 " and.b32 %r172, %r102, 255;\n"
199 " sub.s32 %r173, %r21, %r172;\n"
200 " abs.s32 %r174, %r173;\n"
201 " sub.s32 %r175, %r32, %r31;\n"
202 " abs.s32 %r176, %r175;\n"
203 " add.s32 %r177, %r176, %r174;\n"
204 " and.b32 %r178, %r98, 255;\n"
205 " sub.s32 %r179, %r178, %r20;\n"
206 " abs.s32 %r180, %r179;\n"
207 " add.s32 %r181, %r177, %r180;\n"
208 " setp.lt.s32 %p9, %r181, %r33;\n"
209 " add.s32 %r182, %r31, %r32;\n"
210 " selp.b32 %r277, %r182, %r171, %p9;\n"
212 " shr.u32 %r36, %r277, 1;\n"
213 " setp.eq.s32 %p10, %r60, %r61;\n"
214 " selp.b64 %rd22, %rd3, %rd20, %p10;\n"
215 " selp.b64 %rd25, %rd28, %rd3, %p10;\n"
216 " // begin inline asm\n"
217 " tex.2d.v4.u32.f32 {%r183, %r184, %r185, %r186}, [%rd20, {%f10, %f5}];\n"
218 " // end inline asm\n"
219 " // begin inline asm\n"
220 " tex.2d.v4.u32.f32 {%r187, %r188, %r189, %r190}, [%rd20, {%f10, %f19}];\n"
221 " // end inline asm\n"
222 " add.s32 %r223, %r2, -2;\n"
223 " cvt.rn.f32.s32 %f37, %r223;\n"
224 " // begin inline asm\n"
225 " tex.2d.v4.u32.f32 {%r191, %r192, %r193, %r194}, [%rd22, {%f10, %f37}];\n"
226 " // end inline asm\n"
227 " cvt.rn.f32.s32 %f39, %r2;\n"
228 " // begin inline asm\n"
229 " tex.2d.v4.u32.f32 {%r195, %r196, %r197, %r198}, [%rd22, {%f10, %f39}];\n"
230 " // end inline asm\n"
231 " add.s32 %r224, %r2, 2;\n"
232 " cvt.rn.f32.s32 %f41, %r224;\n"
233 " // begin inline asm\n"
234 " tex.2d.v4.u32.f32 {%r199, %r200, %r201, %r202}, [%rd22, {%f10, %f41}];\n"
235 " // end inline asm\n"
236 " // begin inline asm\n"
237 " tex.2d.v4.u32.f32 {%r203, %r204, %r205, %r206}, [%rd25, {%f10, %f37}];\n"
238 " // end inline asm\n"
239 " // begin inline asm\n"
240 " tex.2d.v4.u32.f32 {%r207, %r208, %r209, %r210}, [%rd25, {%f10, %f39}];\n"
241 " // end inline asm\n"
242 " // begin inline asm\n"
243 " tex.2d.v4.u32.f32 {%r211, %r212, %r213, %r214}, [%rd25, {%f10, %f41}];\n"
244 " // end inline asm\n"
245 " // begin inline asm\n"
246 " tex.2d.v4.u32.f32 {%r215, %r216, %r217, %r218}, [%rd28, {%f10, %f5}];\n"
247 " // end inline asm\n"
248 " // begin inline asm\n"
249 " tex.2d.v4.u32.f32 {%r219, %r220, %r221, %r222}, [%rd28, {%f10, %f19}];\n"
250 " // end inline asm\n"
251 " and.b32 %r225, %r195, 255;\n"
252 " and.b32 %r226, %r207, 255;\n"
253 " add.s32 %r227, %r226, %r225;\n"
254 " shr.u32 %r53, %r227, 1;\n"
255 " sub.s32 %r228, %r225, %r226;\n"
256 " abs.s32 %r229, %r228;\n"
257 " and.b32 %r230, %r183, 255;\n"
258 " sub.s32 %r231, %r230, %r16;\n"
259 " abs.s32 %r232, %r231;\n"
260 " and.b32 %r233, %r187, 255;\n"
261 " sub.s32 %r234, %r233, %r17;\n"
262 " abs.s32 %r235, %r234;\n"
263 " add.s32 %r236, %r235, %r232;\n"
264 " shr.u32 %r237, %r236, 1;\n"
265 " and.b32 %r238, %r215, 255;\n"
266 " sub.s32 %r239, %r238, %r16;\n"
267 " abs.s32 %r240, %r239;\n"
268 " and.b32 %r241, %r219, 255;\n"
269 " sub.s32 %r242, %r17, %r241;\n"
270 " abs.s32 %r243, %r242;\n"
271 " add.s32 %r244, %r243, %r240;\n"
272 " shr.u32 %r245, %r244, 1;\n"
273 " max.s32 %r246, %r229, %r237;\n"
274 " max.s32 %r278, %r246, %r245;\n"
275 " @%p1 bra LBB0_9;\n"
276 " cvt.u16.u32 %rs1, %r86;\n"
277 " cvt.u16.u32 %rs2, %r114;\n"
278 " and.b32 %r247, %r199, 255;\n"
279 " and.b32 %r248, %r211, 255;\n"
280 " add.s32 %r249, %r248, %r247;\n"
281 " shr.u32 %r250, %r249, 1;\n"
282 " and.b32 %r251, %r191, 255;\n"
283 " and.b32 %r252, %r203, 255;\n"
284 " add.s32 %r253, %r252, %r251;\n"
285 " shr.u32 %r254, %r253, 1;\n"
286 " sub.s32 %r255, %r53, %r17;\n"
287 " sub.s32 %r256, %r53, %r16;\n"
288 " sub.s32 %r257, %r254, %r16;\n"
289 " sub.s32 %r258, %r250, %r17;\n"
290 " min.s32 %r259, %r257, %r258;\n"
291 " and.b16 %rs5, %rs2, 255;\n"
292 " and.b16 %rs6, %rs1, 255;\n"
293 " setp.gt.u16 %p11, %rs6, %rs5;\n"
294 " selp.b32 %r260, %r255, %r256, %p11;\n"
295 " max.s32 %r261, %r260, %r259;\n"
296 " max.s32 %r262, %r257, %r258;\n"
297 " setp.lt.u16 %p12, %rs6, %rs5;\n"
298 " selp.b32 %r263, %r255, %r256, %p12;\n"
299 " min.s32 %r264, %r263, %r262;\n"
300 " neg.s32 %r265, %r261;\n"
301 " max.s32 %r266, %r278, %r264;\n"
302 " max.s32 %r278, %r266, %r265;\n"
304 " add.s32 %r267, %r278, %r53;\n"
305 " min.s32 %r268, %r267, %r36;\n"
306 " sub.s32 %r269, %r53, %r278;\n"
307 " max.s32 %r279, %r269, %r268;\n"
309 " mad.lo.s32 %r274, %r2, %r59, %r1;\n"
310 " cvt.s64.s32 %rd31, %r274;\n"
311 " add.s64 %rd32, %rd1, %rd31;\n"
312 " st.global.u8 [%rd32], %r279;\n"
317 " // .globl yadif_ushort\n"
318 ".visible .entry yadif_ushort(\n"
319 " .param .u64 yadif_ushort_param_0,\n"
320 " .param .u64 yadif_ushort_param_1,\n"
321 " .param .u64 yadif_ushort_param_2,\n"
322 " .param .u64 yadif_ushort_param_3,\n"
323 " .param .u32 yadif_ushort_param_4,\n"
324 " .param .u32 yadif_ushort_param_5,\n"
325 " .param .u32 yadif_ushort_param_6,\n"
326 " .param .u32 yadif_ushort_param_7,\n"
327 " .param .u32 yadif_ushort_param_8,\n"
328 " .param .u32 yadif_ushort_param_9,\n"
329 " .param .u32 yadif_ushort_param_10,\n"
330 " .param .u8 yadif_ushort_param_11\n"
333 " .reg .pred %p<13>;\n"
334 " .reg .b16 %rs<5>;\n"
335 " .reg .f32 %f<54>;\n"
336 " .reg .b32 %r<280>;\n"
337 " .reg .b64 %rd<33>;\n"
339 " // begin inline asm\n"
340 " mov.u32 %r62, %ctaid.x;\n"
341 " // end inline asm\n"
342 " // begin inline asm\n"
343 " mov.u32 %r63, %ctaid.y;\n"
344 " // end inline asm\n"
345 " ld.param.u32 %r68, [yadif_ushort_param_4];\n"
346 " // begin inline asm\n"
347 " mov.u32 %r64, %ntid.x;\n"
348 " // end inline asm\n"
349 " ld.param.u32 %r69, [yadif_ushort_param_5];\n"
350 " // begin inline asm\n"
351 " mov.u32 %r65, %ntid.y;\n"
352 " // end inline asm\n"
353 " // begin inline asm\n"
354 " mov.u32 %r66, %tid.x;\n"
355 " // end inline asm\n"
356 " // begin inline asm\n"
357 " mov.u32 %r67, %tid.y;\n"
358 " // end inline asm\n"
359 " mad.lo.s32 %r1, %r64, %r62, %r66;\n"
360 " mad.lo.s32 %r2, %r65, %r63, %r67;\n"
361 " setp.ge.s32 %p2, %r1, %r68;\n"
362 " setp.ge.s32 %p3, %r2, %r69;\n"
363 " or.pred %p4, %p2, %p3;\n"
364 " @%p4 bra LBB1_11;\n"
365 " ld.param.u32 %r60, [yadif_ushort_param_9];\n"
366 " ld.param.u32 %r59, [yadif_ushort_param_6];\n"
367 " ld.param.u64 %rd3, [yadif_ushort_param_2];\n"
368 " ld.param.u64 %rd5, [yadif_ushort_param_0];\n"
369 " cvta.to.global.u64 %rd1, %rd5;\n"
370 " shr.u32 %r70, %r2, 31;\n"
371 " add.s32 %r71, %r2, %r70;\n"
372 " and.b32 %r72, %r71, -2;\n"
373 " sub.s32 %r73, %r2, %r72;\n"
374 " setp.ne.s32 %p5, %r73, %r60;\n"
375 " @%p5 bra LBB1_3;\n"
376 " cvt.rn.f32.s32 %f52, %r1;\n"
377 " cvt.rn.f32.s32 %f53, %r2;\n"
378 " // begin inline asm\n"
379 " tex.2d.v4.u32.f32 {%r279, %r271, %r272, %r273}, [%rd3, {%f52, %f53}];\n"
380 " // end inline asm\n"
381 " bra.uni LBB1_10;\n"
383 " ld.param.u8 %rs3, [yadif_ushort_param_11];\n"
384 " and.b16 %rs4, %rs3, 1;\n"
385 " add.s32 %r130, %r1, -3;\n"
386 " cvt.rn.f32.s32 %f4, %r130;\n"
387 " add.s32 %r131, %r2, -1;\n"
388 " cvt.rn.f32.s32 %f5, %r131;\n"
389 " // begin inline asm\n"
390 " tex.2d.v4.u32.f32 {%r74, %r75, %r76, %r77}, [%rd3, {%f4, %f5}];\n"
391 " // end inline asm\n"
392 " add.s32 %r132, %r1, -2;\n"
393 " cvt.rn.f32.s32 %f6, %r132;\n"
394 " // begin inline asm\n"
395 " tex.2d.v4.u32.f32 {%r78, %r79, %r80, %r81}, [%rd3, {%f6, %f5}];\n"
396 " // end inline asm\n"
397 " add.s32 %r133, %r1, -1;\n"
398 " cvt.rn.f32.s32 %f8, %r133;\n"
399 " // begin inline asm\n"
400 " tex.2d.v4.u32.f32 {%r82, %r83, %r84, %r85}, [%rd3, {%f8, %f5}];\n"
401 " // end inline asm\n"
402 " cvt.rn.f32.s32 %f10, %r1;\n"
403 " // begin inline asm\n"
404 " tex.2d.v4.u32.f32 {%r86, %r87, %r88, %r89}, [%rd3, {%f10, %f5}];\n"
405 " // end inline asm\n"
406 " add.s32 %r134, %r1, 1;\n"
407 " cvt.rn.f32.s32 %f12, %r134;\n"
408 " // begin inline asm\n"
409 " tex.2d.v4.u32.f32 {%r90, %r91, %r92, %r93}, [%rd3, {%f12, %f5}];\n"
410 " // end inline asm\n"
411 " add.s32 %r135, %r1, 2;\n"
412 " cvt.rn.f32.s32 %f14, %r135;\n"
413 " // begin inline asm\n"
414 " tex.2d.v4.u32.f32 {%r94, %r95, %r96, %r97}, [%rd3, {%f14, %f5}];\n"
415 " // end inline asm\n"
416 " add.s32 %r136, %r1, 3;\n"
417 " cvt.rn.f32.s32 %f16, %r136;\n"
418 " // begin inline asm\n"
419 " tex.2d.v4.u32.f32 {%r98, %r99, %r100, %r101}, [%rd3, {%f16, %f5}];\n"
420 " // end inline asm\n"
421 " add.s32 %r137, %r2, 1;\n"
422 " cvt.rn.f32.s32 %f19, %r137;\n"
423 " // begin inline asm\n"
424 " tex.2d.v4.u32.f32 {%r102, %r103, %r104, %r105}, [%rd3, {%f4, %f19}];\n"
425 " // end inline asm\n"
426 " // begin inline asm\n"
427 " tex.2d.v4.u32.f32 {%r106, %r107, %r108, %r109}, [%rd3, {%f6, %f19}];\n"
428 " // end inline asm\n"
429 " // begin inline asm\n"
430 " tex.2d.v4.u32.f32 {%r110, %r111, %r112, %r113}, [%rd3, {%f8, %f19}];\n"
431 " // end inline asm\n"
432 " // begin inline asm\n"
433 " tex.2d.v4.u32.f32 {%r114, %r115, %r116, %r117}, [%rd3, {%f10, %f19}];\n"
434 " // end inline asm\n"
435 " // begin inline asm\n"
436 " tex.2d.v4.u32.f32 {%r118, %r119, %r120, %r121}, [%rd3, {%f12, %f19}];\n"
437 " // end inline asm\n"
438 " // begin inline asm\n"
439 " tex.2d.v4.u32.f32 {%r122, %r123, %r124, %r125}, [%rd3, {%f14, %f19}];\n"
440 " // end inline asm\n"
441 " // begin inline asm\n"
442 " tex.2d.v4.u32.f32 {%r126, %r127, %r128, %r129}, [%rd3, {%f16, %f19}];\n"
443 " // end inline asm\n"
444 " and.b32 %r16, %r86, 65535;\n"
445 " and.b32 %r17, %r114, 65535;\n"
446 " add.s32 %r277, %r17, %r16;\n"
447 " and.b32 %r19, %r82, 65535;\n"
448 " and.b32 %r20, %r110, 65535;\n"
449 " sub.s32 %r138, %r19, %r20;\n"
450 " abs.s32 %r139, %r138;\n"
451 " sub.s32 %r140, %r16, %r17;\n"
452 " abs.s32 %r141, %r140;\n"
453 " add.s32 %r142, %r141, %r139;\n"
454 " and.b32 %r21, %r90, 65535;\n"
455 " and.b32 %r22, %r118, 65535;\n"
456 " sub.s32 %r143, %r21, %r22;\n"
457 " abs.s32 %r144, %r143;\n"
458 " add.s32 %r276, %r142, %r144;\n"
459 " and.b32 %r24, %r78, 65535;\n"
460 " sub.s32 %r145, %r24, %r17;\n"
461 " abs.s32 %r146, %r145;\n"
462 " sub.s32 %r147, %r19, %r22;\n"
463 " abs.s32 %r148, %r147;\n"
464 " add.s32 %r149, %r148, %r146;\n"
465 " and.b32 %r25, %r122, 65535;\n"
466 " sub.s32 %r150, %r16, %r25;\n"
467 " abs.s32 %r151, %r150;\n"
468 " add.s32 %r26, %r149, %r151;\n"
469 " setp.ge.s32 %p6, %r26, %r276;\n"
470 " @%p6 bra LBB1_5;\n"
471 " add.s32 %r152, %r22, %r19;\n"
472 " and.b32 %r153, %r74, 65535;\n"
473 " sub.s32 %r154, %r153, %r22;\n"
474 " abs.s32 %r155, %r154;\n"
475 " sub.s32 %r156, %r24, %r25;\n"
476 " abs.s32 %r157, %r156;\n"
477 " add.s32 %r158, %r157, %r155;\n"
478 " and.b32 %r159, %r126, 65535;\n"
479 " sub.s32 %r160, %r19, %r159;\n"
480 " abs.s32 %r161, %r160;\n"
481 " add.s32 %r162, %r158, %r161;\n"
482 " setp.lt.s32 %p7, %r162, %r26;\n"
483 " add.s32 %r163, %r25, %r24;\n"
484 " selp.b32 %r277, %r163, %r152, %p7;\n"
485 " min.s32 %r276, %r162, %r26;\n"
487 " setp.eq.b16 %p1, %rs4, 1;\n"
488 " ld.param.u32 %r61, [yadif_ushort_param_10];\n"
489 " ld.param.u64 %rd28, [yadif_ushort_param_3];\n"
490 " ld.param.u64 %rd20, [yadif_ushort_param_1];\n"
491 " and.b32 %r31, %r106, 65535;\n"
492 " sub.s32 %r164, %r16, %r31;\n"
493 " abs.s32 %r165, %r164;\n"
494 " sub.s32 %r166, %r21, %r20;\n"
495 " abs.s32 %r167, %r166;\n"
496 " add.s32 %r168, %r167, %r165;\n"
497 " and.b32 %r32, %r94, 65535;\n"
498 " sub.s32 %r169, %r32, %r17;\n"
499 " abs.s32 %r170, %r169;\n"
500 " add.s32 %r33, %r168, %r170;\n"
501 " setp.ge.s32 %p8, %r33, %r276;\n"
502 " @%p8 bra LBB1_7;\n"
503 " add.s32 %r171, %r20, %r21;\n"
504 " and.b32 %r172, %r102, 65535;\n"
505 " sub.s32 %r173, %r21, %r172;\n"
506 " abs.s32 %r174, %r173;\n"
507 " sub.s32 %r175, %r32, %r31;\n"
508 " abs.s32 %r176, %r175;\n"
509 " add.s32 %r177, %r176, %r174;\n"
510 " and.b32 %r178, %r98, 65535;\n"
511 " sub.s32 %r179, %r178, %r20;\n"
512 " abs.s32 %r180, %r179;\n"
513 " add.s32 %r181, %r177, %r180;\n"
514 " setp.lt.s32 %p9, %r181, %r33;\n"
515 " add.s32 %r182, %r31, %r32;\n"
516 " selp.b32 %r277, %r182, %r171, %p9;\n"
518 " shr.u32 %r36, %r277, 1;\n"
519 " setp.eq.s32 %p10, %r60, %r61;\n"
520 " selp.b64 %rd22, %rd3, %rd20, %p10;\n"
521 " selp.b64 %rd25, %rd28, %rd3, %p10;\n"
522 " // begin inline asm\n"
523 " tex.2d.v4.u32.f32 {%r183, %r184, %r185, %r186}, [%rd20, {%f10, %f5}];\n"
524 " // end inline asm\n"
525 " // begin inline asm\n"
526 " tex.2d.v4.u32.f32 {%r187, %r188, %r189, %r190}, [%rd20, {%f10, %f19}];\n"
527 " // end inline asm\n"
528 " add.s32 %r223, %r2, -2;\n"
529 " cvt.rn.f32.s32 %f37, %r223;\n"
530 " // begin inline asm\n"
531 " tex.2d.v4.u32.f32 {%r191, %r192, %r193, %r194}, [%rd22, {%f10, %f37}];\n"
532 " // end inline asm\n"
533 " cvt.rn.f32.s32 %f39, %r2;\n"
534 " // begin inline asm\n"
535 " tex.2d.v4.u32.f32 {%r195, %r196, %r197, %r198}, [%rd22, {%f10, %f39}];\n"
536 " // end inline asm\n"
537 " add.s32 %r224, %r2, 2;\n"
538 " cvt.rn.f32.s32 %f41, %r224;\n"
539 " // begin inline asm\n"
540 " tex.2d.v4.u32.f32 {%r199, %r200, %r201, %r202}, [%rd22, {%f10, %f41}];\n"
541 " // end inline asm\n"
542 " // begin inline asm\n"
543 " tex.2d.v4.u32.f32 {%r203, %r204, %r205, %r206}, [%rd25, {%f10, %f37}];\n"
544 " // end inline asm\n"
545 " // begin inline asm\n"
546 " tex.2d.v4.u32.f32 {%r207, %r208, %r209, %r210}, [%rd25, {%f10, %f39}];\n"
547 " // end inline asm\n"
548 " // begin inline asm\n"
549 " tex.2d.v4.u32.f32 {%r211, %r212, %r213, %r214}, [%rd25, {%f10, %f41}];\n"
550 " // end inline asm\n"
551 " // begin inline asm\n"
552 " tex.2d.v4.u32.f32 {%r215, %r216, %r217, %r218}, [%rd28, {%f10, %f5}];\n"
553 " // end inline asm\n"
554 " // begin inline asm\n"
555 " tex.2d.v4.u32.f32 {%r219, %r220, %r221, %r222}, [%rd28, {%f10, %f19}];\n"
556 " // end inline asm\n"
557 " and.b32 %r225, %r195, 65535;\n"
558 " and.b32 %r226, %r207, 65535;\n"
559 " add.s32 %r227, %r226, %r225;\n"
560 " shr.u32 %r53, %r227, 1;\n"
561 " sub.s32 %r228, %r225, %r226;\n"
562 " abs.s32 %r229, %r228;\n"
563 " and.b32 %r230, %r183, 65535;\n"
564 " sub.s32 %r231, %r230, %r16;\n"
565 " abs.s32 %r232, %r231;\n"
566 " and.b32 %r233, %r187, 65535;\n"
567 " sub.s32 %r234, %r233, %r17;\n"
568 " abs.s32 %r235, %r234;\n"
569 " add.s32 %r236, %r235, %r232;\n"
570 " shr.u32 %r237, %r236, 1;\n"
571 " and.b32 %r238, %r215, 65535;\n"
572 " sub.s32 %r239, %r238, %r16;\n"
573 " abs.s32 %r240, %r239;\n"
574 " and.b32 %r241, %r219, 65535;\n"
575 " sub.s32 %r242, %r17, %r241;\n"
576 " abs.s32 %r243, %r242;\n"
577 " add.s32 %r244, %r243, %r240;\n"
578 " shr.u32 %r245, %r244, 1;\n"
579 " max.s32 %r246, %r229, %r237;\n"
580 " max.s32 %r278, %r246, %r245;\n"
581 " @%p1 bra LBB1_9;\n"
582 " cvt.u16.u32 %rs1, %r86;\n"
583 " cvt.u16.u32 %rs2, %r114;\n"
584 " and.b32 %r247, %r199, 65535;\n"
585 " and.b32 %r248, %r211, 65535;\n"
586 " add.s32 %r249, %r248, %r247;\n"
587 " shr.u32 %r250, %r249, 1;\n"
588 " and.b32 %r251, %r191, 65535;\n"
589 " and.b32 %r252, %r203, 65535;\n"
590 " add.s32 %r253, %r252, %r251;\n"
591 " shr.u32 %r254, %r253, 1;\n"
592 " sub.s32 %r255, %r53, %r17;\n"
593 " sub.s32 %r256, %r53, %r16;\n"
594 " sub.s32 %r257, %r254, %r16;\n"
595 " sub.s32 %r258, %r250, %r17;\n"
596 " min.s32 %r259, %r257, %r258;\n"
597 " setp.gt.u16 %p11, %rs1, %rs2;\n"
598 " selp.b32 %r260, %r255, %r256, %p11;\n"
599 " max.s32 %r261, %r260, %r259;\n"
600 " max.s32 %r262, %r257, %r258;\n"
601 " setp.lt.u16 %p12, %rs1, %rs2;\n"
602 " selp.b32 %r263, %r255, %r256, %p12;\n"
603 " min.s32 %r264, %r263, %r262;\n"
604 " neg.s32 %r265, %r261;\n"
605 " max.s32 %r266, %r278, %r264;\n"
606 " max.s32 %r278, %r266, %r265;\n"
608 " add.s32 %r267, %r278, %r53;\n"
609 " min.s32 %r268, %r267, %r36;\n"
610 " sub.s32 %r269, %r53, %r278;\n"
611 " max.s32 %r279, %r269, %r268;\n"
613 " mad.lo.s32 %r274, %r2, %r59, %r1;\n"
614 " mul.wide.s32 %rd31, %r274, 2;\n"
615 " add.s64 %rd32, %rd1, %rd31;\n"
616 " st.global.u16 [%rd32], %r279;\n"
621 " // .globl yadif_uchar2\n"
622 ".visible .entry yadif_uchar2(\n"
623 " .param .u64 yadif_uchar2_param_0,\n"
624 " .param .u64 yadif_uchar2_param_1,\n"
625 " .param .u64 yadif_uchar2_param_2,\n"
626 " .param .u64 yadif_uchar2_param_3,\n"
627 " .param .u32 yadif_uchar2_param_4,\n"
628 " .param .u32 yadif_uchar2_param_5,\n"
629 " .param .u32 yadif_uchar2_param_6,\n"
630 " .param .u32 yadif_uchar2_param_7,\n"
631 " .param .u32 yadif_uchar2_param_8,\n"
632 " .param .u32 yadif_uchar2_param_9,\n"
633 " .param .u32 yadif_uchar2_param_10,\n"
634 " .param .u8 yadif_uchar2_param_11\n"
637 " .reg .pred %p<19>;\n"
638 " .reg .b16 %rs<13>;\n"
639 " .reg .f32 %f<54>;\n"
640 " .reg .b32 %r<427>;\n"
641 " .reg .b64 %rd<33>;\n"
643 " // begin inline asm\n"
644 " mov.u32 %r114, %ctaid.x;\n"
645 " // end inline asm\n"
646 " // begin inline asm\n"
647 " mov.u32 %r115, %ctaid.y;\n"
648 " // end inline asm\n"
649 " ld.param.u32 %r120, [yadif_uchar2_param_4];\n"
650 " // begin inline asm\n"
651 " mov.u32 %r116, %ntid.x;\n"
652 " // end inline asm\n"
653 " ld.param.u32 %r121, [yadif_uchar2_param_5];\n"
654 " // begin inline asm\n"
655 " mov.u32 %r117, %ntid.y;\n"
656 " // end inline asm\n"
657 " // begin inline asm\n"
658 " mov.u32 %r118, %tid.x;\n"
659 " // end inline asm\n"
660 " // begin inline asm\n"
661 " mov.u32 %r119, %tid.y;\n"
662 " // end inline asm\n"
663 " mad.lo.s32 %r1, %r116, %r114, %r118;\n"
664 " mad.lo.s32 %r2, %r117, %r115, %r119;\n"
665 " setp.ge.s32 %p2, %r1, %r120;\n"
666 " setp.ge.s32 %p3, %r2, %r121;\n"
667 " or.pred %p4, %p2, %p3;\n"
668 " @%p4 bra LBB2_17;\n"
669 " ld.param.u32 %r112, [yadif_uchar2_param_9];\n"
670 " ld.param.u32 %r111, [yadif_uchar2_param_6];\n"
671 " ld.param.u64 %rd3, [yadif_uchar2_param_2];\n"
672 " ld.param.u64 %rd5, [yadif_uchar2_param_0];\n"
673 " cvta.to.global.u64 %rd1, %rd5;\n"
674 " shr.u32 %r122, %r2, 31;\n"
675 " add.s32 %r123, %r2, %r122;\n"
676 " and.b32 %r124, %r123, -2;\n"
677 " sub.s32 %r125, %r2, %r124;\n"
678 " setp.ne.s32 %p5, %r125, %r112;\n"
679 " @%p5 bra LBB2_3;\n"
680 " cvt.rn.f32.s32 %f52, %r1;\n"
681 " cvt.rn.f32.s32 %f53, %r2;\n"
682 " // begin inline asm\n"
683 " tex.2d.v4.u32.f32 {%r426, %r425, %r414, %r415}, [%rd3, {%f52, %f53}];\n"
684 " // end inline asm\n"
685 " bra.uni LBB2_16;\n"
687 " add.s32 %r182, %r1, -3;\n"
688 " cvt.rn.f32.s32 %f4, %r182;\n"
689 " add.s32 %r183, %r2, -1;\n"
690 " cvt.rn.f32.s32 %f5, %r183;\n"
691 " // begin inline asm\n"
692 " tex.2d.v4.u32.f32 {%r126, %r127, %r128, %r129}, [%rd3, {%f4, %f5}];\n"
693 " // end inline asm\n"
694 " add.s32 %r184, %r1, -2;\n"
695 " cvt.rn.f32.s32 %f6, %r184;\n"
696 " // begin inline asm\n"
697 " tex.2d.v4.u32.f32 {%r130, %r131, %r132, %r133}, [%rd3, {%f6, %f5}];\n"
698 " // end inline asm\n"
699 " add.s32 %r185, %r1, -1;\n"
700 " cvt.rn.f32.s32 %f8, %r185;\n"
701 " // begin inline asm\n"
702 " tex.2d.v4.u32.f32 {%r134, %r135, %r136, %r137}, [%rd3, {%f8, %f5}];\n"
703 " // end inline asm\n"
704 " cvt.rn.f32.s32 %f10, %r1;\n"
705 " // begin inline asm\n"
706 " tex.2d.v4.u32.f32 {%r138, %r139, %r140, %r141}, [%rd3, {%f10, %f5}];\n"
707 " // end inline asm\n"
708 " add.s32 %r186, %r1, 1;\n"
709 " cvt.rn.f32.s32 %f12, %r186;\n"
710 " // begin inline asm\n"
711 " tex.2d.v4.u32.f32 {%r142, %r143, %r144, %r145}, [%rd3, {%f12, %f5}];\n"
712 " // end inline asm\n"
713 " add.s32 %r187, %r1, 2;\n"
714 " cvt.rn.f32.s32 %f14, %r187;\n"
715 " // begin inline asm\n"
716 " tex.2d.v4.u32.f32 {%r146, %r147, %r148, %r149}, [%rd3, {%f14, %f5}];\n"
717 " // end inline asm\n"
718 " add.s32 %r188, %r1, 3;\n"
719 " cvt.rn.f32.s32 %f16, %r188;\n"
720 " // begin inline asm\n"
721 " tex.2d.v4.u32.f32 {%r150, %r151, %r152, %r153}, [%rd3, {%f16, %f5}];\n"
722 " // end inline asm\n"
723 " add.s32 %r189, %r2, 1;\n"
724 " cvt.rn.f32.s32 %f19, %r189;\n"
725 " // begin inline asm\n"
726 " tex.2d.v4.u32.f32 {%r154, %r155, %r156, %r157}, [%rd3, {%f4, %f19}];\n"
727 " // end inline asm\n"
728 " // begin inline asm\n"
729 " tex.2d.v4.u32.f32 {%r158, %r159, %r160, %r161}, [%rd3, {%f6, %f19}];\n"
730 " // end inline asm\n"
731 " // begin inline asm\n"
732 " tex.2d.v4.u32.f32 {%r162, %r163, %r164, %r165}, [%rd3, {%f8, %f19}];\n"
733 " // end inline asm\n"
734 " // begin inline asm\n"
735 " tex.2d.v4.u32.f32 {%r166, %r167, %r168, %r169}, [%rd3, {%f10, %f19}];\n"
736 " // end inline asm\n"
737 " // begin inline asm\n"
738 " tex.2d.v4.u32.f32 {%r170, %r171, %r172, %r173}, [%rd3, {%f12, %f19}];\n"
739 " // end inline asm\n"
740 " // begin inline asm\n"
741 " tex.2d.v4.u32.f32 {%r174, %r175, %r176, %r177}, [%rd3, {%f14, %f19}];\n"
742 " // end inline asm\n"
743 " // begin inline asm\n"
744 " tex.2d.v4.u32.f32 {%r178, %r179, %r180, %r181}, [%rd3, {%f16, %f19}];\n"
745 " // end inline asm\n"
746 " and.b32 %r31, %r138, 255;\n"
747 " and.b32 %r32, %r166, 255;\n"
748 " add.s32 %r419, %r32, %r31;\n"
749 " and.b32 %r34, %r134, 255;\n"
750 " and.b32 %r35, %r162, 255;\n"
751 " sub.s32 %r190, %r34, %r35;\n"
752 " abs.s32 %r191, %r190;\n"
753 " sub.s32 %r192, %r31, %r32;\n"
754 " abs.s32 %r193, %r192;\n"
755 " add.s32 %r194, %r193, %r191;\n"
756 " and.b32 %r36, %r142, 255;\n"
757 " and.b32 %r37, %r170, 255;\n"
758 " sub.s32 %r195, %r36, %r37;\n"
759 " abs.s32 %r196, %r195;\n"
760 " add.s32 %r418, %r194, %r196;\n"
761 " and.b32 %r39, %r130, 255;\n"
762 " sub.s32 %r197, %r39, %r32;\n"
763 " abs.s32 %r198, %r197;\n"
764 " sub.s32 %r199, %r34, %r37;\n"
765 " abs.s32 %r200, %r199;\n"
766 " add.s32 %r201, %r200, %r198;\n"
767 " and.b32 %r40, %r174, 255;\n"
768 " sub.s32 %r202, %r31, %r40;\n"
769 " abs.s32 %r203, %r202;\n"
770 " add.s32 %r41, %r201, %r203;\n"
771 " setp.ge.s32 %p6, %r41, %r418;\n"
772 " @%p6 bra LBB2_5;\n"
773 " add.s32 %r204, %r37, %r34;\n"
774 " and.b32 %r205, %r126, 255;\n"
775 " sub.s32 %r206, %r205, %r37;\n"
776 " abs.s32 %r207, %r206;\n"
777 " sub.s32 %r208, %r39, %r40;\n"
778 " abs.s32 %r209, %r208;\n"
779 " add.s32 %r210, %r209, %r207;\n"
780 " and.b32 %r211, %r178, 255;\n"
781 " sub.s32 %r212, %r34, %r211;\n"
782 " abs.s32 %r213, %r212;\n"
783 " add.s32 %r214, %r210, %r213;\n"
784 " setp.lt.s32 %p7, %r214, %r41;\n"
785 " add.s32 %r215, %r40, %r39;\n"
786 " selp.b32 %r419, %r215, %r204, %p7;\n"
787 " min.s32 %r418, %r214, %r41;\n"
789 " ld.param.u8 %rs5, [yadif_uchar2_param_11];\n"
790 " and.b32 %r46, %r158, 255;\n"
791 " sub.s32 %r216, %r31, %r46;\n"
792 " abs.s32 %r217, %r216;\n"
793 " sub.s32 %r218, %r36, %r35;\n"
794 " abs.s32 %r219, %r218;\n"
795 " add.s32 %r220, %r219, %r217;\n"
796 " and.b32 %r47, %r146, 255;\n"
797 " sub.s32 %r221, %r47, %r32;\n"
798 " abs.s32 %r222, %r221;\n"
799 " add.s32 %r48, %r220, %r222;\n"
800 " setp.ge.s32 %p8, %r48, %r418;\n"
801 " @%p8 bra LBB2_7;\n"
802 " add.s32 %r223, %r35, %r36;\n"
803 " and.b32 %r224, %r154, 255;\n"
804 " sub.s32 %r225, %r36, %r224;\n"
805 " abs.s32 %r226, %r225;\n"
806 " sub.s32 %r227, %r47, %r46;\n"
807 " abs.s32 %r228, %r227;\n"
808 " add.s32 %r229, %r228, %r226;\n"
809 " and.b32 %r230, %r150, 255;\n"
810 " sub.s32 %r231, %r230, %r35;\n"
811 " abs.s32 %r232, %r231;\n"
812 " add.s32 %r233, %r229, %r232;\n"
813 " setp.lt.s32 %p9, %r233, %r48;\n"
814 " add.s32 %r234, %r46, %r47;\n"
815 " selp.b32 %r419, %r234, %r223, %p9;\n"
817 " and.b16 %rs6, %rs5, 1;\n"
818 " and.b32 %r52, %r139, 255;\n"
819 " and.b32 %r53, %r167, 255;\n"
820 " add.s32 %r422, %r53, %r52;\n"
821 " and.b32 %r55, %r135, 255;\n"
822 " and.b32 %r56, %r163, 255;\n"
823 " sub.s32 %r235, %r55, %r56;\n"
824 " abs.s32 %r236, %r235;\n"
825 " sub.s32 %r237, %r52, %r53;\n"
826 " abs.s32 %r238, %r237;\n"
827 " add.s32 %r239, %r238, %r236;\n"
828 " and.b32 %r57, %r143, 255;\n"
829 " and.b32 %r58, %r171, 255;\n"
830 " sub.s32 %r240, %r57, %r58;\n"
831 " abs.s32 %r241, %r240;\n"
832 " add.s32 %r421, %r239, %r241;\n"
833 " and.b32 %r60, %r131, 255;\n"
834 " sub.s32 %r242, %r60, %r53;\n"
835 " abs.s32 %r243, %r242;\n"
836 " sub.s32 %r244, %r55, %r58;\n"
837 " abs.s32 %r245, %r244;\n"
838 " add.s32 %r246, %r245, %r243;\n"
839 " and.b32 %r61, %r175, 255;\n"
840 " sub.s32 %r247, %r52, %r61;\n"
841 " abs.s32 %r248, %r247;\n"
842 " add.s32 %r62, %r246, %r248;\n"
843 " setp.ge.s32 %p10, %r62, %r421;\n"
844 " @%p10 bra LBB2_9;\n"
845 " add.s32 %r249, %r58, %r55;\n"
846 " and.b32 %r250, %r127, 255;\n"
847 " sub.s32 %r251, %r250, %r58;\n"
848 " abs.s32 %r252, %r251;\n"
849 " sub.s32 %r253, %r60, %r61;\n"
850 " abs.s32 %r254, %r253;\n"
851 " add.s32 %r255, %r254, %r252;\n"
852 " and.b32 %r256, %r179, 255;\n"
853 " sub.s32 %r257, %r55, %r256;\n"
854 " abs.s32 %r258, %r257;\n"
855 " add.s32 %r259, %r255, %r258;\n"
856 " setp.lt.s32 %p11, %r259, %r62;\n"
857 " add.s32 %r260, %r61, %r60;\n"
858 " selp.b32 %r422, %r260, %r249, %p11;\n"
859 " min.s32 %r421, %r259, %r62;\n"
861 " setp.eq.b16 %p1, %rs6, 1;\n"
862 " ld.param.u32 %r113, [yadif_uchar2_param_10];\n"
863 " ld.param.u64 %rd28, [yadif_uchar2_param_3];\n"
864 " ld.param.u64 %rd20, [yadif_uchar2_param_1];\n"
865 " and.b32 %r67, %r159, 255;\n"
866 " sub.s32 %r261, %r52, %r67;\n"
867 " abs.s32 %r262, %r261;\n"
868 " sub.s32 %r263, %r57, %r56;\n"
869 " abs.s32 %r264, %r263;\n"
870 " add.s32 %r265, %r264, %r262;\n"
871 " and.b32 %r68, %r147, 255;\n"
872 " sub.s32 %r266, %r68, %r53;\n"
873 " abs.s32 %r267, %r266;\n"
874 " add.s32 %r69, %r265, %r267;\n"
875 " setp.ge.s32 %p12, %r69, %r421;\n"
876 " @%p12 bra LBB2_11;\n"
877 " add.s32 %r268, %r56, %r57;\n"
878 " and.b32 %r269, %r155, 255;\n"
879 " sub.s32 %r270, %r57, %r269;\n"
880 " abs.s32 %r271, %r270;\n"
881 " sub.s32 %r272, %r68, %r67;\n"
882 " abs.s32 %r273, %r272;\n"
883 " add.s32 %r274, %r273, %r271;\n"
884 " and.b32 %r275, %r151, 255;\n"
885 " sub.s32 %r276, %r275, %r56;\n"
886 " abs.s32 %r277, %r276;\n"
887 " add.s32 %r278, %r274, %r277;\n"
888 " setp.lt.s32 %p13, %r278, %r69;\n"
889 " add.s32 %r279, %r67, %r68;\n"
890 " selp.b32 %r422, %r279, %r268, %p13;\n"
892 " shr.u32 %r51, %r419, 1;\n"
893 " setp.eq.s32 %p14, %r112, %r113;\n"
894 " selp.b64 %rd22, %rd3, %rd20, %p14;\n"
895 " selp.b64 %rd25, %rd28, %rd3, %p14;\n"
896 " // begin inline asm\n"
897 " tex.2d.v4.u32.f32 {%r280, %r281, %r282, %r283}, [%rd20, {%f10, %f5}];\n"
898 " // end inline asm\n"
899 " // begin inline asm\n"
900 " tex.2d.v4.u32.f32 {%r284, %r285, %r286, %r287}, [%rd20, {%f10, %f19}];\n"
901 " // end inline asm\n"
902 " add.s32 %r320, %r2, -2;\n"
903 " cvt.rn.f32.s32 %f37, %r320;\n"
904 " // begin inline asm\n"
905 " tex.2d.v4.u32.f32 {%r288, %r289, %r290, %r291}, [%rd22, {%f10, %f37}];\n"
906 " // end inline asm\n"
907 " cvt.rn.f32.s32 %f39, %r2;\n"
908 " // begin inline asm\n"
909 " tex.2d.v4.u32.f32 {%r292, %r293, %r294, %r295}, [%rd22, {%f10, %f39}];\n"
910 " // end inline asm\n"
911 " add.s32 %r321, %r2, 2;\n"
912 " cvt.rn.f32.s32 %f41, %r321;\n"
913 " // begin inline asm\n"
914 " tex.2d.v4.u32.f32 {%r296, %r297, %r298, %r299}, [%rd22, {%f10, %f41}];\n"
915 " // end inline asm\n"
916 " // begin inline asm\n"
917 " tex.2d.v4.u32.f32 {%r300, %r301, %r302, %r303}, [%rd25, {%f10, %f37}];\n"
918 " // end inline asm\n"
919 " // begin inline asm\n"
920 " tex.2d.v4.u32.f32 {%r304, %r305, %r306, %r307}, [%rd25, {%f10, %f39}];\n"
921 " // end inline asm\n"
922 " // begin inline asm\n"
923 " tex.2d.v4.u32.f32 {%r308, %r309, %r310, %r311}, [%rd25, {%f10, %f41}];\n"
924 " // end inline asm\n"
925 " // begin inline asm\n"
926 " tex.2d.v4.u32.f32 {%r312, %r313, %r314, %r315}, [%rd28, {%f10, %f5}];\n"
927 " // end inline asm\n"
928 " // begin inline asm\n"
929 " tex.2d.v4.u32.f32 {%r316, %r317, %r318, %r319}, [%rd28, {%f10, %f19}];\n"
930 " // end inline asm\n"
931 " and.b32 %r322, %r292, 255;\n"
932 " and.b32 %r323, %r304, 255;\n"
933 " add.s32 %r324, %r323, %r322;\n"
934 " shr.u32 %r99, %r324, 1;\n"
935 " sub.s32 %r325, %r322, %r323;\n"
936 " abs.s32 %r326, %r325;\n"
937 " and.b32 %r327, %r280, 255;\n"
938 " sub.s32 %r328, %r327, %r31;\n"
939 " abs.s32 %r329, %r328;\n"
940 " and.b32 %r330, %r284, 255;\n"
941 " sub.s32 %r331, %r330, %r32;\n"
942 " abs.s32 %r332, %r331;\n"
943 " add.s32 %r333, %r332, %r329;\n"
944 " shr.u32 %r334, %r333, 1;\n"
945 " and.b32 %r335, %r312, 255;\n"
946 " sub.s32 %r336, %r335, %r31;\n"
947 " abs.s32 %r337, %r336;\n"
948 " and.b32 %r338, %r316, 255;\n"
949 " sub.s32 %r339, %r32, %r338;\n"
950 " abs.s32 %r340, %r339;\n"
951 " add.s32 %r341, %r340, %r337;\n"
952 " shr.u32 %r342, %r341, 1;\n"
953 " max.s32 %r343, %r326, %r334;\n"
954 " max.s32 %r423, %r343, %r342;\n"
955 " @%p1 bra LBB2_13;\n"
956 " cvt.u16.u32 %rs1, %r138;\n"
957 " cvt.u16.u32 %rs3, %r166;\n"
958 " and.b32 %r344, %r296, 255;\n"
959 " and.b32 %r345, %r308, 255;\n"
960 " add.s32 %r346, %r345, %r344;\n"
961 " shr.u32 %r347, %r346, 1;\n"
962 " and.b32 %r348, %r288, 255;\n"
963 " and.b32 %r349, %r300, 255;\n"
964 " add.s32 %r350, %r349, %r348;\n"
965 " shr.u32 %r351, %r350, 1;\n"
966 " sub.s32 %r352, %r99, %r32;\n"
967 " sub.s32 %r353, %r99, %r31;\n"
968 " sub.s32 %r354, %r351, %r31;\n"
969 " sub.s32 %r355, %r347, %r32;\n"
970 " min.s32 %r356, %r354, %r355;\n"
971 " and.b16 %rs7, %rs3, 255;\n"
972 " and.b16 %rs8, %rs1, 255;\n"
973 " setp.gt.u16 %p15, %rs8, %rs7;\n"
974 " selp.b32 %r357, %r352, %r353, %p15;\n"
975 " max.s32 %r358, %r357, %r356;\n"
976 " max.s32 %r359, %r354, %r355;\n"
977 " setp.lt.u16 %p16, %rs8, %rs7;\n"
978 " selp.b32 %r360, %r352, %r353, %p16;\n"
979 " min.s32 %r361, %r360, %r359;\n"
980 " neg.s32 %r362, %r358;\n"
981 " max.s32 %r363, %r423, %r361;\n"
982 " max.s32 %r423, %r363, %r362;\n"
984 " shr.u32 %r72, %r422, 1;\n"
985 " add.s32 %r364, %r423, %r99;\n"
986 " min.s32 %r365, %r364, %r51;\n"
987 " sub.s32 %r366, %r99, %r423;\n"
988 " and.b32 %r367, %r293, 255;\n"
989 " and.b32 %r368, %r305, 255;\n"
990 " add.s32 %r369, %r368, %r367;\n"
991 " shr.u32 %r104, %r369, 1;\n"
992 " sub.s32 %r370, %r367, %r368;\n"
993 " abs.s32 %r371, %r370;\n"
994 " and.b32 %r372, %r281, 255;\n"
995 " sub.s32 %r373, %r372, %r52;\n"
996 " abs.s32 %r374, %r373;\n"
997 " and.b32 %r375, %r285, 255;\n"
998 " sub.s32 %r376, %r375, %r53;\n"
999 " abs.s32 %r377, %r376;\n"
1000 " add.s32 %r378, %r377, %r374;\n"
1001 " shr.u32 %r379, %r378, 1;\n"
1002 " and.b32 %r380, %r313, 255;\n"
1003 " sub.s32 %r381, %r380, %r52;\n"
1004 " abs.s32 %r382, %r381;\n"
1005 " and.b32 %r383, %r317, 255;\n"
1006 " sub.s32 %r384, %r53, %r383;\n"
1007 " abs.s32 %r385, %r384;\n"
1008 " add.s32 %r386, %r385, %r382;\n"
1009 " shr.u32 %r387, %r386, 1;\n"
1010 " max.s32 %r388, %r371, %r379;\n"
1011 " max.s32 %r424, %r388, %r387;\n"
1012 " @%p1 bra LBB2_15;\n"
1013 " cvt.u16.u32 %rs2, %r139;\n"
1014 " cvt.u16.u32 %rs4, %r167;\n"
1015 " and.b32 %r389, %r297, 255;\n"
1016 " and.b32 %r390, %r309, 255;\n"
1017 " add.s32 %r391, %r390, %r389;\n"
1018 " shr.u32 %r392, %r391, 1;\n"
1019 " and.b32 %r393, %r289, 255;\n"
1020 " and.b32 %r394, %r301, 255;\n"
1021 " add.s32 %r395, %r394, %r393;\n"
1022 " shr.u32 %r396, %r395, 1;\n"
1023 " sub.s32 %r397, %r104, %r53;\n"
1024 " sub.s32 %r398, %r104, %r52;\n"
1025 " sub.s32 %r399, %r396, %r52;\n"
1026 " sub.s32 %r400, %r392, %r53;\n"
1027 " min.s32 %r401, %r399, %r400;\n"
1028 " and.b16 %rs9, %rs4, 255;\n"
1029 " and.b16 %rs10, %rs2, 255;\n"
1030 " setp.gt.u16 %p17, %rs10, %rs9;\n"
1031 " selp.b32 %r402, %r397, %r398, %p17;\n"
1032 " max.s32 %r403, %r402, %r401;\n"
1033 " max.s32 %r404, %r399, %r400;\n"
1034 " setp.lt.u16 %p18, %rs10, %rs9;\n"
1035 " selp.b32 %r405, %r397, %r398, %p18;\n"
1036 " min.s32 %r406, %r405, %r404;\n"
1037 " neg.s32 %r407, %r403;\n"
1038 " max.s32 %r408, %r424, %r406;\n"
1039 " max.s32 %r424, %r408, %r407;\n"
1041 " max.s32 %r426, %r366, %r365;\n"
1042 " add.s32 %r409, %r424, %r104;\n"
1043 " min.s32 %r410, %r409, %r72;\n"
1044 " sub.s32 %r411, %r104, %r424;\n"
1045 " max.s32 %r425, %r411, %r410;\n"
1047 " cvt.u16.u32 %rs11, %r426;\n"
1048 " cvt.u16.u32 %rs12, %r425;\n"
1049 " mad.lo.s32 %r416, %r2, %r111, %r1;\n"
1050 " mul.wide.s32 %rd31, %r416, 2;\n"
1051 " add.s64 %rd32, %rd1, %rd31;\n"
1052 " st.global.v2.u8 [%rd32], {%rs11, %rs12};\n"
// ---------------------------------------------------------------------------
// PTX kernel yadif_ushort2 (compiler-generated, stored as C string literals).
//
// Each thread handles one output element at x = ntid.x*ctaid.x + tid.x,
// y = ntid.y*ctaid.y + tid.y and writes two 16-bit components.
//
// Parameters, as used by the instructions below:
//   param_0  u64  destination pointer (converted with cvta.to.global)
//   param_1  u64  texture operand of tex.2d
//   param_2  u64  texture operand of tex.2d; also the source of the
//                 pass-through fetch and of the 14-sample neighbourhood
//   param_3  u64  texture operand of tex.2d
//   param_4  u32  x bound: threads with x >= param_4 branch to LBB3_17
//   param_5  u32  y bound: threads with y >= param_5 branch to LBB3_17
//   param_6  u32  row stride in 4-byte elements (offset = (y*param_6 + x)*4)
//   param_7, param_8  declared but not read by any instruction in this listing
//   param_9  u32  compared against the signed remainder y % 2
//   param_10 u32  compared against param_9 to choose which textures feed the
//                 three-row fetches
//   param_11 u8   bit 0 set skips the extra min/max refinement of the bound
//
// Notation used in the comments: A(d) is the sample at (x+d, y-1) and B(d) the
// sample at (x+d, y+1), both fetched from param_2. "ch0" is the first
// returned texture component, "ch1" the second; both are masked to 16 bits.
//
// NOTE(review): no label lines (LBB3_3, LBB3_5, ... LBB3_17), no braces and no
// ret appear in this listing, so the branch targets mentioned below cannot be
// located here; where a comment names a join point it is marked "presumably".
// ---------------------------------------------------------------------------
1057 " // .globl yadif_ushort2\n"
1058 ".visible .entry yadif_ushort2(\n"
1059 " .param .u64 yadif_ushort2_param_0,\n"
1060 " .param .u64 yadif_ushort2_param_1,\n"
1061 " .param .u64 yadif_ushort2_param_2,\n"
1062 " .param .u64 yadif_ushort2_param_3,\n"
1063 " .param .u32 yadif_ushort2_param_4,\n"
1064 " .param .u32 yadif_ushort2_param_5,\n"
1065 " .param .u32 yadif_ushort2_param_6,\n"
1066 " .param .u32 yadif_ushort2_param_7,\n"
1067 " .param .u32 yadif_ushort2_param_8,\n"
1068 " .param .u32 yadif_ushort2_param_9,\n"
1069 " .param .u32 yadif_ushort2_param_10,\n"
1070 " .param .u8 yadif_ushort2_param_11\n"
// Virtual register declarations for this kernel.
1073 " .reg .pred %p<19>;\n"
1074 " .reg .b16 %rs<9>;\n"
1075 " .reg .f32 %f<54>;\n"
1076 " .reg .b32 %r<427>;\n"
1077 " .reg .b64 %rd<33>;\n"
// Thread coordinates: %r1 = x, %r2 = y.
1079 " // begin inline asm\n"
1080 " mov.u32 %r114, %ctaid.x;\n"
1081 " // end inline asm\n"
1082 " // begin inline asm\n"
1083 " mov.u32 %r115, %ctaid.y;\n"
1084 " // end inline asm\n"
1085 " ld.param.u32 %r120, [yadif_ushort2_param_4];\n"
1086 " // begin inline asm\n"
1087 " mov.u32 %r116, %ntid.x;\n"
1088 " // end inline asm\n"
1089 " ld.param.u32 %r121, [yadif_ushort2_param_5];\n"
1090 " // begin inline asm\n"
1091 " mov.u32 %r117, %ntid.y;\n"
1092 " // end inline asm\n"
1093 " // begin inline asm\n"
1094 " mov.u32 %r118, %tid.x;\n"
1095 " // end inline asm\n"
1096 " // begin inline asm\n"
1097 " mov.u32 %r119, %tid.y;\n"
1098 " // end inline asm\n"
1099 " mad.lo.s32 %r1, %r116, %r114, %r118;\n"
1100 " mad.lo.s32 %r2, %r117, %r115, %r119;\n"
// Bounds guard: x >= param_4 or y >= param_5 branches to LBB3_17.
1101 " setp.ge.s32 %p2, %r1, %r120;\n"
1102 " setp.ge.s32 %p3, %r2, %r121;\n"
1103 " or.pred %p4, %p2, %p3;\n"
1104 " @%p4 bra LBB3_17;\n"
1105 " ld.param.u32 %r112, [yadif_ushort2_param_9];\n"
1106 " ld.param.u32 %r111, [yadif_ushort2_param_6];\n"
1107 " ld.param.u64 %rd3, [yadif_ushort2_param_2];\n"
1108 " ld.param.u64 %rd5, [yadif_ushort2_param_0];\n"
1109 " cvta.to.global.u64 %rd1, %rd5;\n"
// %r125 = y - ((y + (y >>> 31)) & -2), i.e. the signed remainder of y / 2.
// If it differs from param_9 the thread branches to LBB3_3.
1110 " shr.u32 %r122, %r2, 31;\n"
1111 " add.s32 %r123, %r2, %r122;\n"
1112 " and.b32 %r124, %r123, -2;\n"
1113 " sub.s32 %r125, %r2, %r124;\n"
1114 " setp.ne.s32 %p5, %r125, %r112;\n"
1115 " @%p5 bra LBB3_3;\n"
// Pass-through path: fetch the param_2 texel at (x, y) straight into the
// output registers %r426 (ch0) and %r425 (ch1), then jump to LBB3_16.
1116 " cvt.rn.f32.s32 %f52, %r1;\n"
1117 " cvt.rn.f32.s32 %f53, %r2;\n"
1118 " // begin inline asm\n"
1119 " tex.2d.v4.u32.f32 {%r426, %r425, %r414, %r415}, [%rd3, {%f52, %f53}];\n"
1120 " // end inline asm\n"
1121 " bra.uni LBB3_16;\n"
// Presumably the LBB3_3 target (interpolation path).
// Fetch A(-3..+3): seven param_2 texels on row y-1 (%f5), columns x-3..x+3.
1123 " add.s32 %r182, %r1, -3;\n"
1124 " cvt.rn.f32.s32 %f4, %r182;\n"
1125 " add.s32 %r183, %r2, -1;\n"
1126 " cvt.rn.f32.s32 %f5, %r183;\n"
1127 " // begin inline asm\n"
1128 " tex.2d.v4.u32.f32 {%r126, %r127, %r128, %r129}, [%rd3, {%f4, %f5}];\n"
1129 " // end inline asm\n"
1130 " add.s32 %r184, %r1, -2;\n"
1131 " cvt.rn.f32.s32 %f6, %r184;\n"
1132 " // begin inline asm\n"
1133 " tex.2d.v4.u32.f32 {%r130, %r131, %r132, %r133}, [%rd3, {%f6, %f5}];\n"
1134 " // end inline asm\n"
1135 " add.s32 %r185, %r1, -1;\n"
1136 " cvt.rn.f32.s32 %f8, %r185;\n"
1137 " // begin inline asm\n"
1138 " tex.2d.v4.u32.f32 {%r134, %r135, %r136, %r137}, [%rd3, {%f8, %f5}];\n"
1139 " // end inline asm\n"
1140 " cvt.rn.f32.s32 %f10, %r1;\n"
1141 " // begin inline asm\n"
1142 " tex.2d.v4.u32.f32 {%r138, %r139, %r140, %r141}, [%rd3, {%f10, %f5}];\n"
1143 " // end inline asm\n"
1144 " add.s32 %r186, %r1, 1;\n"
1145 " cvt.rn.f32.s32 %f12, %r186;\n"
1146 " // begin inline asm\n"
1147 " tex.2d.v4.u32.f32 {%r142, %r143, %r144, %r145}, [%rd3, {%f12, %f5}];\n"
1148 " // end inline asm\n"
1149 " add.s32 %r187, %r1, 2;\n"
1150 " cvt.rn.f32.s32 %f14, %r187;\n"
1151 " // begin inline asm\n"
1152 " tex.2d.v4.u32.f32 {%r146, %r147, %r148, %r149}, [%rd3, {%f14, %f5}];\n"
1153 " // end inline asm\n"
1154 " add.s32 %r188, %r1, 3;\n"
1155 " cvt.rn.f32.s32 %f16, %r188;\n"
1156 " // begin inline asm\n"
1157 " tex.2d.v4.u32.f32 {%r150, %r151, %r152, %r153}, [%rd3, {%f16, %f5}];\n"
1158 " // end inline asm\n"
// Fetch B(-3..+3): the same seven columns on row y+1 (%f19).
1159 " add.s32 %r189, %r2, 1;\n"
1160 " cvt.rn.f32.s32 %f19, %r189;\n"
1161 " // begin inline asm\n"
1162 " tex.2d.v4.u32.f32 {%r154, %r155, %r156, %r157}, [%rd3, {%f4, %f19}];\n"
1163 " // end inline asm\n"
1164 " // begin inline asm\n"
1165 " tex.2d.v4.u32.f32 {%r158, %r159, %r160, %r161}, [%rd3, {%f6, %f19}];\n"
1166 " // end inline asm\n"
1167 " // begin inline asm\n"
1168 " tex.2d.v4.u32.f32 {%r162, %r163, %r164, %r165}, [%rd3, {%f8, %f19}];\n"
1169 " // end inline asm\n"
1170 " // begin inline asm\n"
1171 " tex.2d.v4.u32.f32 {%r166, %r167, %r168, %r169}, [%rd3, {%f10, %f19}];\n"
1172 " // end inline asm\n"
1173 " // begin inline asm\n"
1174 " tex.2d.v4.u32.f32 {%r170, %r171, %r172, %r173}, [%rd3, {%f12, %f19}];\n"
1175 " // end inline asm\n"
1176 " // begin inline asm\n"
1177 " tex.2d.v4.u32.f32 {%r174, %r175, %r176, %r177}, [%rd3, {%f14, %f19}];\n"
1178 " // end inline asm\n"
1179 " // begin inline asm\n"
1180 " tex.2d.v4.u32.f32 {%r178, %r179, %r180, %r181}, [%rd3, {%f16, %f19}];\n"
1181 " // end inline asm\n"
// ch0 spatial prediction. %r419 holds the current pair sum (halved later),
// %r418 the current best score.
// Start: %r419 = A(0)+B(0),
//        %r418 = |A(-1)-B(-1)| + |A(0)-B(0)| + |A(+1)-B(+1)|.
1182 " and.b32 %r31, %r138, 65535;\n"
1183 " and.b32 %r32, %r166, 65535;\n"
1184 " add.s32 %r419, %r32, %r31;\n"
1185 " and.b32 %r34, %r134, 65535;\n"
1186 " and.b32 %r35, %r162, 65535;\n"
1187 " sub.s32 %r190, %r34, %r35;\n"
1188 " abs.s32 %r191, %r190;\n"
1189 " sub.s32 %r192, %r31, %r32;\n"
1190 " abs.s32 %r193, %r192;\n"
1191 " add.s32 %r194, %r193, %r191;\n"
1192 " and.b32 %r36, %r142, 65535;\n"
1193 " and.b32 %r37, %r170, 65535;\n"
1194 " sub.s32 %r195, %r36, %r37;\n"
1195 " abs.s32 %r196, %r195;\n"
1196 " add.s32 %r418, %r194, %r196;\n"
// Candidate %r41 = |A(-2)-B(0)| + |A(-1)-B(+1)| + |A(0)-B(+2)|.
// If it is not below the current score, branch to LBB3_5.
1197 " and.b32 %r39, %r130, 65535;\n"
1198 " sub.s32 %r197, %r39, %r32;\n"
1199 " abs.s32 %r198, %r197;\n"
1200 " sub.s32 %r199, %r34, %r37;\n"
1201 " abs.s32 %r200, %r199;\n"
1202 " add.s32 %r201, %r200, %r198;\n"
1203 " and.b32 %r40, %r174, 65535;\n"
1204 " sub.s32 %r202, %r31, %r40;\n"
1205 " abs.s32 %r203, %r202;\n"
1206 " add.s32 %r41, %r201, %r203;\n"
1207 " setp.ge.s32 %p6, %r41, %r418;\n"
1208 " @%p6 bra LBB3_5;\n"
// Wider candidate %r214 = |A(-3)-B(+1)| + |A(-2)-B(+2)| + |A(-1)-B(+3)|.
// %r419 becomes A(-2)+B(+2) if %r214 < %r41, otherwise A(-1)+B(+1);
// the score becomes min(%r214, %r41).
1209 " add.s32 %r204, %r37, %r34;\n"
1210 " and.b32 %r205, %r126, 65535;\n"
1211 " sub.s32 %r206, %r205, %r37;\n"
1212 " abs.s32 %r207, %r206;\n"
1213 " sub.s32 %r208, %r39, %r40;\n"
1214 " abs.s32 %r209, %r208;\n"
1215 " add.s32 %r210, %r209, %r207;\n"
1216 " and.b32 %r211, %r178, 65535;\n"
1217 " sub.s32 %r212, %r34, %r211;\n"
1218 " abs.s32 %r213, %r212;\n"
1219 " add.s32 %r214, %r210, %r213;\n"
1220 " setp.lt.s32 %p7, %r214, %r41;\n"
1221 " add.s32 %r215, %r40, %r39;\n"
1222 " selp.b32 %r419, %r215, %r204, %p7;\n"
1223 " min.s32 %r418, %r214, %r41;\n"
// Presumably the LBB3_5 join point. Load the param_11 flag byte, then test
// the mirrored candidate
// %r48 = |A(0)-B(-2)| + |A(+1)-B(-1)| + |A(+2)-B(0)|.
// If it is not below the current score, branch to LBB3_7.
1225 " ld.param.u8 %rs5, [yadif_ushort2_param_11];\n"
1226 " and.b32 %r46, %r158, 65535;\n"
1227 " sub.s32 %r216, %r31, %r46;\n"
1228 " abs.s32 %r217, %r216;\n"
1229 " sub.s32 %r218, %r36, %r35;\n"
1230 " abs.s32 %r219, %r218;\n"
1231 " add.s32 %r220, %r219, %r217;\n"
1232 " and.b32 %r47, %r146, 65535;\n"
1233 " sub.s32 %r221, %r47, %r32;\n"
1234 " abs.s32 %r222, %r221;\n"
1235 " add.s32 %r48, %r220, %r222;\n"
1236 " setp.ge.s32 %p8, %r48, %r418;\n"
1237 " @%p8 bra LBB3_7;\n"
// Wider mirrored candidate
// %r233 = |A(+1)-B(-3)| + |A(+2)-B(-2)| + |A(+3)-B(-1)|.
// %r419 becomes A(+2)+B(-2) if %r233 < %r48, otherwise A(+1)+B(-1).
1238 " add.s32 %r223, %r35, %r36;\n"
1239 " and.b32 %r224, %r154, 65535;\n"
1240 " sub.s32 %r225, %r36, %r224;\n"
1241 " abs.s32 %r226, %r225;\n"
1242 " sub.s32 %r227, %r47, %r46;\n"
1243 " abs.s32 %r228, %r227;\n"
1244 " add.s32 %r229, %r228, %r226;\n"
1245 " and.b32 %r230, %r150, 65535;\n"
1246 " sub.s32 %r231, %r230, %r35;\n"
1247 " abs.s32 %r232, %r231;\n"
1248 " add.s32 %r233, %r229, %r232;\n"
1249 " setp.lt.s32 %p9, %r233, %r48;\n"
1250 " add.s32 %r234, %r46, %r47;\n"
1251 " selp.b32 %r419, %r234, %r223, %p9;\n"
// Presumably the LBB3_7 join point. Isolate bit 0 of the param_11 byte, then
// repeat the same spatial search for ch1 (second texture component):
// %r422 is the pair sum and %r421 the score.
1253 " and.b16 %rs6, %rs5, 1;\n"
1254 " and.b32 %r52, %r139, 65535;\n"
1255 " and.b32 %r53, %r167, 65535;\n"
1256 " add.s32 %r422, %r53, %r52;\n"
1257 " and.b32 %r55, %r135, 65535;\n"
1258 " and.b32 %r56, %r163, 65535;\n"
1259 " sub.s32 %r235, %r55, %r56;\n"
1260 " abs.s32 %r236, %r235;\n"
1261 " sub.s32 %r237, %r52, %r53;\n"
1262 " abs.s32 %r238, %r237;\n"
1263 " add.s32 %r239, %r238, %r236;\n"
1264 " and.b32 %r57, %r143, 65535;\n"
1265 " and.b32 %r58, %r171, 65535;\n"
1266 " sub.s32 %r240, %r57, %r58;\n"
1267 " abs.s32 %r241, %r240;\n"
1268 " add.s32 %r421, %r239, %r241;\n"
// ch1 candidate %r62 = |A(-2)-B(0)| + |A(-1)-B(+1)| + |A(0)-B(+2)|.
1269 " and.b32 %r60, %r131, 65535;\n"
1270 " sub.s32 %r242, %r60, %r53;\n"
1271 " abs.s32 %r243, %r242;\n"
1272 " sub.s32 %r244, %r55, %r58;\n"
1273 " abs.s32 %r245, %r244;\n"
1274 " add.s32 %r246, %r245, %r243;\n"
1275 " and.b32 %r61, %r175, 65535;\n"
1276 " sub.s32 %r247, %r52, %r61;\n"
1277 " abs.s32 %r248, %r247;\n"
1278 " add.s32 %r62, %r246, %r248;\n"
1279 " setp.ge.s32 %p10, %r62, %r421;\n"
1280 " @%p10 bra LBB3_9;\n"
// ch1 wider candidate %r259; %r422 and %r421 are updated as for ch0.
1281 " add.s32 %r249, %r58, %r55;\n"
1282 " and.b32 %r250, %r127, 65535;\n"
1283 " sub.s32 %r251, %r250, %r58;\n"
1284 " abs.s32 %r252, %r251;\n"
1285 " sub.s32 %r253, %r60, %r61;\n"
1286 " abs.s32 %r254, %r253;\n"
1287 " add.s32 %r255, %r254, %r252;\n"
1288 " and.b32 %r256, %r179, 65535;\n"
1289 " sub.s32 %r257, %r55, %r256;\n"
1290 " abs.s32 %r258, %r257;\n"
1291 " add.s32 %r259, %r255, %r258;\n"
1292 " setp.lt.s32 %p11, %r259, %r62;\n"
1293 " add.s32 %r260, %r61, %r60;\n"
1294 " selp.b32 %r422, %r260, %r249, %p11;\n"
1295 " min.s32 %r421, %r259, %r62;\n"
// Presumably the LBB3_9 join point. %p1 = (bit 0 of param_11 is set).
// Load param_10 and the two remaining texture operands, then test the ch1
// mirrored candidate %r69 = |A(0)-B(-2)| + |A(+1)-B(-1)| + |A(+2)-B(0)|.
1297 " setp.eq.b16 %p1, %rs6, 1;\n"
1298 " ld.param.u32 %r113, [yadif_ushort2_param_10];\n"
1299 " ld.param.u64 %rd28, [yadif_ushort2_param_3];\n"
1300 " ld.param.u64 %rd20, [yadif_ushort2_param_1];\n"
1301 " and.b32 %r67, %r159, 65535;\n"
1302 " sub.s32 %r261, %r52, %r67;\n"
1303 " abs.s32 %r262, %r261;\n"
1304 " sub.s32 %r263, %r57, %r56;\n"
1305 " abs.s32 %r264, %r263;\n"
1306 " add.s32 %r265, %r264, %r262;\n"
1307 " and.b32 %r68, %r147, 65535;\n"
1308 " sub.s32 %r266, %r68, %r53;\n"
1309 " abs.s32 %r267, %r266;\n"
1310 " add.s32 %r69, %r265, %r267;\n"
1311 " setp.ge.s32 %p12, %r69, %r421;\n"
1312 " @%p12 bra LBB3_11;\n"
// ch1 wider mirrored candidate %r278; selects the ch1 pair sum %r422.
1313 " add.s32 %r268, %r56, %r57;\n"
1314 " and.b32 %r269, %r155, 65535;\n"
1315 " sub.s32 %r270, %r57, %r269;\n"
1316 " abs.s32 %r271, %r270;\n"
1317 " sub.s32 %r272, %r68, %r67;\n"
1318 " abs.s32 %r273, %r272;\n"
1319 " add.s32 %r274, %r273, %r271;\n"
1320 " and.b32 %r275, %r151, 65535;\n"
1321 " sub.s32 %r276, %r275, %r56;\n"
1322 " abs.s32 %r277, %r276;\n"
1323 " add.s32 %r278, %r274, %r277;\n"
1324 " setp.lt.s32 %p13, %r278, %r69;\n"
1325 " add.s32 %r279, %r67, %r68;\n"
1326 " selp.b32 %r422, %r279, %r268, %p13;\n"
// Presumably the LBB3_11 join point. %r51 = ch0 spatial prediction (sum / 2).
// Texture selection: with %p14 = (param_9 == param_10),
//   %rd22 = %p14 ? param_2 : param_1
//   %rd25 = %p14 ? param_3 : param_2
1328 " shr.u32 %r51, %r419, 1;\n"
1329 " setp.eq.s32 %p14, %r112, %r113;\n"
1330 " selp.b64 %rd22, %rd3, %rd20, %p14;\n"
1331 " selp.b64 %rd25, %rd28, %rd3, %p14;\n"
// Column-x fetches: param_1 at rows y-1 and y+1; %rd22 and %rd25 at rows
// y-2, y and y+2; param_3 at rows y-1 and y+1.
1332 " // begin inline asm\n"
1333 " tex.2d.v4.u32.f32 {%r280, %r281, %r282, %r283}, [%rd20, {%f10, %f5}];\n"
1334 " // end inline asm\n"
1335 " // begin inline asm\n"
1336 " tex.2d.v4.u32.f32 {%r284, %r285, %r286, %r287}, [%rd20, {%f10, %f19}];\n"
1337 " // end inline asm\n"
1338 " add.s32 %r320, %r2, -2;\n"
1339 " cvt.rn.f32.s32 %f37, %r320;\n"
1340 " // begin inline asm\n"
1341 " tex.2d.v4.u32.f32 {%r288, %r289, %r290, %r291}, [%rd22, {%f10, %f37}];\n"
1342 " // end inline asm\n"
1343 " cvt.rn.f32.s32 %f39, %r2;\n"
1344 " // begin inline asm\n"
1345 " tex.2d.v4.u32.f32 {%r292, %r293, %r294, %r295}, [%rd22, {%f10, %f39}];\n"
1346 " // end inline asm\n"
1347 " add.s32 %r321, %r2, 2;\n"
1348 " cvt.rn.f32.s32 %f41, %r321;\n"
1349 " // begin inline asm\n"
1350 " tex.2d.v4.u32.f32 {%r296, %r297, %r298, %r299}, [%rd22, {%f10, %f41}];\n"
1351 " // end inline asm\n"
1352 " // begin inline asm\n"
1353 " tex.2d.v4.u32.f32 {%r300, %r301, %r302, %r303}, [%rd25, {%f10, %f37}];\n"
1354 " // end inline asm\n"
1355 " // begin inline asm\n"
1356 " tex.2d.v4.u32.f32 {%r304, %r305, %r306, %r307}, [%rd25, {%f10, %f39}];\n"
1357 " // end inline asm\n"
1358 " // begin inline asm\n"
1359 " tex.2d.v4.u32.f32 {%r308, %r309, %r310, %r311}, [%rd25, {%f10, %f41}];\n"
1360 " // end inline asm\n"
1361 " // begin inline asm\n"
1362 " tex.2d.v4.u32.f32 {%r312, %r313, %r314, %r315}, [%rd28, {%f10, %f5}];\n"
1363 " // end inline asm\n"
1364 " // begin inline asm\n"
1365 " tex.2d.v4.u32.f32 {%r316, %r317, %r318, %r319}, [%rd28, {%f10, %f19}];\n"
1366 " // end inline asm\n"
// ch0 bound. %r99 = average of the %rd22 and %rd25 samples at (x, y).
// %r423 = max of: |%rd22(x,y) - %rd25(x,y)|,
//                 (|param_1(x,y-1) - A(0)| + |param_1(x,y+1) - B(0)|) / 2,
//                 (|param_3(x,y-1) - A(0)| + |B(0) - param_3(x,y+1)|) / 2.
1367 " and.b32 %r322, %r292, 65535;\n"
1368 " and.b32 %r323, %r304, 65535;\n"
1369 " add.s32 %r324, %r323, %r322;\n"
1370 " shr.u32 %r99, %r324, 1;\n"
1371 " sub.s32 %r325, %r322, %r323;\n"
1372 " abs.s32 %r326, %r325;\n"
1373 " and.b32 %r327, %r280, 65535;\n"
1374 " sub.s32 %r328, %r327, %r31;\n"
1375 " abs.s32 %r329, %r328;\n"
1376 " and.b32 %r330, %r284, 65535;\n"
1377 " sub.s32 %r331, %r330, %r32;\n"
1378 " abs.s32 %r332, %r331;\n"
1379 " add.s32 %r333, %r332, %r329;\n"
1380 " shr.u32 %r334, %r333, 1;\n"
1381 " and.b32 %r335, %r312, 65535;\n"
1382 " sub.s32 %r336, %r335, %r31;\n"
1383 " abs.s32 %r337, %r336;\n"
1384 " and.b32 %r338, %r316, 65535;\n"
1385 " sub.s32 %r339, %r32, %r338;\n"
1386 " abs.s32 %r340, %r339;\n"
1387 " add.s32 %r341, %r340, %r337;\n"
1388 " shr.u32 %r342, %r341, 1;\n"
1389 " max.s32 %r343, %r326, %r334;\n"
1390 " max.s32 %r423, %r343, %r342;\n"
// When bit 0 of param_11 is set, skip the ch0 refinement (branch to LBB3_13).
1391 " @%p1 bra LBB3_13;\n"
// ch0 refinement using the rows y-2 and y+2 averages (%r351, %r347):
//   %r352 = %r99 - B(0), %r353 = %r99 - A(0),
//   %r354 = %r351 - A(0), %r355 = %r347 - B(0).
// The u16 compares of A(0) and B(0) pick the larger (%r357) and the smaller
// (%r360) of %r352/%r353 without a min/max on the pair itself.
//   %r358 = max(larger, min(%r354, %r355))
//   %r361 = min(smaller, max(%r354, %r355))
//   %r423 = max(%r423, %r361, -%r358)
1392 " cvt.u16.u32 %rs1, %r138;\n"
1393 " cvt.u16.u32 %rs3, %r166;\n"
1394 " and.b32 %r344, %r296, 65535;\n"
1395 " and.b32 %r345, %r308, 65535;\n"
1396 " add.s32 %r346, %r345, %r344;\n"
1397 " shr.u32 %r347, %r346, 1;\n"
1398 " and.b32 %r348, %r288, 65535;\n"
1399 " and.b32 %r349, %r300, 65535;\n"
1400 " add.s32 %r350, %r349, %r348;\n"
1401 " shr.u32 %r351, %r350, 1;\n"
1402 " sub.s32 %r352, %r99, %r32;\n"
1403 " sub.s32 %r353, %r99, %r31;\n"
1404 " sub.s32 %r354, %r351, %r31;\n"
1405 " sub.s32 %r355, %r347, %r32;\n"
1406 " min.s32 %r356, %r354, %r355;\n"
1407 " setp.gt.u16 %p15, %rs1, %rs3;\n"
1408 " selp.b32 %r357, %r352, %r353, %p15;\n"
1409 " max.s32 %r358, %r357, %r356;\n"
1410 " max.s32 %r359, %r354, %r355;\n"
1411 " setp.lt.u16 %p16, %rs1, %rs3;\n"
1412 " selp.b32 %r360, %r352, %r353, %p16;\n"
1413 " min.s32 %r361, %r360, %r359;\n"
1414 " neg.s32 %r362, %r358;\n"
1415 " max.s32 %r363, %r423, %r361;\n"
1416 " max.s32 %r423, %r363, %r362;\n"
// Presumably the LBB3_13 join point. %r72 = ch1 spatial prediction (sum / 2).
// Start the ch0 clamp: %r365 = min(%r99 + %r423, %r51), %r366 = %r99 - %r423.
// Then compute the ch1 average %r104 and bound %r424 exactly as for ch0,
// using the second texture component.
1418 " shr.u32 %r72, %r422, 1;\n"
1419 " add.s32 %r364, %r423, %r99;\n"
1420 " min.s32 %r365, %r364, %r51;\n"
1421 " sub.s32 %r366, %r99, %r423;\n"
1422 " and.b32 %r367, %r293, 65535;\n"
1423 " and.b32 %r368, %r305, 65535;\n"
1424 " add.s32 %r369, %r368, %r367;\n"
1425 " shr.u32 %r104, %r369, 1;\n"
1426 " sub.s32 %r370, %r367, %r368;\n"
1427 " abs.s32 %r371, %r370;\n"
1428 " and.b32 %r372, %r281, 65535;\n"
1429 " sub.s32 %r373, %r372, %r52;\n"
1430 " abs.s32 %r374, %r373;\n"
1431 " and.b32 %r375, %r285, 65535;\n"
1432 " sub.s32 %r376, %r375, %r53;\n"
1433 " abs.s32 %r377, %r376;\n"
1434 " add.s32 %r378, %r377, %r374;\n"
1435 " shr.u32 %r379, %r378, 1;\n"
1436 " and.b32 %r380, %r313, 65535;\n"
1437 " sub.s32 %r381, %r380, %r52;\n"
1438 " abs.s32 %r382, %r381;\n"
1439 " and.b32 %r383, %r317, 65535;\n"
1440 " sub.s32 %r384, %r53, %r383;\n"
1441 " abs.s32 %r385, %r384;\n"
1442 " add.s32 %r386, %r385, %r382;\n"
1443 " shr.u32 %r387, %r386, 1;\n"
1444 " max.s32 %r388, %r371, %r379;\n"
1445 " max.s32 %r424, %r388, %r387;\n"
// When bit 0 of param_11 is set, skip the ch1 refinement (branch to LBB3_15).
1446 " @%p1 bra LBB3_15;\n"
// ch1 refinement: same min/max scheme as ch0, applied to %r424.
1447 " cvt.u16.u32 %rs2, %r139;\n"
1448 " cvt.u16.u32 %rs4, %r167;\n"
1449 " and.b32 %r389, %r297, 65535;\n"
1450 " and.b32 %r390, %r309, 65535;\n"
1451 " add.s32 %r391, %r390, %r389;\n"
1452 " shr.u32 %r392, %r391, 1;\n"
1453 " and.b32 %r393, %r289, 65535;\n"
1454 " and.b32 %r394, %r301, 65535;\n"
1455 " add.s32 %r395, %r394, %r393;\n"
1456 " shr.u32 %r396, %r395, 1;\n"
1457 " sub.s32 %r397, %r104, %r53;\n"
1458 " sub.s32 %r398, %r104, %r52;\n"
1459 " sub.s32 %r399, %r396, %r52;\n"
1460 " sub.s32 %r400, %r392, %r53;\n"
1461 " min.s32 %r401, %r399, %r400;\n"
1462 " setp.gt.u16 %p17, %rs2, %rs4;\n"
1463 " selp.b32 %r402, %r397, %r398, %p17;\n"
1464 " max.s32 %r403, %r402, %r401;\n"
1465 " max.s32 %r404, %r399, %r400;\n"
1466 " setp.lt.u16 %p18, %rs2, %rs4;\n"
1467 " selp.b32 %r405, %r397, %r398, %p18;\n"
1468 " min.s32 %r406, %r405, %r404;\n"
1469 " neg.s32 %r407, %r403;\n"
1470 " max.s32 %r408, %r424, %r406;\n"
1471 " max.s32 %r424, %r408, %r407;\n"
// Presumably the LBB3_15 join point. Final clamps:
//   %r426 (ch0) = max(%r99  - %r423, min(%r99  + %r423, %r51))
//   %r425 (ch1) = max(%r104 - %r424, min(%r104 + %r424, %r72))
1473 " max.s32 %r426, %r366, %r365;\n"
1474 " add.s32 %r409, %r424, %r104;\n"
1475 " min.s32 %r410, %r409, %r72;\n"
1476 " sub.s32 %r411, %r104, %r424;\n"
1477 " max.s32 %r425, %r411, %r410;\n"
// Presumably the LBB3_16 join point shared with the pass-through path.
// Narrow both components to 16 bits and store them as one v2.u16 at
// dst + (y * param_6 + x) * 4.
1479 " cvt.u16.u32 %rs7, %r426;\n"
1480 " cvt.u16.u32 %rs8, %r425;\n"
1481 " mad.lo.s32 %r416, %r2, %r111, %r1;\n"
1482 " mul.wide.s32 %rd31, %r416, 4;\n"
1483 " add.s64 %rd32, %rd1, %rd31;\n"
1484 " st.global.v2.u16 [%rd32], {%rs7, %rs8};\n"
const char vf_yadif_cuda_ptx[]