/*
 * Scratch-variable helpers for the inline-asm helper macros in this file.
 *
 * DECLARE_DOUBLE_1 / DECLARE_DOUBLE_2 / DECLARE_UINT32_T declare the C
 * temporaries db_1, db_2 (double) and it_1 (uint32_t).
 * RESTRICT_ASM_DOUBLE_1 / RESTRICT_ASM_DOUBLE_2 / RESTRICT_ASM_UINT32_T
 * expand to the matching named asm operand entries [db_1], [db_2], [it_1],
 * bound to those temporaries with "=&f" (db_*) and "=&r" (it_1) constraints.
 *
 * NOTE(review): "=&" is presumably the GCC early-clobber output modifier,
 * with 'f' selecting a floating-point register and 'r' a general-purpose
 * one -- confirm against the GCC extended-asm documentation.
 */
29 #define DECLARE_DOUBLE_1 double db_1
30 #define DECLARE_DOUBLE_2 double db_2
31 #define DECLARE_UINT32_T uint32_t it_1
32 #define RESTRICT_ASM_DOUBLE_1 [db_1]"=&f"(db_1)
33 #define RESTRICT_ASM_DOUBLE_2 [db_2]"=&f"(db_2)
34 #define RESTRICT_ASM_UINT32_T [it_1]"=&r"(it_1)
/*
 * Asm fragment: writes dst from src1 and src2 using four instructions.
 *
 *   db_1 = pcmpeqb(src1, src2)
 *   db_2 = pmaxub(src1, src2)
 *   db_2 = pcmpeqb(db_2, src1)
 *   dst  = db_2 xor db_1
 *
 * The enclosing asm statement must provide the scratch operands %[db_1] and
 * %[db_2]; both are overwritten. dst is only written by the last
 * instruction, after the final read of src1/src2, so dst may name the same
 * operand as one of the sources.
 *
 * NOTE(review): judging by the mnemonics this is a per-byte unsigned
 * "src1 > src2" compare (max(src1, src2) == src1 and src1 != src2) --
 * confirm against the Loongson MMI instruction reference.
 */
36 #define MMI_PCMPGTUB(dst, src1, src2) \
37 "pcmpeqb %[db_1], "#src1", "#src2" \n\t" \
38 "pmaxub %[db_2], "#src1", "#src2" \n\t" \
39 "pcmpeqb %[db_2], %[db_2], "#src1" \n\t" \
40 "xor "#dst", %[db_2], %[db_1] \n\t"
/*
 * Asm fragment: expands src into two outputs, dst_r (via punpcklbh) and
 * dst_l (via punpckhbh), interleaving src with a per-byte mask held in
 * %[db_2].
 *
 *   db_1  = db_1 xor db_1            (all zero bits)
 *   db_2  = pcmpgtb(db_1, src)
 *   dst_r = punpcklbh(src, db_2)
 *   dst_l = punpckhbh(src, db_2)
 *
 * The enclosing asm statement must provide the scratch operands %[db_1] and
 * %[db_2]; both are overwritten. dst_r is written before src is read for
 * dst_l, so dst_r must not name the same operand as src.
 *
 * NOTE(review): presumably this sign-extends the eight signed bytes of src
 * to halfwords (low four into dst_r, high four into dst_l) -- confirm
 * against the Loongson MMI instruction reference.
 */
42 #define MMI_BTOH(dst_l, dst_r, src) \
43 "xor %[db_1], %[db_1], %[db_1] \n\t" \
44 "pcmpgtb %[db_2], %[db_1], "#src" \n\t" \
45 "punpcklbh "#dst_r", "#src", %[db_2] \n\t" \
46 "punpckhbh "#dst_l", "#src", %[db_2] \n\t"
/*
 * Asm fragment: the shared body of the loop filter, meant to be pasted
 * between the load and store sequences of an enclosing asm statement.
 *
 * Operands the enclosing asm must provide:
 *   inputs moved with dmtc1 : %[thresh], %[e], %[i]
 *   pixel operands          : %[p3] %[p2] %[p1] %[p0] %[q0] %[q1] %[q2] %[q3]
 *   result/scratch          : %[hev], %[mask], %[ftmp0]..%[ftmp7], %[tmp0]
 *   helper scratch          : %[db_1], %[db_2] (used by MMI_PCMPGTUB and
 *                             MMI_BTOH), plus whatever PSRLB_MMI / PSRAB_MMI
 *                             (defined outside this view) require
 *
 * Phases, in order:
 *   1. %[hev]  = MMI_PCMPGTUB(max(pasubub(p1,p0), pasubub(q1,q0)), thresh).
 *   2. %[mask] is built from pasubub(p0,q0) doubled plus the shifted
 *      pasubub(p1,q1) compared against e, then maxed with the neighbour
 *      differences (p3/p2, p2/p1, q3/q2, q2/q1 and the hev magnitude),
 *      compared against i and finally inverted.
 *   3. p2..q2 are XORed with a register holding 0x80 in every byte
 *      (%[ftmp7]); p3 and q3 are only read, never modified.
 *   4. A filter value is formed from psubsb(p1,q1) + 3 * psubb(q0,p0) in
 *      halfwords, packed back to bytes and ANDed with %[mask].
 *   5. The part selected by %[hev] adjusts q0/p0 using the +4 / +3 byte
 *      constants followed by PSRAB_MMI; this corresponds to the
 *      f1 = FFMIN(a + 4, 127) >> 3 and f2 = FFMIN(a + 3, 127) >> 3
 *      expressions visible elsewhere in this file.
 *   6. The part selected by the inverted %[hev] is scaled by 27, 18 and 9
 *      (the last as (w << 3) + w), each with +63 and an arithmetic shift by
 *      7, and applied to q0/p0, q1/p1 and q2/p2; this corresponds to the
 *      (27 * w + 63) >> 7, (18 * w + 63) >> 7 and (9 * w + 63) >> 7
 *      expressions visible elsewhere in this file. Each modified pixel
 *      operand is XORed with %[ftmp7] again before the macro ends.
 *
 * NOTE(review): the per-instruction meanings above (absolute difference,
 * saturating add/sub, self-unpack as byte replication, pcmpeqw of a register
 * with itself yielding all ones) are inferred from the mnemonics -- confirm
 * against the Loongson MMI instruction reference. The meaning of the 0x09
 * and 0x0B counts handed to PSRLB_MMI / PSRAB_MMI depends on those macros,
 * which are not visible here.
 */
48 #define MMI_VP8_LOOP_FILTER \
50 "dmtc1 %[thresh], %[ftmp3] \n\t" /* phase 1: hev, starts from thresh */ \
51 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
52 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
53 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
54 "pasubub %[ftmp0], %[p1], %[p0] \n\t" \
55 "pasubub %[ftmp1], %[q1], %[q0] \n\t" \
56 "pmaxub %[ftmp0], %[ftmp0], %[ftmp1] \n\t" \
57 MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3]) \
59 "pasubub %[ftmp1], %[p0], %[q0] \n\t" /* phase 2: mask */ \
60 "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
61 "pasubub %[ftmp2], %[p1], %[q1] \n\t" \
62 "li %[tmp0], 0x09 \n\t" \
63 "dmtc1 %[tmp0], %[ftmp3] \n\t" \
64 PSRLB_MMI(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5], %[ftmp2]) \
65 "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
66 "dmtc1 %[e], %[ftmp3] \n\t" \
67 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
68 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
69 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
70 MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3]) \
71 "pmaxub %[mask], %[mask], %[ftmp0] \n\t" /* ftmp0 still holds the phase-1 maximum */ \
72 "pasubub %[ftmp1], %[p3], %[p2] \n\t" \
73 "pasubub %[ftmp2], %[p2], %[p1] \n\t" \
74 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
75 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
76 "pasubub %[ftmp1], %[q3], %[q2] \n\t" \
77 "pasubub %[ftmp2], %[q2], %[q1] \n\t" \
78 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
79 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
80 "dmtc1 %[i], %[ftmp3] \n\t" \
81 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
82 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
83 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
84 MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3]) \
85 "pcmpeqw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
86 "xor %[mask], %[mask], %[ftmp3] \n\t" /* invert the compare result */ \
88 "li %[tmp0], 0x80808080 \n\t" /* phase 3: ftmp7 = 0x80 in every byte */ \
89 "dmtc1 %[tmp0], %[ftmp7] \n\t" \
90 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \
91 "xor %[p2], %[p2], %[ftmp7] \n\t" \
92 "xor %[p1], %[p1], %[ftmp7] \n\t" \
93 "xor %[p0], %[p0], %[ftmp7] \n\t" \
94 "xor %[q0], %[q0], %[ftmp7] \n\t" \
95 "xor %[q1], %[q1], %[ftmp7] \n\t" \
96 "xor %[q2], %[q2], %[ftmp7] \n\t" \
97 "psubsb %[ftmp4], %[p1], %[q1] \n\t" /* phase 4: filter value */ \
98 "psubb %[ftmp5], %[q0], %[p0] \n\t" \
99 MMI_BTOH(%[ftmp1], %[ftmp0], %[ftmp5]) \
100 MMI_BTOH(%[ftmp3], %[ftmp2], %[ftmp4]) \
102 "paddh %[ftmp5], %[ftmp0], %[ftmp0] \n\t" /* dst_r halves: 3 * ftmp0 + ftmp2 */ \
103 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" \
104 "paddh %[ftmp0], %[ftmp2], %[ftmp0] \n\t" \
106 "paddh %[ftmp5], %[ftmp1], %[ftmp1] \n\t" /* dst_l halves: 3 * ftmp1 + ftmp3 */ \
107 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \
108 "paddh %[ftmp1], %[ftmp3], %[ftmp1] \n\t" \
110 "packsshb %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \
111 "and %[ftmp1], %[ftmp1], %[mask] \n\t" \
112 "and %[ftmp2], %[ftmp1], %[hev] \n\t" /* phase 5: hev-selected part */ \
113 "li %[tmp0], 0x04040404 \n\t" \
114 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
115 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
116 "paddsb %[ftmp3], %[ftmp2], %[ftmp0] \n\t" \
117 "li %[tmp0], 0x0B \n\t" \
118 "dmtc1 %[tmp0], %[ftmp4] \n\t" \
119 PSRAB_MMI(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], %[ftmp3]) \
120 "li %[tmp0], 0x03030303 \n\t" \
121 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
122 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
123 "paddsb %[ftmp4], %[ftmp2], %[ftmp0] \n\t" \
124 "li %[tmp0], 0x0B \n\t" \
125 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
126 PSRAB_MMI(%[ftmp4], %[ftmp2], %[ftmp5], %[ftmp6], %[ftmp4]) \
127 "psubsb %[q0], %[q0], %[ftmp3] \n\t" \
128 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
130 "pcmpeqw %[ftmp0], %[ftmp0], %[ftmp0] \n\t" /* phase 6: invert hev, keep the rest */ \
131 "xor %[hev], %[hev], %[ftmp0] \n\t" \
132 "and %[ftmp1], %[ftmp1], %[hev] \n\t" \
133 MMI_BTOH(%[ftmp5], %[ftmp6], %[ftmp1]) \
134 "li %[tmp0], 0x07 \n\t" /* ftmp2 = shift count 7 */ \
135 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
136 "li %[tmp0], 0x001b001b \n\t" /* ftmp1 = 27 in every halfword */ \
137 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
138 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
139 "li %[tmp0], 0x003f003f \n\t" /* ftmp0 = 63 in every halfword */ \
140 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
141 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
143 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
144 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
145 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
147 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
148 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
149 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
151 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
152 "psubsb %[q0], %[q0], %[ftmp4] \n\t" \
153 "xor %[q0], %[q0], %[ftmp7] \n\t" /* second XOR with the 0x80 bytes */ \
154 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
155 "xor %[p0], %[p0], %[ftmp7] \n\t" \
156 "li %[tmp0], 0x00120012 \n\t" /* ftmp1 = 18 in every halfword */ \
157 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
158 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
160 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
161 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
162 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
164 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
165 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
166 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
168 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
169 "psubsb %[q1], %[q1], %[ftmp4] \n\t" \
170 "xor %[q1], %[q1], %[ftmp7] \n\t" \
171 "paddsb %[p1], %[p1], %[ftmp4] \n\t" \
172 "xor %[p1], %[p1], %[ftmp7] \n\t" \
173 "li %[tmp0], 0x03 \n\t" /* ftmp1 = shift count 3, used for (w << 3) + w */ \
174 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
176 "psllh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
177 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" \
178 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
179 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
181 "psllh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
182 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
183 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
184 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
186 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
187 "psubsb %[q2], %[q2], %[ftmp4] \n\t" \
188 "xor %[q2], %[q2], %[ftmp7] \n\t" \
189 "paddsb %[p2], %[p2], %[ftmp4] \n\t" \
190 "xor %[p2], %[p2], %[ftmp7] \n\t"
/*
 * Asm fragment: one 6-tap horizontal filter step for the 4-wide case.
 *
 * Loads from src via MMI_ULWC1 at byte offsets 0, -1, -2, +1, +2, +3,
 * unpacks each load against %[ftmp0] (punpcklbh) and multiplies by
 * %[filter2], %[filter1], %[filter0], %[filter3], %[filter4], %[filter5]
 * respectively. The products are combined with the sign pattern
 * +f2 -f1 +f0 +f3 -f4 +f5 (the same pattern as FILTER_6TAP in this file),
 * %[ff_pw_64] is added, the sum is shifted right by %[ftmp4], packed with
 * packushb and stored to dst via MMI_SWC1.
 *
 * Overwrites %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5].
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 7, matching FILTER_6TAP) into %[ftmp4];
 * MMI_ULWC1 / MMI_SWC1 are defined outside this view (presumably 32-bit
 * unaligned load / store) -- confirm at the call sites.
 */
192 #define PUT_VP8_EPEL4_H6_MMI(src, dst) \
193 MMI_ULWC1(%[ftmp1], src, 0x00) \
194 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
195 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
197 MMI_ULWC1(%[ftmp1], src, -0x01) \
198 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
199 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
200 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
202 MMI_ULWC1(%[ftmp1], src, -0x02) \
203 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
204 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
205 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
207 MMI_ULWC1(%[ftmp1], src, 0x01) \
208 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
209 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
211 MMI_ULWC1(%[ftmp1], src, 0x02) \
212 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
213 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
214 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
216 MMI_ULWC1(%[ftmp1], src, 0x03) \
217 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
218 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
219 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
221 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
222 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
223 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
224 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
226 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one 4-tap horizontal filter step for the 4-wide case.
 *
 * Loads from src via MMI_ULWC1 at byte offsets 0, -1, +1, +2, unpacks each
 * load against %[ftmp0] and multiplies by %[filter2], %[filter1],
 * %[filter3], %[filter4]. The products are combined as +f2 -f1 +f3 -f4
 * (the same pattern as FILTER_4TAP in this file), %[ff_pw_64] is added, the
 * sum is shifted right by %[ftmp4], packed with packushb and stored to dst
 * via MMI_SWC1.
 *
 * Overwrites %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5].
 *
 * NOTE(review): the f3/f4 subtraction uses "psubh" while the corresponding
 * step in the sibling EPEL macros uses "psubsh" -- confirm whether the
 * different mnemonic is intentional.
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 7) into %[ftmp4] -- confirm at the call sites.
 */
229 #define PUT_VP8_EPEL4_H4_MMI(src, dst) \
230 MMI_ULWC1(%[ftmp1], src, 0x00) \
231 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
232 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
234 MMI_ULWC1(%[ftmp1], src, -0x01) \
235 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
236 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
237 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
239 MMI_ULWC1(%[ftmp1], src, 0x01) \
240 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
241 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
243 MMI_ULWC1(%[ftmp1], src, 0x02) \
244 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
245 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
246 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
248 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
250 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
251 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
253 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
254 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one 6-tap vertical filter step for the 4-wide case.
 *
 * src1 is a scratch pointer operand that this macro overwrites; it is set
 * in turn to src - srcstride, src - 2*srcstride, src + srcstride,
 * src + 2*srcstride and src + 3*srcstride (via PTR_SUBU / PTR_ADDU). The
 * rows at src and at each src1 position are loaded with MMI_ULWC1, unpacked
 * against %[ftmp0] and multiplied by %[filter2], %[filter1], %[filter0],
 * %[filter3], %[filter4], %[filter5] respectively, then combined as
 * +f2 -f1 +f0 +f3 -f4 +f5. %[ff_pw_64] is added, the sum is shifted right
 * by %[ftmp4], packed with packushb and stored to dst via MMI_SWC1.
 *
 * Overwrites src1, %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5]; src is not
 * modified.
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 7) into %[ftmp4] -- confirm at the call sites.
 */
257 #define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride) \
258 MMI_ULWC1(%[ftmp1], src, 0x00) \
259 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
260 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
262 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
263 MMI_ULWC1(%[ftmp1], src1, 0x00) \
264 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
265 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
266 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
268 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
269 MMI_ULWC1(%[ftmp1], src1, 0x00) \
270 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
271 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
272 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
274 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
275 MMI_ULWC1(%[ftmp1], src1, 0x00) \
276 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
277 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
279 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
280 MMI_ULWC1(%[ftmp1], src1, 0x00) \
281 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
282 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
283 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
285 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
286 MMI_ULWC1(%[ftmp1], src1, 0x00) \
287 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
288 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
289 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
291 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
293 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
294 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
295 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
297 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one 4-tap vertical filter step for the 4-wide case.
 *
 * src1 is a scratch pointer operand that this macro overwrites; it is set
 * in turn to src - srcstride, src + srcstride and src + 2*srcstride. The
 * rows at src and at each src1 position are loaded with MMI_ULWC1, unpacked
 * against %[ftmp0] and multiplied by %[filter2], %[filter1], %[filter3],
 * %[filter4] respectively, then combined as +f2 -f1 +f3 -f4. %[ff_pw_64]
 * is added, the sum is shifted right by %[ftmp4], packed with packushb and
 * stored to dst via MMI_SWC1.
 *
 * Overwrites src1, %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5]; src is not
 * modified.
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 7) into %[ftmp4] -- confirm at the call sites.
 */
300 #define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride) \
301 MMI_ULWC1(%[ftmp1], src, 0x00) \
302 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
303 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
305 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
306 MMI_ULWC1(%[ftmp1], src1, 0x00) \
307 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
308 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
309 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
311 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
312 MMI_ULWC1(%[ftmp1], src1, 0x00) \
313 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
314 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
316 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
317 MMI_ULWC1(%[ftmp1], src1, 0x00) \
318 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
319 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
320 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
322 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
324 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
325 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
326 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
328 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one 6-tap horizontal filter step for the 8-wide case.
 *
 * Same structure as the 4-wide H6 macro, but each MMI_ULDC1 load is split
 * with punpcklbh / punpckhbh into two halves that are processed in
 * parallel: %[ftmp5] / %[ftmp6] accumulate the two halves and
 * %[ftmp7] / %[ftmp8] hold the +f2 -f1 +f0 partial sums while the
 * +f3 -f4 +f5 part is computed. Loads are taken from src at byte offsets
 * 0, -1, -2, +1, +2, +3 and multiplied by %[filter2], %[filter1],
 * %[filter0], %[filter3], %[filter4], %[filter5]. After adding
 * %[ff_pw_64] and shifting right by %[ftmp4], both halves are packed with
 * packushb and stored to dst via MMI_SDC1.
 *
 * Overwrites %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5]..%[ftmp8].
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 7) into %[ftmp4]; MMI_ULDC1 / MMI_SDC1 are
 * defined outside this view (presumably 64-bit unaligned load / store) --
 * confirm at the call sites.
 */
331 #define PUT_VP8_EPEL8_H6_MMI(src, dst) \
332 MMI_ULDC1(%[ftmp1], src, 0x00) \
333 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
334 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
335 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
336 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
338 MMI_ULDC1(%[ftmp1], src, -0x01) \
339 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
340 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
341 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
342 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
343 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
344 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
346 MMI_ULDC1(%[ftmp1], src, -0x02) \
347 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
348 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
349 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
350 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
351 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
352 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
354 MMI_ULDC1(%[ftmp1], src, 0x01) \
355 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
356 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
357 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
358 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
360 MMI_ULDC1(%[ftmp1], src, 0x02) \
361 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
362 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
363 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
364 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
365 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
366 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
368 MMI_ULDC1(%[ftmp1], src, 0x03) \
369 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
370 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
371 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
372 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
373 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
374 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
376 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
377 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
379 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
380 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
381 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
382 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
383 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
385 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one 4-tap horizontal filter step for the 8-wide case.
 *
 * Loads from src via MMI_ULDC1 at byte offsets 0, -1, +1, +2; each load is
 * split with punpcklbh / punpckhbh into two halves processed in parallel.
 * The halves are multiplied by %[filter2], %[filter1], %[filter3],
 * %[filter4] and combined as +f2 -f1 +f3 -f4: %[ftmp7] / %[ftmp8] hold the
 * f2 - f1 partial sums, %[ftmp5] / %[ftmp6] the f3 - f4 part and then the
 * total. After adding %[ff_pw_64] and shifting right by %[ftmp4], both
 * halves are packed with packushb and stored to dst via MMI_SDC1.
 *
 * Overwrites %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5]..%[ftmp8].
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 7) into %[ftmp4] -- confirm at the call sites.
 */
388 #define PUT_VP8_EPEL8_H4_MMI(src, dst) \
389 MMI_ULDC1(%[ftmp1], src, 0x00) \
390 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
391 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
392 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
393 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
395 MMI_ULDC1(%[ftmp1], src, -0x01) \
396 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
397 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
398 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
399 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
400 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
401 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
403 MMI_ULDC1(%[ftmp1], src, 0x01) \
404 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
405 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
406 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
407 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
409 MMI_ULDC1(%[ftmp1], src, 0x02) \
410 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
411 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
412 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
413 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
414 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
415 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
417 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
418 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
420 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
421 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
422 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
423 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
425 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
426 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one 6-tap vertical filter step for the 8-wide case.
 *
 * src1 is a scratch pointer operand that this macro overwrites; it is set
 * in turn to src - srcstride, src - 2*srcstride, src + srcstride,
 * src + 2*srcstride and src + 3*srcstride (via PTR_SUBU / PTR_ADDU). The
 * rows at src and at each src1 position are loaded with MMI_ULDC1, split
 * with punpcklbh / punpckhbh into two halves and multiplied by %[filter2],
 * %[filter1], %[filter0], %[filter3], %[filter4], %[filter5] respectively.
 * The products are combined as +f2 -f1 +f0 +f3 -f4 +f5, with
 * %[ftmp7] / %[ftmp8] holding the first three terms. After adding
 * %[ff_pw_64] and shifting right by %[ftmp4], both halves are packed with
 * packushb and stored to dst via MMI_SDC1.
 *
 * Overwrites src1, %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5]..%[ftmp8]; src
 * is not modified.
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 7) into %[ftmp4] -- confirm at the call sites.
 */
429 #define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride) \
430 MMI_ULDC1(%[ftmp1], src, 0x00) \
431 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
432 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
433 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
434 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
436 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
437 MMI_ULDC1(%[ftmp1], src1, 0x00) \
438 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
439 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
440 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
441 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
442 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
443 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
445 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
446 MMI_ULDC1(%[ftmp1], src1, 0x00) \
447 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
448 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
449 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
450 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
451 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
452 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
454 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
455 MMI_ULDC1(%[ftmp1], src1, 0x00) \
456 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
457 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
458 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
459 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
461 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
462 MMI_ULDC1(%[ftmp1], src1, 0x00) \
463 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
464 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
465 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
466 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
467 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
468 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
470 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
471 MMI_ULDC1(%[ftmp1], src1, 0x00) \
472 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
473 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
474 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
475 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
476 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
477 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
479 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
480 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
482 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
483 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
484 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
485 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
486 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
488 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one 4-tap vertical filter step for the 8-wide case.
 *
 * src1 is a scratch pointer operand that this macro overwrites; it is set
 * in turn to src - srcstride, src + srcstride and src + 2*srcstride. The
 * rows at src and at each src1 position are loaded with MMI_ULDC1, split
 * with punpcklbh / punpckhbh into two halves and multiplied by %[filter2],
 * %[filter1], %[filter3], %[filter4] respectively. The products are
 * combined as +f2 -f1 +f3 -f4, with %[ftmp7] / %[ftmp8] holding the
 * f2 - f1 partial sums. After adding %[ff_pw_64] and shifting right by
 * %[ftmp4], both halves are packed with packushb and stored to dst via
 * MMI_SDC1.
 *
 * Overwrites src1, %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5]..%[ftmp8]; src
 * is not modified.
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 7) into %[ftmp4] -- confirm at the call sites.
 */
491 #define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride) \
492 MMI_ULDC1(%[ftmp1], src, 0x00) \
493 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
494 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
495 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
496 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
498 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
499 MMI_ULDC1(%[ftmp1], src1, 0x00) \
500 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
501 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
502 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
503 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
504 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
505 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
507 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
508 MMI_ULDC1(%[ftmp1], src1, 0x00) \
509 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
510 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
511 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
512 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
514 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
515 MMI_ULDC1(%[ftmp1], src1, 0x00) \
516 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
517 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
518 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
519 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
520 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
521 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
523 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
524 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
526 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
527 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
528 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
529 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
530 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
532 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one horizontal two-tap (bilinear) step for the 8-wide case.
 *
 * Loads from src via MMI_ULDC1 at byte offsets 0 and +1, splits each load
 * with punpcklbh / punpckhbh against %[ftmp0], multiplies the first by
 * %[a] and the second by %[b], adds the products and %[ff_pw_4], shifts
 * right by %[ftmp4], packs both halves with packushb and stores to dst via
 * MMI_SDC1.
 *
 * Overwrites %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5], %[ftmp6].
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 3, given the +4 rounding constant) into
 * %[ftmp4] -- confirm at the call sites.
 */
535 #define PUT_VP8_BILINEAR8_H_MMI(src, dst) \
536 MMI_ULDC1(%[ftmp1], src, 0x00) \
537 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
538 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
539 "pmullh %[ftmp5], %[ftmp2], %[a] \n\t" \
540 "pmullh %[ftmp6], %[ftmp3], %[a] \n\t" \
542 MMI_ULDC1(%[ftmp1], src, 0x01) \
543 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
544 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
545 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
546 "pmullh %[ftmp3], %[ftmp3], %[b] \n\t" \
547 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
548 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
550 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
551 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
552 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
553 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
555 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
556 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one horizontal two-tap (bilinear) step for the 4-wide case.
 *
 * Loads from src via MMI_ULWC1 at byte offsets 0 and +1, unpacks each load
 * against %[ftmp0], multiplies the first by %[a] and the second by %[b],
 * adds the products and %[ff_pw_4], shifts right by %[ftmp4], packs with
 * packushb and stores to dst via MMI_SWC1.
 *
 * Overwrites %[ftmp1], %[ftmp2], %[ftmp3].
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 3) into %[ftmp4] -- confirm at the call sites.
 */
559 #define PUT_VP8_BILINEAR4_H_MMI(src, dst) \
560 MMI_ULWC1(%[ftmp1], src, 0x00) \
561 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
562 "pmullh %[ftmp3], %[ftmp2], %[a] \n\t" \
564 MMI_ULWC1(%[ftmp1], src, 0x01) \
565 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
566 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
567 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
569 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
570 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
572 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
573 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one vertical two-tap (bilinear) step for the 8-wide case.
 *
 * Loads the row at src and the row at src1 = src + sstride (src1 is a
 * scratch pointer operand overwritten here) via MMI_ULDC1, splits each load
 * with punpcklbh / punpckhbh against %[ftmp0], multiplies the first by
 * %[c] and the second by %[d], adds the products and %[ff_pw_4], shifts
 * right by %[ftmp4], packs both halves with packushb and stores to dst via
 * MMI_SDC1.
 *
 * Overwrites src1, %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp5], %[ftmp6].
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 3) into %[ftmp4] -- confirm at the call sites.
 */
576 #define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride) \
577 MMI_ULDC1(%[ftmp1], src, 0x00) \
578 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
579 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
580 "pmullh %[ftmp5], %[ftmp2], %[c] \n\t" \
581 "pmullh %[ftmp6], %[ftmp3], %[c] \n\t" \
583 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
584 MMI_ULDC1(%[ftmp1], src1, 0x00) \
585 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
586 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
587 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
588 "pmullh %[ftmp3], %[ftmp3], %[d] \n\t" \
589 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
590 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
592 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
593 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
594 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
595 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
597 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
598 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * Asm fragment: one vertical two-tap (bilinear) step for the 4-wide case.
 *
 * Loads the row at src and the row at src1 = src + sstride (src1 is a
 * scratch pointer operand overwritten here) via MMI_ULWC1, unpacks each
 * load against %[ftmp0], multiplies the first by %[c] and the second by
 * %[d], adds the products and %[ff_pw_4], shifts right by %[ftmp4], packs
 * with packushb and stores to dst via MMI_SWC1.
 *
 * Overwrites src1, %[ftmp1], %[ftmp2], %[ftmp3].
 *
 * NOTE(review): assumes the enclosing asm has zeroed %[ftmp0] and loaded
 * the shift count (presumably 3) into %[ftmp4] -- confirm at the call sites.
 */
601 #define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride) \
602 MMI_ULWC1(%[ftmp1], src, 0x00) \
603 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
604 "pmullh %[ftmp3], %[ftmp2], %[c] \n\t" \
606 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
607 MMI_ULWC1(%[ftmp1], src1, 0x00) \
608 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
609 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
610 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
612 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
613 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
615 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
616 MMI_SWC1(%[ftmp1], dst, 0x00)
620 {0x0000000000000000, 0x0006000600060006, 0x007b007b007b007b,
621 0x000c000c000c000c, 0x0001000100010001, 0x0000000000000000},
623 {0x0002000200020002, 0x000b000b000b000b, 0x006c006c006c006c,
624 0x0024002400240024, 0x0008000800080008, 0x0001000100010001},
626 {0x0000000000000000, 0x0009000900090009, 0x005d005d005d005d,
627 0x0032003200320032, 0x0006000600060006, 0x0000000000000000},
629 {0x0003000300030003, 0x0010001000100010, 0x004d004d004d004d,
630 0x004d004d004d004d, 0x0010001000100010, 0x0003000300030003},
632 {0x0000000000000000, 0x0006000600060006, 0x0032003200320032,
633 0x005d005d005d005d, 0x0009000900090009, 0x0000000000000000},
635 {0x0001000100010001, 0x0008000800080008, 0x0024002400240024,
636 0x006c006c006c006c, 0x000b000b000b000b, 0x0002000200020002},
638 {0x0000000000000000, 0x0001000100010001, 0x000c000c000c000c,
639 0x007b007b007b007b, 0x0006000600060006, 0x0000000000000000}
/*
 * Scalar 6-tap filter for the sample at index x, stepping by `stride`.
 *
 * Evaluates
 *   F[2]*s(0) - F[1]*s(-1) + F[0]*s(-2) + F[3]*s(+1) - F[4]*s(+2) + F[5]*s(+3)
 * where s(k) = src[x + k * stride], adds the rounding constant 64, shifts
 * right by 7 and maps the result through the lookup table `cm`.
 *
 * `cm` and `x` are taken from the scope where the macro is expanded.
 * Every parameter is parenthesized so that expression arguments (for
 * example a stride written as `a + b`, or a source written as `p + n`)
 * index the intended samples. Arguments are expanded several times, so
 * they must be free of side effects.
 */
#define FILTER_6TAP(src, F, stride)                                           \
    cm[((F)[2] * (src)[x + 0 * (stride)] - (F)[1] * (src)[x - 1 * (stride)] + \
        (F)[0] * (src)[x - 2 * (stride)] + (F)[3] * (src)[x + 1 * (stride)] - \
        (F)[4] * (src)[x + 2 * (stride)] + (F)[5] * (src)[x + 3 * (stride)] + 64) >> 7]
/*
 * Scalar 4-tap filter for the sample at index x, stepping by `stride`.
 *
 * Evaluates F[2]*s(0) - F[1]*s(-1) + F[3]*s(+1) - F[4]*s(+2), where
 * s(k) = src[x + k * stride], adds the rounding constant 64, shifts right
 * by 7 and maps the result through the lookup table `cm`. F[0] and F[5]
 * are not read.
 *
 * `cm` and `x` are taken from the scope where the macro is expanded.
 * Every parameter is parenthesized so that expression arguments (for
 * example a stride written as `a + b`, or a source written as `p + n`)
 * index the intended samples. Arguments are expanded several times, so
 * they must be free of side effects.
 */
#define FILTER_4TAP(src, F, stride)                                           \
    cm[((F)[2] * (src)[x + 0 * (stride)] - (F)[1] * (src)[x - 1 * (stride)] + \
        (F)[3] * (src)[x + 1 * (stride)] - (F)[4] * (src)[x + 2 * (stride)] + 64) >> 7]
653 { 0, 6, 123, 12, 1, 0 },
654 { 2, 11, 108, 36, 8, 1 },
655 { 0, 9, 93, 50, 6, 0 },
656 { 3, 16, 77, 77, 16, 3 },
657 { 0, 6, 50, 93, 9, 0 },
658 { 1, 8, 36, 108, 11, 2 },
659 { 0, 1, 12, 123, 6, 0 },
/*
 * Fixed-point multiply helpers (16 fractional bits).
 * MUL_20091(a) evaluates ((a * 20091) >> 16) + a, i.e. a scaled by roughly
 * 1 + 20091/65536; MUL_35468(a) evaluates (a * 35468) >> 16, i.e. a scaled
 * by roughly 35468/65536.
 * MUL_20091 expands its argument twice, so the argument must not have side
 * effects.
 */
662 #define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
663 #define MUL_35468(a) (((a) * 35468) >> 16)
/*
 * Looks up (n + 0x80) in the table `cm`, taken from the scope where the
 * macro is expanded, and subtracts 0x80 from the looked-up value.
 * NOTE(review): presumably `cm` is a clip-to-[0, 255] table, which would
 * make this a clamp of n to [-128, 127] -- confirm where `cm` is defined.
 */
666 #define clip_int8(n) (cm[(n) + 0x80] - 0x80)
683 f1 =
FFMIN(
a + 4, 127) >> 3;
684 f2 =
FFMIN(
a + 3, 127) >> 3;
707 f1 =
FFMIN(
a + 4, 127) >> 3;
708 f2 =
FFMIN(
a + 3, 127) >> 3;
755 a0 = (27 *
w + 63) >> 7;
756 a1 = (18 *
w + 63) >> 7;
757 a2 = (9 *
w + 63) >> 7;
786 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
795 "gsldlc1 %[q0], 0x07(%[dst]) \n\t"
796 "gsldrc1 %[q0], 0x00(%[dst]) \n\t"
797 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
798 "gsldlc1 %[p0], 0x07(%[tmp0]) \n\t"
799 "gsldrc1 %[p0], 0x00(%[tmp0]) \n\t"
800 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
801 "gsldlc1 %[p1], 0x07(%[tmp0]) \n\t"
802 "gsldrc1 %[p1], 0x00(%[tmp0]) \n\t"
803 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
804 "gsldlc1 %[p2], 0x07(%[tmp0]) \n\t"
805 "gsldrc1 %[p2], 0x00(%[tmp0]) \n\t"
806 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
807 "gsldlc1 %[p3], 0x07(%[tmp0]) \n\t"
808 "gsldrc1 %[p3], 0x00(%[tmp0]) \n\t"
809 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
810 "gsldlc1 %[q1], 0x07(%[tmp0]) \n\t"
811 "gsldrc1 %[q1], 0x00(%[tmp0]) \n\t"
812 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
813 "gsldlc1 %[q2], 0x07(%[tmp0]) \n\t"
814 "gsldrc1 %[q2], 0x00(%[tmp0]) \n\t"
815 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
816 "gsldlc1 %[q3], 0x07(%[tmp0]) \n\t"
817 "gsldrc1 %[q3], 0x00(%[tmp0]) \n\t"
820 "gssdlc1 %[q0], 0x07(%[dst]) \n\t"
821 "gssdrc1 %[q0], 0x00(%[dst]) \n\t"
822 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
823 "gssdlc1 %[p0], 0x07(%[tmp0]) \n\t"
824 "gssdrc1 %[p0], 0x00(%[tmp0]) \n\t"
825 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
826 "gssdlc1 %[p1], 0x07(%[tmp0]) \n\t"
827 "gssdrc1 %[p1], 0x00(%[tmp0]) \n\t"
828 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
829 "gssdlc1 %[p2], 0x07(%[tmp0]) \n\t"
830 "gssdrc1 %[p2], 0x00(%[tmp0]) \n\t"
831 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
832 "gssdlc1 %[q1], 0x07(%[tmp0]) \n\t"
833 "gssdrc1 %[q1], 0x00(%[tmp0]) \n\t"
834 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
835 "gssdlc1 %[q2], 0x07(%[tmp0]) \n\t"
836 "gssdrc1 %[q2], 0x00(%[tmp0]) \n\t"
837 : [p3]
"=&f"(ftmp[0]), [p2]
"=&f"(ftmp[1]),
838 [p1]
"=&f"(ftmp[2]), [p0]
"=&f"(ftmp[3]),
839 [
q0]
"=&f"(ftmp[4]), [
q1]
"=&f"(ftmp[5]),
840 [q2]
"=&f"(ftmp[6]), [q3]
"=&f"(ftmp[7]),
841 [ftmp0]
"=&f"(ftmp[8]), [ftmp1]
"=&f"(ftmp[9]),
842 [ftmp2]
"=&f"(ftmp[10]), [ftmp3]
"=&f"(ftmp[11]),
843 [
hev]
"=&f"(ftmp[12]), [
mask]
"=&f"(ftmp[13]),
844 [ftmp4]
"=&f"(ftmp[14]), [ftmp5]
"=&f"(ftmp[15]),
845 [ftmp6]
"=&f"(ftmp[16]), [ftmp7]
"=&f"(ftmp[17]),
846 [dst]
"+&r"(dst), [tmp0]
"=&r"(
tmp[0]),
856 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
860 for (
i = 0;
i < 8;
i++)
862 int hv =
hev(dst +
i * 1,
stride, hev_thresh);
871 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
880 "gsldlc1 %[p3], 0x03(%[dst]) \n\t"
881 "gsldrc1 %[p3], -0x04(%[dst]) \n\t"
882 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
883 "gsldlc1 %[p2], 0x03(%[tmp0]) \n\t"
884 "gsldrc1 %[p2], -0x04(%[tmp0]) \n\t"
885 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
886 "gsldlc1 %[p1], 0x03(%[tmp0]) \n\t"
887 "gsldrc1 %[p1], -0x04(%[tmp0]) \n\t"
888 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
889 "gsldlc1 %[p0], 0x03(%[tmp0]) \n\t"
890 "gsldrc1 %[p0], -0x04(%[tmp0]) \n\t"
891 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
892 "gsldlc1 %[q0], 0x03(%[tmp0]) \n\t"
893 "gsldrc1 %[q0], -0x04(%[tmp0]) \n\t"
894 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
895 "gsldlc1 %[q1], 0x03(%[tmp0]) \n\t"
896 "gsldrc1 %[q1], -0x04(%[tmp0]) \n\t"
897 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
898 "gsldlc1 %[q2], 0x03(%[tmp0]) \n\t"
899 "gsldrc1 %[q2], -0x04(%[tmp0]) \n\t"
900 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
901 "gsldlc1 %[q3], 0x03(%[tmp0]) \n\t"
902 "gsldrc1 %[q3], -0x04(%[tmp0]) \n\t"
905 %[
q0], %[
q1], %[q2], %[q3],
906 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
910 %[
q0], %[
q1], %[q2], %[q3],
911 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
913 "gssdlc1 %[p3], 0x03(%[dst]) \n\t"
914 "gssdrc1 %[p3], -0x04(%[dst]) \n\t"
915 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
916 "gssdlc1 %[p2], 0x03(%[dst]) \n\t"
917 "gssdrc1 %[p2], -0x04(%[dst]) \n\t"
918 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
919 "gssdlc1 %[p1], 0x03(%[dst]) \n\t"
920 "gssdrc1 %[p1], -0x04(%[dst]) \n\t"
921 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
922 "gssdlc1 %[p0], 0x03(%[dst]) \n\t"
923 "gssdrc1 %[p0], -0x04(%[dst]) \n\t"
924 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
925 "gssdlc1 %[q0], 0x03(%[dst]) \n\t"
926 "gssdrc1 %[q0], -0x04(%[dst]) \n\t"
927 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
928 "gssdlc1 %[q1], 0x03(%[dst]) \n\t"
929 "gssdrc1 %[q1], -0x04(%[dst]) \n\t"
930 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
931 "gssdlc1 %[q2], 0x03(%[dst]) \n\t"
932 "gssdrc1 %[q2], -0x04(%[dst]) \n\t"
933 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
934 "gssdlc1 %[q3], 0x03(%[dst]) \n\t"
935 "gssdrc1 %[q3], -0x04(%[dst]) \n\t"
936 : [p3]
"=&f"(ftmp[0]), [p2]
"=&f"(ftmp[1]),
937 [p1]
"=&f"(ftmp[2]), [p0]
"=&f"(ftmp[3]),
938 [
q0]
"=&f"(ftmp[4]), [
q1]
"=&f"(ftmp[5]),
939 [q2]
"=&f"(ftmp[6]), [q3]
"=&f"(ftmp[7]),
940 [ftmp0]
"=&f"(ftmp[8]), [ftmp1]
"=&f"(ftmp[9]),
941 [ftmp2]
"=&f"(ftmp[10]), [ftmp3]
"=&f"(ftmp[11]),
942 [
hev]
"=&f"(ftmp[12]), [
mask]
"=&f"(ftmp[13]),
943 [ftmp4]
"=&f"(ftmp[14]), [ftmp5]
"=&f"(ftmp[15]),
944 [ftmp6]
"=&f"(ftmp[16]), [ftmp7]
"=&f"(ftmp[17]),
945 [dst]
"+&r"(dst), [tmp0]
"=&r"(
tmp[0]),
955 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
959 for (
i = 0;
i < 8;
i++)
961 int hv =
hev(dst +
i *
stride, 1, hev_thresh);
976 MMI_LDC1(%[ftmp0], %[
dc], 0x00)
977 MMI_LDC1(%[ftmp1], %[
dc], 0x08)
978 MMI_LDC1(%[ftmp2], %[
dc], 0x10)
979 MMI_LDC1(%[ftmp3], %[
dc], 0x18)
980 "paddsh %[ftmp4], %[ftmp0], %[ftmp3] \n\t"
981 "psubsh %[ftmp5], %[ftmp0], %[ftmp3] \n\t"
982 "paddsh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
983 "psubsh %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
984 "paddsh %[ftmp0], %[ftmp4], %[ftmp6] \n\t"
985 "paddsh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
986 "psubsh %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
987 "psubsh %[ftmp3], %[ftmp5], %[ftmp7] \n\t"
988 MMI_SDC1(%[ftmp0], %[
dc], 0x00)
989 MMI_SDC1(%[ftmp1], %[
dc], 0x08)
990 MMI_SDC1(%[ftmp2], %[
dc], 0x10)
991 MMI_SDC1(%[ftmp3], %[
dc], 0x18)
992 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
993 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
994 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
995 [ftmp6]
"=&f"(ftmp[6]),
997 [ftmp7]
"=&f"(ftmp[7])
1012 block[2][0][0] = (
dc[8] +
dc[11] + 3 +
dc[9] +
dc[10]) >> 3;
1013 block[2][1][0] = (
dc[8] -
dc[11] + 3 +
dc[9] -
dc[10]) >> 3;
1014 block[2][2][0] = (
dc[8] +
dc[11] + 3 -
dc[9] -
dc[10]) >> 3;
1015 block[2][3][0] = (
dc[8] -
dc[11] + 3 -
dc[9] +
dc[10]) >> 3;
1017 block[3][0][0] = (
dc[12] +
dc[15] + 3 +
dc[13] +
dc[14]) >> 3;
1018 block[3][1][0] = (
dc[12] -
dc[15] + 3 +
dc[13] -
dc[14]) >> 3;
1019 block[3][2][0] = (
dc[12] +
dc[15] + 3 -
dc[13] -
dc[14]) >> 3;
1020 block[3][3][0] = (
dc[12] -
dc[15] + 3 -
dc[13] +
dc[14]) >> 3;
1023 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1024 MMI_SDC1(%[ftmp0], %[
dc], 0x00)
1025 MMI_SDC1(%[ftmp0], %[
dc], 0x08)
1026 MMI_SDC1(%[ftmp0], %[
dc], 0x10)
1027 MMI_SDC1(%[ftmp0], %[
dc], 0x18)
1028 : RESTRICT_ASM_ALL64
1029 [ftmp0]
"=&f"(ftmp[0])
1034 int t00, t01, t02, t03,
t10,
t11,
t12, t13, t20, t21, t22, t23, t30, t31, t32, t33;
1036 t00 =
dc[0] +
dc[12];
1038 t20 =
dc[2] +
dc[14];
1039 t30 =
dc[3] +
dc[15];
1041 t03 =
dc[0] -
dc[12];
1042 t13 =
dc[1] -
dc[13];
1043 t23 =
dc[2] -
dc[14];
1044 t33 =
dc[3] -
dc[15];
1046 t01 =
dc[4] +
dc[ 8];
1048 t21 =
dc[6] +
dc[10];
1049 t31 =
dc[7] +
dc[11];
1051 t02 =
dc[4] -
dc[ 8];
1053 t22 =
dc[6] -
dc[10];
1054 t32 =
dc[7] -
dc[11];
1086 block[2][0][0] = (
dc[8] +
dc[11] + 3 +
dc[9] +
dc[10]) >> 3;
1087 block[2][1][0] = (
dc[8] -
dc[11] + 3 +
dc[9] -
dc[10]) >> 3;
1088 block[2][2][0] = (
dc[8] +
dc[11] + 3 -
dc[9] -
dc[10]) >> 3;
1089 block[2][3][0] = (
dc[8] -
dc[11] + 3 -
dc[9] +
dc[10]) >> 3;
1091 block[3][0][0] = (
dc[12] +
dc[15] + 3 +
dc[13] +
dc[14]) >> 3;
1092 block[3][1][0] = (
dc[12] -
dc[15] + 3 +
dc[13] -
dc[14]) >> 3;
1093 block[3][2][0] = (
dc[12] +
dc[15] + 3 -
dc[13] -
dc[14]) >> 3;
1094 block[3][3][0] = (
dc[12] -
dc[15] + 3 -
dc[13] +
dc[14]) >> 3;
1105 int val = (
dc[0] + 3) >> 3;
1130 DECLARE_ALIGNED(8,
const uint64_t, ff_ph_4e7b) = {0x4e7b4e7b4e7b4e7bULL};
1131 DECLARE_ALIGNED(8,
const uint64_t, ff_ph_22a3) = {0x22a322a322a322a3ULL};
1138 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1139 MMI_LDC1(%[ftmp1], %[
block], 0x00)
1140 MMI_LDC1(%[ftmp2], %[
block], 0x08)
1141 MMI_LDC1(%[ftmp3], %[
block], 0x10)
1142 MMI_LDC1(%[ftmp4], %[
block], 0x18)
1144 "li %[tmp0], 0x02 \n\t"
1145 "mtc1 %[tmp0], %[ftmp11] \n\t"
1148 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1150 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1152 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1153 "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t"
1155 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1156 "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t"
1158 "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t"
1159 "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
1161 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1162 "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t"
1165 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
1166 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
1168 "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
1169 "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
1171 "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
1172 "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
1174 "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t"
1175 "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1177 MMI_SDC1(%[ftmp0], %[
block], 0x00)
1178 MMI_SDC1(%[ftmp0], %[
block], 0x08)
1179 MMI_SDC1(%[ftmp0], %[
block], 0x10)
1180 MMI_SDC1(%[ftmp0], %[
block], 0x18)
1183 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1186 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1188 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1190 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1191 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1192 "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t"
1193 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1194 "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1196 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1197 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1198 "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t"
1199 "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t"
1200 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1202 "li %[tmp0], 0x03 \n\t"
1203 "mtc1 %[tmp0], %[ftmp11] \n\t"
1204 "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t"
1205 "paddh %[ftmp1], %[ftmp1], %[ff_pw_4] \n\t"
1206 "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
1207 "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t"
1208 "paddh %[ftmp2], %[ftmp2], %[ff_pw_4] \n\t"
1209 "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
1210 "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t"
1211 "paddh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t"
1212 "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
1213 "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
1214 "paddh %[ftmp4], %[ftmp4], %[ff_pw_4] \n\t"
1215 "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
1218 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1220 MMI_LWC1(%[ftmp5], %[dst0], 0x00)
1221 MMI_LWC1(%[ftmp6], %[dst1], 0x00)
1222 MMI_LWC1(%[ftmp7], %[dst2], 0x00)
1223 MMI_LWC1(%[ftmp8], %[dst3], 0x00)
1225 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1226 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1227 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1228 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1230 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1231 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1232 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1233 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1235 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1236 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1237 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1238 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1240 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1241 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1242 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1243 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1244 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1245 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1246 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1247 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1248 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1249 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1253 : [dst0]
"r"(dst), [dst1]
"r"(dst+
stride),
1256 [ff_ph_4e7b]
"f"(ff_ph_4e7b), [ff_ph_22a3]
"f"(ff_ph_22a3)
1263 for (
i = 0;
i < 4;
i++) {
1279 for (
i = 0;
i < 4;
i++) {
1282 t2 = MUL_35468(
tmp[4 +
i]) - MUL_20091(
tmp[12 +
i]);
1283 t3 = MUL_20091(
tmp[4 +
i]) + MUL_35468(
tmp[12 +
i]);
1304 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1305 "mtc1 %[dc], %[ftmp5] \n\t"
1306 MMI_LWC1(%[ftmp1], %[dst0], 0x00)
1307 MMI_LWC1(%[ftmp2], %[dst1], 0x00)
1308 MMI_LWC1(%[ftmp3], %[dst2], 0x00)
1309 MMI_LWC1(%[ftmp4], %[dst3], 0x00)
1310 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1311 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1312 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1313 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1314 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1315 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1316 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1317 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1318 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1319 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1320 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1321 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1322 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1323 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1324 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1325 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1326 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1327 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1328 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1329 [ftmp4]
"=&f"(ftmp[4]),
1331 [ftmp5]
"=&f"(ftmp[5])
1332 : [dst0]
"r"(dst), [dst1]
"r"(dst+
stride),
1342 for (
i = 0;
i < 4;
i++) {
1372 int flim_I,
int hev_thresh)
1379 int flim_I,
int hev_thresh)
1387 int flim_E,
int flim_I,
int hev_thresh)
1394 int flim_E,
int flim_I,
int hev_thresh)
1402 int flim_E,
int flim_I,
int hev_thresh)
1406 for (
i = 0;
i < 16;
i++)
1408 int hv =
hev(dst +
i * 1,
stride, hev_thresh);
1417 int flim_E,
int flim_I,
int hev_thresh)
1421 for (
i = 0;
i < 16;
i++)
1423 int hv =
hev(dst +
i *
stride, 1, hev_thresh);
1432 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
1439 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
1449 for (
i = 0;
i < 16;
i++)
1458 for (
i = 0;
i < 16;
i++)
1464 ptrdiff_t srcstride,
int h,
int x,
int y)
1474 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1475 MMI_ULDC1(%[ftmp0], %[
src], 0x00)
1476 "ldl %[tmp0], 0x0f(%[src]) \n\t"
1477 "ldr %[tmp0], 0x08(%[src]) \n\t"
1478 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
1479 "ldl %[tmp1], 0x0f(%[addr0]) \n\t"
1480 "ldr %[tmp1], 0x08(%[addr0]) \n\t"
1481 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1482 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1483 "sdl %[tmp0], 0x0f(%[dst]) \n\t"
1484 "sdr %[tmp0], 0x08(%[dst]) \n\t"
1485 "addiu %[h], %[h], -0x02 \n\t"
1486 MMI_SDC1(%[ftmp1], %[addr1], 0x00)
1487 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1488 "sdl %[tmp1], 0x0f(%[addr1]) \n\t"
1489 "sdr %[tmp1], 0x08(%[addr1]) \n\t"
1490 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1491 "bnez %[h], 1b \n\t"
1492 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1493 [tmp0]
"=&r"(
tmp[0]), [tmp1]
"=&r"(
tmp[1]),
1495 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1496 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
1498 : [dststride]
"r"((
mips_reg)dststride),
1499 [srcstride]
"r"((
mips_reg)srcstride)
1505 for (
i = 0;
i <
h;
i++, dst += dststride,
src += srcstride)
1506 memcpy(dst,
src, 16);
1511 ptrdiff_t srcstride,
int h,
int x,
int y)
1521 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1522 MMI_ULDC1(%[ftmp0], %[
src], 0x00)
1523 "ldl %[tmp0], 0x07(%[addr0]) \n\t"
1524 "ldr %[tmp0], 0x00(%[addr0]) \n\t"
1525 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1526 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1527 "addiu %[h], %[h], -0x02 \n\t"
1528 "sdl %[tmp0], 0x07(%[addr1]) \n\t"
1529 "sdr %[tmp0], 0x00(%[addr1]) \n\t"
1530 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1531 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1532 "bnez %[h], 1b \n\t"
1533 : [ftmp0]
"=&f"(ftmp[0]), [tmp0]
"=&r"(
tmp[0]),
1535 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1536 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
1538 : [dststride]
"r"((
mips_reg)dststride),
1539 [srcstride]
"r"((
mips_reg)srcstride)
1545 for (
i = 0;
i <
h;
i++, dst += dststride,
src += srcstride)
1546 memcpy(dst,
src, 8);
1551 ptrdiff_t srcstride,
int h,
int x,
int y)
1561 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1562 MMI_LWC1(%[ftmp0], %[
src], 0x00)
1563 "lwl %[tmp0], 0x03(%[addr0]) \n\t"
1564 "lwr %[tmp0], 0x00(%[addr0]) \n\t"
1565 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1566 MMI_SWC1(%[ftmp0], %[dst], 0x00)
1567 "addiu %[h], %[h], -0x02 \n\t"
1568 "swl %[tmp0], 0x03(%[addr1]) \n\t"
1569 "swr %[tmp0], 0x00(%[addr1]) \n\t"
1570 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1571 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1572 "bnez %[h], 1b \n\t"
1573 : [ftmp0]
"=&f"(ftmp[0]), [tmp0]
"=&r"(
tmp[0]),
1575 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1576 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
1578 : [dststride]
"r"((
mips_reg)dststride),
1579 [srcstride]
"r"((
mips_reg)srcstride)
1585 for (
i = 0;
i <
h;
i++, dst += dststride,
src += srcstride)
1586 memcpy(dst,
src, 4);
1591 ptrdiff_t srcstride,
int h,
int mx,
int my)
1620 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1621 "li %[tmp0], 0x07 \n\t"
1622 "mtc1 %[tmp0], %[ftmp4] \n\t"
1632 "addiu %[h], %[h], -0x01 \n\t"
1633 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1634 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1635 "bnez %[h], 1b \n\t"
1636 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1637 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1638 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1639 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1640 [ftmp8]
"=&f"(ftmp[8]),
1641 [tmp0]
"=&r"(
tmp[0]),
1643 [dst1]
"=&r"(dst1), [
src1]
"=&r"(
src1),
1645 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1647 [srcstride]
"r"((
mips_reg)srcstride),
1648 [dststride]
"r"((
mips_reg)dststride),
1658 for (y = 0; y <
h; y++) {
1659 for (x = 0; x < 16; x++)
1668 ptrdiff_t srcstride,
int h,
int mx,
int my)
1687 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1688 "li %[tmp0], 0x07 \n\t"
1689 "mtc1 %[tmp0], %[ftmp4] \n\t"
1694 "addiu %[h], %[h], -0x01 \n\t"
1695 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1696 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1697 "bnez %[h], 1b \n\t"
1698 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1699 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1700 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1701 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1702 [ftmp8]
"=&f"(ftmp[8]),
1703 [tmp0]
"=&r"(
tmp[0]),
1706 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1708 [srcstride]
"r"((
mips_reg)srcstride),
1709 [dststride]
"r"((
mips_reg)dststride),
1719 for (y = 0; y <
h; y++) {
1720 for (x = 0; x < 8; x++)
1729 ptrdiff_t srcstride,
int h,
int mx,
int my)
1744 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1745 "li %[tmp0], 0x07 \n\t"
1746 "mtc1 %[tmp0], %[ftmp4] \n\t"
1751 "addiu %[h], %[h], -0x01 \n\t"
1752 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1753 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1754 "bnez %[h], 1b \n\t"
1755 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1756 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1757 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1758 [tmp0]
"=&r"(
tmp[0]),
1761 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1763 [srcstride]
"r"((
mips_reg)srcstride),
1764 [dststride]
"r"((
mips_reg)dststride),
1774 for (y = 0; y <
h; y++) {
1775 for (x = 0; x < 4; x++)
1784 ptrdiff_t srcstride,
int h,
int mx,
int my)
1813 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1814 "li %[tmp0], 0x07 \n\t"
1815 "mtc1 %[tmp0], %[ftmp4] \n\t"
1825 "addiu %[h], %[h], -0x01 \n\t"
1826 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1827 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1828 "bnez %[h], 1b \n\t"
1829 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1830 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1831 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1832 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1833 [ftmp8]
"=&f"(ftmp[8]),
1834 [tmp0]
"=&r"(
tmp[0]),
1836 [dst1]
"=&r"(dst1), [
src1]
"=&r"(
src1),
1838 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1840 [srcstride]
"r"((
mips_reg)srcstride),
1841 [dststride]
"r"((
mips_reg)dststride),
1852 for (y = 0; y <
h; y++) {
1853 for (x = 0; x < 16; x++)
1862 ptrdiff_t srcstride,
int h,
int mx,
int my)
1881 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1882 "li %[tmp0], 0x07 \n\t"
1883 "mtc1 %[tmp0], %[ftmp4] \n\t"
1888 "addiu %[h], %[h], -0x01 \n\t"
1889 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1890 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1891 "bnez %[h], 1b \n\t"
1892 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1893 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1894 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1895 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1896 [ftmp8]
"=&f"(ftmp[8]),
1897 [tmp0]
"=&r"(
tmp[0]),
1900 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1902 [srcstride]
"r"((
mips_reg)srcstride),
1903 [dststride]
"r"((
mips_reg)dststride),
1914 for (y = 0; y <
h; y++) {
1915 for (x = 0; x < 8; x++)
1924 ptrdiff_t srcstride,
int h,
int mx,
int my)
1939 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1940 "li %[tmp0], 0x07 \n\t"
1941 "mtc1 %[tmp0], %[ftmp4] \n\t"
1946 "addiu %[h], %[h], -0x01 \n\t"
1947 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1948 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1949 "bnez %[h], 1b \n\t"
1950 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1951 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1952 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1953 [tmp0]
"=&r"(
tmp[0]),
1956 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1958 [srcstride]
"r"((
mips_reg)srcstride),
1959 [dststride]
"r"((
mips_reg)dststride),
1970 for (y = 0; y <
h; y++) {
1971 for (x = 0; x < 4; x++)
1980 ptrdiff_t srcstride,
int h,
int mx,
int my)
2009 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2010 "li %[tmp0], 0x07 \n\t"
2011 "mtc1 %[tmp0], %[ftmp4] \n\t"
2021 "addiu %[h], %[h], -0x01 \n\t"
2022 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2023 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2024 "bnez %[h], 1b \n\t"
2025 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2026 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2027 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2028 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2029 [ftmp8]
"=&f"(ftmp[8]),
2030 [tmp0]
"=&r"(
tmp[0]),
2032 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2035 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2037 [srcstride]
"r"((
mips_reg)srcstride),
2038 [dststride]
"r"((
mips_reg)dststride),
2048 for (y = 0; y <
h; y++) {
2049 for (x = 0; x < 16; x++)
2058 ptrdiff_t srcstride,
int h,
int mx,
int my)
2078 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2079 "li %[tmp0], 0x07 \n\t"
2080 "mtc1 %[tmp0], %[ftmp4] \n\t"
2085 "addiu %[h], %[h], -0x01 \n\t"
2086 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2087 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2088 "bnez %[h], 1b \n\t"
2089 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2090 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2091 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2092 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2093 [ftmp8]
"=&f"(ftmp[8]),
2094 [tmp0]
"=&r"(
tmp[0]),
2098 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2100 [srcstride]
"r"((
mips_reg)srcstride),
2101 [dststride]
"r"((
mips_reg)dststride),
2111 for (y = 0; y <
h; y++) {
2112 for (x = 0; x < 8; x++)
2121 ptrdiff_t srcstride,
int h,
int mx,
int my)
2137 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2138 "li %[tmp0], 0x07 \n\t"
2139 "mtc1 %[tmp0], %[ftmp4] \n\t"
2144 "addiu %[h], %[h], -0x01 \n\t"
2145 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2146 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2147 "bnez %[h], 1b \n\t"
2148 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2149 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2150 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2151 [tmp0]
"=&r"(
tmp[0]),
2155 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2157 [srcstride]
"r"((
mips_reg)srcstride),
2158 [dststride]
"r"((
mips_reg)dststride),
2168 for (y = 0; y <
h; y++) {
2169 for (x = 0; x < 4; x++)
2178 ptrdiff_t srcstride,
int h,
int mx,
int my)
2207 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2208 "li %[tmp0], 0x07 \n\t"
2209 "mtc1 %[tmp0], %[ftmp4] \n\t"
2219 "addiu %[h], %[h], -0x01 \n\t"
2220 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2221 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2222 "bnez %[h], 1b \n\t"
2223 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2224 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2225 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2226 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2227 [ftmp8]
"=&f"(ftmp[8]),
2228 [tmp0]
"=&r"(
tmp[0]),
2230 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2233 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2235 [srcstride]
"r"((
mips_reg)srcstride),
2236 [dststride]
"r"((
mips_reg)dststride),
2247 for (y = 0; y <
h; y++) {
2248 for (x = 0; x < 16; x++)
2257 ptrdiff_t srcstride,
int h,
int mx,
int my)
2277 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2278 "li %[tmp0], 0x07 \n\t"
2279 "mtc1 %[tmp0], %[ftmp4] \n\t"
2284 "addiu %[h], %[h], -0x01 \n\t"
2285 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2286 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2287 "bnez %[h], 1b \n\t"
2288 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2289 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2290 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2291 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2292 [ftmp8]
"=&f"(ftmp[8]),
2293 [tmp0]
"=&r"(
tmp[0]),
2297 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2299 [srcstride]
"r"((
mips_reg)srcstride),
2300 [dststride]
"r"((
mips_reg)dststride),
2311 for (y = 0; y <
h; y++) {
2312 for (x = 0; x < 8; x++)
2321 ptrdiff_t srcstride,
int h,
int mx,
int my)
2337 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2338 "li %[tmp0], 0x07 \n\t"
2339 "mtc1 %[tmp0], %[ftmp4] \n\t"
2344 "addiu %[h], %[h], -0x01 \n\t"
2345 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2346 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2347 "bnez %[h], 1b \n\t"
2348 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2349 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2350 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2351 [tmp0]
"=&r"(
tmp[0]),
2355 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2357 [srcstride]
"r"((
mips_reg)srcstride),
2358 [dststride]
"r"((
mips_reg)dststride),
2369 for (y = 0; y <
h; y++) {
2370 for (x = 0; x < 4; x++)
2379 ptrdiff_t srcstride,
int h,
int mx,
int my)
2387 tmp = tmp_array + 16;
2398 for (y = 0; y <
h + 3; y++) {
2399 for (x = 0; x < 16; x++)
2405 tmp = tmp_array + 16;
2408 for (y = 0; y <
h; y++) {
2409 for (x = 0; x < 16; x++)
2418 ptrdiff_t srcstride,
int h,
int mx,
int my)
2426 tmp = tmp_array + 8;
2437 for (y = 0; y <
h + 3; y++) {
2438 for (x = 0; x < 8; x++)
2444 tmp = tmp_array + 8;
2447 for (y = 0; y <
h; y++) {
2448 for (x = 0; x < 8; x++)
2457 ptrdiff_t srcstride,
int h,
int mx,
int my)
2465 tmp = tmp_array + 4;
2476 for (y = 0; y <
h + 3; y++) {
2477 for (x = 0; x < 4; x++)
2482 tmp = tmp_array + 4;
2485 for (y = 0; y <
h; y++) {
2486 for (x = 0; x < 4; x++)
2495 ptrdiff_t srcstride,
int h,
int mx,
int my)
2501 src -= 2 * srcstride;
2503 tmp = tmp_array + 32;
2512 src -= 2 * srcstride;
2514 for (y = 0; y <
h + 5; y++) {
2515 for (x = 0; x < 16; x++)
2521 tmp = tmp_array + 32;
2524 for (y = 0; y <
h; y++) {
2525 for (x = 0; x < 16; x++)
2534 ptrdiff_t srcstride,
int h,
int mx,
int my)
2540 src -= 2 * srcstride;
2542 tmp = tmp_array + 16;
2551 src -= 2 * srcstride;
2553 for (y = 0; y <
h + 5; y++) {
2554 for (x = 0; x < 8; x++)
2560 tmp = tmp_array + 16;
2563 for (y = 0; y <
h; y++) {
2564 for (x = 0; x < 8; x++)
2573 ptrdiff_t srcstride,
int h,
int mx,
int my)
2579 src -= 2 * srcstride;
2581 tmp = tmp_array + 8;
2590 src -= 2 * srcstride;
2592 for (y = 0; y <
h + 5; y++) {
2593 for (x = 0; x < 4; x++)
2599 tmp = tmp_array + 8;
2602 for (y = 0; y <
h; y++) {
2603 for (x = 0; x < 4; x++)
2612 ptrdiff_t srcstride,
int h,
int mx,
int my)
2620 tmp = tmp_array + 16;
2631 for (y = 0; y <
h + 3; y++) {
2632 for (x = 0; x < 16; x++)
2638 tmp = tmp_array + 16;
2641 for (y = 0; y <
h; y++) {
2642 for (x = 0; x < 16; x++)
2651 ptrdiff_t srcstride,
int h,
int mx,
int my)
2659 tmp = tmp_array + 8;
2670 for (y = 0; y <
h + 3; y++) {
2671 for (x = 0; x < 8; x++)
2677 tmp = tmp_array + 8;
2680 for (y = 0; y <
h; y++) {
2681 for (x = 0; x < 8; x++)
2690 ptrdiff_t srcstride,
int h,
int mx,
int my)
2698 tmp = tmp_array + 4;
2709 for (y = 0; y <
h + 3; y++) {
2710 for (x = 0; x < 4; x++)
2716 tmp = tmp_array + 4;
2719 for (y = 0; y <
h; y++) {
2720 for (x = 0; x < 4; x++)
2729 ptrdiff_t srcstride,
int h,
int mx,
int my)
2735 src -= 2 * srcstride;
2737 tmp = tmp_array + 32;
2746 src -= 2 * srcstride;
2748 for (y = 0; y <
h + 5; y++) {
2749 for (x = 0; x < 16; x++)
2755 tmp = tmp_array + 32;
2758 for (y = 0; y <
h; y++) {
2759 for (x = 0; x < 16; x++)
2768 ptrdiff_t srcstride,
int h,
int mx,
int my)
2774 src -= 2 * srcstride;
2776 tmp = tmp_array + 16;
2785 src -= 2 * srcstride;
2787 for (y = 0; y <
h + 5; y++) {
2788 for (x = 0; x < 8; x++)
2794 tmp = tmp_array + 16;
2797 for (y = 0; y <
h; y++) {
2798 for (x = 0; x < 8; x++)
2807 ptrdiff_t srcstride,
int h,
int mx,
int my)
2813 src -= 2 * srcstride;
2815 tmp = tmp_array + 8;
2824 src -= 2 * srcstride;
2826 for (y = 0; y <
h + 5; y++) {
2827 for (x = 0; x < 4; x++)
2833 tmp = tmp_array + 8;
2836 for (y = 0; y <
h; y++) {
2837 for (x = 0; x < 4; x++)
2846 ptrdiff_t sstride,
int h,
int mx,
int my)
2849 int a = 8 - mx,
b = mx;
2875 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2876 "li %[tmp0], 0x03 \n\t"
2877 "mtc1 %[tmp0], %[ftmp4] \n\t"
2878 "pshufh %[a], %[a], %[ftmp0] \n\t"
2879 "pshufh %[b], %[b], %[ftmp0] \n\t"
2889 "addiu %[h], %[h], -0x01 \n\t"
2890 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
2891 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
2892 "bnez %[h], 1b \n\t"
2893 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2894 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2895 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2896 [ftmp6]
"=&f"(ftmp[6]),
2897 [tmp0]
"=&r"(
tmp[0]),
2899 [dst0]
"=&r"(dst0), [
src0]
"=&r"(
src0),
2901 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
2902 [
a]
"+&f"(
a), [
b]
"+&f"(
b)
2909 int a = 8 - mx,
b = mx;
2912 for (y = 0; y <
h; y++) {
2913 for (x = 0; x < 16; x++)
2914 dst[x] = (
a *
src[x] +
b *
src[x + 1] + 4) >> 3;
2922 ptrdiff_t sstride,
int h,
int mx,
int my)
2925 int c = 8 - my, d = my;
2942 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2943 "li %[tmp0], 0x03 \n\t"
2944 "mtc1 %[tmp0], %[ftmp4] \n\t"
2945 "pshufh %[c], %[c], %[ftmp0] \n\t"
2946 "pshufh %[d], %[d], %[ftmp0] \n\t"
2956 "addiu %[h], %[h], -0x01 \n\t"
2957 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
2958 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
2959 "bnez %[h], 1b \n\t"
2960 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2961 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2962 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2963 [ftmp6]
"=&f"(ftmp[6]),
2964 [tmp0]
"=&r"(
tmp[0]),
2966 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2969 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
2970 [
c]
"+&f"(
c), [d]
"+&f"(d)
2977 int c = 8 - my, d = my;
2980 for (y = 0; y <
h; y++) {
2981 for (x = 0; x < 16; x++)
2982 dst[x] = (
c *
src[x] + d *
src[x + sstride] + 4) >> 3;
2990 ptrdiff_t sstride,
int h,
int mx,
int my)
2999 int a = 8 - mx,
b = mx;
3000 int c = 8 - my, d = my;
3005 for (y = 0; y <
h + 1; y++) {
3006 for (x = 0; x < 16; x++)
3014 for (y = 0; y <
h; y++) {
3015 for (x = 0; x < 16; x++)
3016 dst[x] = (
c *
tmp[x] + d *
tmp[x + 16] + 4) >> 3;
3024 ptrdiff_t sstride,
int h,
int mx,
int my)
3027 int a = 8 - mx,
b = mx;
3043 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3044 "li %[tmp0], 0x03 \n\t"
3045 "mtc1 %[tmp0], %[ftmp4] \n\t"
3046 "pshufh %[a], %[a], %[ftmp0] \n\t"
3047 "pshufh %[b], %[b], %[ftmp0] \n\t"
3052 "addiu %[h], %[h], -0x01 \n\t"
3053 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3054 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3055 "bnez %[h], 1b \n\t"
3056 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3057 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3058 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3059 [ftmp6]
"=&f"(ftmp[6]),
3060 [tmp0]
"=&r"(
tmp[0]),
3063 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3064 [
a]
"+&f"(
a), [
b]
"+&f"(
b)
3071 int a = 8 - mx,
b = mx;
3074 for (y = 0; y <
h; y++) {
3075 for (x = 0; x < 8; x++)
3076 dst[x] = (
a *
src[x] +
b *
src[x + 1] + 4) >> 3;
3084 ptrdiff_t sstride,
int h,
int mx,
int my)
3087 int c = 8 - my, d = my;
3104 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3105 "li %[tmp0], 0x03 \n\t"
3106 "mtc1 %[tmp0], %[ftmp4] \n\t"
3107 "pshufh %[c], %[c], %[ftmp0] \n\t"
3108 "pshufh %[d], %[d], %[ftmp0] \n\t"
3113 "addiu %[h], %[h], -0x01 \n\t"
3114 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3115 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3116 "bnez %[h], 1b \n\t"
3117 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3118 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3119 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3120 [ftmp6]
"=&f"(ftmp[6]),
3121 [tmp0]
"=&r"(
tmp[0]),
3125 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3126 [
c]
"+&f"(
c), [d]
"+&f"(d)
3133 int c = 8 - my, d = my;
3136 for (y = 0; y <
h; y++) {
3137 for (x = 0; x < 8; x++)
3138 dst[x] = (
c *
src[x] + d *
src[x + sstride] + 4) >> 3;
3146 ptrdiff_t sstride,
int h,
int mx,
int my)
3155 int a = 8 - mx,
b = mx;
3156 int c = 8 - my, d = my;
3161 for (y = 0; y <
h + 1; y++) {
3162 for (x = 0; x < 8; x++)
3170 for (y = 0; y <
h; y++) {
3171 for (x = 0; x < 8; x++)
3172 dst[x] = (
c *
tmp[x] + d *
tmp[x + 8] + 4) >> 3;
3180 ptrdiff_t sstride,
int h,
int mx,
int my)
3183 int a = 8 - mx,
b = mx;
3196 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3197 "li %[tmp0], 0x03 \n\t"
3198 "mtc1 %[tmp0], %[ftmp4] \n\t"
3199 "pshufh %[a], %[a], %[ftmp0] \n\t"
3200 "pshufh %[b], %[b], %[ftmp0] \n\t"
3205 "addiu %[h], %[h], -0x01 \n\t"
3206 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3207 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3208 "bnez %[h], 1b \n\t"
3209 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3210 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3211 [ftmp4]
"=&f"(ftmp[4]),
3212 [tmp0]
"=&r"(
tmp[0]),
3216 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3217 [
a]
"+&f"(
a), [
b]
"+&f"(
b)
3224 int a = 8 - mx,
b = mx;
3227 for (y = 0; y <
h; y++) {
3228 for (x = 0; x < 4; x++)
3229 dst[x] = (
a *
src[x] +
b *
src[x + 1] + 4) >> 3;
3237 ptrdiff_t sstride,
int h,
int mx,
int my)
3240 int c = 8 - my, d = my;
3254 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3255 "li %[tmp0], 0x03 \n\t"
3256 "mtc1 %[tmp0], %[ftmp4] \n\t"
3257 "pshufh %[c], %[c], %[ftmp0] \n\t"
3258 "pshufh %[d], %[d], %[ftmp0] \n\t"
3263 "addiu %[h], %[h], -0x01 \n\t"
3264 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3265 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3266 "bnez %[h], 1b \n\t"
3267 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3268 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3269 [ftmp4]
"=&f"(ftmp[4]),
3270 [tmp0]
"=&r"(
tmp[0]),
3275 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3276 [
c]
"+&f"(
c), [d]
"+&f"(d)
3283 int c = 8 - my, d = my;
3286 for (y = 0; y <
h; y++) {
3287 for (x = 0; x < 4; x++)
3288 dst[x] = (
c *
src[x] + d *
src[x + sstride] + 4) >> 3;
3296 ptrdiff_t sstride,
int h,
int mx,
int my)
3305 int a = 8 - mx,
b = mx;
3306 int c = 8 - my, d = my;
3311 for (y = 0; y <
h + 1; y++) {
3312 for (x = 0; x < 4; x++)
3320 for (y = 0; y <
h; y++) {
3321 for (x = 0; x < 4; x++)
3322 dst[x] = (
c *
tmp[x] + d *
tmp[x + 4] + 4) >> 3;
static double val(void *priv, double ch)
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
uint8_t pi<< 24) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0f/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8, uint8_t,(*(const uint8_t *) pi - 0x80) *(1.0/(1<< 7))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16, int16_t,(*(const int16_t *) pi >> 8)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16, int16_t, *(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32, int32_t,(*(const int32_t *) pi >> 24)+0x80) CONV_FUNC_GROUP(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32, int32_t, *(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, float, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, float, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, float, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, double, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, double, av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC_GROUP(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, double, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) #define SET_CONV_FUNC_GROUP(ofmt, ifmt) static void set_generic_function(AudioConvert *ac) { } void ff_audio_convert_free(AudioConvert **ac) { if(! 
*ac) return;ff_dither_free(&(*ac) ->dc);av_freep(ac);} AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, int sample_rate, int apply_map) { AudioConvert *ac;int in_planar, out_planar;ac=av_mallocz(sizeof(*ac));if(!ac) return NULL;ac->avr=avr;ac->out_fmt=out_fmt;ac->in_fmt=in_fmt;ac->channels=channels;ac->apply_map=apply_map;if(avr->dither_method !=AV_RESAMPLE_DITHER_NONE &&av_get_packed_sample_fmt(out_fmt)==AV_SAMPLE_FMT_S16 &&av_get_bytes_per_sample(in_fmt) > 2) { ac->dc=ff_dither_alloc(avr, out_fmt, in_fmt, channels, sample_rate, apply_map);if(!ac->dc) { av_free(ac);return NULL;} return ac;} in_planar=ff_sample_fmt_is_planar(in_fmt, channels);out_planar=ff_sample_fmt_is_planar(out_fmt, channels);if(in_planar==out_planar) { ac->func_type=CONV_FUNC_TYPE_FLAT;ac->planes=in_planar ? ac->channels :1;} else if(in_planar) ac->func_type=CONV_FUNC_TYPE_INTERLEAVE;else ac->func_type=CONV_FUNC_TYPE_DEINTERLEAVE;set_generic_function(ac);if(ARCH_AARCH64) ff_audio_convert_init_aarch64(ac);if(ARCH_ARM) ff_audio_convert_init_arm(ac);if(ARCH_X86) ff_audio_convert_init_x86(ac);return ac;} int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic=1;int len=in->nb_samples;int p;if(ac->dc) { av_log(ac->avr, AV_LOG_TRACE, "%d samples - audio_convert: %s to %s (dithered)\n", len, av_get_sample_fmt_name(ac->in_fmt), av_get_sample_fmt_name(ac->out_fmt));return ff_convert_dither(ac-> dc
static av_always_inline void filter(int16_t *output, ptrdiff_t out_stride, const int16_t *low, ptrdiff_t low_stride, const int16_t *high, ptrdiff_t high_stride, int len, int clip)
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
#define FILTER_6TAP(src, F, stride)
#define FILTER_4TAP(src, F, stride)
static const uint8_t subpel_filters[7][6]
static const uint16_t mask[17]
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 8x8 byte packaged data.
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 4X4 half word packaged data.
static const uint8_t q1[256]
static const uint8_t q0[256]
#define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride)
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16])
#define MMI_VP8_LOOP_FILTER
#define PUT_VP8_BILINEAR8_H_MMI(src, dst)
void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_H6_MMI(src, dst)
void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
#define PUT_VP8_EPEL8_H4_MMI(src, dst)
static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static const uint64_t fourtap_subpel_filters[7][6]
void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static av_always_inline void vp8_filter_common_isnot4tap(uint8_t *p, ptrdiff_t stride)
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
#define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride)
void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline int vp8_normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
#define PUT_VP8_BILINEAR4_H_MMI(src, dst)
#define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride)
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
static av_always_inline void vp8_filter_common_is4tap(uint8_t *p, ptrdiff_t stride)
void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
#define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride)
void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL4_H6_MMI(src, dst)
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
static av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define RESTRICT_ASM_DOUBLE_1
void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define RESTRICT_ASM_DOUBLE_2
void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define RESTRICT_ASM_UINT32_T
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
#define PUT_VP8_EPEL4_H4_MMI(src, dst)
void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride)
void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride)