FFmpeg  4.4.5
g722enc.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) CMU 1993 Computer Science, Speech Group
3  * Chengxiang Lu and Alex Hauptmann
4  * Copyright (c) 2005 Steve Underwood <steveu at coppice.org>
5  * Copyright (c) 2009 Kenan Gillet
6  * Copyright (c) 2010 Martin Storsjo
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 /**
26  * @file
27  * G.722 ADPCM audio encoder
28  */
29 
30 #include "libavutil/avassert.h"
31 #include "avcodec.h"
32 #include "internal.h"
33 #include "g722.h"
34 #include "libavutil/common.h"
35 
36 #define FREEZE_INTERVAL 128
37 
38 /* This is an arbitrary value. Allowing insanely large values leads to strange
39  problems, so we limit it to a reasonable value */
40 #define MAX_FRAME_SIZE 32768
41 
42 /* We clip the value of avctx->trellis to prevent data type overflows and
43  undefined behavior. Using larger values is insanely slow anyway. */
44 #define MIN_TRELLIS 0
45 #define MAX_TRELLIS 16
46 
48 {
49  G722Context *c = avctx->priv_data;
50  int i;
51  for (i = 0; i < 2; i++) {
52  av_freep(&c->paths[i]);
53  av_freep(&c->node_buf[i]);
54  av_freep(&c->nodep_buf[i]);
55  }
56  return 0;
57 }
58 
60 {
61  G722Context *c = avctx->priv_data;
62 
63  c->band[0].scale_factor = 8;
64  c->band[1].scale_factor = 2;
65  c->prev_samples_pos = 22;
66 
67  if (avctx->frame_size) {
68  /* validate frame size */
69  if (avctx->frame_size & 1 || avctx->frame_size > MAX_FRAME_SIZE) {
70  int new_frame_size;
71 
72  if (avctx->frame_size == 1)
73  new_frame_size = 2;
74  else if (avctx->frame_size > MAX_FRAME_SIZE)
75  new_frame_size = MAX_FRAME_SIZE;
76  else
77  new_frame_size = avctx->frame_size - 1;
78 
79  av_log(avctx, AV_LOG_WARNING, "Requested frame size is not "
80  "allowed. Using %d instead of %d\n", new_frame_size,
81  avctx->frame_size);
82  avctx->frame_size = new_frame_size;
83  }
84  } else {
85  /* This is arbitrary. We use 320 because it's 20ms @ 16kHz, which is
86  a common packet size for VoIP applications */
87  avctx->frame_size = 320;
88  }
89  avctx->initial_padding = 22;
90 
91  if (avctx->trellis) {
92  /* validate trellis */
93  if (avctx->trellis < MIN_TRELLIS || avctx->trellis > MAX_TRELLIS) {
94  int new_trellis = av_clip(avctx->trellis, MIN_TRELLIS, MAX_TRELLIS);
95  av_log(avctx, AV_LOG_WARNING, "Requested trellis value is not "
96  "allowed. Using %d instead of %d\n", new_trellis,
97  avctx->trellis);
98  avctx->trellis = new_trellis;
99  }
100  if (avctx->trellis) {
101  int frontier = 1 << avctx->trellis;
102  int max_paths = frontier * FREEZE_INTERVAL;
103 
104  for (int i = 0; i < 2; i++) {
105  c->paths[i] = av_calloc(max_paths, sizeof(**c->paths));
106  c->node_buf[i] = av_calloc(frontier, 2 * sizeof(**c->node_buf));
107  c->nodep_buf[i] = av_calloc(frontier, 2 * sizeof(**c->nodep_buf));
108  if (!c->paths[i] || !c->node_buf[i] || !c->nodep_buf[i])
109  return AVERROR(ENOMEM);
110  }
111  }
112  }
113 
114  ff_g722dsp_init(&c->dsp);
115 
116  return 0;
117 }
118 
/*
 * Decision thresholds used by encode_low(): each entry is multiplied by the
 * band's scale factor and compared against the scaled difference magnitude.
 * Only the first 29 entries are given explicitly; the remaining four are
 * zero-initialised, and encode_low() never looks past index 28.
 */
static const int16_t low_quant[33] = {
    /*  0.. 3 */   35,   72,  110,  150,
    /*  4.. 7 */  190,  233,  276,  323,
    /*  8..11 */  370,  422,  473,  530,
    /* 12..15 */  587,  650,  714,  786,
    /* 16..19 */  858,  940, 1023, 1121,
    /* 20..23 */ 1219, 1339, 1458, 1612,
    /* 24..27 */ 1765, 1980, 2195, 2557,
    /* 28     */ 2919
};
125 
126 static inline void filter_samples(G722Context *c, const int16_t *samples,
127  int *xlow, int *xhigh)
128 {
129  int xout[2];
130  c->prev_samples[c->prev_samples_pos++] = samples[0];
131  c->prev_samples[c->prev_samples_pos++] = samples[1];
132  c->dsp.apply_qmf(c->prev_samples + c->prev_samples_pos - 24, xout);
133  *xlow = xout[0] + xout[1] >> 14;
134  *xhigh = xout[0] - xout[1] >> 14;
135  if (c->prev_samples_pos >= PREV_SAMPLES_BUF_SIZE) {
136  memmove(c->prev_samples,
137  c->prev_samples + c->prev_samples_pos - 22,
138  22 * sizeof(c->prev_samples[0]));
139  c->prev_samples_pos = 22;
140  }
141 }
142 
143 static inline int encode_high(const struct G722Band *state, int xhigh)
144 {
145  int diff = av_clip_int16(xhigh - state->s_predictor);
146  int pred = 141 * state->scale_factor >> 8;
147  /* = diff >= 0 ? (diff < pred) + 2 : diff >= -pred */
148  return ((diff ^ (diff >> (sizeof(diff)*8-1))) < pred) + 2*(diff >= 0);
149 }
150 
151 static inline int encode_low(const struct G722Band* state, int xlow)
152 {
153  int diff = av_clip_int16(xlow - state->s_predictor);
154  /* = diff >= 0 ? diff : -(diff + 1) */
155  int limit = diff ^ (diff >> (sizeof(diff)*8-1));
156  int i = 0;
157  limit = limit + 1 << 10;
158  if (limit > low_quant[8] * state->scale_factor)
159  i = 9;
160  while (i < 29 && limit > low_quant[i] * state->scale_factor)
161  i++;
162  return (diff < 0 ? (i < 2 ? 63 : 33) : 61) - i;
163 }
164 
165 static void g722_encode_trellis(G722Context *c, int trellis,
166  uint8_t *dst, int nb_samples,
167  const int16_t *samples)
168 {
169  int i, j, k;
170  int frontier = 1 << trellis;
171  struct TrellisNode **nodes[2];
172  struct TrellisNode **nodes_next[2];
173  int pathn[2] = {0, 0}, froze = -1;
174  struct TrellisPath *p[2];
175 
176  for (i = 0; i < 2; i++) {
177  nodes[i] = c->nodep_buf[i];
178  nodes_next[i] = c->nodep_buf[i] + frontier;
179  memset(c->nodep_buf[i], 0, 2 * frontier * sizeof(*c->nodep_buf[i]));
180  nodes[i][0] = c->node_buf[i] + frontier;
181  nodes[i][0]->ssd = 0;
182  nodes[i][0]->path = 0;
183  nodes[i][0]->state = c->band[i];
184  }
185 
186  for (i = 0; i < nb_samples >> 1; i++) {
187  int xlow, xhigh;
188  struct TrellisNode *next[2];
189  int heap_pos[2] = {0, 0};
190 
191  for (j = 0; j < 2; j++) {
192  next[j] = c->node_buf[j] + frontier*(i & 1);
193  memset(nodes_next[j], 0, frontier * sizeof(**nodes_next));
194  }
195 
196  filter_samples(c, &samples[2*i], &xlow, &xhigh);
197 
198  for (j = 0; j < frontier && nodes[0][j]; j++) {
199  /* Only k >> 2 affects the future adaptive state, therefore testing
200  * small steps that don't change k >> 2 is useless, the original
201  * value from encode_low is better than them. Since we step k
202  * in steps of 4, make sure range is a multiple of 4, so that
203  * we don't miss the original value from encode_low. */
204  int range = j < frontier/2 ? 4 : 0;
205  struct TrellisNode *cur_node = nodes[0][j];
206 
207  int ilow = encode_low(&cur_node->state, xlow);
208 
209  for (k = ilow - range; k <= ilow + range && k <= 63; k += 4) {
210  int decoded, dec_diff, pos;
211  uint32_t ssd;
212  struct TrellisNode* node;
213 
214  if (k < 0)
215  continue;
216 
217  decoded = av_clip_intp2((cur_node->state.scale_factor *
218  ff_g722_low_inv_quant6[k] >> 10)
219  + cur_node->state.s_predictor, 14);
220  dec_diff = xlow - decoded;
221 
222 #define STORE_NODE(index, UPDATE, VALUE)\
223  ssd = cur_node->ssd + dec_diff*dec_diff;\
224  /* Check for wraparound. Using 64 bit ssd counters would \
225  * be simpler, but is slower on x86 32 bit. */\
226  if (ssd < cur_node->ssd)\
227  continue;\
228  if (heap_pos[index] < frontier) {\
229  pos = heap_pos[index]++;\
230  av_assert2(pathn[index] < FREEZE_INTERVAL * frontier);\
231  node = nodes_next[index][pos] = next[index]++;\
232  node->path = pathn[index]++;\
233  } else {\
234  /* Try to replace one of the leaf nodes with the new \
235  * one, but not always testing the same leaf position */\
236  pos = (frontier>>1) + (heap_pos[index] & ((frontier>>1) - 1));\
237  if (ssd >= nodes_next[index][pos]->ssd)\
238  continue;\
239  heap_pos[index]++;\
240  node = nodes_next[index][pos];\
241  }\
242  node->ssd = ssd;\
243  node->state = cur_node->state;\
244  UPDATE;\
245  c->paths[index][node->path].value = VALUE;\
246  c->paths[index][node->path].prev = cur_node->path;\
247  /* Sift the newly inserted node up in the heap to restore \
248  * the heap property */\
249  while (pos > 0) {\
250  int parent = (pos - 1) >> 1;\
251  if (nodes_next[index][parent]->ssd <= ssd)\
252  break;\
253  FFSWAP(struct TrellisNode*, nodes_next[index][parent],\
254  nodes_next[index][pos]);\
255  pos = parent;\
256  }
257  STORE_NODE(0, ff_g722_update_low_predictor(&node->state, k >> 2), k);
258  }
259  }
260 
261  for (j = 0; j < frontier && nodes[1][j]; j++) {
262  int ihigh;
263  struct TrellisNode *cur_node = nodes[1][j];
264 
265  /* We don't try to get any initial guess for ihigh via
266  * encode_high - since there's only 4 possible values, test
267  * them all. Testing all of these gives a much, much larger
268  * gain than testing a larger range around ilow. */
269  for (ihigh = 0; ihigh < 4; ihigh++) {
270  int dhigh, decoded, dec_diff, pos;
271  uint32_t ssd;
272  struct TrellisNode* node;
273 
274  dhigh = cur_node->state.scale_factor *
275  ff_g722_high_inv_quant[ihigh] >> 10;
276  decoded = av_clip_intp2(dhigh + cur_node->state.s_predictor, 14);
277  dec_diff = xhigh - decoded;
278 
279  STORE_NODE(1, ff_g722_update_high_predictor(&node->state, dhigh, ihigh), ihigh);
280  }
281  }
282 
283  for (j = 0; j < 2; j++) {
284  FFSWAP(struct TrellisNode**, nodes[j], nodes_next[j]);
285 
286  if (nodes[j][0]->ssd > (1 << 16)) {
287  for (k = 1; k < frontier && nodes[j][k]; k++)
288  nodes[j][k]->ssd -= nodes[j][0]->ssd;
289  nodes[j][0]->ssd = 0;
290  }
291  }
292 
293  if (i == froze + FREEZE_INTERVAL) {
294  p[0] = &c->paths[0][nodes[0][0]->path];
295  p[1] = &c->paths[1][nodes[1][0]->path];
296  for (j = i; j > froze; j--) {
297  dst[j] = p[1]->value << 6 | p[0]->value;
298  p[0] = &c->paths[0][p[0]->prev];
299  p[1] = &c->paths[1][p[1]->prev];
300  }
301  froze = i;
302  pathn[0] = pathn[1] = 0;
303  memset(nodes[0] + 1, 0, (frontier - 1)*sizeof(**nodes));
304  memset(nodes[1] + 1, 0, (frontier - 1)*sizeof(**nodes));
305  }
306  }
307 
308  p[0] = &c->paths[0][nodes[0][0]->path];
309  p[1] = &c->paths[1][nodes[1][0]->path];
310  for (j = i; j > froze; j--) {
311  dst[j] = p[1]->value << 6 | p[0]->value;
312  p[0] = &c->paths[0][p[0]->prev];
313  p[1] = &c->paths[1][p[1]->prev];
314  }
315  c->band[0] = nodes[0][0]->state;
316  c->band[1] = nodes[1][0]->state;
317 }
318 
320  const int16_t *samples)
321 {
322  int xlow, xhigh, ilow, ihigh;
323  filter_samples(c, samples, &xlow, &xhigh);
324  ihigh = encode_high(&c->band[1], xhigh);
325  ilow = encode_low (&c->band[0], xlow);
326  ff_g722_update_high_predictor(&c->band[1], c->band[1].scale_factor *
327  ff_g722_high_inv_quant[ihigh] >> 10, ihigh);
328  ff_g722_update_low_predictor(&c->band[0], ilow >> 2);
329  *dst = ihigh << 6 | ilow;
330 }
331 
333  uint8_t *dst, int nb_samples,
334  const int16_t *samples)
335 {
336  int i;
337  for (i = 0; i < nb_samples; i += 2)
338  encode_byte(c, dst++, &samples[i]);
339 }
340 
341 static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
342  const AVFrame *frame, int *got_packet_ptr)
343 {
344  G722Context *c = avctx->priv_data;
345  const int16_t *samples = (const int16_t *)frame->data[0];
346  int nb_samples, out_size, ret;
347 
348  out_size = (frame->nb_samples + 1) / 2;
349  if ((ret = ff_alloc_packet2(avctx, avpkt, out_size, 0)) < 0)
350  return ret;
351 
352  nb_samples = frame->nb_samples - (frame->nb_samples & 1);
353 
354  if (avctx->trellis)
355  g722_encode_trellis(c, avctx->trellis, avpkt->data, nb_samples, samples);
356  else
357  g722_encode_no_trellis(c, avpkt->data, nb_samples, samples);
358 
359  /* handle last frame with odd frame_size */
360  if (nb_samples < frame->nb_samples) {
361  int16_t last_samples[2] = { samples[nb_samples], samples[nb_samples] };
362  encode_byte(c, &avpkt->data[nb_samples >> 1], last_samples);
363  }
364 
365  if (frame->pts != AV_NOPTS_VALUE)
366  avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
367  *got_packet_ptr = 1;
368  return 0;
369 }
370 
372  .name = "g722",
373  .long_name = NULL_IF_CONFIG_SMALL("G.722 ADPCM"),
374  .type = AVMEDIA_TYPE_AUDIO,
376  .priv_data_size = sizeof(G722Context),
378  .close = g722_encode_close,
379  .encode2 = g722_encode_frame,
380  .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME,
382  .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO, 0 },
383  .caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
384 };
static enum AVSampleFormat sample_fmts[]
Definition: adpcmenc.c:925
#define av_always_inline
Definition: attributes.h:45
#define av_cold
Definition: attributes.h:88
uint8_t
simple assert() macros that are a bit more flexible than ISO C assert().
Libavcodec external API header.
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:31
static struct @321 state
common internal and external API header
#define av_clip_intp2
Definition: common.h:143
#define FFSWAP(type, a, b)
Definition: common.h:108
#define av_clip
Definition: common.h:122
#define av_clip_int16
Definition: common.h:137
static AVFrame * frame
int ff_alloc_packet2(AVCodecContext *avctx, AVPacket *avpkt, int64_t size, int64_t min_size)
Check AVPacket size and/or allocate data.
Definition: encode.c:33
#define PREV_SAMPLES_BUF_SIZE
Definition: g722.h:32
static int encode_low(const struct G722Band *state, int xlow)
Definition: g722enc.c:151
#define MAX_TRELLIS
Definition: g722enc.c:45
static void filter_samples(G722Context *c, const int16_t *samples, int *xlow, int *xhigh)
Definition: g722enc.c:126
static av_cold int g722_encode_init(AVCodecContext *avctx)
Definition: g722enc.c:59
static int g722_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, const AVFrame *frame, int *got_packet_ptr)
Definition: g722enc.c:338
static void g722_encode_no_trellis(G722Context *c, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:329
static int encode_high(const struct G722Band *state, int xhigh)
Definition: g722enc.c:143
#define STORE_NODE(index, UPDATE, VALUE)
static av_always_inline void encode_byte(G722Context *c, uint8_t *dst, const int16_t *samples)
Definition: g722enc.c:316
static av_cold int g722_encode_close(AVCodecContext *avctx)
Definition: g722enc.c:47
static const int16_t low_quant[33]
Definition: g722enc.c:119
static void g722_encode_trellis(G722Context *c, int trellis, uint8_t *dst, int nb_samples, const int16_t *samples)
Definition: g722enc.c:165
#define FREEZE_INTERVAL
Definition: g722enc.c:36
#define MAX_FRAME_SIZE
Definition: g722enc.c:40
AVCodec ff_adpcm_g722_encoder
Definition: g722enc.c:368
#define MIN_TRELLIS
Definition: g722enc.c:44
#define AV_CH_LAYOUT_MONO
#define AV_CODEC_CAP_SMALL_LAST_FRAME
Codec can be fed a final frame with a smaller size.
Definition: codec.h:82
@ AV_CODEC_ID_ADPCM_G722
Definition: codec_id.h:381
#define AVERROR(e)
Definition: error.h:43
#define AV_LOG_WARNING
Something somehow does not look correct.
Definition: log.h:200
void * av_calloc(size_t nmemb, size_t size)
Non-inlined equivalent of av_mallocz_array().
Definition: mem.c:245
@ AVMEDIA_TYPE_AUDIO
Definition: avutil.h:202
AVSampleFormat
Audio sample formats.
Definition: samplefmt.h:58
@ AV_SAMPLE_FMT_NONE
Definition: samplefmt.h:59
@ AV_SAMPLE_FMT_S16
signed 16 bits
Definition: samplefmt.h:61
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248
int i
Definition: input.c:407
const int16_t ff_g722_low_inv_quant6[64]
Definition: g722.c:63
void ff_g722_update_high_predictor(struct G722Band *band, const int dhigh, const int ihigh)
Definition: g722.c:154
const int16_t ff_g722_high_inv_quant[4]
Definition: g722.c:51
void ff_g722_update_low_predictor(struct G722Band *band, const int ilow)
Definition: g722.c:143
av_cold void ff_g722dsp_init(G722DSPContext *c)
Definition: g722dsp.c:68
static av_always_inline int64_t ff_samples_to_time_base(AVCodecContext *avctx, int64_t samples)
Rescale from sample rate to AVCodecContext.time_base.
Definition: internal.h:277
#define FF_CODEC_CAP_INIT_CLEANUP
The codec allows calling the close function for deallocation even if the init function returned a fai...
Definition: internal.h:49
common internal API header
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:117
static const float pred[4]
Definition: siprdata.h:259
unsigned int pos
Definition: spdifenc.c:412
main external API structure.
Definition: avcodec.h:536
int trellis
trellis RD quantization
Definition: avcodec.h:1487
int initial_padding
Audio only.
Definition: avcodec.h:2062
int frame_size
Number of samples per channel in an audio frame.
Definition: avcodec.h:1216
void * priv_data
Definition: avcodec.h:563
AVCodec.
Definition: codec.h:197
const char * name
Name of the codec implementation.
Definition: codec.h:204
This structure describes decoded (raw) audio or video data.
Definition: frame.h:318
int nb_samples
number of audio samples (per channel) described by this frame
Definition: frame.h:384
int64_t pts
Presentation timestamp in time_base units (time when frame should be shown to user).
Definition: frame.h:411
uint8_t * data[AV_NUM_DATA_POINTERS]
pointer to the picture/channel planes.
Definition: frame.h:332
This structure stores compressed data.
Definition: packet.h:346
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: packet.h:362
uint8_t * data
Definition: packet.h:369
uint32_t ssd
Definition: adpcmenc.c:46
int path
Definition: adpcmenc.c:47
#define av_freep(p)
#define av_log(a,...)
int out_size
Definition: movenc.c:55
static av_always_inline int diff(const uint32_t a, const uint32_t b)
static double c[64]