WebM VP8 Codec SDK
vpx_temporal_svc_encoder
1 /*
2  * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  * Use of this source code is governed by a BSD-style license
5  * that can be found in the LICENSE file in the root of the source
6  * tree. An additional intellectual property rights grant can be found
7  * in the file PATENTS. All contributing project authors may
8  * be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 // This is an example demonstrating how to implement a multi-layer VPx
12 // encoding scheme based on temporal scalability for video applications
13 // that benefit from a scalable bitstream.
14 
15 #include <math.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 
20 #define VPX_CODEC_DISABLE_COMPAT 1
21 #include "./vpx_config.h"
22 #include "vpx_ports/vpx_timer.h"
23 #include "vpx/vp8cx.h"
24 #include "vpx/vpx_encoder.h"
25 
26 #include "./tools_common.h"
27 #include "./video_writer.h"
28 
29 static const char *exec_name;
30 
// Terminates the process with EXIT_FAILURE. Despite the name, nothing is
// printed here; main() reports its own usage text through die().
void usage_exit(void) {
  exit(EXIT_FAILURE);
}
34 
// Denoiser states, for temporal denoising. main() passes one of these values
// to the encoder through the VP8E_SET_NOISE_SENSITIVITY control, so the
// numeric values (0..3, in declaration order) are significant.
enum denoiserState {
  kDenoiserOff,             // 0
  kDenoiserOnYOnly,         // 1
  kDenoiserOnYUV,           // 2
  kDenoiserOnYUVAggressive  // 3: Aggressive mode not implemented currently.
};
42 
// Number of temporal layers for each layering mode (index = mode, valid
// indices 0..11). main() uses it to decide how many per-layer bitrate
// arguments must follow <mode> on the command line. Read-only lookup table.
static const int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3};
44 
45 // For rate control encoding stats.
46 struct RateControlMetrics {
47  // Number of input frames per layer.
48  int layer_input_frames[VPX_TS_MAX_LAYERS];
49  // Total (cumulative) number of encoded frames per layer.
50  int layer_tot_enc_frames[VPX_TS_MAX_LAYERS];
51  // Number of encoded non-key frames per layer.
52  int layer_enc_frames[VPX_TS_MAX_LAYERS];
53  // Framerate per layer layer (cumulative).
54  double layer_framerate[VPX_TS_MAX_LAYERS];
55  // Target average frame size per layer (per-frame-bandwidth per layer).
56  double layer_pfb[VPX_TS_MAX_LAYERS];
57  // Actual average frame size per layer.
58  double layer_avg_frame_size[VPX_TS_MAX_LAYERS];
59  // Average rate mismatch per layer (|target - actual| / target).
60  double layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS];
61  // Actual encoding bitrate per layer (cumulative).
62  double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
63 };
64 
65 // Note: these rate control metrics assume only 1 key frame in the
66 // sequence (i.e., first frame only). So for temporal pattern# 7
67 // (which has key frame for every frame on base layer), the metrics
68 // computation will be off/wrong.
69 // TODO(marpan): Update these metrics to account for multiple key frames
70 // in the stream.
71 static void set_rate_control_metrics(struct RateControlMetrics *rc,
72  vpx_codec_enc_cfg_t *cfg) {
73  unsigned int i = 0;
74  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
75  // per-frame-bandwidth, for the rate control encoding stats below.
76  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
77  rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
78  rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] /
79  rc->layer_framerate[0];
80  for (i = 0; i < cfg->ts_number_layers; ++i) {
81  if (i > 0) {
82  rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
83  rc->layer_pfb[i] = 1000.0 *
84  (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) /
85  (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
86  }
87  rc->layer_input_frames[i] = 0;
88  rc->layer_enc_frames[i] = 0;
89  rc->layer_tot_enc_frames[i] = 0;
90  rc->layer_encoding_bitrate[i] = 0.0;
91  rc->layer_avg_frame_size[i] = 0.0;
92  rc->layer_avg_rate_mismatch[i] = 0.0;
93  }
94 }
95 
96 static void printout_rate_control_summary(struct RateControlMetrics *rc,
98  int frame_cnt) {
99  unsigned int i = 0;
100  int tot_num_frames = 0;
101  printf("Total number of processed frames: %d\n\n", frame_cnt -1);
102  printf("Rate control layer stats for %d layer(s):\n\n",
103  cfg->ts_number_layers);
104  for (i = 0; i < cfg->ts_number_layers; ++i) {
105  const int num_dropped = (i > 0) ?
106  (rc->layer_input_frames[i] - rc->layer_enc_frames[i]) :
107  (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1);
108  tot_num_frames += rc->layer_input_frames[i];
109  rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] *
110  rc->layer_encoding_bitrate[i] / tot_num_frames;
111  rc->layer_avg_frame_size[i] = rc->layer_avg_frame_size[i] /
112  rc->layer_enc_frames[i];
113  rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
114  rc->layer_enc_frames[i];
115  printf("For layer#: %d \n", i);
116  printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i],
117  rc->layer_encoding_bitrate[i]);
118  printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
119  rc->layer_avg_frame_size[i]);
120  printf("Average rate_mismatch: %f \n", rc->layer_avg_rate_mismatch[i]);
121  printf("Number of input frames, encoded (non-key) frames, "
122  "and perc dropped frames: %d %d %f \n", rc->layer_input_frames[i],
123  rc->layer_enc_frames[i],
124  100.0 * num_dropped / rc->layer_input_frames[i]);
125  printf("\n");
126  }
127  if ((frame_cnt - 1) != tot_num_frames)
128  die("Error: Number of input frames not equal to output! \n");
129 }
130 
131 // Temporal scaling parameters:
132 // NOTE: The 3 prediction frames cannot be used interchangeably due to
133 // differences in the way they are handled throughout the code. The
134 // frames should be allocated to layers in the order LAST, GF, ARF.
135 // Other combinations work, but may produce slightly inferior results.
// Fills in the temporal layering pattern selected by layering_mode.
//
//   layering_mode    - pattern selector; cases 0..10 are handled explicitly,
//                      11 and every other value share the last (default) case.
//   cfg              - receives ts_periodicity, ts_number_layers,
//                      ts_rate_decimator[] and ts_layer_id[].
//   layer_flags      - receives one encode-flag word per frame position in the
//                      flag cycle; the largest index written is 15 (case 7),
//                      so the caller must provide at least 16 entries.
//   flag_periodicity - receives the length of the layer_flags cycle, which in
//                      cases 8..11 (8) is longer than cfg->ts_periodicity.
//
// NOTE(review): this listing is incomplete. Many layer_flags assignments end
// in a dangling '|' and their continuation line is absent (the embedded
// listing numbers skip, e.g. 150 -> 152), so the block does not compile as
// shown. The missing flag operands must be restored from the upstream libvpx
// sources; they are deliberately not guessed here.
136 static void set_temporal_layer_pattern(int layering_mode,
137  vpx_codec_enc_cfg_t *cfg,
138  int *layer_flags,
139  int *flag_periodicity) {
140  switch (layering_mode) {
141  case 0: {
142  // 1-layer.
143  int ids[1] = {0};
144  cfg->ts_periodicity = 1;
145  *flag_periodicity = 1;
146  cfg->ts_number_layers = 1;
147  cfg->ts_rate_decimator[0] = 1;
148  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
149  // Update L only.
// NOTE(review): continuation line missing after the trailing '|' below.
150  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF |
152  break;
153  }
154  case 1: {
155  // 2-layers, 2-frame period.
156  int ids[2] = {0, 1};
157  cfg->ts_periodicity = 2;
158  *flag_periodicity = 2;
159  cfg->ts_number_layers = 2;
160  cfg->ts_rate_decimator[0] = 2;
161  cfg->ts_rate_decimator[1] = 1;
162  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
// The '#if 1' selects the intra-layer-prediction-enabled variant at compile
// time; the '#else' branch is kept only as an alternative.
163 #if 1
164  // 0=L, 1=GF, Intra-layer prediction enabled.
165  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF |
167  layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
169 #else
170  // 0=L, 1=GF, Intra-layer prediction disabled.
171  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF |
173  layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
175 #endif
176  break;
177  }
178  case 2: {
179  // 2-layers, 3-frame period.
180  int ids[3] = {0, 1, 1};
181  cfg->ts_periodicity = 3;
182  *flag_periodicity = 3;
183  cfg->ts_number_layers = 2;
184  cfg->ts_rate_decimator[0] = 3;
185  cfg->ts_rate_decimator[1] = 1;
186  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
187  // 0=L, 1=GF, Intra-layer prediction enabled.
188  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
190  layer_flags[1] =
191  layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
193  break;
194  }
195  case 3: {
196  // 3-layers, 6-frame period.
197  int ids[6] = {0, 2, 2, 1, 2, 2};
198  cfg->ts_periodicity = 6;
199  *flag_periodicity = 6;
200  cfg->ts_number_layers = 3;
201  cfg->ts_rate_decimator[0] = 6;
202  cfg->ts_rate_decimator[1] = 3;
203  cfg->ts_rate_decimator[2] = 1;
204  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
205  // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled.
206  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
208  layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
210  layer_flags[1] =
211  layer_flags[2] =
212  layer_flags[4] =
213  layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
214  break;
215  }
216  case 4: {
217  // 3-layers, 4-frame period.
218  int ids[4] = {0, 2, 1, 2};
219  cfg->ts_periodicity = 4;
220  *flag_periodicity = 4;
221  cfg->ts_number_layers = 3;
222  cfg->ts_rate_decimator[0] = 4;
223  cfg->ts_rate_decimator[1] = 2;
224  cfg->ts_rate_decimator[2] = 1;
225  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
226  // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
227  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
229  layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
231  layer_flags[1] =
232  layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
234  break;
235  }
236  case 5: {
237  // 3-layers, 4-frame period.
238  int ids[4] = {0, 2, 1, 2};
239  cfg->ts_periodicity = 4;
240  *flag_periodicity = 4;
241  cfg->ts_number_layers = 3;
242  cfg->ts_rate_decimator[0] = 4;
243  cfg->ts_rate_decimator[1] = 2;
244  cfg->ts_rate_decimator[2] = 1;
245  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
246  // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, disabled
247  // in layer 2.
248  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
250  layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
252  layer_flags[1] =
253  layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
255  break;
256  }
257  case 6: {
258  // 3-layers, 4-frame period.
259  int ids[4] = {0, 2, 1, 2};
260  cfg->ts_periodicity = 4;
261  *flag_periodicity = 4;
262  cfg->ts_number_layers = 3;
263  cfg->ts_rate_decimator[0] = 4;
264  cfg->ts_rate_decimator[1] = 2;
265  cfg->ts_rate_decimator[2] = 1;
266  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
267  // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled.
268  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
270  layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
272  layer_flags[1] =
273  layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
274  break;
275  }
276  case 7: {
277  // NOTE: Probably of academic interest only.
278  // 5-layers, 16-frame period.
279  int ids[16] = {0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4};
280  cfg->ts_periodicity = 16;
281  *flag_periodicity = 16;
282  cfg->ts_number_layers = 5;
283  cfg->ts_rate_decimator[0] = 16;
284  cfg->ts_rate_decimator[1] = 8;
285  cfg->ts_rate_decimator[2] = 4;
286  cfg->ts_rate_decimator[3] = 2;
287  cfg->ts_rate_decimator[4] = 1;
288  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
// main() does not clear VPX_EFLAG_FORCE_KF for mode 7, so entry 0 keeps
// forcing a key frame each time the 16-frame cycle wraps around.
289  layer_flags[0] = VPX_EFLAG_FORCE_KF;
290  layer_flags[1] =
291  layer_flags[3] =
292  layer_flags[5] =
293  layer_flags[7] =
294  layer_flags[9] =
295  layer_flags[11] =
296  layer_flags[13] =
297  layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
299  layer_flags[2] =
300  layer_flags[6] =
301  layer_flags[10] =
302  layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
303  layer_flags[4] =
304  layer_flags[12] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF;
305  layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF;
306  break;
307  }
308  case 8: {
309  // 2-layers, with sync point at first frame of layer 1.
310  int ids[2] = {0, 1};
311  cfg->ts_periodicity = 2;
312  *flag_periodicity = 8;
313  cfg->ts_number_layers = 2;
314  cfg->ts_rate_decimator[0] = 2;
315  cfg->ts_rate_decimator[1] = 1;
316  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
317  // 0=L, 1=GF.
318  // ARF is used as predictor for all frames, and is only updated on
319  // key frame. Sync point every 8 frames.
320 
321  // Layer 0: predict from L and ARF, update L and G.
322  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
324  // Layer 1: sync point: predict from L and ARF, and update G.
325  layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST |
327  // Layer 0, predict from L and ARF, update L.
328  layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
330  // Layer 1: predict from L, G and ARF, and update G.
331  layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
333  // Layer 0.
334  layer_flags[4] = layer_flags[2];
335  // Layer 1.
336  layer_flags[5] = layer_flags[3];
337  // Layer 0.
338  layer_flags[6] = layer_flags[4];
339  // Layer 1.
340  layer_flags[7] = layer_flags[5];
341  break;
342  }
343  case 9: {
344  // 3-layers: Sync points for layer 1 and 2 every 8 frames.
345  int ids[4] = {0, 2, 1, 2};
346  cfg->ts_periodicity = 4;
347  *flag_periodicity = 8;
348  cfg->ts_number_layers = 3;
349  cfg->ts_rate_decimator[0] = 4;
350  cfg->ts_rate_decimator[1] = 2;
351  cfg->ts_rate_decimator[2] = 1;
352  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
353  // 0=L, 1=GF, 2=ARF.
354  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF |
356  layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
358  layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
360  layer_flags[3] =
361  layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
362  layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
364  layer_flags[6] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
366  layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
368  break;
369  }
370  case 10: {
371  // 3-layers structure where ARF is used as predictor for all frames,
372  // and is only updated on key frame.
373  // Sync points for layer 1 and 2 every 8 frames.
374 
375  int ids[4] = {0, 2, 1, 2};
376  cfg->ts_periodicity = 4;
377  *flag_periodicity = 8;
378  cfg->ts_number_layers = 3;
379  cfg->ts_rate_decimator[0] = 4;
380  cfg->ts_rate_decimator[1] = 2;
381  cfg->ts_rate_decimator[2] = 1;
382  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
383  // 0=L, 1=GF, 2=ARF.
384  // Layer 0: predict from L and ARF; update L and G.
385  layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF |
387  // Layer 2: sync point: predict from L and ARF; update none.
388  layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
391  // Layer 1: sync point: predict from L and ARF; update G.
392  layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF |
394  // Layer 2: predict from L, G, ARF; update none.
395  layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
397  // Layer 0: predict from L and ARF; update L.
398  layer_flags[4] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
400  // Layer 2: predict from L, G, ARF; update none.
401  layer_flags[5] = layer_flags[3];
402  // Layer 1: predict from L, G, ARF; update G.
403  layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
404  // Layer 2: predict from L, G, ARF; update none.
405  layer_flags[7] = layer_flags[3];
406  break;
407  }
// Mode 11 shares its body with 'default', so any unrecognized layering_mode
// silently gets this pattern.
408  case 11:
409  default: {
410  // 3-layers structure as in case 10, but no sync/refresh points for
411  // layer 1 and 2.
412  int ids[4] = {0, 2, 1, 2};
413  cfg->ts_periodicity = 4;
414  *flag_periodicity = 8;
415  cfg->ts_number_layers = 3;
416  cfg->ts_rate_decimator[0] = 4;
417  cfg->ts_rate_decimator[1] = 2;
418  cfg->ts_rate_decimator[2] = 1;
419  memcpy(cfg->ts_layer_id, ids, sizeof(ids));
420  // 0=L, 1=GF, 2=ARF.
421  // Layer 0: predict from L and ARF; update L.
422  layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
424  layer_flags[4] = layer_flags[0];
425  // Layer 1: predict from L, G, ARF; update G.
426  layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
427  layer_flags[6] = layer_flags[2];
428  // Layer 2: predict from L, G, ARF; update none.
429  layer_flags[1] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
431  layer_flags[3] = layer_flags[1];
432  layer_flags[5] = layer_flags[1];
433  layer_flags[7] = layer_flags[1];
434  break;
435  }
436  }
437 }
438 
439 int main(int argc, char **argv) {
440  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS];
441  vpx_codec_ctx_t codec;
443  int frame_cnt = 0;
444  vpx_image_t raw;
445  vpx_codec_err_t res;
446  unsigned int width;
447  unsigned int height;
448  int speed;
449  int frame_avail;
450  int got_data;
451  int flags = 0;
452  unsigned int i;
453  int pts = 0; // PTS starts at 0.
454  int frame_duration = 1; // 1 timebase tick per frame.
455  int layering_mode = 0;
456  int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
457  int flag_periodicity = 1;
458  int max_intra_size_pct;
459  vpx_svc_layer_id_t layer_id = {0, 0};
460  const VpxInterface *encoder = NULL;
461  FILE *infile = NULL;
462  struct RateControlMetrics rc;
463  int64_t cx_time = 0;
464 
465  exec_name = argv[0];
466  // Check usage and arguments.
467  if (argc < 11) {
468  die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
469  "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
470  "<Rate_0> ... <Rate_nlayers-1> \n", argv[0]);
471  }
472 
473  encoder = get_vpx_encoder_by_name(argv[3]);
474  if (!encoder)
475  die("Unsupported codec.");
476 
477  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
478 
479  width = strtol(argv[4], NULL, 0);
480  height = strtol(argv[5], NULL, 0);
481  if (width < 16 || width % 2 || height < 16 || height % 2) {
482  die("Invalid resolution: %d x %d", width, height);
483  }
484 
485  layering_mode = strtol(argv[10], NULL, 0);
486  if (layering_mode < 0 || layering_mode > 12) {
487  die("Invalid layering mode (0..12) %s", argv[10]);
488  }
489 
490  if (argc != 11 + mode_to_num_layers[layering_mode]) {
491  die("Invalid number of arguments");
492  }
493 
494  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) {
495  die("Failed to allocate image", width, height);
496  }
497 
498  // Populate encoder configuration.
499  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
500  if (res) {
501  printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
502  return EXIT_FAILURE;
503  }
504 
505  // Update the default configuration with our settings.
506  cfg.g_w = width;
507  cfg.g_h = height;
508 
509  // Timebase format e.g. 30fps: numerator=1, demoninator = 30.
510  cfg.g_timebase.num = strtol(argv[6], NULL, 0);
511  cfg.g_timebase.den = strtol(argv[7], NULL, 0);
512 
513  speed = strtol(argv[8], NULL, 0);
514  if (speed < 0) {
515  die("Invalid speed setting: must be positive");
516  }
517 
518  for (i = 11; (int)i < 11 + mode_to_num_layers[layering_mode]; ++i) {
519  cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
520  }
521 
522  // Real time parameters.
523  cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
524  cfg.rc_end_usage = VPX_CBR;
525  cfg.rc_resize_allowed = 0;
526  cfg.rc_min_quantizer = 2;
527  cfg.rc_max_quantizer = 56;
528  cfg.rc_undershoot_pct = 50;
529  cfg.rc_overshoot_pct = 50;
530  cfg.rc_buf_initial_sz = 500;
531  cfg.rc_buf_optimal_sz = 600;
532  cfg.rc_buf_sz = 1000;
533 
534  // Enable error resilient mode.
535  cfg.g_error_resilient = 1;
536  cfg.g_lag_in_frames = 0;
537  cfg.kf_mode = VPX_KF_AUTO;
538 
539  // Disable automatic keyframe placement.
540  cfg.kf_min_dist = cfg.kf_max_dist = 3000;
541 
542  set_temporal_layer_pattern(layering_mode,
543  &cfg,
544  layer_flags,
545  &flag_periodicity);
546 
547  set_rate_control_metrics(&rc, &cfg);
548 
549  // Target bandwidth for the whole stream.
550  // Set to ts_target_bitrate for highest layer (total bitrate).
552 
553  // Open input file.
554  if (!(infile = fopen(argv[1], "rb"))) {
555  die("Failed to open %s for reading", argv[1]);
556  }
557 
558  // Open an output file for each stream.
559  for (i = 0; i < cfg.ts_number_layers; ++i) {
560  char file_name[PATH_MAX];
561  VpxVideoInfo info;
562  info.codec_fourcc = encoder->fourcc;
563  info.frame_width = cfg.g_w;
564  info.frame_height = cfg.g_h;
565  info.time_base.numerator = cfg.g_timebase.num;
566  info.time_base.denominator = cfg.g_timebase.den;
567 
568  snprintf(file_name, sizeof(file_name), "%s_%d.ivf", argv[2], i);
569  outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info);
570  if (!outfile[i])
571  die("Failed to open %s for writing", file_name);
572  }
573  // No spatial layers in this encoder.
574  cfg.ss_number_layers = 1;
575 
576  // Initialize codec.
577  if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
578  die_codec(&codec, "Failed to initialize encoder");
579 
580  if (strncmp(encoder->name, "vp8", 3) == 0) {
581  vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
582  vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOnYOnly);
583  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
584  vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
585  vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
586  vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
588  if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) {
589  die_codec(&codec, "Failed to set SVC");
590  }
591  }
594  // This controls the maximum target size of the key frame.
595  // For generating smaller key frames, use a smaller max_intra_size_pct
596  // value, like 100 or 200.
597  max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5)
598  * ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0);
599  // For low-quality key frame.
600  max_intra_size_pct = 200;
601  vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct);
602 
603  frame_avail = 1;
604  while (frame_avail || got_data) {
605  struct vpx_usec_timer timer;
606  vpx_codec_iter_t iter = NULL;
607  const vpx_codec_cx_pkt_t *pkt;
608  // Update the temporal layer_id. No spatial layers in this test.
609  layer_id.spatial_layer_id = 0;
610  layer_id.temporal_layer_id =
611  cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
612  if (strncmp(encoder->name, "vp9", 3) == 0) {
613  vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
614  }
615  flags = layer_flags[frame_cnt % flag_periodicity];
616  frame_avail = vpx_img_read(&raw, infile);
617  if (frame_avail)
618  ++rc.layer_input_frames[layer_id.temporal_layer_id];
619  vpx_usec_timer_start(&timer);
620  if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags,
621  VPX_DL_REALTIME)) {
622  die_codec(&codec, "Failed to encode frame");
623  }
624  vpx_usec_timer_mark(&timer);
625  cx_time += vpx_usec_timer_elapsed(&timer);
626  // Reset KF flag.
627  if (layering_mode != 7) {
628  layer_flags[0] &= ~VPX_EFLAG_FORCE_KF;
629  }
630  got_data = 0;
631  while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
632  got_data = 1;
633  switch (pkt->kind) {
635  for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
636  i < cfg.ts_number_layers; ++i) {
637  vpx_video_writer_write_frame(outfile[i], pkt->data.frame.buf,
638  pkt->data.frame.sz, pts);
639  ++rc.layer_tot_enc_frames[i];
640  rc.layer_encoding_bitrate[i] += 8.0 * pkt->data.frame.sz;
641  // Keep count of rate control stats per layer (for non-key frames).
642  if (i == cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity] &&
643  !(pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
644  rc.layer_avg_frame_size[i] += 8.0 * pkt->data.frame.sz;
645  rc.layer_avg_rate_mismatch[i] +=
646  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[i]) /
647  rc.layer_pfb[i];
648  ++rc.layer_enc_frames[i];
649  }
650  }
651  break;
652  default:
653  break;
654  }
655  }
656  ++frame_cnt;
657  pts += frame_duration;
658  }
659  fclose(infile);
660  printout_rate_control_summary(&rc, &cfg, frame_cnt);
661  printf("\n");
662  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
663  frame_cnt,
664  1000 * (float)cx_time / (double)(frame_cnt * 1000000),
665  1000000 * (double)frame_cnt / (double)cx_time);
666 
667  if (vpx_codec_destroy(&codec))
668  die_codec(&codec, "Failed to destroy codec");
669 
670  // Try to rewrite the output file headers with the actual frame count.
671  for (i = 0; i < cfg.ts_number_layers; ++i)
672  vpx_video_writer_close(outfile[i]);
673 
674  vpx_img_free(&raw);
675  return EXIT_SUCCESS;
676 }
unsigned int rc_buf_initial_sz
Decoder Buffer Initial Size.
Definition: vpx_encoder.h:566
unsigned int ts_number_layers
Number of temporal coding layers.
Definition: vpx_encoder.h:671
control function to set vp8 encoder cpuused
Definition: vp8cx.h:149
#define VP8_EFLAG_NO_REF_LAST
Don't reference the last frame.
Definition: vp8cx.h:55
#define VP8_EFLAG_NO_UPD_GF
Don't update the golden frame.
Definition: vp8cx.h:89
Image Descriptor.
Definition: vpx_image.h:102
Describes the encoder algorithm interface to applications.
const char * vpx_codec_iface_name(vpx_codec_iface_t *iface)
Return the name for a given interface.
const char * vpx_codec_err_to_string(vpx_codec_err_t err)
Convert error number to printable string.
#define VPX_TS_MAX_LAYERS
Definition: vpx_encoder.h:40
struct vpx_rational g_timebase
Stream timebase units.
Definition: vpx_encoder.h:353
Definition: vpx_encoder.h:247
unsigned int rc_buf_sz
Decoder Buffer Size.
Definition: vpx_encoder.h:556
#define VP8_EFLAG_NO_REF_GF
Don't reference the golden frame.
Definition: vp8cx.h:64
enum vpx_kf_mode kf_mode
Keyframe placement mode.
Definition: vpx_encoder.h:621
int den
Definition: vpx_encoder.h:232
vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, vpx_enc_frame_flags_t flags, unsigned long deadline)
Encode a frame.
unsigned int rc_max_quantizer
Maximum (Worst Quality) Quantizer.
Definition: vpx_encoder.h:508
unsigned int rc_min_quantizer
Minimum (Best Quality) Quantizer.
Definition: vpx_encoder.h:497
unsigned int kf_max_dist
Keyframe maximum interval.
Definition: vpx_encoder.h:641
unsigned int g_lag_in_frames
Allow lagged encoding.
Definition: vpx_encoder.h:385
Encoder configuration structure.
Definition: vpx_encoder.h:285
Definition: vpx_encoder.h:263
int spatial_layer_id
Definition: vp8cx.h:324
Max data rate for Intra frames.
Definition: vp8cx.h:190
Encoder output packet.
Definition: vpx_encoder.h:185
unsigned int rc_overshoot_pct
Rate control adaptation overshoot control.
Definition: vpx_encoder.h:539
unsigned int ts_rate_decimator[5]
Frame rate decimation factor for each temporal layer.
Definition: vpx_encoder.h:685
unsigned int rc_buf_optimal_sz
Decoder Buffer Optimal Size.
Definition: vpx_encoder.h:576
unsigned int kf_min_dist
Keyframe minimum interval.
Definition: vpx_encoder.h:631
unsigned int ts_layer_id[16]
Template defining the membership of frames to temporal layers.
Definition: vpx_encoder.h:703
struct vpx_codec_cx_pkt::@1::@2 frame
vpx_image_t * vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align)
Open a descriptor, allocating storage for the underlying image.
Definition: vpx_image.h:56
unsigned int g_w
Width of the frame.
Definition: vpx_encoder.h:328
unsigned int ts_target_bitrate[5]
Target bitrate for each temporal layer.
Definition: vpx_encoder.h:678
unsigned int rc_undershoot_pct
Rate control adaptation undershoot control.
Definition: vpx_encoder.h:526
unsigned int g_h
Height of the frame.
Definition: vpx_encoder.h:338
enum vpx_codec_cx_pkt_kind kind
Definition: vpx_encoder.h:186
unsigned int rc_dropframe_thresh
Temporal resampling configuration, if supported by the codec.
Definition: vpx_encoder.h:408
vp9 svc layer parameters
Definition: vp8cx.h:323
#define VP8_EFLAG_NO_UPD_LAST
Don't update the last frame.
Definition: vp8cx.h:81
void vpx_img_free(vpx_image_t *img)
Close an image descriptor.
Definition: vp8cx.h:154
unsigned int rc_target_bitrate
Target data rate.
Definition: vpx_encoder.h:481
#define VPX_DL_REALTIME
Definition: vpx_encoder.h:838
int num
Definition: vpx_encoder.h:231
Definition: vp8cx.h:151
vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, unsigned int usage)
Get a default configuration.
unsigned int ss_number_layers
Number of spatial coding layers.
Definition: vpx_encoder.h:651
Provides definitions for using the VP8 encoder algorithm within the vpx Codec Interface.
#define vpx_codec_enc_init(ctx, iface, cfg, flags)
Convenience macro for vpx_codec_enc_init_ver()
Definition: vpx_encoder.h:740
unsigned int rc_resize_allowed
Enable/disable spatial resampling, if supported by the codec.
Definition: vpx_encoder.h:418
vpx_codec_err_t
Algorithm return codes.
Definition: vpx_codec.h:89
const vpx_codec_cx_pkt_t * vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter)
Encoded data iterator.
union vpx_codec_cx_pkt::@1 data
int temporal_layer_id
Definition: vp8cx.h:325
#define VPX_TS_MAX_PERIODICITY
Definition: vpx_encoder.h:37
#define vpx_codec_control(ctx, id, data)
vpx_codec_control wrapper macro
Definition: vpx_codec.h:405
unsigned int ts_periodicity
Length of the sequence defining frame temporal layer membership.
Definition: vpx_encoder.h:694
#define VP8_EFLAG_NO_REF_ARF
Don't reference the alternate reference frame.
Definition: vp8cx.h:73
vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx)
Destroy a codec instance.
Definition: vp8cx.h:153
#define VPX_FRAME_IS_KEY
Definition: vpx_encoder.h:123
#define VPX_EFLAG_FORCE_KF
Definition: vpx_encoder.h:276
const void * vpx_codec_iter_t
Iterator.
Definition: vpx_codec.h:188
Definition: vpx_encoder.h:169
vpx_codec_er_flags_t g_error_resilient
Enable error resilient modes.
Definition: vpx_encoder.h:362
#define VP8_EFLAG_NO_UPD_ARF
Don't update the alternate reference frame.
Definition: vp8cx.h:97
#define VP8_EFLAG_NO_UPD_ENTROPY
Disable entropy update.
Definition: vp8cx.h:121
control function to set svc layer for spatial and temporal.
Definition: vp8cx.h:208
enum vpx_rc_mode rc_end_usage
Rate control algorithm to use.
Definition: vpx_encoder.h:460
Codec context structure.
Definition: vpx_codec.h:199