yanchengPowerSupply/node_modules/mux.js/lib/mp4/transmuxer.js

1455 lines
44 KiB
JavaScript

/**
* mux.js
*
* Copyright (c) 2015 Brightcove
* All rights reserved.
*
* A stream-based mp2t to mp4 converter. This utility can be used to
* deliver mp4s to a SourceBuffer on platforms that support native
* Media Source Extensions.
*/
'use strict';
var Stream = require('../utils/stream.js');
var mp4 = require('./mp4-generator.js');
var m2ts = require('../m2ts/m2ts.js');
var AdtsStream = require('../codecs/adts.js');
var H264Stream = require('../codecs/h264').H264Stream;
var AacStream = require('../aac');
var coneOfSilence = require('../data/silence');
var clock = require('../utils/clock');
// constants
var AUDIO_PROPERTIES = [
'audioobjecttype',
'channelcount',
'samplerate',
'samplingfrequencyindex',
'samplesize'
];
var VIDEO_PROPERTIES = [
'width',
'height',
'profileIdc',
'levelIdc',
'profileCompatibility'
];
var ONE_SECOND_IN_TS = 90000; // 90kHz clock
// object types
var VideoSegmentStream, AudioSegmentStream, Transmuxer, CoalesceStream;
// Helper functions
var
createDefaultSample,
isLikelyAacData,
collectDtsInfo,
clearDtsInfo,
calculateTrackBaseMediaDecodeTime,
arrayEquals,
sumFrameByteLengths;
/**
* Default sample object
* see ISO/IEC 14496-12:2012, section 8.6.4.3
*/
createDefaultSample = function() {
return {
size: 0,
flags: {
isLeading: 0,
dependsOn: 1,
isDependedOn: 0,
hasRedundancy: 0,
degradationPriority: 0
}
};
};
isLikelyAacData = function(data) {
if ((data[0] === 'I'.charCodeAt(0)) &&
(data[1] === 'D'.charCodeAt(0)) &&
(data[2] === '3'.charCodeAt(0))) {
return true;
}
return false;
};
/**
* Compare two arrays (even typed) for same-ness
*/
arrayEquals = function(a, b) {
var
i;
if (a.length !== b.length) {
return false;
}
// compare the value of each element in the array
for (i = 0; i < a.length; i++) {
if (a[i] !== b[i]) {
return false;
}
}
return true;
};
/**
* Sum the `byteLength` properties of the data in each AAC frame
*/
sumFrameByteLengths = function(array) {
var
i,
currentObj,
sum = 0;
// sum the byteLength's all each nal unit in the frame
for (i = 0; i < array.length; i++) {
currentObj = array[i];
sum += currentObj.data.byteLength;
}
return sum;
};
/**
* Constructs a single-track, ISO BMFF media segment from AAC data
* events. The output of this stream can be fed to a SourceBuffer
* configured with a suitable initialization segment.
*/
AudioSegmentStream = function(track) {
var
adtsFrames = [],
sequenceNumber = 0,
earliestAllowedDts = 0,
audioAppendStartTs = 0,
videoBaseMediaDecodeTime = Infinity;
AudioSegmentStream.prototype.init.call(this);
this.push = function(data) {
collectDtsInfo(track, data);
if (track) {
AUDIO_PROPERTIES.forEach(function(prop) {
track[prop] = data[prop];
});
}
// buffer audio data until end() is called
adtsFrames.push(data);
};
this.setEarliestDts = function(earliestDts) {
earliestAllowedDts = earliestDts - track.timelineStartInfo.baseMediaDecodeTime;
};
this.setVideoBaseMediaDecodeTime = function(baseMediaDecodeTime) {
videoBaseMediaDecodeTime = baseMediaDecodeTime;
};
this.setAudioAppendStart = function(timestamp) {
audioAppendStartTs = timestamp;
};
this.flush = function() {
var
frames,
moof,
mdat,
boxes;
// return early if no audio data has been observed
if (adtsFrames.length === 0) {
this.trigger('done', 'AudioSegmentStream');
return;
}
frames = this.trimAdtsFramesByEarliestDts_(adtsFrames);
track.baseMediaDecodeTime = calculateTrackBaseMediaDecodeTime(track);
this.prefixWithSilence_(track, frames);
// we have to build the index from byte locations to
// samples (that is, adts frames) in the audio data
track.samples = this.generateSampleTable_(frames);
// concatenate the audio data to constuct the mdat
mdat = mp4.mdat(this.concatenateFrameData_(frames));
adtsFrames = [];
moof = mp4.moof(sequenceNumber, [track]);
boxes = new Uint8Array(moof.byteLength + mdat.byteLength);
// bump the sequence number for next time
sequenceNumber++;
boxes.set(moof);
boxes.set(mdat, moof.byteLength);
clearDtsInfo(track);
this.trigger('data', {track: track, boxes: boxes});
this.trigger('done', 'AudioSegmentStream');
};
// Possibly pad (prefix) the audio track with silence if appending this track
// would lead to the introduction of a gap in the audio buffer
this.prefixWithSilence_ = function(track, frames) {
var
baseMediaDecodeTimeTs,
frameDuration = 0,
audioGapDuration = 0,
audioFillFrameCount = 0,
audioFillDuration = 0,
silentFrame,
i;
if (!frames.length) {
return;
}
baseMediaDecodeTimeTs = clock.audioTsToVideoTs(track.baseMediaDecodeTime, track.samplerate);
// determine frame clock duration based on sample rate, round up to avoid overfills
frameDuration = Math.ceil(ONE_SECOND_IN_TS / (track.samplerate / 1024));
if (audioAppendStartTs && videoBaseMediaDecodeTime) {
// insert the shortest possible amount (audio gap or audio to video gap)
audioGapDuration =
baseMediaDecodeTimeTs - Math.max(audioAppendStartTs, videoBaseMediaDecodeTime);
// number of full frames in the audio gap
audioFillFrameCount = Math.floor(audioGapDuration / frameDuration);
audioFillDuration = audioFillFrameCount * frameDuration;
}
// don't attempt to fill gaps smaller than a single frame or larger
// than a half second
if (audioFillFrameCount < 1 || audioFillDuration > ONE_SECOND_IN_TS / 2) {
return;
}
silentFrame = coneOfSilence[track.samplerate];
if (!silentFrame) {
// we don't have a silent frame pregenerated for the sample rate, so use a frame
// from the content instead
silentFrame = frames[0].data;
}
for (i = 0; i < audioFillFrameCount; i++) {
frames.splice(i, 0, {
data: silentFrame
});
}
track.baseMediaDecodeTime -=
Math.floor(clock.videoTsToAudioTs(audioFillDuration, track.samplerate));
};
// If the audio segment extends before the earliest allowed dts
// value, remove AAC frames until starts at or after the earliest
// allowed DTS so that we don't end up with a negative baseMedia-
// DecodeTime for the audio track
this.trimAdtsFramesByEarliestDts_ = function(adtsFrames) {
if (track.minSegmentDts >= earliestAllowedDts) {
return adtsFrames;
}
// We will need to recalculate the earliest segment Dts
track.minSegmentDts = Infinity;
return adtsFrames.filter(function(currentFrame) {
// If this is an allowed frame, keep it and record it's Dts
if (currentFrame.dts >= earliestAllowedDts) {
track.minSegmentDts = Math.min(track.minSegmentDts, currentFrame.dts);
track.minSegmentPts = track.minSegmentDts;
return true;
}
// Otherwise, discard it
return false;
});
};
// generate the track's raw mdat data from an array of frames
this.generateSampleTable_ = function(frames) {
var
i,
currentFrame,
samples = [];
for (i = 0; i < frames.length; i++) {
currentFrame = frames[i];
samples.push({
size: currentFrame.data.byteLength,
duration: 1024 // For AAC audio, all samples contain 1024 samples
});
}
return samples;
};
// generate the track's sample table from an array of frames
this.concatenateFrameData_ = function(frames) {
var
i,
currentFrame,
dataOffset = 0,
data = new Uint8Array(sumFrameByteLengths(frames));
for (i = 0; i < frames.length; i++) {
currentFrame = frames[i];
data.set(currentFrame.data, dataOffset);
dataOffset += currentFrame.data.byteLength;
}
return data;
};
};
AudioSegmentStream.prototype = new Stream();
/**
* Constructs a single-track, ISO BMFF media segment from H264 data
* events. The output of this stream can be fed to a SourceBuffer
* configured with a suitable initialization segment.
* @param track {object} track metadata configuration
* @param options {object} transmuxer options object
* @param options.alignGopsAtEnd {boolean} If true, start from the end of the
* gopsToAlignWith list when attempting to align gop pts
*/
VideoSegmentStream = function(track, options) {
var
sequenceNumber = 0,
nalUnits = [],
gopsToAlignWith = [],
config,
pps;
options = options || {};
VideoSegmentStream.prototype.init.call(this);
delete track.minPTS;
this.gopCache_ = [];
this.push = function(nalUnit) {
collectDtsInfo(track, nalUnit);
// record the track config
if (nalUnit.nalUnitType === 'seq_parameter_set_rbsp' && !config) {
config = nalUnit.config;
track.sps = [nalUnit.data];
VIDEO_PROPERTIES.forEach(function(prop) {
track[prop] = config[prop];
}, this);
}
if (nalUnit.nalUnitType === 'pic_parameter_set_rbsp' &&
!pps) {
pps = nalUnit.data;
track.pps = [nalUnit.data];
}
// buffer video until flush() is called
nalUnits.push(nalUnit);
};
this.flush = function() {
var
frames,
gopForFusion,
gops,
moof,
mdat,
boxes;
// Throw away nalUnits at the start of the byte stream until
// we find the first AUD
while (nalUnits.length) {
if (nalUnits[0].nalUnitType === 'access_unit_delimiter_rbsp') {
break;
}
nalUnits.shift();
}
// Return early if no video data has been observed
if (nalUnits.length === 0) {
this.resetStream_();
this.trigger('done', 'VideoSegmentStream');
return;
}
// Organize the raw nal-units into arrays that represent
// higher-level constructs such as frames and gops
// (group-of-pictures)
frames = this.groupNalsIntoFrames_(nalUnits);
gops = this.groupFramesIntoGops_(frames);
// If the first frame of this fragment is not a keyframe we have
// a problem since MSE (on Chrome) requires a leading keyframe.
//
// We have two approaches to repairing this situation:
// 1) GOP-FUSION:
// This is where we keep track of the GOPS (group-of-pictures)
// from previous fragments and attempt to find one that we can
// prepend to the current fragment in order to create a valid
// fragment.
// 2) KEYFRAME-PULLING:
// Here we search for the first keyframe in the fragment and
// throw away all the frames between the start of the fragment
// and that keyframe. We then extend the duration and pull the
// PTS of the keyframe forward so that it covers the time range
// of the frames that were disposed of.
//
// #1 is far prefereable over #2 which can cause "stuttering" but
// requires more things to be just right.
if (!gops[0][0].keyFrame) {
// Search for a gop for fusion from our gopCache
gopForFusion = this.getGopForFusion_(nalUnits[0], track);
if (gopForFusion) {
gops.unshift(gopForFusion);
// Adjust Gops' metadata to account for the inclusion of the
// new gop at the beginning
gops.byteLength += gopForFusion.byteLength;
gops.nalCount += gopForFusion.nalCount;
gops.pts = gopForFusion.pts;
gops.dts = gopForFusion.dts;
gops.duration += gopForFusion.duration;
} else {
// If we didn't find a candidate gop fall back to keyrame-pulling
gops = this.extendFirstKeyFrame_(gops);
}
}
// Trim gops to align with gopsToAlignWith
if (gopsToAlignWith.length) {
var alignedGops;
if (options.alignGopsAtEnd) {
alignedGops = this.alignGopsAtEnd_(gops);
} else {
alignedGops = this.alignGopsAtStart_(gops);
}
if (!alignedGops) {
// save all the nals in the last GOP into the gop cache
this.gopCache_.unshift({
gop: gops.pop(),
pps: track.pps,
sps: track.sps
});
// Keep a maximum of 6 GOPs in the cache
this.gopCache_.length = Math.min(6, this.gopCache_.length);
// Clear nalUnits
nalUnits = [];
// return early no gops can be aligned with desired gopsToAlignWith
this.resetStream_();
this.trigger('done', 'VideoSegmentStream');
return;
}
// Some gops were trimmed. clear dts info so minSegmentDts and pts are correct
// when recalculated before sending off to CoalesceStream
clearDtsInfo(track);
gops = alignedGops;
}
collectDtsInfo(track, gops);
// First, we have to build the index from byte locations to
// samples (that is, frames) in the video data
track.samples = this.generateSampleTable_(gops);
// Concatenate the video data and construct the mdat
mdat = mp4.mdat(this.concatenateNalData_(gops));
track.baseMediaDecodeTime = calculateTrackBaseMediaDecodeTime(track);
this.trigger('processedGopsInfo', gops.map(function(gop) {
return {
pts: gop.pts,
dts: gop.dts,
byteLength: gop.byteLength
};
}));
// save all the nals in the last GOP into the gop cache
this.gopCache_.unshift({
gop: gops.pop(),
pps: track.pps,
sps: track.sps
});
// Keep a maximum of 6 GOPs in the cache
this.gopCache_.length = Math.min(6, this.gopCache_.length);
// Clear nalUnits
nalUnits = [];
this.trigger('baseMediaDecodeTime', track.baseMediaDecodeTime);
this.trigger('timelineStartInfo', track.timelineStartInfo);
moof = mp4.moof(sequenceNumber, [track]);
// it would be great to allocate this array up front instead of
// throwing away hundreds of media segment fragments
boxes = new Uint8Array(moof.byteLength + mdat.byteLength);
// Bump the sequence number for next time
sequenceNumber++;
boxes.set(moof);
boxes.set(mdat, moof.byteLength);
this.trigger('data', {track: track, boxes: boxes});
this.resetStream_();
// Continue with the flush process now
this.trigger('done', 'VideoSegmentStream');
};
this.resetStream_ = function() {
clearDtsInfo(track);
// reset config and pps because they may differ across segments
// for instance, when we are rendition switching
config = undefined;
pps = undefined;
};
// Search for a candidate Gop for gop-fusion from the gop cache and
// return it or return null if no good candidate was found
this.getGopForFusion_ = function(nalUnit) {
var
halfSecond = 45000, // Half-a-second in a 90khz clock
allowableOverlap = 10000, // About 3 frames @ 30fps
nearestDistance = Infinity,
dtsDistance,
nearestGopObj,
currentGop,
currentGopObj,
i;
// Search for the GOP nearest to the beginning of this nal unit
for (i = 0; i < this.gopCache_.length; i++) {
currentGopObj = this.gopCache_[i];
currentGop = currentGopObj.gop;
// Reject Gops with different SPS or PPS
if (!(track.pps && arrayEquals(track.pps[0], currentGopObj.pps[0])) ||
!(track.sps && arrayEquals(track.sps[0], currentGopObj.sps[0]))) {
continue;
}
// Reject Gops that would require a negative baseMediaDecodeTime
if (currentGop.dts < track.timelineStartInfo.dts) {
continue;
}
// The distance between the end of the gop and the start of the nalUnit
dtsDistance = (nalUnit.dts - currentGop.dts) - currentGop.duration;
// Only consider GOPS that start before the nal unit and end within
// a half-second of the nal unit
if (dtsDistance >= -allowableOverlap &&
dtsDistance <= halfSecond) {
// Always use the closest GOP we found if there is more than
// one candidate
if (!nearestGopObj ||
nearestDistance > dtsDistance) {
nearestGopObj = currentGopObj;
nearestDistance = dtsDistance;
}
}
}
if (nearestGopObj) {
return nearestGopObj.gop;
}
return null;
};
this.extendFirstKeyFrame_ = function(gops) {
var currentGop;
if (!gops[0][0].keyFrame && gops.length > 1) {
// Remove the first GOP
currentGop = gops.shift();
gops.byteLength -= currentGop.byteLength;
gops.nalCount -= currentGop.nalCount;
// Extend the first frame of what is now the
// first gop to cover the time period of the
// frames we just removed
gops[0][0].dts = currentGop.dts;
gops[0][0].pts = currentGop.pts;
gops[0][0].duration += currentGop.duration;
}
return gops;
};
// Convert an array of nal units into an array of frames with each frame being
// composed of the nal units that make up that frame
// Also keep track of cummulative data about the frame from the nal units such
// as the frame duration, starting pts, etc.
this.groupNalsIntoFrames_ = function(nalUnits) {
var
i,
currentNal,
currentFrame = [],
frames = [];
currentFrame.byteLength = 0;
for (i = 0; i < nalUnits.length; i++) {
currentNal = nalUnits[i];
// Split on 'aud'-type nal units
if (currentNal.nalUnitType === 'access_unit_delimiter_rbsp') {
// Since the very first nal unit is expected to be an AUD
// only push to the frames array when currentFrame is not empty
if (currentFrame.length) {
currentFrame.duration = currentNal.dts - currentFrame.dts;
frames.push(currentFrame);
}
currentFrame = [currentNal];
currentFrame.byteLength = currentNal.data.byteLength;
currentFrame.pts = currentNal.pts;
currentFrame.dts = currentNal.dts;
} else {
// Specifically flag key frames for ease of use later
if (currentNal.nalUnitType === 'slice_layer_without_partitioning_rbsp_idr') {
currentFrame.keyFrame = true;
}
currentFrame.duration = currentNal.dts - currentFrame.dts;
currentFrame.byteLength += currentNal.data.byteLength;
currentFrame.push(currentNal);
}
}
// For the last frame, use the duration of the previous frame if we
// have nothing better to go on
if (frames.length &&
(!currentFrame.duration ||
currentFrame.duration <= 0)) {
currentFrame.duration = frames[frames.length - 1].duration;
}
// Push the final frame
frames.push(currentFrame);
return frames;
};
// Convert an array of frames into an array of Gop with each Gop being composed
// of the frames that make up that Gop
// Also keep track of cummulative data about the Gop from the frames such as the
// Gop duration, starting pts, etc.
this.groupFramesIntoGops_ = function(frames) {
var
i,
currentFrame,
currentGop = [],
gops = [];
// We must pre-set some of the values on the Gop since we
// keep running totals of these values
currentGop.byteLength = 0;
currentGop.nalCount = 0;
currentGop.duration = 0;
currentGop.pts = frames[0].pts;
currentGop.dts = frames[0].dts;
// store some metadata about all the Gops
gops.byteLength = 0;
gops.nalCount = 0;
gops.duration = 0;
gops.pts = frames[0].pts;
gops.dts = frames[0].dts;
for (i = 0; i < frames.length; i++) {
currentFrame = frames[i];
if (currentFrame.keyFrame) {
// Since the very first frame is expected to be an keyframe
// only push to the gops array when currentGop is not empty
if (currentGop.length) {
gops.push(currentGop);
gops.byteLength += currentGop.byteLength;
gops.nalCount += currentGop.nalCount;
gops.duration += currentGop.duration;
}
currentGop = [currentFrame];
currentGop.nalCount = currentFrame.length;
currentGop.byteLength = currentFrame.byteLength;
currentGop.pts = currentFrame.pts;
currentGop.dts = currentFrame.dts;
currentGop.duration = currentFrame.duration;
} else {
currentGop.duration += currentFrame.duration;
currentGop.nalCount += currentFrame.length;
currentGop.byteLength += currentFrame.byteLength;
currentGop.push(currentFrame);
}
}
if (gops.length && currentGop.duration <= 0) {
currentGop.duration = gops[gops.length - 1].duration;
}
gops.byteLength += currentGop.byteLength;
gops.nalCount += currentGop.nalCount;
gops.duration += currentGop.duration;
// push the final Gop
gops.push(currentGop);
return gops;
};
// generate the track's sample table from an array of gops
this.generateSampleTable_ = function(gops, baseDataOffset) {
var
h, i,
sample,
currentGop,
currentFrame,
dataOffset = baseDataOffset || 0,
samples = [];
for (h = 0; h < gops.length; h++) {
currentGop = gops[h];
for (i = 0; i < currentGop.length; i++) {
currentFrame = currentGop[i];
sample = createDefaultSample();
sample.dataOffset = dataOffset;
sample.compositionTimeOffset = currentFrame.pts - currentFrame.dts;
sample.duration = currentFrame.duration;
sample.size = 4 * currentFrame.length; // Space for nal unit size
sample.size += currentFrame.byteLength;
if (currentFrame.keyFrame) {
sample.flags.dependsOn = 2;
}
dataOffset += sample.size;
samples.push(sample);
}
}
return samples;
};
// generate the track's raw mdat data from an array of gops
this.concatenateNalData_ = function(gops) {
var
h, i, j,
currentGop,
currentFrame,
currentNal,
dataOffset = 0,
nalsByteLength = gops.byteLength,
numberOfNals = gops.nalCount,
totalByteLength = nalsByteLength + 4 * numberOfNals,
data = new Uint8Array(totalByteLength),
view = new DataView(data.buffer);
// For each Gop..
for (h = 0; h < gops.length; h++) {
currentGop = gops[h];
// For each Frame..
for (i = 0; i < currentGop.length; i++) {
currentFrame = currentGop[i];
// For each NAL..
for (j = 0; j < currentFrame.length; j++) {
currentNal = currentFrame[j];
view.setUint32(dataOffset, currentNal.data.byteLength);
dataOffset += 4;
data.set(currentNal.data, dataOffset);
dataOffset += currentNal.data.byteLength;
}
}
}
return data;
};
// trim gop list to the first gop found that has a matching pts with a gop in the list
// of gopsToAlignWith starting from the START of the list
this.alignGopsAtStart_ = function(gops) {
var alignIndex, gopIndex, align, gop, byteLength, nalCount, duration, alignedGops;
byteLength = gops.byteLength;
nalCount = gops.nalCount;
duration = gops.duration;
alignIndex = gopIndex = 0;
while (alignIndex < gopsToAlignWith.length && gopIndex < gops.length) {
align = gopsToAlignWith[alignIndex];
gop = gops[gopIndex];
if (align.pts === gop.pts) {
break;
}
if (gop.pts > align.pts) {
// this current gop starts after the current gop we want to align on, so increment
// align index
alignIndex++;
continue;
}
// current gop starts before the current gop we want to align on. so increment gop
// index
gopIndex++;
byteLength -= gop.byteLength;
nalCount -= gop.nalCount;
duration -= gop.duration;
}
if (gopIndex === 0) {
// no gops to trim
return gops;
}
if (gopIndex === gops.length) {
// all gops trimmed, skip appending all gops
return null;
}
alignedGops = gops.slice(gopIndex);
alignedGops.byteLength = byteLength;
alignedGops.duration = duration;
alignedGops.nalCount = nalCount;
alignedGops.pts = alignedGops[0].pts;
alignedGops.dts = alignedGops[0].dts;
return alignedGops;
};
// trim gop list to the first gop found that has a matching pts with a gop in the list
// of gopsToAlignWith starting from the END of the list
this.alignGopsAtEnd_ = function(gops) {
var alignIndex, gopIndex, align, gop, alignEndIndex, matchFound;
alignIndex = gopsToAlignWith.length - 1;
gopIndex = gops.length - 1;
alignEndIndex = null;
matchFound = false;
while (alignIndex >= 0 && gopIndex >= 0) {
align = gopsToAlignWith[alignIndex];
gop = gops[gopIndex];
if (align.pts === gop.pts) {
matchFound = true;
break;
}
if (align.pts > gop.pts) {
alignIndex--;
continue;
}
if (alignIndex === gopsToAlignWith.length - 1) {
// gop.pts is greater than the last alignment candidate. If no match is found
// by the end of this loop, we still want to append gops that come after this
// point
alignEndIndex = gopIndex;
}
gopIndex--;
}
if (!matchFound && alignEndIndex === null) {
return null;
}
var trimIndex;
if (matchFound) {
trimIndex = gopIndex;
} else {
trimIndex = alignEndIndex;
}
if (trimIndex === 0) {
return gops;
}
var alignedGops = gops.slice(trimIndex);
var metadata = alignedGops.reduce(function(total, gop) {
total.byteLength += gop.byteLength;
total.duration += gop.duration;
total.nalCount += gop.nalCount;
return total;
}, { byteLength: 0, duration: 0, nalCount: 0 });
alignedGops.byteLength = metadata.byteLength;
alignedGops.duration = metadata.duration;
alignedGops.nalCount = metadata.nalCount;
alignedGops.pts = alignedGops[0].pts;
alignedGops.dts = alignedGops[0].dts;
return alignedGops;
};
this.alignGopsWith = function(newGopsToAlignWith) {
gopsToAlignWith = newGopsToAlignWith;
};
};
VideoSegmentStream.prototype = new Stream();
/**
* Store information about the start and end of the track and the
* duration for each frame/sample we process in order to calculate
* the baseMediaDecodeTime
*/
collectDtsInfo = function(track, data) {
if (typeof data.pts === 'number') {
if (track.timelineStartInfo.pts === undefined) {
track.timelineStartInfo.pts = data.pts;
}
if (track.minSegmentPts === undefined) {
track.minSegmentPts = data.pts;
} else {
track.minSegmentPts = Math.min(track.minSegmentPts, data.pts);
}
if (track.maxSegmentPts === undefined) {
track.maxSegmentPts = data.pts;
} else {
track.maxSegmentPts = Math.max(track.maxSegmentPts, data.pts);
}
}
if (typeof data.dts === 'number') {
if (track.timelineStartInfo.dts === undefined) {
track.timelineStartInfo.dts = data.dts;
}
if (track.minSegmentDts === undefined) {
track.minSegmentDts = data.dts;
} else {
track.minSegmentDts = Math.min(track.minSegmentDts, data.dts);
}
if (track.maxSegmentDts === undefined) {
track.maxSegmentDts = data.dts;
} else {
track.maxSegmentDts = Math.max(track.maxSegmentDts, data.dts);
}
}
};
/**
* Clear values used to calculate the baseMediaDecodeTime between
* tracks
*/
clearDtsInfo = function(track) {
delete track.minSegmentDts;
delete track.maxSegmentDts;
delete track.minSegmentPts;
delete track.maxSegmentPts;
};
/**
* Calculate the track's baseMediaDecodeTime based on the earliest
* DTS the transmuxer has ever seen and the minimum DTS for the
* current track
*/
calculateTrackBaseMediaDecodeTime = function(track) {
var
baseMediaDecodeTime,
scale,
// Calculate the distance, in time, that this segment starts from the start
// of the timeline (earliest time seen since the transmuxer initialized)
timeSinceStartOfTimeline = track.minSegmentDts - track.timelineStartInfo.dts;
// track.timelineStartInfo.baseMediaDecodeTime is the location, in time, where
// we want the start of the first segment to be placed
baseMediaDecodeTime = track.timelineStartInfo.baseMediaDecodeTime;
// Add to that the distance this segment is from the very first
baseMediaDecodeTime += timeSinceStartOfTimeline;
// baseMediaDecodeTime must not become negative
baseMediaDecodeTime = Math.max(0, baseMediaDecodeTime);
if (track.type === 'audio') {
// Audio has a different clock equal to the sampling_rate so we need to
// scale the PTS values into the clock rate of the track
scale = track.samplerate / ONE_SECOND_IN_TS;
baseMediaDecodeTime *= scale;
baseMediaDecodeTime = Math.floor(baseMediaDecodeTime);
}
return baseMediaDecodeTime;
};
/**
* A Stream that can combine multiple streams (ie. audio & video)
* into a single output segment for MSE. Also supports audio-only
* and video-only streams.
*/
CoalesceStream = function(options, metadataStream) {
// Number of Tracks per output segment
// If greater than 1, we combine multiple
// tracks into a single segment
this.numberOfTracks = 0;
this.metadataStream = metadataStream;
if (typeof options.remux !== 'undefined') {
this.remuxTracks = !!options.remux;
} else {
this.remuxTracks = true;
}
this.pendingTracks = [];
this.videoTrack = null;
this.pendingBoxes = [];
this.pendingCaptions = [];
this.pendingMetadata = [];
this.pendingBytes = 0;
this.emittedTracks = 0;
CoalesceStream.prototype.init.call(this);
// Take output from multiple
this.push = function(output) {
// buffer incoming captions until the associated video segment
// finishes
if (output.text) {
return this.pendingCaptions.push(output);
}
// buffer incoming id3 tags until the final flush
if (output.frames) {
return this.pendingMetadata.push(output);
}
// Add this track to the list of pending tracks and store
// important information required for the construction of
// the final segment
this.pendingTracks.push(output.track);
this.pendingBoxes.push(output.boxes);
this.pendingBytes += output.boxes.byteLength;
if (output.track.type === 'video') {
this.videoTrack = output.track;
}
if (output.track.type === 'audio') {
this.audioTrack = output.track;
}
};
};
CoalesceStream.prototype = new Stream();
CoalesceStream.prototype.flush = function(flushSource) {
var
offset = 0,
event = {
captions: [],
captionStreams: {},
metadata: [],
info: {}
},
caption,
id3,
initSegment,
timelineStartPts = 0,
i;
if (this.pendingTracks.length < this.numberOfTracks) {
if (flushSource !== 'VideoSegmentStream' &&
flushSource !== 'AudioSegmentStream') {
// Return because we haven't received a flush from a data-generating
// portion of the segment (meaning that we have only recieved meta-data
// or captions.)
return;
} else if (this.remuxTracks) {
// Return until we have enough tracks from the pipeline to remux (if we
// are remuxing audio and video into a single MP4)
return;
} else if (this.pendingTracks.length === 0) {
// In the case where we receive a flush without any data having been
// received we consider it an emitted track for the purposes of coalescing
// `done` events.
// We do this for the case where there is an audio and video track in the
// segment but no audio data. (seen in several playlists with alternate
// audio tracks and no audio present in the main TS segments.)
this.emittedTracks++;
if (this.emittedTracks >= this.numberOfTracks) {
this.trigger('done');
this.emittedTracks = 0;
}
return;
}
}
if (this.videoTrack) {
timelineStartPts = this.videoTrack.timelineStartInfo.pts;
VIDEO_PROPERTIES.forEach(function(prop) {
event.info[prop] = this.videoTrack[prop];
}, this);
} else if (this.audioTrack) {
timelineStartPts = this.audioTrack.timelineStartInfo.pts;
AUDIO_PROPERTIES.forEach(function(prop) {
event.info[prop] = this.audioTrack[prop];
}, this);
}
if (this.pendingTracks.length === 1) {
event.type = this.pendingTracks[0].type;
} else {
event.type = 'combined';
}
this.emittedTracks += this.pendingTracks.length;
initSegment = mp4.initSegment(this.pendingTracks);
// Create a new typed array to hold the init segment
event.initSegment = new Uint8Array(initSegment.byteLength);
// Create an init segment containing a moov
// and track definitions
event.initSegment.set(initSegment);
// Create a new typed array to hold the moof+mdats
event.data = new Uint8Array(this.pendingBytes);
// Append each moof+mdat (one per track) together
for (i = 0; i < this.pendingBoxes.length; i++) {
event.data.set(this.pendingBoxes[i], offset);
offset += this.pendingBoxes[i].byteLength;
}
// Translate caption PTS times into second offsets into the
// video timeline for the segment, and add track info
for (i = 0; i < this.pendingCaptions.length; i++) {
caption = this.pendingCaptions[i];
caption.startTime = (caption.startPts - timelineStartPts);
caption.startTime /= 90e3;
caption.endTime = (caption.endPts - timelineStartPts);
caption.endTime /= 90e3;
event.captionStreams[caption.stream] = true;
event.captions.push(caption);
}
// Translate ID3 frame PTS times into second offsets into the
// video timeline for the segment
for (i = 0; i < this.pendingMetadata.length; i++) {
id3 = this.pendingMetadata[i];
id3.cueTime = (id3.pts - timelineStartPts);
id3.cueTime /= 90e3;
event.metadata.push(id3);
}
// We add this to every single emitted segment even though we only need
// it for the first
event.metadata.dispatchType = this.metadataStream.dispatchType;
// Reset stream state
this.pendingTracks.length = 0;
this.videoTrack = null;
this.pendingBoxes.length = 0;
this.pendingCaptions.length = 0;
this.pendingBytes = 0;
this.pendingMetadata.length = 0;
// Emit the built segment
this.trigger('data', event);
// Only emit `done` if all tracks have been flushed and emitted
if (this.emittedTracks >= this.numberOfTracks) {
this.trigger('done');
this.emittedTracks = 0;
}
};
/**
* A Stream that expects MP2T binary data as input and produces
* corresponding media segments, suitable for use with Media Source
* Extension (MSE) implementations that support the ISO BMFF byte
* stream format, like Chrome.
*/
Transmuxer = function(options) {
var
self = this,
hasFlushed = true,
videoTrack,
audioTrack;
Transmuxer.prototype.init.call(this);
options = options || {};
this.baseMediaDecodeTime = options.baseMediaDecodeTime || 0;
this.transmuxPipeline_ = {};
this.setupAacPipeline = function() {
var pipeline = {};
this.transmuxPipeline_ = pipeline;
pipeline.type = 'aac';
pipeline.metadataStream = new m2ts.MetadataStream();
// set up the parsing pipeline
pipeline.aacStream = new AacStream();
pipeline.audioTimestampRolloverStream = new m2ts.TimestampRolloverStream('audio');
pipeline.timedMetadataTimestampRolloverStream = new m2ts.TimestampRolloverStream('timed-metadata');
pipeline.adtsStream = new AdtsStream();
pipeline.coalesceStream = new CoalesceStream(options, pipeline.metadataStream);
pipeline.headOfPipeline = pipeline.aacStream;
pipeline.aacStream
.pipe(pipeline.audioTimestampRolloverStream)
.pipe(pipeline.adtsStream);
pipeline.aacStream
.pipe(pipeline.timedMetadataTimestampRolloverStream)
.pipe(pipeline.metadataStream)
.pipe(pipeline.coalesceStream);
pipeline.metadataStream.on('timestamp', function(frame) {
pipeline.aacStream.setTimestamp(frame.timeStamp);
});
pipeline.aacStream.on('data', function(data) {
if (data.type === 'timed-metadata' && !pipeline.audioSegmentStream) {
audioTrack = audioTrack || {
timelineStartInfo: {
baseMediaDecodeTime: self.baseMediaDecodeTime
},
codec: 'adts',
type: 'audio'
};
// hook up the audio segment stream to the first track with aac data
pipeline.coalesceStream.numberOfTracks++;
pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack);
// Set up the final part of the audio pipeline
pipeline.adtsStream
.pipe(pipeline.audioSegmentStream)
.pipe(pipeline.coalesceStream);
}
});
// Re-emit any data coming from the coalesce stream to the outside world
pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));
// Let the consumer know we have finished flushing the entire pipeline
pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));
};
this.setupTsPipeline = function() {
var pipeline = {};
this.transmuxPipeline_ = pipeline;
pipeline.type = 'ts';
pipeline.metadataStream = new m2ts.MetadataStream();
// set up the parsing pipeline
pipeline.packetStream = new m2ts.TransportPacketStream();
pipeline.parseStream = new m2ts.TransportParseStream();
pipeline.elementaryStream = new m2ts.ElementaryStream();
pipeline.videoTimestampRolloverStream = new m2ts.TimestampRolloverStream('video');
pipeline.audioTimestampRolloverStream = new m2ts.TimestampRolloverStream('audio');
pipeline.timedMetadataTimestampRolloverStream = new m2ts.TimestampRolloverStream('timed-metadata');
pipeline.adtsStream = new AdtsStream();
pipeline.h264Stream = new H264Stream();
pipeline.captionStream = new m2ts.CaptionStream();
pipeline.coalesceStream = new CoalesceStream(options, pipeline.metadataStream);
pipeline.headOfPipeline = pipeline.packetStream;
// disassemble MPEG2-TS packets into elementary streams
pipeline.packetStream
.pipe(pipeline.parseStream)
.pipe(pipeline.elementaryStream);
// !!THIS ORDER IS IMPORTANT!!
// demux the streams
pipeline.elementaryStream
.pipe(pipeline.videoTimestampRolloverStream)
.pipe(pipeline.h264Stream);
pipeline.elementaryStream
.pipe(pipeline.audioTimestampRolloverStream)
.pipe(pipeline.adtsStream);
pipeline.elementaryStream
.pipe(pipeline.timedMetadataTimestampRolloverStream)
.pipe(pipeline.metadataStream)
.pipe(pipeline.coalesceStream);
// Hook up CEA-608/708 caption stream
pipeline.h264Stream.pipe(pipeline.captionStream)
.pipe(pipeline.coalesceStream);
pipeline.elementaryStream.on('data', function(data) {
var i;
if (data.type === 'metadata') {
i = data.tracks.length;
// scan the tracks listed in the metadata
while (i--) {
if (!videoTrack && data.tracks[i].type === 'video') {
videoTrack = data.tracks[i];
videoTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
} else if (!audioTrack && data.tracks[i].type === 'audio') {
audioTrack = data.tracks[i];
audioTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
}
}
// hook up the video segment stream to the first track with h264 data
if (videoTrack && !pipeline.videoSegmentStream) {
pipeline.coalesceStream.numberOfTracks++;
pipeline.videoSegmentStream = new VideoSegmentStream(videoTrack, options);
pipeline.videoSegmentStream.on('timelineStartInfo', function(timelineStartInfo) {
// When video emits timelineStartInfo data after a flush, we forward that
// info to the AudioSegmentStream, if it exists, because video timeline
// data takes precedence.
if (audioTrack) {
audioTrack.timelineStartInfo = timelineStartInfo;
// On the first segment we trim AAC frames that exist before the
// very earliest DTS we have seen in video because Chrome will
// interpret any video track with a baseMediaDecodeTime that is
// non-zero as a gap.
pipeline.audioSegmentStream.setEarliestDts(timelineStartInfo.dts);
}
});
pipeline.videoSegmentStream.on('processedGopsInfo',
self.trigger.bind(self, 'gopInfo'));
pipeline.videoSegmentStream.on('baseMediaDecodeTime', function(baseMediaDecodeTime) {
if (audioTrack) {
pipeline.audioSegmentStream.setVideoBaseMediaDecodeTime(baseMediaDecodeTime);
}
});
// Set up the final part of the video pipeline
pipeline.h264Stream
.pipe(pipeline.videoSegmentStream)
.pipe(pipeline.coalesceStream);
}
if (audioTrack && !pipeline.audioSegmentStream) {
// hook up the audio segment stream to the first track with aac data
pipeline.coalesceStream.numberOfTracks++;
pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack);
// Set up the final part of the audio pipeline
pipeline.adtsStream
.pipe(pipeline.audioSegmentStream)
.pipe(pipeline.coalesceStream);
}
}
});
// Re-emit any data coming from the coalesce stream to the outside world
pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));
// Let the consumer know we have finished flushing the entire pipeline
pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));
};
// hook up the segment streams once track metadata is delivered
this.setBaseMediaDecodeTime = function(baseMediaDecodeTime) {
var pipeline = this.transmuxPipeline_;
this.baseMediaDecodeTime = baseMediaDecodeTime;
if (audioTrack) {
audioTrack.timelineStartInfo.dts = undefined;
audioTrack.timelineStartInfo.pts = undefined;
clearDtsInfo(audioTrack);
audioTrack.timelineStartInfo.baseMediaDecodeTime = baseMediaDecodeTime;
if (pipeline.audioTimestampRolloverStream) {
pipeline.audioTimestampRolloverStream.discontinuity();
}
}
if (videoTrack) {
if (pipeline.videoSegmentStream) {
pipeline.videoSegmentStream.gopCache_ = [];
pipeline.videoTimestampRolloverStream.discontinuity();
}
videoTrack.timelineStartInfo.dts = undefined;
videoTrack.timelineStartInfo.pts = undefined;
clearDtsInfo(videoTrack);
pipeline.captionStream.reset();
videoTrack.timelineStartInfo.baseMediaDecodeTime = baseMediaDecodeTime;
}
if (pipeline.timedMetadataTimestampRolloverStream) {
pipeline.timedMetadataTimestampRolloverStream.discontinuity();
}
};
this.setAudioAppendStart = function(timestamp) {
if (audioTrack) {
this.transmuxPipeline_.audioSegmentStream.setAudioAppendStart(timestamp);
}
};
this.alignGopsWith = function(gopsToAlignWith) {
if (videoTrack && this.transmuxPipeline_.videoSegmentStream) {
this.transmuxPipeline_.videoSegmentStream.alignGopsWith(gopsToAlignWith);
}
};
// feed incoming data to the front of the parsing pipeline
this.push = function(data) {
if (hasFlushed) {
var isAac = isLikelyAacData(data);
if (isAac && this.transmuxPipeline_.type !== 'aac') {
this.setupAacPipeline();
} else if (!isAac && this.transmuxPipeline_.type !== 'ts') {
this.setupTsPipeline();
}
hasFlushed = false;
}
this.transmuxPipeline_.headOfPipeline.push(data);
};
// flush any buffered data
this.flush = function() {
hasFlushed = true;
// Start at the top of the pipeline and flush all pending work
this.transmuxPipeline_.headOfPipeline.flush();
};
// Caption data has to be reset when seeking outside buffered range
this.resetCaptions = function() {
if (this.transmuxPipeline_.captionStream) {
this.transmuxPipeline_.captionStream.reset();
}
};
};
Transmuxer.prototype = new Stream();
module.exports = {
Transmuxer: Transmuxer,
VideoSegmentStream: VideoSegmentStream,
AudioSegmentStream: AudioSegmentStream,
AUDIO_PROPERTIES: AUDIO_PROPERTIES,
VIDEO_PROPERTIES: VIDEO_PROPERTIES
};