/**
 * mux.js
 *
 * Copyright (c) 2015 Brightcove
 * All rights reserved.
 *
 * A stream-based mp2t to mp4 converter. This utility can be used to
 * deliver mp4s to a SourceBuffer on platforms that support native
 * Media Source Extensions.
 */
'use strict';

var Stream = require('../utils/stream.js');
var mp4 = require('./mp4-generator.js');
var m2ts = require('../m2ts/m2ts.js');
var AdtsStream = require('../codecs/adts.js');
var H264Stream = require('../codecs/h264').H264Stream;
var AacStream = require('../aac');
var coneOfSilence = require('../data/silence');
var clock = require('../utils/clock');

// constants
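// Codec metadata properties that are copied from the parsed elementary
// streams onto each track and echoed on the `info` object of emitted
// 'data' events.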
var AUDIO_PROPERTIES = [
  'audioobjecttype',
  'channelcount',
  'samplerate',
  'samplingfrequencyindex',
  'samplesize'
];

var VIDEO_PROPERTIES = [
  'width',
  'height',
  'profileIdc',
  'levelIdc',
  'profileCompatibility'
];

var ONE_SECOND_IN_TS = 90000; // 90kHz clock

// object types
var VideoSegmentStream, AudioSegmentStream, Transmuxer, CoalesceStream;

// Helper functions
var
  createDefaultSample,
  isLikelyAacData,
  collectDtsInfo,
  clearDtsInfo,
  calculateTrackBaseMediaDecodeTime,
  arrayEquals,
  sumFrameByteLengths;

/**
 * Default sample object
 * see ISO/IEC 14496-12:2012, section 8.6.4.3
 */
createDefaultSample = function() {
  return {
    size: 0,
    flags: {
      isLeading: 0,
      dependsOn: 1,
      isDependedOn: 0,
      hasRedundancy: 0,
      degradationPriority: 0
    }
  };
};
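
// The dependsOn flag above uses the ISO/IEC 14496-12 sample_depends_on
// values: 1 means the sample depends on other samples (not a keyframe),
// 2 means it does not (a keyframe). The video generateSampleTable_ below
// switches it to 2 for keyframes.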
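
// Returns true when the data begins with an 'ID3' tag, which is how the
// transmuxer below distinguishes raw AAC input (ADTS with ID3 timed
// metadata) from MPEG2-TS input.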
isLikelyAacData = function(data) {
  if ((data[0] === 'I'.charCodeAt(0)) &&
      (data[1] === 'D'.charCodeAt(0)) &&
      (data[2] === '3'.charCodeAt(0))) {
    return true;
  }
  return false;
};

/**
 * Compare two arrays (even typed) for same-ness
 */
arrayEquals = function(a, b) {
  var
    i;

  if (a.length !== b.length) {
    return false;
  }

  // compare the value of each element in the array
  for (i = 0; i < a.length; i++) {
    if (a[i] !== b[i]) {
      return false;
    }
  }

  return true;
};

/**
 * Sum the `byteLength` properties of the data in each AAC frame
 */
sumFrameByteLengths = function(array) {
  var
    i,
    currentObj,
    sum = 0;

  // sum the byteLength of each frame's data
  for (i = 0; i < array.length; i++) {
    currentObj = array[i];
    sum += currentObj.data.byteLength;
  }

  return sum;
};

/**
 * Constructs a single-track, ISO BMFF media segment from AAC data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 */
AudioSegmentStream = function(track) {
  var
    adtsFrames = [],
    sequenceNumber = 0,
    earliestAllowedDts = 0,
    audioAppendStartTs = 0,
    videoBaseMediaDecodeTime = Infinity;

  AudioSegmentStream.prototype.init.call(this);

  this.push = function(data) {
    collectDtsInfo(track, data);

    if (track) {
      AUDIO_PROPERTIES.forEach(function(prop) {
        track[prop] = data[prop];
      });
    }

    // buffer audio data until flush() is called
    adtsFrames.push(data);
  };

  this.setEarliestDts = function(earliestDts) {
    earliestAllowedDts = earliestDts - track.timelineStartInfo.baseMediaDecodeTime;
  };

  this.setVideoBaseMediaDecodeTime = function(baseMediaDecodeTime) {
    videoBaseMediaDecodeTime = baseMediaDecodeTime;
  };

  this.setAudioAppendStart = function(timestamp) {
    audioAppendStartTs = timestamp;
  };

  this.flush = function() {
    var
      frames,
      moof,
      mdat,
      boxes;

    // return early if no audio data has been observed
    if (adtsFrames.length === 0) {
      this.trigger('done', 'AudioSegmentStream');
      return;
    }

    frames = this.trimAdtsFramesByEarliestDts_(adtsFrames);
    track.baseMediaDecodeTime = calculateTrackBaseMediaDecodeTime(track);

    this.prefixWithSilence_(track, frames);

    // we have to build the index from byte locations to
    // samples (that is, adts frames) in the audio data
    track.samples = this.generateSampleTable_(frames);

    // concatenate the audio data to construct the mdat
    mdat = mp4.mdat(this.concatenateFrameData_(frames));

    adtsFrames = [];

    moof = mp4.moof(sequenceNumber, [track]);
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // bump the sequence number for next time
    sequenceNumber++;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    clearDtsInfo(track);

    this.trigger('data', {track: track, boxes: boxes});
    this.trigger('done', 'AudioSegmentStream');
  };

  // Possibly pad (prefix) the audio track with silence if appending this track
  // would lead to the introduction of a gap in the audio buffer
  this.prefixWithSilence_ = function(track, frames) {
    var
      baseMediaDecodeTimeTs,
      frameDuration = 0,
      audioGapDuration = 0,
      audioFillFrameCount = 0,
      audioFillDuration = 0,
      silentFrame,
      i;

    if (!frames.length) {
      return;
    }

    baseMediaDecodeTimeTs = clock.audioTsToVideoTs(track.baseMediaDecodeTime, track.samplerate);
    // determine frame clock duration based on sample rate, round up to avoid overfills
    frameDuration = Math.ceil(ONE_SECOND_IN_TS / (track.samplerate / 1024));
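    // e.g. at 44100 Hz each AAC frame holds 1024 samples, so
    // 90000 / (44100 / 1024) ≈ 2089.8 ticks, rounded up to 2090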

    if (audioAppendStartTs && videoBaseMediaDecodeTime) {
      // insert the shortest possible amount (audio gap or audio to video gap)
      audioGapDuration =
        baseMediaDecodeTimeTs - Math.max(audioAppendStartTs, videoBaseMediaDecodeTime);
      // number of full frames in the audio gap
      audioFillFrameCount = Math.floor(audioGapDuration / frameDuration);
      audioFillDuration = audioFillFrameCount * frameDuration;
    }

    // don't attempt to fill gaps smaller than a single frame or larger
    // than a half second
    if (audioFillFrameCount < 1 || audioFillDuration > ONE_SECOND_IN_TS / 2) {
      return;
    }

    silentFrame = coneOfSilence[track.samplerate];

    if (!silentFrame) {
      // we don't have a silent frame pregenerated for the sample rate, so use a frame
      // from the content instead
      silentFrame = frames[0].data;
    }

    for (i = 0; i < audioFillFrameCount; i++) {
      frames.splice(i, 0, {
        data: silentFrame
      });
    }

    track.baseMediaDecodeTime -=
      Math.floor(clock.videoTsToAudioTs(audioFillDuration, track.samplerate));
  };

  // If the audio segment extends before the earliest allowed dts
  // value, remove AAC frames until the segment starts at or after the
  // earliest allowed DTS so that we don't end up with a negative
  // baseMediaDecodeTime for the audio track
  this.trimAdtsFramesByEarliestDts_ = function(adtsFrames) {
    if (track.minSegmentDts >= earliestAllowedDts) {
      return adtsFrames;
    }

    // We will need to recalculate the earliest segment Dts
    track.minSegmentDts = Infinity;

    return adtsFrames.filter(function(currentFrame) {
      // If this is an allowed frame, keep it and record its DTS
      if (currentFrame.dts >= earliestAllowedDts) {
        track.minSegmentDts = Math.min(track.minSegmentDts, currentFrame.dts);
        track.minSegmentPts = track.minSegmentDts;
        return true;
      }
      // Otherwise, discard it
      return false;
    });
  };

  // generate the track's sample table from an array of ADTS frames
  this.generateSampleTable_ = function(frames) {
    var
      i,
      currentFrame,
      samples = [];

    for (i = 0; i < frames.length; i++) {
      currentFrame = frames[i];
      samples.push({
        size: currentFrame.data.byteLength,
        duration: 1024 // For AAC audio, each frame contains 1024 samples
      });
    }
    return samples;
  };

  // generate the track's raw mdat data by concatenating the ADTS frames
  this.concatenateFrameData_ = function(frames) {
    var
      i,
      currentFrame,
      dataOffset = 0,
      data = new Uint8Array(sumFrameByteLengths(frames));

    for (i = 0; i < frames.length; i++) {
      currentFrame = frames[i];

      data.set(currentFrame.data, dataOffset);
      dataOffset += currentFrame.data.byteLength;
    }
    return data;
  };
};

AudioSegmentStream.prototype = new Stream();
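
// In the transmuxer pipelines below this stream is wired as:
//   adtsStream.pipe(audioSegmentStream).pipe(coalesceStream)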

/**
 * Constructs a single-track, ISO BMFF media segment from H264 data
 * events. The output of this stream can be fed to a SourceBuffer
 * configured with a suitable initialization segment.
 * @param track {object} track metadata configuration
 * @param options {object} transmuxer options object
 * @param options.alignGopsAtEnd {boolean} If true, start from the end of the
 *        gopsToAlignWith list when attempting to align gop pts
 */
VideoSegmentStream = function(track, options) {
  var
    sequenceNumber = 0,
    nalUnits = [],
    gopsToAlignWith = [],
    config,
    pps;

  options = options || {};

  VideoSegmentStream.prototype.init.call(this);

  delete track.minPTS;

  this.gopCache_ = [];

  this.push = function(nalUnit) {
    collectDtsInfo(track, nalUnit);

    // record the track config
    if (nalUnit.nalUnitType === 'seq_parameter_set_rbsp' && !config) {
      config = nalUnit.config;
      track.sps = [nalUnit.data];

      VIDEO_PROPERTIES.forEach(function(prop) {
        track[prop] = config[prop];
      }, this);
    }

    if (nalUnit.nalUnitType === 'pic_parameter_set_rbsp' &&
        !pps) {
      pps = nalUnit.data;
      track.pps = [nalUnit.data];
    }

    // buffer video until flush() is called
    nalUnits.push(nalUnit);
  };

  this.flush = function() {
    var
      frames,
      gopForFusion,
      gops,
      moof,
      mdat,
      boxes;

    // Throw away nalUnits at the start of the byte stream until
    // we find the first AUD
    while (nalUnits.length) {
      if (nalUnits[0].nalUnitType === 'access_unit_delimiter_rbsp') {
        break;
      }
      nalUnits.shift();
    }

    // Return early if no video data has been observed
    if (nalUnits.length === 0) {
      this.resetStream_();
      this.trigger('done', 'VideoSegmentStream');
      return;
    }

    // Organize the raw nal-units into arrays that represent
    // higher-level constructs such as frames and gops
    // (group-of-pictures)
    frames = this.groupNalsIntoFrames_(nalUnits);
    gops = this.groupFramesIntoGops_(frames);

    // If the first frame of this fragment is not a keyframe we have
    // a problem since MSE (on Chrome) requires a leading keyframe.
    //
    // We have two approaches to repairing this situation:
    // 1) GOP-FUSION:
    //    This is where we keep track of the GOPS (group-of-pictures)
    //    from previous fragments and attempt to find one that we can
    //    prepend to the current fragment in order to create a valid
    //    fragment.
    // 2) KEYFRAME-PULLING:
    //    Here we search for the first keyframe in the fragment and
    //    throw away all the frames between the start of the fragment
    //    and that keyframe. We then extend the duration and pull the
    //    PTS of the keyframe forward so that it covers the time range
    //    of the frames that were disposed of.
    //
    // #1 is far preferable to #2, which can cause "stuttering", but
    // requires more things to be just right.
    if (!gops[0][0].keyFrame) {
      // Search for a gop for fusion from our gopCache
      gopForFusion = this.getGopForFusion_(nalUnits[0], track);

      if (gopForFusion) {
        gops.unshift(gopForFusion);
        // Adjust Gops' metadata to account for the inclusion of the
        // new gop at the beginning
        gops.byteLength += gopForFusion.byteLength;
        gops.nalCount += gopForFusion.nalCount;
        gops.pts = gopForFusion.pts;
        gops.dts = gopForFusion.dts;
        gops.duration += gopForFusion.duration;
      } else {
        // If we didn't find a candidate gop fall back to keyframe-pulling
        gops = this.extendFirstKeyFrame_(gops);
      }
    }

    // Trim gops to align with gopsToAlignWith
    if (gopsToAlignWith.length) {
      var alignedGops;

      if (options.alignGopsAtEnd) {
        alignedGops = this.alignGopsAtEnd_(gops);
      } else {
        alignedGops = this.alignGopsAtStart_(gops);
      }

      if (!alignedGops) {
        // save all the nals in the last GOP into the gop cache
        this.gopCache_.unshift({
          gop: gops.pop(),
          pps: track.pps,
          sps: track.sps
        });

        // Keep a maximum of 6 GOPs in the cache
        this.gopCache_.length = Math.min(6, this.gopCache_.length);

        // Clear nalUnits
        nalUnits = [];

        // return early; no gops can be aligned with the desired gopsToAlignWith
        this.resetStream_();
        this.trigger('done', 'VideoSegmentStream');
        return;
      }

      // Some gops were trimmed. clear dts info so minSegmentDts and pts are correct
      // when recalculated before sending off to CoalesceStream
      clearDtsInfo(track);

      gops = alignedGops;
    }

    collectDtsInfo(track, gops);

    // First, we have to build the index from byte locations to
    // samples (that is, frames) in the video data
    track.samples = this.generateSampleTable_(gops);

    // Concatenate the video data and construct the mdat
    mdat = mp4.mdat(this.concatenateNalData_(gops));

    track.baseMediaDecodeTime = calculateTrackBaseMediaDecodeTime(track);

    this.trigger('processedGopsInfo', gops.map(function(gop) {
      return {
        pts: gop.pts,
        dts: gop.dts,
        byteLength: gop.byteLength
      };
    }));

    // save all the nals in the last GOP into the gop cache
    this.gopCache_.unshift({
      gop: gops.pop(),
      pps: track.pps,
      sps: track.sps
    });

    // Keep a maximum of 6 GOPs in the cache
    this.gopCache_.length = Math.min(6, this.gopCache_.length);

    // Clear nalUnits
    nalUnits = [];

    this.trigger('baseMediaDecodeTime', track.baseMediaDecodeTime);
    this.trigger('timelineStartInfo', track.timelineStartInfo);

    moof = mp4.moof(sequenceNumber, [track]);

    // it would be great to allocate this array up front instead of
    // throwing away hundreds of media segment fragments
    boxes = new Uint8Array(moof.byteLength + mdat.byteLength);

    // Bump the sequence number for next time
    sequenceNumber++;

    boxes.set(moof);
    boxes.set(mdat, moof.byteLength);

    this.trigger('data', {track: track, boxes: boxes});

    this.resetStream_();

    // Continue with the flush process now
    this.trigger('done', 'VideoSegmentStream');
  };

  this.resetStream_ = function() {
    clearDtsInfo(track);

    // reset config and pps because they may differ across segments
    // for instance, when we are rendition switching
    config = undefined;
    pps = undefined;
  };

  // Search for a candidate Gop for gop-fusion from the gop cache and
  // return it or return null if no good candidate was found
  this.getGopForFusion_ = function(nalUnit) {
    var
      halfSecond = 45000, // Half-a-second in a 90kHz clock
      allowableOverlap = 10000, // About 3 frames @ 30fps
      nearestDistance = Infinity,
      dtsDistance,
      nearestGopObj,
      currentGop,
      currentGopObj,
      i;

    // Search for the GOP nearest to the beginning of this nal unit
    for (i = 0; i < this.gopCache_.length; i++) {
      currentGopObj = this.gopCache_[i];
      currentGop = currentGopObj.gop;

      // Reject Gops with different SPS or PPS
      if (!(track.pps && arrayEquals(track.pps[0], currentGopObj.pps[0])) ||
          !(track.sps && arrayEquals(track.sps[0], currentGopObj.sps[0]))) {
        continue;
      }

      // Reject Gops that would require a negative baseMediaDecodeTime
      if (currentGop.dts < track.timelineStartInfo.dts) {
        continue;
      }

      // The distance between the end of the gop and the start of the nalUnit
      dtsDistance = (nalUnit.dts - currentGop.dts) - currentGop.duration;

      // Only consider GOPS that start before the nal unit and end within
      // a half-second of the nal unit
      if (dtsDistance >= -allowableOverlap &&
          dtsDistance <= halfSecond) {

        // Always use the closest GOP we found if there is more than
        // one candidate
        if (!nearestGopObj ||
            nearestDistance > dtsDistance) {
          nearestGopObj = currentGopObj;
          nearestDistance = dtsDistance;
        }
      }
    }

    if (nearestGopObj) {
      return nearestGopObj.gop;
    }
    return null;
  };

  this.extendFirstKeyFrame_ = function(gops) {
    var currentGop;

    if (!gops[0][0].keyFrame && gops.length > 1) {
      // Remove the first GOP
      currentGop = gops.shift();

      gops.byteLength -= currentGop.byteLength;
      gops.nalCount -= currentGop.nalCount;

      // Extend the first frame of what is now the
      // first gop to cover the time period of the
      // frames we just removed
      gops[0][0].dts = currentGop.dts;
      gops[0][0].pts = currentGop.pts;
      gops[0][0].duration += currentGop.duration;
    }

    return gops;
  };

  // Convert an array of nal units into an array of frames with each frame being
  // composed of the nal units that make up that frame
  // Also keep track of cumulative data about the frame from the nal units such
  // as the frame duration, starting pts, etc.
  this.groupNalsIntoFrames_ = function(nalUnits) {
    var
      i,
      currentNal,
      currentFrame = [],
      frames = [];

    currentFrame.byteLength = 0;

    for (i = 0; i < nalUnits.length; i++) {
      currentNal = nalUnits[i];

      // Split on 'aud'-type nal units
      if (currentNal.nalUnitType === 'access_unit_delimiter_rbsp') {
        // Since the very first nal unit is expected to be an AUD
        // only push to the frames array when currentFrame is not empty
        if (currentFrame.length) {
          currentFrame.duration = currentNal.dts - currentFrame.dts;
          frames.push(currentFrame);
        }
        currentFrame = [currentNal];
        currentFrame.byteLength = currentNal.data.byteLength;
        currentFrame.pts = currentNal.pts;
        currentFrame.dts = currentNal.dts;
      } else {
        // Specifically flag key frames for ease of use later
        if (currentNal.nalUnitType === 'slice_layer_without_partitioning_rbsp_idr') {
          currentFrame.keyFrame = true;
        }
        currentFrame.duration = currentNal.dts - currentFrame.dts;
        currentFrame.byteLength += currentNal.data.byteLength;
        currentFrame.push(currentNal);
      }
    }

    // For the last frame, use the duration of the previous frame if we
    // have nothing better to go on
    if (frames.length &&
        (!currentFrame.duration ||
         currentFrame.duration <= 0)) {
      currentFrame.duration = frames[frames.length - 1].duration;
    }

    // Push the final frame
    frames.push(currentFrame);
    return frames;
  };

  // Convert an array of frames into an array of Gop with each Gop being composed
  // of the frames that make up that Gop
  // Also keep track of cumulative data about the Gop from the frames such as the
  // Gop duration, starting pts, etc.
  this.groupFramesIntoGops_ = function(frames) {
    var
      i,
      currentFrame,
      currentGop = [],
      gops = [];

    // We must pre-set some of the values on the Gop since we
    // keep running totals of these values
    currentGop.byteLength = 0;
    currentGop.nalCount = 0;
    currentGop.duration = 0;
    currentGop.pts = frames[0].pts;
    currentGop.dts = frames[0].dts;

    // store some metadata about all the Gops
    gops.byteLength = 0;
    gops.nalCount = 0;
    gops.duration = 0;
    gops.pts = frames[0].pts;
    gops.dts = frames[0].dts;

    for (i = 0; i < frames.length; i++) {
      currentFrame = frames[i];

      if (currentFrame.keyFrame) {
        // Since the very first frame is expected to be a keyframe
        // only push to the gops array when currentGop is not empty
        if (currentGop.length) {
          gops.push(currentGop);
          gops.byteLength += currentGop.byteLength;
          gops.nalCount += currentGop.nalCount;
          gops.duration += currentGop.duration;
        }

        currentGop = [currentFrame];
        currentGop.nalCount = currentFrame.length;
        currentGop.byteLength = currentFrame.byteLength;
        currentGop.pts = currentFrame.pts;
        currentGop.dts = currentFrame.dts;
        currentGop.duration = currentFrame.duration;
      } else {
        currentGop.duration += currentFrame.duration;
        currentGop.nalCount += currentFrame.length;
        currentGop.byteLength += currentFrame.byteLength;
        currentGop.push(currentFrame);
      }
    }

    if (gops.length && currentGop.duration <= 0) {
      currentGop.duration = gops[gops.length - 1].duration;
    }
    gops.byteLength += currentGop.byteLength;
    gops.nalCount += currentGop.nalCount;
    gops.duration += currentGop.duration;

    // push the final Gop
    gops.push(currentGop);
    return gops;
  };

  // generate the track's sample table from an array of gops
  this.generateSampleTable_ = function(gops, baseDataOffset) {
    var
      h, i,
      sample,
      currentGop,
      currentFrame,
      dataOffset = baseDataOffset || 0,
      samples = [];

    for (h = 0; h < gops.length; h++) {
      currentGop = gops[h];

      for (i = 0; i < currentGop.length; i++) {
        currentFrame = currentGop[i];

        sample = createDefaultSample();

        sample.dataOffset = dataOffset;
        sample.compositionTimeOffset = currentFrame.pts - currentFrame.dts;
        sample.duration = currentFrame.duration;
        sample.size = 4 * currentFrame.length; // Space for nal unit size
        sample.size += currentFrame.byteLength;

        if (currentFrame.keyFrame) {
          sample.flags.dependsOn = 2;
        }

        dataOffset += sample.size;

        samples.push(sample);
      }
    }
    return samples;
  };

  // generate the track's raw mdat data from an array of gops
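  // each nal unit is written with a 4-byte length prefix (the AVC-in-mp4
  // sample format), which is why generateSampleTable_ adds 4 bytes per nal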
  this.concatenateNalData_ = function(gops) {
    var
      h, i, j,
      currentGop,
      currentFrame,
      currentNal,
      dataOffset = 0,
      nalsByteLength = gops.byteLength,
      numberOfNals = gops.nalCount,
      totalByteLength = nalsByteLength + 4 * numberOfNals,
      data = new Uint8Array(totalByteLength),
      view = new DataView(data.buffer);

    // For each Gop..
    for (h = 0; h < gops.length; h++) {
      currentGop = gops[h];

      // For each Frame..
      for (i = 0; i < currentGop.length; i++) {
        currentFrame = currentGop[i];

        // For each NAL..
        for (j = 0; j < currentFrame.length; j++) {
          currentNal = currentFrame[j];

          view.setUint32(dataOffset, currentNal.data.byteLength);
          dataOffset += 4;
          data.set(currentNal.data, dataOffset);
          dataOffset += currentNal.data.byteLength;
        }
      }
    }
    return data;
  };

  // trim gop list to the first gop found that has a matching pts with a gop in the list
  // of gopsToAlignWith starting from the START of the list
  this.alignGopsAtStart_ = function(gops) {
    var alignIndex, gopIndex, align, gop, byteLength, nalCount, duration, alignedGops;

    byteLength = gops.byteLength;
    nalCount = gops.nalCount;
    duration = gops.duration;
    alignIndex = gopIndex = 0;

    while (alignIndex < gopsToAlignWith.length && gopIndex < gops.length) {
      align = gopsToAlignWith[alignIndex];
      gop = gops[gopIndex];

      if (align.pts === gop.pts) {
        break;
      }

      if (gop.pts > align.pts) {
        // this current gop starts after the current gop we want to align on, so increment
        // align index
        alignIndex++;
        continue;
      }

      // current gop starts before the current gop we want to align on. so increment gop
      // index
      gopIndex++;
      byteLength -= gop.byteLength;
      nalCount -= gop.nalCount;
      duration -= gop.duration;
    }

    if (gopIndex === 0) {
      // no gops to trim
      return gops;
    }

    if (gopIndex === gops.length) {
      // all gops trimmed, skip appending all gops
      return null;
    }

    alignedGops = gops.slice(gopIndex);
    alignedGops.byteLength = byteLength;
    alignedGops.duration = duration;
    alignedGops.nalCount = nalCount;
    alignedGops.pts = alignedGops[0].pts;
    alignedGops.dts = alignedGops[0].dts;

    return alignedGops;
  };

  // trim gop list to the first gop found that has a matching pts with a gop in the list
  // of gopsToAlignWith starting from the END of the list
  this.alignGopsAtEnd_ = function(gops) {
    var alignIndex, gopIndex, align, gop, alignEndIndex, matchFound;

    alignIndex = gopsToAlignWith.length - 1;
    gopIndex = gops.length - 1;
    alignEndIndex = null;
    matchFound = false;

    while (alignIndex >= 0 && gopIndex >= 0) {
      align = gopsToAlignWith[alignIndex];
      gop = gops[gopIndex];

      if (align.pts === gop.pts) {
        matchFound = true;
        break;
      }

      if (align.pts > gop.pts) {
        alignIndex--;
        continue;
      }

      if (alignIndex === gopsToAlignWith.length - 1) {
        // gop.pts is greater than the last alignment candidate. If no match is found
        // by the end of this loop, we still want to append gops that come after this
        // point
        alignEndIndex = gopIndex;
      }

      gopIndex--;
    }

    if (!matchFound && alignEndIndex === null) {
      return null;
    }

    var trimIndex;

    if (matchFound) {
      trimIndex = gopIndex;
    } else {
      trimIndex = alignEndIndex;
    }

    if (trimIndex === 0) {
      return gops;
    }

    var alignedGops = gops.slice(trimIndex);
    var metadata = alignedGops.reduce(function(total, gop) {
      total.byteLength += gop.byteLength;
      total.duration += gop.duration;
      total.nalCount += gop.nalCount;
      return total;
    }, { byteLength: 0, duration: 0, nalCount: 0 });

    alignedGops.byteLength = metadata.byteLength;
    alignedGops.duration = metadata.duration;
    alignedGops.nalCount = metadata.nalCount;
    alignedGops.pts = alignedGops[0].pts;
    alignedGops.dts = alignedGops[0].dts;

    return alignedGops;
  };

  this.alignGopsWith = function(newGopsToAlignWith) {
    gopsToAlignWith = newGopsToAlignWith;
  };
};

VideoSegmentStream.prototype = new Stream();
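
// In the transmuxer pipelines below this stream is wired as:
//   h264Stream.pipe(videoSegmentStream).pipe(coalesceStream)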

/**
 * Store information about the start and end of the track and the
 * duration for each frame/sample we process in order to calculate
 * the baseMediaDecodeTime
 */
collectDtsInfo = function(track, data) {
  if (typeof data.pts === 'number') {
    if (track.timelineStartInfo.pts === undefined) {
      track.timelineStartInfo.pts = data.pts;
    }

    if (track.minSegmentPts === undefined) {
      track.minSegmentPts = data.pts;
    } else {
      track.minSegmentPts = Math.min(track.minSegmentPts, data.pts);
    }

    if (track.maxSegmentPts === undefined) {
      track.maxSegmentPts = data.pts;
    } else {
      track.maxSegmentPts = Math.max(track.maxSegmentPts, data.pts);
    }
  }

  if (typeof data.dts === 'number') {
    if (track.timelineStartInfo.dts === undefined) {
      track.timelineStartInfo.dts = data.dts;
    }

    if (track.minSegmentDts === undefined) {
      track.minSegmentDts = data.dts;
    } else {
      track.minSegmentDts = Math.min(track.minSegmentDts, data.dts);
    }

    if (track.maxSegmentDts === undefined) {
      track.maxSegmentDts = data.dts;
    } else {
      track.maxSegmentDts = Math.max(track.maxSegmentDts, data.dts);
    }
  }
};

/**
 * Clear values used to calculate the baseMediaDecodeTime between
 * tracks
 */
clearDtsInfo = function(track) {
  delete track.minSegmentDts;
  delete track.maxSegmentDts;
  delete track.minSegmentPts;
  delete track.maxSegmentPts;
};

/**
 * Calculate the track's baseMediaDecodeTime based on the earliest
 * DTS the transmuxer has ever seen and the minimum DTS for the
 * current track
 */
calculateTrackBaseMediaDecodeTime = function(track) {
  var
    baseMediaDecodeTime,
    scale,
    // Calculate the distance, in time, that this segment starts from the start
    // of the timeline (earliest time seen since the transmuxer initialized)
    timeSinceStartOfTimeline = track.minSegmentDts - track.timelineStartInfo.dts;

  // track.timelineStartInfo.baseMediaDecodeTime is the location, in time, where
  // we want the start of the first segment to be placed
  baseMediaDecodeTime = track.timelineStartInfo.baseMediaDecodeTime;

  // Add to that the distance this segment is from the very first
  baseMediaDecodeTime += timeSinceStartOfTimeline;

  // baseMediaDecodeTime must not become negative
  baseMediaDecodeTime = Math.max(0, baseMediaDecodeTime);

  if (track.type === 'audio') {
    // Audio has a different clock equal to the sampling_rate so we need to
    // scale the PTS values into the clock rate of the track
    scale = track.samplerate / ONE_SECOND_IN_TS;
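    // e.g. for 44100 Hz audio the scale is 44100 / 90000 = 0.49, so one
    // second (90000 ticks on the 90kHz clock) becomes 44100 sample ticks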
    baseMediaDecodeTime *= scale;
    baseMediaDecodeTime = Math.floor(baseMediaDecodeTime);
  }

  return baseMediaDecodeTime;
};

/**
 * A Stream that can combine multiple streams (i.e. audio & video)
 * into a single output segment for MSE. Also supports audio-only
 * and video-only streams.
 */
CoalesceStream = function(options, metadataStream) {
  // Number of Tracks per output segment
  // If greater than 1, we combine multiple
  // tracks into a single segment
  this.numberOfTracks = 0;
  this.metadataStream = metadataStream;

  if (typeof options.remux !== 'undefined') {
    this.remuxTracks = !!options.remux;
  } else {
    this.remuxTracks = true;
  }

  this.pendingTracks = [];
  this.videoTrack = null;
  this.pendingBoxes = [];
  this.pendingCaptions = [];
  this.pendingMetadata = [];
  this.pendingBytes = 0;
  this.emittedTracks = 0;

  CoalesceStream.prototype.init.call(this);

  // Take output from multiple segment streams and buffer it until flush()
  this.push = function(output) {
    // buffer incoming captions until the associated video segment
    // finishes
    if (output.text) {
      return this.pendingCaptions.push(output);
    }
    // buffer incoming id3 tags until the final flush
    if (output.frames) {
      return this.pendingMetadata.push(output);
    }

    // Add this track to the list of pending tracks and store
    // important information required for the construction of
    // the final segment
    this.pendingTracks.push(output.track);
    this.pendingBoxes.push(output.boxes);
    this.pendingBytes += output.boxes.byteLength;

    if (output.track.type === 'video') {
      this.videoTrack = output.track;
    }
    if (output.track.type === 'audio') {
      this.audioTrack = output.track;
    }
  };
};

CoalesceStream.prototype = new Stream();

CoalesceStream.prototype.flush = function(flushSource) {
  var
    offset = 0,
    event = {
      captions: [],
      captionStreams: {},
      metadata: [],
      info: {}
    },
    caption,
    id3,
    initSegment,
    timelineStartPts = 0,
    i;

  if (this.pendingTracks.length < this.numberOfTracks) {
    if (flushSource !== 'VideoSegmentStream' &&
        flushSource !== 'AudioSegmentStream') {
      // Return because we haven't received a flush from a data-generating
      // portion of the segment (meaning that we have only received meta-data
      // or captions.)
      return;
    } else if (this.remuxTracks) {
      // Return until we have enough tracks from the pipeline to remux (if we
      // are remuxing audio and video into a single MP4)
      return;
    } else if (this.pendingTracks.length === 0) {
      // In the case where we receive a flush without any data having been
      // received we consider it an emitted track for the purposes of coalescing
      // `done` events.
      // We do this for the case where there is an audio and video track in the
      // segment but no audio data. (seen in several playlists with alternate
      // audio tracks and no audio present in the main TS segments.)
      this.emittedTracks++;

      if (this.emittedTracks >= this.numberOfTracks) {
        this.trigger('done');
        this.emittedTracks = 0;
      }
      return;
    }
  }

  if (this.videoTrack) {
    timelineStartPts = this.videoTrack.timelineStartInfo.pts;
    VIDEO_PROPERTIES.forEach(function(prop) {
      event.info[prop] = this.videoTrack[prop];
    }, this);
  } else if (this.audioTrack) {
    timelineStartPts = this.audioTrack.timelineStartInfo.pts;
    AUDIO_PROPERTIES.forEach(function(prop) {
      event.info[prop] = this.audioTrack[prop];
    }, this);
  }

  if (this.pendingTracks.length === 1) {
    event.type = this.pendingTracks[0].type;
  } else {
    event.type = 'combined';
  }

  this.emittedTracks += this.pendingTracks.length;

  initSegment = mp4.initSegment(this.pendingTracks);

  // Create a new typed array to hold the init segment
  event.initSegment = new Uint8Array(initSegment.byteLength);

  // Create an init segment containing a moov
  // and track definitions
  event.initSegment.set(initSegment);

  // Create a new typed array to hold the moof+mdats
  event.data = new Uint8Array(this.pendingBytes);

  // Append each moof+mdat (one per track) together
  for (i = 0; i < this.pendingBoxes.length; i++) {
    event.data.set(this.pendingBoxes[i], offset);
    offset += this.pendingBoxes[i].byteLength;
  }

  // Translate caption PTS times into second offsets into the
  // video timeline for the segment, and add track info
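  // (PTS values are on the 90kHz clock, so dividing a delta by 90e3 yields
  // seconds; a caption starting 180000 ticks into the segment begins at 2s)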
  for (i = 0; i < this.pendingCaptions.length; i++) {
    caption = this.pendingCaptions[i];
    caption.startTime = (caption.startPts - timelineStartPts);
    caption.startTime /= 90e3;
    caption.endTime = (caption.endPts - timelineStartPts);
    caption.endTime /= 90e3;
    event.captionStreams[caption.stream] = true;
    event.captions.push(caption);
  }

  // Translate ID3 frame PTS times into second offsets into the
  // video timeline for the segment
  for (i = 0; i < this.pendingMetadata.length; i++) {
    id3 = this.pendingMetadata[i];
    id3.cueTime = (id3.pts - timelineStartPts);
    id3.cueTime /= 90e3;
    event.metadata.push(id3);
  }
  // We add this to every single emitted segment even though we only need
  // it for the first
  event.metadata.dispatchType = this.metadataStream.dispatchType;

  // Reset stream state
  this.pendingTracks.length = 0;
  this.videoTrack = null;
  this.pendingBoxes.length = 0;
  this.pendingCaptions.length = 0;
  this.pendingBytes = 0;
  this.pendingMetadata.length = 0;

  // Emit the built segment
  this.trigger('data', event);

  // Only emit `done` if all tracks have been flushed and emitted
  if (this.emittedTracks >= this.numberOfTracks) {
    this.trigger('done');
    this.emittedTracks = 0;
  }
};

/**
 * A Stream that expects MP2T binary data as input and produces
 * corresponding media segments, suitable for use with Media Source
 * Extension (MSE) implementations that support the ISO BMFF byte
 * stream format, like Chrome.
 */
Transmuxer = function(options) {
  var
    self = this,
    hasFlushed = true,
    videoTrack,
    audioTrack;

  Transmuxer.prototype.init.call(this);

  options = options || {};
  this.baseMediaDecodeTime = options.baseMediaDecodeTime || 0;
  this.transmuxPipeline_ = {};

  this.setupAacPipeline = function() {
    var pipeline = {};
    this.transmuxPipeline_ = pipeline;

    pipeline.type = 'aac';
    pipeline.metadataStream = new m2ts.MetadataStream();

    // set up the parsing pipeline
    pipeline.aacStream = new AacStream();
    pipeline.audioTimestampRolloverStream = new m2ts.TimestampRolloverStream('audio');
    pipeline.timedMetadataTimestampRolloverStream = new m2ts.TimestampRolloverStream('timed-metadata');
    pipeline.adtsStream = new AdtsStream();
    pipeline.coalesceStream = new CoalesceStream(options, pipeline.metadataStream);
    pipeline.headOfPipeline = pipeline.aacStream;

    pipeline.aacStream
      .pipe(pipeline.audioTimestampRolloverStream)
      .pipe(pipeline.adtsStream);
    pipeline.aacStream
      .pipe(pipeline.timedMetadataTimestampRolloverStream)
      .pipe(pipeline.metadataStream)
      .pipe(pipeline.coalesceStream);

    pipeline.metadataStream.on('timestamp', function(frame) {
      pipeline.aacStream.setTimestamp(frame.timeStamp);
    });

    pipeline.aacStream.on('data', function(data) {
      if (data.type === 'timed-metadata' && !pipeline.audioSegmentStream) {
        audioTrack = audioTrack || {
          timelineStartInfo: {
            baseMediaDecodeTime: self.baseMediaDecodeTime
          },
          codec: 'adts',
          type: 'audio'
        };
        // hook up the audio segment stream to the first track with aac data
        pipeline.coalesceStream.numberOfTracks++;
        pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack);
        // Set up the final part of the audio pipeline
        pipeline.adtsStream
          .pipe(pipeline.audioSegmentStream)
          .pipe(pipeline.coalesceStream);
      }
    });

    // Re-emit any data coming from the coalesce stream to the outside world
    pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));
    // Let the consumer know we have finished flushing the entire pipeline
    pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));
  };

  this.setupTsPipeline = function() {
    var pipeline = {};
    this.transmuxPipeline_ = pipeline;

    pipeline.type = 'ts';
    pipeline.metadataStream = new m2ts.MetadataStream();

    // set up the parsing pipeline
    pipeline.packetStream = new m2ts.TransportPacketStream();
    pipeline.parseStream = new m2ts.TransportParseStream();
    pipeline.elementaryStream = new m2ts.ElementaryStream();
    pipeline.videoTimestampRolloverStream = new m2ts.TimestampRolloverStream('video');
    pipeline.audioTimestampRolloverStream = new m2ts.TimestampRolloverStream('audio');
    pipeline.timedMetadataTimestampRolloverStream = new m2ts.TimestampRolloverStream('timed-metadata');
    pipeline.adtsStream = new AdtsStream();
    pipeline.h264Stream = new H264Stream();
    pipeline.captionStream = new m2ts.CaptionStream();
    pipeline.coalesceStream = new CoalesceStream(options, pipeline.metadataStream);
    pipeline.headOfPipeline = pipeline.packetStream;

    // disassemble MPEG2-TS packets into elementary streams
    pipeline.packetStream
      .pipe(pipeline.parseStream)
      .pipe(pipeline.elementaryStream);

    // !!THIS ORDER IS IMPORTANT!!
    // demux the streams
    pipeline.elementaryStream
      .pipe(pipeline.videoTimestampRolloverStream)
      .pipe(pipeline.h264Stream);
    pipeline.elementaryStream
      .pipe(pipeline.audioTimestampRolloverStream)
      .pipe(pipeline.adtsStream);

    pipeline.elementaryStream
      .pipe(pipeline.timedMetadataTimestampRolloverStream)
      .pipe(pipeline.metadataStream)
      .pipe(pipeline.coalesceStream);

    // Hook up CEA-608/708 caption stream
    pipeline.h264Stream.pipe(pipeline.captionStream)
      .pipe(pipeline.coalesceStream);

    pipeline.elementaryStream.on('data', function(data) {
      var i;

      if (data.type === 'metadata') {
        i = data.tracks.length;

        // scan the tracks listed in the metadata
        while (i--) {
          if (!videoTrack && data.tracks[i].type === 'video') {
            videoTrack = data.tracks[i];
            videoTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
          } else if (!audioTrack && data.tracks[i].type === 'audio') {
            audioTrack = data.tracks[i];
            audioTrack.timelineStartInfo.baseMediaDecodeTime = self.baseMediaDecodeTime;
          }
        }

        // hook up the video segment stream to the first track with h264 data
        if (videoTrack && !pipeline.videoSegmentStream) {
          pipeline.coalesceStream.numberOfTracks++;
          pipeline.videoSegmentStream = new VideoSegmentStream(videoTrack, options);

          pipeline.videoSegmentStream.on('timelineStartInfo', function(timelineStartInfo) {
            // When video emits timelineStartInfo data after a flush, we forward that
            // info to the AudioSegmentStream, if it exists, because video timeline
            // data takes precedence.
            if (audioTrack) {
              audioTrack.timelineStartInfo = timelineStartInfo;
              // On the first segment we trim AAC frames that exist before the
              // very earliest DTS we have seen in video because Chrome will
              // interpret any video track with a baseMediaDecodeTime that is
              // non-zero as a gap.
              pipeline.audioSegmentStream.setEarliestDts(timelineStartInfo.dts);
            }
          });

          pipeline.videoSegmentStream.on('processedGopsInfo',
            self.trigger.bind(self, 'gopInfo'));

          pipeline.videoSegmentStream.on('baseMediaDecodeTime', function(baseMediaDecodeTime) {
            if (audioTrack) {
              pipeline.audioSegmentStream.setVideoBaseMediaDecodeTime(baseMediaDecodeTime);
            }
          });

          // Set up the final part of the video pipeline
          pipeline.h264Stream
            .pipe(pipeline.videoSegmentStream)
            .pipe(pipeline.coalesceStream);
        }

        if (audioTrack && !pipeline.audioSegmentStream) {
          // hook up the audio segment stream to the first track with aac data
          pipeline.coalesceStream.numberOfTracks++;
          pipeline.audioSegmentStream = new AudioSegmentStream(audioTrack);

          // Set up the final part of the audio pipeline
          pipeline.adtsStream
            .pipe(pipeline.audioSegmentStream)
            .pipe(pipeline.coalesceStream);
        }
      }
    });

    // Re-emit any data coming from the coalesce stream to the outside world
    pipeline.coalesceStream.on('data', this.trigger.bind(this, 'data'));
    // Let the consumer know we have finished flushing the entire pipeline
    pipeline.coalesceStream.on('done', this.trigger.bind(this, 'done'));
  };

  // adjust the location, in time, where subsequent segments will be placed;
  // used when the media timeline changes (for example, across a discontinuity)
  this.setBaseMediaDecodeTime = function(baseMediaDecodeTime) {
    var pipeline = this.transmuxPipeline_;

    this.baseMediaDecodeTime = baseMediaDecodeTime;
    if (audioTrack) {
      audioTrack.timelineStartInfo.dts = undefined;
      audioTrack.timelineStartInfo.pts = undefined;
      clearDtsInfo(audioTrack);
      audioTrack.timelineStartInfo.baseMediaDecodeTime = baseMediaDecodeTime;
      if (pipeline.audioTimestampRolloverStream) {
        pipeline.audioTimestampRolloverStream.discontinuity();
      }
    }
    if (videoTrack) {
      if (pipeline.videoSegmentStream) {
        pipeline.videoSegmentStream.gopCache_ = [];
        pipeline.videoTimestampRolloverStream.discontinuity();
      }
      videoTrack.timelineStartInfo.dts = undefined;
      videoTrack.timelineStartInfo.pts = undefined;
      clearDtsInfo(videoTrack);
      pipeline.captionStream.reset();
      videoTrack.timelineStartInfo.baseMediaDecodeTime = baseMediaDecodeTime;
    }

    if (pipeline.timedMetadataTimestampRolloverStream) {
      pipeline.timedMetadataTimestampRolloverStream.discontinuity();
    }
  };

  this.setAudioAppendStart = function(timestamp) {
    if (audioTrack) {
      this.transmuxPipeline_.audioSegmentStream.setAudioAppendStart(timestamp);
    }
  };

  this.alignGopsWith = function(gopsToAlignWith) {
    if (videoTrack && this.transmuxPipeline_.videoSegmentStream) {
      this.transmuxPipeline_.videoSegmentStream.alignGopsWith(gopsToAlignWith);
    }
  };

  // feed incoming data to the front of the parsing pipeline
  this.push = function(data) {
    if (hasFlushed) {
      var isAac = isLikelyAacData(data);

      if (isAac && this.transmuxPipeline_.type !== 'aac') {
        this.setupAacPipeline();
      } else if (!isAac && this.transmuxPipeline_.type !== 'ts') {
        this.setupTsPipeline();
      }
      hasFlushed = false;
    }
    this.transmuxPipeline_.headOfPipeline.push(data);
  };

  // flush any buffered data
  this.flush = function() {
    hasFlushed = true;
    // Start at the top of the pipeline and flush all pending work
    this.transmuxPipeline_.headOfPipeline.flush();
  };

  // Caption data has to be reset when seeking outside buffered range
  this.resetCaptions = function() {
    if (this.transmuxPipeline_.captionStream) {
      this.transmuxPipeline_.captionStream.reset();
    }
  };

};

Transmuxer.prototype = new Stream();

module.exports = {
  Transmuxer: Transmuxer,
  VideoSegmentStream: VideoSegmentStream,
  AudioSegmentStream: AudioSegmentStream,
  AUDIO_PROPERTIES: AUDIO_PROPERTIES,
  VIDEO_PROPERTIES: VIDEO_PROPERTIES
};
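
// Example usage (illustrative sketch; `tsBytes` and `sourceBuffer` are
// placeholders for a Uint8Array of MP2T data and an MSE SourceBuffer
// created with a matching ISO BMFF MIME type):
//
//   var transmuxer = new Transmuxer({remux: true});
//
//   transmuxer.on('data', function(segment) {
//     // segment.initSegment and segment.data are Uint8Arrays
//     var bytes = new Uint8Array(segment.initSegment.byteLength + segment.data.byteLength);
//     bytes.set(segment.initSegment, 0);
//     bytes.set(segment.data, segment.initSegment.byteLength);
//     sourceBuffer.appendBuffer(bytes);
//   });
//
//   transmuxer.push(tsBytes);
//   transmuxer.flush();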