322 lines
11 KiB
JavaScript
322 lines
11 KiB
JavaScript
/**
|
|
* @license
|
|
* Copyright 2019 Google LLC. All Rights Reserved.
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* https://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
* =============================================================================
|
|
*/
|
|
|
|
import * as speechCommands from '../src';
|
|
|
|
import {plotSpectrogram} from './ui';
|
|
|
|
/** Remove the children of a div that do not have the isFixed attribute. */
|
|
export function removeNonFixedChildrenFromWordDiv(wordDiv) {
|
|
for (let i = wordDiv.children.length - 1; i >= 0; --i) {
|
|
if (wordDiv.children[i].getAttribute('isFixed') == null) {
|
|
wordDiv.removeChild(wordDiv.children[i]);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Get the relative x-coordinate of a click event in a canvas.
|
|
*
|
|
* @param {HTMLCanvasElement} canvasElement The canvas in which the click
|
|
* event happened.
|
|
* @param {Event} event The click event object.
|
|
* @return {number} The relative x-coordinate: a `number` between 0 and 1.
|
|
*/
|
|
function getCanvasClickRelativeXCoordinate(canvasElement, event) {
|
|
let x;
|
|
if (event.pageX) {
|
|
x = event.pageX;
|
|
} else {
|
|
x = event.clientX + document.body.scrollLeft +
|
|
document.documentElement.scrollLeft;
|
|
}
|
|
x -= canvasElement.offsetLeft;
|
|
return x / canvasElement.width;
|
|
}
|
|
|
|
/**
|
|
* Dataset visualizer that supports
|
|
*
|
|
* - Display of words and spectrograms
|
|
* - Navigation through examples
|
|
* - Deletion of examples
|
|
*/
|
|
export class DatasetViz {
|
|
/**
|
|
* Constructor of DatasetViz
|
|
*
|
|
* @param {Object} transferRecognizer An instance of
|
|
* `speechCommands.TransferSpeechCommandRecognizer`.
|
|
* @param {HTMLDivElement} topLevelContainer The div element that
|
|
* holds the div elements for the individual words. It is assumed
|
|
* that each element has its "word" attribute set to the word.
|
|
* @param {number} minExamplesPerClass Minimum number of examples
|
|
* per word class required for the start-transfer-learning button
|
|
* to be enabled.
|
|
* @param {HTMLButtonElement} startTransferLearnButton The button
|
|
* which starts the transfer learning when clicked.
|
|
* @param {HTMLBUttonElement} downloadAsFileButton The button
|
|
* that triggers downloading of the dataset as a file when clicked.
|
|
* @param {number} transferDurationMultiplier Optional duration
|
|
* multiplier (the ratio between the length of the example
|
|
* and the length expected by the model.) Defaults to 1.
|
|
*/
|
|
constructor(
|
|
transferRecognizer, topLevelContainer, minExamplesPerClass,
|
|
startTransferLearnButton, downloadAsFileButton,
|
|
transferDurationMultiplier = 1) {
|
|
this.transferRecognizer = transferRecognizer;
|
|
this.container = topLevelContainer;
|
|
this.minExamplesPerClass = minExamplesPerClass;
|
|
this.startTransferLearnButton = startTransferLearnButton;
|
|
this.downloadAsFileButton = downloadAsFileButton;
|
|
this.transferDurationMultiplier = transferDurationMultiplier;
|
|
|
|
// Navigation indices for the words.
|
|
this.navIndices = {};
|
|
}
|
|
|
|
/** Get the set of words in the dataset visualizer. */
|
|
words_() {
|
|
const words = [];
|
|
for (const element of this.container.children) {
|
|
words.push(element.getAttribute('word'));
|
|
}
|
|
return words;
|
|
}
|
|
|
|
/**
|
|
* Draw an example.
|
|
*
|
|
* @param {HTMLDivElement} wordDiv The div element for the word. It is assumed
|
|
* that it contains the word button as the first child and the canvas as the
|
|
* second.
|
|
* @param {string} word The word of the example being added.
|
|
* @param {SpectrogramData} spectrogram Optional spectrogram data.
|
|
* If provided, will use it as is. If not provided, will use WebAudio
|
|
* to collect an example.
|
|
* @param {RawAudio} rawAudio Raw audio waveform. Optional
|
|
* @param {string} uid UID of the example being drawn. Must match the UID
|
|
* of the example from `this.transferRecognizer`.
|
|
*/
|
|
async drawExample(wordDiv, word, spectrogram, rawAudio, uid) {
|
|
if (uid == null) {
|
|
throw new Error('Error: UID is not provided for pre-existing example.');
|
|
}
|
|
|
|
removeNonFixedChildrenFromWordDiv(wordDiv);
|
|
|
|
// Create the left and right nav buttons.
|
|
const leftButton = document.createElement('button');
|
|
leftButton.textContent = '←';
|
|
wordDiv.appendChild(leftButton);
|
|
|
|
const rightButton = document.createElement('button');
|
|
rightButton.textContent = '→';
|
|
wordDiv.appendChild(rightButton);
|
|
|
|
// Determine the position of the example in the word of the dataset.
|
|
const exampleUIDs =
|
|
this.transferRecognizer.getExamples(word).map(ex => ex.uid);
|
|
const position = exampleUIDs.indexOf(uid);
|
|
this.navIndices[word] = exampleUIDs.indexOf(uid);
|
|
|
|
if (position > 0) {
|
|
leftButton.addEventListener('click', () => {
|
|
this.redraw(word, exampleUIDs[position - 1]);
|
|
});
|
|
} else {
|
|
leftButton.disabled = true;
|
|
}
|
|
|
|
if (position < exampleUIDs.length - 1) {
|
|
rightButton.addEventListener('click', () => {
|
|
this.redraw(word, exampleUIDs[position + 1]);
|
|
});
|
|
} else {
|
|
rightButton.disabled = true;
|
|
}
|
|
|
|
// Spectrogram canvas.
|
|
const exampleCanvas = document.createElement('canvas');
|
|
exampleCanvas.style['display'] = 'inline-block';
|
|
exampleCanvas.style['vertical-align'] = 'middle';
|
|
exampleCanvas.height = 60;
|
|
exampleCanvas.width = 80;
|
|
exampleCanvas.style['padding'] = '3px';
|
|
|
|
// Set up the click callback for the spectrogram canvas. When clicked,
|
|
// the keyFrameIndex will be set.
|
|
if (word !== speechCommands.BACKGROUND_NOISE_TAG) {
|
|
exampleCanvas.addEventListener('click', event => {
|
|
const relativeX =
|
|
getCanvasClickRelativeXCoordinate(exampleCanvas, event);
|
|
const numFrames = spectrogram.data.length / spectrogram.frameSize;
|
|
const keyFrameIndex = Math.floor(numFrames * relativeX);
|
|
console.log(
|
|
`relativeX=${relativeX}; ` +
|
|
`changed keyFrameIndex to ${keyFrameIndex}`);
|
|
this.transferRecognizer.setExampleKeyFrameIndex(uid, keyFrameIndex);
|
|
this.redraw(word, uid);
|
|
});
|
|
}
|
|
|
|
wordDiv.appendChild(exampleCanvas);
|
|
|
|
const modelNumFrames = this.transferRecognizer.modelInputShape()[1];
|
|
await plotSpectrogram(
|
|
exampleCanvas, spectrogram.data, spectrogram.frameSize,
|
|
spectrogram.frameSize, {
|
|
pixelsPerFrame: exampleCanvas.width / modelNumFrames,
|
|
maxPixelWidth: Math.round(0.4 * window.innerWidth),
|
|
markKeyFrame: this.transferDurationMultiplier > 1 &&
|
|
word !== speechCommands.BACKGROUND_NOISE_TAG,
|
|
keyFrameIndex: spectrogram.keyFrameIndex
|
|
});
|
|
|
|
if (rawAudio != null) {
|
|
const playButton = document.createElement('button');
|
|
playButton.textContent = '▶️';
|
|
playButton.addEventListener('click', () => {
|
|
playButton.disabled = true;
|
|
speechCommands.utils.playRawAudio(
|
|
rawAudio, () => playButton.disabled = false);
|
|
});
|
|
wordDiv.appendChild(playButton);
|
|
}
|
|
|
|
// Create Delete button.
|
|
const deleteButton = document.createElement('button');
|
|
deleteButton.textContent = 'X';
|
|
wordDiv.appendChild(deleteButton);
|
|
|
|
// Callback for delete button.
|
|
deleteButton.addEventListener('click', () => {
|
|
this.transferRecognizer.removeExample(uid);
|
|
// TODO(cais): Smarter logic for which example to draw after deletion.
|
|
// Right now it always redraws the last available one.
|
|
this.redraw(word);
|
|
});
|
|
|
|
this.updateButtons_();
|
|
}
|
|
|
|
/**
|
|
* Redraw the spectrogram and buttons for a word.
|
|
*
|
|
* @param {string} word The word being redrawn. This must belong to the
|
|
* vocabulary currently held by the transferRecognizer.
|
|
* @param {string} uid Optional UID for the example to render. If not
|
|
* specified, the last available example of the dataset will be drawn.
|
|
*/
|
|
async redraw(word, uid) {
|
|
if (word == null) {
|
|
throw new Error('word is not specified');
|
|
}
|
|
let divIndex;
|
|
for (divIndex = 0; divIndex < this.container.children.length; ++divIndex) {
|
|
if (this.container.children[divIndex].getAttribute('word') === word) {
|
|
break;
|
|
}
|
|
}
|
|
if (divIndex === this.container.children.length) {
|
|
throw new Error(`Cannot find div corresponding to word ${word}`);
|
|
}
|
|
const wordDiv = this.container.children[divIndex];
|
|
const exampleCounts = this.transferRecognizer.isDatasetEmpty() ?
|
|
{} :
|
|
this.transferRecognizer.countExamples();
|
|
|
|
if (word in exampleCounts) {
|
|
const examples = this.transferRecognizer.getExamples(word);
|
|
let example;
|
|
if (uid == null) {
|
|
// Example UID is not specified. Draw the last one available.
|
|
example = examples[examples.length - 1];
|
|
} else {
|
|
// Example UID is specified. Find the example and update navigation
|
|
// indices.
|
|
for (let index = 0; index < examples.length; ++index) {
|
|
if (examples[index].uid === uid) {
|
|
example = examples[index];
|
|
}
|
|
}
|
|
}
|
|
|
|
const spectrogram = example.example.spectrogram;
|
|
await this.drawExample(
|
|
wordDiv, word, spectrogram, example.example.rawAudio, example.uid);
|
|
} else {
|
|
removeNonFixedChildrenFromWordDiv(wordDiv);
|
|
}
|
|
|
|
this.updateButtons_();
|
|
}
|
|
|
|
/**
|
|
* Redraw the spectrograms and buttons for all words.
|
|
*
|
|
* For each word, the last available example is rendered.
|
|
**/
|
|
redrawAll() {
|
|
for (const word of this.words_()) {
|
|
this.redraw(word);
|
|
}
|
|
}
|
|
|
|
/** Update the button states according to the state of transferRecognizer. */
|
|
updateButtons_() {
|
|
const exampleCounts = this.transferRecognizer.isDatasetEmpty() ?
|
|
{} :
|
|
this.transferRecognizer.countExamples();
|
|
const minCountByClass =
|
|
this.words_()
|
|
.map(word => exampleCounts[word] || 0)
|
|
.reduce((prev, current) => current < prev ? current : prev);
|
|
|
|
for (const element of this.container.children) {
|
|
const word = element.getAttribute('word');
|
|
const button = element.children[0];
|
|
const displayWord =
|
|
word === speechCommands.BACKGROUND_NOISE_TAG ? 'noise' : word;
|
|
const exampleCount = exampleCounts[word] || 0;
|
|
if (exampleCount === 0) {
|
|
button.textContent = `${displayWord} (${exampleCount})`;
|
|
} else {
|
|
const pos = this.navIndices[word] + 1;
|
|
button.textContent = `${displayWord} (${pos}/${exampleCount})`;
|
|
}
|
|
}
|
|
|
|
const requiredMinCountPerClass =
|
|
Math.ceil(this.minExamplesPerClass / this.transferDurationMultiplier);
|
|
if (minCountByClass >= requiredMinCountPerClass) {
|
|
this.startTransferLearnButton.textContent = 'Start transfer learning';
|
|
this.startTransferLearnButton.disabled = false;
|
|
} else {
|
|
this.startTransferLearnButton.textContent =
|
|
`Need at least ${requiredMinCountPerClass} examples per word`;
|
|
this.startTransferLearnButton.disabled = true;
|
|
}
|
|
|
|
this.downloadAsFileButton.disabled =
|
|
this.transferRecognizer.isDatasetEmpty();
|
|
}
|
|
}
|