/**
 * @license
 * Copyright 2019 Google LLC. All Rights Reserved.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * =============================================================================
 */

import * as speechCommands from '../src';
import {plotSpectrogram} from './ui';

/** Remove the trailing children of a div that do not have the isFixed attribute. */
export function removeNonFixedChildrenFromWordDiv(wordDiv) {
  for (let i = wordDiv.children.length - 1; i >= 0; --i) {
    if (wordDiv.children[i].getAttribute('isFixed') == null) {
      wordDiv.removeChild(wordDiv.children[i]);
    } else {
      break;
    }
  }
}
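
// Assumed structure of each per-word div (inferred from how this file uses
// it; the host HTML is not part of this file, so the exact markup is an
// assumption):
//
//   <div word="red">
//     <button isFixed="true">red (0)</button>   <!-- fixed: never removed -->
//     <!-- Elements appended by DatasetViz.drawExample() are NOT fixed:
//          nav buttons, spectrogram canvas, play button, delete button. -->
//   </div>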

/**
 * Get the relative x-coordinate of a click event in a canvas.
 *
 * @param {HTMLCanvasElement} canvasElement The canvas in which the click
 *     event happened.
 * @param {Event} event The click event object.
 * @return {number} The relative x-coordinate: a `number` between 0 and 1.
 */
function getCanvasClickRelativeXCoordinate(canvasElement, event) {
  let x;
  if (event.pageX) {
    x = event.pageX;
  } else {
    x = event.clientX + document.body.scrollLeft +
        document.documentElement.scrollLeft;
  }
  x -= canvasElement.offsetLeft;
  return x / canvasElement.width;
}

/**
 * Dataset visualizer that supports
 *
 * - Display of words and spectrograms
 * - Navigation through examples
 * - Deletion of examples
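 *
 * A minimal usage sketch (illustrative only: the recognizer-creation calls
 * follow the public speech-commands API, but the element ids, the transfer
 * model name, and the numeric arguments are assumptions, not taken from this
 * file):
 *
 *   // Inside an async function:
 *   const base = speechCommands.create('BROWSER_FFT');
 *   await base.ensureModelLoaded();
 *   const transferRecognizer = base.createTransfer('my-transfer-words');
 *   // Arguments: recognizer, container div, minExamplesPerClass,
 *   // start-transfer-learn button, download button, duration multiplier.
 *   const datasetViz = new DatasetViz(
 *       transferRecognizer, document.getElementById('word-divs'), 8,
 *       document.getElementById('start-transfer-learn'),
 *       document.getElementById('download-dataset'), 2);
 *   datasetViz.redrawAll();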
 */
export class DatasetViz {
  /**
   * Constructor of DatasetViz
   *
   * @param {Object} transferRecognizer An instance of
   *     `speechCommands.TransferSpeechCommandRecognizer`.
   * @param {HTMLDivElement} topLevelContainer The div element that
   *     holds the div elements for the individual words. It is assumed
   *     that each element has its "word" attribute set to the word.
   * @param {number} minExamplesPerClass Minimum number of examples
   *     per word class required for the start-transfer-learning button
   *     to be enabled.
   * @param {HTMLButtonElement} startTransferLearnButton The button
   *     that starts the transfer learning when clicked.
   * @param {HTMLButtonElement} downloadAsFileButton The button
   *     that triggers downloading of the dataset as a file when clicked.
   * @param {number} transferDurationMultiplier Optional duration
   *     multiplier (the ratio between the length of the example and the
   *     length expected by the model). Defaults to 1.
   */
  constructor(
      transferRecognizer, topLevelContainer, minExamplesPerClass,
      startTransferLearnButton, downloadAsFileButton,
      transferDurationMultiplier = 1) {
    this.transferRecognizer = transferRecognizer;
    this.container = topLevelContainer;
    this.minExamplesPerClass = minExamplesPerClass;
    this.startTransferLearnButton = startTransferLearnButton;
    this.downloadAsFileButton = downloadAsFileButton;
    this.transferDurationMultiplier = transferDurationMultiplier;

    // Navigation indices for the words.
    this.navIndices = {};
  }

  /** Get the set of words in the dataset visualizer. */
  words_() {
    const words = [];
    for (const element of this.container.children) {
      words.push(element.getAttribute('word'));
    }
    return words;
  }

  /**
   * Draw an example.
   *
   * @param {HTMLDivElement} wordDiv The div element for the word. It is
   *     assumed that it contains the word button as the first child and the
   *     canvas as the second.
   * @param {string} word The word of the example being added.
   * @param {SpectrogramData} spectrogram Spectrogram data of the example;
   *     it is drawn as is.
   * @param {RawAudio} rawAudio Optional raw audio waveform. If provided, a
   *     play button is added for the example.
   * @param {string} uid UID of the example being drawn. Must match the UID
   *     of the example from `this.transferRecognizer`.
   */
  async drawExample(wordDiv, word, spectrogram, rawAudio, uid) {
    if (uid == null) {
      throw new Error('Error: UID is not provided for pre-existing example.');
    }

    removeNonFixedChildrenFromWordDiv(wordDiv);
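    // At this point only the fixed children of the word div (e.g. the word
    // button carrying the isFixed attribute) remain; everything this method
    // appended on a previous call (nav buttons, spectrogram canvas, play and
    // delete buttons) has been cleared before being redrawn below.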

    // Create the left and right nav buttons.
    const leftButton = document.createElement('button');
    leftButton.textContent = '←';
    wordDiv.appendChild(leftButton);
    const rightButton = document.createElement('button');
    rightButton.textContent = '→';
    wordDiv.appendChild(rightButton);

    // Determine the position of the example among the word's examples in the
    // dataset.
    const exampleUIDs =
        this.transferRecognizer.getExamples(word).map(ex => ex.uid);
    const position = exampleUIDs.indexOf(uid);
    this.navIndices[word] = position;
    if (position > 0) {
      leftButton.addEventListener('click', () => {
        this.redraw(word, exampleUIDs[position - 1]);
      });
    } else {
      leftButton.disabled = true;
    }
    if (position < exampleUIDs.length - 1) {
      rightButton.addEventListener('click', () => {
        this.redraw(word, exampleUIDs[position + 1]);
      });
    } else {
      rightButton.disabled = true;
    }

    // Spectrogram canvas.
    const exampleCanvas = document.createElement('canvas');
    exampleCanvas.style['display'] = 'inline-block';
    exampleCanvas.style['vertical-align'] = 'middle';
    exampleCanvas.height = 60;
    exampleCanvas.width = 80;
    exampleCanvas.style['padding'] = '3px';

    // Set up the click callback for the spectrogram canvas. When clicked,
    // the keyFrameIndex will be set.
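    // (Assumption based on how keyFrameIndex is used in this file: the key
    // frame only matters when the collected example is longer than the model
    // input, i.e. when transferDurationMultiplier > 1, and the click marks
    // where the word occurs in the longer spectrogram.)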
    if (word !== speechCommands.BACKGROUND_NOISE_TAG) {
      exampleCanvas.addEventListener('click', event => {
        const relativeX =
            getCanvasClickRelativeXCoordinate(exampleCanvas, event);
        const numFrames = spectrogram.data.length / spectrogram.frameSize;
        const keyFrameIndex = Math.floor(numFrames * relativeX);
        console.log(
            `relativeX=${relativeX}; ` +
            `changed keyFrameIndex to ${keyFrameIndex}`);
        this.transferRecognizer.setExampleKeyFrameIndex(uid, keyFrameIndex);
        this.redraw(word, uid);
      });
    }
    wordDiv.appendChild(exampleCanvas);

    const modelNumFrames = this.transferRecognizer.modelInputShape()[1];
    await plotSpectrogram(
        exampleCanvas, spectrogram.data, spectrogram.frameSize,
        spectrogram.frameSize, {
          pixelsPerFrame: exampleCanvas.width / modelNumFrames,
          maxPixelWidth: Math.round(0.4 * window.innerWidth),
          markKeyFrame: this.transferDurationMultiplier > 1 &&
              word !== speechCommands.BACKGROUND_NOISE_TAG,
          keyFrameIndex: spectrogram.keyFrameIndex
        });

    if (rawAudio != null) {
      const playButton = document.createElement('button');
      playButton.textContent = '▶️';
      playButton.addEventListener('click', () => {
        playButton.disabled = true;
        speechCommands.utils.playRawAudio(
            rawAudio, () => playButton.disabled = false);
      });
      wordDiv.appendChild(playButton);
    }

    // Create Delete button.
    const deleteButton = document.createElement('button');
    deleteButton.textContent = 'X';
    wordDiv.appendChild(deleteButton);

    // Callback for delete button.
    deleteButton.addEventListener('click', () => {
      this.transferRecognizer.removeExample(uid);
      // TODO(cais): Smarter logic for which example to draw after deletion.
      // Right now it always redraws the last available one.
      this.redraw(word);
    });

    this.updateButtons_();
  }

  /**
   * Redraw the spectrogram and buttons for a word.
   *
   * @param {string} word The word being redrawn. This must belong to the
   *     vocabulary currently held by the transferRecognizer.
   * @param {string} uid Optional UID of the example to render. If not
   *     specified, the last available example of the word will be drawn.
   */
  async redraw(word, uid) {
    if (word == null) {
      throw new Error('word is not specified');
    }

    let divIndex;
    for (divIndex = 0; divIndex < this.container.children.length; ++divIndex) {
      if (this.container.children[divIndex].getAttribute('word') === word) {
        break;
      }
    }
    if (divIndex === this.container.children.length) {
      throw new Error(`Cannot find div corresponding to word ${word}`);
    }
    const wordDiv = this.container.children[divIndex];

    const exampleCounts = this.transferRecognizer.isDatasetEmpty() ?
        {} :
        this.transferRecognizer.countExamples();
    if (word in exampleCounts) {
      const examples = this.transferRecognizer.getExamples(word);
      let example;
      if (uid == null) {
        // Example UID is not specified. Draw the last one available.
        example = examples[examples.length - 1];
      } else {
        // Example UID is specified. Find the example and update navigation
        // indices.
        for (let index = 0; index < examples.length; ++index) {
          if (examples[index].uid === uid) {
            example = examples[index];
          }
        }
      }
      const spectrogram = example.example.spectrogram;
      await this.drawExample(
          wordDiv, word, spectrogram, example.example.rawAudio, example.uid);
    } else {
      removeNonFixedChildrenFromWordDiv(wordDiv);
    }
    this.updateButtons_();
  }

  /**
   * Redraw the spectrograms and buttons for all words.
   *
   * For each word, the last available example is rendered.
   */
  redrawAll() {
    for (const word of this.words_()) {
      this.redraw(word);
    }
  }

  /** Update the button states according to the state of transferRecognizer. */
  updateButtons_() {
    const exampleCounts = this.transferRecognizer.isDatasetEmpty() ?
        {} :
        this.transferRecognizer.countExamples();
    const minCountByClass =
        this.words_()
            .map(word => exampleCounts[word] || 0)
            .reduce((prev, current) => current < prev ? current : prev);
    for (const element of this.container.children) {
      const word = element.getAttribute('word');
      const button = element.children[0];
      const displayWord =
          word === speechCommands.BACKGROUND_NOISE_TAG ? 'noise' : word;
      const exampleCount = exampleCounts[word] || 0;
      if (exampleCount === 0) {
        button.textContent = `${displayWord} (${exampleCount})`;
      } else {
        const pos = this.navIndices[word] + 1;
        button.textContent = `${displayWord} (${pos}/${exampleCount})`;
      }
    }

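    // Worked example (illustrative numbers, not taken from this file): with
    // minExamplesPerClass = 8 and transferDurationMultiplier = 2, each word
    // needs only ceil(8 / 2) = 4 of the longer-duration examples before the
    // start-transfer-learning button is enabled.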
    const requiredMinCountPerClass =
        Math.ceil(this.minExamplesPerClass / this.transferDurationMultiplier);
    if (minCountByClass >= requiredMinCountPerClass) {
      this.startTransferLearnButton.textContent = 'Start transfer learning';
      this.startTransferLearnButton.disabled = false;
    } else {
      this.startTransferLearnButton.textContent =
          `Need at least ${requiredMinCountPerClass} examples per word`;
      this.startTransferLearnButton.disabled = true;
    }

    this.downloadAsFileButton.disabled =
        this.transferRecognizer.isDatasetEmpty();
  }
}