/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ import { AppConstants } from "resource://gre/modules/AppConstants.sys.mjs"; /** * @typedef {object} Lazy * @property {typeof setTimeout} setTimeout * @property {typeof clearTimeout} clearTimeout * @property {typeof console} console * @property {typeof import("chrome://global/content/translations/TranslationsUtils.mjs").TranslationsUtils} TranslationsUtils */ /** @type {Lazy} */ const lazy = /** @type {any} */ ({}); ChromeUtils.defineESModuleGetters(lazy, { setTimeout: "resource://gre/modules/Timer.sys.mjs", clearTimeout: "resource://gre/modules/Timer.sys.mjs", TranslationsUtils: "chrome://global/content/translations/TranslationsUtils.mjs", }); ChromeUtils.defineLazyGetter(lazy, "console", () => { return console.createInstance({ maxLogLevelPref: "browser.translations.logLevel", prefix: "Translations", }); }); /** * Map the NodeFilter enums that are used by the TreeWalker into enums that make * sense for determining the status of the nodes for the TranslationsDocument process. * This aligns the meanings of the filtering for the translations process. */ const NodeStatus = { // This node is ready to translate as is. READY_TO_TRANSLATE: NodeFilter.FILTER_ACCEPT, // This node is a shadow host and needs to be subdivided further. SHADOW_HOST: NodeFilter.FILTER_ACCEPT, // This node contains too many block elements and needs to be subdivided further. SUBDIVIDE_FURTHER: NodeFilter.FILTER_SKIP, // This node should not be considered for translation. NOT_TRANSLATABLE: NodeFilter.FILTER_REJECT, }; /** * @typedef {import("../translations").NodeVisibility} NodeVisibility * @typedef {import("../translations").LanguagePair} LanguagePair * @typedef {import("../translations").PortToPage} PortToPage * @typedef {import("../translations").EngineStatus} EngineStatus * @typedef {import("../translations").TranslationsMode} TranslationsMode * @typedef {import("../translations").ScrollDirection} ScrollDirection * @typedef {import("../translations").NodeViewportContext} NodeViewportContext * @typedef {import("../translations").NodeSpatialContext} NodeSpatialContext * @typedef {import("../translations").UpdateEligibility} UpdateEligibility * @typedef {import("../translations").SortableContentElement} SortableContentElement * @typedef {import("../translations").PrioritizedContentElements} PrioritizedContentElements * @typedef {import("../translations").SortableAttributeElement} SortableAttributeElement * @typedef {import("../translations").PrioritizedAttributeElements} PrioritizedAttributeElements * @typedef {import("../translations").TranslationPriorityKinds} TranslationPriorityKinds * @typedef {import("../translations").TranslationRequest} TranslationRequest * @typedef {import("../translations").TranslationFunction} TranslationFunction */ /** * Create a translation cache with a limit. It implements a "least recently used" strategy * to remove old translations. After `#cacheExpirationMS` the cache will be emptied. * This cache is owned statically by the TranslationsChild. This means that it will be * re-used on page reloads if the origin of the site does not change. */ export class LRUCache { /** * A Map from input HTML strings to their translated HTML strings. * * This cache is used to check if we already have a translated response for the given * input HTML, to help avoid spending CPU cycles translating HTML for which we already * know the translated output. * * @type {Map} */ #htmlCacheMap = new Map(); /** * A Map from input text strings to their translated text strings. * * This cache is used to check if we already have a translated response for the given * input text, to help avoid spending CPU cycles translating text for which we already * know the translated output. * * @type {Map} */ #textCacheMap = new Map(); /** * A Set containing strings of translated HTML output. * * This cache is used to check if the HTML has already been translated, * to help avoid sending already-translated HTML to be translated a second time. * * Ideally, a translation model that receives source text that is already in the * target translation language should just pass it through, but this is not always * the case in practice. Depending on the model, sending already-translated text to * be translated again may change the translation or even produce garbage as a response. * * Best to avoid this situation altogether if we can. * * @type {Set} */ #htmlCacheSet = new Set(); /** * A Set containing strings of translated plain text output. * * This cache is used to check if the text has already been translated, * to help avoid sending already-translated text to be translated a second time. * * Ideally, a translation model that receives source text that is already in the * target translation language should just pass it through, but this is not always * the case in practice. Depending on the model, sending already-translated text to * be translated again may change the translation or even produce garbage as a response. * * Best to avoid this situation altogether if we can. * * @type {Set} */ #textCacheSet = new Set(); /** * The language pair for this cache. All cached translations will be for the given pair. * * @type {LanguagePair} */ #languagePair; /** * The limit of entries that can be held in each underlying cache before old entries * will start being replaced by new entries. * * @type {number} */ #cacheLimit = 5_000; /** * This cache will self-destruct after 10 minutes. * * @type {number} */ #cacheExpirationMS = 10 * 60_000; /** * The source and target langue pair for the content in this cache. * * @param {LanguagePair} languagePair */ constructor(languagePair) { this.#languagePair = languagePair; } /** * Retrieves the corresponding Map from source text to translated text. * * This is used to determine if a cached translation already exists for * the given source text, preventing us from having to spend CPU time by * recomputing the translation. * * @param {boolean} isHTML * * @returns {Map} */ #getCacheMap(isHTML) { return isHTML ? this.#htmlCacheMap : this.#textCacheMap; } /** * Retrieves the corresponding Set of translated text responses * * This is used to determine if the text being sent to translate * has already been translated. In such a situation we want to * avoid sending it to the translator a second time. * * @param {boolean} isHTML * @returns {Set} */ #getCacheSet(isHTML) { return isHTML ? this.#htmlCacheSet : this.#textCacheSet; } /** * Get a translation if it exists from the cache, and move it to the end of the cache * to keep it alive longer. * * @param {string} sourceString * @param {boolean} isHTML * * @returns {string | undefined} */ get(sourceString, isHTML) { const cacheMap = this.#getCacheMap(isHTML); const targetString = cacheMap.get(sourceString); if (targetString === undefined) { return undefined; } // Maps are ordered, move this item to the end of the list so it will stay // alive longer. cacheMap.delete(sourceString); cacheMap.set(sourceString, targetString); this.keepAlive(); return targetString; } /** * Adds a new translation to the cache, a mapping from the source text to the target text. * * @param {string} sourceString * @param {string} targetString * @param {boolean} isHTML */ set(sourceString, targetString, isHTML) { const cacheMap = this.#getCacheMap(isHTML); if (cacheMap.has(sourceString)) { // The Map already has this value, so we must delete it to // re-insert it at the most-recently-used position of the Map. cacheMap.delete(sourceString); } else if (cacheMap.size === this.#cacheLimit) { // The Map is at capacity, so we must evict the least-recently-used value. const oldestKey = cacheMap.keys().next().value; // @ts-ignore: We can ensure that oldestKey is not undefined. cacheMap.delete(oldestKey); } cacheMap.set(sourceString, targetString); const cacheSet = this.#getCacheSet(isHTML); if (cacheSet.has(targetString)) { // The Set already has this value, so we must delete it to // re-insert it at the most-recently-used position of the Set. cacheSet.delete(targetString); } else if (cacheSet.size === this.#cacheLimit) { // The Set is at capacity, so we must evict the least-recently-used value. const oldestKey = cacheSet.keys().next().value; // @ts-ignore: We can ensure that oldestKey is not undefined. cacheSet.delete(oldestKey); } cacheSet.add(targetString); this.keepAlive(); } /** * Returns true if the source text is text that has already been translated * into the target language, otherwise false. If so, we want to avoid sending * this text to be translated a second time. Depending on the model, retranslating * text that is already in the target language may produce garbage output. * * @param {string} sourceText * @param {boolean} isHTML * * @returns {boolean} */ isAlreadyTranslated(sourceText, isHTML) { return this.#getCacheSet(isHTML).has(sourceText); } /** * Returns true if the given pair matches the language pair for this cache, otherwise false. * * @param {LanguagePair} languagePair * * @returns {boolean} */ matches(languagePair) { return ( lazy.TranslationsUtils.langTagsMatch( this.#languagePair.sourceLanguage, languagePair.sourceLanguage ) && lazy.TranslationsUtils.langTagsMatch( this.#languagePair.targetLanguage, languagePair.targetLanguage ) ); } /** * The id for the cache's keep-alive timeout, at which point it will destroy itself. * * @type {number} */ #keepAliveTimeoutId = 0; /** * Used to ensure that only one callback is added to the event loop to set keep-alive timeout. * * @type {boolean} */ #hasPendingKeepAliveCallback = false; /** * Resets the timer for the cache's keep-alive timeout, extending the time the cache will live. */ keepAlive() { if (this.#hasPendingKeepAliveCallback) { // There is already a pending callback to extend the timeout. return; } if (this.#keepAliveTimeoutId) { lazy.clearTimeout(this.#keepAliveTimeoutId); this.#keepAliveTimeoutId = 0; } this.#hasPendingKeepAliveCallback = true; lazy.setTimeout(() => { this.#hasPendingKeepAliveCallback = false; this.#keepAliveTimeoutId = lazy.setTimeout(() => { this.#htmlCacheMap = new Map(); this.#textCacheMap = new Map(); this.#htmlCacheSet = new Set(); this.#textCacheSet = new Set(); }, this.#cacheExpirationMS); }, 0); } } /** * How often the DOM is updated with translations, in milliseconds. * * Each time the DOM is updated, we must pause the mutation observer. * * - Stopping the observer takes about 5 micro seconds based on profiling. * * - Starting the observer takes about 30 micro seconds based on profiling. * * We want to choose a DOM update interval that is fast enough to feel instantaneously * reactive when completed translation requests come in, while also allowing multiple * nodes to be updated within a single pause of the observer. * * @type {number} */ const DOM_UPDATE_INTERVAL_MS = 25; /** * Tags excluded from content translation. */ const CONTENT_EXCLUDED_TAGS = new Set([ // The following are elements that semantically should not be translated. "CODE", "KBD", "SAMP", "VAR", "ACRONYM", // The following are deprecated tags. "DIR", "APPLET", // The following are embedded elements, and are not supported (yet). "MATH", "EMBED", "OBJECT", "IFRAME", // This is an SVG tag that can contain arbitrary XML, ignore it. "METADATA", // These are elements that are treated as opaque by IceCat which causes their // innerHTML property to be just the raw text node behind it. Any text that is sent as // HTML must be valid, and there is no guarantee that the innerHTML is valid. "NOSCRIPT", "NOEMBED", "NOFRAMES", // The title is handled separately, and a HEAD tag should not be considered. "HEAD", // These are not user-visible tags. "STYLE", "SCRIPT", "TEMPLATE", // Textarea elements contain user content, which should not be translated. "TEXTAREA", ]); /** * Tags excluded from attribute translation. */ const ATTRIBUTE_EXCLUDED_TAGS = (() => { const attributeTags = new Set(CONTENT_EXCLUDED_TAGS); // The element may contain elements that may have translatable attributes. // So we will allow for attribute translations, but not for content translations. attributeTags.delete("HEAD"); //