448 lines
14 KiB
JavaScript
448 lines
14 KiB
JavaScript
/**
|
|
* GNU LibreJS - A browser add-on to block nonfree nontrivial JavaScript.
|
|
* *
|
|
* Copyright (C) 2018 Nathan Nichols
|
|
* Copyright (C) 2022 Yuchen Pei
|
|
*
|
|
* This file is part of GNU LibreJS.
|
|
*
|
|
* GNU LibreJS is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* GNU LibreJS is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with GNU LibreJS. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
const acorn = require('acorn');
|
|
const licenses = require('./license_definitions.json');
|
|
const { patternUtils } = require('./pattern_utils.js');
|
|
const { makeDebugLogger } = require('./debug.js');
|
|
const fnameData = require('./fname_data.json').fname_data;
|
|
|
|
// Matches an "@licstart ... @licend" license notice ("in this page" or
// "in this file" variant) in text whose whitespace has already been removed
// (the marker phrases below contain no spaces). Capture group 1 is the text
// between the two markers.
const LIC_RE = /@licstartThefollowingistheentirelicensenoticefortheJavaScriptcodeinthis(?:page|file)(.*)?@licendTheaboveistheentirelicensenoticefortheJavaScriptcodeinthis(?:page|file)/mi;
|
|
|
|
/*
|
|
NONTRIVIAL THINGS:
|
|
- Fetch
|
|
- XMLHttpRequest
|
|
- eval()
|
|
- ?
|
|
JAVASCRIPT CAN BE FOUND IN:
|
|
- Event handlers (onclick, onload, onsubmit, etc.)
|
|
- <script>JS</script>
|
|
- <script src="/JS.js"></script>
|
|
WAYS TO DETERMINE PASS/FAIL:
|
|
- "// @license [magnet link] [identifier]" then "// @license-end" (may also use /* comments)
|
|
- Automatic whitelist: (http://bzr.savannah.gnu.org/lh/librejs/dev/annotate/head:/data/script_libraries/script-libraries.json)
|
|
*/
|
|
// Object names searched for by the initial regex pass over non-free scripts
// (see evaluateForReservedObj). "document" and "window" are currently
// disabled. 'chrome' only exists on Chrome and 'browser' only on Firefox.
const RESERVED_OBJECTS = ['fetch', 'XMLHttpRequest', 'chrome', 'browser', 'eval'];
|
|
// Keywords that count towards the loop/conditional budget in fullEvaluate.
const LOOPKEYS = new Set([
  'for',
  'if',
  'while',
  'switch',
]);
// Token values that fullEvaluate accepts as plain operators without any
// further lookup.
const OPERATORS = new Set([
  '||',
  '&&',
  '=',
  '==',
  '++',
  '--',
  '+=',
  '-=',
  '*',
]);
|
|
// "@license" opening tags, in line-comment (RE1) and star-comment (RE2) form.
// In the resulting match array, element 1 is the canonical URL (magnet link)
// and element 2 is the license name.
//
// Caveat: a star-comment tag inside a line comment is still picked up, e.g.
//   '// /* @license */ ... /* @license-end */'
// will be checked even though the whole thing is a comment.
const OPENING_LICENSE_RE1 = /^\s*\/\/\s*@license\s+(\S+)\s+(\S+).*$/mi;
const OPENING_LICENSE_RE2 = /\/\*\s*?@license\s+(\S+)\s+([^/*]+).*\*\//mi;
// Matching "@license-end" closing tags, again in line- and star-comment form.
const CLOSING_LICENSE_RE1 = /^\s*\/\/\s*@license-end\s*/mi;
const CLOSING_LICENSE_RE2 = /\/\*\s*@license-end\s*\*\//mi;
|
|
/**
 * DEBUG: when true, fullEvaluate keeps scanning the whole script instead of
 * returning as soon as it encounters a violation.
 *
 * PRINT_DEBUG is handed to makeDebugLogger; presumably it switches the
 * dbg_print console output on or off (confirm against debug.js).
 */
const DEBUG = false; // debug the JS evaluation
const PRINT_DEBUG = false;
const dbg_print = makeDebugLogger('checks.js', PRINT_DEBUG, Date.now());
|
|
|
|
/**
 * stripLicenseToRegexp
 *
 * For each fragment of the given license definition, runs the fragment text
 * through patternUtils.removeNonalpha and patternUtils.replaceTokens and
 * stores the compiled RegExp in the fragment's `regex` field.
 * (Presumably this keeps only alphanumerics plus the special tokens, which
 * are then expanded into regex syntax -- confirm against pattern_utils.js.)
 */
const stripLicenseToRegexp = function(license) {
  license.licenseFragments.forEach(fragment => {
    const alnumText = patternUtils.removeNonalpha(fragment.text);
    fragment.regex = new RegExp(patternUtils.replaceTokens(alnumText), '');
  });
};
|
|
|
|
/**
 * Compiles the fragment regexes of every entry in the license definitions.
 * searchTable reads those regexes, so this has to run before any lookup.
 */
const init = function() {
  console.log('initializing regexes');
  Object.keys(licenses).forEach(key => {
    stripLicenseToRegexp(licenses[key]);
  });
};
|
|
|
|
/**
 * Looks up a preprocessed license declaration in the license table.
 *
 * The declaration is passed through patternUtils.removeNonalpha and then
 * tested against the compiled regex of every fragment of every license.
 * Returns the licenseName of the first license with a matching fragment;
 * logs a message and returns null when nothing matches.
 */
const searchTable = function(strippedComment) {
  const candidate = patternUtils.removeNonalpha(strippedComment);
  for (const license of Object.values(licenses)) {
    const matched = license.licenseFragments.some(
      fragment => fragment.regex.test(candidate));
    if (matched) {
      return license.licenseName;
    }
  }
  console.log('No global license found.');
  return null;
};
|
|
|
|
/**
 * Checks whether licenseText, modulo whitespace, contains a
 * @licstart .. @licend notice naming a known free license.
 *
 * Returns the license name if so, and null otherwise (including when
 * licenseText is null or undefined).
 */
const checkLicenseText = function(licenseText) {
  if (licenseText == null) {
    return null;
  }
  // The notice regex expects text without any whitespace.
  const notice = patternUtils.removeWhitespace(licenseText).match(LIC_RE);
  if (!notice) {
    return notice;
  }
  return searchTable(notice[0]);
};
|
|
|
|
//************************this part can be tested in the HTML file index.html's script test.js****************************
|
|
|
|
/**
 * Checks whether script is trivial by analysing its tokens.
 *
 * Walks the acorn token stream once. The script is reported as nontrivial
 * when it cannot be tokenized, when it uses an identifier flagged `true` in
 * fname_data, when any other identifier is directly followed by `[`
 * (bracket suffix notation), or when it uses more than three of the
 * keywords in LOOPKEYS. With DEBUG enabled, token-level violations are only
 * logged and the scan continues to the end of the script.
 *
 * @param {string} script - JavaScript source text to analyse.
 * @returns {Array} [flag (boolean, true if trivial),
 *   reason (string, human readable report)].
 */
function fullEvaluate(script) {
  if (script == null || script === '') {
    return [true, 'Harmless null script'];
  }

  let tokens;
  try {
    tokens = acorn.tokenizer(script);
  } catch (e) {
    console.warn('Tokenizer could not be initiated (probably invalid code)');
    return [false, 'Tokenizer could not be initiated (probably invalid code)'];
  }

  let toke;
  try {
    toke = tokens.getToken();
  } catch (e) {
    // Without a first token nothing below can be checked. Deny the script,
    // exactly as for a tokenizer failure further into the stream, rather
    // than falling through and reporting it as trivial.
    console.log(e);
    console.warn('couldn\'t get first token (probably invalid code)');
    return [false, 'NONTRIVIAL: Cannot be parsed. This could mean it is a 404 error.'];
  }

  let amtloops = 0;
  let definesFunctions = false;

  /**
   * Given the end offset of an identifier token, tests whether the next
   * non-whitespace character of the script is an opening bracket `[`.
   */
  function is_bsn(end) {
    let pos = end;
    while (pos < script.length && /\s/.test(script.charAt(pos))) {
      pos++;
    }
    return script.charAt(pos) === '[';
  }

  /**
   * Judges a token that carries a value (identifier, literal, operator).
   * Returns [false, reason] on a violation, [true, ''] otherwise.
   */
  function evaluateByTokenValue(token) {
    const value = token.value;
    if (OPERATORS.has(value)) {
      // It's just an operator. Javascript doesn't have operator overloading
      // so it must be some kind of primitive (I.e. a number)
      return [true, ''];
    }

    // Own-property lookup only: names inherited from Object.prototype
    // (e.g. "constructor") must be treated as unknown identifiers, not as
    // table entries.
    const status = Object.prototype.hasOwnProperty.call(fnameData, value) ?
      fnameData[value] : undefined;

    if (status === true) { // is the identifier banned?
      dbg_print('%c NONTRIVIAL: nontrivial token: \'' + value + '\'', 'color:red');
      if (!DEBUG) {
        return [false, 'NONTRIVIAL: nontrivial token: \'' + value + '\''];
      }
    } else if (status === false || status === undefined) { // not banned or user defined
      // Is there bracket suffix notation?
      if (is_bsn(token.end)) {
        dbg_print('%c NONTRIVIAL: Bracket suffix notation on variable \'' + value + '\'', 'color:red');
        if (!DEBUG) {
          // "%c" is a console formatting directive and does not belong in
          // the human readable reason.
          return [false, 'NONTRIVIAL: Bracket suffix notation on variable \'' + value + '\''];
        }
      }
    } else {
      dbg_print('trivial token:' + value);
    }
    return [true, ''];
  }

  /**
   * Judges a keyword token: notes function declarations and enforces the
   * loop/conditional budget. Returns [false, reason] on a violation.
   */
  function evaluateByTokenTypeKeyword(keyword) {
    if (keyword === 'function') {
      dbg_print('%c NOTICE: Function declaration.', 'color:green');
      definesFunctions = true;
    }

    if (LOOPKEYS.has(keyword)) {
      amtloops++;
      if (amtloops > 3) {
        dbg_print('%c NONTRIVIAL: Too many loops/conditionals.', 'color:red');
        if (!DEBUG) {
          return [false, 'NONTRIVIAL: Too many loops/conditionals.'];
        }
      }
    }
    return [true, ''];
  }

  while (toke !== undefined && toke.type !== acorn.tokTypes.eof) {
    if (toke.type.keyword !== undefined) {
      // This type of loop detection ignores functional loop alternatives
      // and ternary operators
      const tokeTypeRes = evaluateByTokenTypeKeyword(toke.type.keyword);
      if (tokeTypeRes[0] === false) {
        return tokeTypeRes;
      }
    } else if (toke.value !== undefined) {
      const tokeValRes = evaluateByTokenValue(toke);
      if (tokeValRes[0] === false) {
        return tokeValRes;
      }
    }
    // If not a keyword or an identifier it's some kind of operator, field
    // parenthesis, brackets
    try {
      toke = tokens.getToken();
    } catch (e) {
      dbg_print('Denied script because it cannot be parsed.');
      return [false, 'NONTRIVIAL: Cannot be parsed. This could mean it is a 404 error.'];
    }
  }

  dbg_print('%cAppears to be trivial.', 'color:green;');
  if (definesFunctions === true) {
    return [true, 'Script appears to be trivial but defines functions.'];
  }
  return [true, 'Script appears to be trivial.'];
}
|
|
|
|
|
|
//****************************************************************************************************
|
|
/**
 * This is the entry point for full code evaluation for triviality.
 *
 * First runs the cheap reserved-object pass (evaluateForReservedObj). That
 * pass can only determine that a script is bad, not that it is good, so a
 * failure is returned immediately. If the initial pass is clean, the full
 * token-based pass (fullEvaluate) runs and its result is returned.
 *
 * It returns an array of
 * [flag (boolean, false if "bad"), reason (string, human readable report)]
 */
function evaluate(script, name) {
  const firstPass = evaluateForReservedObj(script, name);
  if (firstPass[0] !== true) {
    return firstPass;
  }
  dbg_print('%c pass', 'color:green;');
  return fullEvaluate(script);
}
|
|
|
|
/**
 * Initial regex pass: looks for uses of the names in RESERVED_OBJECTS.
 *
 * String literals are masked and comments removed first, then each reserved
 * name is searched for where it is followed (after optional whitespace) by
 * one of `;`, `,`, `.`, `(` or `[` and is not preceded by a word character.
 * This is a heuristic: it does not understand template literals or regex
 * literals.
 *
 * @param {string} script - JavaScript source text to scan.
 * @param {string} name - script name, used only in debug output.
 * @returns {Array} [flag (boolean, false if a reserved object is used),
 *   reason (string, human readable report)].
 */
function evaluateForReservedObj(script, name) {
  // Comments and string literals are consumed in one left-to-right pass so
  // that a quote inside a comment, or an empty string literal, cannot pair
  // up with a later quote and swallow the code in between.
  const commentOrString =
    /\/\*[\s\S]*?\*\/|\/\/.*|'(?:\\[\s\S]|[^'\\\n])*'|"(?:\\[\s\S]|[^"\\\n])*"/g;
  const temp = script.replace(commentOrString, found => {
    if (found[0] === '\'') return '\'string\'';
    if (found[0] === '"') return '"string"';
    return ''; // comment
  });

  dbg_print('%c ------evaluation results for ' + name + '------', 'color:white');
  dbg_print('Script accesses reserved objects?');

  // This is where individual "passes" are made over the code
  for (const reserved of RESERVED_OBJECTS) {
    const usage = new RegExp(`(?:\\W|^)${reserved}\\s*[;,.(\\[]`);
    if (usage.test(temp)) {
      dbg_print('%c fail', 'color:red;');
      return [false, 'Script uses a reserved object (' + reserved + ')'];
    }
  }
  return [true, 'Reserved object not found.'];
}
|
|
|
|
/**
 * Checks whether url is the magnet link of a license.
 *
 * Returns the licenseName if so, otherwise returns null. If a key is
 * supplied, checks for the license with the key only.
 *
 * @param {string} url - candidate magnet link; "&amp;" entities are decoded
 *   to "&" before comparing.
 * @param {?string} key - optional key into the license definitions.
 * @returns {?string} the matching license name, or null.
 */
function checkMagnet(url, key = null) {
  // Links copied out of HTML markup carry "&amp;" where the canonical
  // magnet URLs have a plain "&".
  const fixedUrl = url.replace(/&amp;/g, '&');

  // Match by magnet link
  const checkLicenseMagnet = license => {
    const found = license.canonicalUrl.some(
      cUrl => cUrl.startsWith('magnet:') && fixedUrl === cUrl);
    return found ? license.licenseName : null;
  };

  if (key) {
    // An unknown key, or an entry without a URL list, is simply "no match".
    const known = Object.prototype.hasOwnProperty.call(licenses, key);
    const license = known ? licenses[key] : null;
    if (!license || !Array.isArray(license.canonicalUrl)) {
      return null;
    }
    return checkLicenseMagnet(license);
  }

  for (const license of Object.values(licenses)) {
    const result = checkLicenseMagnet(license);
    if (result) return result;
  }
  return null;
}
|
|
|
|
|
|
/**
 * Evaluates the content of a script for licenses and triviality.
 *
 * A script carrying a recognized @licstart .. @licend notice is accepted as
 * a whole. Otherwise the source is consumed piece by piece: code inside
 * "@license <magnet link> <name>" .. "@license-end" blocks is accepted when
 * the magnet link belongs to a known license, and code outside such blocks
 * is checked for triviality (external scripts are never accepted on
 * triviality). Rejected pieces are replaced by an explanatory comment.
 *
 * @param {string} scriptSrc - content of the script.
 * @param {string} name - script name, used in debug output.
 * @param {boolean} external - whether the script is external.
 * @returns {Array}
 *  [
 *    true (accepted) or false (denied),
 *    edited content,
 *    reason text
 *  ]
 */
function checkScriptSource(scriptSrc, name, external = false) {
  let inSrc = scriptSrc.trim();
  if (!inSrc) return [true, scriptSrc, 'Empty source.'];

  // Check for @licstart .. @licend method
  const license = checkLicenseText(scriptSrc);
  if (license) {
    return [true, scriptSrc, `Licensed under: ${license}`];
  }

  let outSrc = '';
  let reason = '';
  let partsDenied = false;
  let partsAccepted = false;

  // Keeps a message from closing the block comment it gets embedded in.
  const commentSafe = text => String(text).replace(/\*\//g, '* /');

  // Of two optional regex matches, picks the one that starts first
  // (the second one on a tie).
  const earliest = (first, second) => {
    if (first && second) {
      return first.index < second.index ? first : second;
    }
    return first || second;
  };

  // Judges a stretch of code that is not covered by a license tag and
  // appends either the code itself or a "blocked" notice to outSrc.
  function checkTriviality(s) {
    if (!patternUtils.removeJsComments(s).trim()) {
      return; // empty, ignore it
    }
    const [trivial, message] = external ?
      [false, 'External script with no known license']
      : evaluate(s, name);
    if (trivial) {
      partsAccepted = true;
      outSrc += s;
    } else {
      partsDenied = true;
      if (s.startsWith('javascript:')) {
        outSrc += `# LIBREJS BLOCKED: ${message}`;
      } else {
        outSrc += `/*\nLIBREJS BLOCKED: ${commentSafe(message)}\n*/`;
      }
    }
    reason += `\n${message}`;
  }

  // Consume inSrc by checking licenses in all @license / @license-end
  // blocks and triviality outside these blocks
  while (inSrc) {
    const openingMatch = earliest(
      OPENING_LICENSE_RE1.exec(inSrc), OPENING_LICENSE_RE2.exec(inSrc));
    const openingIndex = openingMatch ? openingMatch.index : inSrc.length;
    // checks the triviality of the code before the license tag, if any
    checkTriviality(inSrc.substring(0, openingIndex));
    inSrc = inSrc.substring(openingIndex);
    if (!inSrc) break;

    // checks the remaining part, that starts with an @license
    const closureMatch = earliest(
      CLOSING_LICENSE_RE1.exec(inSrc), CLOSING_LICENSE_RE2.exec(inSrc));
    if (!closureMatch) {
      const msg = 'ERROR: @license with no @license-end';
      return [false, `\n/*\n ${msg} \n*/\n`, msg];
    }
    const closureEndIndex = closureMatch.index + closureMatch[0].length;

    if (!(Array.isArray(openingMatch) && openingMatch.length >= 3)) {
      // Same [verdict, edited content, reason] shape as every other exit.
      const msg = 'Malformed or unrecognized license tag.';
      return [false, `\n/*\n ${msg} \n*/\n`, msg];
    }

    const licenseName = checkMagnet(openingMatch[1]);
    let message;
    if (licenseName) {
      outSrc += inSrc.substring(0, closureEndIndex);
      partsAccepted = true;
      message = `Recognized license: "${licenseName}".`;
    } else {
      // Build the message before writing it into the output; the matched
      // tag may itself be a star comment, hence commentSafe.
      message = `Unrecognized license tag: "${openingMatch[0]}"`;
      outSrc += `\n/*\n${commentSafe(message)}\n*/\n`;
      partsDenied = true;
    }
    reason += `\n${message}`;

    // trim off everything we just evaluated
    inSrc = inSrc.substring(closureEndIndex).trim();
  }

  if (partsDenied) {
    if (partsAccepted) {
      reason = `Some parts of the script have been disabled (check the source for details).\n^--- ${reason}`;
    }
    return [false, outSrc, reason];
  }

  return [true, scriptSrc, reason];
}
|
|
|
|
// Public API of this module. init compiles the license fragment regexes
// that checkLicenseText (via searchTable) relies on.
module.exports = { init, checkLicenseText, checkMagnet, checkScriptSource };
|