# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
import re
|
|
from xml.dom import minidom
|
|
|
|
from .base import Checker
|
|
from ..parser.android import textContent
|
|
|
|
|
|
class AndroidChecker(Checker):
    """Checks for Android ``strings*.xml`` string resources.

    All findings are yielded as 4-tuples of
    ``(severity, position, message, "android")``.
    """

    # Matches file paths containing "strings" and ending in ".xml".
    # NOTE(review): presumably consumed by the Checker base class to decide
    # which files this checker applies to -- confirm against .base.
    pattern = re.compile("(.*)?strings.*\\.xml$")

    def check(self, refEnt, l10nEnt):
        """Check a localized entity against its reference entity.

        First forwards everything the parent class reports, then compares
        the XML node types of both entities. Only ``<string>`` nodes are
        checked in depth; other matching types yield a warning.

        This is a generator yielding tuples of
        - "warning" or "error", depending on what should be reported,
        - position of the problem within the string (0 when unknown),
        - description string to be shown in the report,
        - the category, always "android".
        """
        yield from super().check(refEnt, l10nEnt)
        reference = refEnt.node
        translation = l10nEnt.node
        kind = reference.nodeName
        if kind != translation.nodeName:
            # Apples and oranges, error out.
            yield ("error", 0, "Incompatible resource types", "android")
        elif kind != "string":
            # Once we start parsing more resource types, make sure to add
            # checks for them.
            yield ("warning", 0, "Unsupported resource type", "android")
        else:
            yield from self.check_string([reference], l10nEnt)

    def check_string(self, refs, l10nEnt):
        """Check one localized string against a list of reference nodes.

        ``refs`` is a list so that several reference nodes can be passed
        for <plurals> or <string-array>; ``check`` passes exactly one.
        """
        l10n = l10nEnt.node
        # Both conditions report the same error and stop further checks.
        if self.not_translatable(l10n, *refs) or self.no_at_string(l10n):
            yield ("error", 0, "strings must be translatable", "android")
            return
        if self.no_at_string(*refs):
            # A reference-side @string/ link is only a warning; keep going.
            yield ("warning", 0, "strings must be translatable", "android")
        if self.non_simple_data(l10n):
            yield (
                "error",
                0,
                "Only plain text allowed, or one CDATA surrounded by whitespace",
                "android",
            )
            return
        yield from check_apostrophes(l10nEnt.val)

        params, count, errors = get_params(refs)
        # Inconsistencies inside the reference itself are only warnings.
        yield from (("warning", pos, error, "android") for error, pos in errors)
        # Always check parameters, as the translation might have additional
        # ones.
        yield from check_params(params, count, l10nEnt.val)

    def not_translatable(self, *nodes):
        """Return True if any node carries ``translatable="false"``."""
        for node in nodes:
            if (
                node.hasAttribute("translatable")
                and node.getAttribute("translatable") == "false"
            ):
                return True
        return False

    def no_at_string(self, *ref_nodes):
        """Return True if any node's text starts with ``@string/``.

        Android allows referencing other strings via
          @string/identifier
        instead of an actual value. Such references don't belong in a
        localizable file, so callers report them.
        """
        for node in ref_nodes:
            if textContent(node).startswith("@string/"):
                return True
        return False

    def non_simple_data(self, node):
        """Return True unless the node's content is "simple".

        Simple means: no children at all, exactly one text node, or exactly
        one CDATA section whose only siblings are whitespace-only text
        nodes.
        """
        text_type = minidom.Node.TEXT_NODE
        children = node.childNodes
        cdata_sections = [
            child
            for child in children
            if child.nodeType == minidom.Node.CDATA_SECTION_NODE
        ]
        if not cdata_sections:
            if children.length == 0:
                # empty translation is OK
                return False
            return children.length != 1 or children[0].nodeType != text_type
        if len(cdata_sections) > 1:
            return True
        only_cdata = cdata_sections[0]
        # Everything around the single CDATA section must be blank text.
        return any(
            sibling.nodeType != text_type or sibling.data.strip() != ""
            for sibling in children
            if sibling != only_cdata
        )
|
|
|
|
|
|
# Matches the two-character sequences that must not take part in the
# apostrophe/quote analysis: a backslash escape (backslash plus any
# non-newline character) or a pair of double straight quotes.
silencer = re.compile(r'\\.|""')


def check_apostrophes(string):
    r"""Check Android logic for quotes and apostrophes.

    If you have an apostrophe (') in your string, you must either escape it
    with a backslash (\') or enclose the string in double-quotes (").

    Unescaped quotes are not visually shown on Android, but they're
    also harmless, so we're not checking for quotes. We might do once we're
    better at checking for inline XML, which is full of quotes.
    Pairing quotes as in '""' is bad, though, so report errors for that.
    Mostly, because it's hard to tell if a string is considered quoted or
    not by Android in the end.

    https://developer.android.com/guide/topics/resources/string-resource#escaping_quotes

    This is a generator yielding ``("error", offset, message, "android")``
    tuples, where ``offset`` is the index of the offending character in
    ``string`` as passed in.
    """
    for m in re.finditer('""', string):
        yield ("error", m.start(), "Double straight quotes not allowed", "android")
    # Blank out escapes and quote pairs with padding of the SAME length, so
    # that offsets found in the silenced text still index into the original
    # string. Collapsing them to a single space would shift every later
    # apostrophe offset to the left.
    silenced = silencer.sub(lambda m: " " * len(m.group(0)), string)

    is_quoted = silenced.startswith('"') and silenced.endswith('"')
    if is_quoted:
        # Apostrophes inside a double-quoted string need no escaping.
        return
    # apostrophes need to be escaped
    for m in re.finditer("'", silenced):
        yield ("error", m.start(), "Apostrophe must be escaped", "android")
|
|
|
|
|
|
def get_params(refs):
    """Collect printf-style formatters from the given references.

    Each item of ``refs`` is either a plain string or a minidom node; nodes
    are converted to text via ``textContent`` first. Formatters without an
    explicit ``N$`` position are numbered sequentially across all items.

    Returns a tuple ``(params, count, errors)``:
    - ``params``: sparse map of position to formatter, first one seen wins,
    - ``count``: total number of formatters found, duplicates included,
    - ``errors``: list of ``(message, offset)`` pairs, one for each
      formatter that conflicts with an earlier one at the same position.
    """
    formatter = re.compile(r"%(?P<order>[1-9]\$)?(?P<format>(?:\.[0-9]+)?f|[dsS])")
    params = {}
    errors = []
    count = 0
    next_implicit = 1
    for ref in refs:
        text = textContent(ref) if isinstance(ref, minidom.Node) else ref
        for match in formatter.finditer(text):
            count += 1
            explicit = match.group("order")
            if explicit:
                # The group is a single digit followed by "$".
                position = int(explicit[0])
            else:
                position = next_implicit
                next_implicit += 1
            fmt = match.group("format")
            # Registers fmt for a new position, else returns the known one.
            known = params.setdefault(position, fmt)
            if known != fmt:
                # check for consistency errors
                errors.append(
                    (
                        "Conflicting formatting, %{order}${f1} vs %{order}${f2}".format(
                            order=position, f1=fmt, f2=known
                        ),
                        match.start(),
                    )
                )
    return params, count, errors
|
|
|
|
|
|
def check_params(params, count, string):
    """Compare the printf parameters in the given string to the reference
    parameters.

    ``params`` and ``count`` are the position-to-formatter map and the
    formatter count of the reference, as returned by ``get_params``.
    ``string`` is the localized value to check.

    Also yields errors that are internal to the parameters inside string,
    as found by `get_params`.

    This is a generator yielding
    ``(severity, position, message, "android")`` tuples.
    """
    has_errors = False
    lparams, lcount, errors = get_params([string])
    for error, pos in errors:
        has_errors = True
        yield ("error", pos, error, "android")
    # Compare reference for each localized parameter.
    # If there's no reference found, error, as an out-of-bounds
    # parameter crashes.
    # This assumes that all parameters are actually used in the reference,
    # which should be OK.
    # If there's a mismatch in the formatter, error.
    for order in sorted(lparams):
        if order not in params:
            has_errors = True
            yield (
                "error",
                0,
                f"Formatter %{order}${lparams[order]} not found in reference",
                "android",
            )
        elif params[order] != lparams[order]:
            has_errors = True
            yield ("error", 0, "Mismatching formatter", "android")
    # All parameters used in the reference are expected to be included.
    # Warn if this isn't the case.
    for order in params:
        # Plain dict membership; no need to build a sorted list per lookup.
        if order not in lparams:
            has_errors = True
            yield (
                "warning",
                0,
                f"Formatter %{order}${params[order]} not found in translation",
                "android",
            )
    # Only report a bare count mismatch when nothing more specific was
    # reported above.
    if not has_errors and count != lcount:
        yield ("warning", 0, "Formatter count mismatch", "android")
|