# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"Mozilla l10n compare locales tool"
|
|
|
|
import codecs
import os
import shutil
import re

from compare_locales import parser
from compare_locales import mozpath
from compare_locales.checks import getChecker, EntityPos
from compare_locales.keyedtuple import KeyedTuple

from .observer import ObserverList
from .utils import AddRemove


class ContentComparer:
    # Entity IDs matching keyRE look like access or command keys; compare()
    # tallies them separately instead of checking their values for changes.
    keyRE = re.compile("[kK]ey")
    nl = re.compile("\n", re.M)

    def __init__(self, quiet=0):
        """Create a ContentComparer.

        self.observers is an ObserverList; the return values of its
        notify method are used to control the handling of missing
        entities.
        """
        self.observers = ObserverList(quiet=quiet)

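    # A sketch of the observer protocol assumed throughout this class (the
    # Observer type itself is not defined in this module):
    # observers.notify(category, file, data) returns "ignore" to drop an
    # entity from the report, or "error" to also queue it for l10n-merge;
    # see compare() below.
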
    def create_merge_dir(self, merge_file):
        outdir = mozpath.dirname(merge_file)
        os.makedirs(outdir, exist_ok=True)

    def merge(
        self,
        ref_entities,
        ref_file,
        l10n_file,
        merge_file,
        missing,
        skips,
        ctx,
        capabilities,
        encoding,
    ):
"""Create localized file in merge dir
|
|
|
|
`ref_entities` and `ref_map` are the parser result of the
|
|
reference file
|
|
`ref_file` and `l10n_file` are the File objects for the reference and
|
|
the l10n file, resp.
|
|
`merge_file` is the output path for the generated content. This is None
|
|
if we're just comparing or validating.
|
|
`missing` are the missing messages in l10n - potentially copied from
|
|
reference
|
|
`skips` are entries to be dropped from the localized file
|
|
`ctx` is the parsing context
|
|
`capabilities` are the capabilities for the merge algorithm
|
|
`encoding` is the encoding to be used when serializing, usually utf-8
|
|
"""
|
|
|
|
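        # How the capability bits are used below: CAN_COPY lets us copy a
        # whole file, CAN_SKIP additionally lets us cut bad spans out of the
        # l10n file, and CAN_MERGE lets us append reference content for
        # missing or skipped messages.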
        if not merge_file:
            return

        if capabilities == parser.CAN_NONE:
            return

        self.create_merge_dir(merge_file)

if capabilities & parser.CAN_COPY:
|
|
# copy the l10n file if it's good, or the reference file if not
|
|
if skips or missing:
|
|
src = ref_file.fullpath
|
|
else:
|
|
src = l10n_file.fullpath
|
|
shutil.copyfile(src, merge_file)
|
|
print("copied reference to " + merge_file)
|
|
return
|
|
|
|
if not (capabilities & parser.CAN_SKIP):
|
|
return
|
|
|
|
# Start with None in case the merge file doesn't need to be created.
|
|
f = None
|
|
|
|
        if skips:
            # skips come in ordered by key name, we need them in file order
            skips.sort(key=lambda s: s.span[0])

            # we need to skip a few erroneous blocks in the input, copy by
            # hand: each span is a (start, end) offset pair into the l10n
            # content, and everything between spans is copied through
            f = codecs.open(merge_file, "wb", encoding)
            offset = 0
            for skip in skips:
                chunk = skip.span
                f.write(ctx.contents[offset : chunk[0]])
                offset = chunk[1]
            f.write(ctx.contents[offset:])

        if f is None:
            # l10n file is a good starting point
            shutil.copyfile(l10n_file.fullpath, merge_file)

if not (capabilities & parser.CAN_MERGE):
|
|
if f:
|
|
f.close()
|
|
return
|
|
|
|
        if skips or missing:
            if f is None:
                f = codecs.open(merge_file, "ab", encoding)
            trailing = (
                ["\n"]
                + [ref_entities[key].all for key in missing]
                + [
                    ref_entities[skip.key].all
                    for skip in skips
                    if not isinstance(skip, parser.Junk)
                ]
            )

            def ensureNewline(s):
                if not s.endswith("\n"):
                    return s + "\n"
                return s

print("adding to " + merge_file)
|
|
f.write("".join(map(ensureNewline, trailing)))
|
|
|
|
if f is not None:
|
|
f.close()
|
|
|
|
    def remove(self, ref_file, l10n, merge_file):
        """Obsolete l10n file.

        Copy to merge stage if we can.
        """
        self.observers.notify("obsoleteFile", l10n, None)
        self.merge(
            KeyedTuple([]),
            ref_file,
            l10n,
            merge_file,
            [],
            [],
            None,
            parser.CAN_COPY,
            None,
        )

    def compare(self, ref_file, l10n, merge_file, extra_tests=None):
        try:
            p = parser.getParser(ref_file.file)
        except UserWarning:
            # no comparison, XXX report?
            # At least, merge
            self.merge(
                KeyedTuple([]),
                ref_file,
                l10n,
                merge_file,
                [],
                [],
                None,
                parser.CAN_COPY,
                None,
            )
            return
        try:
            p.readFile(ref_file)
        except Exception as e:
            self.observers.notify("error", ref_file, str(e))
            return
        ref_entities = p.parse()
        try:
            p.readFile(l10n)
            l10n_entities = p.parse()
            l10n_ctx = p.ctx
        except Exception as e:
            self.observers.notify("error", l10n, str(e))
            return

        ar = AddRemove()
        ar.set_left(ref_entities.keys())
        ar.set_right(l10n_entities.keys())
        report = missing = obsolete = changed = unchanged = keys = 0
        missing_w = changed_w = unchanged_w = 0  # word stats
        missings = []
        skips = []
        checker = getChecker(l10n, extra_tests=extra_tests)
        if checker and checker.needs_reference:
            checker.set_reference(ref_entities)
        for msg in p.findDuplicates(ref_entities):
            self.observers.notify("warning", l10n, msg)
        for msg in p.findDuplicates(l10n_entities):
            self.observers.notify("error", l10n, msg)
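        # Walk the two-sided diff: "delete" means the entity only exists in
        # the reference (missing from l10n), "add" means it only exists in
        # l10n (obsolete, or junk); anything else is present on both sides.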
        for action, entity_id in ar:
            if action == "delete":
                # missing entity
                if isinstance(ref_entities[entity_id], parser.Junk):
                    self.observers.notify("warning", l10n, "Parser error in en-US")
                    continue
                _rv = self.observers.notify("missingEntity", l10n, entity_id)
                if _rv == "ignore":
                    continue
                if _rv == "error":
                    # only add to missing entities for l10n-merge on error,
                    # not report
                    missings.append(entity_id)
                    missing += 1
                    refent = ref_entities[entity_id]
                    missing_w += refent.count_words()
                else:
                    # just report
                    report += 1
            elif action == "add":
                # obsolete entity or junk
                if isinstance(l10n_entities[entity_id], parser.Junk):
                    junk = l10n_entities[entity_id]
                    self.observers.notify("error", l10n, junk.error_message())
                    if merge_file is not None:
                        skips.append(junk)
                elif (
                    self.observers.notify("obsoleteEntity", l10n, entity_id) != "ignore"
                ):
                    obsolete += 1
            else:
                # entity found in both ref and l10n, check for changed
                refent = ref_entities[entity_id]
                l10nent = l10n_entities[entity_id]
                if self.keyRE.search(entity_id):
                    keys += 1
                else:
                    if refent.equals(l10nent):
                        self.doUnchanged(l10nent)
                        unchanged += 1
                        unchanged_w += refent.count_words()
                    else:
                        self.doChanged(ref_file, refent, l10nent)
                        changed += 1
                        changed_w += refent.count_words()
                    # run checks; checker.check yields
                    # (severity, position, message, category) tuples
                    if checker:
                        for tp, pos, msg, cat in checker.check(refent, l10nent):
                            if isinstance(pos, EntityPos):
                                line, col = l10nent.position(pos)
                            else:
                                line, col = l10nent.value_position(pos)
                            # skip error entities when merging
                            if tp == "error" and merge_file is not None:
                                skips.append(l10nent)
                            self.observers.notify(
                                tp,
                                l10n,
                                "%s at line %d, column %d for %s"
                                % (msg, line, col, refent.key),
                            )

        if merge_file is not None:
            self.merge(
                ref_entities,
                ref_file,
                l10n,
                merge_file,
                missings,
                skips,
                l10n_ctx,
                p.capabilities,
                p.encoding,
            )

        stats = {
            "missing": missing,
            "missing_w": missing_w,
            "report": report,
            "obsolete": obsolete,
            "changed": changed,
            "changed_w": changed_w,
            "unchanged": unchanged,
            "unchanged_w": unchanged_w,
            "keys": keys,
        }
        self.observers.updateStats(l10n, stats)

    def add(self, orig, missing, merge_file):
        """Add missing localized file."""
        f = orig
        try:
            p = parser.getParser(f.file)
        except UserWarning:
            p = None

        # if we don't support this file, assume CAN_COPY to mimic the
        # l10n dir as closely as possible
        caps = p.capabilities if p else parser.CAN_COPY
        if caps & (parser.CAN_COPY | parser.CAN_MERGE):
            # even if we can merge, pretend we can only copy; the non-empty
            # `missing` list ("trigger copy") makes merge() pick the
            # reference file as copy source
            self.merge(
                KeyedTuple([]),
                orig,
                missing,
                merge_file,
                ["trigger copy"],
                [],
                None,
                parser.CAN_COPY,
                None,
            )

        if self.observers.notify("missingFile", missing, None) == "ignore":
            # filter said that we don't need this file, don't count it
            return

        if p is None:
            # We don't have a parser, cannot count missing strings
            return

        try:
            p.readFile(f)
            entities = p.parse()
        except Exception as ex:
            self.observers.notify("error", f, str(ex))
            return
        # strip parse errors
        entities = [e for e in entities if not isinstance(e, parser.Junk)]
        self.observers.updateStats(missing, {"missing": len(entities)})
        missing_w = 0
        for e in entities:
            missing_w += e.count_words()
        self.observers.updateStats(missing, {"missing_w": missing_w})

    def doUnchanged(self, entity):
        # overload this if needed
        pass

    def doChanged(self, file, ref_entity, l10n_entity):
        # overload this if needed
        pass
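

# A minimal sketch (not part of the tool itself) of how the do* hooks above
# are meant to be used: subclass ContentComparer and overload them. The
# ref_file/l10n File objects passed to compare() are assumed to come from
# compare_locales.paths (not defined in this module).
#
#     class LoggingComparer(ContentComparer):
#         def doChanged(self, file, ref_entity, l10n_entity):
#             print("changed: " + ref_entity.key)
#
#     cc = LoggingComparer(quiet=1)
#     cc.compare(ref_file, l10n_file, merge_file=None)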