# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"Mozilla l10n compare locales tool"

import codecs
import os
import re
import shutil

from compare_locales import parser
from compare_locales import mozpath
from compare_locales.checks import getChecker, EntityPos
from compare_locales.keyedtuple import KeyedTuple

from .observer import ObserverList
from .utils import AddRemove


class ContentComparer:
    """Compare localized files against their reference, report statistics
    through an ObserverList, and optionally create merged output files.
    """

    # Entity IDs matching this are counted as "keys" (access/command keys)
    # rather than compared for changes.
    keyRE = re.compile("[kK]ey")
    nl = re.compile("\n", re.M)

    def __init__(self, quiet=0):
        """Create a ContentComparer.

        observer is usually an instance of Observer. The return values
        of the notify method are used to control the handling of missing
        entities.
        """
        self.observers = ObserverList(quiet=quiet)

    def create_merge_dir(self, merge_file):
        """Ensure the directory for `merge_file` exists."""
        outdir = mozpath.dirname(merge_file)
        os.makedirs(outdir, exist_ok=True)

    def merge(
        self,
        ref_entities,
        ref_file,
        l10n_file,
        merge_file,
        missing,
        skips,
        ctx,
        capabilities,
        encoding,
    ):
        """Create localized file in merge dir

        `ref_entities` and `ref_map` are the parser result of the
        reference file
        `ref_file` and `l10n_file` are the File objects for the reference and
        the l10n file, resp.
        `merge_file` is the output path for the generated content. This is None
        if we're just comparing or validating.
        `missing` are the missing messages in l10n - potentially copied from
        reference
        `skips` are entries to be dropped from the localized file
        `ctx` is the parsing context
        `capabilities` are the capabilities for the merge algorithm
        `encoding` is the encoding to be used when serializing, usually utf-8
        """
        if not merge_file:
            # Comparison/validation only, nothing to write.
            return

        if capabilities == parser.CAN_NONE:
            return

        self.create_merge_dir(merge_file)

        if capabilities & parser.CAN_COPY:
            # copy the l10n file if it's good, or the reference file if not
            if skips or missing:
                src = ref_file.fullpath
            else:
                src = l10n_file.fullpath
            shutil.copyfile(src, merge_file)
            print("copied reference to " + merge_file)
            return

        if not (capabilities & parser.CAN_SKIP):
            return

        # Start with None in case the merge file doesn't need to be created.
        f = None

        if skips:
            # skips come in ordered by key name, we need them in file order
            skips.sort(key=lambda s: s.span[0])

            # we need to skip a few erroneous blocks in the input, copy by hand
            f = codecs.open(merge_file, "wb", encoding)
            offset = 0
            for skip in skips:
                chunk = skip.span
                # Copy everything up to the bad span, then jump past it.
                f.write(ctx.contents[offset : chunk[0]])
                offset = chunk[1]
            f.write(ctx.contents[offset:])

        if f is None:
            # l10n file is a good starting point
            shutil.copyfile(l10n_file.fullpath, merge_file)

        if not (capabilities & parser.CAN_MERGE):
            if f:
                f.close()
            return

        if skips or missing:
            if f is None:
                # We copied the l10n file verbatim above; append to it.
                f = codecs.open(merge_file, "ab", encoding)
            # Reference content for missing entities plus non-Junk skipped
            # entries gets appended to the merged file.
            trailing = (
                ["\n"]
                + [ref_entities[key].all for key in missing]
                + [
                    ref_entities[skip.key].all
                    for skip in skips
                    if not isinstance(skip, parser.Junk)
                ]
            )

            def ensureNewline(s):
                if not s.endswith("\n"):
                    return s + "\n"
                return s

            print("adding to " + merge_file)
            f.write("".join(map(ensureNewline, trailing)))

        if f is not None:
            f.close()

    def remove(self, ref_file, l10n, merge_file):
        """Obsolete l10n file.

        Copy to merge stage if we can.
        """
        self.observers.notify("obsoleteFile", l10n, None)
        self.merge(
            KeyedTuple([]),
            ref_file,
            l10n,
            merge_file,
            [],
            [],
            None,
            parser.CAN_COPY,
            None,
        )

    def compare(self, ref_file, l10n, merge_file, extra_tests=None):
        """Compare a localized file against its reference.

        Parses both files, reports missing/obsolete/changed/unchanged
        entities and checker diagnostics through the observers, updates
        statistics, and (when `merge_file` is not None) writes merged
        output via `merge`.
        """
        try:
            p = parser.getParser(ref_file.file)
        except UserWarning:
            # no comparison, XXX report?
            # At least, merge
            self.merge(
                KeyedTuple([]),
                ref_file,
                l10n,
                merge_file,
                [],
                [],
                None,
                parser.CAN_COPY,
                None,
            )
            return
        try:
            p.readFile(ref_file)
        except Exception as e:
            self.observers.notify("error", ref_file, str(e))
            return
        ref_entities = p.parse()
        try:
            p.readFile(l10n)
            l10n_entities = p.parse()
            l10n_ctx = p.ctx
        except Exception as e:
            self.observers.notify("error", l10n, str(e))
            return

        ar = AddRemove()
        ar.set_left(ref_entities.keys())
        ar.set_right(l10n_entities.keys())
        report = missing = obsolete = changed = unchanged = keys = 0
        missing_w = changed_w = unchanged_w = 0  # word stats
        missings = []
        skips = []
        checker = getChecker(l10n, extra_tests=extra_tests)
        if checker and checker.needs_reference:
            checker.set_reference(ref_entities)
        for msg in p.findDuplicates(ref_entities):
            self.observers.notify("warning", l10n, msg)
        for msg in p.findDuplicates(l10n_entities):
            self.observers.notify("error", l10n, msg)
        for action, entity_id in ar:
            if action == "delete":
                # missing entity
                if isinstance(ref_entities[entity_id], parser.Junk):
                    self.observers.notify("warning", l10n, "Parser error in en-US")
                    continue
                _rv = self.observers.notify("missingEntity", l10n, entity_id)
                if _rv == "ignore":
                    continue
                if _rv == "error":
                    # only add to missing entities for l10n-merge on error,
                    # not report
                    missings.append(entity_id)
                    missing += 1
                    refent = ref_entities[entity_id]
                    missing_w += refent.count_words()
                else:
                    # just report
                    report += 1
            elif action == "add":
                # obsolete entity or junk
                if isinstance(l10n_entities[entity_id], parser.Junk):
                    junk = l10n_entities[entity_id]
                    self.observers.notify("error", l10n, junk.error_message())
                    if merge_file is not None:
                        skips.append(junk)
                elif (
                    self.observers.notify("obsoleteEntity", l10n, entity_id)
                    != "ignore"
                ):
                    obsolete += 1
            else:
                # entity found in both ref and l10n, check for changed
                refent = ref_entities[entity_id]
                l10nent = l10n_entities[entity_id]
                if self.keyRE.search(entity_id):
                    keys += 1
                else:
                    if refent.equals(l10nent):
                        self.doUnchanged(l10nent)
                        unchanged += 1
                        unchanged_w += refent.count_words()
                    else:
                        self.doChanged(ref_file, refent, l10nent)
                        changed += 1
                        changed_w += refent.count_words()
                # run checks:
                if checker:
                    for tp, pos, msg, cat in checker.check(refent, l10nent):
                        if isinstance(pos, EntityPos):
                            line, col = l10nent.position(pos)
                        else:
                            line, col = l10nent.value_position(pos)
                        # skip error entities when merging
                        if tp == "error" and merge_file is not None:
                            skips.append(l10nent)
                        self.observers.notify(
                            tp,
                            l10n,
                            "%s at line %d, column %d for %s"
                            % (msg, line, col, refent.key),
                        )

        if merge_file is not None:
            self.merge(
                ref_entities,
                ref_file,
                l10n,
                merge_file,
                missings,
                skips,
                l10n_ctx,
                p.capabilities,
                p.encoding,
            )

        stats = {
            "missing": missing,
            "missing_w": missing_w,
            "report": report,
            "obsolete": obsolete,
            "changed": changed,
            "changed_w": changed_w,
            "unchanged": unchanged,
            "unchanged_w": unchanged_w,
            "keys": keys,
        }
        self.observers.updateStats(l10n, stats)

    def add(self, orig, missing, merge_file):
        """Add missing localized file."""
        f = orig
        try:
            p = parser.getParser(f.file)
        except UserWarning:
            p = None

        # if we don't support this file, assume CAN_COPY to mimick
        # l10n dir as closely as possible
        caps = p.capabilities if p else parser.CAN_COPY
        if caps & (parser.CAN_COPY | parser.CAN_MERGE):
            # even if we can merge, pretend we can only copy
            self.merge(
                KeyedTuple([]),
                orig,
                missing,
                merge_file,
                ["trigger copy"],
                [],
                None,
                parser.CAN_COPY,
                None,
            )

        if self.observers.notify("missingFile", missing, None) == "ignore":
            # filter said that we don't need this file, don't count it
            return

        if p is None:
            # We don't have a parser, cannot count missing strings
            return

        try:
            p.readFile(f)
            entities = p.parse()
        except Exception as ex:
            self.observers.notify("error", f, str(ex))
            return
        # strip parse errors
        entities = [e for e in entities if not isinstance(e, parser.Junk)]
        self.observers.updateStats(missing, {"missing": len(entities)})
        missing_w = 0
        for e in entities:
            missing_w += e.count_words()
        self.observers.updateStats(missing, {"missing_w": missing_w})

    def doUnchanged(self, entity):
        # overload this if needed
        pass

    def doChanged(self, file, ref_entity, l10n_entity):
        # overload this if needed
        pass