# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""Merge resources across channels.

Merging resources is done over a series of parsed resources, or source
strings.
The nomenclature is that the resources are ordered from newest to oldest.
The generated file structure is taken from the newest file, and then the
next-newest, etc. The values of the returned entities are taken from the
newest to the oldest resource, too.

In merge_resources, there's an option to choose the values from oldest
to newest instead.
"""

from collections import OrderedDict, defaultdict
from codecs import encode
from functools import reduce

from compare_locales import parser as cl
from compare_locales.parser.base import StickyEntry
from compare_locales.compare.utils import AddRemove


class MergeNotSupportedError(ValueError):
    """Raised by merge_channels when no parser handles the given file name."""


def merge_channels(name, resources):
    """Merge `resources` for the file `name` and return the encoded result.

    `name` selects the parser via `cl.getParser`. `resources` are ordered
    from newest to oldest and are handed to `merge_resources` unchanged.

    Returns the serialized merge result, encoded with the parser's
    `encoding`.

    Raises MergeNotSupportedError if `cl.getParser` raises UserWarning
    for `name`.
    """
    try:
        parser = cl.getParser(name)
    except UserWarning as err:
        # Chain explicitly so the parser lookup failure stays visible as
        # the direct cause of the error.
        raise MergeNotSupportedError(
            f"Unsupported file format ({name})."
        ) from err

    entities = merge_resources(parser, resources)
    return encode(serialize_legacy_resource(entities), parser.encoding)


def merge_resources(parser, resources, keep_newest=True):
    """Merge parsed or unparsed resources, returning an enumerable of Entities.

    Resources are ordered from newest to oldest in the input. The structure
    of the generated content is taken from the newest resource first, and
    then filled by the next etc.
    Values are also taken from the newest, unless keep_newest is False,
    then values are taken from the oldest first.

    Resources given as `bytes` are parsed with `parser`; anything else is
    iterated as an already parsed sequence of entities. An empty
    `resources` yields an empty result.
    """

    def parse_resource(resource):
        # The counter dict keeps track of number of identical comments.
        counter = defaultdict(int)
        if isinstance(resource, bytes):
            parser.readContents(resource)
            resource = parser.walk()
        pairs = [get_key_value(entity, counter) for entity in resource]
        return OrderedDict(pairs)

    def get_key_value(entity, counter):
        if isinstance(entity, cl.Comment):
            counter[entity.val] += 1
            # Use the (value, index) tuple as the key. AddRemove will
            # de-duplicate identical comments at the same index.
            return ((entity.val, counter[entity.val]), entity)

        if isinstance(entity, cl.Whitespace):
            # Use the Whitespace instance as the key so that it's always
            # unique. Adjacent whitespace will be folded into the longer one
            # in prune.
            return (entity, entity)

        return (entity.key, entity)

    # Parse lazily, one resource at a time, exactly as reduce() would
    # consume the iterator, but peel off the first item so that an empty
    # input can be answered without reduce() raising TypeError.
    parsed = map(parse_resource, resources)
    newest = next(parsed, None)
    if newest is None:
        return OrderedDict().values()

    entities = reduce(
        lambda x, y: merge_two(x, y, keep_newer=keep_newest),
        parsed,
        newest,
    )
    return entities.values()


def merge_two(newer, older, keep_newer=True):
    """Merge two OrderedDicts.

    The order of the result dict is determined by `newer`.
    The values in the dict are the newer ones by default, too.
    If `keep_newer` is False, the values will be taken from the older
    dict.
    """
    diff = AddRemove()
    diff.set_left(newer.keys())
    diff.set_right(older.keys())

    # Create a flat sequence of all entities in order reported by AddRemove.
    get_entity = get_newer_entity if keep_newer else get_older_entity
    contents = [(key, get_entity(newer, older, key)) for _, key in diff]

    def prune(acc, cur):
        _, entity = cur
        if entity is None:
            # Prune Nones which stand for duplicated comments.
            return acc

        if acc and isinstance(entity, cl.Whitespace):
            _, prev_entity = acc[-1]

            if isinstance(prev_entity, cl.Whitespace):
                # Prefer the longer whitespace.
                if len(entity.all) > len(prev_entity.all):
                    acc[-1] = (entity, entity)
                # Either way the current whitespace is folded into the
                # previous slot rather than appended.
                return acc

        acc.append(cur)
        return acc

    pruned = reduce(prune, contents, [])
    return OrderedDict(pruned)


def get_newer_entity(newer, older, key):
    """Return the entity for `key` from `newer`, falling back to `older`.

    Returns None if neither mapping has a non-None entry for `key`.
    """
    entity = newer.get(key, None)

    # Always prefer the newer version.
    if entity is not None:
        return entity

    return older.get(key)


def get_older_entity(newer, older, key):
    """Return the entity for `key` from `older`, falling back to `newer`.

    The newer entry is also used when the older one is a StickyEntry.
    """
    entity = older.get(key, None)

    # If we don't have an older version, or it's a StickyEntry,
    # get a newer version
    if entity is None or isinstance(entity, StickyEntry):
        return newer.get(key)

    return entity


def serialize_legacy_resource(entities):
    """Concatenate the `all` text of every entity into a single string."""
    return "".join(entity.all for entity in entities)