123 lines
4.2 KiB
Python
123 lines
4.2 KiB
Python
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
"""Serialize string changes.
|
|
|
|
The serialization logic is based on the cross-channel merge algorithm.
|
|
It's taking the file structure for the first file, and localizable entries
|
|
from the last.
|
|
Input data is the parsed reference as a list of parser.walk(),
|
|
the existing localized file, also a list of parser.walk(), and a dictionary
|
|
of newly added keys and raw values.
|
|
To remove a string from a localization, pass `None` as value for a key.
|
|
|
|
The marshalling between raw values and entities is done via Entity.unwrap
|
|
and Entity.wrap.
|
|
|
|
To avoid adding English reference strings into the generated file, the
|
|
actual entities in the reference are replaced with Placeholders, which
|
|
are removed in a final pass over the result of merge_resources. After that,
|
|
we also prune whitespace once more.
|
|
"""
|
|
|
|
from codecs import encode
|
|
from functools import reduce
|
|
|
|
from compare_locales.merge import merge_resources, serialize_legacy_resource
|
|
from compare_locales.parser import getParser
|
|
from compare_locales.parser.base import (
|
|
Entity,
|
|
PlaceholderEntity,
|
|
Junk,
|
|
Whitespace,
|
|
)
|
|
|
|
|
|
class SerializationNotSupportedError(ValueError):
    """Raised by serialize() when no parser exists for the given file."""
|
|
|
|
|
|
def serialize(filename, reference, old_l10n, new_data):
    """Returns a byte string of the serialized content to use.

    Input are a filename to create the right parser, a reference and
    an existing localization, both as the result of parser.walk().
    Finally, new_data is a dictionary of key to raw values to serialize.
    A value of `None` is never serialized and marks the existing
    translation for removal; keys that are not in the reference are
    ignored.

    Raises a SerializationNotSupportedError if we don't support the file
    format.
    """
    try:
        parser = getParser(filename)
    except UserWarning as err:
        # Chain explicitly so the traceback shows the parser lookup as the
        # direct cause instead of "another exception occurred".
        raise SerializationNotSupportedError(
            f"Unsupported file format ({filename})."
        ) from err
    # The reference is walked twice below. Materialize it so that a one-shot
    # iterable (e.g. a generator) isn't already exhausted on the second pass.
    reference = list(reference)
    # create template, whitespace and all
    placeholders = [
        placeholder(entry) for entry in reference if not isinstance(entry, Junk)
    ]
    ref_mapping = {entry.key: entry for entry in reference if isinstance(entry, Entity)}
    # strip obsolete strings
    old_l10n = sanitize_old(ref_mapping.keys(), old_l10n, new_data)
    # create new Entities by wrapping each raw value with its reference
    # entity; removals (None) and keys unknown to the reference are skipped
    new_l10n = [
        ref_mapping[key].wrap(new_raw_val)
        for key, new_raw_val in new_data.items()
        if new_raw_val is not None and key in ref_mapping
    ]

    merged = merge_resources(
        parser, [placeholders, old_l10n, new_l10n], keep_newest=False
    )
    pruned = prune_placeholders(merged)
    return encode(serialize_legacy_resource(pruned), parser.encoding)
|
|
|
|
|
|
def sanitize_old(known_keys, old_l10n, new_data):
    """Drop Junk and swap stale or removed messages for placeholders.

    An Entity becomes a placeholder when its key is missing from
    `known_keys`, or when `new_data` maps its key to `None`. Every other
    non-Junk entry is passed through unchanged, in its original order.

    Placeholders are used generously instead of dropping entries, so that
    we can rely on `prune_placeholders` to find their associated comments
    and remove them, too.
    """
    sanitized = []
    for item in old_l10n:
        if isinstance(item, Junk):
            continue
        if isinstance(item, Entity):
            # Obsolete key, or explicitly marked for removal in new_data.
            if item.key not in known_keys or (
                item.key in new_data and new_data[item.key] is None
            ):
                item = placeholder(item)
        sanitized.append(item)
    return sanitized
|
|
|
|
|
|
def placeholder(entry):
    """Return a PlaceholderEntity with entry's key if entry is an Entity.

    Any other kind of entry is returned unchanged.
    """
    return PlaceholderEntity(entry.key) if isinstance(entry, Entity) else entry
|
|
|
|
|
|
def prune_placeholders(entries):
    """Return entries without placeholders and without doubled whitespace.

    PlaceholderEntity items are dropped. Dropping them can leave two
    Whitespace entries next to each other; each such run is collapsed to
    a single entry, keeping the one whose `.all` is longest (the earliest
    one wins a tie).
    """
    kept = []
    for item in entries:
        if isinstance(item, PlaceholderEntity):
            continue
        if kept and isinstance(item, Whitespace) and isinstance(kept[-1], Whitespace):
            # Prefer the longer whitespace.
            if len(item.all) > len(kept[-1].all):
                kept[-1] = item
            continue
        kept.append(item)
    return kept
|