locale: gui: show translation completion percentage in language names

In the GUIs, on the language-select screen, show e.g. Czech (100%), Danish (13%), Dutch (54%) instead of Czech, Danish, Dutch - we count the source strings when creating the .pot PO-template file and add an "X-Electrum-SourceStringCount" header to it, in the push_locale.py script that uploads the .pot file to crowdin. - later, when we run electrum-locale/update.py to download the translations in .po files, these files will also contain the same header. - then when the build_locale.sh script compiles those .po files, we can read the header and use it to populate a new "stats.json" file that we place in electrum/locale/locale/ and bundle in all the release binaries/distributables. - stats.json also includes the number of translated strings for each lang - at runtime we simply read stats.json and use the values to calculate the percentages - a prior implementation did not pre-calc stats.json but did all calculations at runtime (by opening all .mo translations) - however that was deemed too slow, hence the build-time pre-calc - runtime calc took 40 ms on my laptop, so I guess it could easily take 10x that on an old phone - just as we have always been very tolerant of any locale files or even the whole locale/ dir missing, we also tolerate stats.json missing
2026-02-16 00:47:47 +00:00
parent 6de3fef717
commit 3afa2fcdf3
6 changed files with 154 additions and 12 deletions
@@ -4,6 +4,9 @@

 set -e

+CONTRIB_LOCALE="$(dirname "$(realpath "$0" 2> /dev/null || grealpath "$0")")"
+
+
 if [[ ! -d "$1" || -z "$2" ]]; then
    echo "usage: $0 locale_source_dir locale_dest_dir"
    echo "       The dirs can match, to build in place."
@@ -28,3 +31,6 @@ for i in *; do
    mkdir -p "$dir"
    (msgfmt --output-file="$dir/electrum.mo" "$i/electrum.po" || true)
 done
+
+echo "running stats.py"
+"$CONTRIB_LOCALE/stats.py"
@@ -91,6 +91,20 @@ cmd = ["msgcat", "-u", "-o", f"{build_dir}/messages.pot", f"{build_dir}/messages
 print('Generate template')
 subprocess.check_output(cmd)

+# Stamp messages.pot with a custom PO header entry that records the number of source strings.
+# This header survives crowdin, and will still be in the translated .po files,
+# and will get compiled into the final .mo files.
+pot_path = f"{build_dir}/messages.pot"
+custom_header_path = f"{build_dir}/messages_customheader.pot"
+# every line starting with 'msgid ' is counted as one source string
+with open(pot_path, "r", encoding="utf-8") as f:
+    cnt_src_strings = sum(1 for line in f if line.startswith('msgid '))
+# write a tiny .pot file that contains only the extra header field...
+with open(custom_header_path, "w", encoding="utf-8") as f:
+    f.write(
+        'msgid ""\n'
+        'msgstr ""\n'
+        f'"X-Electrum-SourceStringCount: {cnt_src_strings}"\n'
+    )
+# ...and let msgcat combine it with the template, in place
+cmd = ["msgcat", "-u", "-o", pot_path, pot_path, custom_header_path]
+print('Add custom header to template')
+subprocess.check_output(cmd)

 # prepare uploading to crowdin
 os.chdir(os.path.join(project_root, "electrum"))
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+#
+# Copyright (C) 2026 The Electrum developers
+# Distributed under the MIT software license, see the accompanying
+# file LICENCE or http://www.opensource.org/licenses/mit-license.php
+#
+#
+# This generates a 'stats.json' file containing some statistics about translation completeness.
+
+import gettext
+import glob
+import json
+import os
+
+PROJECT_ROOT = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+LOCALE_DIR = os.path.join(PROJECT_ROOT, "electrum", "locale", "locale")
+
+
+if __name__ == '__main__':
+    # Pre-calculate translation statistics for every compiled catalog found in LOCALE_DIR,
+    # and write them to LOCALE_DIR/stats.json.
+    catalog_size = {}  # type: dict[str, int]  # lang_code -> number of translated entries
+    source_string_count = None  # stays None (null in the json) if no .mo file is found
+    # - calc stats
+    # (sorted, so that the "inconsistent headers" error below is reproducible between runs)
+    files_list = sorted(glob.glob(f"{LOCALE_DIR}/*/LC_MESSAGES/*.mo"))
+    for fname in files_list:
+        lang_code = os.path.basename(os.path.dirname(os.path.dirname(fname)))
+        try:
+            t = gettext.translation('electrum', LOCALE_DIR, languages=[lang_code])
+        except OSError as e:
+            raise Exception(f"cannot find or parse .mo file matching {fname!r}") from e
+        # Calc number of translated entries.
+        # Python's gettext stores each plural form of an entry under its own
+        # (msgid, plural_index) key. Taking len() of the raw catalog would count such an
+        # entry once per plural form, inflating the stats for languages with many plural
+        # forms. So we count distinct msgids instead.
+        translated_msgids = {
+            key[0] if isinstance(key, tuple) else key
+            for key in t._catalog
+        }
+        catalog_size[lang_code] = len(translated_msgids)
+        # same SourceStringCount header should be present in all .mo files:
+        t_info = t.info()
+        try:
+            ss_cnt = int(t_info["x-electrum-sourcestringcount"])
+        except Exception as e:
+            raise Exception(
+                f"missing or malformed 'x-electrum-sourcestringcount' header, for {lang_code!r}.\n"
+                f"found {t_info}"
+            ) from e
+        if source_string_count is None:
+            source_string_count = ss_cnt
+        elif source_string_count != ss_cnt:
+            raise Exception(
+                f"inconsistent 'x-electrum-sourcestringcount' headers! "
+                f"prev_cnt={source_string_count}, new_cnt={ss_cnt} (for lang={lang_code})")
+    # - convert to json data. example:
+    #     {
+    #         "source_string_count": 9999,
+    #         "translations": {
+    #             "de_DE": {
+    #                 "string_count": 400,
+    #             },
+    #             ...
+    #         }
+    #     }
+    json_data = {
+        "source_string_count": source_string_count,
+        "translations": {
+            lang_code: {"string_count": string_count}
+            for lang_code, string_count in catalog_size.items()
+        },
+    }
+    # - write json to disk
+    with open(f"{LOCALE_DIR}/stats.json", "w", encoding="utf-8") as f:
+        json.dump(json_data, f, indent=4, sort_keys=True)
+    print(f"done. created file '{LOCALE_DIR}/stats.json'")
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING
 from PyQt6.QtCore import pyqtProperty, pyqtSignal, pyqtSlot, QObject, QRegularExpression

 from electrum.bitcoin import TOTAL_COIN_SUPPLY_LIMIT_IN_BTC
-from electrum.i18n import set_language, languages
+from electrum.i18n import set_language, get_gui_lang_names
 from electrum.logging import get_logger
 from electrum.util import base_unit_name_to_decimal_point
 from electrum.gui import messages
@@ -52,7 +52,7 @@ class QEConfig(AuthMixin, QObject):

    @language.setter
    def language(self, language):
-        if language not in languages:
+        if language not in get_gui_lang_names():
            return
        if self.config.LOCALIZATION_LANGUAGE != language:
            self.config.LOCALIZATION_LANGUAGE = language
@@ -62,12 +62,9 @@ class QEConfig(AuthMixin, QObject):
    languagesChanged = pyqtSignal()
    @pyqtProperty('QVariantList', notify=languagesChanged)
    def languagesAvailable(self):
-        # sort on translated languages, then re-add Default on top
-        langs = copy.deepcopy(languages)
-        default = langs.pop('')
-        langs_sorted = sorted(list(map(lambda x: {'value': x[0], 'text': x[1]}, langs.items())), key=lambda x: x['text'])
-        langs_sorted.insert(0, {'value': '', 'text': default})
-        return langs_sorted
+        langs = get_gui_lang_names()
+        langs_list = list(map(lambda x: {'value': x[0], 'text': x[1]}, langs.items()))
+        return langs_list

    termsOfUseChanged = pyqtSignal()
    @pyqtProperty(bool, notify=termsOfUseChanged)
@@ -31,7 +31,7 @@ from PyQt6.QtCore import Qt
 from PyQt6.QtWidgets import (QComboBox,  QTabWidget, QDialog, QSpinBox,  QCheckBox, QLabel,
                             QVBoxLayout, QGridLayout, QLineEdit, QWidget, QHBoxLayout, QSlider)

-from electrum.i18n import _, languages
+from electrum.i18n import _, get_gui_lang_names
 from electrum import util
 from electrum.util import base_units_list, event_listener

@@ -76,8 +76,9 @@ class SettingsDialog(QDialog, QtEventListener):
        # language
        lang_label = HelpLabel.from_configvar(self.config.cv.LOCALIZATION_LANGUAGE)
        lang_combo = QComboBox()
-        lang_combo.addItems(list(languages.values()))
-        lang_keys = list(languages.keys())
+        _languages = get_gui_lang_names()
+        lang_combo.addItems(list(_languages.values()))
+        lang_keys = list(_languages.keys())
        lang_cur_setting = self.config.LOCALIZATION_LANGUAGE
        try:
            index = lang_keys.index(lang_cur_setting)
@@ -88,7 +89,7 @@ class SettingsDialog(QDialog, QtEventListener):
            for w in [lang_combo, lang_label]: w.setEnabled(False)

        def on_lang(x):
-            lang_request = list(languages.keys())[lang_combo.currentIndex()]
+            lang_request = list(_languages.keys())[lang_combo.currentIndex()]
            if lang_request != self.config.LOCALIZATION_LANGUAGE:
                self.config.LOCALIZATION_LANGUAGE = lang_request
                self.need_restart = True
@@ -23,6 +23,7 @@
 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 import functools
+import json
 import os
 import string
 from typing import Optional
@@ -168,3 +169,54 @@ languages = {
    'zh_TW': 'Chinese Traditional',
 }
 assert '' in languages
+
+
+def get_gui_lang_names(*, show_completion_percent: bool = True) -> dict[str, str]:
+    """Returns a  lang_code -> lang_name  mapping, sorted.
+
+    Order: "Default" first, then "English", then all other languages sorted by name.
+
+    If show_completion_percent is True and translation stats are available, the names of
+    those other languages include a % estimate for translation completeness,
+    e.g. "Czech (100%)", or "(??%)" if the language is missing from the stats.
+    "Default" and "English" never get a percentage.
+    """
+    # pre-calculated catalog sizes; falsy if not requested or if the stats are unavailable
+    stats = _get_stats() if show_completion_percent else None
+    if stats:
+        # The count may be null (e.g. stats generated without any compiled catalogs),
+        # hence the "or". max() avoids div-by-zero.
+        source_str_cnt = max(stats["source_string_count"] or 1, 1)
+    # sort ("Default" first, then "English", then lexicographically sorted names)
+    languages_copy = languages.copy()
+    gui_lang_names = {}  # type: dict[str, str]
+    gui_lang_names[""] = languages_copy.pop("")  # pop "Default"
+    gui_lang_names["en_UK"] = languages_copy.pop("en_UK")  # pop "English"
+    lang_pairs_sorted = sorted(languages_copy.items(), key=lambda x: x[1])
+    # fancy names
+    for lang_code, lang_name in lang_pairs_sorted:
+        if not stats:
+            gui_lang_names[lang_code] = lang_name
+            continue
+        try:
+            lang_data = stats["translations"][lang_code]
+        except KeyError as e:
+            _logger.warning(f"missing language from stats.json: {e!r}")
+            catalog_percent = "??"
+        else:
+            translated_str_cnt = lang_data["string_count"]
+            # clamp: never display more than 100%, even if the counts are slightly off
+            catalog_percent = min(round(100 * translated_str_cnt / source_str_cnt), 100)
+        gui_lang_names[lang_code] = f"{lang_name} ({catalog_percent}%)"
+    return gui_lang_names
+
+
+_stats = None
+def _get_stats() -> dict:
+    """Returns the parsed contents of stats.json from LOCALE_DIR, caching the result.
+
+    If the file cannot be opened, an empty dict is returned (and cached).
+    If the file can be read but is not valid json, the parse error propagates.
+    """
+    global _stats
+    if _stats is not None:  # already loaded
+        return _stats
+    fname = f"{LOCALE_DIR}/stats.json"
+    try:
+        with open(fname, "r", encoding="utf-8") as f:
+            raw_json = f.read()
+    except OSError as e:  # we tolerate the file missing
+        # This can happen e.g. when running from git clone if user did not run build_locale.sh.
+        _logger.info(f"failed to open stats file {fname!r} - built locale (translations) missing??: {e!r}")
+        _stats = {}
+        return _stats
+    # found file. if it is there, it MUST parse correctly
+    _stats = json.loads(raw_json)
+    return _stats