python-apt: use git Mirrors.masterlist as main source

2024-12-05 00:06:06 +00:00 · 2024-12-05 00:06:06 +00:00 · ad12eaf56a
commit ad12eaf56a
parent c20840005e
2 changed files with 95 additions and 34 deletions
--- a/helpers/DATA/python-apt/parse-mirror-masterlist.py
+++ b/helpers/DATA/python-apt/parse-mirror-masterlist.py
@ -0,0 +1,87 @@
+#! /usr/bin/python3
+#
+# Script to parse Mirrors.masterlist file for python-apt template
+#
+#    Copyright (C) 2024 Luis Guzmán <ark@switnet.org>
+#
+#    This program is free software; you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation; either version 2 of the License, or
+#    (at your option) any later version.
+#
+#    This program is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with this program; if not, write to the Free Software
+#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+
+import re
+import argparse
+
+# Command line: the masterlist path is mandatory; the country filter is
+# optional and is compared verbatim against the parsed country codes.
+parser = argparse.ArgumentParser(description="Parse Mirrors.masterlist")
+parser.add_argument(
+    "file",
+    help="Path to the Mirrors.masterlist file")
+parser.add_argument(
+    "--country",
+    "-c",
+    help="Filter by country (e.g., AU, BR, CA)",
+    default=None)
+args = parser.parse_args()
+
+# Decode explicitly as UTF-8 instead of relying on the locale's default
+# encoding, so the result does not depend on the environment the script
+# is run in. Undecodable bytes are replaced rather than raising, so a
+# stray byte does not abort the whole run.
+with open(args.file, 'r', encoding='utf-8', errors='replace') as file:
+    input_text = file.read()
+
+# Split the masterlist into per-mirror blocks and collect their URLs.
+#
+# The separator is a zero-width lookahead, so every chunk keeps its own
+# leading "Site:" text (splitting on an empty match needs Python 3.7+).
+# NOTE(review): none of these patterns is anchored to a line start, so a
+# literal "Site: " inside another field's value would open a new block;
+# confirm the masterlist never contains that.
+site_re = re.compile(r'Site:\s+(\S+)')
+country_re = re.compile(r'Country:\s+(\w{2})\b')
+https_re = re.compile(r"Archive-https:\s+(\S+)")
+http_re = re.compile(r"Archive-http:\s+(\S+)")
+
+blocks = re.split(r'(?=Site:\s+)', input_text)
+
+# Keep only chunks that really begin with "Site:"; this drops whatever
+# text precedes the first entry.
+filtered_blocks = [
+    block.strip()
+    for block in blocks
+    if block.strip().startswith("Site:")
+]
+
+# mirrors maps country code -> site -> {"https": [...], "http": [...]}
+mirrors = {}
+for block in filtered_blocks:
+    country_match = country_re.search(block)
+    site_match = site_re.search(block)
+
+    # An entry is only usable when both a two-character country code and
+    # a site name are present.
+    if not country_match or not site_match:
+        continue
+    country = country_match.group(1)
+    site = site_match.group(1)
+
+    # Each Archive-* field holds a path that is appended to the site name.
+    https_urls = [f"https://{site}{path}" for path in https_re.findall(block)]
+    http_urls = [f"http://{site}{path}" for path in http_re.findall(block)]
+
+    # A later entry for the same country/site replaces the earlier one.
+    mirrors.setdefault(country, {})[site] = {
+        "https": https_urls,
+        "http": http_urls,
+    }
+
+# Emit one "#LOC:<country>" section per country, in sorted country order
+for country in sorted(mirrors):
+    # Honour the optional --country filter
+    if args.country and args.country != country:
+        continue
+
+    # Flatten the country's sites into one URL list: for every site the
+    # https URLs come first, then the http ones. Sites without any URL
+    # contribute nothing.
+    country_urls = []
+    for entry in mirrors[country].values():
+        country_urls.extend(entry["https"])
+        country_urls.extend(entry["http"])
+
+    # No header is printed for a country that ends up with no URLs
+    if not country_urls:
+        continue
+
+    print(f"#LOC:{country}")
+    for url in country_urls:
+        print(url)