geoip/src/parse_locations.py
"""
Convert the country code/id information from GeoNames into MaxMind location CSV format.
"""
import sys
import csv
import re
from .utils import CONTINENTS, LOCATIONS, EU_MEMBERS
from pathlib import Path
from typing import Dict, List, Iterator
def _read_csv(in_file: Path) -> Iterator[Dict[str, str]]:
"""GeoNames CSV input format."""
with in_file.open("r") as in_fp:
reader: csv.DictReader = csv.DictReader(
(line for line in in_fp if re.match(r"^#([ \t]+|$)", line) is None),
delimiter="\t"
)
for row in reader:
yield {
"geoname_id": row["geonameid"],
"continent_code": row["Continent"],
"continent_name": CONTINENTS[row["Continent"]]["continent_name"],
"country_iso_code": row["#ISO"],
"country_name": row["Country"],
}
def _write_csv(out_file: Path, locations: List[Dict[str, str]]) -> None:
"""MaxMind CSV output format."""
with out_file.open("w") as out_fp:
writer: csv.DictWriter = csv.DictWriter(
out_fp,
fieldnames=["geoname_id", "locale_code", "continent_code", "continent_name", "country_iso_code",
"country_name", "is_in_european_union"],
quoting=csv.QUOTE_MINIMAL, lineterminator="\n"
)
writer.writeheader()
for location in locations:
location.setdefault("locale_code", "en")
location.setdefault("is_in_european_union",
"1" if location.get("country_iso_code", "") in EU_MEMBERS else "0")
writer.writerow(location)
def main(in_file: Path, out_file: Path) -> int:
    """Build the combined location table and write it to ``out_file``.

    Country rows parsed from ``in_file`` are joined with the entries of
    ``CONTINENTS`` and ``LOCATIONS``, ordered by numeric ``geoname_id`` and
    handed to ``_write_csv``.

    Args:
        in_file: GeoNames country information file to read.
        out_file: CSV file to create.

    Returns:
        Always ``0``, suitable for use as a process exit status.
    """
    combined: List[Dict[str, str]] = [
        *_read_csv(in_file),
        *CONTINENTS.values(),
        *LOCATIONS,
    ]
    # geoname_id is stored as text; compare numerically, not lexically.
    ordered = sorted(combined, key=lambda entry: int(entry["geoname_id"]))
    _write_csv(out_file, ordered)
    return 0
if __name__ == "__main__":
    # Command-line entry point: both paths are mandatory options.
    from argparse import ArgumentParser

    cli = ArgumentParser(description=__doc__)
    for flag, help_text in (
        ("--in-file", "geonames country information file to parse"),
        ("--out-file", "output CSV file"),
    ):
        cli.add_argument(flag, type=Path, required=True, help=help_text)
    options = cli.parse_args()
    # Propagate main()'s return value as the process exit status.
    sys.exit(main(options.in_file, options.out_file))