"""
Vision Zero Report Card analysis.

Pull truck-involved fatal-crash deaths per city, 2018-2024, from FARS national CSV
downloads (NHTSA). Compare each city's 2018-2019 average with its 2023-2024 average
(percent change and per-100k rates), and total truck-involved deaths across all
states per year.

Truck definition matches federal FMCSA/IIHS standard:
  BODY_TYP 60-69 = medium/heavy trucks (>10,000 lbs GVWR)
  60: Step van
  61: Single-unit straight HAZMAT
  62-65: Single-unit straight truck (varying weight classes)
  66: Medium/heavy pickup
  67: Unknown medium/heavy
  68: Truck-tractor (semi/combination)
  69: Unknown single vs combo

Output: derived/report_card.json with per-city series + summary stats.
"""
import csv
import json
from pathlib import Path
from collections import defaultdict

# Directory that holds this script; the per-year FARS folders are resolved against it.
ROOT = Path(__file__).resolve().parents[0]

# Output folder for generated artifacts; created at import time if it is missing.
DERIVED = ROOT.joinpath('derived')
DERIVED.mkdir(exist_ok=True)

# Cities covered by the report card, one 6-tuple per city:
#   (state_fips, city_fars_code, display_name, slug, vision_zero_year, population_2020)
# state_fips / city_fars_code are matched against the STATE / CITY columns of the
# FARS accident file. Vision Zero adoption years from official city commitments;
# None where no adoption year is recorded in this table. population_2020 is the
# denominator for the per-100k rates.
CITIES = [
    # state_code, city_fars_code, display, slug, vz_year, population_2020
    (36, 4170, 'New York City',  'new-york-ny',    2014, 8_804_190),
    ( 6, 1980, 'Los Angeles',    'los-angeles-ca', 2015, 3_898_747),
    (17, 1670, 'Chicago',        'chicago-il',     2012, 2_746_388),
    (48, 3280, 'Houston',        'houston-tx',     None, 2_304_580),
    ( 4,  370, 'Phoenix',        'phoenix-az',     2017, 1_608_139),
    (42, 6540, 'Philadelphia',   'philadelphia-pa',2017, 1_603_797),
    (48, 6090, 'San Antonio',    'san-antonio-tx', 2015, 1_434_625),
    ( 6, 3260, 'San Diego',      'san-diego-ca',   2015, 1_386_932),
    (48, 1730, 'Dallas',         'dallas-tx',      2019, 1_304_379),
    (25,  120, 'Boston',         'boston-ma',      2015, 675_647),
    (53, 1960, 'Seattle',        'seattle-wa',     2015, 737_015),
    ( 8,  600, 'Denver',         'denver-co',      2017, 715_522),
    (11,   10, 'Washington DC',  'washington-dc',  2015, 689_545),
    (41, 1650, 'Portland',       'portland-or',    2015, 652_503),
    (48,  330, 'Austin',         'austin-tx',      2016, 961_855),
    (12, 2010, 'Miami',          'miami-fl',       2017, 442_241),
    (12, 1510, 'Jacksonville',   'jacksonville-fl',None, 949_611),
    (47, 1760, 'Nashville',      'nashville-tn',   2017, 689_447),
    (13,  280, 'Atlanta',        'atlanta-ga',     2020, 498_715),
]

# FARS BODY_TYP codes counted as medium/heavy trucks (passenger pickups under
# 10K lbs are excluded). A crash is "truck-involved" when any of its vehicles
# carries one of these codes.
# NOTE(review): confirm the 60-69 range against the BODY_TYP table in the FARS
# Analytical User's Manual — TODO verify no medium/heavy truck code lies outside it.
TRUCK_BODY_TYPES = {60, 61, 62, 63, 64, 65, 66, 67, 68, 69}

# Data years covered by the report, in ascending order (2018 through 2024 inclusive).
YEARS = list(range(2018, 2025))

# Folder (relative to ROOT) holding accident.csv and vehicle.csv for each year.
# 2018-2019 sit in a flat folder; from 2020 on the CSVs are nested one level down.
PATHS = {
    2018: 'FARS2018',
    2019: 'FARS2019',
    **{yr: f'FARS{yr}/FARS{yr}NationalCSV' for yr in range(2020, 2025)},
}


def _reader(path):
    """Open a FARS CSV and return ``(file_handle, csv.DictReader)``.

    The file is decoded as latin-1, and any byte-order mark is stripped from
    the first header name so columns can be looked up by their plain names.
    Under latin-1 a UTF-8 BOM shows up as the three characters
    U+00EF U+00BB U+00BF rather than as U+FEFF; both spellings are handled.

    The caller owns the returned file handle and must close it. If reading
    the header fails, the handle is closed here before the error propagates.
    """
    # newline='' hands line-ending handling to the csv module (per the csv
    # docs), so CR/LF embedded in quoted fields survives untouched.
    f = open(path, encoding='latin-1', newline='')
    try:
        reader = csv.DictReader(f)
        # Accessing .fieldnames reads the header row; the returned list is the
        # reader's own, so editing it in place renames the column.
        header = reader.fieldnames
        if header:
            # Escapes instead of raw characters: the BOM is invisible in most
            # editors and easy to lose when the file is re-saved.
            for bom in ('\u00ef\u00bb\u00bf', '\ufeff'):
                if header[0].startswith(bom):
                    header[0] = header[0][len(bom):]
                    break
    except Exception:
        f.close()
        raise
    return f, reader


def load_year(year):
    """Tally truck-involved crash deaths for one FARS data year.

    Two passes over the year's folder (``ROOT / PATHS[year]``):

    1. ``vehicle.csv`` — collect the (STATE, ST_CASE) key of every crash with
       at least one vehicle whose BODY_TYP is in ``TRUCK_BODY_TYPES``. A blank
       or non-numeric BODY_TYP is treated as 0 (not a truck).
    2. ``accident.csv`` — add each matching crash's FATALS to its city and
       state totals. A blank or non-numeric CITY is treated as 0.

    Returns:
        ``(by_city, by_state_total)``, both ``defaultdict(int)``:
        ``{(state, city): deaths}`` and ``{state: deaths}``.

    Raises:
        KeyError / ValueError if STATE, ST_CASE or FATALS is missing or
        non-numeric in a row that gets parsed.
    """
    year_dir = ROOT / PATHS[year]
    accidents_csv = year_dir / 'accident.csv'
    vehicles_csv = year_dir / 'vehicle.csv'

    # Pass 1: which crashes involved a truck-class vehicle?
    truck_cases = set()
    handle, rows = _reader(vehicles_csv)
    try:
        for rec in rows:
            raw_body = rec.get('BODY_TYP', 0) or 0
            try:
                body_type = int(raw_body)
            except ValueError:
                body_type = 0
            if body_type not in TRUCK_BODY_TYPES:
                continue
            truck_cases.add((int(rec['STATE']), int(rec['ST_CASE'])))
    finally:
        handle.close()

    # Pass 2: sum deaths over those crashes.
    city_deaths = defaultdict(int)
    state_deaths = defaultdict(int)
    handle, rows = _reader(accidents_csv)
    try:
        for rec in rows:
            # Every row is parsed before the membership test, so a malformed
            # row raises whether or not it belongs to a truck crash.
            crash_key = (int(rec['STATE']), int(rec['ST_CASE']))
            deaths = int(rec['FATALS'])
            try:
                city_code = int(rec.get('CITY', 0) or 0)
            except ValueError:
                city_code = 0
            if crash_key not in truck_cases:
                continue
            state_code = crash_key[0]
            city_deaths[(state_code, city_code)] += deaths
            state_deaths[state_code] += deaths
    finally:
        handle.close()
    return city_deaths, state_deaths


def _two_year_avg(counts, first_year, second_year):
    """Return the mean of two yearly counts, or None if either year is absent."""
    a = counts.get(first_year)
    b = counts.get(second_year)
    if a is None or b is None:
        return None
    return (a + b) / 2


def _per_100k(avg, population):
    """Scale an average death count to a rate per 100,000 residents.

    Returns None only when the average is unknown or there is no population;
    an average of 0 yields 0.0 (zero deaths is a real result, not missing data).
    """
    if avg is None or not population:
        return None
    return (avg / population) * 100_000


def _round_or_none(value, digits):
    """Round ``value`` to ``digits`` places, passing None through unchanged."""
    return None if value is None else round(value, digits)


def _city_record(state, city, name, slug, vz_year, pop, counts, years):
    """Build one city's report entry from its ``{year: deaths}`` counts."""
    baseline_avg = _two_year_avg(counts, 2018, 2019)
    recent_avg = _two_year_avg(counts, 2023, 2024)

    # Percent change is undefined for a zero baseline, or when either window
    # is missing.
    pct_change = None
    if baseline_avg is not None and baseline_avg > 0 and recent_avg is not None:
        pct_change = ((recent_avg - baseline_avg) / baseline_avg) * 100

    return {
        'name': name,
        'slug': slug,
        'state_fips': state,
        'city_fars_code': city,
        'population_2020': pop,
        'vision_zero_year': vz_year,
        'series': {str(y): counts.get(y, 0) for y in years},
        'total_truck_fatalities_7yr': sum(counts.values()),
        'baseline_avg_2018_2019': baseline_avg,
        'recent_avg_2023_2024': recent_avg,
        'pct_change_2yr_vs_2yr': _round_or_none(pct_change, 1),
        'fatalities_per_100k_baseline': _round_or_none(_per_100k(baseline_avg, pop), 2),
        'fatalities_per_100k_recent': _round_or_none(_per_100k(recent_avg, pop), 2),
    }


def _print_summary(ranked, years):
    """Print the ranked per-city table to stdout."""
    print('\n=== Vision Zero Report Card 2026 ===')
    print('Truck-involved fatalities, 2018-2019 avg vs 2023-2024 avg\n')
    year_header = ' '.join(f'{str(y):>5}' for y in years)
    print(f'{"City":<20}  {"VZ":>4}  {year_header}  {"Change":>8}')
    print('-' * 110)
    for r in ranked:
        vz = r['vision_zero_year'] or '-'
        s = r['series']
        ch = r['pct_change_2yr_vs_2yr']
        ch_str = f'{ch:+.1f}%' if ch is not None else '-'
        year_cells = ' '.join(f'{s[str(y)]:>5}' for y in years)
        print(f'{r["name"]:<20}  {str(vz):>4}  {year_cells}  {ch_str:>8}')


def main(*, cities=None, years=None, loader=None, out_dir=None):
    """Build the report card, write ``report_card.json``, and print a summary.

    Called with no arguments this runs the full analysis over ``CITIES`` and
    ``YEARS`` using ``load_year`` and writes into ``DERIVED``. The keyword-only
    overrides exist so a subset of cities, pre-aggregated counts, or another
    output folder can be used.

    Args:
        cities: iterable of (state_fips, city_fars_code, name, slug, vz_year,
            population) tuples. Defaults to ``CITIES``.
        years: ordered list of data years. Defaults to ``YEARS``.
        loader: callable ``loader(year) -> (by_city, by_state)`` with the same
            return shape as ``load_year``. Defaults to ``load_year``.
        out_dir: folder that receives ``report_card.json``. Defaults to
            ``DERIVED``.

    Cities are ranked by percent change ascending (largest decrease first).
    Cities whose change is undefined (zero baseline) are listed last, not
    treated as 0%.
    """
    cities = CITIES if cities is None else cities
    years = YEARS if years is None else years
    loader = load_year if loader is None else loader
    out_dir = DERIVED if out_dir is None else Path(out_dir)

    print('Loading FARS years...')
    series = {(s, c): {} for s, c, *_ in cities}
    state_totals = {}
    for yr in years:
        print(f'  {yr}...')
        by_city, by_state = loader(yr)
        for state, city, *_ in cities:
            series[(state, city)][yr] = by_city.get((state, city), 0)
        state_totals[yr] = dict(by_state)

    report = [
        _city_record(state, city, name, slug, vz_year, pop, series[(state, city)], years)
        for state, city, name, slug, vz_year, pop in cities
    ]

    # Ascending percent change; undefined (None) entries go to the end. The
    # sort is stable, so ties keep their input order.
    def _rank_key(entry):
        change = entry['pct_change_2yr_vs_2yr']
        return (change is None, change if change is not None else 0)

    ranked = sorted(report, key=_rank_key)

    out = {
        'methodology': {
            'source': 'NHTSA FARS National CSV downloads, 2018-2024',
            'truck_definition': 'Medium/heavy trucks: BODY_TYP codes 60-69 (excludes passenger pickups <10,000 lbs GVWR)',
            'metric': 'Truck-involved fatalities (FATALS column on crashes with at least one truck-class vehicle)',
            'baseline_period': '2018-2019 average',
            'recent_period': '2023-2024 average',
            'change_metric': 'Percent change baseline average vs recent average',
            'per_100k_note': 'Per 100,000 residents using 2020 Decennial Census population',
            'fars_release_note': 'FARS 2024 Annual Report File (preliminary), released April 2026',
        },
        'cities': ranked,
        'national_totals_all_trucks': {
            'note': 'All-states truck-involved fatalities per year (not city-filtered)',
            'series': {str(y): sum(state_totals[y].values()) for y in years},
        },
    }

    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / 'report_card.json'
    with open(out_path, 'w', encoding='utf-8') as f:
        json.dump(out, f, indent=2)
    print(f'\nWrote {out_path}')

    _print_summary(ranked, years)


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
