// autohero/backend/internal/game/progression_sim.go

package game

import (
	"fmt"
	"math"
	"math/rand"
	"sort"
	"time"

	"github.com/denisovdennis/autohero/internal/model"
)

// DefaultProgressionBandTargets holds the planned wall-clock budget of every
// progression band, i.e. the summed time of all level-ups L→L+1 inside the band.
// Index 0 is the band 1→10, index 1 is 10→20, and so on up to index 4 (40→50).
var DefaultProgressionBandTargets = [...]time.Duration{
	0: 1 * (7 * 24 * time.Hour),  // 1 week
	1: 3 * (7 * 24 * time.Hour),  // 3 weeks
	2: 6 * (7 * 24 * time.Hour),  // 6 weeks
	3: 10 * (7 * 24 * time.Hour), // 10 weeks
	4: 20 * (7 * 24 * time.Hour), // 20 weeks
}

// Inclusive hero-level bounds of each progression band. A hero at level L is
// working on the step L→L+1, and that step is counted in the band whose
// [start, end] range contains L (band 0 = levels 1..9, …, band 4 = levels 40..49).
var (
	progressionBandLevelStart = [5]int{1, 10, 20, 30, 40}
	progressionBandLevelEnd   = [5]int{9, 19, 29, 39, 49}
)

// SimulatedLevelCountInBand reports how many level-up steps of band bandIdx are
// covered when hero levels 1..maxHeroLevel (inclusive) are simulated.
//
// It returns 0 for a band index outside 0..4 and for a band that starts above
// maxHeroLevel. A band that is only partly reached contributes the steps from its
// first level up to maxHeroLevel; a fully reached band contributes all its steps.
func SimulatedLevelCountInBand(bandIdx int, maxHeroLevel int) int {
	if bandIdx < 0 || bandIdx > 4 {
		return 0
	}
	first, last := progressionBandLevelStart[bandIdx], progressionBandLevelEnd[bandIdx]
	switch {
	case maxHeroLevel < first:
		// The simulation stops before this band begins.
		return 0
	case maxHeroLevel < last:
		// The simulation stops somewhere inside the band.
		return maxHeroLevel - first + 1
	default:
		return last - first + 1
	}
}

// fullLevelCountInBand returns the total number of level-up steps in band bandIdx
// (both band bounds are inclusive). bandIdx is not range-checked: a value outside
// 0..4 panics on the array index.
func fullLevelCountInBand(bandIdx int) int {
	last := progressionBandLevelEnd[bandIdx]
	first := progressionBandLevelStart[bandIdx]
	return 1 + last - first
}

// ProratedBandTargets shrinks each band target in full to the share of that band's
// level steps that a run up to maxHeroLevel actually simulates (a partial run,
// e.g. -max-level 29, only covers part of the plan).
//
// Bands with no simulated steps keep a zero target; fully simulated bands keep
// their full target.
func ProratedBandTargets(maxHeroLevel int, full [5]time.Duration) [5]time.Duration {
	var prorated [5]time.Duration
	for band := range prorated {
		simulated := SimulatedLevelCountInBand(band, maxHeroLevel)
		if simulated > 0 {
			steps := fullLevelCountInBand(band)
			prorated[band] = time.Duration(float64(full[band]) * float64(simulated) / float64(steps))
		}
	}
	return prorated
}

// SumBandTargets adds up the five per-band targets (for example prorated ones)
// and returns the overall budget.
func SumBandTargets(t [5]time.Duration) time.Duration {
	total := time.Duration(0)
	for i := 0; i < len(t); i++ {
		total += t[i]
	}
	return total
}

// ProgressionSimParams is the configuration of a long-run XP / time estimation.
// Out-of-range values are replaced by defaults in normalized().
type ProgressionSimParams struct {
	// IterationsPerLevel is the number of Monte Carlo fights sampled for every
	// simulated hero level. Values below 1 fall back to 80.
	IterationsPerLevel int
	// Seed is the base RNG seed. Every iteration derives its own seed from Seed,
	// the hero level and the iteration index; that seed feeds rand.Seed as well as
	// the dedicated sources used for gear rolls and the enemy pick.
	Seed int64
	// RestAfterCombat is the post-battle downtime added to each fight duration.
	// Negative values are treated as 0.
	RestAfterCombat time.Duration
	// Gear selects the reference gear profile: ReferenceGearMedian or ReferenceGearRolled.
	Gear ReferenceGearProfile
	// AccountLosses chooses the XP-rate formula: true uses sum(xp)/sum(cycle) over
	// all fights (lost fights give 0 XP), false uses the same ratio over won fights only.
	AccountLosses bool
	// MinHeroLevel is the first simulated hero level (step L→L+1). Values below 1
	// become 1.
	MinHeroLevel int
	// MaxHeroLevelInclusive is the last simulated hero level. A value below
	// MinHeroLevel or above 49 becomes 49.
	MaxHeroLevelInclusive int
}

// normalized returns a copy of p with every out-of-range field replaced by a
// usable value:
//
//   - IterationsPerLevel below 1 becomes 80;
//   - a negative RestAfterCombat becomes 0;
//   - MinHeroLevel is clamped into 1..49;
//   - MaxHeroLevelInclusive below MinHeroLevel (including the zero value) or
//     above 49 becomes 49.
//
// The result always satisfies 1 <= MinHeroLevel <= MaxHeroLevelInclusive <= 49,
// so the simulated level range is never empty.
func (p ProgressionSimParams) normalized() ProgressionSimParams {
	// Highest hero level whose step L→L+1 belongs to a progression band.
	const maxSimLevel = 49

	out := p
	if out.IterationsPerLevel < 1 {
		out.IterationsPerLevel = 80
	}
	if out.RestAfterCombat < 0 {
		out.RestAfterCombat = 0
	}
	if out.MinHeroLevel < 1 {
		out.MinHeroLevel = 1
	}
	// Without this upper clamp a MinHeroLevel above 49 would leave
	// MaxHeroLevelInclusive (capped at 49) below it, giving a negative level
	// count and a panic when the per-level result slices are allocated.
	if out.MinHeroLevel > maxSimLevel {
		out.MinHeroLevel = maxSimLevel
	}
	if out.MaxHeroLevelInclusive < out.MinHeroLevel || out.MaxHeroLevelInclusive > maxSimLevel {
		out.MaxHeroLevelInclusive = maxSimLevel
	}
	return out
}

// ProgressionBandResult is the outcome of one progression simulation: time sums
// per band plus per-level diagnostics.
type ProgressionBandResult struct {
	// BandDurations is the simulated time spent in each of the five bands.
	BandDurations [5]time.Duration
	// Total is TotalSec converted to a Duration.
	Total time.Duration
	// TotalSec is the sum of the per-band times in seconds. It may be +Inf when
	// some level never awards XP.
	TotalSec float64
	// LevelUpSec holds the seconds needed for each simulated step L→L+1, in
	// ascending level order. Entry i belongs to the i-th simulated level, which is
	// index L-1 when the simulation starts at level 1.
	LevelUpSec []float64
	// WinRates holds, per simulated hero level (same indexing as LevelUpSec), the
	// fraction of Monte Carlo iterations the hero won.
	WinRates []float64
}

// EnemyTemplatesFromSlice builds a slug-keyed lookup from template rows for balance
// tooling. Rows with an empty Slug are skipped; when two rows share a slug the
// later one wins.
func EnemyTemplatesFromSlice(templates []model.Enemy) map[string]model.Enemy {
	bySlug := make(map[string]model.Enemy, len(templates))
	for i := range templates {
		slug := templates[i].Slug
		if slug == "" {
			continue
		}
		bySlug[slug] = templates[i]
	}
	return bySlug
}

// EnemySliceFromMap converts a slug-keyed map to a slice for SetEnemyTemplates.
//
// The templates are emitted in ascending key order. Go randomizes map iteration,
// so without the sort the slice order would differ from call to call, and any
// consumer that depends on template order would produce different results for
// the same seed.
func EnemySliceFromMap(m map[string]model.Enemy) []model.Enemy {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	out := make([]model.Enemy, 0, len(keys))
	for _, k := range keys {
		out = append(out, m[k])
	}
	return out
}

// CloneEnemyTemplates returns a new map (keys = slug) holding a copy of every Enemy
// value in src. A non-nil SpecialAbilities slice is duplicated so the clone does
// not share its backing array with the source; no other field gets special
// treatment beyond the plain struct copy. A nil src yields nil.
func CloneEnemyTemplates(src map[string]model.Enemy) map[string]model.Enemy {
	if src == nil {
		return nil
	}
	dup := make(map[string]model.Enemy, len(src))
	for slug, tmpl := range src {
		// tmpl is already a struct copy of the map value; only the slice needs
		// to be detached from the original.
		if tmpl.SpecialAbilities != nil {
			tmpl.SpecialAbilities = append([]model.SpecialAbility(nil), tmpl.SpecialAbilities...)
		}
		dup[slug] = tmpl
	}
	return dup
}

// TemplateProgressionBand assigns an enemy template to a content-tier band 0..4.
// The tier level is the midpoint of [MinLevel..MaxLevel] when that range is set
// (MinLevel > 0 and MaxLevel >= MinLevel), otherwise BaseLevel. Levels up to 10
// fall into band 0, up to 20 into band 1, up to 30 into band 2, up to 40 into
// band 3, and everything above into band 4.
func TemplateProgressionBand(t model.Enemy) int {
	level := t.BaseLevel
	if t.MinLevel > 0 && t.MaxLevel >= t.MinLevel {
		level = (t.MinLevel + t.MaxLevel) / 2
	}
	// Inclusive upper level of bands 0..3; anything higher belongs to band 4.
	for band, upper := range [...]int{10, 20, 30, 40} {
		if level <= upper {
			return band
		}
	}
	return 4
}

// XPRewardScaleSpec describes the multipliers applied to a template's XPReward
// (before instance scaling). Non-positive multipliers are treated as 1 by the
// code that applies the spec.
type XPRewardScaleSpec struct {
	// Global multiplies every template.
	Global float64
	// Elite additionally multiplies xp_reward of templates flagged IsElite.
	Elite float64
	// PerType holds multipliers keyed by enemy slug; a missing key means 1.
	PerType map[string]float64
	// PerBand holds one multiplier per TemplateProgressionBand index (0..4).
	PerBand [5]float64
}

// effectiveType returns the PerType multiplier for slug, or 1 when the map is nil,
// the slug is absent, or the stored value is not positive.
func (s XPRewardScaleSpec) effectiveType(slug string) float64 {
	// Reading from a nil map is safe in Go and simply reports "not found".
	if mult, found := s.PerType[slug]; found && mult > 0 {
		return mult
	}
	return 1
}

// effectiveBand returns the PerBand multiplier for band, or 1 when band is outside
// 0..4 or the stored value is zero or negative.
func (s XPRewardScaleSpec) effectiveBand(band int) float64 {
	if band < 0 || band >= len(s.PerBand) {
		return 1
	}
	switch mult := s.PerBand[band]; {
	case mult <= 0:
		return 1
	default:
		return mult
	}
}

// eliteMul returns the extra multiplier for e: the Elite factor when e.IsElite is
// set (a zero or negative Elite counts as 1), and 1 for every non-elite enemy.
func (s XPRewardScaleSpec) eliteMul(e model.Enemy) float64 {
	if !e.IsElite {
		return 1
	}
	if s.Elite <= 0 {
		return 1
	}
	return s.Elite
}

// ApplyXPRewardScaleSpec clones base and rescales every template's XPReward by
// Global × per-type × per-band × elite multiplier. The scaled value is raised to
// at least 1 and then rounded to the nearest integer. base itself is not modified.
func ApplyXPRewardScaleSpec(base map[string]model.Enemy, spec XPRewardScaleSpec) map[string]model.Enemy {
	global := spec.Global
	if global <= 0 {
		global = 1
	}
	scaled := CloneEnemyTemplates(base)
	for slug, tmpl := range scaled {
		factor := global * spec.effectiveType(slug) * spec.effectiveBand(TemplateProgressionBand(tmpl)) * spec.eliteMul(tmpl)
		reward := float64(tmpl.XPReward) * factor
		if reward < 1 {
			reward = 1
		}
		tmpl.XPReward = int64(math.Round(reward))
		// Overwriting an existing key while ranging over the map is allowed.
		scaled[slug] = tmpl
	}
	return scaled
}

// WithEnemyTemplates installs templates as the global enemy templates, runs fn, and
// then puts the previous templates back (also when fn panics, since the restore is
// deferred).
//
// The previous state is captured through a slug-keyed map, so rows of
// model.EnemyTemplates with an empty Slug are not part of the snapshot.
// NOTE(review): this swaps package-level state in model; concurrent callers would
// presumably interfere with each other — confirm before using it from goroutines.
func WithEnemyTemplates(templates map[string]model.Enemy, fn func()) {
	snapshot := EnemyTemplatesFromSlice(model.EnemyTemplates)
	snapshot = CloneEnemyTemplates(snapshot)

	model.SetEnemyTemplates(EnemySliceFromMap(templates))
	restore := func() {
		model.SetEnemyTemplates(EnemySliceFromMap(snapshot))
	}
	defer restore()

	fn()
}

// SimulateProgressionBands estimates, by Monte Carlo, the time needed for every
// level-up in the configured hero-level range and sums those times per band.
//
// params is normalized first. templates is installed as the global enemy template
// set for the duration of the simulation and restored afterwards. An error is
// returned only when templates is empty.
func SimulateProgressionBands(params ProgressionSimParams, templates map[string]model.Enemy) (ProgressionBandResult, error) {
	if len(templates) == 0 {
		return ProgressionBandResult{}, fmt.Errorf("empty enemy templates")
	}
	cfg := params.normalized()

	levels := levelRange(cfg.MinHeroLevel, cfg.MaxHeroLevelInclusive)
	result := ProgressionBandResult{
		LevelUpSec: make([]float64, len(levels)),
		WinRates:   make([]float64, len(levels)),
	}

	// Band sums stay in float64 seconds so that +Inf (a level that never awards
	// XP) survives until the final saturating conversion instead of being cast
	// to a meaningless Duration.
	var secPerBand [5]float64

	WithEnemyTemplates(templates, func() {
		for i, level := range levels {
			sec, winRate := estimateLevelUpSeconds(level, cfg)
			result.LevelUpSec[i], result.WinRates[i] = sec, winRate
			if band := bandIndexForHeroLevel(level); band >= 0 {
				secPerBand[band] += sec
			}
		}
	})

	for band, sec := range secPerBand {
		result.BandDurations[band] = secondsToDuration(sec)
		result.TotalSec += sec
	}
	result.Total = secondsToDuration(result.TotalSec)
	return result, nil
}

// levelRange returns the hero levels minL..maxL (both inclusive) in ascending
// order. An inverted range (maxL < minL) yields an empty result instead of
// panicking on a negative slice capacity.
func levelRange(minL, maxL int) []int {
	if maxL < minL {
		return nil
	}
	out := make([]int, 0, maxL-minL+1)
	for L := minL; L <= maxL; L++ {
		out = append(out, L)
	}
	return out
}

// secondsToDuration converts simulated seconds to a Duration, saturating instead
// of overflowing:
//
//   - NaN, -Inf, zero and negative values map to 0;
//   - +Inf (an impossible-to-finish level) and anything at or above the largest
//     whole number of seconds a Duration can hold map to the maximum Duration.
func secondsToDuration(sec float64) time.Duration {
	const maxDuration = time.Duration(math.MaxInt64)
	// Integer division on purpose: the cut-off is a whole number of seconds, which
	// keeps sec*1e9 safely inside the int64 range below it.
	maxWholeSec := float64(math.MaxInt64 / int64(time.Second))

	switch {
	case math.IsNaN(sec):
		return 0
	case math.IsInf(sec, 1), sec >= maxWholeSec:
		return maxDuration
	case sec <= 0: // also covers -Inf
		return 0
	}
	return time.Duration(sec * float64(time.Second))
}

// bandIndexForHeroLevel returns the progression band (0..4) that hero level L
// belongs to when leveling L→L+1: 1..9 → 0, 10..19 → 1, 20..29 → 2, 30..39 → 3,
// 40..49 → 4. Levels outside 1..49 yield -1.
func bandIndexForHeroLevel(L int) int {
	if L < 1 || L > 49 {
		return -1
	}
	// Inside 1..49 every band spans one decade, so the tens digit is the index.
	return L / 10
}

// estimateLevelUpSeconds estimates how long the step heroLevel→heroLevel+1 takes.
//
// It runs p.IterationsPerLevel simulated fights. Each fight uses a fresh reference
// hero and an enemy picked for heroLevel; its cycle time is the combat duration
// plus p.RestAfterCombat, and it yields the enemy's XPReward on a win and 0 on a
// loss. The XP rate is sum(xp)/sum(cycle) over all fights when p.AccountLosses is
// set, otherwise the same ratio over won fights only.
//
// It returns the XP needed divided by that rate, together with the fraction of
// fights won. When the level needs no XP the result is (0, 1); when the rate is
// not positive the time is +Inf.
func estimateLevelUpSeconds(heroLevel int, p ProgressionSimParams) (seconds float64, winRate float64) {
	required := float64(model.XPToNextLevel(heroLevel))
	if required <= 0 {
		return 0, 1
	}

	var (
		allCycleSec, allXP float64
		winCycleSec, winXP float64
		winCount           int
	)
	restSec := p.RestAfterCombat.Seconds()
	iterations := p.IterationsPerLevel

	for it := 0; it < iterations; it++ {
		// One seed per (level, iteration) keeps every fight reproducible.
		iterSeed := p.Seed + int64(heroLevel)*1_000_003 + int64(it)*97_981
		// NOTE(review): reseeds the package-level source, presumably because the
		// combat code draws from it — confirm. rand.Seed is deprecated as of Go 1.20.
		rand.Seed(iterSeed)

		// Gear rolls get their own source; it stays nil for non-rolled profiles.
		var gearRng *rand.Rand
		if p.Gear == ReferenceGearRolled {
			gearRng = rand.New(rand.NewSource(iterSeed + 42))
		}
		hero := CloneHeroForCombatSim(NewReferenceHeroForBalance(heroLevel, p.Gear, gearRng))

		enemy := PickEnemyForLevelWithRNG(heroLevel, rand.New(rand.NewSource(iterSeed+11_111)))

		survived, elapsed := ResolveCombatToEndWithDuration(hero, &enemy, CombatSimDeterministicStart, CombatSimOptions{
			TickRate: 100 * time.Millisecond,
			MaxSteps: CombatSimMaxStepsLong,
		})
		cycle := elapsed.Seconds() + restSec

		gained := 0.0
		if survived {
			gained = float64(enemy.XPReward)
			winCount++
			winCycleSec += cycle
			winXP += gained
		}
		allCycleSec += cycle
		allXP += gained
	}

	winRate = float64(winCount) / float64(iterations)

	var xpRate float64
	switch {
	case p.AccountLosses:
		if allCycleSec > 0 {
			xpRate = allXP / allCycleSec
		}
	case winCycleSec > 0 && winXP > 0:
		xpRate = winXP / winCycleSec
	}

	if xpRate <= 0 {
		// No XP income at all: the level can never be finished.
		return math.Inf(1), winRate
	}
	return required / xpRate, winRate
}

// BandErrors returns the relative error sim/target - 1 of every band. Bands whose
// target is not positive report an error of 0.
func BandErrors(sim, targets [5]time.Duration) [5]float64 {
	var rel [5]float64 // entries for skipped bands stay at the zero value
	for band, target := range targets {
		if target > 0 {
			rel[band] = float64(sim[band])/float64(target) - 1
		}
	}
	return rel
}

// SquaredErrorSum returns the sum over all bands of (sim/target - 1)². Bands whose
// target is not positive do not contribute.
func SquaredErrorSum(sim, targets [5]time.Duration) float64 {
	total := 0.0
	for band, target := range targets {
		if target <= 0 {
			continue
		}
		dev := float64(sim[band])/float64(target) - 1
		total += dev * dev
	}
	return total
}

// EnemyLevelTierMid reduces an enemy's catalog level band to a single sort key
// (higher = later content): the midpoint of [MinLevel..MaxLevel] when that range is
// set (MinLevel > 0 and MaxLevel >= MinLevel), otherwise BaseLevel, with a
// non-positive BaseLevel counted as 1.
func EnemyLevelTierMid(e model.Enemy) int {
	switch {
	case e.MinLevel > 0 && e.MaxLevel >= e.MinLevel:
		return (e.MinLevel + e.MaxLevel) / 2
	case e.BaseLevel > 0:
		return e.BaseLevel
	default:
		return 1
	}
}

// SortEnemyTypesByLevelTier returns the keys of m ordered by ascending
// EnemyLevelTierMid, with ties broken by the key itself. Because keys are unique
// the resulting order is fully determined by the map's content.
func SortEnemyTypesByLevelTier(m map[string]model.Enemy) []string {
	slugs := make([]string, 0, len(m))
	tier := make(map[string]int, len(m))
	for slug, tmpl := range m {
		slugs = append(slugs, slug)
		tier[slug] = EnemyLevelTierMid(tmpl)
	}
	sort.Slice(slugs, func(a, b int) bool {
		ta, tb := tier[slugs[a]], tier[slugs[b]]
		if ta == tb {
			return slugs[a] < slugs[b]
		}
		return ta < tb
	})
	return slugs
}

// EnforceMonotonicXPRewardByTier returns a clone of templates whose xp_reward never
// decreases along the level-tier order (SortEnemyTypesByLevelTier). Every reward is
// raised to at least 1; a template in a strictly higher tier than its predecessor
// gets at least the predecessor's reward + 1, one in the same tier at least the
// predecessor's reward. Rewards are only ever raised, never lowered.
func EnforceMonotonicXPRewardByTier(templates map[string]model.Enemy) map[string]model.Enemy {
	adjusted := CloneEnemyTemplates(templates)

	var (
		lastTier int
		lastXP   int64
	)
	for i, slug := range SortEnemyTypesByLevelTier(adjusted) {
		tmpl := adjusted[slug]
		tier := EnemyLevelTierMid(tmpl)

		xp := tmpl.XPReward
		if xp < 1 {
			xp = 1
		}
		// The first template in the order has no predecessor to compare against.
		if i > 0 {
			floor := lastXP
			if tier > lastTier {
				floor = lastXP + 1
			}
			if xp < floor {
				xp = floor
			}
		}

		tmpl.XPReward = xp
		adjusted[slug] = tmpl
		lastXP, lastTier = xp, tier
	}
	return adjusted
}

// MaxRelativeErrorVsTargets returns the largest |sim/target - 1| found among the
// bands with a positive target and, when totalTarget is positive, the overall
// totals. It returns 0 when nothing qualifies.
func MaxRelativeErrorVsTargets(sim [5]time.Duration, targets [5]time.Duration, totalSim, totalTarget time.Duration) float64 {
	worst := 0.0
	// consider folds one (simulated, target) pair into the running maximum.
	consider := func(got, want time.Duration) {
		if want <= 0 {
			return
		}
		if dev := math.Abs(float64(got)/float64(want) - 1); dev > worst {
			worst = dev
		}
	}
	for band := range sim {
		consider(sim[band], targets[band])
	}
	consider(totalSim, totalTarget)
	return worst
}

// clonePerTypeMap returns an independent copy of m. The result is always a
// non-nil, writable map, even when m is nil.
func clonePerTypeMap(m map[string]float64) map[string]float64 {
	dup := make(map[string]float64, len(m))
	for slug := range m {
		dup[slug] = m[slug]
	}
	return dup
}

// OptimizePerTypeScales tunes one xp_reward multiplier per enemy slug (PerType) by
// coordinate descent so that the simulated band durations approach targets. Global
// and all PerBand multipliers stay at 1; elite templates additionally use the given
// elite multiplier. With enforceMonotonic set, every candidate is passed through
// EnforceMonotonicXPRewardByTier before it is simulated.
//
// Each pass visits the slugs in level-tier order and tries a fixed list of factors
// on the slug's current multiplier, keeping any candidate that lowers the squared
// relative band error. Multipliers stay within [0.05, 200]. The search stops after
// maxIters passes (120 when maxIters < 1) or after the first pass that brings no
// improvement.
//
// It returns the final multipliers, the scaled templates (integer XPReward), the
// simulation result for those templates and its squared error.
func OptimizePerTypeScales(
	base map[string]model.Enemy,
	params ProgressionSimParams,
	targets [5]time.Duration,
	elite float64,
	maxIters int,
	enforceMonotonic bool,
) (map[string]float64, map[string]model.Enemy, ProgressionBandResult, float64) {
	if maxIters < 1 {
		maxIters = 120
	}

	order := SortEnemyTypesByLevelTier(base)
	current := make(map[string]float64, len(order))
	for _, slug := range order {
		current[slug] = 1
	}

	// build turns a set of per-type multipliers into the templates to simulate.
	build := func(perType map[string]float64) map[string]model.Enemy {
		spec := XPRewardScaleSpec{Global: 1, Elite: elite, PerType: perType, PerBand: [5]float64{1, 1, 1, 1, 1}}
		scaled := ApplyXPRewardScaleSpec(base, spec)
		if enforceMonotonic {
			scaled = EnforceMonotonicXPRewardByTier(scaled)
		}
		return scaled
	}

	// The simulation only fails for an empty template set; the zero-value result
	// is then used as the baseline.
	best, _ := SimulateProgressionBands(params, build(current))
	bestErr := SquaredErrorSum(best.BandDurations, targets)

	// Wide factors so that integer xp_reward can actually move (DB values are often
	// 1–3); the coarse steps help to get near the targets quickly.
	factors := [...]float64{8, 4, 2, 1.5, 1.25, 1.1, 1.05, 1.02, 1.01, 0.99, 0.98, 0.95, 0.9, 0.75, 0.5, 0.25}

	for iter := 0; iter < maxIters; iter++ {
		improved := false
		for _, slug := range order {
			for _, f := range factors {
				next := current[slug] * f
				if next < 0.05 || next > 200 {
					continue
				}
				trial := clonePerTypeMap(current)
				trial[slug] = next

				sim, err := SimulateProgressionBands(params, build(trial))
				if err != nil {
					continue
				}
				if trialErr := SquaredErrorSum(sim.BandDurations, targets); trialErr < bestErr {
					bestErr, current, best = trialErr, trial, sim
					improved = true
				}
			}
		}
		if !improved {
			break
		}
	}

	// Rebuild and re-simulate the winning multipliers so that the returned
	// templates and result belong together.
	scaled := build(current)
	best, _ = SimulateProgressionBands(params, scaled)
	return current, scaled, best, SquaredErrorSum(best.BandDurations, targets)
}

// OptimizeBandScales searches the five PerBand multipliers by coordinate descent to
// minimize the squared relative error of the simulated band durations vs targets.
// global and elite are passed through unchanged.
//
// Every pass nudges each band multiplier by ±step and ±step/2 and keeps any change
// that lowers the error; multipliers stay within [0.05, 200]. A pass without
// improvement halves the step (starting at 0.08), and the search ends once the step
// drops below 0.005 or after maxIters passes (120 when maxIters < 1).
//
// It returns the final multipliers, the simulation result for them and its
// squared error.
func OptimizeBandScales(
	base map[string]model.Enemy,
	params ProgressionSimParams,
	targets [5]time.Duration,
	global, elite float64,
	maxIters int,
) ([5]float64, ProgressionBandResult, float64) {
	if maxIters < 1 {
		maxIters = 120
	}

	// simulate scales base with the given band multipliers and runs the simulation.
	simulate := func(perBand [5]float64) (ProgressionBandResult, error) {
		spec := XPRewardScaleSpec{Global: global, Elite: elite, PerBand: perBand}
		return SimulateProgressionBands(params, ApplyXPRewardScaleSpec(base, spec))
	}

	current := [5]float64{1, 1, 1, 1, 1}
	// The simulation only fails for an empty template set; the zero-value result
	// is then used as the baseline.
	best, _ := simulate(current)
	bestErr := SquaredErrorSum(best.BandDurations, targets)

	step := 0.08
	for iter := 0; iter < maxIters; iter++ {
		nudges := [4]float64{1 + step, 1 - step, 1 + step/2, 1 - step/2}
		improved := false
		for band := 0; band < len(current); band++ {
			for _, nudge := range nudges {
				if nudge <= 0.05 {
					continue
				}
				trial := current // arrays are values, so this is a copy
				trial[band] *= nudge
				if trial[band] < 0.05 || trial[band] > 200 {
					continue
				}
				sim, err := simulate(trial)
				if err != nil {
					continue
				}
				if trialErr := SquaredErrorSum(sim.BandDurations, targets); trialErr < bestErr {
					bestErr, current, best = trialErr, trial, sim
					improved = true
				}
			}
		}
		if improved {
			continue
		}
		// Nothing helped at this resolution: refine the step or give up.
		step *= 0.5
		if step < 0.005 {
			break
		}
	}

	best, _ = simulate(current)
	return current, best, SquaredErrorSum(best.BandDurations, targets)
}