
I played "Among Us" with a group of seven friends last night. At one point, I was the imposter FIVE straight times. This seemed crazy, and started a lot of conspiracy-theory discussion about how the game takes ping into account when choosing imposter, takes winning into account, on and on. After browsing reddit about imposter selection in this game, it sounds like there are LOTS of anecdotal complaints about this kind of thing.
I watched a GREAT slide deck on "Stats for Hackers," and decided that this would be an excellent opportunity to run a simple simulation and see just how probable it is, in a night of gaming, for someone to be the imposter so many times in a row, for someone to never be imposter, etc.
My plan is to run a simulation of imposter selection a representative number of times (at least 10k; I ended up running 100k) and collect some statistics each time. We played about 20 games last night, so each simulated game night will choose 20 imposters, and across the 100k simulated nights I will keep track of the following:

- the longest unbroken streak of any one person being the imposter
- the most times that anyone was the imposter
- the fewest times that anyone was the imposter
- the median imposter count across all players
Let's get cracking!
```python
from random import randrange
from typing import Tuple
from statistics import median

import pandas as pd  # I plan to use altair to visualize the results, and altair wants the data in a DataFrame


def run_single_simulation(games: int, players: int) -> Tuple[int, int, int, int]:
    imposter_ids: list = []
    longest_string: int = 1
    current_string: int = 1
    previous_imposter: int = -1
    for g in range(games):
        imposter = randrange(0, players)  # randrange's upper bound is exclusive, so this picks a player id from 0 to players-1
        if imposter == previous_imposter:
            current_string += 1
            longest_string = max(current_string, longest_string)  # update the longest streak if the current one beats it
        else:
            current_string = 1
        previous_imposter = imposter
        imposter_ids.append(imposter)
    imposter_counts: list = []
    for p in range(players):
        imposter_counts.append(imposter_ids.count(p))  # keep a list of how many games each participant was imposter
    max_count = max(imposter_counts)  # what is the highest count? we don't care who, just the value
    min_count = min(imposter_counts)
    median_count = int(median(imposter_counts))  # using int to round down
    return longest_string, max_count, min_count, median_count
```
```python
def run_many_simulations(simulations: int, games: int, players: int) -> pd.DataFrame:
    longest_unbroken_strings: list = []  # for each simulation, the longest streak of one person being the imposter
    max_imposters: list = []             # for each sim, the most times anyone was imposter
    min_imposters: list = []             # for each sim, the fewest times anyone was imposter
    median_imposters: list = []          # for each sim, the median imposter count across all players
    for i in range(simulations):
        longest, max_, min_, median_ = run_single_simulation(games=games, players=players)
        longest_unbroken_strings.append(longest)
        max_imposters.append(max_)
        min_imposters.append(min_)
        median_imposters.append(median_)

    # calculate the statistics now:
    longest_prob = []
    max_prob = []
    min_prob = []
    median_prob = []
    for g in range(games):
        longest_prob.append(1.0 * longest_unbroken_strings.count(g) / simulations)
        max_prob.append(1.0 * max_imposters.count(g) / simulations)
        min_prob.append(1.0 * min_imposters.count(g) / simulations)
        median_prob.append(1.0 * median_imposters.count(g) / simulations)

    # load data into a DataFrame. again, this will make plotting easier
    df = pd.DataFrame()
    df["LongestString"] = longest_prob
    df["LongestStringCumSum"] = df["LongestString"].cumsum()
    df["LongestStringPVal"] = 1 - df["LongestStringCumSum"] + df["LongestString"]  # one-sided p-value: the probability of being equal to or greater than this value
    df["Max"] = max_prob
    df["MaxCumSum"] = df["Max"].cumsum()
    df["MaxPVal"] = 1 - df["MaxCumSum"] + df["Max"]
    df["Min"] = min_prob
    df["MinCumSum"] = df["Min"].cumsum()
    df["MinPVal"] = 1 - df["MinCumSum"] + df["Min"]
    df["Median"] = median_prob
    df["MedianCumSum"] = df["Median"].cumsum()
    df["MedianPVal"] = 1 - df["MedianCumSum"] + df["Median"]
    df["Counts"] = list(df.index)
    return df
```
```python
# let's test this out!
simulations: int = 100000  # how many simulations to run
games: int = 20            # how many games are in each simulation
players: int = 7           # how many people are playing?

df_imposter_probabilities = run_many_simulations(simulations, games, players)
df_imposter_probabilities
```
|   | LongestString | LongestStringCumSum | LongestStringPVal | Max | MaxCumSum | MaxPVal | Min | MinCumSum | MinPVal | Median | MedianCumSum | MedianPVal | Counts |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.00000 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.000000e+00 | 0.29698 | 0.29698 | 1.00000 | 0.00000 | 0.00000 | 1.000000e+00 | 0 |
| 1 | 0.05412 | 0.05412 | 1.00000 | 0.00000 | 0.00000 | 1.000000e+00 | 0.57654 | 0.87352 | 0.70302 | 0.00417 | 0.00417 | 1.000000e+00 | 1 |
| 2 | 0.66672 | 0.72084 | 0.94588 | 0.00000 | 0.00000 | 1.000000e+00 | 0.12648 | 1.00000 | 0.12648 | 0.28474 | 0.28891 | 9.958300e-01 | 2 |
| 3 | 0.23723 | 0.95807 | 0.27916 | 0.00230 | 0.00230 | 1.000000e+00 | 0.00000 | 1.00000 | 0.00000 | 0.68307 | 0.97198 | 7.110900e-01 | 3 |
| 4 | 0.03628 | 0.99435 | 0.04193 | 0.20525 | 0.20755 | 9.977000e-01 | 0.00000 | 1.00000 | 0.00000 | 0.02801 | 0.99999 | 2.802000e-02 | 4 |
| 5 | 0.00490 | 0.99925 | 0.00565 | 0.42477 | 0.63232 | 7.924500e-01 | 0.00000 | 1.00000 | 0.00000 | 0.00001 | 1.00000 | 1.000000e-05 | 5 |
| 6 | 0.00062 | 0.99987 | 0.00075 | 0.24852 | 0.88084 | 3.676800e-01 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 6 |
| 7 | 0.00009 | 0.99996 | 0.00013 | 0.08885 | 0.96969 | 1.191600e-01 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 7 |
| 8 | 0.00004 | 1.00000 | 0.00004 | 0.02390 | 0.99359 | 3.031000e-02 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 8 |
| 9 | 0.00000 | 1.00000 | 0.00000 | 0.00513 | 0.99872 | 6.410000e-03 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 9 |
| 10 | 0.00000 | 1.00000 | 0.00000 | 0.00101 | 0.99973 | 1.280000e-03 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 10 |
| 11 | 0.00000 | 1.00000 | 0.00000 | 0.00025 | 0.99998 | 2.700000e-04 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 11 |
| 12 | 0.00000 | 1.00000 | 0.00000 | 0.00001 | 0.99999 | 2.000000e-05 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 12 |
| 13 | 0.00000 | 1.00000 | 0.00000 | 0.00001 | 1.00000 | 1.000000e-05 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 13 |
| 14 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 2.220446e-16 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 14 |
| 15 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 2.220446e-16 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 15 |
| 16 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 2.220446e-16 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 16 |
| 17 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 2.220446e-16 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 17 |
| 18 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 2.220446e-16 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 18 |
| 19 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 2.220446e-16 | 0.00000 | 1.00000 | 0.00000 | 0.00000 | 1.00000 | 1.110223e-16 | 19 |
The above dataframe looks right - the cum sums all end at 1 and the one-sided p values all go to 0. It will be easier to assess these results by plotting them.
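If you don't want to trust an eyeball check, here is a tiny programmatic version (just a quick sketch I'm adding for completeness; it reuses the `df_imposter_probabilities` variable from above):

```python
# every cumulative sum should finish at ~1.0, and every one-sided p-value
# should finish at ~0.0 (up to floating point noise)
for stat in ["LongestString", "Max", "Min", "Median"]:
    assert abs(df_imposter_probabilities[f"{stat}CumSum"].iloc[-1] - 1.0) < 1e-6
    assert abs(df_imposter_probabilities[f"{stat}PVal"].iloc[-1]) < 1e-6
print("cumulative sums and p-values check out")
```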
```python
# visualize!
import altair as alt

c_main = alt.Chart(df_imposter_probabilities)
c1 = c_main.mark_bar(size=15).encode(
    x=alt.X('Counts', title="Consecutive games of any one person being the imposter"),
    y=alt.Y('LongestString', title="Probability"),
    tooltip=['LongestString', 'LongestStringPVal']
).properties(
    title="Probability Distribution of Being the Imposter X Times in a Row, 7 players, 20 games",
)
c1.display()
```
The above plot is why I started this notebook. If you hover your mouse over the bar for 5 straight games, you will see the p-value of someone being the imposter 5 or more times in a row is a smidge more than 0.5%, so about 1 in 200. To me, this says that while it is unlikely that someone gets to be the imposter so many times in a row, it's not THAT crazy. Someone being the imposter 4 or more times in a row comes in at about 4%, which is really fairly common, and 3 or more times in a row is a colossal 28%. These events are really rather common.
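As a cross-check on that ~0.5% number (this bit is my own aside, not part of the simulation): the streak question can also be answered exactly with a tiny dynamic program that tracks the length of the current streak, which is the same bookkeeping the simulation does, just with probabilities instead of random draws.

```python
def prob_streak_at_least(streak: int, games: int, players: int) -> float:
    """Exact probability that some player is the imposter `streak` or more games in a row."""
    # dp[r] = probability that no streak of length `streak` has happened yet
    #         AND the current streak has length r (only 1 <= r < streak is reachable)
    dp = [0.0] * streak
    dp[1] = 1.0  # after the first game the current streak has length 1
    for _ in range(games - 1):
        same = 1.0 / players  # chance the next imposter matches the previous one
        new_dp = [0.0] * streak
        new_dp[1] = (1.0 - same) * sum(dp)  # a different imposter resets the streak to 1
        for r in range(2, streak):
            new_dp[r] = same * dp[r - 1]    # the same imposter extends the streak by 1
        dp = new_dp
    return 1.0 - sum(dp)  # complement of "never reached a streak of that length"

print(prob_streak_at_least(streak=5, games=20, players=7))  # roughly 0.006, in line with the simulated ~0.5%
```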
An interesting side question might be "how many times do I need to play Among Us before I see someone be the imposter 5 times in a row?", given that there is a roughly 1-in-200 chance of it happening in any given gaming session (assuming you play 20 games per session, about 90 minutes' worth). We can run a quick little side simulation to answer this:
```python
from random import random


def time_until_rare_event_occurs(chance: float) -> int:
    times: int = 0
    event_happened: bool = False
    while not event_happened:
        event_happened = random() < chance  # triggers only if a random number from 0 to 1 is less than `chance`
        times += 1
    return times
```
```python
def simulate_many_rare_events(simulations: int, chance: float) -> pd.DataFrame:
    times_when_event_triggered: list = []
    for s in range(simulations):
        time_until_event: int = time_until_rare_event_occurs(chance)
        times_when_event_triggered.append(time_until_event)
    max_time: int = max(times_when_event_triggered)  # just get a reasonable max value
    counts_events: list = []
    for t in range(1, max_time + 1):  # the earliest possible trigger is session 1
        event_count = times_when_event_triggered.count(t)
        event_chance = 1.0 * event_count / simulations
        counts_events.append(event_chance)
    df = pd.DataFrame(counts_events, columns=["EventObservedThisTime"])
    df["EventObservedByThisTime"] = df.EventObservedThisTime.cumsum()
    df["Time"] = df.index + 1  # row 0 holds session 1, so shift the label by one
    return df
```
```python
sims: int = 100000
chance: float = 0.005

df_time_until_event = simulate_many_rare_events(sims, chance)

alt.Chart(df_time_until_event.iloc[:300]).mark_point(size=1).encode(
    x=alt.X('Time', title="Time of rare event", scale=alt.Scale(domain=[0, 300])),
    y=alt.Y('EventObservedByThisTime', title="Cumulative Probability of Event"),
    tooltip=['EventObservedThisTime', 'EventObservedByThisTime', 'Time']
)
```
The above visual shows that, if you play A LOT of Among Us, you will eventually see someone get to be the imposter 5 straight times. At 20 sessions' worth of gaming (playing every other night for roughly 2 months), there is about a 10% chance that you will see this event occur. So, again, while someone being the imposter 5 straight times is definitely a rare event, it's not a "get struck by lightning while ALSO being attacked by a shark" sort of thing.
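For this one we don't even need the simulation: if each session is an independent 1-in-200 shot, the waiting time is geometric, and the chance of seeing the event within n sessions is just 1 - (1 - p)^n. A quick check of the 20-session figure (my own aside, using the same 0.005 chance as above):

```python
p_per_session = 0.005  # ~1-in-200 chance of a 5-game streak in a 20-game session
sessions = 20
print(1 - (1 - p_per_session) ** sessions)  # ~0.095, matching the ~10% read off the chart
```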
```python
alt.Chart(df_imposter_probabilities).mark_bar(size=15).encode(
    x=alt.X('Counts', title="Maximum times someone was the imposter"),
    y=alt.Y('Max', title="Probability"),
    tooltip=['Max', 'MaxPVal']
).properties(
    title="Probability Distribution of Max Times someone was the imposter, 7 players, 20 games",
)
```
Max times that someone in a party of 7 will get to be the imposter, over 20 games. This is another interesting plot, since someone is almost always claiming that so-and-so is ALWAYS the imposter. The p-value for 7 or more times is about 0.12, so there is roughly a 1-in-8 chance that somebody in the group will be the imposter 7 or more times. Again, this is a pretty likely event.
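A rough analytic cross-check of that 0.12 (again my own aside, not part of the simulation): a single player's imposter count over 20 games is Binomial(20, 1/7), and a union bound over the 7 players slightly overestimates the chance that anyone hits 7 or more, since it double-counts the rare sessions where two players both manage it.

```python
from math import comb

def binom_tail(n: int, p: float, k: int) -> float:
    """P(X >= k) for X ~ Binomial(n, p)."""
    return sum(comb(n, i) * p**i * (1 - p) ** (n - i) for i in range(k, n + 1))

one_player = binom_tail(n=20, p=1 / 7, k=7)  # ~0.017 for one specific player
print(7 * one_player)                        # ~0.12: union bound over all 7 players, close to the simulated MaxPVal
```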
```python
alt.Chart(df_imposter_probabilities).mark_bar(size=15).encode(
    x=alt.X('Counts', title="Minimum times someone was the imposter"),
    y=alt.Y('Min', title="Probability"),
    tooltip=['Min', 'MinPVal']
).properties(
    title="Probability distribution of least times someone was the imposter, 7 players, 20 games",
)
```
I think this is the most interesting finding here. With 7 players, there is a whopping 30% chance that someone NEVER gets to be the imposter!! And an 87% chance that the least-lucky player gets to be the imposter no more than once! This seems to be a very common complaint, and I believe it is why many people accuse the game of not actually using random selection. Alas, probability is harsh and cruel.
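That 30% is easy to sanity-check by hand (my own aside): the chance that one specific player is never the imposter in 20 games is (6/7)^20, and a union bound over the 7 players gives a slight overestimate of the chance that it happens to somebody.

```python
p_one_player_never = (6 / 7) ** 20  # ~0.046: a specific player never gets to be the imposter in 20 games
print(7 * p_one_player_never)       # ~0.32: union bound on "someone never gets to be the imposter" (sim says ~0.30)
```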
Interestingly, the above visual is VERY sensitive to player count. If you lower the number of players by one, the chance of "screwing" someone out of ever being the imposter drops considerably:
```python
simulations: int = 100000
games: int = 20
players: int = 6  # reduced from 7

df_imposter_probabilities_6 = run_many_simulations(simulations, games, players)

alt.Chart(df_imposter_probabilities_6).mark_bar(size=15).encode(
    x=alt.X('Counts', title="Minimum times someone was the imposter"),
    y=alt.Y('Min', title="Probability"),
    tooltip=['Min', 'MinPVal']
).properties(
    title="Probability distribution of least times someone was the imposter, 6 players, 20 games",
)
```
Yes, just removing one player roughly halves the chance that someone never gets to be the imposter.
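The same back-of-the-envelope union bound from above shows why (still just my own aside): each player's chance of being skipped entirely shrinks from (6/7)^20 to (5/6)^20, and there is one fewer player for it to happen to.

```python
print(7 * (6 / 7) ** 20)  # ~0.32: chance someone is never the imposter with 7 players
print(6 * (5 / 6) ** 20)  # ~0.16: chance someone is never the imposter with 6 players, roughly half
```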
Moving on to the median statistics...
```python
alt.Chart(df_imposter_probabilities).mark_bar(size=15).encode(
    x=alt.X('Counts', title="Median times someone was the imposter"),
    y=alt.Y('Median', title="Probability"),
    tooltip=['Median', 'MedianPVal']
).properties(
    title="Probability distribution of median times someone was the imposter, 7 players, 20 games",
)
```
This is not so surprising: 20 / 7 is almost 3, so you would expect each person to be the imposter about 3 times. One takeaway is that the median lands on 2 or 3 in roughly 97% of sessions, meaning that in a typical night at least half of the players get to be the imposter only about 3 times or fewer. So, on average, you shouldn't expect to be the imposter all that often.
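To put the per-player expectation in concrete terms (one more back-of-the-envelope aside of my own): the chance that one specific player is the imposter 3 or fewer times over 20 games works out to roughly 68%, which backs up the "don't expect to be the imposter very often" takeaway.

```python
from math import comb

p = 1 / 7  # chance of being the imposter in any single game
p_at_most_3 = sum(comb(20, k) * p**k * (1 - p) ** (20 - k) for k in range(0, 4))
print(p_at_most_3)  # ~0.68: a given player is the imposter 3 or fewer times in most sessions
```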
The biggest thing is that random chance does not mean "uniform opportunity." While it may be tempting to call imposter selection in Among Us complete BS sometimes, it's really not uncommon for the same people to be imposter over and over, or for someone to never get a chance.
Additionally, it is important to distinguish between SOMEONE having a rare event happen and YOU having a rare event happen. The cool "Among Us Odds Calculator" can show you the chances of certain things occurring (like YOU being the imposter 5 out of 20 games), but it doesn't report on the likelihood of the same thing happening to SOMEONE in that game. Again, it is HIGHLY likely that SOMEONE will have something strange happen; they talk about it, and then everyone's perception is that the game has a weird random selection algorithm.
You may notice that I never used any fancy statistical equations here: no Poisson distributions, no t-tests, nothing. This was all inspired by "Stats for Hackers," and I encourage everyone to check it out!
If you want to play with this code, you can get the source jupyter notebook here: amongus_stats.ipynb. You will need to pip install pandas for the dataframes, and altair to make the visuals.
Thanks for reading!
-PBG