Two Wrongs

Bayesian/Markovian Filter for Boolean Events

Bayesian/Markovian Filter for Boolean Events

/// Cache writer faceting/sorting stats are never reset so it stops caching
/// requests when a client (like Jollyroom) starts sending facets all the time

#r "../.nuget/packages/mathnet.numerics/4.1.0/lib/netstandard2.0/MathNet.Numerics.dll"
#r "../.nuget/packages/mathnet.numerics.fsharp/4.1.0/lib/netstandard2.0/MathNet.Numerics.FSharp.dll"

open MathNet.Numerics.LinearAlgebra


/// A bayesian probabilistic model to determine whether some event X happens
/// "always" or "only sometimes", where the switch from "sometimes" to "always"
/// happens slightly more frequently than the other way around.
module AlwaysSometimesX =
    // Parameters to algorithm
    // -------
    //
    // This can be estimated from the request log. Reasoning for current numbers:
    // 0.99 because an engine that always facets mustn't always facet
    // 0.3 because an engine that sometimes facets... sometimes facets
    //
    /// What proportion of requests fulfill X...
    /// ... when the state is is "always X"?
    /// ... when the state is "only sometimes X"?
    let pXAxSx = vector [0.95; 0.3]
    //
    // This can be estimated by CS. Reasoning for current numbers:
    // 0.99999 because once an engine upgrades to API v3, unlikely to downgrade
    // 0.0001 because there's a small change old API engines upgrade
    //
    /// For each event, how likely is it that state is "always X" now...
    /// ... if it was "always X" before?
    /// ... if it was "only sometimes X" before?
    let pAxAxSx = vector [0.999999; 0.001]

    /// Get updated belief that the state is "always X" given old belief and
    /// whether or not the last event fulfilled X
    let step (beliefInAlwaysX : float) (requestFulfillsX : bool) =
        /// Conditional observation probabilities of hidden Markov model
        let observations = matrix [ pXAxSx ; 1.0 - pXAxSx ]
        /// Conditional transition probabilities of hidden Markov model
        let transitions = matrix [ pXAxSx ; 1.0 - pXAxSx ]

        let input = vector (if requestFulfillsX then [1.0; 0.0] else [0.0; 1.0])
        /// Probability of being in either state
        let states = vector [beliefInAlwaysX; 1.0 - beliefInAlwaysX]

        // Probability of observing current event, given input and state belief
        let observation = observations * states * input
        // Updated probability of both state beliefs
        let newStates = observations.Transpose() * input .* states / observation

        // Probability of state "always X"
        newStates.[0]

    let stepVerbose (beliefInAlwaysX : float) (requestFulfillsX : bool) =
        let belief = step beliefInAlwaysX requestFulfillsX

        printfn "Predicting %s (based on %f)"
            (if belief > 0.5 then "always X" else "sometimes X")
            belief

        belief


module SanityChecking =
    /// Verify that if we expectAlwaysFaceting for requests, that is indeed
    /// the prediction of the model with greater than 0.5 probability.
    /// Requests is a string of 'f' and 'n' where f indicates a faceted
    /// request, and n a non-faceted request.
    let ensure (expectAlwaysFaceting : bool) (requests : string) =
        let predictionCorrect =
            requests
            |> Seq.map (fun c -> c = 'f')
            |> Seq.fold AlwaysSometimesX.step 0.5  // 50–50 initial probability
            |> (<) 0.5
            |> (=) expectAlwaysFaceting

        if not predictionCorrect then
            if expectAlwaysFaceting then "always faceting" else "sometimes faceting"
            |> printfn "Did not expect %s to be %s" requests

    // About 80% faceted requests
    let alwaysFaceting = "ffffffnnffffffffnfffnffffnnnffffnffnfffffffnfffff"
    // About 60% faceted requests
    let sometimesFaceting = "nfffnfnffnfnnfffffnfnnfnnffffnnffffffnnnfnnfffnnn"

    let automated () =
        ensure true alwaysFaceting
        ensure false sometimesFaceting

        // Started out looking like always faceting, but turns out isn't
        ensure false (alwaysFaceting + sometimesFaceting)

        // Only sometimes faceting, may have switched to always faceting
        // – but too little data to be sure yet!
        ensure false (sometimesFaceting + alwaysFaceting)

        // Definitely switched to always faceting by now
        ensure true (sometimesFaceting + alwaysFaceting + alwaysFaceting)

    let interactive () =
        printfn "Enter 'f' for faceted requests, and 'n' for non-faceted:"
        fun _ -> System.Console.ReadLine()
        |> Seq.initInfinite
        |> Seq.takeWhile (fun line -> line <> null)
        |> Seq.map (fun line -> line = "f")
        |> Seq.fold AlwaysSometimesX.stepVerbose 0.5
        |> ignore


// SanityChecking.interactive ()
SanityChecking.automated ()

more tw-friendly implementation:

module InactiveCustomer =
    let pBotWhenActive = 0.12
    let pBotWhenInactive = 0.95

    let pGoingActive = 1.0e-10;
    let pGoingInactive = 1.0e-7;

    let step (beliefThatCustomerIsActive : float) (requestIsBot : bool) =
        printfn "This request is %s"
            (if requestIsBot then "a bot request" else "a regular request")

        let pStayActive = 1.0 - pGoingInactive

        // p(active) =
        //     p(active|previous active) * p(previous active)
        //     + p(active|previous inactive) * p(previous inactive)
        let pActive =
            beliefThatCustomerIsActive * pStayActive
            + (1.0 - beliefThatCustomerIsActive) * pGoingActive

        printfn "Probability that customer is active: %f" pActive

        // p(request|inactive) =
        //     if request = bot then p(bot|active) else p(regular|active)
        let pRequestWhenActive =
            if requestIsBot then pBotWhenActive else 1.0 - pBotWhenActive

        // p(request|inactive) =
        //     if request = bot then p(bot|inactive) else p(regular|inactive)
        let pRequestWhenInactive =
            if requestIsBot then pBotWhenInactive else 1.0 - pBotWhenInactive

        // p(request) =
        //     p(request|active) * p(active)
        //     + p(request|inactive) * p(inactive)
        let pRequest =
            pRequestWhenActive * pActive + pRequestWhenInactive * (1.0 - pActive)

        printfn "Probability of getting this request: %f" pActive

        // p(active|request) = p(request|active) * p(active) / p(request)
        let pActiveGivenRequest =
            pRequestWhenActive * pActive / pRequest

        printfn "Probability that customer is active, given this request: %f"
            pActiveGivenRequest

        pActiveGivenRequest


module SanityChecking =
    let ensure (expectCustomerActive : bool) (requests : string) =
        let predictionCorrect =
            requests
            |> Seq.map (fun c -> c = 'b')
            |> Seq.fold InactiveCustomer.step 0.5  // 50–50 initial probability
            |> (>) 0.5
            |> (=) expectCustomerActive

        if not predictionCorrect then
            printfn "Expected customer to be %s given requests:\n%s"
                (if expectCustomerActive then "active" else "inactive")
                requests

    // About 90% bot requests
    let alwaysBotRequests = "bbbbbbrrbbbbbbbbrbbbbbbbbbbbbrrrbbbbbbbbbbbrbbbbb"
    // About 40% bot requests
    let sometimesBotRequests = "rbbbrrrrbrbrrbrbbbrbrrrrrbbbbrrbbbbbbrrrbrrbbrrrr"

    let automated () =
        ensure true alwaysBotRequests
        ensure false sometimesBotRequests

        // Started out looking like inactive customer, but turns out isn't
        ensure false (alwaysBotRequests + sometimesBotRequests)

        // Active customer that may have gone inactive – but too little data
        // to be sure yet!
        ensure false (sometimesBotRequests + alwaysBotRequests)

        // Definitely gone inactive now, though
        ensure true (sometimesBotRequests + alwaysBotRequests + alwaysBotRequests)

    let interactive () =
        printfn "Enter 'b' for bot requests, and 'r' for regular requests:"
        fun _ -> System.Console.ReadLine()
        |> Seq.initInfinite
        |> Seq.takeWhile (fun line -> line <> null)
        |> Seq.map (fun line -> line = "b")
        |> Seq.fold InactiveCustomer.step 0.5
        |> ignore


SanityChecking.interactive ()
//SanityChecking.automated ()