Boggle - count all possible paths on a N*N grid. Performance

752 views Asked by At

While reading up this question, I wondered why no one would "simply" iterate all the possible paths on the boggle grid and have the word-tries follow and then cancel the path if there is no match in the word-trie. Cannot be that many paths on a tiny 4 by 4 grid, right? How many paths are there? So I set out to code a path-counter function in F#. The results yield what no one stated on that other page: Way more paths on the grid than I would have guessed (more paths than words in the word-set, actually).

While all that is pretty much the back story to my question, the code I ended up with was running rather slow and I found that I could not give good answers to a few aspects of the code. So here, the code first, then below it, you will find points which I think deserve explanations...

let moves n state square =
    let allSquares = [0..n*n-1] |> Set.ofList
    let right = Set.difference allSquares (Set.ofList [n-1..n..n*n])
    let left = Set.difference allSquares (Set.ofList [0..n..n*n-1])
    let up = Set.difference allSquares (Set.ofList [0..n-1])
    let down = Set.difference allSquares (Set.ofList [n*n-n..n*n-1])
    let downRight = Set.intersect right down
    let downLeft = Set.intersect left down
    let upRight = Set.intersect right up
    let upLeft = Set.intersect left up
    let appendIfInSet se v res =
        if Set.contains square se then res @ v else res
    []
    |> appendIfInSet right [square + 1]
    |> appendIfInSet left [square - 1]
    |> appendIfInSet up [square - n]
    |> appendIfInSet down [square + n]
    |> appendIfInSet downRight [square + n + 1]
    |> appendIfInSet downLeft [square + n - 1]
    |> appendIfInSet upRight [square - n + 1]
    |> appendIfInSet upLeft [square - n - 1]
    |> List.choose (fun s -> if ((uint64 1 <<< s) &&& state) = 0UL then Some s else None )

let block state square =
    state ||| (uint64 1 <<< square)

let countAllPaths n lmin lmax =
    let mov = moves n                 // line 30
    let rec count l state sq c =
        let state' = block state sq
        let m = mov state' sq
        match l with
        | x when x <= lmax && x >= lmin ->
            List.fold (fun acc s -> count (l+1) state' s acc) (c+1) m
        | x when x < lmin ->
            List.fold (fun acc s -> count (l+1) state' s acc) (c) m
        | _ ->
            c
    List.fold (fun acc s -> count 0 (block 0UL s) s acc) 0 [0..n*n-1]

[<EntryPoint>] 
let main args =
    printfn "%d: %A" (Array.length args) args
    if 3 = Array.length args then
        let n = int args.[0]
        let lmin = int args.[1]
        let lmax = int args.[2]
        printfn "%d %d %d -> %d" n lmin lmax (countAllPaths n lmin lmax)
    else
        printfn "usage: wordgames.exe n lmin lmax"
    0
  1. In line 30, I curried the moves function with the first argument, hoping that maybe code optimization would benefit from it. Maybe optimizing the 9 sets I create in move which are only a function of n. After all, they need not be generated over and over again, right? On the other hand, I would not really bet on it actually happening.
    So, question #1 is: How could I enforce this optimization in an as little code bloating way as possible? (I could of course create a type with 9 members and then an array of that type for each possible n and then do a look up table like usage of the pre-computed sets but that would be code bloat in my opinion).

  2. Many sources hint that parallel folds are considered critical. How could I create a parallel version of the counting function (which runs on multiple cores)?

  3. Does anyone have clever ideas how to speed this up? Maybe some pruning or memoization etc?

At first, when I ran the function for n=4 lmin=3 lmax=8 I thought it takes so long because I ran it in fsi. But then I compiled the code with -O and it still took about the same time...

UPDATE

While waiting for input from you guys, I did the code bloated manual optimization version (runs much faster) and then found a way to make it run on multiple cores.
All in all those 2 changes yielded about a speed up by a factor of 30. Here the (bloated) version I came up with (still looking for a way to avoid the bloat):

let squareSet n =
    let allSquares = [0..n*n-1] |> Set.ofList
    let right = Set.difference allSquares (Set.ofList [n-1..n..n*n])
    let left = Set.difference allSquares (Set.ofList [0..n..n*n-1])
    let up = Set.difference allSquares (Set.ofList [0..n-1])
    let down = Set.difference allSquares (Set.ofList [n*n-n..n*n-1])
    let downRight = Set.intersect right down
    let downLeft = Set.intersect left down
    let upRight = Set.intersect right up
    let upLeft = Set.intersect left up
    [|right;left;up;down;upRight;upLeft;downRight;downLeft|]    

let RIGHT,LEFT,UP,DOWN = 0,1,2,3
let UPRIGHT,UPLEFT,DOWNRIGHT,DOWNLEFT = 4,5,6,7

let squareSets =
    [|Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;|]
    ::
    [    for i in 1..8 do
            yield squareSet i
    ]
    |> Array.ofList


let moves n state square =
    let appendIfInSet se v res =
        if Set.contains square se then res @ v else res

    []
    |> appendIfInSet squareSets.[n].[RIGHT] [square + 1]
    |> appendIfInSet squareSets.[n].[LEFT] [square - 1]
    |> appendIfInSet squareSets.[n].[UP] [square - n]
    |> appendIfInSet squareSets.[n].[DOWN] [square + n]
    |> appendIfInSet squareSets.[n].[DOWNRIGHT] [square + n + 1]
    |> appendIfInSet squareSets.[n].[DOWNLEFT] [square + n - 1]
    |> appendIfInSet squareSets.[n].[UPRIGHT] [square - n + 1]
    |> appendIfInSet squareSets.[n].[UPLEFT] [square - n - 1]
    |> List.choose (fun s -> if ((uint64 1 <<< s) &&& state) = 0UL then Some s else None )

let block state square =
    state ||| (uint64 1 <<< square)

let countAllPaths n lmin lmax =
    let mov = moves n
    let rec count l state sq c =
        let state' = block state sq
        let m = mov state' sq
        match l with
        | x when x <= lmax && x >= lmin ->
            List.fold (fun acc s -> count (l+1) state' s acc) (c+1) m
        | x when x < lmin ->
            List.fold (fun acc s -> count (l+1) state' s acc) (c) m
        | _ ->
            c
    //List.fold (fun acc s -> count 0 (block 0UL s) s acc) 0 [0..n*n-1]
    [0..n*n-1]
    |> Array.ofList
    |> Array.Parallel.map (fun start -> count 0 (block 0UL start) start 0)
    |> Array.sum

[<EntryPoint>] 
let main args =
    printfn "%d: %A" (Array.length args) args
    if 3 = Array.length args then
        let n = int args.[0]
        let lmin = int args.[1]
        let lmax = int args.[2]
        printfn "%d %d %d -> %d" n lmin lmax (countAllPaths n lmin lmax)
    else
        printfn "usage: wordgames.exe n lmin lmax"
    0
1

There are 1 answers

3
BitTickler On

As for the non-optimization of the generation of sets. The second version posted in the update to the question showed, that this is actually the case (not optimized by compiler) and it yielded a significant speed up. The final version (posted below in this answer) carries that approach even further and speeds up the path counting (and the solving of a boggle puzzle) even further.

Combined with parallel execution on multiple cores, the initially really slow (maybe 30s) version could be sped up to only around 100ms for the n=4 lmin=3 lmax=8 case.

For n=6 class of problems, the parallel and hand tuned implementation solves a puzzle in around 60ms on my machine. It makes sense, that this is faster than the path counting, as the word list probing (used a dictionary with around 80000 words) along with the dynamic programming approach pointed out by @GuyCoder renders the solution of the puzzle a less complex problem than the (brute force) path counting.

Lesson learned

The f# compiler does not seem to be all too mystical and magical if it comes to code optimizations. Hand tuning is worth the effort if performance is really required.

Turning a single threaded recursive search function into a parallel (concurrent) function was not really hard in this case.

The final version of the code

Compiled with:

fsc --optimize+ --tailcalls+ wordgames.fs

(Microsoft (R) F# Compiler version 14.0.23413.0)

let wordListPath = @"E:\temp\12dicts-6.0.2\International\3of6all.txt"

let acceptableWord (s : string) : bool =
    let s' = s.Trim()
    if s'.Length > 2
    then
        if System.Char.IsLower(s'.[0]) && System.Char.IsLetter(s'.[0]) then true
        else false
    else
        false

let words = 
    System.IO.File.ReadAllLines(wordListPath)
    |> Array.filter acceptableWord


let squareSet n =
    let allSquares = [0..n*n-1] |> Set.ofList
    let right = Set.difference allSquares (Set.ofList [n-1..n..n*n])
    let left = Set.difference allSquares (Set.ofList [0..n..n*n-1])
    let up = Set.difference allSquares (Set.ofList [0..n-1])
    let down = Set.difference allSquares (Set.ofList [n*n-n..n*n-1])
    let downRight = Set.intersect right down
    let downLeft = Set.intersect left down
    let upRight = Set.intersect right up
    let upLeft = Set.intersect left up
    [|right;left;up;down;upRight;upLeft;downRight;downLeft|]    

let RIGHT,LEFT,UP,DOWN = 0,1,2,3
let UPRIGHT,UPLEFT,DOWNRIGHT,DOWNLEFT = 4,5,6,7

let squareSets =
    [|Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;|]
    ::
    [    for i in 1..8 do
            yield squareSet i
    ]
    |> Array.ofList


let movesFromSquare n square =
    let appendIfInSet se v res =
            if Set.contains square se then v :: res  else res

    []
    |> appendIfInSet squareSets.[n].[RIGHT] (square + 1)
    |> appendIfInSet squareSets.[n].[LEFT] (square - 1)
    |> appendIfInSet squareSets.[n].[UP] (square - n)
    |> appendIfInSet squareSets.[n].[DOWN] (square + n)
    |> appendIfInSet squareSets.[n].[DOWNRIGHT] (square + n + 1)
    |> appendIfInSet squareSets.[n].[DOWNLEFT] (square + n - 1)
    |> appendIfInSet squareSets.[n].[UPRIGHT] (square - n + 1)
    |> appendIfInSet squareSets.[n].[UPLEFT] (square - n - 1)

let lutMovesN n =
    Array.init n (fun i -> if i > 0 then Array.init (n*n-1) (fun j -> movesFromSquare i j) else Array.empty)

let lutMoves =
    lutMovesN 8

let moves n state square =
    let appendIfInSet se v res =
            if Set.contains square se then v :: res  else res

    lutMoves.[n].[square]
    |> List.filter (fun s -> ((uint64 1 <<< s) &&& state) = 0UL)

let block state square =
    state ||| (uint64 1 <<< square)

let countAllPaths n lmin lmax =
    let mov = moves n
    let rec count l state sq c =
        let state' = block state sq
        let m = mov state' sq
        match l with
        | x when x <= lmax && x >= lmin ->
            List.fold (fun acc s -> count (l+1) state' s acc) (c+1) m
        | x when x < lmin ->
            List.fold (fun acc s -> count (l+1) state' s acc) (c) m
        | _ ->
            c
    //List.fold (fun acc s -> count 0 (block 0UL s) s acc) 0 [0..n*n-1]
    [|0..n*n-1|]
    |> Array.Parallel.map (fun start -> count 0 (block 0UL start) start 0)
    |> Array.sum


//printfn "%d " (words |> Array.distinct |> Array.length)

let usage() =
    printfn "usage: wordgames.exe [--gen n count problemPath | --count n lmin lmax | --solve problemPath ]"

let rng = System.Random()

let genProblem n (sb : System.Text.StringBuilder) =
    let a = Array.init (n*n) (fun _ -> char (rng.Next(26) + int 'a'))
    sb.Append(a) |> ignore
    sb.AppendLine()

let genProblems nproblems n (sb : System.Text.StringBuilder) : System.Text.StringBuilder =
    for i in 1..nproblems do
        genProblem n sb |> ignore
    sb

let solve n (board : System.String) =
    let ba = board.ToCharArray()

    let testWord (w : string) : bool =
        let testChar k sq = (ba.[sq] = w.[k])
        let rec testSquare state k sq =
            match k with
            | 0 -> testChar 0 sq
            | x -> 
                if testChar x sq
                then
                    let state' = block state x
                    moves n state' x
                    |> List.exists (testSquare state' (x-1))
                else
                    false

        [0..n*n-1]    
        |> List.exists (testSquare 0UL (String.length w - 1))

    words
    |> Array.splitInto 32
    |> Array.Parallel.map (Array.filter testWord)
    |> Array.concat

[<EntryPoint>] 
let main args =
    printfn "%d: %A" (Array.length args) args
    let nargs = Array.length args
    let sw = System.Diagnostics.Stopwatch()
    match nargs with
    | x when x >= 2 ->
        match args.[0] with
        | "--gen" ->
            if nargs = 4
            then
                let n = int args.[1]
                let nproblems = int args.[2]
                let outpath = args.[3]
                let problems = genProblems nproblems n (System.Text.StringBuilder())
                System.IO.File.WriteAllText (outpath,problems.ToString())
                0
            else
                usage()
                0
        | "--count" ->
            if nargs = 4 
            then
                let n = int args.[1]
                let lmin = int args.[2]
                let lmax = int args.[3]
                sw.Start()
                let count = countAllPaths n lmin lmax
                sw.Stop()
                printfn "%d %d %d -> %d (took: %d)" n lmin lmax count (sw.ElapsedMilliseconds)
                0
            else
                usage ()
                0
        | "--solve" ->
            if nargs = 2
            then
                let problems = System.IO.File.ReadAllLines(args.[1])
                problems 
                |> Array.iter 
                    (fun (p : string) -> 
                        let n = int (sqrt (float (String.length p)))
                        sw.Reset()
                        sw.Start()
                        let found = solve n p
                        sw.Stop()
                        printfn "%s\n%A\n%dms" p found (sw.ElapsedMilliseconds)
                    )
                0
            else
                usage ()
                0
        | _ ->
            usage ()
            0
    | _ -> 
        usage ()
        0