While reading up this question, I wondered why no one would "simply" iterate all the possible paths on the boggle grid and have the word-tries follow and then cancel the path if there is no match in the word-trie. Cannot be that many paths on a tiny 4 by 4 grid, right? How many paths are there? So I set out to code a path-counter function in F#. The results yield what no one stated on that other page: Way more paths on the grid than I would have guessed (more paths than words in the word-set, actually).
While all that is pretty much the back story to my question, the code I ended up with was running rather slow and I found that I could not give good answers to a few aspects of the code. So here, the code first, then below it, you will find points which I think deserve explanations...
let moves n state square =
let allSquares = [0..n*n-1] |> Set.ofList
let right = Set.difference allSquares (Set.ofList [n-1..n..n*n])
let left = Set.difference allSquares (Set.ofList [0..n..n*n-1])
let up = Set.difference allSquares (Set.ofList [0..n-1])
let down = Set.difference allSquares (Set.ofList [n*n-n..n*n-1])
let downRight = Set.intersect right down
let downLeft = Set.intersect left down
let upRight = Set.intersect right up
let upLeft = Set.intersect left up
let appendIfInSet se v res =
if Set.contains square se then res @ v else res
[]
|> appendIfInSet right [square + 1]
|> appendIfInSet left [square - 1]
|> appendIfInSet up [square - n]
|> appendIfInSet down [square + n]
|> appendIfInSet downRight [square + n + 1]
|> appendIfInSet downLeft [square + n - 1]
|> appendIfInSet upRight [square - n + 1]
|> appendIfInSet upLeft [square - n - 1]
|> List.choose (fun s -> if ((uint64 1 <<< s) &&& state) = 0UL then Some s else None )
let block state square =
state ||| (uint64 1 <<< square)
let countAllPaths n lmin lmax =
let mov = moves n // line 30
let rec count l state sq c =
let state' = block state sq
let m = mov state' sq
match l with
| x when x <= lmax && x >= lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c+1) m
| x when x < lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c) m
| _ ->
c
List.fold (fun acc s -> count 0 (block 0UL s) s acc) 0 [0..n*n-1]
[<EntryPoint>]
let main args =
printfn "%d: %A" (Array.length args) args
if 3 = Array.length args then
let n = int args.[0]
let lmin = int args.[1]
let lmax = int args.[2]
printfn "%d %d %d -> %d" n lmin lmax (countAllPaths n lmin lmax)
else
printfn "usage: wordgames.exe n lmin lmax"
0
In line 30, I curried the
moves
function with the first argument, hoping that maybe code optimization would benefit from it. Maybe optimizing the 9 sets I create in move which are only a function ofn
. After all, they need not be generated over and over again, right? On the other hand, I would not really bet on it actually happening.
So, question #1 is: How could I enforce this optimization in an as little code bloating way as possible? (I could of course create a type with 9 members and then an array of that type for each possible n and then do a look up table like usage of the pre-computed sets but that would be code bloat in my opinion).Many sources hint that parallel folds are considered critical. How could I create a parallel version of the counting function (which runs on multiple cores)?
Does anyone have clever ideas how to speed this up? Maybe some pruning or memoization etc?
At first, when I ran the function for n=4 lmin=3 lmax=8
I thought it takes so long because I ran it in fsi. But then I compiled the code with -O and it still took about the same time...
UPDATE
While waiting for input from you guys, I did the code bloated manual optimization version (runs much faster) and then found a way to make it run on multiple cores.
All in all those 2 changes yielded about a speed up by a factor of 30. Here the (bloated) version I came up with (still looking for a way to avoid the bloat):
let squareSet n =
let allSquares = [0..n*n-1] |> Set.ofList
let right = Set.difference allSquares (Set.ofList [n-1..n..n*n])
let left = Set.difference allSquares (Set.ofList [0..n..n*n-1])
let up = Set.difference allSquares (Set.ofList [0..n-1])
let down = Set.difference allSquares (Set.ofList [n*n-n..n*n-1])
let downRight = Set.intersect right down
let downLeft = Set.intersect left down
let upRight = Set.intersect right up
let upLeft = Set.intersect left up
[|right;left;up;down;upRight;upLeft;downRight;downLeft|]
let RIGHT,LEFT,UP,DOWN = 0,1,2,3
let UPRIGHT,UPLEFT,DOWNRIGHT,DOWNLEFT = 4,5,6,7
let squareSets =
[|Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;Set.empty;|]
::
[ for i in 1..8 do
yield squareSet i
]
|> Array.ofList
let moves n state square =
let appendIfInSet se v res =
if Set.contains square se then res @ v else res
[]
|> appendIfInSet squareSets.[n].[RIGHT] [square + 1]
|> appendIfInSet squareSets.[n].[LEFT] [square - 1]
|> appendIfInSet squareSets.[n].[UP] [square - n]
|> appendIfInSet squareSets.[n].[DOWN] [square + n]
|> appendIfInSet squareSets.[n].[DOWNRIGHT] [square + n + 1]
|> appendIfInSet squareSets.[n].[DOWNLEFT] [square + n - 1]
|> appendIfInSet squareSets.[n].[UPRIGHT] [square - n + 1]
|> appendIfInSet squareSets.[n].[UPLEFT] [square - n - 1]
|> List.choose (fun s -> if ((uint64 1 <<< s) &&& state) = 0UL then Some s else None )
let block state square =
state ||| (uint64 1 <<< square)
let countAllPaths n lmin lmax =
let mov = moves n
let rec count l state sq c =
let state' = block state sq
let m = mov state' sq
match l with
| x when x <= lmax && x >= lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c+1) m
| x when x < lmin ->
List.fold (fun acc s -> count (l+1) state' s acc) (c) m
| _ ->
c
//List.fold (fun acc s -> count 0 (block 0UL s) s acc) 0 [0..n*n-1]
[0..n*n-1]
|> Array.ofList
|> Array.Parallel.map (fun start -> count 0 (block 0UL start) start 0)
|> Array.sum
[<EntryPoint>]
let main args =
printfn "%d: %A" (Array.length args) args
if 3 = Array.length args then
let n = int args.[0]
let lmin = int args.[1]
let lmax = int args.[2]
printfn "%d %d %d -> %d" n lmin lmax (countAllPaths n lmin lmax)
else
printfn "usage: wordgames.exe n lmin lmax"
0
As for the non-optimization of the generation of sets. The second version posted in the update to the question showed, that this is actually the case (not optimized by compiler) and it yielded a significant speed up. The final version (posted below in this answer) carries that approach even further and speeds up the path counting (and the solving of a boggle puzzle) even further.
Combined with parallel execution on multiple cores, the initially really slow (maybe 30s) version could be sped up to only around 100ms for the
n=4 lmin=3 lmax=8
case.For n=6 class of problems, the parallel and hand tuned implementation solves a puzzle in around 60ms on my machine. It makes sense, that this is faster than the path counting, as the word list probing (used a dictionary with around 80000 words) along with the dynamic programming approach pointed out by @GuyCoder renders the solution of the puzzle a less complex problem than the (brute force) path counting.
Lesson learned
The f# compiler does not seem to be all too mystical and magical if it comes to code optimizations. Hand tuning is worth the effort if performance is really required.
Turning a single threaded recursive search function into a parallel (concurrent) function was not really hard in this case.
The final version of the code
Compiled with:
(Microsoft (R) F# Compiler version 14.0.23413.0)