How to formally specify string split

125 views Asked by At

I'm trying to verify string splitting equivalent to the native JavaScript string split method. I created the following js code as a recursive function so that it could be a function and not a method in Dafny.

function splitHelper(s, separator, index=0, sindex=0, results=[]) {
    //console.log(index, sindex, results)
    if(index == s.length) return results.concat(s.slice(sindex, index));
    if(separator.length == 0 && index == s.length -1) return splitHelper(s, separator, index+1, index, results );
    if(separator.length == 0) return splitHelper(s, separator, index+1, index+1, results.concat(s.slice(index, index+1)) );
    
    if(index + separator.length > s.length) return splitHelper(s, separator, s.length, sindex, results);
    if(s.slice(index, index+separator.length) == separator) {
        return splitHelper(s, separator, index+separator.length, index+separator.length, results.concat(s.slice(sindex, index)))
    }else{
        return splitHelper(s, separator, index+1, sindex, results)
    }
}

function arrEq(left, right) {
    return JSON.stringify(left) === JSON.stringify(right);
}

function splitHelpertests() {
    console.assert(arrEq(splitHelper("a,bc",""), ["a",",","b","c"]), "empty separator")
    console.assert(arrEq(splitHelper("a,bc",","), ["a","bc"]),"basic")
    console.assert(arrEq(splitHelper("||a||bc", "||"), ["","a","bc"]),"basic2")
    console.assert(arrEq(splitHelper("||a||bc", "|"), ["","","a","","bc"]),"basic2")
    console.assert(arrEq(splitHelper("||a||bc|", "|"), ["","","a","","bc",""]),"ending")
}
splitHelpertests();

In Dafny I wrote up the following.

function splitHelper(s: string, separator: string, index: nat, sindex: nat, results: seq<string>): seq<string>
    requires index <= |s|
    requires sindex <= |s|
    requires sindex <= index
    decreases |s| - index
{
    if index >= |s| then results + [s[sindex..index]]
    else if |separator| == 0 && index == |s|-1 then splitHelper(s, separator, index+1, index, results)
    else if |separator| == 0 then splitHelper(s, separator, index+1, index+1, results + [s[sindex..index]])
    else if index+|separator| > |s| then splitHelper(s, separator, |s|, sindex, results)
    else if s[index..index+|separator|] == separator then splitHelper(s, separator, index+|separator|, index+|separator|, results + [s[sindex..index]])
    else splitHelper(s, separator, index+1, sindex, results)
}

function split(s: string, separator: string): seq<string> 
    ensures split(s, separator) == splitHelper(s, separator, 0,0, [])
{
    splitHelper(s, separator, 0, 0, [])
}

method Main() {
    //"111||2222||333||".split("||")
    var example := splitHelper("a,bc", ",",0,0,[]);
    print example;
    assert "a" == ['a'];
    assert "bc" == ['b','c'];
    assert example == ["a","bc"];
    assert example == [['a'],['b','c']];
    assert example == ["a","bc"];
    // assert splitHelper("a,b", ",",0,0,[]) == [['a'],['b']];
    // assert split("111||2222||333||", "||") == ["111", "2222", "333", ""];
    // assert split("111||2222||333||", "|") == ["111", "", "2222", "", "333", "", ""];
    // assert split("111||2222||333||", "") == [ "|", "1", "1", "1", "|", "|", "2", "2", "2", "2", "|", "|", "3", "3", "3", "|", "|" ];
}

Surprisingly, none of the assertions about splitHelper would verify. I assume because splitHelper is underspecified.

I feel like the following properties are roughly in the right direction.

predicate NotContains(haystack: string, needle: string) {
    forall i,k :: 0 <= i <= k <= |haystack| ==> haystack[i..k] != needle
}

ghost predicate NotContainsTwo(haystack: string, needle: string) {
    //forall i,k :: 0 <= i <= k <= |haystack| ==> haystack[i..k] != needle
    !(exists xs: string, ys: string :: xs + needle + ys == haystack)
}

predicate NotContainsThree(haystack: string, needle: string) {
    forall i :: 0 <= i <= |haystack| ==> !(needle <= haystack[i..])
}

predicate Contains(haystack: string, needle: string)
    // ensures !NotContainsThree(haystack, needle)
    ensures Contains(haystack, needle) <==> exists k :: 0 <= k <= |haystack| && needle <= haystack[k..] 
    ensures Contains(haystack, needle) <==> !NotContainsThree(haystack, needle)
{
    if needle <= haystack then 
        assert haystack[0..] == haystack;
        true 
    else if |haystack| > 0 then 
        assert forall i :: 1 <= i <= |haystack| ==> haystack[i..] == haystack[1..][(i-1)..];
        Contains(haystack[1..], needle)
    else 
        false
}

//no subsequence contains the separator
forall ss : ss in split(s, separator) ==> !Contains(ss, separator)

//relative ordering
forall i,k :: 0 <= i < k < |split(s, separator)| ==> exists m,n,s,t :: split(s, separator)[i] == s[m..n] && split(s, separator)[k] == s[s..t] && 0 <= m <= n < s <= t <= |s|

function sumSeq(ss: seq<string>, separator: string ): string {
    if |ss| == 0 then "" else if |ss| == 1 then ss[0] else ss[0] + separator + sumSeq(ss[1..], separator)
}

//completeness
sumSeq(split(s, separator), separator) == s

Are there any other properties required? Are the above good formulations of those properties or will there be issues in verification caused by them?

Edit: Added alternate definitions and renamed predicate notin to NotContains.

0

There are 0 answers