I'm trying to verify string splitting equivalent to the native JavaScript string split method. I created the following js code as a recursive function so that it could be a function and not a method in Dafny.
function splitHelper(s, separator, index=0, sindex=0, results=[]) {
//console.log(index, sindex, results)
if(index == s.length) return results.concat(s.slice(sindex, index));
if(separator.length == 0 && index == s.length -1) return splitHelper(s, separator, index+1, index, results );
if(separator.length == 0) return splitHelper(s, separator, index+1, index+1, results.concat(s.slice(index, index+1)) );
if(index + separator.length > s.length) return splitHelper(s, separator, s.length, sindex, results);
if(s.slice(index, index+separator.length) == separator) {
return splitHelper(s, separator, index+separator.length, index+separator.length, results.concat(s.slice(sindex, index)))
}else{
return splitHelper(s, separator, index+1, sindex, results)
}
}
function arrEq(left, right) {
return JSON.stringify(left) === JSON.stringify(right);
}
function splitHelpertests() {
console.assert(arrEq(splitHelper("a,bc",""), ["a",",","b","c"]), "empty separator")
console.assert(arrEq(splitHelper("a,bc",","), ["a","bc"]),"basic")
console.assert(arrEq(splitHelper("||a||bc", "||"), ["","a","bc"]),"basic2")
console.assert(arrEq(splitHelper("||a||bc", "|"), ["","","a","","bc"]),"basic2")
console.assert(arrEq(splitHelper("||a||bc|", "|"), ["","","a","","bc",""]),"ending")
}
splitHelpertests();
In Dafny I wrote up the following.
function splitHelper(s: string, separator: string, index: nat, sindex: nat, results: seq<string>): seq<string>
requires index <= |s|
requires sindex <= |s|
requires sindex <= index
decreases |s| - index
{
if index >= |s| then results + [s[sindex..index]]
else if |separator| == 0 && index == |s|-1 then splitHelper(s, separator, index+1, index, results)
else if |separator| == 0 then splitHelper(s, separator, index+1, index+1, results + [s[sindex..index]])
else if index+|separator| > |s| then splitHelper(s, separator, |s|, sindex, results)
else if s[index..index+|separator|] == separator then splitHelper(s, separator, index+|separator|, index+|separator|, results + [s[sindex..index]])
else splitHelper(s, separator, index+1, sindex, results)
}
function split(s: string, separator: string): seq<string>
ensures split(s, separator) == splitHelper(s, separator, 0,0, [])
{
splitHelper(s, separator, 0, 0, [])
}
method Main() {
//"111||2222||333||".split("||")
var example := splitHelper("a,bc", ",",0,0,[]);
print example;
assert "a" == ['a'];
assert "bc" == ['b','c'];
assert example == ["a","bc"];
assert example == [['a'],['b','c']];
assert example == ["a","bc"];
// assert splitHelper("a,b", ",",0,0,[]) == [['a'],['b']];
// assert split("111||2222||333||", "||") == ["111", "2222", "333", ""];
// assert split("111||2222||333||", "|") == ["111", "", "2222", "", "333", "", ""];
// assert split("111||2222||333||", "") == [ "|", "1", "1", "1", "|", "|", "2", "2", "2", "2", "|", "|", "3", "3", "3", "|", "|" ];
}
Surprisingly, none of the assertions about splitHelper would verify. I assume because splitHelper is underspecified.
I feel like the following properties are roughly in the right direction.
predicate NotContains(haystack: string, needle: string) {
forall i,k :: 0 <= i <= k <= |haystack| ==> haystack[i..k] != needle
}
ghost predicate NotContainsTwo(haystack: string, needle: string) {
//forall i,k :: 0 <= i <= k <= |haystack| ==> haystack[i..k] != needle
!(exists xs: string, ys: string :: xs + needle + ys == haystack)
}
predicate NotContainsThree(haystack: string, needle: string) {
forall i :: 0 <= i <= |haystack| ==> !(needle <= haystack[i..])
}
predicate Contains(haystack: string, needle: string)
// ensures !NotContainsThree(haystack, needle)
ensures Contains(haystack, needle) <==> exists k :: 0 <= k <= |haystack| && needle <= haystack[k..]
ensures Contains(haystack, needle) <==> !NotContainsThree(haystack, needle)
{
if needle <= haystack then
assert haystack[0..] == haystack;
true
else if |haystack| > 0 then
assert forall i :: 1 <= i <= |haystack| ==> haystack[i..] == haystack[1..][(i-1)..];
Contains(haystack[1..], needle)
else
false
}
//no subsequence contains the separator
forall ss : ss in split(s, separator) ==> !Contains(ss, separator)
//relative ordering
forall i,k :: 0 <= i < k < |split(s, separator)| ==> exists m,n,s,t :: split(s, separator)[i] == s[m..n] && split(s, separator)[k] == s[s..t] && 0 <= m <= n < s <= t <= |s|
function sumSeq(ss: seq<string>, separator: string ): string {
if |ss| == 0 then "" else if |ss| == 1 then ss[0] else ss[0] + separator + sumSeq(ss[1..], separator)
}
//completeness
sumSeq(split(s, separator), separator) == s
Are there any other properties required? Are the above good formulations of those properties or will there be issues in verification caused by them?
Edit: Added alternate definitions and renamed predicate notin to NotContains.