Infinite loop in parsing a string using pointer math

388 views Asked by At

I have a routine that processes a C-like string, resulting in usual Delphi string:

class function UTIL.ProcessString(const S: string): string;
var
  SB:TStringBuilder;
  P:MarshaledString;
  procedure DoIt(const S:string;const I:Integer=2);
  begin
  SB.Append(S);
  Inc(P,I);
  end;
begin
SB:=TStringBuilder.Create;
P:=PChar(S);
while P<>nil do
  begin
  if P^<>'\' then DoIt(P^,1) else
    case (P+1)^ of
    '\','"':DoIt((P+1)^);
    #0,'n':DoIt(sLineBreak);
    't':DoIt(#9);
    else DoIt('\'+(P+1)^,2);
    end;
  end;
Result:=SB.ToString;
SB.Free;
end;

The problem is the loop never exits. Debugging shows the line while P<>nil do doesn't evaluate to False because P is '' at the end of processing, so the code tries to perform out-of-range operations on it. Since I didn't find any concise documentation on pointer math in Delphi, it's quite possible I'm at fault here.

EDIT: I've rewritten the function with everything read in mind like that:

class function UTIL.ProcessString(const S: string): string;
var
  SB:TStringBuilder;
  P:PChar;
  C:Char;
begin
SB:=TStringBuilder.Create;
P:=PChar(S);
  repeat
  C:=P^;
  Inc(P);
    case C of
    #0:;
    '\':
      begin
      C:=P^;
      Inc(P);
        case C of
        #0,'n':SB.Append(sLineBreak);
        '\','"':SB.Append(C);
        't':SB.Append(#9);
        else SB.Append('\').Append(C);
        end;
      end;
    else SB.Append(C);
    end;
  until P^=#0;
Result:=SB.ToString;
SB.Free;
end;

I check for #0 in the inner case statement for "such \ strings" being fed into the routine, i. e. a sequence of strings broken into pieces read from a source and then formatted one by one. So far this works great, however it fails to correctly parse '\\t' as '\t' and similar constructs, it returns just #9. I can't really think of any cause. Oh, and the old version also had this bug BTW.

1

There are 1 answers

2
Remy Lebeau On BEST ANSWER

Your loop runs forever because P will never be nil to begin with, not because of an issue with your pointer math (although I will get to that further below). PChar() will always return a non-nil pointer. If S is not empty, PChar() returns a pointer to the first Char, but if S is empty then PChar() will return a pointer to a null-terminator in const memory. Your code is not accounting for that latter possibility.

If you want to process S as a null-terminated C string (why not take the full Length() of S into account instead?), then you need to use while P^ <> #0 do instead of while P <> nil do.

Aside from that:

  • P should be declared as PChar instead of MarshaledString. There is no reason to use MarshaledString in this situation, or this manner.

  • It would be more efficient to use TStringBuilder.Append(Char) in the cases where you are passing a single Char to DoIt(). In fact, I would suggest just getting rid of DoIt() altogether, as it does not really gain you anything useful.

  • Why are you treating '\'#0 as a line break? To account for a \ character at the end of the input string? If you encounter that condition, you are incrementing P past the null-terminator, and then you are in undefined territory since you are reading into surrounding memory. Or does your input string really have embedded #0 characters, and then a final null terminator? That would be unusual format for textual data.

Try something more like this (if there really are embedded #0 characters):

class function UTIL.ProcessString(const S: string): string;
var
  SB: TStringBuilder;
  P: PChar;
begin
  Result := '';
  P := PChar(S);
  if P^ = #0 then Exit;
  SB := TStringBuilder.Create;
  try
    repeat
      if P^ <> '\' then
      begin
        SB.Append(P^);
        Inc(P);
      end else
      begin
        Inc(P);
        case P^ of
          '\','"': SB.Append(P^);
          #0, 'n': SB.Append(sLineBreak);
          't':     SB.Append(#9);
          else     SB.Append('\'+P^);
        end;
        Inc(P);
      end;
    until P^ = #0;
    Result := SB.ToString;
  finally
    SB.Free;
  end;
end;

Or this (if there are no embedded #0 characters):

class function UTIL.ProcessString(const S: string): string;
var
  SB: TStringBuilder;
  P: PChar;
  Ch: Char;
begin
  Result := '';
  P := PChar(S);
  if P^ = #0 then Exit;
  SB := TStringBuilder.Create;
  try
    repeat
      Ch := P^;
      Inc(P);
      if Ch <> '\' then
        SB.Append(Ch)
      else
      begin
        Ch := P^;
        if Ch = #0 then
        begin
          // up to you if you really need this or not:
          // SB.Append(sLineBreak);
          Break;
        end;
        Inc(P);
        case Ch of
          '\','"': SB.Append(Ch);
          'n':     SB.Append(sLineBreak);
          't':     SB.Append(#9);
          else     SB.Append('\'+Ch);
        end;
      end;
    until P^ = #0;
    Result := SB.ToString;
  finally
    SB.Free;
  end;
end;