Mirror

String Pattern Matching - Improved & Extended (Views: 708)


Problem/Question/Abstract:

How do I check a given string with "Template" pattern.

Answer:

Some of you may have already seen the translation of PattenMatch() API of common.c in MSDN Samples\VC98\sdk\sdktools\tlist by Jerome FORESTIER.

This function is an attempt at improving & extending the translated code. The various improvements & extensions are:

1. Removed bug wherein InputString="ab" is rejected by Pattern="a*b".

2. Optimization has been incorporated for trailing wildcard "*" in the Pattern string. For such cases, there is a very significant speed increase which is directly propotional to the number of chars in the InputString that match the trailing Pattern wildcard. e.g. this function is 20 times faster for Pattern="a*" and InputString="abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb". The speed difference becomes even more on increasing the number of "b" characters in the InputString.

3. Added new functionality of set exclusion. e.g. [^a-d] matches true if the InputString character is not a,b,c or d.

4. Discarded local variables in the original translation. Used array notation instead.

5. Added plenty of comments!

6. This implementation is on an average 20% faster than the original translation.

{*****************************************************************}
{* This function implements a subset of regular expression based *}
{* search and is based on the translation of PattenMatch() API   *}
{* of common.c in MSDN Samples\VC98\sdk\sdktools\tlist           *}
{*****************************************************************}
{* MetaChars are  :                                              *}
{*            '*' : Zero or more chars.                          *}
{*            '?' : Any one char.                                *}
{*         [adgj] : Individual chars (inclusion).                *}
{*        [^adgj] : Individual chars (exclusion).                *}
{*          [a-d] : Range (inclusion).                           *}
{*         [^a-d] : Range (exclusion).                           *}
{*       [a-dg-j] : Multiple ranges (inclusion).                 *}
{*      [^a-dg-j] : Multiple ranges (exclusion).                 *}
{*  [ad-fhjnv-xz] : Mix of range & individual chars (inclusion). *}
{* [^ad-fhjnv-xz] : Mix of range & individual chars (exclusion). *}
{*****************************************************************}

function MatchPattern(InpStr, Pattern: PChar): Boolean;
begin
  while (True) do
  begin
    case Pattern[0] of
      #0:
        begin //End of pattern reached.
          Result := (InpStr[0] = #0); //TRUE if end of InpStr.
          Exit;
        end;

      '*':
        begin //Match zero or more occurances of any char.
          if (Pattern[1] = #0) then
          begin //Match any number of trailing chars.
            Result := True;
            Exit;
          end
          else
            Inc(Pattern);

          while (InpStr[0] <> #0) do
          begin //Try to match any substring of InpStr.
            if (MatchPattern(InpStr, Pattern)) then
            begin
              Result := True;
              Exit;
            end;

            //Continue testing next char...
            Inc(InpStr);
          end;
        end;

      '?':
        begin //Match any one char.
          if (InpStr[0] = #0) then
          begin
            Result := False;
            Exit;
          end;

          //Continue testing next char...
          Inc(InpStr);
          Inc(Pattern);
        end;

      '[':
        begin //Match given set of chars.
          if (Pattern[1] in [#0, '[', ']']) then
          begin //Invalid Set - So no match.
            Result := False;
            Exit;
          end;

          if (Pattern[1] = '^') then
          begin //Match for exclusion of given set...
            Inc(Pattern, 2);
            Result := True;
            while (Pattern[0] <> ']') do
            begin
              if (Pattern[1] = '-') then
              begin //Match char exclusion range.
                if (InpStr[0] >= Pattern[0]) and (InpStr[0] <= Pattern[2]) then
                begin //Given char failed set exclusion range.
                  Result := False;
                  Break;
                end
                else
                  Inc(Pattern, 3);
              end
              else
              begin //Match individual char exclusion.
                if (InpStr[0] = Pattern[0]) then
                begin //Given char failed set element exclusion.
                  Result := False;
                  Break;
                end
                else
                  Inc(Pattern);
              end;
            end;
          end
          else
          begin //Match for inclusion of given set...
            Inc(Pattern);
            Result := False;
            while (Pattern[0] <> ']') do
            begin
              if (Pattern[1] = '-') then
              begin //Match char inclusion range.
                if (InpStr[0] >= Pattern[0]) and (InpStr[0] <= Pattern[2]) then
                begin //Given char matched set range inclusion. Continue testing...
                  Result := True;
                  Break;
                end
                else
                  Inc(Pattern, 3);
              end
              else
              begin //Match individual char inclusion.
                if (InpStr[0] = Pattern[0]) then
                begin //Given char matched set element inclusion. Continue testing...
                  Result := True;
                  Break;
                end
                else
                  Inc(Pattern);
              end;
            end;
          end;

          if (Result) then
          begin //Match was found. Continue further.
            Inc(InpStr);

            //Position Pattern to char after "]"
            while (Pattern[0] <> ']') and (Pattern[0] <> #0) do
              Inc(Pattern);

            if (Pattern[0] = #0) then
            begin //Invalid Pattern - missing "]"
              Result := False;
              Exit;
            end
            else
              Inc(Pattern);
          end
          else
            Exit;
        end;

    else
      begin //Match given single char.
        if (InpStr[0] <> Pattern[0]) then
        begin
          Result := False;
          Break;
        end;

        //Continue testing next char...
        Inc(InpStr);
        Inc(Pattern);
      end;
    end;
  end;
end;

<< Back to main page