The biggest issue with sorting text strings is that simple string comparison won't cut it for us humans. (Wiki)
For example, let's say we have this list:
file 3.txt
file 2.txt
file 10.txt
file 9.txt
file 1.txt
Regular sorting would produce this:
file 1.txt
file 10.txt
file 2.txt
file 3.txt
file 9.txt
But that is not how we humans sort, we'd like to see (called "natural sort"):
file 1.txt
file 2.txt
file 3.txt
file 9.txt
file 10.txt
In case you have seen my QuickSort examples (for example this one - I'll add the code below), you'll see that we often use "StrIcomp" to compare 2 strings to see which one goes first. Unfortunately results in the wrong output I just showed. So I wrote a "drop in" replacement for this "NaturalCompare". Simply replace the StrIcomp with NaturalCompare and the QuickSort will now produce a natural sorted list.
function NaturalCompare(const S1, S2: string): Integer;
var
I, J: Integer;
N1, N2: Int64;
C1, C2: Char;
begin
I := 1;
J := 1;
while (I <= Length(S1)) and (J <= Length(S2)) do
begin
C1 := S1[I];
C2 := S2[J];
// If both characters are digits, compare numbers
if (C1 in ['0'..'9']) and (C2 in ['0'..'9']) then
begin
N1 := 0;
while (I <= Length(S1)) and (S1[I] in ['0'..'9']) do
begin
N1 := N1 * 10 + Ord(S1[I]) - Ord('0');
Inc(I);
end;
N2 := 0;
while (J <= Length(S2)) and (S2[J] in ['0'..'9']) do
begin
N2 := N2 * 10 + Ord(S2[J]) - Ord('0');
Inc(J);
end;
if N1 <> N2 then
Exit(N1 - N2);
end
else
begin
// Case-insensitive character compare
C1 := UpCase(C1);
C2 := UpCase(C2);
if C1 <> C2 then
Exit(Ord(C1) - Ord(C2));
Inc(I);
Inc(J);
end;
end;
// If all equal so far, shorter string comes first
Result := Length(S1) - Length(S2);
end;
Example QuickStort procedure:
procedure QuickSort(var A: TStringList);
procedure Sort(L, R: Integer);
var
I, J: Integer;
Y, X:string;
begin
I:= L; J:= R; X:= A[(L+R) DIV 2];
repeat
// was: while StrIcomp(pchar(A),pchar(X))<0 do inc(I);
// was: while StrIComp(pchar(X),pchar(A[J]))<0 do dec(J);
while NaturalCompare(pchar(A),pchar(X))<0 do inc(I);
while NaturalCompare(pchar(X),pchar(A[J]))<0 do dec(J);
if I <= J then
begin
Y:= A; A:= A[J]; A[J]:= Y;
inc(I); dec(J);
end;
until I > J;
if L < J then Sort(L,J);
if I < R then Sort(I,R);
end;
begin
Sort(0,A.Count-1);
end;
This topic was modified 2 hours ago by
Hans