1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
// Usage example: gather the links of a web page into a caller-owned string list.
| ListeLien := TStringList.Create;
try
// The list is cleared by GetLinksInHTMLDoc before it is filled with the
// distinct href values found in the downloaded document.
GetLinksInHTMLDoc('http://www.unsite.com', ListeLien);
...
finally
// Always release the list, even if the download or the parsing raised.
ListeLien.Free;
end;
{ Downloads the document at szURL and fills Links with the distinct values of
  every quoted href attribute it contains, in order of first appearance.

  szURL - address of the document, fetched with TIdHTTP.Get.
  Links - list created and owned by the caller; it is cleared first and must
          not be nil.

  Only quoted values (href="..." or href='...') are collected; blanks are
  allowed around the '=' sign and the attribute name is matched without
  regard to ASCII case. Any exception raised by the download propagates to
  the caller after the HTTP component has been released. }
procedure TFormWeb.GetLinksInHTMLDoc(const szURL : String; var Links: TStringList);
const
  // Characters that may surround the '=' sign of an attribute.
  Blanks = [#0, #9, #10, #13, ' '];

  { Returns the 1-based index of the first occurrence of szSubStr in szStr
    located at or after nIndex, ignoring ASCII case, or 0 when there is none. }
  function PosEx(const szSubStr, szStr: String; nIndex: Integer): Integer;
  var
    nMax, nSubLen, nOffset: Integer;
  begin
    Result := 0;
    nSubLen := Length(szSubStr);
    if nSubLen = 0 then
      Exit;
    nMax := Length(szStr) - nSubLen + 1;
    if nIndex < 1 then
      nIndex := 1;
    while nIndex <= nMax do
    begin
      // Compare in place instead of allocating a Copy() at every position.
      nOffset := 0;
      while (nOffset < nSubLen) and
        (UpCase(szStr[nIndex + nOffset]) = UpCase(szSubStr[nOffset + 1])) do
        Inc(nOffset);
      if nOffset = nSubLen then
      begin
        Result := nIndex;
        Exit;
      end;
      Inc(nIndex);
    end;
  end;

var
  HTTP: TIdHTTP;
  szContent, szLink: String;
  nPosition, nBeginPos, nSearchFrom, nLength: Integer;
  chQuote: Char;
begin
  HTTP := TIdHTTP.Create(nil);
  try
    Links.Clear;
    szContent := HTTP.Get(szURL);
    nLength := Length(szContent);
    nSearchFrom := 1;
    while True do
    begin
      nPosition := PosEx('href', szContent, nSearchFrom);
      if nPosition = 0 then
        Break;
      Inc(nPosition, 4);
      // If this occurrence is rejected below, resume right after "href" so
      // that no character is skipped without having been examined.
      nSearchFrom := nPosition;

      // Optional blanks, then a mandatory '='.
      while (nPosition <= nLength) and (szContent[nPosition] in Blanks) do
        Inc(nPosition);
      if (nPosition > nLength) or (szContent[nPosition] <> '=') then
        Continue;
      Inc(nPosition);

      // Optional blanks, then the opening quote (double or single).
      while (nPosition <= nLength) and (szContent[nPosition] in Blanks) do
        Inc(nPosition);
      if nPosition > nLength then
        Continue;
      chQuote := szContent[nPosition];
      if (chQuote <> '"') and (chQuote <> '''') then
        Continue;
      Inc(nPosition);

      // The value runs up to the matching quote, or to the end of the
      // document when the quote is never closed.
      nBeginPos := nPosition;
      while (nPosition <= nLength) and (szContent[nPosition] <> chQuote) do
        Inc(nPosition);
      szLink := Copy(szContent, nBeginPos, nPosition - nBeginPos);
      if (szLink <> '') and (Links.IndexOf(szLink) = -1) then
        Links.Add(szLink);

      // Continue after the closing quote; the value itself is not rescanned.
      nSearchFrom := nPosition + 1;
    end;
  finally
    HTTP.Free;
  end;
end;
Partager