Witam
Mam w pełni działający kod pobierania danych z filmweb do programu Ant Movie Catalog. Tutaj kod:
program filmwebpl;
uses
StringUtils7552;
var
MovieName : String;
SearchAddresses : TStringList;
procedure GetMovieTitles(Address : String);
var
Page : TStringList;
Line : String;
i : Integer;
LineNr, LineNr2 : Integer;
MovieTitle, MovieAddress : String;
Year : String;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
//W wynikach wyszukiwanaia wszystkie wpisy sa w jednej linii.
//Rozdzielamy zeby kazdy wynik byl w osoblnej, bo tak sie latwiej na tym operuje
Page.Text := StringReplace(Page.Text, '<li class="gr5">', #13#10 + '<li class="gr5">');
Page.Text := StringReplace(Page.Text, '<a class=searchResultTitle', #13#10 + '<a class=searchResultTitle');
LineNr := FindLine('<a class=searchResultTitle', Page, 0);
while LineNr > -1 do
begin
//Pobieranie tytułu i URL
Line := Page.GetString(LineNr);
MovieAddress := 'http://www.filmweb.pl' + TextBetween(Line, 'href="', '">');
//Czasem w adresie strony sa dodatkowe spacje
MovieAddress := StringReplace(MovieAddress, ' ', '');
MovieTitle := TextBetween(Line, '">', '</a>');
HTMLRemoveTags(MovieTitle);
MovieTitle := Trim(MovieTitle);
CorrectTextError(MovieTitle);
UniToPol(MovieTitle);
//Pobierania roku produkcji
Year := TextBetween(Line, '<span class=searchResultDetails>', '</span>');
if (Year = '') then
begin
Line := Page.GetString(LineNr + 1);
Year := TextBetween(Line, '<span class=searchResultDetails>', '</span>');
end;
HTMLRemoveTags(Year);
Year := Trim(Year);
CorrectTextError(Year);
Year := TextBefore(Year, '|', '');
Year := StringReplace(Year, ' ', '');
MovieTitle := MovieTitle + ' (' + Year + ')';
//Dodanie do drzewka
if (FindLine(MovieAddress, SearchAddresses, 0) < 0) then
begin
PickTreeAdd(MovieTitle, MovieAddress);
SearchAddresses.Add(MovieAddress);
end;
LineNr := FindLine('<a class=searchResultTitle', Page, LineNr + 1);
end;
Page.Free;
end;
procedure AnalyzeSearchPage(Address : String);
var
Page : TStringList;
Line : String;
LineNr : Integer;
Count : Integer;
MoviesCount, i, NumOfPages : Integer;
MovieVariety : String;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
LineNr := FindLine('setTimeout("', Page, 0);
if LineNr > -1 then
Page.Text := UTF8Decode(GetPage(Address));
Line := TextBetween(Page.Text, 'Znaleziono <b>', '</b>');
MoviesCount := StrToInt(Line, 0);
PickTreeClear;
if MoviesCount = 1 then
MovieVariety := ' film:'
else if (MoviesCount >= 2) and (MoviesCount <= 3) then
MovieVariety := ' filmy:'
else if (MoviesCount >= 5) and (MoviesCount <= 21) then
MovieVariety := ' filmów:'
else begin
if ((MoviesCount - ((MoviesCount div 10) * 10)) >= 2) and ((MoviesCount - ((MoviesCount div 10) * 10)) <= 4) then
MovieVariety := ' filmy:'
else
MovieVariety := ' filmów:';
end;
PickTreeAdd('Znaleziono ' + IntToStr(MoviesCount) + MovieVariety, '');
case GetOption('LimitWynikow') of
0: Count := 260;
1: Count := 1;
2: Count := 5;
3: Count := 10;
4: Count := 50;
end;
NumOfPages := (MoviesCount - 1) div 10 + 1;
if NumOfPages < 1 then
NumOfPages := 1;
if NumOfPages > Count then
NumOfPages := Count;
SearchAddresses := TStringList.Create;
for i := 1 to NumOfPages do
GetMovieTitles(Address + '&page=' + IntToStr(i));
if SearchAddresses.Count > 0 then
begin
if (MoviesCount = 1) and (SearchAddresses.Count = 1) then
begin
SetField(fieldURL, SearchAddresses.GetString(0));
AnalyzeMoviePage(SearchAddresses.GetString(0));
end else if PickTreeExec(Address) then
begin
SetField(fieldURL, Address);
AnalyzeMoviePage(Address);
end;
end else
begin
PickTreeClear;
MovieName := SmartFormatMovieName(MovieName);
win2utf(MovieName);
AnalyzeGooglesResultsPage('http://www.google.pl/search?hl=pl&num=20&q=' + MovieName);
if PickTreeExec(Address) then
begin
SetField(fieldURL, Address);
AnalyzeMoviePage(Address);
end;
end;
Page.Free;
end;
// Getting search results; spec = 1 - results, spec = 2 - www addresses
function GooglesResultsPage(GoogleAddress: string; spec: Integer):TStringList;
var
PageText: string;
Value: string;
Address: string;
begin
Result := TStringList.Create;
PageText := GetPage(GoogleAddress);
repeat
Value := TextBetween(PageText, '<h3 class=r>', '</a>');
PageText := RemainingText;
Address := TextBetween(Value, '<a href="', 'class=l');
HTMLRemoveTags(Value);
HTMLDecode(Value);
if (Pos(') - ', Value) = 0) and (Value <> '') then
begin
if (spec = 1) then
Result.Add(Value);
if (spec = 2) then
Result.Add(Address);
end;
until Value = '';
end;
function SmartFormatMovieName(s : String):String;
var
i : Integer;
url : String;
begin
url := GetUrl(s, '', '.pl');
if url <> '' then
s := StringReplace(s, url, '');
for i := 0 to 115 do
s := TextBefore(s + IntToStr(2015 - i), IntToStr(2015 - i), '');
s := FormatMovieName(s);
s := StringReplace(s, '.', ' ');
s := StringReplace(s, '*', '');
s := StringReplace(s, 'torrent', '');
s := TextBefore(s + 'dvdrip', 'dvdrip', '');
s := TextBefore(s + 'dvdscr', 'dvdscr', '');
s := TextBefore(s + 'cd1', 'cd1', '');
s := TextBefore(s + 'cd2', 'cd2', '');
CorrectTextError(s);
Result := s;
end;
procedure AnalyzeGooglesResultsPage(GoogleAddress: string);
var
i, j, k : Integer;
Value, tmp, MovieNamePriv : string;
Results, WordList, ShinglesList, vcountwords : TStringList;
begin
Results := GooglesResultsPage(GoogleAddress, 1);
PickTreeAdd('Google searsh results:', '');
// Usuwamy z wynikow wyszukiwania niepotrzebne slowa
for i := 0 to Results.Count-1 do
if (Length(SmartFormatMovieName(Results.GetString(i))) > 1) then
Value := Value + ' ' + SmartFormatMovieName(Results.GetString(i));
Value := StringReplace(' ' + Value, ' ', '');
Trim(Value);
CorrectTextError(Value);
parseToken(Value, ' ');
WordList := token;
ShinglesList := TStringList.Create;
// Wyszukujemy w wynikach wyszukiwania najlepiej pasujaca dluga fraze
// Maksymalnie 5 wyrazow, bo tyle maja filmy
for i := 1 to (*WordList.Count*) 5 do
for j := 0 to WordList.Count-i do
begin
tmp := WordList.GetString(j);
for k := 1 to i-1 do
tmp := tmp + ' ' + WordList.GetString(j + k);
if (FindLine(tmp, ShinglesList, 0) < 0) then
begin
vcountwords := TStringList.Create;
vcountwords.Text := StringReplace(Value, tmp, crlf);
// Jakosc dopasowania frazy to liczba wystapien frazy w wynikach wyszukiwania
// razy liczba slow we frazie.
ShinglesList.Add(IntToStr(9000000 - vcountwords.Count*Words(tmp)) + ' ' + tmp);
vcountwords.Free;
end;
end;
SortList(ShinglesList);
SearchAddresses := TStringList.Create;
// Wybieramy 10 dlugich i najlepiej pasujacych fraz i szukamy ich w filmweb
for i := 0 to 9 do
begin
MovieNamePriv := TextAfter(ShinglesList.GetString(i), ' ');
win2utf(MovieNamePriv);
GetMovieTitles('http://www.filmweb.pl/search/film?q=' + MovieNamePriv + '&c=film&page=1');
end;
SearchAddresses.Free;
Results.Free;
token.Free;
ShinglesList.Free;
end;
function RemoveSpace(Value : String):String;
var
i, j, k : Integer;
begin
Result := ' ' + Value + ' ';
while Pos(' ', Result) > 0 do
Result := StringReplace(Result, ' ', ' ');
Result := Copy(Result, 2, Length(Result) - 2);
end;
procedure CorrectTextError(var Value : String);
begin
Value := StringReplace(Value, ' )', ')');
Value := StringReplace(Value, '( ', '(');
Value := StringReplace(Value, ' ]', ']');
Value := StringReplace(Value, '[ ', '[');
// Value := StringReplace(Value, '''', '"');
Value := StringReplace(Value, '""', '"');
Value := StringReplace(Value, ',,', '"');
Value := StringReplace(Value, ' .', '.');
Value := StringReplace(Value, ' ,', ',');
Value := StringReplace(Value, ' :', ':');
Value := StringReplace(Value, ' ;', ';');
Value := StringReplace(Value, '.:', ':');
Value := StringReplace(Value, ':.', ':');
Value := StringReplace(Value, ' ?', '?');
Value := StringReplace(Value, ' !', '!');
Value := StringReplace(Value, '.', '. ');
Value := StringReplace(Value, ',', ', ');
Value := StringReplace(Value, '. . .', '...');
Value := StringReplace(Value, '. ..', '...');
Value := StringReplace(Value, '.. .', '...');
Value := StringReplace(Value, '..', '...');
Value := StringReplace(Value, '....', '...');
Value := StringReplace(Value, '....', '...');
Value := StringReplace(Value, '....', '...');
Value := StringReplace(Value, '...', '... ');
Value := StringReplace(Value, ' ', ' ');
Value := StringReplace(Value, ' ', '');
Value := StringReplace(Value, '[ dystrybutora]', '');
Value := StringReplace(Value, '[opis dystrybutora do wydania VHS]', '');
Value := StringReplace(Value, '–', '-');
Value := StringReplace(Value, '—', '-');
Value := StringReplace(Value, '‘', '''');
Value := StringReplace(Value, '’', '''');
Value := StringReplace(Value, '“', '"');
Value := StringReplace(Value, '”', '"');
Value := StringReplace(Value, '„', '"');
Value := StringReplace(Value, '…', '...');
Value := StringReplace(Value, ' ', ' ');
Value := StringReplace(Value, ' ', ' ');
Value := StringReplace(Value, '<', '<');
Value := StringReplace(Value, '>', '>');
Value := StringReplace(Value, '"', '"');
Value := StringReplace(Value, '&', '&');
Value := StringReplace(Value, '€', '€');
Value := StringReplace(Value, 'Á', 'A');
Value := StringReplace(Value, 'ú', 'u');
Value := StringReplace(Value, #13#10, '');
end;
procedure GetFilmCuriosities(Address : String);
var
Line, Value : String;
LineNr, i : Integer;
Page : TStringList;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
LineNr := FindLine('<dl class="curiositesList', Page, 0);
if LineNr > -1 then
begin
LineNr := LineNr + 1;
Line := Page.GetString(LineNr);
while (Pos('<script ', Line) <= 0) do
begin
Line := Page.GetString(LineNr);
if (Pos('<span>', Line) > 0) then
begin
i := i + 1;
Line := TextBetween(Line, '<span>', '</span>');
Line := StringReplace(Line, '<br/>', '');
CorrectTextError(Line);
UniToPol(Line);
HTMLRemoveTags(Line);
Value := Value + IntToStr(i) + '. ' + Line + #13#10 + #13#10;
end;
LineNr := LineNr + 1;
end;
if (Value <> '') then
begin
Value := Copy(Value, 1, Length(Value) - 4);
Value := Trim(Value);
SetField(fieldComments, Value);
end;
end;
Page.Free;
end;
procedure GetFilmReview(Address : String);
var
Line, Value : String;
LineNr, i : Integer;
Page : TStringList;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
Line := TextBetween(Page.Text, '<div class="reviewContent fontSizeCont description">', '</div>');
CorrectTextError(Line);
UniToPol(Line);
Line := StringReplace(Line, '<br/>', #13#10);
Line := StringReplace(Line, '<br />', #13#10);
HTMLRemoveTags(Line);
Line := StringReplace(Line, ' ', '');
Line := StringReplace(Line, #9, '');
SetField(fieldComments, Trim(Line));
Page.Free;
end;
procedure AnalyzeFilmReview(Address : String; Count : Integer);
var
Line, Value, PageTmp : String;
LineNr, i : Integer;
Page : TStringList;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
PickTreeClear;
PageTmp := TextBetween(Page.Text, '<li class=userRev>', '</body>');
if PageTmp = '' then
begin
Page.Free;
Exit;
end;
PickTreeAdd('Recenzje redakcji:', '');
while (Pos('Podstawowe informacje', Line) <= 0) do
begin
if Pos('<div class=revContent>', PageTmp) <= 0 then
Break;
Line := TextBetween(PageTmp, '<h3>', '</p>') + '</p>';
if Pos('<a href="', Line) > 0 then
begin
Value := 'http://www.filmweb.pl' + TextBetween(Line, '<a href="', '">');
Value := RemoveSpace(Value);
Line := TextBetween(Line, '<p>', '</p>');
CorrectTextError(Line);
Line := RemoveSpace(Line);
UniToPol(Line);
HTMLRemoveTags(Line);
PickTreeAdd(Line, UrlEncode(Value));
end
else
Break;
PageTmp := Copy(PageTmp, Pos('</p', PageTmp) + Length('</p'), Length(PageTmp));
end;
if PickTreeExec(Address) then
begin
GetFilmReview(Address);
end;
Page.Free;
end;
procedure AnalyzeFilmDescriptionsPage(Address : String);
var
Line, Value : String;
Page : TStringList;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
// Opisy (2009-02-17 modify by jlatk)
PickListClear;
Line := TextBetween(Page.Text, '<ul class=descriptionsList', '</body');
while (Pos('<li class="desc', Line) > 0) do
begin
Value := '';
Value := TextBetween(Line, '<p>', '</p>');
Value := StringReplace(Value, '#13#10', ' ');
CorrectTextError(Value);
Value := StringReplace(Value, '<br/>', #13#10);
UniToPol(Value);
HTMLRemoveTags(Value);
Value := Trim(Value);
if (GetOption('OpisFilmu') = 0) then
begin
PickListAdd(Value);
end else
begin
SetField(fieldDescription, Trim(Value));
Page.Free;
Exit;
end;
Line := Copy(Line, Pos('</p', Line) + Length('</p'), Length(Line));
end;
if not PickListExec('Wybierz opis dla "' + GetField(fieldOriginalTitle) + '"', Value) then
Value := '';
SetField(fieldDescription, Trim(Value));
Page.Free;
end;
procedure AnalyzeFilmContribs(Address : String);
var
Value : String;
Page : TStringList;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
Value := TextBetween(Page.Text, 'wrap="virtual"' + #13#10 + '>', '</textarea>');
Value := StringReplace(Value, '#13#10', ' ');
Value := StringReplace(Value, '<br/>', #13#10);
HTMLRemoveTags(Value);
UniToPol(Value);
CorrectTextError(Value);
Value := Trim(Value);
if (GetOption('OpisFilmu') = 0) then
begin
PickListAdd(Value);
end else
begin
SetField(fieldDescription, Value);
Page.Free;
Exit;
end;
Page.Free;
end;
procedure AnalyzeFilmContribsPage(Address : String);
var
Line, Value : String;
LineNr : Integer;
StartPos, EndPos : Integer;
Page : TStringList;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
PickListClear;
while (Pos('<td style="padding-left:10">opis</td>', Page.Text) > 0) do
begin
LineNr := FindLine('<td style="padding-left:10">opis</td>', Page, 0);
if LineNr > -1 then
begin
Line := Page.GetString(LineNr + 3);
Line := 'http://www.filmweb.pl' + TextBetween(Line, 'href="', '"');
if (Pos('login', Line) > 0) then
begin
Page.Free;
Exit;
end;
AnalyzeFilmContribs(Line);
Page.Text := Copy(Page.Text, Pos('<td style="padding-left:10">opis</td>', Page.Text) + 10, Length(Page.Text));
end;
end;
PickListExec('Wybierz opis dla "' + GetField(fieldOriginalTitle) + '"', Value);
SetField(fieldDescription, Value);
Page.Free;
end;
// \brief Return list (Actor, Role) in specific format
// i.e. "Actor (jako Role)" in each line
//
// \param Page HTML page with Film roles, directors, producers etc.
// \param Material Name of material which we are interested in
// i.e. "producent" means that we are looking for all producers
// \param Format Specific format which each line should look like
// i.e. "$1 (jako $2)": $1 = Actor, $2 = Role
// \param Format2 Use Format2 when Role doesn't exist
//
// \return String One String with all Actors and Roles
//
function GetMaterialsFormat(Page : TStringList; Material : String;
Format : String; Format2 : String):String;
var
Line, Value, Actor, Role : String;
begin
Line := TextBetween(Page.Text, '<div class="castListWrapper">', '<script');
Line := TextBetween(Line, Material, '</dd>');
while (Pos('<li', Line) > 0) do
begin
Line := Copy(Line, Pos('<li id="role', Line), Length(Line));
Actor := TextBetween(Line, '<span>', '</span>');
Role := TextBetween(Line, '<div>', '</div>');
CorrectTextError(Role);
Role := Trim(Role);
HTMLRemoveTags(Role);
// Remove "(jako glos)" or "(producent wykonawczy)" from Roles
if (GetOption('CzyDodDaneWPoluAktorzy') <> 1) and (GetOption('CzyDodDaneWPoluAktorzy') <> 3) then
while (Pos('(', Role) > 0) and (Pos(')', Role) > Pos('(', Role)) do
Role := Copy(Role, 1, Pos('(', Role) - 1) + Copy(Role, Pos(')', Role) + 1, Length(Role));
if (Length(Role) > 0) and (Length(Actor) > 0) then
begin
Value := Format;
Value := StringReplace(Value, '$1', Actor);
Value := StringReplace(Value, '$2', Role);
Result := Result + Value;
end
else
if (Length(Actor) > 0) then
begin
Value := Format2;
Value := StringReplace(Value, '$1', Actor);
Result := Result + Value;
end;
Line := Copy(Line, Pos('</li', Line) + Length('</li'), Length(Line));
Role := '';
Actor := '';
end;
HTMLRemoveTags(Result);
UniToPol(Result);
end;
function GetMaterials(Page : TStringList; Materials : String):String;
begin
Result := GetMaterialsFormat(Page, Materials, '$1 $2, ', '$1, ');
Result := Copy(Result, 1, Length(Result) - Length(', '));
end;
procedure AnalyzeFilmRolesPage(Address : String);
var
i : Integer;
Line, Value : String;
LineNr : Integer;
StartPos : Integer;
Page : TStringList;
Prefix, Sufix : String;
Count : Integer;
ActorsPage : TStringList;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
// Reżyseria (2009-02-17 modify by jlatk)
Value := GetMaterials(Page, 'reżyser');
if (Length(Value) > 0) then
SetField(fieldDirector, Trim(Value));
// Producent (2009-02-17 modify by jlatk)
Value := GetMaterials(Page, 'producent');
if (Length(Value) > 0) then
SetField(fieldProducer, Trim(Value));
Value := '';
// Obsada (2009-02-17 modify by jlatk)
begin
case GetOption('UkladAktorow') of
0, 1: Prefix := ' (jako ';
2, 3: Prefix := ' - ';
end;
case GetOption('UkladAktorow') of
0, 2, 4: Sufix := ', ';
1, 3, 5: Sufix := #13#10;
end;
case GetOption('IloscAktorow') of
0: Count := 999;
1: Count := 10;
2: Count := 20;
3: Count := 30;
4: Count := 40;
5: Count := 50;
end;
ActorsPage := TStringList.Create;
ActorsPage.Text := GetMaterialsFormat(Page, 'aktor', '$1 : $2' + #13#10, '$1' + #13#10);
for LineNr := 0 to ActorsPage.Count - 1 do
begin
//Pobieranie aktorów/postaci
if Pos(' : ', ActorsPage.GetString(LineNr)) > 0 then
begin
if (GetOption('UkladAktorow') < 2) then
Value := Value + Trim(StringReplace(ActorsPage.GetString(LineNr), ' : ', Prefix)) + ')' + Sufix
else
if (GetOption('UkladAktorow') >= 2) and (GetOption('UkladAktorow') < 4) then
Value := Value + Trim(StringReplace(ActorsPage.GetString(LineNr), ' : ', Prefix)) + Sufix
else
Value := Value + Trim(Copy(ActorsPage.GetString(LineNr), 1, Pos(' : ', ActorsPage.GetString(LineNr)) - 1)) + Sufix;
Count := Count - 1;
end else if Length(ActorsPage.GetString(LineNr)) > 0 then
begin
Value := Value + Trim(ActorsPage.GetString(LineNr)) + Sufix;
Count := Count - 1;
end;
//Przerywa po określonej ilości dodanych aktorów
if Count = 0 then
Break;
end;
Value := Copy(Value, 1, Length(Value) - Length(Sufix));
Value := StringReplace(Value, ' ', ' ');
UniToPol(Value);
SetField(fieldActors, Value); //Zmienić kodowanie
end;
Value := '';
if GetOption('CzyDodDaneWPoluAktorzy') >= 2 then
begin
// Scenariusz (2009-02-17 by jlatk)
Value := GetMaterials(Page, 'scenarzysta');
if (Length(Value) > 0) then
begin
Value := GetField(fieldActors) + #13#10 + 'Scenariusz: ' + Trim(Value) + '; ';
SetField(fieldActors, Value);
end;
// Materiały do scenariusza (2009-02-17 by jlatk)
Value := GetMaterials(Page, 'materiały do scenariusza');
if (Length(Value) > 0) then
begin
Value := GetField(fieldActors) + #13#10 + 'Materiały do scenariusza: ' + Trim(Value) + '; ';
SetField(fieldActors, Value);
end;
// Muzyka (2009-02-17 by jlatk)
Value := GetMaterials(Page, 'muzyka');
if (Length(Value) > 0) then
begin
Value := GetField(fieldActors) + #13#10 + 'Myzyka: ' + Trim(Value) + '; ';
SetField(fieldActors, Value);
end;
// Zdjęcia (2009-02-17 by jlatk)
Value := GetMaterials(Page, 'zdjęcia');
if (Length(Value) > 0) then
begin
Value := GetField(fieldActors) + #13#10 + 'Zdjęcia: ' + Trim(Value) + '; ';
SetField(fieldActors, Value);
end;
// Montaż (2009-02-17 by jlatk)
Value := GetMaterials(Page, 'montaż');
if (Length(Value) > 0) then
begin
Value := GetField(fieldActors) + #13#10 + 'Montaż: ' + Trim(Value) + '; ';
SetField(fieldActors, Value);
end;
// Scenografia (2009-02-17 by jlatk)
Value := GetMaterials(Page, 'scenografia');
if (Length(Value) > 0) then
begin
Value := GetField(fieldActors) + #13#10 + 'Scenografia: ' + Trim(Value) + '; ';
SetField(fieldActors, Value);
end;
// Kostiumy (2009-02-17 by jlatk)
Value := GetMaterials(Page, 'kostiumy');
if (Length(Value) > 0) then
begin
Value := GetField(fieldActors) + #13#10 + 'Kostiumy: ' + Trim(Value) + '; ';
SetField(fieldActors, Value);
end;
// Dźwięk (2009-02-17 by jlatk)
Value := GetMaterials(Page, 'dźwięk');
if (Length(Value) > 0) then
begin
Value := GetField(fieldActors) + #13#10 + 'Dźwięk: ' + Trim(Value) + '; ';
SetField(fieldActors, Value);
end;
end;
Page.Free;
ActorsPage.Free;
end;
procedure AnalyzeMoviePage(Address : String);
var
Page : TStringList;
Line, Value : String;
Rates : String;
LineNr : Integer;
StartPos, EndPos : Integer;
Szukane, SzukaneEnd : String;
Separator : String;
begin
Page := TStringList.Create;
Page.Text := UTF8Decode(GetPage(Address));
// Tytuł polski i oryginalny
Line := TextBetween(Page.Text, '<title>', '</title>');
Line := StringReplace(Line, #9, '');
Line := StringReplace(Line, #13#10, '');
HTMLRemoveTags(Line);
UniToPol(Line);
// sprawdzamy czy jest ukośnik oddzieljący tytuł polski od oryginalnego
Szukane := '/';
SzukaneEnd := '(';
StartPos := Pos(Szukane, Line);
EndPos := Pos(SzukaneEnd, Line);
if StartPos > 0 then
begin
// polski
Value := Copy(Line, 0, StartPos-1);
if (Length(Trim(Value)) > 0) then
SetField(fieldTranslatedTitle, Trim(Value));
// oryginalny
Value := TextBetween(Line, Szukane, SzukaneEnd);
if (Length(Trim(Value)) > 0) then
SetField(fieldOriginalTitle, Trim(Value));
end else begin
// nie ma polskiego tytulu
Value := Copy(Line, 0, EndPos-1);
if (Length(Value) > 0) then
SetField(fieldOriginalTitle, Trim(Value));
// jeśli nie ma polskiego tytułu, a jest AKA, wstawiamy AKA
Value := TextBetween(Page.Text, '(AKA', ')');
Value := StringReplace(Value, #9, '');
Value := StringReplace(Value, #13#10, '');
HTMLRemoveTags(Value);
UniToPol(Value);
if (Length(Trim(Value)) > 0) then
SetField(fieldTranslatedTitle, Trim(Value));
end;
// nie ma polskiego a jest orginalny to wstawiamy orginalny
if (Length(GetField(fieldOriginalTitle)) > 0) and (Length(GetField(fieldTranslatedTitle)) < 3) then
SetField(fieldTranslatedTitle, GetField(fieldOriginalTitle));
// Rok
Value := TextBetween(Page.Text, '<span id=filmYear class=filmYear>', '</span>');
if (Length(Value) > 0) then
SetField(fieldYear, Trim(Value));
// Ratting
Rates := TextBetween(Page.Text, '<strong rel="v:rating"><span property="v:average">', '</span></strong>');
Rates := StringReplace(Rates, ' ', '');
if (Length(Rates) > 0) then
SetField(fieldRating, Trim(Rates));
// Kraj
LineNr := FindLine('produkcja:', Page, 0);
if LineNr > -1 then
begin
case GetOption('SeparatorKraju') of
0: Separator := ' / ';
1: Separator := ', ';
2: Separator := '/';
end;
Line := Page.GetString(LineNr);
Value := '';
while Pos('?countryIds=', Line) > 0 do
begin
Szukane := '?countryIds=';
StartPos := Pos(Szukane, Line) + Length(Szukane);
Line := Copy(Line, StartPos, Length(Line) - StartPos + 1);
Szukane := '">';
StartPos := Pos(Szukane, Line) + Length(Szukane);
Line := Copy(Line, StartPos, Length(Line) - StartPos + 1);
Value := Value + Copy(Line, 1, Pos('</a>', Line) - 1) + Separator;
end;
Value := Copy(Value, 1, Length(Value) - Length(Separator));
UniToPol(Value);
SetField(fieldCountry, Value);
end;
// Gatunek
LineNr := FindLine('gatunek:', Page, 0);
if LineNr > -1 then
begin
case GetOption('SeparatorGatunku') of
0: Separator := ' / ';
1: Separator := ', ';
2: Separator := '/';
end;
Line := Page.GetString(LineNr);
Value := '';
while Pos('genreIds=', Line) > 0 do
begin
Szukane := 'genreIds=';
StartPos := Pos(Szukane, Line) + Length(Szukane);
Line := Copy(Line, StartPos, Length(Line) - StartPos + 1);
Szukane := '">';
StartPos := Pos(Szukane, Line) + Length(Szukane);
Line := Copy(Line, StartPos, Length(Line) - StartPos + 1);
Value := Value + Copy(Line, 1, Pos('</', Line) - 1) + Separator;
end;
Value := Copy(Value, 1, Length(Value) - Length(Separator));
Value := StringReplace(Value, ', ', Separator);
UniToPol(Value);
SetField(fieldCategory, Trim(Value));
end;
// Czas trwania
if ((GetField(fieldLength) = '') or (GetField(fieldVideoFormat) = '')) then
begin
Value := TextBetween(Page.Text, '<div class=time>', '<span>');
if (Length(Value) > 0) then
SetField(fieldLength, Trim(Value));
end;
// Plakat
Line := TextBetween(Page.Text, '<div class=posterLightbox>', '</div>');
case GetOption('Plakat') of
1: begin
Szukane := TextBetween(Line, 'href="', '"');
Szukane := TextBefore(Szukane, '?', '');
GetPicture(Szukane);
end;
2: begin
Szukane := TextBetween(Line, 'src="', '"');
Szukane := TextBefore(Szukane, '?', '');
GetPicture(Szukane);
end;
end;
// Obsada, reżyseria, scenariusz, materiały do scenariusza, produkcja,
// muzyka, zdjęcia, montaż, scenografia, kostiumy, dźwięk
AnalyzeFilmRolesPage(Address + '/cast');
// Opis
AnalyzeFilmDescriptionsPage(Address + '/descs');
// Komentarze
case GetOption('Komentarze') of
1: begin
Line := TextBetween(Page.Text, '<span class=hdrAddInfo>(', ')</span>');
if Line <> '' then
begin
Value := TextBetween(Page.Text, '<div class="reviews', '</body');
Value := TextBetween(Value, '/reviews/', '"');
GetFilmReview('http://www.filmweb.pl/reviews/' + Value);
end;
end;
2: begin
Line := TextBetween(Page.Text, '<span class=hdrAddInfo>(', ')</span>');
if Line <> '' then
AnalyzeFilmReview(Address + '/reviews', StrToInt(Line, 0));
end;
3: begin
Line := TextBetween(Page.Text, 'Ciekawostki<span>', '</span>');
LineNr := StrToInt(TextBetween(Line, '(', ')'), 0);
if LineNr > 0 then
begin
GetFilmCuriosities(Address + '/trivia');
end;
end;
end;
Page.Free;
end;
procedure UniToPol(var Value : String);
begin
Value := StringReplace(Value, 'ą', 'ą');
Value := StringReplace(Value, 'ć', 'ć');
Value := StringReplace(Value, 'ę', 'ę');
Value := StringReplace(Value, 'ł', 'ł');
Value := StringReplace(Value, 'ń', 'ń');
Value := StringReplace(Value, 'ó', 'ó');
Value := StringReplace(Value, 'ś', 'ś');
Value := StringReplace(Value, 'ź', 'ź');
Value := StringReplace(Value, 'ż', 'ż');
Value := StringReplace(Value, 'Ą', 'Ą');
Value := StringReplace(Value, 'Ć', 'Ć');
Value := StringReplace(Value, 'Ę', 'Ę');
Value := StringReplace(Value, 'Ł', 'Ł');
Value := StringReplace(Value, 'Ń', 'Ń');
Value := StringReplace(Value, 'Ó', 'Ó');
Value := StringReplace(Value, 'Ś', 'Ś');
Value := StringReplace(Value, 'Ź', 'Ź');
Value := StringReplace(Value, 'Ż', 'Ż');
Value := StringReplace(Value, '¥', 'Ą');
Value := StringReplace(Value, '¹', 'ą');
Value := StringReplace(Value, 'Æ', 'Ć');
Value := StringReplace(Value, 'æ', 'ć');
Value := StringReplace(Value, 'Ê', 'Ę');
Value := StringReplace(Value, 'ê', 'ę');
Value := StringReplace(Value, '£', 'Ł');
Value := StringReplace(Value, '³', 'ł');
Value := StringReplace(Value, 'Ñ', 'Ń');
Value := StringReplace(Value, 'ñ', 'ń');
Value := StringReplace(Value, 'Ó', 'Ó');
Value := StringReplace(Value, 'ó', 'ó');
Value := StringReplace(Value, 'Œ', 'Ś');
Value := StringReplace(Value, 'œ', 'ś');
Value := StringReplace(Value, '', 'Ź');
Value := StringReplace(Value, 'Ÿ', 'ź');
Value := StringReplace(Value, '¯', 'Ż');
Value := StringReplace(Value, '¿', 'ż');
Value := StringReplace(Value, 'Ą', 'Ą');
Value := StringReplace(Value, 'ą', 'ą');
Value := StringReplace(Value, 'Ć', 'Ć');
Value := StringReplace(Value, 'ć', 'ć');
Value := StringReplace(Value, 'Ę', 'Ę');
Value := StringReplace(Value, 'ę', 'ę');
Value := StringReplace(Value, 'Ł', 'Ł');
Value := StringReplace(Value, 'ł', 'ł');
Value := StringReplace(Value, 'Ń', 'Ń');
Value := StringReplace(Value, 'ń', 'ń');
Value := StringReplace(Value, 'Ó', 'Ó');
Value := StringReplace(Value, 'ó', 'ó');
Value := StringReplace(Value, 'É', 'É');
Value := StringReplace(Value, 'é', 'é');
Value := StringReplace(Value, 'Ś', 'Ś');
Value := StringReplace(Value, 'ś', 'ś');
Value := StringReplace(Value, 'Ź', 'Ź');
Value := StringReplace(Value, 'ź', 'ź');
Value := StringReplace(Value, 'Ż', 'Ż');
Value := StringReplace(Value, 'ż', 'ż');
Value := StringReplace(Value, '&egrave;', 'é');
Value := StringReplace(Value, 'ç', 'ç');
Value := StringReplace(Value, 'î', 'î');
Value := StringReplace(Value, '–', '-');
Value := StringReplace(Value, 'ö', 'ö');
Value := StringReplace(Value, '…', '...');
Value := StringReplace(Value, 'ñ', 'n');
Value := StringReplace(Value, 'í', 'í');
Value := StringReplace(Value, 'á', 'á');
Value := StringReplace(Value, 'à', 'á');
Value := StringReplace(Value, 'ö', 'é');
Value := StringReplace(Value, 'ò', 'o');
Value := StringReplace(Value, 'ô', 'ô');
Value := StringReplace(Value, '„', '"');
Value := StringReplace(Value, '”', '"');
Value := StringReplace(Value, '“', '"');
Value := StringReplace(Value, 'è', 'e');
end;
// Convert CP1250 (Windows EE) to UTF8
procedure win2utf(var Value : String);
begin
Value := StringReplace(Value, 'ę', '%C4%99');
Value := StringReplace(Value, 'Ę', '%C4%98');
Value := StringReplace(Value, 'ó', '%C3%B3');
Value := StringReplace(Value, 'Ó', '%C3%93');
Value := StringReplace(Value, 'ą', '%C4%85');
Value := StringReplace(Value, 'Ą', '%C4%84');
Value := StringReplace(Value, 'ś', '%C5%9B');
Value := StringReplace(Value, 'Ś', '%C5%9A');
Value := StringReplace(Value, 'ł', '%C5%82');
Value := StringReplace(Value, 'Ł', '%C5%81');
Value := StringReplace(Value, 'ż', '%C5%BC');
Value := StringReplace(Value, 'Ż', '%C5%BB');
Value := StringReplace(Value, 'ź', '%C5%BA');
Value := StringReplace(Value, 'Ź', '%C5%B9');
Value := StringReplace(Value, 'ć', '%C4%87');
Value := StringReplace(Value, 'Ć', '%C4%86');
Value := StringReplace(Value, 'ń', '%C5%84');
Value := StringReplace(Value, 'Ń', '%C5%83');
Value := StringReplace(Value, ' ', '+');
end;
Chciałbym sobie zrobić pobieranie danych w swoim programie. Niestety, utknąłem na razie na jednej funkcji, której nie wiem skąd wziąć. Jest to: TextBetween. Nigdzie nie ma tej funkcji w kodzie, przypuszczam, że jest w programie. Niestety - mimo podanych źródeł programu na stronie - nie ma tego kodu (pewnie dlatego, że są podane źródła w starej wersji programu).
Jak ją zdobyć?
Może ktoś ma ten kod "czysty" - czyli, żebym mógł sobie od razu dodać w swoim programie?
Z góry dziękuję za pomoc.