Pobieranie z filmweb - problem z gotowcem

0

Witam
Mam w pełni działający kod pobierania danych z filmweb do programu Ant Movie Catalog. Tutaj kod:

program filmwebpl;

uses
	StringUtils7552;

var
	MovieName : String;
	SearchAddresses : TStringList;

procedure GetMovieTitles(Address : String);
var
  Page : TStringList;
  Line : String;
  i : Integer;
  LineNr, LineNr2 : Integer;
  MovieTitle, MovieAddress : String;
  Year : String;
begin
  Page := TStringList.Create;
  Page.Text := UTF8Decode(GetPage(Address));
  //W wynikach wyszukiwanaia wszystkie wpisy sa w jednej linii.
  //Rozdzielamy zeby kazdy wynik byl w osoblnej, bo tak sie latwiej na tym operuje
  Page.Text := StringReplace(Page.Text, '<li class="gr5">', #13#10 + '<li class="gr5">');
  Page.Text := StringReplace(Page.Text, '<a class=searchResultTitle', #13#10 + '<a class=searchResultTitle');

  LineNr := FindLine('<a class=searchResultTitle', Page, 0);
  while LineNr > -1 do
  begin
    //Pobieranie tytułu i URL
    Line := Page.GetString(LineNr);
    MovieAddress := 'http://www.filmweb.pl' + TextBetween(Line, 'href="', '">');
    //Czasem w adresie strony sa dodatkowe spacje
    MovieAddress := StringReplace(MovieAddress, ' ', '');
    MovieTitle := TextBetween(Line, '">', '</a>');
    HTMLRemoveTags(MovieTitle);
    MovieTitle := Trim(MovieTitle);
    CorrectTextError(MovieTitle);
    UniToPol(MovieTitle);
    //Pobierania roku produkcji
    Year := TextBetween(Line, '<span class=searchResultDetails>', '</span>');
    if (Year = '') then
    begin
      Line := Page.GetString(LineNr + 1);
      Year := TextBetween(Line, '<span class=searchResultDetails>', '</span>');
    end;
    HTMLRemoveTags(Year);
    Year := Trim(Year);
    CorrectTextError(Year);
    Year := TextBefore(Year, '|', '');
    Year := StringReplace(Year, ' ', '');
    MovieTitle := MovieTitle + ' (' + Year + ')';
    //Dodanie do drzewka
    if (FindLine(MovieAddress, SearchAddresses, 0) < 0) then
    begin
      PickTreeAdd(MovieTitle, MovieAddress);
      SearchAddresses.Add(MovieAddress);
    end;
    LineNr := FindLine('<a class=searchResultTitle', Page, LineNr + 1);
  end;
  Page.Free;
end;


procedure AnalyzeSearchPage(Address : String);
var
  Page : TStringList;
  Line : String;
  LineNr : Integer;
  Count : Integer;
  MoviesCount, i, NumOfPages : Integer;
  MovieVariety : String;
begin
  Page := TStringList.Create;
  Page.Text := UTF8Decode(GetPage(Address));

  LineNr := FindLine('setTimeout("', Page, 0);
  if LineNr > -1 then
    Page.Text := UTF8Decode(GetPage(Address));

  Line := TextBetween(Page.Text, 'Znaleziono <b>', '</b>');
  MoviesCount := StrToInt(Line, 0);

  PickTreeClear;
  if MoviesCount = 1 then
    MovieVariety := ' film:'
  else if (MoviesCount >= 2) and (MoviesCount <= 3) then
    MovieVariety := ' filmy:'
  else if (MoviesCount >= 5) and (MoviesCount <= 21) then
    MovieVariety := ' filmów:'
  else begin
    if ((MoviesCount - ((MoviesCount div 10) * 10)) >= 2) and ((MoviesCount - ((MoviesCount div 10) * 10)) <= 4) then
       MovieVariety := ' filmy:'
    else
       MovieVariety := ' filmów:';
  end;

  PickTreeAdd('Znaleziono ' + IntToStr(MoviesCount) + MovieVariety, '');

	case GetOption('LimitWynikow') of
		0:	Count := 260;
		1:	Count := 1;
		2:	Count := 5;
		3:	Count := 10;
		4:	Count := 50;
  end;

  NumOfPages := (MoviesCount - 1) div 10 + 1;

  if NumOfPages < 1 then
    NumOfPages := 1;
  if NumOfPages > Count then
    NumOfPages := Count;

  SearchAddresses := TStringList.Create;
  for i := 1 to NumOfPages do
    GetMovieTitles(Address + '&page=' + IntToStr(i));

  if SearchAddresses.Count > 0 then
  begin
    if (MoviesCount = 1) and (SearchAddresses.Count = 1) then
    begin
      SetField(fieldURL, SearchAddresses.GetString(0));
      AnalyzeMoviePage(SearchAddresses.GetString(0));
    end else if PickTreeExec(Address) then
    begin
      SetField(fieldURL, Address);
      AnalyzeMoviePage(Address);
    end;
  end else
  begin
    PickTreeClear;
    MovieName := SmartFormatMovieName(MovieName);
    win2utf(MovieName);
    AnalyzeGooglesResultsPage('http://www.google.pl/search?hl=pl&num=20&q=' + MovieName);
    if PickTreeExec(Address) then
    begin
      SetField(fieldURL, Address);
      AnalyzeMoviePage(Address);
    end;
  end;
  Page.Free;
end;

// Getting search results; spec = 1 - results, spec = 2 - www addresses
function GooglesResultsPage(GoogleAddress: string; spec: Integer):TStringList;
var
  PageText: string;
  Value: string;
  Address: string;
begin
  Result := TStringList.Create;
  PageText := GetPage(GoogleAddress);
  repeat
    Value := TextBetween(PageText, '<h3 class=r>', '</a>');
    PageText := RemainingText;
    Address := TextBetween(Value, '<a href="', 'class=l');
    HTMLRemoveTags(Value);
    HTMLDecode(Value);
    if (Pos(') - ', Value) = 0) and (Value <> '') then
      begin
        if (spec = 1) then
          Result.Add(Value);
        if (spec = 2) then
          Result.Add(Address);
      end;
  until Value = '';
end;

function SmartFormatMovieName(s : String):String;
var
  i : Integer;
  url : String;
begin
  url := GetUrl(s, '', '.pl');
  if url <> '' then
    s := StringReplace(s, url, '');
	for i := 0 to 115 do
    s := TextBefore(s + IntToStr(2015 - i), IntToStr(2015 - i), '');
  s := FormatMovieName(s);
  s := StringReplace(s, '.', ' ');
  s := StringReplace(s, '*', '');
  s := StringReplace(s, 'torrent', '');
  s := TextBefore(s + 'dvdrip', 'dvdrip', '');
  s := TextBefore(s + 'dvdscr', 'dvdscr', '');
  s := TextBefore(s + 'cd1', 'cd1', '');
  s := TextBefore(s + 'cd2', 'cd2', '');

  CorrectTextError(s);
  Result := s;
end;

procedure AnalyzeGooglesResultsPage(GoogleAddress: string);
var
  i, j, k : Integer;
  Value, tmp, MovieNamePriv : string;
  Results, WordList, ShinglesList, vcountwords : TStringList;
begin
  Results := GooglesResultsPage(GoogleAddress, 1);

  PickTreeAdd('Google searsh results:', '');
  // Usuwamy z wynikow wyszukiwania niepotrzebne slowa
  for i := 0 to Results.Count-1 do
    if (Length(SmartFormatMovieName(Results.GetString(i))) > 1) then
      Value := Value + ' ' + SmartFormatMovieName(Results.GetString(i));

  Value := StringReplace(' ' + Value, '  ', '');
  Trim(Value);
  CorrectTextError(Value);

  parseToken(Value, ' ');
  WordList := token;
  ShinglesList := TStringList.Create;

  // Wyszukujemy w wynikach wyszukiwania najlepiej pasujaca dluga fraze
  // Maksymalnie 5 wyrazow, bo tyle maja filmy
  for i := 1 to (*WordList.Count*) 5 do
    for j := 0 to WordList.Count-i do
    begin
      tmp := WordList.GetString(j);
      for k := 1 to i-1 do
        tmp := tmp + ' ' + WordList.GetString(j + k);
      if (FindLine(tmp, ShinglesList, 0) < 0) then
      begin
        vcountwords := TStringList.Create;
      	vcountwords.Text := StringReplace(Value, tmp, crlf);
      	// Jakosc dopasowania frazy to liczba wystapien frazy w wynikach wyszukiwania
      	// razy liczba slow we frazie.
        ShinglesList.Add(IntToStr(9000000 - vcountwords.Count*Words(tmp)) + ' ' + tmp);
      	vcountwords.Free;
      end;
    end;
    
  SortList(ShinglesList);
  SearchAddresses := TStringList.Create;
  // Wybieramy 10 dlugich i najlepiej pasujacych fraz i szukamy ich w filmweb
  for i := 0 to 9 do
    begin
      MovieNamePriv := TextAfter(ShinglesList.GetString(i), ' ');
			win2utf(MovieNamePriv);
      GetMovieTitles('http://www.filmweb.pl/search/film?q=' + MovieNamePriv + '&c=film&page=1');
    end;
    
  SearchAddresses.Free;
  Results.Free;
  token.Free;
  ShinglesList.Free;
end;

function RemoveSpace(Value : String):String;
var
	i, j, k : Integer;
begin
  Result := ' ' + Value + ' ';
  while Pos('  ', Result) > 0 do
    Result := StringReplace(Result, '  ', ' ');

	Result := Copy(Result, 2, Length(Result) - 2);
end;

procedure CorrectTextError(var Value : String);
begin
	Value := StringReplace(Value, ' )', ')');
	Value := StringReplace(Value, '( ', '(');
	Value := StringReplace(Value, ' ]', ']');
	Value := StringReplace(Value, '[ ', '[');
//	Value := StringReplace(Value, '''', '"');
	Value := StringReplace(Value, '""', '"');
	Value := StringReplace(Value, ',,', '"');
	Value := StringReplace(Value, ' .', '.');
	Value := StringReplace(Value, ' ,', ',');
	Value := StringReplace(Value, ' :', ':');
	Value := StringReplace(Value, ' ;', ';');
	Value := StringReplace(Value, '.:', ':');
	Value := StringReplace(Value, ':.', ':');
	Value := StringReplace(Value, ' ?', '?');
	Value := StringReplace(Value, ' !', '!');
	Value := StringReplace(Value, '.', '. ');
	Value := StringReplace(Value, ',', ', ');
	Value := StringReplace(Value, '. . .', '...');
	Value := StringReplace(Value, '. ..', '...');
	Value := StringReplace(Value, '.. .', '...');
	Value := StringReplace(Value, '..', '...');
	Value := StringReplace(Value, '....', '...');
	Value := StringReplace(Value, '....', '...');
	Value := StringReplace(Value, '....', '...');
	Value := StringReplace(Value, '...', '... ');
	Value := StringReplace(Value, '  ', ' ');
  Value := StringReplace(Value, '	', '');
  Value := StringReplace(Value, '[ dystrybutora]', '');
  Value := StringReplace(Value, '[opis dystrybutora do wydania VHS]', '');
	Value := StringReplace(Value, '&#8211;', '-');
	Value := StringReplace(Value, '&#8212;', '-');
	Value := StringReplace(Value, '&#8216;', '''');
	Value := StringReplace(Value, '&#8217;', '''');
	Value := StringReplace(Value, '&#8220;', '"');
	Value := StringReplace(Value, '&#8221;', '"');
	Value := StringReplace(Value, '&#8222;', '"');
	Value := StringReplace(Value, '&#8230;', '...');
	Value := StringReplace(Value, '&#160;', ' ');
	Value := StringReplace(Value, '&nbsp;', ' ');
	Value := StringReplace(Value, '&lt;', '<');
	Value := StringReplace(Value, '&gt;', '>');
	Value := StringReplace(Value, '&quot;', '"');
	Value := StringReplace(Value, '&amp;', '&');
	Value := StringReplace(Value, '&#8364;', '€');
	Value := StringReplace(Value, '&Aacute;', 'A');
	Value := StringReplace(Value, '&uacute;', 'u');
	Value := StringReplace(Value, #13#10, '');
end;

procedure GetFilmCuriosities(Address : String);
var
	Line, Value : String;
	LineNr, i : Integer;
	Page : TStringList;
begin
	Page := TStringList.Create;
	Page.Text := UTF8Decode(GetPage(Address));
	LineNr := FindLine('<dl class="curiositesList', Page, 0);
	if LineNr > -1 then
	begin
		LineNr := LineNr + 1;
		Line := Page.GetString(LineNr);
		while (Pos('<script ', Line) <= 0) do
		begin
			Line := Page.GetString(LineNr);
			if (Pos('<span>', Line) > 0) then
			begin
				i := i + 1;
				Line := TextBetween(Line, '<span>', '</span>');
				Line := StringReplace(Line, '<br/>', '');
	  		CorrectTextError(Line);
	   		UniToPol(Line);
		  	HTMLRemoveTags(Line);
				Value := Value + IntToStr(i) + '. ' + Line + #13#10 + #13#10;
			end;
			LineNr := LineNr + 1;
		end;
		if (Value <> '') then
		begin
			Value := Copy(Value, 1, Length(Value) - 4);
			Value := Trim(Value);
			SetField(fieldComments, Value);
		end;	
	end;
	Page.Free;
end;

procedure GetFilmReview(Address : String);
var
	Line, Value : String;
	LineNr, i : Integer;
	Page : TStringList;
begin
	Page := TStringList.Create;
	Page.Text := UTF8Decode(GetPage(Address));
	Line := TextBetween(Page.Text, '<div class="reviewContent fontSizeCont description">', '</div>');
	CorrectTextError(Line);
	UniToPol(Line);
	Line := StringReplace(Line, '<br/>', #13#10);
	Line := StringReplace(Line, '<br />', #13#10);
	HTMLRemoveTags(Line);
	Line := StringReplace(Line, '  ', '');
	Line := StringReplace(Line, #9, '');
	SetField(fieldComments, Trim(Line));
	Page.Free;
end;

procedure AnalyzeFilmReview(Address : String; Count : Integer);
var
	Line, Value, PageTmp : String;
	LineNr, i : Integer;
	Page : TStringList;
begin
	Page := TStringList.Create;
	Page.Text := UTF8Decode(GetPage(Address));

	PickTreeClear;
	PageTmp := TextBetween(Page.Text, '<li class=userRev>', '</body>');
	if PageTmp = '' then
  begin
    Page.Free;
    Exit;
  end;
  
	PickTreeAdd('Recenzje redakcji:', '');
	while (Pos('Podstawowe informacje', Line) <= 0) do
	begin
    if Pos('<div class=revContent>', PageTmp) <= 0 then
      Break;
    Line := TextBetween(PageTmp, '<h3>', '</p>') + '</p>';
		if Pos('<a href="', Line) > 0 then
		begin
			Value := 'http://www.filmweb.pl' + TextBetween(Line, '<a href="', '">');
			Value := RemoveSpace(Value);
			Line := TextBetween(Line, '<p>', '</p>');
			CorrectTextError(Line);
			Line := RemoveSpace(Line);
      UniToPol(Line);
	  	HTMLRemoveTags(Line);
			PickTreeAdd(Line, UrlEncode(Value));
		end
		else
      Break;
		PageTmp := Copy(PageTmp, Pos('</p', PageTmp) + Length('</p'), Length(PageTmp));
	end;

	if PickTreeExec(Address) then
	begin
		GetFilmReview(Address);
	end;

	Page.Free;
end;

procedure AnalyzeFilmDescriptionsPage(Address : String);
var
	Line, Value : String;
	Page : TStringList;
begin
	Page := TStringList.Create;
	Page.Text := UTF8Decode(GetPage(Address));

	// Opisy (2009-02-17 modify by jlatk)
	PickListClear;

  Line := TextBetween(Page.Text, '<ul class=descriptionsList', '</body');
  while (Pos('<li class="desc', Line) > 0) do
  begin
    Value := '';
    Value := TextBetween(Line, '<p>', '</p>');
    Value := StringReplace(Value, '#13#10', ' ');
		CorrectTextError(Value);
		Value := StringReplace(Value, '<br/>', #13#10);
		UniToPol(Value);
		HTMLRemoveTags(Value);
		Value := Trim(Value);
		if (GetOption('OpisFilmu') = 0) then
		begin
			PickListAdd(Value);
		end else
		begin
			SetField(fieldDescription, Trim(Value));
			Page.Free;
			Exit;
		end;
    Line := Copy(Line, Pos('</p', Line) + Length('</p'), Length(Line));
  end;
	if not PickListExec('Wybierz opis dla "' + GetField(fieldOriginalTitle) + '"', Value) then
        Value := '';
	SetField(fieldDescription, Trim(Value));

	Page.Free;
end;



procedure AnalyzeFilmContribs(Address : String);
var
	Value : String;
	Page : TStringList;
begin
	Page := TStringList.Create;
	Page.Text := UTF8Decode(GetPage(Address));

	Value := TextBetween(Page.Text, 'wrap="virtual"' + #13#10 + '>', '</textarea>');
	Value := StringReplace(Value, '#13#10', ' ');
	Value := StringReplace(Value, '<br/>', #13#10);
	HTMLRemoveTags(Value);
	UniToPol(Value);
	CorrectTextError(Value);
	Value := Trim(Value);
	if (GetOption('OpisFilmu') = 0) then
	begin
		PickListAdd(Value);
	end else
	begin
		SetField(fieldDescription, Value);
		Page.Free;
		Exit;
	end;
	Page.Free;
end;	

procedure AnalyzeFilmContribsPage(Address : String);
var
	Line, Value : String;
	LineNr : Integer;
	StartPos, EndPos : Integer;
	Page : TStringList;
begin
	Page := TStringList.Create;
	Page.Text := UTF8Decode(GetPage(Address));

	PickListClear;
	while (Pos('<td style="padding-left:10">opis</td>', Page.Text) > 0) do
	begin
		LineNr := FindLine('<td style="padding-left:10">opis</td>', Page, 0);
		if LineNr > -1 then
		begin
			Line := Page.GetString(LineNr + 3);
			Line := 'http://www.filmweb.pl' + TextBetween(Line, 'href="', '"');
			if (Pos('login', Line) > 0) then
			begin
				Page.Free;
				Exit;
			end;	
			AnalyzeFilmContribs(Line);
			Page.Text := Copy(Page.Text, Pos('<td style="padding-left:10">opis</td>', Page.Text) + 10, Length(Page.Text));
		end;
	end;	
  PickListExec('Wybierz opis dla "' + GetField(fieldOriginalTitle) + '"', Value);
	SetField(fieldDescription, Value);

	Page.Free;
end;	


// \brief Return list (Actor, Role) in specific format
//        i.e. "Actor (jako Role)" in each line
//
// \param Page      HTML page with Film roles, directors, producers etc.
// \param Material  Name of material which we are interested in
//                  i.e. "producent" means that we are looking for all producers
// \param Format    Specific format which each line should look like
//                  i.e. "$1 (jako $2)": $1 = Actor, $2 = Role
// \param Format2   Use Format2 when Role doesn't exist
//
// \return String   One String with all Actors and Roles
//
function GetMaterialsFormat(Page : TStringList; Material : String;
  Format : String; Format2 : String):String;
var
	Line, Value, Actor, Role : String;
begin
  Line := TextBetween(Page.Text, '<div class="castListWrapper">', '<script');
  Line := TextBetween(Line, Material, '</dd>');
  while (Pos('<li', Line) > 0) do
  begin
    Line := Copy(Line, Pos('<li id="role', Line), Length(Line));
    Actor := TextBetween(Line, '<span>', '</span>');
    Role := TextBetween(Line, '<div>', '</div>');
    CorrectTextError(Role);
    Role := Trim(Role);
    HTMLRemoveTags(Role);
    // Remove "(jako glos)" or "(producent wykonawczy)" from Roles
    if (GetOption('CzyDodDaneWPoluAktorzy') <> 1) and (GetOption('CzyDodDaneWPoluAktorzy') <> 3) then
      while (Pos('(', Role) > 0) and (Pos(')', Role) > Pos('(', Role)) do
        Role := Copy(Role, 1, Pos('(', Role) - 1) + Copy(Role, Pos(')', Role) + 1, Length(Role));
    if (Length(Role) > 0) and (Length(Actor) > 0) then
    begin
      Value := Format;
      Value := StringReplace(Value, '$1', Actor);
      Value := StringReplace(Value, '$2', Role);
      Result := Result + Value;
    end
    else
      if (Length(Actor) > 0) then
      begin
        Value := Format2;
        Value := StringReplace(Value, '$1', Actor);
        Result := Result + Value;
      end;
    Line := Copy(Line, Pos('</li', Line) + Length('</li'), Length(Line));
    Role := '';
    Actor := '';
  end;
  HTMLRemoveTags(Result);
  UniToPol(Result);
end;

function GetMaterials(Page : TStringList; Materials : String):String;
begin
  Result := GetMaterialsFormat(Page, Materials, '$1 $2, ', '$1, ');
  Result := Copy(Result, 1, Length(Result) - Length(', '));
end;


procedure AnalyzeFilmRolesPage(Address : String);
var
  i : Integer;
	Line, Value : String;
	LineNr : Integer;
	StartPos : Integer;
	Page : TStringList;
	Prefix, Sufix : String;
	Count : Integer;
	ActorsPage : TStringList;
begin
	Page := TStringList.Create;
	Page.Text := UTF8Decode(GetPage(Address));

	// Reżyseria (2009-02-17 modify by jlatk)
	Value := GetMaterials(Page, 'reżyser');
	if (Length(Value) > 0) then
  	SetField(fieldDirector, Trim(Value));

	// Producent (2009-02-17 modify by jlatk)
	Value := GetMaterials(Page, 'producent');
	if (Length(Value) > 0) then
    SetField(fieldProducer, Trim(Value));

  Value := '';

	// Obsada (2009-02-17 modify by jlatk)
	begin
		case GetOption('UkladAktorow') of
			0, 1:	Prefix := ' (jako ';
			2, 3:	Prefix := ' - ';
		end;
		case GetOption('UkladAktorow') of
			0, 2, 4:	Sufix := ', ';
			1, 3, 5:	Sufix := #13#10;
		end;
		case GetOption('IloscAktorow') of
			0:	Count := 999;
			1:	Count := 10;
			2:	Count := 20;
			3:	Count := 30;
			4:	Count := 40;
			5:	Count := 50;
		end;

    ActorsPage := TStringList.Create;
    ActorsPage.Text := GetMaterialsFormat(Page, 'aktor', '$1 : $2' + #13#10, '$1' + #13#10);

 		for LineNr := 0 to ActorsPage.Count - 1 do
 		begin
			//Pobieranie aktorów/postaci
			if Pos(' : ', ActorsPage.GetString(LineNr)) > 0 then
			begin
				if (GetOption('UkladAktorow') < 2) then
					Value := Value + Trim(StringReplace(ActorsPage.GetString(LineNr), ' : ', Prefix)) + ')' + Sufix
				else
				if (GetOption('UkladAktorow') >= 2) and (GetOption('UkladAktorow') < 4) then
					Value := Value + Trim(StringReplace(ActorsPage.GetString(LineNr), ' : ', Prefix)) + Sufix
				else	
					Value := Value + Trim(Copy(ActorsPage.GetString(LineNr), 1, Pos(' : ', ActorsPage.GetString(LineNr)) - 1)) + Sufix;
				Count := Count - 1;
			end	else if Length(ActorsPage.GetString(LineNr)) > 0 then
			begin
				Value := Value + Trim(ActorsPage.GetString(LineNr)) + Sufix;
				Count := Count - 1;
			end;
			//Przerywa po określonej ilości dodanych aktorów
			if Count = 0 then
				Break;
		end;
		Value := Copy(Value, 1, Length(Value) - Length(Sufix));
		Value := StringReplace(Value, '  ', ' ');
		UniToPol(Value);
		SetField(fieldActors, Value); //Zmienić kodowanie
	end;


	Value := '';

	if GetOption('CzyDodDaneWPoluAktorzy') >= 2 then
  begin
 	// Scenariusz (2009-02-17 by jlatk)
  	Value := GetMaterials(Page, 'scenarzysta');
    if (Length(Value) > 0) then
    begin
      Value := GetField(fieldActors) + #13#10 + 'Scenariusz: ' + Trim(Value) + '; ';
    	SetField(fieldActors, Value);
    end;

	// Materiały do scenariusza (2009-02-17 by jlatk)
  	Value := GetMaterials(Page, 'materiały do scenariusza');
    if (Length(Value) > 0) then
    begin
      Value := GetField(fieldActors) + #13#10 + 'Materiały do scenariusza: ' + Trim(Value) + '; ';
    	SetField(fieldActors, Value);
    end;

	// Muzyka (2009-02-17 by jlatk)
  	Value := GetMaterials(Page, 'muzyka');
    if (Length(Value) > 0) then
    begin
      Value := GetField(fieldActors) + #13#10 + 'Myzyka: ' + Trim(Value) + '; ';
    	SetField(fieldActors, Value);
    end;

	// Zdjęcia (2009-02-17 by jlatk)
  	Value := GetMaterials(Page, 'zdjęcia');
    if (Length(Value) > 0) then
    begin
      Value := GetField(fieldActors) + #13#10 + 'Zdjęcia: ' + Trim(Value) + '; ';
    	SetField(fieldActors, Value);
    end;

	// Montaż (2009-02-17 by jlatk)
  	Value := GetMaterials(Page, 'montaż');
    if (Length(Value) > 0) then
    begin
      Value := GetField(fieldActors) + #13#10 + 'Montaż: ' + Trim(Value) + '; ';
    	SetField(fieldActors, Value);
    end;

	// Scenografia (2009-02-17 by jlatk)
  	Value := GetMaterials(Page, 'scenografia');
    if (Length(Value) > 0) then
    begin
      Value := GetField(fieldActors) + #13#10 + 'Scenografia: ' + Trim(Value) + '; ';
    	SetField(fieldActors, Value);
    end;

	// Kostiumy (2009-02-17 by jlatk)
  	Value := GetMaterials(Page, 'kostiumy');
    if (Length(Value) > 0) then
    begin
      Value := GetField(fieldActors) + #13#10 + 'Kostiumy: ' + Trim(Value) + '; ';
    	SetField(fieldActors, Value);
    end;

	// Dźwięk (2009-02-17 by jlatk)
  	Value := GetMaterials(Page, 'dźwięk');
    if (Length(Value) > 0) then
    begin
      Value := GetField(fieldActors) + #13#10 + 'Dźwięk: ' + Trim(Value) + '; ';
    	SetField(fieldActors, Value);
    end;
  end;

	Page.Free;
	ActorsPage.Free;
end;

procedure AnalyzeMoviePage(Address : String);
var
	Page : TStringList;
	Line, Value : String;
	Rates : String;
	LineNr : Integer;
	StartPos, EndPos : Integer;
	Szukane, SzukaneEnd : String;
	Separator : String;
begin
	Page := TStringList.Create;
	Page.Text := UTF8Decode(GetPage(Address));

	// Tytuł polski i oryginalny
	Line := TextBetween(Page.Text, '<title>', '</title>');
	Line := StringReplace(Line, #9, '');
	Line := StringReplace(Line, #13#10, '');
	HTMLRemoveTags(Line);
	UniToPol(Line);

	// sprawdzamy czy jest ukośnik oddzieljący tytuł polski od oryginalnego
  Szukane := '/';
  SzukaneEnd := '(';

	StartPos := Pos(Szukane, Line);
	EndPos := Pos(SzukaneEnd, Line);

	if  StartPos > 0 then
	begin
    // polski
  	Value := Copy(Line, 0, StartPos-1);
 		if (Length(Trim(Value)) > 0) then
      SetField(fieldTranslatedTitle, Trim(Value));
      
    // oryginalny
    Value := TextBetween(Line, Szukane, SzukaneEnd);
  	if (Length(Trim(Value)) > 0) then
      SetField(fieldOriginalTitle, Trim(Value));
    
	end else begin
    // nie ma polskiego tytulu
    Value := Copy(Line, 0, EndPos-1);

  	if (Length(Value) > 0) then
	   	SetField(fieldOriginalTitle, Trim(Value));

    // jeśli nie ma polskiego tytułu, a jest AKA, wstawiamy AKA
  	Value := TextBetween(Page.Text, '(AKA', ')');
  	Value := StringReplace(Value, #9, '');
  	Value := StringReplace(Value, #13#10, '');
  	HTMLRemoveTags(Value);
	  UniToPol(Value);
 		if (Length(Trim(Value)) > 0) then
      SetField(fieldTranslatedTitle, Trim(Value));
	end;
  // nie ma polskiego a jest orginalny to wstawiamy orginalny
  if (Length(GetField(fieldOriginalTitle)) > 0) and (Length(GetField(fieldTranslatedTitle)) < 3) then
      SetField(fieldTranslatedTitle, GetField(fieldOriginalTitle));

	// Rok
	Value := TextBetween(Page.Text, '<span id=filmYear class=filmYear>', '</span>');
	if (Length(Value) > 0) then
		SetField(fieldYear, Trim(Value));

	// Ratting
	Rates := TextBetween(Page.Text, '<strong rel="v:rating"><span property="v:average">', '</span></strong>');
	Rates := StringReplace(Rates, ' ', '');
	if (Length(Rates) > 0) then
		SetField(fieldRating, Trim(Rates));

	// Kraj
	LineNr := FindLine('produkcja:', Page, 0);
	if LineNr > -1 then
	begin
		case GetOption('SeparatorKraju') of
			0:	Separator := ' / ';
			1:	Separator := ', ';
			2:	Separator := '/';
		end;
		Line := Page.GetString(LineNr);
		Value := '';
		while Pos('?countryIds=', Line) > 0 do
		begin
			Szukane := '?countryIds=';
			StartPos := Pos(Szukane, Line) + Length(Szukane);
			Line := Copy(Line, StartPos, Length(Line) - StartPos + 1);
			Szukane := '">';
			StartPos := Pos(Szukane, Line) + Length(Szukane);
			Line := Copy(Line, StartPos, Length(Line) - StartPos + 1);
			Value := Value + Copy(Line, 1, Pos('</a>', Line) - 1) + Separator;
		end;
		Value := Copy(Value, 1, Length(Value) - Length(Separator));
		UniToPol(Value);
		SetField(fieldCountry, Value);
	end;

	// Gatunek
	LineNr := FindLine('gatunek:', Page, 0);
	if LineNr > -1 then
	begin
		case GetOption('SeparatorGatunku') of
			0:	Separator := ' / ';
			1:	Separator := ', ';
			2:	Separator := '/';
		end;
		Line := Page.GetString(LineNr);
		Value := '';
		while Pos('genreIds=', Line) > 0 do
		begin
			Szukane := 'genreIds=';
			StartPos := Pos(Szukane, Line) + Length(Szukane);
			Line := Copy(Line, StartPos, Length(Line) - StartPos + 1);
			Szukane := '">';
			StartPos := Pos(Szukane, Line) + Length(Szukane);
			Line := Copy(Line, StartPos, Length(Line) - StartPos + 1);
			Value := Value + Copy(Line, 1, Pos('</', Line) - 1) + Separator;
		end;
		Value := Copy(Value, 1, Length(Value) - Length(Separator));
    Value := StringReplace(Value, ', ', Separator);
		UniToPol(Value);
		SetField(fieldCategory, Trim(Value));
	end;
	
	// Czas trwania
	if ((GetField(fieldLength) = '') or (GetField(fieldVideoFormat) = '')) then
	begin
		Value := TextBetween(Page.Text, '<div class=time>', '<span>');
		if (Length(Value) > 0) then
			SetField(fieldLength, Trim(Value));
	end;
	
	// Plakat
  Line := TextBetween(Page.Text, '<div class=posterLightbox>', '</div>');
	case GetOption('Plakat') of
		1:	begin
				Szukane := TextBetween(Line, 'href="', '"');
				Szukane := TextBefore(Szukane, '?', '');
        GetPicture(Szukane);
				end;
		2:	begin
  			Szukane := TextBetween(Line, 'src="', '"');
				Szukane := TextBefore(Szukane, '?', '');
        GetPicture(Szukane);
				end;
	end;


	// Obsada, reżyseria, scenariusz, materiały do scenariusza, produkcja,
  // muzyka, zdjęcia, montaż, scenografia, kostiumy, dźwięk
  AnalyzeFilmRolesPage(Address + '/cast');

	// Opis
	AnalyzeFilmDescriptionsPage(Address + '/descs');
	
	// Komentarze
	case GetOption('Komentarze') of
		1:	begin
				Line := TextBetween(Page.Text, '<span class=hdrAddInfo>(', ')</span>');
				if Line <> '' then
				begin
          Value := TextBetween(Page.Text, '<div class="reviews', '</body');
          Value := TextBetween(Value, '/reviews/', '"');
          GetFilmReview('http://www.filmweb.pl/reviews/' + Value);
				end;
			end;
		2:	begin
 				Line := TextBetween(Page.Text, '<span class=hdrAddInfo>(', ')</span>');
				if Line <> '' then
  				AnalyzeFilmReview(Address + '/reviews', StrToInt(Line, 0));
			end;
		3:	begin
				Line := TextBetween(Page.Text, 'Ciekawostki<span>', '</span>');
				LineNr := StrToInt(TextBetween(Line, '(', ')'), 0);
				if LineNr > 0 then
				begin
					GetFilmCuriosities(Address + '/trivia');
				end;
			end;
	end;

	Page.Free;
end;

procedure UniToPol(var Value : String);
begin
	Value := StringReplace(Value, '&#x105;', 'ą');
	Value := StringReplace(Value, '&#x107;', 'ć');
	Value := StringReplace(Value, '&#x119;', 'ę');
	Value := StringReplace(Value, '&#x142;', 'ł');
	Value := StringReplace(Value, '&#x144;', 'ń');
	Value := StringReplace(Value, '&#xF3;', 'ó');
	Value := StringReplace(Value, '&#x15B;', 'ś');
	Value := StringReplace(Value, '&#x17A;', 'ź');
	Value := StringReplace(Value, '&#x17C;', 'ż');
	Value := StringReplace(Value, '&#x104;', 'Ą');
	Value := StringReplace(Value, '&#x106;', 'Ć');
	Value := StringReplace(Value, '&#x118;', 'Ę');
	Value := StringReplace(Value, '&#x141;', 'Ł');
	Value := StringReplace(Value, '&#x143;', 'Ń');
	Value := StringReplace(Value, '&#xD3;', 'Ó');
	Value := StringReplace(Value, '&#x15A;', 'Ś');
	Value := StringReplace(Value, '&#x179;', 'Ź');
	Value := StringReplace(Value, '&#x17B;', 'Ż');
	
	Value := StringReplace(Value, '&#165;', 'Ą');
	Value := StringReplace(Value, '&#185;', 'ą');
	Value := StringReplace(Value, '&#198;', 'Ć');
	Value := StringReplace(Value, '&#230;', 'ć');
	Value := StringReplace(Value, '&#202;', 'Ę');
	Value := StringReplace(Value, '&#234;', 'ę');
	Value := StringReplace(Value, '&#163;', 'Ł');
	Value := StringReplace(Value, '&#179;', 'ł');
	Value := StringReplace(Value, '&#209;', 'Ń');
	Value := StringReplace(Value, '&#241;', 'ń');
	Value := StringReplace(Value, '&#211;', 'Ó');
	Value := StringReplace(Value, '&#243;', 'ó');
	Value := StringReplace(Value, '&#140;', 'Ś');
	Value := StringReplace(Value, '&#156;', 'ś');
	Value := StringReplace(Value, '&#143;', 'Ź');
	Value := StringReplace(Value, '&#159;', 'ź');
	Value := StringReplace(Value, '&#175;', 'Ż');
	Value := StringReplace(Value, '&#191;', 'ż');
	
	Value := StringReplace(Value, '&#260;', 'Ą');
	Value := StringReplace(Value, '&#261;', 'ą');
	Value := StringReplace(Value, '&#262;', 'Ć');
	Value := StringReplace(Value, '&#263;', 'ć');
	Value := StringReplace(Value, '&#280;', 'Ę');
	Value := StringReplace(Value, '&#281;', 'ę');
	Value := StringReplace(Value, '&#321;', 'Ł');
	Value := StringReplace(Value, '&#322;', 'ł');
	Value := StringReplace(Value, '&#323;', 'Ń');
	Value := StringReplace(Value, '&#324;', 'ń');
	Value := StringReplace(Value, '&Oacute;', 'Ó');
	Value := StringReplace(Value, '&oacute;', 'ó');
	Value := StringReplace(Value, '&Eacute;', 'É');
	Value := StringReplace(Value, '&eacute;', 'é');
	Value := StringReplace(Value, '&#346;', 'Ś');
	Value := StringReplace(Value, '&#347;', 'ś');
	Value := StringReplace(Value, '&#377;', 'Ź');
	Value := StringReplace(Value, '&#378;', 'ź');
	Value := StringReplace(Value, '&#379;', 'Ż');
	Value := StringReplace(Value, '&#380;', 'ż');
	Value := StringReplace(Value, '&amp;egrave;', 'é');
  Value := StringReplace(Value, '&ccedil;', 'ç');
  Value := StringReplace(Value, '&icirc;', 'î');
  Value := StringReplace(Value, '&ndash;', '-');
  Value := StringReplace(Value, '&ouml;', 'ö');
  Value := StringReplace(Value, '&hellip;', '...');
  Value := StringReplace(Value, '&ntilde;', 'n');
  Value := StringReplace(Value, '&iacute;', 'í');
  Value := StringReplace(Value, '&aacute;', 'á');
  Value := StringReplace(Value, '&agrave;', 'á');
  Value := StringReplace(Value, '&ouml;', 'é');
  Value := StringReplace(Value, '&ograve;', 'o');
  Value := StringReplace(Value, '&ocirc;', 'ô');
  Value := StringReplace(Value, '&bdquo;', '"');
  Value := StringReplace(Value, '&rdquo;', '"');
  Value := StringReplace(Value, '&ldquo;', '"');
  Value := StringReplace(Value, '&egrave;', 'e');
end;


// Convert CP1250 (Windows EE) to UTF8
procedure win2utf(var Value : String);
begin
	Value := StringReplace(Value, 'ę', '%C4%99');
	Value := StringReplace(Value, 'Ę', '%C4%98');
	Value := StringReplace(Value, 'ó', '%C3%B3');
	Value := StringReplace(Value, 'Ó', '%C3%93');
	Value := StringReplace(Value, 'ą', '%C4%85');
	Value := StringReplace(Value, 'Ą', '%C4%84');
	Value := StringReplace(Value, 'ś', '%C5%9B');
	Value := StringReplace(Value, 'Ś', '%C5%9A');
	Value := StringReplace(Value, 'ł', '%C5%82');
	Value := StringReplace(Value, 'Ł', '%C5%81');
	Value := StringReplace(Value, 'ż', '%C5%BC');
	Value := StringReplace(Value, 'Ż', '%C5%BB');
	Value := StringReplace(Value, 'ź', '%C5%BA');
	Value := StringReplace(Value, 'Ź', '%C5%B9');
	Value := StringReplace(Value, 'ć', '%C4%87');
	Value := StringReplace(Value, 'Ć', '%C4%86');
	Value := StringReplace(Value, 'ń', '%C5%84');
	Value := StringReplace(Value, 'Ń', '%C5%83');
	Value := StringReplace(Value, ' ', '+');
end;

Chciałbym sobie zrobić pobieranie danych w swoim programie. Niestety, utknąłem na razie na jednej funkcji, której nie wiem skąd wziąć. Jest to: TextBetween. Nigdzie nie ma tej funkcji w kodzie, przypuszczam, że jest w programie. Niestety - mimo podanych źródeł programu na stronie - nie ma tego kodu (pewnie dlatego, że są podane źródła w starej wersji programu).
Jak ją zdobyć?
Może ktoś ma ten kod "czysty" - czyli, żebym mógł sobie od razu dodać w swoim programie?

Z góry dziękuję za pomoc.

0

Nie chce mi się czytać całego kodu, bo wstawiony jest w po...pieprzony wręcz sposób z enterem co linijkę, co uniemozliwia jego wygodne przeczytanie, a domyslam się, że TextBetween będzie wyciągała tekst z pomiędzy dwóch znaczników. Taką funkcję, która tobi to jednorazowo i z pomiędzy <cosik>test</cosik> wyciagnie samo: test. Jednak tylko pierwsze wystąpienie i ważna jest wielkość liter. Autorem tej funkcji jest Misiekd, który kiedyś pod inną nazwą podesłal mi ją w przykładowym kodzie. A do parsowania większej ilości tekstu powinno się raczej stosować moduły do wyrażeń regularnych, na przykład ten z: http://regexpstudio.com - ma on dołączoną dokumentację oraz przykłady uźycia.

function SimpleParse(StrBegin, StrEnd, Str : string) : string;
var
  B, E : integer;
begin
  Result := '';
  if StrBegin = '' then
  begin
    B := 1;
  end
  else
  begin
    B := Pos(StrBegin, Str);
  end;
  if B > 0 then
  begin
    Str := Copy(Str, B + Length(StrBegin), MaxInt);
    if StrEnd = '' then
    begin
      E := Length(Str) + 1;
    end
    else
    begin
      E := Pos(StrEnd, Str);
    end;
    if E > 0 then
    begin
      Result := Copy(Str, 1, E - 1);
    end;
  end;
end;

1 użytkowników online, w tym zalogowanych: 0, gości: 1