B
BifTek
Guest
I'm making a website scraper to archive a website. This involves circumventing a captcha. If I do this exact same process via Postman or a regular browser, the entire .zip file is downloaded. But this code in GameMaker only downloads the first few bytes. I have the FileManager and ExecuteShell extension packages, but they are only used to bypass the sandbox.
Note: The website's name has been replaced here as "example" but if necessary to help me I will gladly post the true URLs so anyone can run it as a test.
You should probably just scroll down to the last few lines of code.
Key Press Enter Event:
Async HTTP Event:
If I open the last file created, these are the contents:
The result key only contains "PK\u0003\u0004\n\u0003", but these are only the first few bytes of the .zip file. Where is the rest!?
Note: The website's name has been replaced here as "example" but if necessary to help me I will gladly post the true URLs so anyone can run it as a test.
You should probably just scroll down to the last few lines of code.
Key Press Enter Event:
Code:
// Key Press Enter: start the chain by fetching the landing page.
// The returned request id is kept in `request` so the Async HTTP event
// can recognise the matching callback.
var landing_url = "https://www.example.net/4327/";
request = http_get_file(landing_url, working_directory);
Code:
// Async HTTP event: drives the three-step download chain.
//   1. `request`      - landing page; its Set-Cookie response header supplies the cookie.
//   2. `get_response` - download page saved to disk; the captcha password is parsed from it.
//   3. `download`     - POST of the captcha form; every callback is logged to responseN.txt.
var event_id = ds_map_find_value(async_load, "id");
// A status of 0 marks a completed transfer. Progress and error callbacks carry
// other values and must not trigger the next step of the chain.
var finished = (ds_map_find_value(async_load, "status") == 0);

// Step 1: landing page finished -> remember the cookie, then fetch the download page.
if (variable_instance_exists(id, "request"))
{
    if (event_id == request && finished)
    {
        cookie = "";
        // Response headers live in their own map, referenced from async_load.
        var response_header_map = ds_map_find_value(async_load, "response_headers");
        if (!is_undefined(response_header_map))
        {
            if (ds_exists(response_header_map, ds_type_map))
            {
                var raw_cookie = ds_map_find_value(response_header_map, "Set-Cookie");
                if (is_string(raw_cookie))
                {
                    // Keep only the leading "name=value" pair; everything from the
                    // first ";" onwards is cookie attributes, not part of the value.
                    var attr_pos = string_pos(";", raw_cookie);
                    if (attr_pos > 0)
                    {
                        cookie = string_copy(raw_cookie, 1, attr_pos - 1);
                    }
                    else
                    {
                        cookie = raw_cookie;
                    }
                }
            }
        }
        get_response = http_get_file("https://www.example.net/download/4327/", "\download.html");
    }
}

// Step 2: download page saved -> parse the captcha password and submit the form.
if (variable_instance_exists(id, "get_response"))
{
    // http_status is numeric, so compare against the number 200, not the string "200".
    if (event_id == get_response && finished && ds_map_find_value(async_load, "http_status") == 200)
    {
        file_copy(string_replace(temp_directory,"Temp","Project") + "\\download.html", working_directory + "\download.html");
        var document = file_text_open_read(working_directory + "\\download.html");
        if (document != -1)
        {
            // Number of lines to skip so the next read lands on the captcha line.
            var captcha_line_skip = 477;
            repeat (captcha_line_skip)
            {
                file_text_readln(document);
            }
            var line = file_text_read_string(document);
            // Release the file handle as soon as the line has been read.
            file_text_close(document);

            // Known captcha prompts. The password is the text between the end of
            // the prompt and the next ")".
            var prompts;
            prompts[0] = "Enter this password";
            prompts[1] = "Enter the following ";
            prompts[2] = "Copy and paste the following: ";
            prompts[3] = "Please enter the password: ";
            var i;
            for (i = 0; i < 4; i += 1)
            {
                var prompt_pos = string_pos(prompts[i], line);
                if (prompt_pos > 0)
                {
                    // Drop everything up to and including the prompt text...
                    line = string_delete(line, 1, prompt_pos + string_length(prompts[i]) - 1);
                    // ...then everything from the closing ")" to the end of the line.
                    line = string_delete(line, string_pos(")", line), string_length(line));
                }
            }
            password = line;

            var request_headers = ds_map_create();
            ds_map_add(request_headers, "referer", "http://www.example.net/download/4327/");
            ds_map_add(request_headers, "content-type", "application/x-www-form-urlencoded");
            if (cookie != "")
            {
                ds_map_add(request_headers, "cookie", cookie);
            }
            // NOTE(review): password is inserted without URL-encoding; confirm the
            // captcha text never contains characters that are reserved in form data.
            var data = "passwrd=" + password + "&id=https%3A%2F%2Fwww.example.net%2Fdownload%2F4327%2F&I_am_Human=I+am+Human";
            download = http_request("http://www.example.net/download/4327/", "POST", request_headers, data);
            // The header map is only needed to issue the request; free it to avoid a leak.
            ds_map_destroy(request_headers);
        }
    }
}

// Step 3: log every callback of the form POST for inspection.
if (variable_instance_exists(id, "download"))
{
    if (event_id == download)
    {
        if (!variable_instance_exists(id, "counter"))
        {
            counter = 0;
        }
        counter++;
        // NOTE(review): "result" arrives as a string. The logged value stops after the
        // first six bytes of the zip header, which suggests the payload is cut at its
        // first NUL byte. Saving the archive intact presumably needs a buffer- or
        // file-based transfer - confirm against the http_request documentation for
        // the runtime in use.
        var response_file = file_text_open_write("response" + string(counter) + ".txt");
        // json_encode takes the map itself; async_load is already a ds_map.
        file_text_write_string(response_file, json_encode(async_load));
        file_text_close(response_file);
        // Extension call; presumably opens the output folder - verify against the extension.
        ExecuteShell(string_replace(temp_directory,"Temp","Project"), false);
    }
}
Code:
{ "response_headers": 3.000000, "http_status": 200.000000, "url": "http:\/\/www.example.net\/download\/4327\/", "id": 16.000000, "status": 0.000000, "result": "PK\u0003\u0004\n\u0003" }