Author neologix
Recipients catalin.iacob, neologix, pitrou, theamk, thomaslee
Date 2012-10-09.22:54:39
SpamBayes Score -1.0
Marked as misclassified Yes
Message-id <CAH_1eM2y+8k8TwoCBQB_FdcHr1WFPhfOA8zSUzkJ6K=Z91BvCg@mail.gmail.com>
In-reply-to <CAH_1eM0KckXinWx+K4wTi73MukDeXjoMQQiv4ZB-OBOaeurDVQ@mail.gmail.com>
Content
wget trace:

Here a 404 is treated correctly:
"""
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5
connect(5, {sa_family=AF_INET, sin_port=htons(4242),
sin_addr=inet_addr("127.0.0.1")}, 16) = 0
select(6, NULL, [5], NULL, {900, 0})    = 1 (out [5], left {900, 0})
write(5, "GET /MISSING-8276 HTTP/1.1\r\nUser"..., 126) = 126
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {899,
999000})
recvfrom(5, "HTTP/1.0 404 Not Found\r\n", 511, MSG_PEEK, NULL, NULL) = 24
read(5, "HTTP/1.0 404 Not Found\r\n", 24) = 24
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "Server: BaseHTTP/0.3 Python/2.4."..., 487, MSG_PEEK,
NULL, NULL) = 35
read(5, "Server: BaseHTTP/0.3 Python/2.4."..., 35) = 35
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "Date: Tue, 09 Oct 2012 22:09:59 "..., 452, MSG_PEEK,
NULL, NULL) = 37
read(5, "Date: Tue, 09 Oct 2012 22:09:59 "..., 37) = 37
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "Content-Type: text/html\r\n", 415, MSG_PEEK, NULL, NULL) = 25
read(5, "Content-Type: text/html\r\n", 25) = 25
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "Connection: close\r\n", 390, MSG_PEEK, NULL, NULL) = 19
read(5, "Connection: close\r\n", 19)    = 19
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "\r\n", 371, MSG_PEEK, NULL, NULL) = 2
read(5, "\r\n", 2)                      = 2
select(6, [5], NULL, NULL, {0, 1})      = 1 (in [5], left {0, 1})
close(5)                                = 0
"""

And here a 404 is treated incorrectly:
"""
socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5
connect(5, {sa_family=AF_INET, sin_port=htons(4242),
sin_addr=inet_addr("127.0.0.1")}, 16) = 0
select(6, NULL, [5], NULL, {900, 0})    = 1 (out [5], left {900, 0})
write(5, "GET /MISSING-8153 HTTP/1.1\r\nUser"..., 126) = 126
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {899, 998000})
recvfrom(5, "HTTP/1.0 404 Not Found\r\n", 511, MSG_PEEK, NULL, NULL) = 24
read(5, "HTTP/1.0 404 Not Found\r\n", 24) = 24
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "Server: BaseHTTP/0.3 Python/2.4."..., 487, MSG_PEEK,
NULL, NULL) = 35
read(5, "Server: BaseHTTP/0.3 Python/2.4."..., 35) = 35
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "Date: Tue, 09 Oct 2012 22:10:01 "..., 452, MSG_PEEK,
NULL, NULL) = 37
read(5, "Date: Tue, 09 Oct 2012 22:10:01 "..., 37) = 37
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "Content-Type: text/html\r\n", 415, MSG_PEEK, NULL, NULL) = 25
read(5, "Content-Type: text/html\r\n", 25) = 25
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "Connection: close\r\n", 390, MSG_PEEK, NULL, NULL) = 19
read(5, "Connection: close\r\n", 19)    = 19
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "\r\n", 371, MSG_PEEK, NULL, NULL) = 2
read(5, "\r\n", 2)                      = 2
HERE=> select(6, [5], NULL, NULL, {0, 1})      = 0 (Timeout)
select(6, NULL, [5], NULL, {900, 0})    = 1 (out [5], left {900, 0})
write(5, "GET /FOUND-8154 HTTP/1.1\r\nUser-A"..., 124) = 124
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
recvfrom(5, "<head>\n<title>Error response</ti"..., 511, MSG_PEEK,
NULL, NULL) = 190
stat("FOUND-8154", 0x7fffcc530d60)      = -1 ENOENT (No such file or directory)
open("FOUND-8154", O_WRONLY|O_CREAT|O_TRUNC, 0666) = 7
clock_gettime(CLOCK_MONOTONIC, {130079, 24215187}) = 0
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
read(5, "<head>\n<title>Error response</ti"..., 8192) = 190
clock_gettime(CLOCK_MONOTONIC, {130079, 24258187}) = 0
fstat(7, {st_mode=S_IFREG|0644, st_size=0, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1,
0) = 0x2b0004bbc000
write(7, "<head>\n<title>Error response</ti"..., 190) = 190
write(4, "\n     0K                        "..., 777) = 777
select(6, [5], NULL, NULL, {900, 0})    = 1 (in [5], left {900, 0})
read(5, 0x1becc8f0, 8192)               = -1 ECONNRESET (Connection
reset by peer)
close(5)                                = 0
"""

You can se that after 404 and "Connection: close" is returned, wget
calls select() with a 1us timeout, to check if the socket has been
closed by the other end.
That's /src/connect.c:test_socket_open() in wget source code.
This is used by persistent_available_p() in src/http.c, with this comment:
"""
Finally, check whether the connection is still open.  This is
     important because most servers implement liberal (short) timeout
     on persistent connections.  Wget can of course always reconnect
     if the connection doesn't work out, but it's nicer to know in
     advance.
"""

So it deliberately reuses the same connection (so-called persistent
connection) if the http server doesn't close it right away. This
explains probably why you see this mainly when many threads are in
use.
I'm not an http expert, so I'm not sure whether it's legal...
History
Date User Action Args
2012-10-09 22:54:40neologixsetrecipients: + neologix, pitrou, thomaslee, catalin.iacob, theamk
2012-10-09 22:54:40neologixlinkissue15991 messages
2012-10-09 22:54:39neologixcreate