[PATCH v2 2/2] wget: add support for retries in http requests

Martin Lewis martin.lewis.x84 at gmail.com
Tue Jan 15 11:05:11 UTC 2019


Replace the die-on-error handlers with error returns so that
download_one_url can retry from the beginning.
When the number of tries is 1 (the default), the behaviour should be the
same as before.

v2: updated the diff to take the -o option into account; tidied up the help text

Signed-off-by: Martin Lewis <martin.lewis.x84 at gmail.com>
---
 networking/wget.c | 125 ++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 98 insertions(+), 27 deletions(-)

diff --git a/networking/wget.c b/networking/wget.c
index d11b201..b41f8ed 100644
--- a/networking/wget.c
+++ b/networking/wget.c
@@ -125,13 +125,14 @@
 //usage:       "[-c|--continue] [--spider] [-q|--quiet]
[-O|--output-document FILE]\n"
 //usage:       "    [-o|--output-file FILE] [--header 'header: value']
[-Y|--proxy on/off]\n"
 /* Since we ignore these opts, we don't show them in --help */
-/* //usage:    "    [--no-check-certificate] [--no-cache] [--passive-ftp]
[-t TRIES]" */
+/* //usage:    "    [--no-check-certificate] [--no-cache] [--passive-ftp]"
*/
 /* //usage:    "    [-nv] [-nc] [-nH] [-np]" */
-//usage:       "    [-P DIR] [-S|--server-response] [-U|--user-agent
AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+//usage:       "    [-t|--tries TRIES] [-P DIR] [-S|--server-response]\n"
+//usage:       "    [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T
SEC]") " URL..."
 //usage:    )
 //usage:    IF_NOT_FEATURE_WGET_LONG_OPTIONS(
-//usage:       "[-cq] [-O FILE] [-o FILE] [-Y on/off] [-P DIR] [-S] [-U
AGENT]"
-//usage:            IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
+//usage:       "[-cq] [-O FILE] [-o FILE] [-Y on/off] [-P DIR] [-S] [-U
AGENT]\n"
+//usage:       "        [-t TRIES]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") "
URL..."
 //usage:    )
 //usage:#define wget_full_usage "\n\n"
 //usage:       "Retrieve files via HTTP or FTP\n"
@@ -150,6 +151,7 @@
 //usage:     "\n    -o FILE        Log messages to FILE"
 //usage:     "\n    -U STR        Use STR for User-Agent header"
 //usage:     "\n    -Y on/off    Use proxy"
+//usage:     "\n    -t TRIES    Set number of retries to TRIES (0
unlimits)"

 #include "libbb.h"

@@ -235,6 +237,7 @@ struct globals {
     char *fname_log;        /* where to direct log (-o) */
     const char *proxy_flag; /* Use proxies if env vars are set */
     const char *user_agent; /* "User-Agent" header field */
+    unsigned tries; /* For -t option */
     int output_fd;
     int log_fd;
     int o_flags;
@@ -410,6 +413,7 @@ static int is_ip_address(const char *string)
 }
 #endif

+/* Return NULL if connect() fails */
 static FILE *open_socket(len_and_sockaddr *lsa)
 {
     int fd;
@@ -417,13 +421,19 @@ static FILE *open_socket(len_and_sockaddr *lsa)
 #if ENABLE_FEATURE_WGET_TIMEOUT
     struct timeval timeout = {G.timeout_seconds, 0};
 #endif
+
     fd = xsocket(lsa->u.sa.sa_family, SOCK_STREAM, 0);
 #if ENABLE_FEATURE_WGET_TIMEOUT
     if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof
(timeout)) < 0) {
         bb_perror_msg_and_die("setsockopt failed\n");
     }
 #endif
-    xconnect(fd, &lsa->u.sa, lsa->len);
+    if (connect(fd, &lsa->u.sa, lsa->len) < 0) {
+        /* Failure */
+        bb_perror_msg("connect failed");
+        close(fd);
+        return NULL;
+    }

     /* glibc 2.4 seems to try seeking on it - ??! */
     /* hopefully it understands what ESPIPE means... */
@@ -592,14 +602,16 @@ static int fread_buffered(char *buffer, size_t len,
FILE *fp)
     return fread(buffer, 1, len, fp);
 }

-/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
-static char fgets_trim_sanitize(FILE *fp, const char *fmt)
+/* Returns '\n' if it was seen, -1 if reading failed (e.g. a timeout occurred), else '\0'. Trims at first '\r' or '\n' */
+static signed char fgets_trim_sanitize(FILE *fp, const char *fmt)
 {
     char c;
     char *buf_ptr;

-    if (fgets_read_to_newline(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
-        bb_perror_msg_and_die("error getting response");
+    if (fgets_read_to_newline(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
{
+        bb_perror_msg("error getting response");
+        return -1;
+    }

     buf_ptr = strchrnul(G.wget_buf, '\n');
     c = *buf_ptr;
@@ -637,7 +649,9 @@ static int ftpcmd(const char *s1, const char *s2, FILE
*fp)
     /* Read until "Nxx something" is received */
     G.wget_buf[3] = 0;
     do {
-        fgets_trim_sanitize(fp, "%s\n");
+        if (fgets_trim_sanitize(fp, "%s\n") == -1) {
+            xfunc_die();
+        }
     } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');

     G.wget_buf[3] = '\0';
@@ -742,6 +756,11 @@ static char *get_sanitized_hdr(FILE *fp)
     /* retrieve header line */
     c = fgets_trim_sanitize(fp, "  %s\n");

+    if (c == -1) {
+        /* Timed out */
+        return NULL;
+    }
+
     /* end of the headers? */
     if (G.wget_buf[0] == '\0')
         return NULL;
@@ -924,7 +943,11 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct
host_info *target, len_and_
     char *pass;
     int port;

+    /* TODO: Add retry support for ftp */
     sfp = open_socket(lsa);
+    if (!sfp) {
+        xfunc_die();
+    }
 #if ENABLE_FEATURE_WGET_HTTPS
     if (target->protocol == P_FTPS)
         spawn_ssl_client(target->host, fileno(sfp),
TLSLOOP_EXIT_ON_LOCAL_EOF);
@@ -979,7 +1002,11 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct
host_info *target, len_and_

     set_nport(&lsa->u.sa, htons(port));

+    /* TODO: Add retry support for ftp */
     *dfpp = open_socket(lsa);
+    if (!*dfpp) {
+        xfunc_die();
+    }

 #if ENABLE_FEATURE_WGET_HTTPS
     if (target->protocol == P_FTPS) {
@@ -1008,7 +1035,8 @@ static FILE* prepare_ftp_session(FILE **dfpp, struct
host_info *target, len_and_
     return sfp;
 }

-static void NOINLINE retrieve_file_data(FILE *dfp)
+/* Return -1 if times out so we can retry */
+static int NOINLINE retrieve_file_data(FILE *dfp)
 {
 #if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
 # if ENABLE_FEATURE_WGET_TIMEOUT
@@ -1065,6 +1093,8 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
                     rdsz = (unsigned)G.content_len;
                 }
             }
+            /* We probably have some data in fgets_buffer, so we need to
+               flush it first */
             n = fread_buffered(G.wget_buf, rdsz, dfp);

             if (n > 0) {
@@ -1105,7 +1135,7 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
 # if ENABLE_FEATURE_WGET_TIMEOUT
                 if (second_cnt != 0 && --second_cnt == 0) {
                     progress_meter(PROGRESS_END);
-                    bb_error_msg_and_die("download timed out");
+                    return -1;
                 }
 # endif
                 /* We used to loop back to poll here,
@@ -1129,7 +1159,9 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
             break;

         /* Each chunk ends with "\r\n" - eat it */
-        fgets_trim_sanitize(dfp, NULL);
+        if (fgets_trim_sanitize(dfp, NULL) == -1 ) {
+            return -1;
+        }
  get_clen:
         /* chunk size format is "HEXNUM[;name[=val]]\r\n" */
         fgets_trim_sanitize(dfp, NULL);
@@ -1172,6 +1204,8 @@ static void NOINLINE retrieve_file_data(FILE *dfp)
         fprintf(stderr, "written to stdout\n");
     else
         fprintf(stderr, "'%s' saved\n", G.fname_out);
+
+    return 0;
 }

 static void download_one_url(const char *url)
@@ -1181,6 +1215,7 @@ static void download_one_url(const char *url)
     len_and_sockaddr *lsa;
     FILE *sfp;                      /* socket to web/ftp server         */
     FILE *dfp;                      /* socket to ftp server (data)      */
+    unsigned cur_tries = 0;         /* number of tries so far           */
     char *fname_out_alloc;
     char *redirected_path = NULL;
     struct host_info server;
@@ -1246,9 +1281,21 @@ static void download_one_url(const char *url)
          * We are not sure it exists on remote side */
     }

+ retry:
+    cur_tries++;
+    if (G.tries != 0 && cur_tries > G.tries) {
+        if (G.tries != 1) /* Show message about the tries only if was set
to more than one */
+            bb_error_msg_and_die("Gave up after %u tries", G.tries);
+        else
+            xfunc_die();
+    }
     redir_limit = 5;
  resolve_lsa:
-    lsa = xhost2sockaddr(server.host, server.port);
+    /* If DNS resolution fails, retry, don't die */
+    lsa = host2sockaddr(server.host, server.port);
+    if (!lsa)
+        goto retry;
+
     if (!(option_mask32 & WGET_OPT_QUIET)) {
         char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
         fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
@@ -1278,6 +1325,8 @@ static void download_one_url(const char *url)
 # if ENABLE_FEATURE_WGET_HTTPS
             if (fd < 0) { /* no openssl? try internal */
                 sfp = open_socket(lsa);
+                if (!sfp)
+                    goto retry;
                 spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
                 goto socket_opened;
             }
@@ -1290,15 +1339,22 @@ static void download_one_url(const char *url)
             goto socket_opened;
         }
         sfp = open_socket(lsa);
+        if (!sfp) {
+            goto retry;
+        }
  socket_opened:
 #elif ENABLE_FEATURE_WGET_HTTPS
         /* Only internal TLS support is configured */
         sfp = open_socket(lsa);
-        if (server.protocol == P_HTTPS)
+        if (!sfp)
+            goto retry;
+        if (server.protocol == P_HTTPS) {
             spawn_ssl_client(server.host, fileno(sfp), /*flags*/ 0);
 #else
         /* ssl (https) support is not configured */
         sfp = open_socket(lsa);
+        if (!sfp)
+            goto retry;
 #endif
         /* Send HTTP request */
         if (use_proxy) {
@@ -1374,7 +1430,11 @@ static void download_one_url(const char *url)
          * Retrieve HTTP response line and check for "200" status code.
          */
  read_response:
-        fgets_trim_sanitize(sfp, "  %s\n");
+        if (fgets_trim_sanitize(sfp, "  %s\n") == -1) {
+            /* Timed out */
+            bb_error_msg("timed out");
+            goto retry;
+        }

         str = G.wget_buf;
         str = skip_non_whitespace(str);
@@ -1449,7 +1509,8 @@ However, in real world it was observed that some web
servers
             /* Partial Content even though we did not ask for it??? */
             /* fall through */
         default:
-            bb_error_msg_and_die("server returned error: %s", G.wget_buf);
+            bb_error_msg("server returned error: %s", G.wget_buf);
+            goto retry;
         }

         /*
@@ -1475,19 +1536,24 @@ However, in real world it was observed that some
web servers
             if (key == KEY_content_length) {
                 G.content_len = BB_STRTOOFF(str, NULL, 10);
                 if (G.content_len < 0 || errno) {
-                    bb_error_msg_and_die("content-length %s is garbage",
str);
+                    bb_error_msg("content-length %s is garbage", str);
+                    goto retry;
                 }
                 G.got_clen = 1;
                 continue;
             }
             if (key == KEY_transfer_encoding) {
-                if (strcmp(str_tolower(str), "chunked") != 0)
-                    bb_error_msg_and_die("transfer encoding '%s' is not
supported", str);
+                if (strcmp(str_tolower(str), "chunked") != 0) {
+                    bb_error_msg("transfer encoding '%s' is not
supported", str);
+                    goto retry;
+                }
                 G.chunked = 1;
             }
             if (key == KEY_location && status >= 300) {
-                if (--redir_limit == 0)
-                    bb_error_msg_and_die("too many redirections");
+                if (--redir_limit == 0) {
+                    bb_error_msg("too many redirections");
+                    goto retry;
+                }
                 fclose(sfp);
                 if (str[0] == '/') {
                     free(redirected_path);
@@ -1526,13 +1592,18 @@ However, in real world it was observed that some
web servers
     free(lsa);

     if (!(option_mask32 & WGET_OPT_SPIDER)) {
+        int retrieve_retval;
         if (G.output_fd < 0)
             G.output_fd = xopen(G.fname_out, G.o_flags);
-        retrieve_file_data(dfp);
+        retrieve_retval = retrieve_file_data(dfp);
         if (!(option_mask32 & WGET_OPT_OUTNAME)) {
             xclose(G.output_fd);
             G.output_fd = -1;
         }
+        if (retrieve_retval < 0) { /* We timed out, if we retry so the fd
is closed */
+            bb_error_msg("download timed out");
+            goto retry;
+        }
     } else {
         fprintf(stderr, "remote file exists\n");
     }
@@ -1570,8 +1641,8 @@ int wget_main(int argc UNUSED_PARAM, char **argv)
         "user-agent\0"       Required_argument "U"
 IF_FEATURE_WGET_TIMEOUT(
         "timeout\0"          Required_argument "T")
+        "tries\0"            Required_argument "t"
         /* Ignored: */
-IF_DESKTOP(    "tries\0"            Required_argument "t")
         "header\0"           Required_argument "\xff"
         "post-data\0"        Required_argument "\xfe"
         "spider\0"           No_argument       "\xfd"
@@ -1601,14 +1672,14 @@ IF_DESKTOP(    "no-parent\0"
No_argument       "\xf0")
 #if ENABLE_FEATURE_WGET_TIMEOUT
     G.timeout_seconds = 900;
 #endif
+    G.tries = 1;
     G.proxy_flag = "on";   /* use proxies if env vars are set */
     G.user_agent = "Wget"; /* "User-Agent" header field */

 #if ENABLE_FEATURE_WGET_LONG_OPTIONS
 #endif
     GETOPT32(argv, "^"
-        "cqSO:o:P:Y:U:T:+"
-        /*ignored:*/ "t:"
+        "cqSO:o:P:Y:U:T:+t:+"
         /*ignored:*/ "n::"
         /* wget has exactly four -n<letter> opts, all of which we can
ignore:
          * -nv --no-verbose: be moderately quiet (-q is full quiet)
@@ -1625,7 +1696,7 @@ IF_DESKTOP(    "no-parent\0"        No_argument
"\xf0")
         , &G.fname_out, &G.fname_log, &G.dir_prefix,
         &G.proxy_flag, &G.user_agent,
         IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds)
IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
-        NULL, /* -t RETRIES */
+        &G.tries, /* -t RETRIES */
         NULL  /* -n[ARG] */
         IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
         IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
-- 
1.9.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.busybox.net/pipermail/busybox/attachments/20190115/e68d9456/attachment-0001.html>


More information about the busybox mailing list