keeping as much persistent log-data as will possibly fit (was: [PATCH] syslogd: try to handle ENOSPC during writes)

Joshua Judson Rosen jrosen at harvestai.com
Fri May 23 17:59:57 UTC 2014


I know that my "free(globpat);" calls are broken in this patch
(I should be assigning "globpat=NULL" after freeing it, and
  I also should have "#if" clauses around them); but aside from that,
I think that the logic is correct--I'd really appreciate whatever
review anyone can here give.

Especially if anyone else has been in the same sort of situation
(really needing to keep as much persistent log-data as will possibly
fit into their available storage) and especially if you've found
other ways of dealing with it.

Also, note that this depends on some of the other patches I sent
earlier; I expect that the whole "log until you hit ENOSPC and then
try to recover" idea may be somewhat controversial (I'm still budgeting
to try to make ENOSPC _unlikely_ on my project, but I have little
enough _guaranteed_ headroom that I do feel like I need a patch like this)....

(I'm hoping the the _other_ patches are _not_ controversial...)

On 2014-05-23 01:50, Joshua Judson Rosen wrote:
> Unlink the oldest rotated-out logfile
> and then try writing the rest of the msg
> if there's a chance that the retry will succeed.
>
> Signed-off-by: Joshua Judson Rosen <jrosen at harvestai.com>
> ---
>   sysklogd/Config.src |    9 +++++
>   sysklogd/syslogd.c  |  112 ++++++++++++++++++++++++++++++++++++++++++++++-----
>   2 files changed, 112 insertions(+), 9 deletions(-)
>
> diff --git a/sysklogd/Config.src b/sysklogd/Config.src
> index 7938f6f..eefd261 100644
> --- a/sysklogd/Config.src
> +++ b/sysklogd/Config.src
> @@ -50,6 +50,15 @@ config FEATURE_LOGROTATE_CMD
>   	  an external command to call in order to rotate
>   	  logs when they pass the rotation-threshold.
>
> +config FEATURE_LOGROTATE_ENOSPC
> +	bool "Support emergency pruning on ENOSPC"
> +	default n
> +	depends on FEATURE_ROTATE_LOGFILE
> +	help
> +	  This makes the log-writing logic for each
> +	  logfile try to handle ENOSPC errors by
> +	  discarding the oldest already-rotated logfiles.
> +
>   config FEATURE_LOGROTATE_TIMESTAMPS
>   	bool "Suffix rotated logfile names with timestamps"
>   	default n
> diff --git a/sysklogd/syslogd.c b/sysklogd/syslogd.c
> index fc6b097..cde5748 100644
> --- a/sysklogd/syslogd.c
> +++ b/sysklogd/syslogd.c
> @@ -603,6 +603,32 @@ static void kmsg_cleanup(void) {}
>   static void log_to_kmsg(int pri UNUSED_PARAM, const char *msg UNUSED_PARAM) {}
>   #endif /* FEATURE_KMSG_SYSLOG */
>
> +#if ENABLE_FEATURE_LOGROTATE_TIMESTAMPS || ENABLE_FEATURE_LOGROTATE_ENOSPC
> +/* malloc and format a glob-pattern to match rotated versions of a logfile */
> +static char *mkglobpat(const logFile_t *log_file, int path_len)
> +{
> +#if ENABLE_FEATURE_LOGROTATE_TIMESTAMPS
> +	char *globpat = malloc(path_len + 1   +   5*14   +  1);
> +	                               /* . YYYYmmddHHMMSS */
> +#elif ENABLE_FEATURE_LOGROTATE_ENOSPC
> +	char *globpat = malloc(path_len + 1 + 1 + 5 + 1  +  10  +  1 + 1);
> +	                               /* .   { [0-9] , [0-9][0-9] } */
> +#endif
> +	if (!globpat) {
> +		return NULL;
> +	}
> +#if ENABLE_FEATURE_LOGROTATE_TIMESTAMPS
> +	/* NOTE: this glob ignore any old series-stamped "*.[[:digit:]]" files: */
> +	sprintf(globpat, "%s.[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",
> +		log_file->path); /* .YYYYmmddHHMMSS */
> +#elif ENABLE_FEATURE_LOGROTATE_ENOSPC
> +	sprintf(globpat, "%s.{[0-9],[0-9][0-9]}", log_file->path);
> +#endif
> +
> +	return globpat;
> +}
> +#endif
> +
>   /* Print a message to the log file. */
>   static void log_locally(time_t now, char *msg, logFile_t *log_file)
>   {
> @@ -613,6 +639,10 @@ static void log_locally(time_t now, char *msg, logFile_t *log_file)
>   #if ENABLE_FEATURE_ROTATE_LOGFILE
>   	int len_written;
>   #endif
> +#if ENABLE_FEATURE_LOGROTATE_TIMESTAMPS || ENABLE_FEATURE_LOGROTATE_ENOSPC
> +	char *globpat = NULL;
> +	glob_t globres;
> +#endif
>
>   	if (log_file->fd >= 0) {
>   		/* Reopen log files every second. This allows admin
> @@ -641,6 +671,9 @@ static void log_locally(time_t now, char *msg, logFile_t *log_file)
>   			full_write(fd, msg, len);
>   			if (fd != 2)
>   				close(fd);
> +			if (globpat) {
> +				free(globpat);
> +			}
>   			return;
>   		}
>   #if ENABLE_FEATURE_ROTATE_LOGFILE
> @@ -685,17 +718,12 @@ static void log_locally(time_t now, char *msg, logFile_t *log_file)
>   #endif
>   			char newFile[i];
>   #if ENABLE_FEATURE_LOGROTATE_TIMESTAMPS
> -			char globpat[oldname_len + 1 + 5*14 + 1]; /* .YYYYmmddHHMMSS */
> -			glob_t globres;
> -
>   			struct stat statbuf;
>   			struct tm *ltmp;
>
> -			/* FIXME: this glob doesn't DTRT for old series-stamped
> -			   "^*.[[:digit:]]$" logfiles; may want to use scandir
> -			   to control sorting:
> -			*/
> -			sprintf(globpat, "%s.[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]", log_file->path);
> +			if (!globpat) {
> +				globpat = mkglobpat(log_file, oldname_len);
> +			}
>   			glob(globpat, 0, NULL, &globres);
>
>   			i = 0;
> @@ -742,15 +770,81 @@ static void log_locally(time_t now, char *msg, logFile_t *log_file)
>   		close(log_file->fd);
>   		goto reopen;
>   	}
> -
> +#if ENABLE_FEATURE_LOGROTATE_ENOSPC
> +continue_writing:
> +#endif
>   	len_written =
>   #endif
>   			full_write(log_file->fd, msg, len);
>   #if ENABLE_FEATURE_ROTATE_LOGFILE
>   	if (len_written >= 0) {
> +		/* len_written may be -1 if the write fails *immediately*,
> +		 * or any other value <= len if full_write needs
> +		 * to break the write up into multiple chunks
> +		 * and doesn't fail until one of the later chunks:
> +		 */
>   		log_file->size += len_written;
>   	}
>   #endif
> +
> +#if ENABLE_FEATURE_LOGROTATE_ENOSPC
> +	if (len_written < len && errno == ENOSPC) {
> +		int res = 0;
> +
> +		if (len_written > 0) {
> +			len -= len_written;
> +			msg += len_written;
> +		}
> +
> +		if (!globpat) {
> +			globpat = mkglobpat(log_file,
> +					    strlen(log_file->path));
> +		}
> +
> +#if ENABLE_FEATURE_LOGROTATE_TIMESTAMPS
> +		glob(globpat, 0, NULL, &globres);
> +#else
> +		glob(globpat, GLOB_BRACE, NULL, &globres);
> +#endif
> +		if (globres.gl_pathc) {
> +			res =
> +#if ENABLE_FEATURE_LOGROTATE_TIMESTAMPS
> +				/* older timestamps sort earlier;
> +				 * remove the oldest=first one:
> +				 */
> +				unlink(globres.gl_pathv[0])
> +#else
> +				/* older logfiles have higher sequence-numbers
> +				 * and sort later; remove the oldest=last one:
> +				 */
> +				unlink(globres.gl_pathv[globres.gl_pathc - 1])
> +#endif
> +				;
> +		}
> +		globfree(&globres);
> +
> +		if (res == 0 || res == ENOENT)
> +		{
> +			goto continue_writing;
> +
> +			/* ENOENT is the only failure-mode of unlink()
> +			 * from which we're likely to be able to recover
> +			 * (if someone else already unlinked the file, then
> +			 * when we try again either the write will succeed* or
> +			 * we'll move on to try unlinking the next-oldest file);
> +			 * anything else is more bizarre and probably
> +			 * needs manual intervention to resolve, so just
> +			 * call it `done' rather than making syslogd
> +			 * get stuck spinning in a loop....
> +			 */
> +		}
> +	}
> +#endif
> +
> +	if (globpat) {
> +		free(globpat); globpat = NULL;
> +	}
> +
>   #ifdef SYSLOGD_WRLOCK
>   	fl.l_type = F_UNLCK;
>   	fcntl(log_file->fd, F_SETLKW, &fl);
>

-- 
"'tis an ill wind that blows no minds."


More information about the busybox mailing list