#include <time.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#ifndef NO_GZIP
#include <zlib.h> // link with -lz
#endif
#define LOG(fmt, ...) fprintf(stderr, fmt "\n", ##__VA_ARGS__)
typedef struct {
union {
FILE* fp;
#ifndef NO_GZIP
gzFile gp;
};
int zipped;
size_t bufoff;
#else
};
#endif
char* fn;
char* buf;
size_t buflen;
time_t t, oldt;
const char* t_str;
} infile_t;
static const char* const default_fmt = "%b %d %H:%M:%S ";
static struct {
int graceful;
int verbose;
const char* time_fmt;
unsigned time_field;
struct tm now;
time_t start, end;
} config = {0, 0, default_fmt, 0, {}, 0, 0};
static time_t str2time(const char* fmt, unsigned field, const char* s) {
while (field > 0) {
while (*s == ' ') ++s;
while (*s != ' ') {
if (!*s) return 0;
s++;
}
while (*s == ' ') ++s;
field--;
}
struct tm stm = {};
if (!*s || !strptime(s, fmt, &stm)) { // does not properly support e.g. tz offsets
return 0;
}
if (!stm.tm_year) { // some formats don't provide a year
if (stm.tm_mon > config.now.tm_mon) {
stm.tm_year = config.now.tm_year - 1;
} else {
stm.tm_year = config.now.tm_year;
}
}
time_t rv = mktime(&stm);
return (rv == (time_t)-1)? 0: rv;
}
static int open_files(infile_t* files, const char** argv, size_t num) {
size_t i;
for (i=0; i<num; ++i) {
files[i].fn = strdup(argv[i]);
#ifndef NO_GZIP
if (strstr(files[i].fn, ".gz") == files[i].fn + strlen(files[i].fn)-3) {
files[i].zipped = 1;
files[i].gp = gzopen(files[i].fn, "rb");
} else // XXX: -->
#endif
files[i].fp = fopen(files[i].fn, "r");
if (!files[i].fp) {
if (config.verbose || !config.graceful) {
LOG("cannot open '%s': %s", files[i].fn, strerror(errno));
}
if (config.graceful) {
continue;
} else {
return -1;
}
}
if (config.verbose) {
LOG("opened '%s'", files[i].fn);
}
}
return 0;
}
static int read_next(infile_t* f) {
while (f->fp && !f->t) {
#ifndef NO_GZIP
if (f->zipped) {
// already have a whole line buffered?
assert(f->bufoff <= f->buflen);
char* nl = (char*)memchr(f->buf + f->bufoff, '\n', f->buflen - f->bufoff);
if (!nl) {
// move leftovers to the start if needed
size_t newoff; // remember where to read new data to
if (f->buflen == f->bufoff) {
newoff = 0;
} else { // no ringbuf, thus have to copy to make room
memmove(f->buf, f->buf + f->bufoff, f->buflen - f->bufoff);
newoff = f->buflen - f->bufoff;
}
f->bufoff = 0;
do {
// can read?
if (gzeof(f->gp)) {
gzclose(f->gp);
f->fp = NULL;
if (newoff) {
LOG("eof before line end in from '%s'", f->fn);
return config.graceful? 0: -1;
} else {
return 0; // finished a whole line before
}
}
// increase space if required, as getline() does
if (newoff >= f->buflen/2) {
if (!f->buflen) {
f->buflen = 4096;
} else {
f->buflen *= 2;
}
f->buf = (char*)realloc(f->buf, f->buflen);
if (!f->buf) return -1;
}
// read decompressed file
int rv = gzread(f->gp, f->buf + newoff, f->buflen - newoff);
if (rv == -1) {
LOG("cannot decompress from '%s': %s", f->fn, strerror(errno));
gzclose(f->gp);
f->fp = NULL;
return config.graceful? 0: -1;
} else if (rv == 0) {
gzclose(f->gp);
f->fp = NULL;
if (newoff) {
LOG("premature eof before line end in from '%s'", f->fn);
return config.graceful? 0: -1;
}
return 0; // done
} else if ((size_t)rv < f->buflen - newoff) {
assert(gzeof(f->gp)); // will never read again
f->buflen = newoff + (size_t)rv;
}
// try again
assert(!f->bufoff);
} while ((nl = (char*)memchr(f->buf, '\n', f->buflen)) == NULL);
}
// found it, remember and check timestamp
*nl = '\0';
f->t_str = f->buf + f->bufoff;
f->bufoff = nl - f->buf + 1;
f->t = str2time(config.time_fmt, config.time_field, f->t_str);
#else
if (__builtin_expect(0,0)) {
#endif
} else {
// get next line in a whole
errno = 0; // for eof check below
ssize_t rv = getline(&f->buf, &f->buflen, f->fp);
if (rv == -1) {
int eof = (errno == 0);
if (!eof) {
LOG("cannot read from '%s': %s", f->fn, strerror(errno));
}
fclose(f->fp);
f->fp = NULL;
return (eof || config.graceful)? 0: -1;
}
// found it, remember and check timestamp
assert(rv >= 1); // includes delimiter
assert(f->buf[(size_t)rv-1] == '\n');
f->buf[(size_t)rv-1] = '\0';
f->t_str = f->buf;
f->t = str2time(config.time_fmt, config.time_field, f->t_str);
}
// timestamp parsing success?
if (!f->t) {
if (config.verbose) {
LOG("cannot parse timestamp from '%s'", f->fn); // TODO: track linenos
}
if (!config.graceful) return -1;
} else if (f->t < f->oldt) {
if (config.verbose) {
LOG("timestamp in '%s' out of order (%ld/%ld)", f->fn, (long)f->oldt, (long)f->t);
}
if (!config.graceful) return -1;
#if 0
f->t = 0; // skip
#else
f->t = f->oldt; // keep out of order but stable
#endif
} else if (f->t < config.start) {
f->t = 0;
} else if (config.end && f->t >= config.end) {
#ifndef NO_GZIP
if (f->zipped)
gzclose(f->gp);
else
#endif
fclose(f->fp);
f->fp = NULL;
f->t = 0;
}
}
// next line found or file done
return 0;
}
static infile_t* find_next(infile_t* files, size_t num) {
infile_t* min = NULL;
size_t i;
for (i=0; i<num; ++i) {
if (!files[i].t) continue;
if (!min || files[i].t < min->t) { // < s.t. it is stable
min = &files[i];
}
}
if (min) { // flag line as used
min->oldt = min->t;
min->t = 0;
}
return min;
}
static int usage(const char* name) {
LOG("usage: %s [-b] [-v] [-s timestamp] [-e timestamp] [-t num] [-f format] files...", name);
LOG(" -b: best-effort/graceful: don't fail if a file could not be opened, there is no input, or upon other correctable errors");
LOG(" -v: verbose: log upon parsing errors");
LOG(" -s: start: provide timestamp for the earliest entry to print (inclusive, in the format specified)");
LOG(" -e: end: provide timestamp for when to stop (exclusive, in the format specified)");
LOG(" -t: skip this number of space-separated tokens for timestamp parsing");
LOG(" -f: provide format for timestamp parsing (default: '%s')", default_fmt);
return 1;
}
int main(int argc, char** argv) {
close(0);
int arg;
const char* start_str = NULL;
const char* end_str = NULL;
for (arg=1; arg<argc; ++arg) {
if (*argv[arg] != '-') {
break;
} else if (strcmp(argv[arg], "-b") == 0) {
config.graceful = 1;
} else if (strcmp(argv[arg], "-v") == 0) {
config.verbose = 1;
} else if (strcmp(argv[arg], "-s") == 0) {
if (arg+1 >= argc) {
return usage(argv[0]);
}
start_str = argv[++arg];
} else if (strcmp(argv[arg], "-e") == 0) {
if (arg+1 >= argc) {
return usage(argv[0]);
}
end_str = argv[++arg];
} else if (strcmp(argv[arg], "-f") == 0) {
if (arg+1 >= argc) {
return usage(argv[0]);
}
config.time_fmt = argv[++arg];
} else if (strcmp(argv[arg], "-t") == 0) {
if (arg+1 >= argc) {
return usage(argv[0]);
}
int tf = atoi(argv[++arg]);
if (tf < 0) {
return usage(argv[0]);
} else if (!tf && strcmp(argv[arg], "0") != 0) {
return usage(argv[0]);
} else {
config.time_field = (unsigned)tf;
}
} else {
return usage(argv[0]);
}
}
time_t now = time(NULL);
config.now = *localtime(&now);
if (start_str && *start_str) { // non-empty
config.start = str2time(config.time_fmt, 0, start_str);
if (!config.start) {
return usage(argv[0]);
}
}
if (end_str && *end_str) {
config.end = str2time(config.time_fmt, 0, end_str);
if (!config.end || config.end <= config.start) {
return usage(argv[0]);
}
}
const size_t num = argc-arg;
if (!num) {
if (config.graceful) {
return 0;
} else {
return usage(argv[0]);
}
}
infile_t* files = (infile_t*)calloc(num, sizeof(infile_t));
if (open_files(files, (const char**)&(argv[arg]), num) != 0) {
return 1;
}
while (1) {
size_t i;
for (i=0; i<num; ++i) {
if (read_next(&files[i]) != 0) {
return 1;
}
}
infile_t* min = find_next(files, num);
if (!min) {
break;
} else if (puts(min->t_str) == EOF) {
return 1;
}
}
return 0;
}