/* file: field.c	(from "The UNIX System" by S.R. Bourne, pp. 227-9)

Select fields or columns from a file

Modified by G. Moody, 5 August 1983

This version includes several enhancements to the original, including
  - array range checking;  truncation of excessive-length records
  - dynamically allocated arrays (fp, fv, and L)
  - selectable maximum record length (-l; default: 20000 characters)
  - selectable input/output field separators (-i, -o, -t; default: \t)
  - an option (-c) to check that all input records contain the same number
    of fields
  - an option (-w) to identify fields as "words" surrounded by arbitrary
    amounts of whitespace
  - an option (-m) to treat multiple consecutive instances of an input field
    separator as single field separators
  - selectable record separators (-r; default: \n)
  - standard error output diagnostics

Exit status: 0 on success, 2 if memory cannot be allocated, 3 on a usage
error (or after -h).
*/

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

/* True if c is whitespace for word mode (-w).  NOTE: this macro reads the
   variable IFS from the scope in which it is expanded, so it can only be used
   where an 'IFS' holding the input field separator is visible (i.e. in main). */
#define iswhite(c) ((c)==' '||(c)=='\t'||(c)=='\n'||(c)=='\f'||(c)==IFS)

int mf;		/* number of fields in the current record */
char **fp;	/* pointers into 'L' at field boundaries */

void putf(int n);
void help(void);

/* Return the single-character value of the option at argv[*ip]: the character
   attached to the option letter ("-iX"), or, if none is attached and another
   argument follows, the first character of that argument ("-i X"), in which
   case *ip is advanced past it.  The result is returned as an unsigned char
   value so that it compares correctly with what getchar() returns.  If no
   value is available the result is 0. */
static int option_char(int argc, char **argv, int *ip)
{
    int ch = (unsigned char)argv[*ip][2];

    if (ch == '\0' && *ip < argc - 1)
	ch = (unsigned char)*argv[++*ip];
    return ch;
}

int main(int argc, char **argv)
{
    char *cp;		/* next free byte in L */
    char **ap;		/* next free slot in fp */
    int c;
    int *fv;		/* numerical equivalent of arguments */
    char *L;		/* current line buffer */
    int IFS = '\t';	/* input field separator */
    int OFS = '\t';	/* output field separator */
    int RS = '\n';	/* record separator */
    int cf = 0;		/* 0: no field-count check; -1: check requested but
			   no record seen yet; >0: expected field count */
    int i, LMAX = 20000, mflag = 0, nf = 0, rn = 1, wflag = 0;

    /* read options */
    for (i = 1; i < argc && *argv[i] == '-'; i++)
	switch (argv[i][1]) {
	case 'c':
	    cf = -1;
	    break;
	case 'h':
	    help();
	    exit(3);
	case 'i':
	    IFS = option_char(argc, argv, &i);
	    break;
	case 'o':
	    OFS = option_char(argc, argv, &i);
	    break;
	case 'l': {
	    const char *arg = NULL;

	    if (argv[i][2])
		arg = argv[i] + 2;
	    else if (i < argc - 1)
		arg = argv[++i];
	    /* As before, a trailing "-l" with no value keeps the default. */
	    if (arg != NULL) {
		char *end;
		long v = strtol(arg, &end, 10);

		/* Reject non-numeric, non-positive and oversized lengths
		   rather than passing them on to calloc. */
		if (end == arg || v < 1 || v > INT_MAX - 1) {
		    fprintf(stderr, "%s: invalid record length %s\n",
			    argv[0], arg);
		    exit(3);
		}
		LMAX = (int)v;
	    }
	    break;
	}
	case 'm':
	    mflag = 1;
	    break;
	case 'r':
	    RS = option_char(argc, argv, &i);
	    break;
	case 't':
	    IFS = OFS = option_char(argc, argv, &i);
	    break;
	case 'w':
	    wflag = 1;
	    break;
	default:
	    fprintf(stderr, "%s: illegal option %s\n", argv[0], argv[i]);
	    help();
	    exit(3);
	}

    /* allocate storage for arrays.  L gets one byte beyond LMAX for the
       terminating null of a full-length record, and fp one slot beyond LMAX
       because a record of LMAX separators contains LMAX+1 (empty) fields.
       fv gets one spare element so that a request for zero elements (no
       FIELD arguments) cannot be mistaken for an allocation failure. */
    L = calloc((size_t)LMAX + 1, sizeof *L);
    fp = calloc((size_t)LMAX + 1, sizeof *fp);
    fv = calloc((size_t)(argc - i) + 1, sizeof *fv);
    if (L == NULL || fv == NULL || fp == NULL) {
	fprintf(stderr, "%s: insufficient memory\n", argv[0]);
	return (2);
    }

    /* read arguments into fv[...] */
    while (i < argc)
	if (sscanf(argv[i++], "%d", &fv[nf++]) != 1) {
	    help();
	    exit(3);
	}

    /* read and copy input */
    nf--;		/* nf is now the index of the last requested field */
    cp = L;
    ap = fp;
    *ap++ = cp;
    while (1) {
	c = getchar();
	/* The buffer is full and there is still data in this record:
	   discard the remainder of the record. */
	if (cp >= L + LMAX && c != RS && c != EOF) {
	    fprintf(stderr, "warning: record %d truncated (> %d bytes)\n",
		    rn, LMAX);
	    while (c != RS && c != EOF)
		c = getchar();
	}
	if (c == RS || c == EOF) {
	    int fc;

	    /* EOF immediately after a record separator: nothing pending. */
	    if (cp == L && c == EOF)
		break;
	    *cp++ = 0;
	    mf = (int)(ap - fp);
	    if (cf) {
		if (cf < 0)
		    cf = mf;	/* first record defines the expected count */
		else if (cf != mf)
		    fprintf(stderr,
			    "record %d: field count %d (should be %d)\n",
			    rn, mf, cf);
	    }

	    /* print this line */
	    for (fc = 0; fc <= nf; fc++) {
		putf(fv[fc] - 1);
		if (fc != nf)
		    putchar(OFS);
	    }
	    /* A final record that was not terminated by RS is written
	       without a trailing RS. */
	    if (c == EOF)
		break;
	    putchar(RS);
	    cp = L;
	    ap = fp;
	    *ap++ = cp;
	    rn++;
	}
	else if (wflag && iswhite(c)) {
	    /* Whitespace at the very start of a record does not begin a
	       new field. */
	    if (cp > L) {
		*cp++ = 0;
		*ap++ = cp;
	    }
	    /* Skip the rest of this run of whitespace, then push back the
	       character that ended it so the main loop processes it. */
	    do {
		c = getchar();
	    } while (c != RS && c != EOF && iswhite(c));
	    ungetc(c, stdin);
	}
	else if (c == IFS) {
	    *cp++ = 0;
	    *ap++ = cp;
	    if (mflag) {
		/* Collapse a run of separators into one. */
		while ((c = getchar()) == IFS)
		    ;
		ungetc(c, stdin);
	    }
	}
	else
	    *cp++ = (char)c;
    }

    free(fv);
    free(fp);
    free(L);
    return (0);
}

/* output field n (zero-based) from the current line; a field number outside
   the range 0 .. mf-1 produces no output */
void putf(int n)
{
    const char *cp;

    /* Validate n before touching fp[n]. */
    if (n < 0 || n >= mf)
	return;
    for (cp = fp[n]; *cp != '\0'; cp++)
	putchar(*cp);
}

/* print a usage summary on the standard error output */
void help(void)
{
    fprintf(stderr, "usage: field [OPTIONS ...] [FIELD ...]\n"
	" where FIELD is a field number (1 or greater), and OPTION may include:\n"
	" -c count fields in each record (line), print warning if unequal\n"
	" -h print this help message\n"
	" -i C interpret C as field separator in input (default: tab)\n"
	" -l N accept up to N characters per line (default: 20000)\n"
	" -m treat multiple consecutive input field separators as one\n"
	" -o C use C as field separator in output (default: tab)\n"
	" -r C use C as record separator in input and output (default: newline)\n"
	" -t C equivalent to -i C -o C\n"
	" -w 'word mode': input fields are words separated by one or more\n"
	" whitespace characters\n"
	" The standard output contains the selected fields from the standard\n"
	" input. Fields may be rearranged in any order or duplicated.\n");
}