, 1 min read

Filtering Lines with Specific Length

Task at hand: extract all lines of a file that do not have a specific line length.

Example: Print all lines whose length is neither 171 nor 1370.

Also see Line Length Distribution in Files.

The Perl one-liner below does that, here for the line lengths 20 and 21:

perl -ne 'print if (($n=length($_)) != 20 && $n != 21)' <file>

The C program below does the same with a little more comfort on the command line. It might also be useful if Perl is not available. With the command line option -g you specify a line length that is to be suppressed, i.e., lines of exactly that length are not printed. Multiple line lengths are given by repeating -g. As in the Perl one-liner, the length includes the trailing newline.

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define MAXBUF	8192	/* initial size of the line buffer; it grows on demand */

/*
 * Convert the argument of a -g option into a line length.
 *
 * Accepts only a complete, non-negative decimal number that fits into an int.
 * Returns 0 and stores the value in *out on success, -1 on invalid input.
 */
static int parse_length(const char *arg, int *out)
{
    char *end;
    long val;

    errno = 0;
    val = strtol(arg, &end, 10);
    if (end == arg || *end != '\0' || errno == ERANGE || val < 0 || val > INT_MAX) {
        return -1;
    }
    *out = (int)val;
    return 0;
}

/*
 * Return 1 if len equals one of the first gcnt entries of good[], else 0.
 */
static int is_good_length(size_t len, const int *good, int gcnt)
{
    for (int i = 0; i < gcnt; ++i) {
        if (len == (size_t)good[i]) {
            return 1;
        }
    }
    return 0;
}

/*
 * Read one complete line from fp, including its trailing newline if present.
 *
 * *line / *cap describe a heap buffer owned by the caller; it is enlarged
 * with realloc() whenever a line does not fit, so a long line is never cut
 * into fragments. The number of characters read is stored in *len.
 *
 * Returns 1 if a line was read, 0 if nothing could be read (end of file or
 * read error; use ferror() to distinguish), -1 if memory ran out.
 */
static int read_line(FILE *fp, char **line, size_t *cap, size_t *len)
{
    *len = 0;
    for (;;) {
        /* fgets needs room for at least one character plus the terminator */
        if (*cap - *len < 2) {
            size_t ncap = (*cap != 0) ? *cap * 2 : MAXBUF;
            char *tmp;

            if (ncap <= *cap) {
                return -1;	/* size_t overflow */
            }
            tmp = realloc(*line, ncap);
            if (tmp == NULL) {
                return -1;
            }
            *line = tmp;
            *cap = ncap;
        }

        size_t room = *cap - *len;
        int chunk = (room > (size_t)INT_MAX) ? INT_MAX : (int)room;

        if (fgets(*line + *len, chunk, fp) == NULL) {
            /* a final line without newline is still a line */
            return *len > 0;
        }
        *len += strlen(*line + *len);
        if (*len > 0 && (*line)[*len - 1] == '\n') {
            return 1;
        }
    }
}

/*
 * lenfilter: copy input to stdout, dropping every line whose length equals
 * one of the lengths given with -g. The length is counted in characters as
 * seen by strlen() and includes the trailing newline.
 *
 * Usage: lenfilter [-g length]... [file]
 * Input is read from file if given, otherwise from stdin.
 *
 * Exit status: 0 success, 1 too many -g options, 2 bad option or length,
 * 3 input file cannot be opened, 4 read error or out of memory.
 * All diagnostics go to stderr so they never mix with the filtered data.
 */
int main (int argc, char *argv[]) {
    int good[10] = { 0 };
    const int maxgood = (int)(sizeof good / sizeof good[0]);
    int c, gcnt = 0, status, rc = 0;
    char *line = NULL;
    size_t cap = 0, len = 0;
    FILE *fp;

    while ((c = getopt(argc,argv,"g:")) != -1) {
        switch(c) {
            case 'g':
                if (gcnt >= maxgood) {
                    fprintf(stderr,"%s: too many 'good' lines\n",argv[0]);
                    return 1;
                }
                if (parse_length(optarg,&good[gcnt]) != 0) {
                    fprintf(stderr,"%s: invalid line length '%s'\n",argv[0],optarg);
                    return 2;
                }
                ++gcnt;
                break;
            default:
                /* getopt has already reported the offending option on stderr */
                fprintf(stderr,"usage: %s [-g length]... [file]\n",argv[0]);
                return 2;
        }
    }
    if (optind < argc) {
        if ((fp = fopen(argv[optind],"r")) == NULL) {
            fprintf(stderr,"%s: cannot open %s\n",argv[0],argv[optind]);
            return 3;
        }
    } else fp = stdin;

    while ((status = read_line(fp,&line,&cap,&len)) > 0) {
        if (!is_good_length(len,good,gcnt)) {
            fputs(line,stdout);
        }
    }
    if (status < 0) {
        fprintf(stderr,"%s: out of memory\n",argv[0]);
        rc = 4;
    } else if (ferror(fp)) {
        fprintf(stderr,"%s: read error\n",argv[0]);
        rc = 4;
    }

    free(line);
    if (fp != stdin) {
        fclose(fp);
    }
    return rc;
}

The equivalent of the above Perl command would be

lenfilter -g20 -g21 <file>