Code:
$ cat mysrc/m1join.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <limits.h>
/*
 * File-scope state shared by main().  Everything is static, so it starts
 * zeroed unless an initializer says otherwise.
 */

/* Option flags, changed by the command-line parser in main(). */
static int msep_ign = 1 ;    /* 1: a run of separators is skipped as one; -m clears it */
static int fullouter = 0 ;   /* -a : emit unmatched lines of both files */
static int leftouter = 0 ;   /* -a1: emit unmatched lines of the first file */
static int rightouter = 0 ;  /* -a2: emit unmatched lines of the second file */
static int caseless = 0 ;    /* -i : compare keys with strncasecmp() instead of strncmp() */
static int paste = 0 ;       /* -p : after a match, read a new line from both files */
static int keycols = 1 ;     /* -c : number of leading columns that form the key */

/* Scratch values used while scanning and comparing the key columns. */
static int keylen1 ;         /* length of the current key field in fb1 */
static int keylen2 ;         /* length of the current key field in fb2 */
static int i ;               /* shared counter: argv index, then key column index */
static int cmp ;             /* <0: file 1 key is lower, >0: file 2 key is lower, 0: match */

/* Per-input status. */
static int eof1 = 0 ;        /* set once reading the first file hits end of file */
static int eof2 = 0 ;        /* set once reading the second file hits end of file */
static int unsent1 = 0 ;     /* set when a line is read into fb1, cleared when it is output in a match */
static int unsent2 = 0 ;     /* set when a line is read into fb2, cleared when it is output in a match */

/*
 * Separator characters.  -t replaces the leading part (CR and LF are always
 * appended); the first character doubles as the output separator.
 */
static char seps[256] = "\t \r\n" ;

static char *fn1 = NULL ;    /* first non-option argument: name of the first file */
static char *fn2 = NULL ;    /* second non-option argument: name of the second file */
static char *cp1 ;           /* scan cursor into fb1 */
static char *cp2 ;           /* scan cursor into fb2; after the key scan it marks the non-key remainder */
static char *cpx ;           /* not referenced by main() */
static char *cpy ;           /* not referenced by main() */
static FILE *fp1 = NULL ;    /* stream opened on fn1 */
static FILE *fp2 = NULL ;    /* stream opened on fn2 */
static char fb1[65536] ;     /* current line of the first file */
static char fb2[65536] ;     /* current line of the second file */
int main( int argc, char **argv ){
for ( i = 1 ; i < argc ; i++ ){
if ( !strcmp( argv[i], "-a" ) ){
fullouter = 1 ;
continue ;
}
if ( !strcmp( argv[i], "-a1" ) ){
leftouter = 1 ;
continue ;
}
if ( !strcmp( argv[i], "-a2" ) ){
rightouter = 1 ;
continue ;
}
if ( !strcmp( argv[i], "-i" ) ){
caseless = 1 ;
continue ;
}
if ( !strcmp( argv[i], "-p" ) ){
paste = 1 ;
continue ;
}
if ( !strcmp( argv[i], "-m" ) ){
msep_ign = 0 ;
continue ;
}
if ( !strcmp( argv[i], "-c" ) ){
if ( ++i == argc
|| 1 > ( keycols = atoi( argv[ i ] ) ) ){
fputs( "Invalid -c option!\n", stderr );
goto usage ;
}
continue ;
}
if ( !strcmp( argv[i], "-t" ) ){
if ( ++i == argc ){
fputs( "Invalid -t option!\n", stderr );
goto usage ;
}
sprintf( seps, "%s\r\n", argv[i] );
continue ;
}
if ( !fn1 ){
fn1 = argv[i] ;
continue ;
}
if ( !fn2 ){
fn2 = argv[i] ;
continue ;
}
fprintf( stderr, "Extra %d Options!\n", argc - i );
goto usage ;
}
if ( !fn2 ){
fputs( "Insufficient arguments!\n", stderr );
usage:
fputs(
"\n"
"Usage: m1_join [-i] [-a] [-t <sep_chars>] [-m] [-c <key_col_ct>] f1 f2\n"
"\n"
"Joins (possibly multiple) lines from sorted file f1 with each line from\n"
"sorted file f2, on leading key fields. Leading separators are not ignored.\n"
"Output is all the f1 fields followed by the first separator character (tab)\n"
"followed by non-matched fields of f2.\n"
"** Does not mind pipes as files! **\n"
"** Does not support 'one to many' or 'many to many', just 'many to one'! **\n"
"** (Duplicates in the second file are not matched.) **\n"
"\n"
"Options:\n"
"\n"
" -a All lines are output (full outer join).\n"
" -a1 All lines of f1 are output (left outer join).\n"
" -a2 All lines of f2 are output (right outer join).\n"
" -c Only one column is matched, unless -c is specified.\n"
" -i Keys (and the required sort order) are case-sensitive unless -i is\n"
" specified, in which case all letters are treated as lower case in the\n"
" ASCII binary sort order: both 'A' 0101 0x41 and 'a' 0141 0x61 are\n"
" greater than '_' 0137 0x5F.\n"
" -m Multiple separators are treated as one unless -m is specified.\n"
" -p Turns on paste mode: forces one to one matching.\n"
" -t Columns are separated by tab, space, carriage return or linefeed\n"
" unless -t specifies a string of other character(s).\n"
"\n",
stderr );
exit( 1 );
}
if ( !( fp1 = fopen( fn1, "r" ) ) ){
perror( fn1 );
exit( 1 );
}
if ( !( fp2 = fopen( fn2, "r" ) ) ){
perror( fn2 );
exit( 1 );
}
setvbuf( stdout, NULL, _IOFBF, PIPE_MAX );
setvbuf( fp1, NULL, _IOFBF, 65536 );
setvbuf( fp2, NULL, _IOFBF, 65536 );
read2:
if ( !fgets( fb2, sizeof(fb2), fp2 ) ){
if ( ferror( fp2 ) ){
perror( fn2 );
exit( 2 );
}
eof2 = 1 ;
}
else
unsent2 = 1 ;
read1:
if ( !fgets( fb1, sizeof(fb1), fp1 ) ){
if ( ferror( fp1 ) ){
perror( fn1 );
exit( 1 );
}
eof1 = 1 ;
}
else
unsent1 = 1 ;
loop:
if ( ( eof1
&& eof2 )
|| ( eof1
&& !rightouter
&& !fullouter )
|| ( eof2
&& !leftouter
&& !fullouter ) ){
exit( 0 );
}
if ( eof1 )
cmp = 1 ;
else if ( eof2 )
cmp = -1 ;
else
cmp = 0 ;
cp1 = fb1 ;
cp2 = fb2 ;
i = 0 ;
do { /* Find keys and if necessary compare them */
keylen1 = strcspn( cp1, seps );
keylen2 = strcspn( cp2, seps );
if ( !cmp ){
if ( keylen1 > keylen2 ){
cmp = ( caseless
? strncasecmp( cp1, cp2, keylen2 )
: strncmp( cp1, cp2, keylen2 ) );
if ( !cmp )
cmp = 1 ;
} else {
cmp = ( caseless
? strncasecmp( cp1, cp2, keylen1 )
: strncmp( cp1, cp2, keylen1 ) );
if ( !cmp
&& keylen1 < keylen2 )
cmp = -1 ;
}
}
cp1 += keylen1 ;
cp2 += keylen2 ;
if ( msep_ign ){
cp1 += strspn( cp1, seps ) ;
cp2 += strspn( cp2, seps ) ;
} else {
cp1++ ;
cp2++ ;
}
} while ( ++i < keycols ) ;
if ( cmp < 0 ){
if ( ( fullouter
|| leftouter )
&& unsent1
&& *fb1 ){
if ( 0 > printf( "%s", fb1 ) ){
if ( ferror( stdout ) ){
perror( "stdout" );
exit( 1 );
}
exit( 0 );
}
}
goto read1 ;
}
else if ( cmp > 0 ){
if ( ( fullouter
|| rightouter )
&& unsent2
&& *fb2 ){
if ( 0 > printf( "%.*s%c%s",
cp2 - fb2, fb2, *seps, cp2 ) ){
if ( ferror( stdout ) ){
perror( "stdout" );
exit( 1 );
}
exit( 0 );
}
}
if ( !fgets( fb2, sizeof( fb2 ), fp2 ) ){
if ( ferror( fp2 ) ){
perror( fn2 );
exit( 1 );
}
eof2 = 1 ;
}
else
unsent2 = 1 ;
goto loop ;
}
if ( !*cp2 ) /* Stepped over the linefeed! */
cp2-- ;
if ( 0 > printf( "%.*s%c%s", strlen( fb1 ) - 1, fb1, *seps, cp2 ) ){
if ( ferror( stdout ) ){
perror( "stdout" );
exit( 1 );
}
exit( 0 );
}
unsent1 = 0 ;
unsent2 = 0 ;
if ( paste )
goto read2 ;
goto read1 ;
}