Hi All,
I have written a code on Linux that searches a long dictionary. I have used hsearch() function but the problem is it does not work.
This is my code:
//Search the count values from the dictionary.
Code :
#define _GNU_SOURCE
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<search.h>
#include<inttypes.h>
#include<math.h>
#define DICTIONARY_LENGTH 180326
#define N 6075
char ** read_file ( char * );
size_t number_of_words = 0;
char ** read_file ( char *path )
{
char ch;
char *line = NULL;
size_t len = 0;
ssize_t read;
number_of_words = 0;
unsigned long int i = 0;
unsigned long int j = 0;
FILE *pointer = NULL;
char **word_array = NULL;
pointer = fopen ( path , "r" );
if ( pointer == NULL)
{
perror ( "File read error " );
//return ( EXIT_FAILURE );
}
//counting the number of words...
while ( !feof ( pointer ) )
{
ch = fgetc ( pointer );
if ( ch == '\n' && ch != EOF )
{
number_of_words ++;
}
}
rewind ( pointer );
word_array = malloc ( number_of_words * sizeof ( char * ) );
if ( word_array == NULL )
{
perror ( "malloc() memory allocation failure" );
//return ( EXIT_FAILURE );
}
for ( i = 0 ; i < number_of_words ; i ++ )
{
word_array[i] = malloc ( 100 * sizeof ( char ) );//CHANGEDHERE
if ( word_array[i] == NULL )
{
perror ( "malloc() memory allocation failure" );
//return ( EXIT_FAILURE );
}
}
//lets extract text here...
i = 0;
j = 0;
while ( !feof ( pointer ) )
{
while ( ( read = getline ( &line , &len , pointer ) ) != -1 )
{
strcpy ( word_array[i] , line );
//read = read - 1;
//for ( i = 0 ; i < read ; i ++ )
//{
// * ( word_array + j ++ ) = line [i];
//}
if ( i <= number_of_words )
{
i++;
}
}
}
//return ( word_array );
/*for ( i = 0 ; i < number_of_words ; i ++ )
{
printf ( "%s" , word_array[i] );
}*/
fclose ( pointer );
return ( word_array );//CHANGE MADE HERE....
/*for ( i = 0 ; i < number_of_words ; i ++ )
{
free ( word_array [i] );
}
free ( word_array );*/
//return ( word_array );
}
int32_t main ( int argc , char **argv )
{
FILE *dictionary = NULL;
FILE *dic_list = NULL;
FILE *output_pointer = NULL;
char *line = NULL;
size_t len = 0;
ssize_t read;
char temp_char[150] = {0};
float idf = 0;
char **words_from_dic = NULL;
size_t j = 0;
size_t i = 0;
float value = 0;
system ( "ls -1 *.dic > files_dic.mtp" );
ENTRY e , *ep;
int32_t number = 0;
char *word = NULL;
word = ( char * ) malloc ( 18 * sizeof ( char ) );
if ( word == NULL )
{
perror ( "malloc() memory allocation failure" );
return ( EXIT_FAILURE );
}
dictionary = fopen ( "count_words.txt" , "r" );
if ( dictionary == NULL )
{
perror ( "file open error:dictionary" );
return ( EXIT_FAILURE );
}
hcreate ( DICTIONARY_LENGTH );
while ( !feof ( dictionary ) )
{
fscanf ( dictionary , "%d %s" , &number , word );
e.key = word;
e.data = ( void * ) number;
ep = hsearch ( e , ENTER );
if ( ep == NULL )
{
fprintf ( stderr , "Entry failed\n" );
exit ( 1 );
}
}
dic_list = fopen ( "files_dic.mtp" , "r" );
if ( dic_list == NULL )
{
perror ( "file open error" );
return ( EXIT_FAILURE );
}
while ( ( read = getline ( &line , &len , dic_list ) ) != -1 )
{
if ( line [ read-1 ] == '\n' )
{
line [ read-1 ] = '\0';
}
words_from_dic = read_file ( line );
words_from_dic [ number_of_words ] = '\0';
strcat ( temp_char , "/data/out/" );
strcat ( temp_char , line );
output_pointer = fopen ( temp_char , "a" );
while ( words_from_dic[j] != NULL )
{
if ( words_from_dic[j] [ strlen ( words_from_dic[j] ) - 1 ] == '\n' )
{
words_from_dic[j] [ strlen ( words_from_dic [j] ) - 1 ] = '\0';
}
e.key = words_from_dic [ j ];
ep = hsearch ( e , FIND );
value = ( int ) ( ep->data );
fprintf ( output_pointer , "%f\n" , value );
j ++;
}
for ( i = 0 ; i < j ; i ++ )
{
free ( words_from_dic[i] );
}
free ( words_from_dic );
fclose ( output_pointer );
memset ( &temp_char , 0 , strlen ( temp_char ) );
memset ( line , 0 , strlen ( line ) );
}
hdestroy();
fclose ( dic_list );
return ( EXIT_SUCCESS );
}
My dictionary file looks like this:
Code :
1 aanandi
1 aandrostane
1 aanganwadis
1 aanhui
1 aanhydrate
I have some 600 *.dic files which I open one by one and extract words from them. THe dic files look something like this:
Code :
abc
efg
orange
apple
Hence, I open each DIC file, get the word from it and search the hash table and extract the key from it. The problem with the above code is that it is able to make the hash table but it returns NULL when searching. It should not return NULL in any case because all words from DIC files are there in the dictionary. I am not able to figure out why?