Sponsored Content
Top Forums Programming Help regarding memory leak in this C program Post 302608855 by shoaibjameel123 on Monday 19th of March 2012 03:36:04 AM
Old 03-19-2012
Help regarding memory leak in this C program

I have written this code in C which reads a very large collection of text files and does some processing. The problem with this code is that there are memory leaks which I am not able to figure out as to where the problem is. When I run this code, and see the memory usage using top command, then I see the memory usage keeps on increasing as the code executes and finally the program gets killed by the Linux kernel. I am pasting the code below, understanding the logic of the code is not essential but the way malloc() and free() pairs are working and also the way dynamically allocated memory locations are passed to functions.

Code:
#include<stdio.h>
#include<stdlib.h>
#include<inttypes.h>
#include<math.h>
#include<search.h>
#include<string.h>
#include<unistd.h>

#define DICTIONARY_FILE_LENGTH 819558
#define NUMBER_OF_CLUSTERS 150
#define DIMENSIONS 200
#define NUMBER_OF_FILES 297180 //Always give a larger number than actual
#define MAXIMUM_WORDS 2227449

void read_text_file ( FILE * , char ** , unsigned long int * );
void read_cluster_file ( FILE * , int * , unsigned long int * );
void read_centroid_matrix ( FILE * , float ** );
char *file_name_generator ( unsigned long int , char * , char * , char * );
char * itoa( int32_t , char * );
char * reverse ( char [] );
float cosine_similarity ( float ** , unsigned long int , unsigned long int );

int32_t main ( int32_t argc , char ** argv )
{
    FILE *cluster_membership_file = NULL;
    FILE *cluster_centroid_file = NULL;
    FILE *dictionary_file = NULL;
    FILE *input_file = NULL;
    FILE *output_file = NULL;

    ENTRY e , *ep;

    unsigned long int i = 0;
    unsigned long int number_of_words = 0;
    int32_t *cluster_data = NULL;
    int32_t j = 0;
    unsigned long int first_word_coordinate = 0;
    unsigned long int second_word_coordinate = 0;
    int32_t counter = 0;
    int32_t first_word_cluster_member = 0;
    int32_t second_word_cluster_member = 0;
    int32_t signal = 0;

    float *store_centroids = NULL;
    store_centroids = ( float * ) malloc ( MAXIMUM_WORDS * sizeof ( float ) );
    if ( store_centroids == NULL )
    {
        fprintf ( stderr , "malloc() memory allocation failure in store_centroids\n" );
        return ( EXIT_FAILURE );
    }

    float ** centroid_matrix = NULL;
    centroid_matrix = ( float ** ) malloc ( NUMBER_OF_CLUSTERS * sizeof ( float * ) );
    if ( centroid_matrix == NULL )
    {
        fprintf ( stderr , "malloc() memory allocation error in centroid matrix\n" );
        return ( EXIT_FAILURE );
    }

    for ( i = 0 ; i < NUMBER_OF_CLUSTERS ; i ++ )
    {
        centroid_matrix [ i ] = ( float * ) malloc ( DIMENSIONS * sizeof ( float ) );
        if ( centroid_matrix [ i ] == NULL )
        {
            fprintf ( stderr , "malloc() memory allocation error in centroid matrix\n" );
            return ( EXIT_FAILURE );
        }
    }

    char *file_extension = NULL;
    char *file_path = NULL;
    char *file_name = NULL;

    file_extension = ( char * ) malloc ( 4 * sizeof ( char ) );
    if ( file_extension == NULL )
    {
      fprintf ( stderr , "malloc() memory allocation failure in file_extension\n" );
      return ( EXIT_FAILURE );
    }

    file_path = ( char * ) malloc ( 300 * sizeof ( char ) );
    if ( file_path == NULL )
    {
      fprintf ( stderr , "malloc() memory allocation failure in file_path\n" );
      return ( EXIT_FAILURE );
    }

    file_name = ( char * ) malloc ( 300 * sizeof ( char ) );
    if ( file_name == NULL )
    {
      fprintf ( stderr , "malloc() memory allocation failure in file_name\n" );
      return ( EXIT_FAILURE );
    }

    char **words_from_dictionary = NULL;
    words_from_dictionary = ( char ** ) malloc ( DICTIONARY_FILE_LENGTH * sizeof ( char * ) );
    if ( words_from_dictionary == NULL )
    {
        fprintf ( stderr , "malloc() memory allocation failure in creating words_from_dictionary space\n" );
        return ( EXIT_FAILURE );
    }

    char **words_from_web_page = NULL; //get the chunk of memory once using malloc() and keep using the same for subsequent reads
    words_from_web_page = ( char ** ) malloc ( MAXIMUM_WORDS * sizeof ( char * ) );
    if ( words_from_web_page == NULL )
    {
        fprintf ( stderr , "malloc() memory allocation failure in creating words_from_web_page\n" );
        return ( EXIT_FAILURE );
    }

    for ( i = 0 ; i < MAXIMUM_WORDS ; i ++ )
    {
        words_from_web_page [ i ] = ( char * ) malloc ( 30 * sizeof ( char ) );
        if ( words_from_web_page [ i ] == NULL )
        {
            fprintf ( stderr , "malloc() memory allocation failure in words_from_web_page\n" );
            return ( EXIT_FAILURE );
        }
    }

    cluster_data = ( int * ) malloc ( DICTIONARY_FILE_LENGTH * sizeof ( int ) );
    if ( cluster_data == NULL )
    {
        fprintf ( stderr , "malloc() memory allocation failure in cluster_data\n" );
        return ( EXIT_FAILURE );
    }

    for ( i = 0 ; i < DICTIONARY_FILE_LENGTH ; i ++ )
    {
        words_from_dictionary [ i ] = ( char * ) malloc ( 30 * sizeof ( char ) );
        if ( words_from_dictionary [ i ] == NULL )
        {
            fprintf ( stderr , "malloc() memory allocation failure in words_from_dictionary\n" );
            return ( EXIT_FAILURE );
        }
    }

    cluster_membership_file = fopen ( "cluster_membership.txt" , "r" );
    if ( cluster_membership_file == NULL )
    {
        fprintf ( stderr, "cluster membership file read error\n" );
        return ( EXIT_FAILURE );
    }

    cluster_centroid_file = fopen ( "cluster_centroids.txt" , "r" );
    if ( cluster_centroid_file == NULL )
    {
        fprintf ( stderr , "cluster centroid file read error\n" );
        return ( EXIT_FAILURE );
    }

    dictionary_file = fopen ( "dictionary.dit" , "r" );
    if ( dictionary_file == NULL )
    {
        fprintf ( stderr , "dictionary file read error\n" );
        return ( EXIT_FAILURE );
    }

    i = 0;
    read_text_file ( dictionary_file , words_from_dictionary , &number_of_words );
    number_of_words = 0;
    read_cluster_file ( cluster_membership_file , cluster_data , &number_of_words );
    read_centroid_matrix ( cluster_centroid_file , centroid_matrix );

    hcreate ( DICTIONARY_FILE_LENGTH );

    for ( i = 0 ; i < DICTIONARY_FILE_LENGTH ; i++ )
    {
        e.key = words_from_dictionary [ i ];
        e.data = ( void * ) i;
        ep = hsearch ( e, ENTER );
        if ( ep == NULL )
        {
            fprintf(stderr, "entry failed\n");
            exit(1);
        }
    }

    for ( i = 0 ; i < NUMBER_OF_FILES ; i ++ )
    {
        file_name = file_name_generator ( i , file_extension , file_path , "/science/original_files/" );//file name generator generates the 
                                                                                                      //file     names which will be opened further for reading
        input_file = fopen ( file_name , "r" );
        if ( input_file == NULL )
        {
            memset ( file_name , '\0' , 300 );
            memset ( file_extension , '\0' , 4 );
            memset ( file_path , '\0' , 300 );
            continue;
        }

        number_of_words = 0;
        read_text_file ( input_file , words_from_web_page , &number_of_words );//read text files one by one
        for ( j = 0 ; j < ( number_of_words - 1 ) ; j ++ )
        {
            e.key = words_from_web_page [ j ];
            ep = hsearch ( e , FIND );
            first_word_coordinate = ( int32_t ) ( ep->data );
            first_word_cluster_member = cluster_data [ first_word_coordinate ];
            e.key = words_from_web_page [ j + 1 ];
            ep = hsearch ( e , FIND );
            second_word_coordinate = ( int32_t ) ( ep -> data );
            second_word_cluster_member = cluster_data [ second_word_coordinate ];

            if ( first_word_cluster_member == second_word_cluster_member )
            {
                signal = signal + 1;
                continue;
            }
            else
            {
                * ( store_centroids + counter ++ ) = 
                     cosine_similarity ( centroid_matrix , ( first_word_cluster_member - 1 ) , ( second_word_cluster_member - 1 ) );
            }
        }

        memset ( file_name , 0 , 300 );
        memset ( file_extension , 0 , 4 );
        memset ( file_path , 0 , 300 );
        memset ( words_from_web_page , 0 , MAXIMUM_WORDS );

        file_name = file_name_generator ( i , file_extension , file_path , "segmentation_scores/" );//file writing done here
        output_file = fopen ( file_name , "w" );
        if ( output_file == NULL )
        {
            fprintf ( stderr , "file write error\n" );
            return ( EXIT_FAILURE );
        }

        if ( signal != ( number_of_words - 1 ) )
        {
            for ( j = 0 ; j < counter ; j ++ )
            {
                fprintf ( output_file , "%f\n" , store_centroids [ j ] );
            }
        }
        else
        {
            fprintf ( output_file , "%d" , 1 );
        }
        
        fclose ( output_file );
        counter = 0;

        memset ( store_centroids , 0 , MAXIMUM_WORDS );
        memset ( file_name , 0 , 300 );
        memset ( file_extension , 0 , 4 );
        memset ( file_path , 0 , 300 );
    }

    for ( i = 0 ; i < DICTIONARY_FILE_LENGTH ; i ++ )
    {
        free ( words_from_dictionary [ i ] );
    }
    free ( words_from_dictionary );

    for ( i = 0 ; i < NUMBER_OF_CLUSTERS ; i ++ )
    {
        free ( centroid_matrix [ i ] );
    }
    free ( centroid_matrix );

    for ( i = 0 ; i < MAXIMUM_WORDS ; i ++ )
    {
        free ( words_from_web_page [ i ] );
    }
    free ( words_from_web_page );

    free ( cluster_data );
    free ( file_extension );
    free ( file_path );
    //free ( file_name );
    free ( store_centroids );

    return ( EXIT_SUCCESS );
}

//reading the text file. I think this where the leaks occur the most because this is the function which is called several times 
//in main()  

void read_text_file ( FILE *file_pointer , char ** words_to_be_read , unsigned long int *number_of_words )
{
    unsigned long int len = 0;
    char *line = NULL;

    while ( ! feof ( file_pointer ) )
    {
        getline ( &words_to_be_read [ ( *number_of_words) ++ ] , &len , file_pointer );
    }

    ( *number_of_words )--;
    fclose ( file_pointer );
}


void read_cluster_file ( FILE *file_pointer , int * cluster_data , unsigned long int *number_of_words )
{
    while ( ! feof ( file_pointer ) )
    {
        fscanf ( file_pointer , "%d\n" , &cluster_data [ (*number_of_words) ++ ] );
    }

    ( *number_of_words )--;
    fclose ( file_pointer );
}

//this reads a two dimensional array
void read_centroid_matrix ( FILE * file_pointer , float ** centroid_matrix )
{
    int i = 0;
    int j = 0;

    for ( i = 0 ; i < NUMBER_OF_CLUSTERS ; i ++ )
    {
        for ( j = 0 ; j < DIMENSIONS ; j ++ )
        {
            fscanf ( file_pointer,  "%f" , &centroid_matrix [ i ] [ j ] );
        }
    }

    fclose ( file_pointer );
}

//this generates the file names to be read
char *file_name_generator ( unsigned long int i , char * str , char * file_path , char *common_path )
{
    str = itoa ( i , str );
    strcat ( file_path , common_path );
    strcat ( str , ".dat" );
    strcat ( file_path , str );
    i ++;
    return ( file_path );
}


char * itoa ( int n , char * s )
{
     int i, sign;
 
     if ( ( sign = n ) < 0 )  /* record sign */
         n = -n;          /* make n positive */
     i = 0;
     do {       /* generate digits in reverse order */
         s [ i++ ] = n % 10 + '0';   /* get next digit */
     } while ( ( n /= 10 ) > 0 );     /* delete it */
     if ( sign < 0 )
         s [ i++ ] = '-';
     s [ i ] = '\0';
     reverse ( s );
    return ( s );
}


char * reverse ( char s [ ] )
{
     int i, j;
     char c;
 
     for ( i = 0, j = strlen ( s ) - 1; i < j; i ++, j -- ) 
    {
             c = s [ i ];
             s [ i ] = s [ j ];
             s [ j ] = c;
         }
    return ( s );
}
//this function computes cosine between the two vectors

float cosine_similarity ( float ** cluster_centroid , unsigned long int first_coordinate , unsigned long int second_coordinate )
{
    size_t i = 0;
    size_t j = 0;

    float eval1 = 0;
    float eval2 = 0;
    float eval3 = 0;
    float cosine = 0;

    for ( i = 0  ; i < DIMENSIONS ; i++ )
    {
        eval1 = eval1 + ( cluster_centroid [ first_coordinate ] [ i ] * cluster_centroid [ second_coordinate ] [ i ] );
        eval2 = eval2 + ( cluster_centroid [ first_coordinate ] [ i ] * cluster_centroid [ first_coordinate ] [ i ] );
        eval3 = eval3 + ( cluster_centroid [ second_coordinate ] [ i ] * cluster_centroid [ second_coordinate ] [ i ] );
    }

    cosine = eval1 / ( sqrtf ( eval2 ) * sqrtf ( eval3 ) );

    return ( cosine );
}

---------- Post updated at 03:36 PM ---------- Previous update was at 10:01 AM ----------

I reasoned it out where actually the problem is:
Code:
void read_text_file ( FILE *file_pointer , char ** words_to_be_read , unsigned long int *number_of_words )
{
    unsigned long int len = 0;
    char *line = NULL;

    while ( ! feof ( file_pointer ) )
    {
        getline ( &words_to_be_read [ ( *number_of_words) ++ ] , &len , file_pointer );
    }

    ( *number_of_words )--;
    fclose ( file_pointer );
}

In the above code, getline() creates another memory location in addition to what has already been created. that allocated location does not get free'd when the functions returns. Hence, memory leak occur. After fixing the problem, I don't face any problems now. This is what I have done now.


Code:
void read_text_file ( FILE *file_pointer , char ** words_to_be_read , unsigned long int *number_of_words )
{
    unsigned long int len = 0;
    unsigned long int read = 0;
    char *line = NULL;

    while ( ! feof ( file_pointer ) )
    {
        read = getline ( &line , &len , file_pointer );
        if ( read != -1 || line != NULL )
        {
              strcpy ( words_to_be_read [ ( *number_of_words) ++ ] , line );
         }
    }
      if ( line )
        free ( line );


    fclose ( file_pointer );
}


Last edited by jim mcnamara; 03-19-2012 at 10:05 AM.. Reason: reformat
 

9 More Discussions You Might Find Interesting

1. Programming

about virtual memory and memory leak

Hi, First of all I appreciate this group very much for its informative discussions and posts. Here is my question. I have one process whose virtual memory size increases linearly from 6MB to 12MB in 20 minutes. Does that mean my process has memory leaks? In what cases does the... (4 Replies)
Discussion started by: shriashishpatil
4 Replies

2. Programming

Memory leak of fork()

Today, I wrote a test code for fork/execvp/waitpid. In the parent process, it fork 100 child processes which only execute "date" to print the current datetime. When any child process die, the parent process will receive a SIGCHLD signal. Then, the parent process will re-fork-execvp the child... (7 Replies)
Discussion started by: whererush
7 Replies

3. Programming

how to check memory leak in C program under Unix

Hi, How to detect memory leak in C program under unix ? Thanks (6 Replies)
Discussion started by: useless79
6 Replies

4. Programming

Memory LEAK with pthreads

I have this code... #include <stdio.h> #include <iostream> #include <pthread.h> static void* cliente(void *datos); int main() { pthread_attr_t tattr; int ret; size_t size = PTHREAD_STACK_MIN + 0x0100; ret = pthread_attr_init(&tattr); ret =... (8 Replies)
Discussion started by: JEscola
8 Replies

5. UNIX for Advanced & Expert Users

Need to create a memory leak

Hi. This might be a strange request, but does anyone have any idea on a simple shell script that would use more and more memory as it ran? Like a purposeful leak. I want to test the behaviour of an already running program when the machine runs out of memory. Thanks! (4 Replies)
Discussion started by: rebelbuttmunch
4 Replies

6. IP Networking

memory leak?

Hi All, my client server application can work in two modes: 1) one direction - only client sends msgs to server 2) two directions - server gives 'answers' to client. when program run in the first mode it looks OK, but when server answers to client than client's application exit its... (2 Replies)
Discussion started by: lenna
2 Replies

7. Programming

Memory Leak

Hi, I am trying a database server which keeps a B+ plus tree structure and works on it. I am trying to find the memory used/leak while executing this process. I check the memory leak by using ps uax command. When i execute a delete query i am sure that my code frees up the existing... (9 Replies)
Discussion started by: kumaran_5555
9 Replies

8. Red Hat

Memory leak

Hi all I am using RED HAT 5.4, and i am getting memory uses problem. when i use "sync;echo 3 > /proc/sys/vm/drop_cache" command the memory will release after 2,3 hour memory show 95%. pls suggest right way. thanks (37 Replies)
Discussion started by: reply.ravi
37 Replies

9. Web Development

Finding Cause of Memory Leak

Hi We have just got a dedicated server with Fasthosts, O/S is Linux CentOS 6 64 bit. It was a fresh install and I have just moved one WordPress site onto there. The problem is we seem to be getting a memory leak (that's what Fasthosts said) and the database (I think) keeps crashing, so we... (3 Replies)
Discussion started by: Pokeyzx
3 Replies
All times are GMT -4. The time now is 10:21 PM.
Unix & Linux Forums Content Copyright 1993-2022. All Rights Reserved.
Privacy Policy