Extract sequences of bytes from binary for differents blocks


 
Thread Tools Search this Thread
Top Forums Shell Programming and Scripting Extract sequences of bytes from binary for differents blocks
# 36  
Old 08-22-2013
As I said, len is overflowing. I increased the size of buf to 256 chars and added a warning if len overflows; see below.

Also note that if you change the size of the pat strings, you also need to change the length passed to memcmp; see the changes in red:

Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Print an error message on stdout and terminate the program with status 1.
 * The body is wrapped in do { } while (0) so that `err("...");` behaves as a
 * single statement, including when it is the body of an `if` that is
 * followed by an `else`.
 */
#define err(x) do { printf("\nError: %s... Exiting...\n", (x)); exit(1); } while (0)

/* Byte compared with buf[0] in main to detect a candidate main block. */
static unsigned char start = 0x32;

/* Signatures main looks for inside a candidate main block
 * (pat1 at offset 4, pat2 at offset 12). */
static unsigned char pat1[3] = { 0x99, 0x11, 0x45 };
static unsigned char pat2[2] = { 0x73, 0x49 };

/* Two-byte marker after which main starts matching sub-block tags. */
static unsigned char pat3[2] = { 0xff, 0x34 };

/* Two-byte sub-block tags; main matches the full pair first and afterwards
 * only the second byte of the later entries. */
static unsigned char intrim_pat1[][2] = {
        { 0x03, 0x80 },
        { 0x03, 0x81 },
        { 0x03, 0x83 },
        { 0x03, 0x86 },
        { 0x03, 0x87 }
};

/* Two-byte marker that makes main drop out of the current block. */
static unsigned char end[2] = { 0xff, 0x33 };

/* Selects which field layout print_data applies to the bytes it is given. */
typedef enum {
        MAIN_BLOCK = 0,  /* layout used for a main block */
        SUB_BLOCK  = 1   /* layout used for a sub-block */
} block;

/*
 * Write `len` bytes starting at `ptr` to stdout as two-digit lowercase hex
 * with no separators, then write the '|' field delimiter.
 * When len <= 0 only the delimiter is written.
 */
void print_bytes(const unsigned char *ptr, int len)
{
        int remaining;

        for(remaining = len; remaining > 0; remaining--){
                printf("%02x", *ptr);
                ptr++;
        }
        putchar('|');
}
/*
 * Print one block as a sequence of '|'-terminated hex fields.
 *
 * ptr - first byte of the block
 * len - accepted for interface compatibility; the layouts below are fixed,
 *       so this value is not consulted
 * bl  - MAIN_BLOCK: fields of 1, 3, 8 and 8 bytes (ptr[0..19] are read)
 *       otherwise:  fields of 1, 1, 1, 1, 4, 8 and 1 bytes (ptr[0..16]),
 *                   plus one more byte (ptr[17]) when ptr[0] equals
 *                   intrim_pat1[2][1]
 *
 * The caller must make sure that many bytes are readable at ptr.
 */
void print_data(const unsigned char *ptr, int len, block bl)
{
        static const int main_fields[] = { 1, 3, 8, 8 };
        static const int sub_fields[]  = { 1, 1, 1, 1, 4, 8, 1 };
        const int *widths;
        int count, k;
        int off = 0;

        (void)len;      /* layout is fixed; see the header comment */

        if(MAIN_BLOCK == bl){
                widths = main_fields;
                count = (int)(sizeof(main_fields)/sizeof(main_fields[0]));
        } else {
                widths = sub_fields;
                count = (int)(sizeof(sub_fields)/sizeof(sub_fields[0]));
        }

        for(k = 0; k < count; k++){
                print_bytes(ptr + off, widths[k]);
                off += widths[k];
        }

        /* Sub-blocks whose first byte is intrim_pat1[2][1] carry one extra byte. */
        if(MAIN_BLOCK != bl && *ptr == intrim_pat1[2][1])
                print_bytes(ptr + off, 1);
}

/*
 * Read the remainder of a sub-block whose tag byte is already in ptr[0] and
 * print it with the SUB_BLOCK layout.
 *
 * fp  - stream positioned just after the tag byte
 * ptr - buffer holding the tag in ptr[0]; the length byte is stored in
 *       ptr[1] and the payload from ptr[2] on. Because the length is a
 *       single byte, up to 2 + 255 bytes may be written, so the caller must
 *       supply a buffer at least that large.
 *
 * Calls err() (which exits) when the stream ends before the sub-block does.
 */
void get_len_and_print(FILE *fp, unsigned char *ptr)
{
        size_t len;

        //only ptr[0] is populated at this stage
        if(1 != fread(ptr+1, sizeof(char), 1, fp))
                err("Insufficient data");
        len = ptr[1];
        // compare in size_t, the type fread returns
        if(len != fread(ptr+2, sizeof(char), len, fp))
                err("Insufficient data");
        // bytes now held at ptr: tag + length byte + payload
        print_data(ptr, (int)len + 2, SUB_BLOCK);
        return;
}

/*
 * Scan the binary file named by argv[1] and print the blocks it contains.
 *
 * The file is read two bytes at a time into buf[0..1]:
 *  - once a main block has been found, the `end` pair clears `found` and the
 *    `pat3` pair arms the search for sub-block tags (`more`);
 *  - while armed, a pair equal to an intrim_pat1 entry starts a sub-block:
 *    its length byte and payload are read and printed, then further
 *    sub-blocks are picked up by comparing the next byte with the second
 *    byte of the remaining intrim_pat1 entries;
 *  - a first byte equal to `start` makes the code read 18 more bytes and,
 *    unless both pat1 (offset 4) and pat2 (offset 12) mismatch, print them
 *    as a main block;
 *  - otherwise the stream is moved back one byte so that the scan advances
 *    a single byte per iteration.
 *
 * Returns 0 when the end of the file is reached, 1 when a sub-block length
 * does not fit in the buffer. Other failures go through err(), which exits.
 */
int main(int argc, char **argv)
{
        /*
         * A sub-block read in this function occupies two tag bytes, one
         * length byte and, since the length is a single byte, at most 255
         * payload bytes. The buffer is sized for that worst case.
         */
        enum { SUB_HDR = 3, MAX_PAYLOAD = 255 };

        if(argc < 2)
                err("File name missing");

        char found = 0, more = 0, again = 0;
        unsigned char buf[SUB_HDR + MAX_PAYLOAD];
        unsigned char *ptr = buf;
        long pos = 0;   // ftell() returns long and fseek() takes long
        int i, len;
        int arr_size = (int)(sizeof(intrim_pat1)/sizeof(intrim_pat1[0]));

        FILE *fp = fopen(argv[1], "rb");
        if(!fp) err("Unable to open the file");

        while(2 == fread(ptr, sizeof(char), 2, fp)){
                pos = ftell(fp);

                //check for end of file pattern
                if(found && !memcmp(buf, end, 2)){
                        found=0; //start over or stop??
                        continue;
                }

                //check for 0xff 0x34
                if(found && !(memcmp(buf, pat3, 2))){
                        more = 1;
                        continue;
                }

                if(found && more){
                        for(i=0; i < arr_size; i++){
                                // We got the intrim pattern.
                                if(!memcmp(buf, intrim_pat1[i], 2)){
                                        again=1;
                                        more=0;
                                        break;
                                }
                        }
                        if(again) {
                                // Now read the next 1 byte which is actually the size
                                if(1 != fread(ptr+2, sizeof(char), 1, fp))
                                        err("Insufficient data");
                                len = buf[2];
                                // The payload is stored from buf[3] on; refuse anything
                                // that would run past the end of buf.
                                if((size_t)len > sizeof(buf) - SUB_HDR) {
                                    fprintf(stderr, "Len overflow: %d\n", len);
                                    fclose(fp);
                                    return 1;
                                }
                                if((size_t)len != fread(ptr+3, sizeof(char), len, fp))
                                        err("Insufficient data");
                                print_data(ptr+1, len+2, SUB_BLOCK);

                                // Now check for the remaining pattern
                                // Assuming it will be in ascending order
                                // Break even if we dont find the very next pattern
                                // Also, reset the fp. We will flush all the data in buffer
                                // till now, since we have printed them already
                                pos = ftell(fp);

                                if(1 != fread(ptr, sizeof(char), 1, fp))
                                        err("Insufficient data");

                                while(i < arr_size){
                                        if( buf[0] == intrim_pat1[i][1]){
                                                get_len_and_print(fp, ptr);

                                                if(1 != fread(ptr, sizeof(char), 1, fp))
                                                        err("Insufficient data");
                                                pos = ftell(fp);
                                        }
                                        i++;
                                }
                                again=0;
                        }
                }

                if(buf[0] == start){
                        if(18 != fread(ptr+2, sizeof(char), 18, fp))
                                err("Insufficient data");
                        if(memcmp(buf+4, pat1, 3) && memcmp(buf+12, pat2, 2)){
                                // neither signature is present: resume after the two bytes read
                                fseek(fp, pos, SEEK_SET);
                        }else{
                                found = 1; //found the starting of the block with data
                                printf("\n");
                                print_data(ptr, 19, MAIN_BLOCK);
                        }
                        continue;
                }
                // no match: step back so the next read starts one byte later
                pos--;
                if(fseek(fp, pos, SEEK_SET))
                        err("Error in seeking");
        }

        fclose(fp);
        return 0;
}

These 2 Users Gave Thanks to Chubler_XL For This Post:
# 37  
Old 08-23-2013
Thanks for your intervention, Chubler_XL. I've tested with your suggestions and
it works fine up to the end of the file.

Now, to almost complete the extraction, I would like to remove the "f's" from those values that have "fffff".
So, instead of using the function "print_bytes" like in
Code:
print_bytes(ptr+8, 8);

I'm trying to write a function that removes or doesn't print the "f's", but it is not working.

I'm trying with this:

Code:
/*
 * Print the `len` bytes at `ptr` as lowercase hex digits, leaving out the
 * run of 'f' digits at the end of the value, then print the '|' delimiter.
 * For example the bytes 73 49 45 49 23 2f ff ff are printed as
 * "73494549232|". A value made only of 'f' digits prints just "|".
 *
 * ptr - bytes to print; treated as empty when NULL
 * len - number of bytes at ptr; values below 0 are treated as 0
 *
 * The digits are written straight to stdout, so no intermediate buffer has
 * to be sized.
 */
void Remove_Fs(const unsigned char *ptr, int len)
{
        int digits, k;

        if(ptr == NULL || len < 0)
                len = 0;

        /* Two hex digits per byte: even index = high nibble, odd = low nibble. */
        digits = 2 * len;

        /* Walk back from the last digit while it is an 'f'. */
        while(digits > 0){
                unsigned char byte = ptr[(digits - 1) / 2];
                unsigned char nibble = ((digits - 1) % 2) ? (byte & 0x0f) : (byte >> 4);
                if(nibble != 0x0f)
                        break;
                digits--;
        }

        for(k = 0; k < digits; k++){
                unsigned char byte = ptr[k / 2];
                unsigned char nibble = (k % 2) ? (byte & 0x0f) : (byte >> 4);
                printf("%x", (unsigned)nibble);
        }
        printf("|");
        return;
}

I hope you can give me some suggestion.

Thanks in advance.
# 38  
Old 08-24-2013
Which extra f's are you talking about?

--ahamed
# 39  
Old 08-24-2013
Hello ahamed!

I'm trying to avoid printing the "f's" that are printed for the fields
shown in red below, but for now that issue is secondary:
Code:
/*
 * Print one block as a sequence of '|'-terminated hex fields.
 *
 * ptr - first byte of the block
 * len - accepted for interface compatibility; every field below sits at a
 *       fixed offset, so this value is not consulted
 * bl  - MAIN_BLOCK selects the first layout, anything else the second
 *
 * The caller must make sure the bytes addressed below are readable at ptr.
 */
void print_data(const unsigned char *ptr, int len, block bl)
{
        (void)len;      /* layout is fixed; see the header comment */

        if(MAIN_BLOCK == bl){
                print_bytes(ptr, 1);
                print_bytes(ptr+1, 3);
                print_bytes(ptr+4, 8);   /* 8-byte value */
                print_bytes(ptr+12, 8);  /* 8-byte value */
        } else {
                print_bytes(ptr, 1);
                print_bytes(ptr+1, 1);
                print_bytes(ptr+2, 1);
                print_bytes(ptr+3, 1);
                print_bytes(ptr+4, 4);
                print_bytes(ptr+8, 8);   /* 8-byte value */
                print_bytes(ptr+16, 1);
                /* blocks whose first byte is intrim_pat1[2][1] carry one extra byte */
                if(*ptr == intrim_pat1[2][1]){
                        print_bytes(ptr+17, 1);
                }
        }
        return;
}

If you prefer, please help me instead to extract the last sequence of interest to me, which is at the end of the sub-block and begins with 84 0e, where 0e is the length; I need to extract the 84 and the next 14 bytes after 0e, separating each byte with "|". As it is part of the same block, it should be printed on the same line as before.

I'm attaching an image showing in red the bytes I mention.

I've added 0x03, 0x84 at the end of intrim_pat1, and it seems some other code still needs to be added.
Code:
/* Two-byte sub-block tags, with the 0x03 0x84 pair appended as the last entry. */
static unsigned char intrim_pat1[][2] = {
        { 0x03, 0x80 },
        { 0x03, 0x81 },
        { 0x03, 0x83 },
        { 0x03, 0x86 },
        { 0x03, 0x87 },
        { 0x03, 0x84 }
};

PS: 84 0E will appear at the end of the sub-block and won't appear after 0x03, but I've added it to intrim_pat1 because that is the only way I can see to do it.

Thanks in advance.
Extract sequences of bytes from binary for differents blocks-image2jpg
# 40  
Old 08-25-2013
I still don't understand which extra f's you are talking about. Maybe you can paste the output and point out which are those extra f's.

As for the new pattern 0x84, it should've worked. Can you upload a sample file so that I can check?

--ahamed
# 41  
Old 08-25-2013
Hello ahamed,

I'm showing in the attached image:
- Where the f's appear (in pat1 and pat2).
- Where pattern that begins with 0x84 appears (at the end of sub-block) and
is composed by 0x84 0x0e + 14 bytes.

I'm attaching another sample bin file too.

Thanks so much for your help again.

Regards
# 42  
Old 08-25-2013
Just extending the intrim_pat1 is working for me

Code:
user@Imperfecto_:~$ ./extract binfile.txt 

32|000001|991145018934550f|73494549232fffff|
32|000002|991145018934551f|73494554768fffff|80|0f|01|02|00000030|7349526905ffffff|00|81|0f|01|02|0000013a|
73495269559fffff|00|83|10|01|0c|0000009f|7349526905ffffff|01|01|86|0f|01|0e|000000eb|73495269596fffff|00|87|0f|
01|01|0006f699|73495269563fffff|00|84|0e|00|01|00000100|0100ffff00000101|00|
32|000003|991145018934552f|73494557521fffff|81|0f|01|02|0000000d|7349526905ffffff|00|83|10|01|0c|0000000d|
7349526905ffffff|01|01|86|0f|01|0c|0000000d|73495269565fffff|00|84|0e|01|02|01020100|01ffffff02010201|00|
32|000004|991145018934558f|73497380427fffff|81|0f|01|02|0000000d|7349526905ffffff|00|83|10|01|0c|0000000d|
7349526905ffffff|01|01|86|0f|01|0c|0000000d|73495269594fffff|00|84|0e|01|02|01020100|01ffffff02010201|00|

--ahamed
Login or Register to Ask a Question

Previous Thread | Next Thread

10 More Discussions You Might Find Interesting

1. Shell Programming and Scripting

Blocks of text in a file - extract when matches...

I sat down yesterday to write this script and have just realised that my methodology is broken........ In essense I have..... ----------------------------------------------------------------- (This line really is in the file) Service ID: 12345 ... (7 Replies)
Discussion started by: Bashingaway
7 Replies

2. Shell Programming and Scripting

Extract the part of sequences from a file

I have a text file, input.fasta contains some protein sequences. input.fasta is shown below. >P02649 MKVLWAALLVTFLAGCQAKVEQAVETEPEPELRQQTEWQSGQRWELALGRFWDYLRWVQT LSEQVQEELLSSQVTQELRALMDETMKELKAYKSELEEQLTPVAEETRARLSKELQAAQA RLGADMEDVCGRLVQYRGEVQAMLGQSTEELRVRLASHLRKLRKRLLRDADDLQKRLAVY... (8 Replies)
Discussion started by: rahim42
8 Replies

3. Shell Programming and Scripting

Extract length wise sequences from fastq file

I have a fastq file from small RNA sequencing with sequence lengths between 15 - 30. I wanted to filter sequence lengths between 21-25 and write to another fastq file. how can i do that? (4 Replies)
Discussion started by: empyrean
4 Replies

4. Shell Programming and Scripting

Extract sequences based on the list

Hi, I have a file with more than 28000 records and it looks like below.. >mm10_refflat_ABCD range=chr1:1234567-2345678 tgtgcacactacacatgactagtacatgactagac....so on >mm10_refflat_BCD range=chr1:3234567-4545678... tgtgcacactacacatgactagtatgtgcacactacacatgactagta . . . . . so on ... (2 Replies)
Discussion started by: Diya123
2 Replies

5. UNIX for Dummies Questions & Answers

X bytes of 0, Y bytes of random data, Z bytes of 5, T bytes of 1. ??

Hello guys. I really hope someone will help me with this one.. So, I have to write this script who: - creates a file home/student/vmdisk of 10 mb - formats that file to ext3 - mounts that partition to /mnt/partition - creates a file /mnt/partition/data. In this file, there will... (1 Reply)
Discussion started by: razolo13
1 Replies

6. Linux

Why does ext3 allocate 8 blocks for files that are few bytes long

The title is clear: why does ext3 allocate 8 blocks for files that are few bytes long? If I create a file named "test", put a few chars in it, and then I run: stat test I get that "Blocks: 8" I searched in the web and found that ext does that, it allocates 8 blocks even if It doesn't need... (4 Replies)
Discussion started by: Tavo
4 Replies

7. Shell Programming and Scripting

extract blocks of text from a file

Hi, This is part of a large text file I need to separate out. I'd like some help to build a shell script that will extract the text between sets of dashed lines, write that to a new file using the whole or part of the first text string as the new file name, then move on to the next one and... (7 Replies)
Discussion started by: cajunfries
7 Replies

8. Shell Programming and Scripting

Extract sequence blocks

Hi, I have an one-line file consisting of a sequence of 660 letters. I would like to extract 9-letter blocks iteratively: ASDFGHJKLQWERTYUIOPZXCVBNM first block: ASDFGHJKL 1nd block: SDFGHJKLQ What I have so far only gives me the first block, can anyone please explain why? cat... (7 Replies)
Discussion started by: solli
7 Replies

9. UNIX for Advanced & Expert Users

Deal with binary sequences

Hello, I have come across the necessity for me to deal with binary sequences and I had a few questions. 1- Does any UNIX scripting language provide any tool or command for converting text data to binary sequences? Example of binary sequence: "0x97 0x93 0x85 0x40 0xd5 0xd6 0xd7" 2- If I want... (2 Replies)
Discussion started by: Indalecio
2 Replies

10. Shell Programming and Scripting

Remove first N bytes and last N bytes from a binary file on AIX.

Hi all, Does anybody know or guide me on how to remove the first N bytes and the last N bytes from a binary file? Is there any AWK or SED or any command that I can use to achieve this? Your help is greatly appreciated!! Best Regards, Naveen. (1 Reply)
Discussion started by: naveendronavall
1 Replies
Login or Register to Ask a Question