Search Results

Search found 886 results on 36 pages for 'duplicates'.

Page 17/36 | < Previous Page | 13 14 15 16 17 18 19 20 21 22 23 24  | Next Page >

  • Suggestions for duplicate file finder algorithm (using C)

    - by Andrei Ciobanu
    Hello, I wanted to write a program that test if two files are duplicates (have exactly the same content). First I test if the files have the same sizes, and if they have i start to compare their contents. My first idea, was to "split" the files into fixed size blocks, then start a thread for every block, fseek to startup character of every block and continue the comparisons in parallel. When a comparison from a thread fails, the other working threads are canceled, and the program exits out of the thread spawning loop. The code looks like this: dupf.h #ifndef __NM__DUPF__H__ #define __NM__DUPF__H__ #define NUM_THREADS 15 #define BLOCK_SIZE 8192 /* Thread argument structure */ struct thread_arg_s { const char *name_f1; /* First file name */ const char *name_f2; /* Second file name */ int cursor; /* Where to seek in the file */ }; typedef struct thread_arg_s thread_arg; /** * 'arg' is of type thread_arg. * Checks if the specified file blocks are * duplicates. */ void *check_block_dup(void *arg); /** * Checks if two files are duplicates */ int check_dup(const char *name_f1, const char *name_f2); /** * Returns a valid pointer to a file. * If the file (given by the path/name 'fname') cannot be opened * in 'mode', the program is interrupted an error message is shown. **/ FILE *safe_fopen(const char *name, const char *mode); #endif dupf.c #include <errno.h> #include <pthread.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> #include "dupf.h" FILE *safe_fopen(const char *fname, const char *mode) { FILE *f = NULL; f = fopen(fname, mode); if (f == NULL) { char emsg[255]; sprintf(emsg, "FOPEN() %s\t", fname); perror(emsg); exit(-1); } return (f); } void *check_block_dup(void *arg) { const char *name_f1 = NULL, *name_f2 = NULL; /* File names */ FILE *f1 = NULL, *f2 = NULL; /* Streams */ int cursor = 0; /* Reading cursor */ char buff_f1[BLOCK_SIZE], buff_f2[BLOCK_SIZE]; /* Character buffers */ int rchars_1, rchars_2; /* Readed characters */ /* Initializing variables from 'arg' */ name_f1 = ((thread_arg*)arg)->name_f1; name_f2 = ((thread_arg*)arg)->name_f2; cursor = ((thread_arg*)arg)->cursor; /* Opening files */ f1 = safe_fopen(name_f1, "r"); f2 = safe_fopen(name_f2, "r"); /* Setup cursor in files */ fseek(f1, cursor, SEEK_SET); fseek(f2, cursor, SEEK_SET); /* Initialize buffers */ rchars_1 = fread(buff_f1, 1, BLOCK_SIZE, f1); rchars_2 = fread(buff_f2, 1, BLOCK_SIZE, f2); if (rchars_1 != rchars_2) { /* fread failed to read the same portion. * program cannot continue */ perror("ERROR WHEN READING BLOCK"); exit(-1); } while (rchars_1-->0) { if (buff_f1[rchars_1] != buff_f2[rchars_1]) { /* Different characters */ fclose(f1); fclose(f2); pthread_exit("notdup"); } } /* Close streams */ fclose(f1); fclose(f2); pthread_exit("dup"); } int check_dup(const char *name_f1, const char *name_f2) { int num_blocks = 0; /* Number of 'blocks' to check */ int num_tsp = 0; /* Number of threads spawns */ int tsp_iter = 0; /* Iterator for threads spawns */ pthread_t *tsp_threads = NULL; thread_arg *tsp_threads_args = NULL; int tsp_threads_iter = 0; int thread_c_res = 0; /* Thread creation result */ int thread_j_res = 0; /* Thread join res */ int loop_res = 0; /* Function result */ int cursor; struct stat buf_f1; struct stat buf_f2; if (name_f1 == NULL || name_f2 == NULL) { /* Invalid input parameters */ perror("INVALID FNAMES\t"); return (-1); } if (stat(name_f1, &buf_f1) != 0 || stat(name_f2, &buf_f2) != 0) { /* Stat fails */ char emsg[255]; sprintf(emsg, "STAT() ERROR: %s %s\t", name_f1, name_f2); perror(emsg); return (-1); } if (buf_f1.st_size != buf_f2.st_size) { /* File have different sizes */ return (1); } /* Files have the same size, function exec. is continued */ num_blocks = (buf_f1.st_size / BLOCK_SIZE) + 1; num_tsp = (num_blocks / NUM_THREADS) + 1; cursor = 0; for (tsp_iter = 0; tsp_iter < num_tsp; tsp_iter++) { loop_res = 0; /* Create threads array for this spawn */ tsp_threads = malloc(NUM_THREADS * sizeof(*tsp_threads)); if (tsp_threads == NULL) { perror("TSP_THREADS ALLOC FAILURE\t"); return (-1); } /* Create arguments for every thread in the current spawn */ tsp_threads_args = malloc(NUM_THREADS * sizeof(*tsp_threads_args)); if (tsp_threads_args == NULL) { perror("TSP THREADS ARGS ALLOCA FAILURE\t"); return (-1); } /* Initialize arguments and create threads */ for (tsp_threads_iter = 0; tsp_threads_iter < NUM_THREADS; tsp_threads_iter++) { if (cursor >= buf_f1.st_size) { break; } tsp_threads_args[tsp_threads_iter].name_f1 = name_f1; tsp_threads_args[tsp_threads_iter].name_f2 = name_f2; tsp_threads_args[tsp_threads_iter].cursor = cursor; thread_c_res = pthread_create( &tsp_threads[tsp_threads_iter], NULL, check_block_dup, (void*)&tsp_threads_args[tsp_threads_iter]); if (thread_c_res != 0) { perror("THREAD CREATION FAILURE"); return (-1); } cursor+=BLOCK_SIZE; } /* Join last threads and get their status */ while (tsp_threads_iter-->0) { void *thread_res = NULL; thread_j_res = pthread_join(tsp_threads[tsp_threads_iter], &thread_res); if (thread_j_res != 0) { perror("THREAD JOIN FAILURE"); return (-1); } if (strcmp((char*)thread_res, "notdup")==0) { loop_res++; /* Closing other threads and exiting by condition * from loop. */ while (tsp_threads_iter-->0) { pthread_cancel(tsp_threads[tsp_threads_iter]); } } } free(tsp_threads); free(tsp_threads_args); if (loop_res > 0) { break; } } return (loop_res > 0) ? 1 : 0; } The function works fine (at least for what I've tested). Still, some guys from #C (freenode) suggested that the solution is overly complicated, and it may perform poorly because of parallel reading on hddisk. What I want to know: Is the threaded approach flawed by default ? Is fseek() so slow ? Is there a way to somehow map the files to memory and then compare them ?

    Read the article

  • How to make this Makefile more concise?

    - by David
    I know there are ways to remove duplicates in Makefile. Can you tell me how to make the Makefile below more concise? Thanks! And is there any tool to automate it? CC=gcc CFLAGS=-pthread -g -o all: dotprod_mutex dotprod_serial dotprod_mutex: dotprod_mutex.c $(CC) $(CFLAGS) dotprod_mutex dotprod_mutex.c dotprod_serial: dotprod_serial.c $(CC) $(CFLAGS) dotprod_serial dotprod_serial.c

    Read the article

  • What's with "foo" and "bar" [closed]

    - by Matt S.
    Possible Duplicates: When foo and bar is not enough Code examples Foo Bar I've always wondered, why is there always "foo" and "bar" named as variables in most tutorials I've seen. Where did it come from? Why do we call them "foo" and "bar" why not something else?

    Read the article

  • How do I do the SQL equivalent of "DISTINCT" in CouchDB?

    - by Blaine LaFreniere
    I have a bunch of MP3 metadata in couchDB. I want to return every album that is in the MP3 metadata, but no duplicates. A typical document looks like this: { "_id": "005e16a055ba78589695c583fbcdf7e26064df98", "_rev": "2-87aa12c52ee0a406084b09eca6116804", "name": "Fifty-Fifty Clown", "number": 15, "artist": "Cocteau Twins", "bitrate": 320, "album": "Stars and Topsoil: A Collection (1982-1990)", "path": "Cocteau Twins/Stars and Topsoil: A Collection (1982-1990)/15 - Fifty-Fifty Clown.mp3", "year": 0, "genre": "Shoegaze" }

    Read the article

  • How will Facebook Authenticate let me ID a user?

    - by Donny P
    I have a website where I need to have data that is ID'd by user. For example, they enter their favorite food: userid favorite food ------ ------------- 1 french fries 2 tacos 3 fish sticks 4 chipotle When I use Facebook Authentication, what identifier will I use for the userid? I'm assuming it's not name, since this would create duplicates. Is it just the person's Facebook ID? Also is the correct API to use for 3rd party websites 'Facebook Connect' or 'Facebook Authorization' or something else?

    Read the article

  • Whats the maximum key length in NSDictionary?

    - by x3ro
    Hey there, I'm currently working on an app which displays a bunch of files in a table, and you can add and remove them and whatsoever. To prevent duplicates in the table, I'd like to create a NSDictionary using the files full path as keys for another NSDictionary which contains all the file information, but I am a little concerned about the maximum key length of NSDictionary, and also whether this solution would be performance killer or not... Looking forward to your answers. Best regards, x3ro

    Read the article

  • Does C# support inner classes? [closed]

    - by Amy
    Possible Duplicates: Using Inner classes in C# Inner classes in C# Class declared inside of another class in C# Does C# support the concept of inner classes? If so, what are the benefits? Could someone please explain this to me?

    Read the article

  • Objective C: Which is changed, property or ivar?

    - by Wilhelmsen
    Worrying about duplicates but can not seem to find and answer I can understand in any of the other posts, I just have to ask: When I have in my .h: @interface SecondViewController : UIViewController{ NSString *changeName; } @property (readwrite, retain) NSString *changeName; then in my .m @synthesize changeName; -(IBAction)changeButton:(id)sender{ changeName = @"changed"; } Is it the synthesized property or the instance variable that get changed when I press "changeButton" ?

    Read the article

  • Same keyword for two purposes in java? [closed]

    - by gurukulki
    Possible Duplicates: Same keyword for two purposes in java? Same keyword for two purposes in java? As we use "default" keyword as a access specifier, and it can be used in switch statements as well with complete different purpose, So i was curious that is there any other keywords in java which can be used in more then one purposes

    Read the article

  • wordpress num rows, how?

    - by krike
    I'm creating a plugin for wordpress and I need to check if a post with a specific title exist or not. This to prevent duplicates, how can I query this and see if it returned true or false?

    Read the article

  • how to do p2p with flash? [closed]

    - by Female Gay
    Possible Duplicates: how to do p2p with flash? Does Flash10 + p2p really work? It's not difficult to tell what is being asked here. This question is not ambiguous, not vague, not incomplete, either not rhetorical and can be reasonably answered in its current form. So, good luck!

    Read the article

  • Searching through large data set

    - by calccrypto
    how would i search through a list with ~5 mil 128bit (or 256, depending on how you look at it) strings quickly and find the duplicates (in python)? i can turn the strings into numbers, but i don't think that's going to help much. since i haven't learned much information theory, is there anything about this in information theory? and since these are hashes already, there's no point in hashing them again

    Read the article

  • What do you miss in C# language most [closed]

    - by Peter Stegnar
    Possible Duplicates: Most wanted feature for C# 4.0 ? What features should C# 4.0 onwards have to encourage switching from Java? Possible Duplicate: Most wanted feature for C# 4.0 ? C# language is around for about 10 years and it is becoming pretty mature language, but anyway, I wonder what do you miss most in C#? What do you need and is not available in it?

    Read the article

  • Merging some sorted lists with unknown order sequence

    - by Gabriel
    I've some sorted lists with variable number of elements. I wold like to merge the lists into one big list which contains all other lists in same order, without duplicates. Example: 1. XS,M,L,XL 2. S,M,XXL 3. XXS,XS,S,L Result: XXS,XS,S,M,L,XL,XXL The function should notify, if there are elements which have ambiguous positions. Here, it would be XXL and I need to specify its position after XL.

    Read the article

  • SEO and dynamic javascript HTML switching

    - by Gazow
    just wondering if anyone knows anything of using javascript to set html to new content instead of linking to new pages, if this is generally a bad idea or if it kind of hurts SEO(which im kind of new to) Basically the home page displays given content, and the links to like contact pages and stuff, just change the body content to what would normally be a separate html page. my OCD kinda bugs me when pages reload and either flash the background or its offset somehow, so i wanted to know if making sites like this was a bad idea or whatever- i suppose at the least, i could create duplicates/hidden pages for SEO purposes

    Read the article

  • Authentication between domains

    - by gneash
    I have: a service running under user account domainA\userA on hostA from domainA SPNs created for the service running on hostA for user account domainA\userA a client part of my application is trying to access resources (through remoting) from the service running on hostA but this client part is running on hostB from domainB and under the user account domainB\userB appropriate delegation settings are done for user account domainA\userA trusted for delegation SPNs set properly with no duplicates domainA and domainB have two way trust I get this error: A secuity package specific error occured: Unspecified error (0x80004005) Could you please see if there is any authentication setting I missed?

    Read the article

  • With Protobuf-net, does it allow deserialization reuse existing instances if they are the same?

    - by Stecy
    I'm using Protobuf-net. Suppose I have a list of Gizmo objects serialized and that each gizmo object reference a Gazoo object. The Gazoo object might be the same object referred by several Gizmo objects. How would deserialization work in this situation? Would I get more than one copies of Gazoos for the same referred one in the gizmo objects? What I would expect would be one copy of Gazoo for all the duplicates in the serialized data.

    Read the article

  • MySQL forgot about automatically creating an index for a foreign key?

    - by bobo
    After running the following SQL statements, you will see that, MySQL has automatically created the non-unique index question_tag_tag_id_tag_id on the tag_id column for me after the first ALTER TABLE statement has run. But after the second ALTER TABLE statement has run, I think MySQL should also automatically create another non-unique index question_tag_question_id_question_id on the question_id column for me. But as you can see from the SHOW INDEXES statement output, it's not there. Why does MySQL forget about the second ALTER TABLE statement? By the way, since I have already created a unique index question_id_tag_id_idx used by both question_id and tag_id columns. Is creating a separate index for each of them redundant? mysql> DROP DATABASE mydatabase; Query OK, 1 row affected (0.00 sec) mysql> CREATE DATABASE mydatabase; Query OK, 1 row affected (0.00 sec) mysql> USE mydatabase; Database changed mysql> CREATE TABLE question (id BIGINT AUTO_INCREMENT, html TEXT, PRIMARY KEY(id)) ENGINE = INNODB; Query OK, 0 rows affected (0.05 sec) mysql> CREATE TABLE tag (id BIGINT AUTO_INCREMENT, name VARCHAR(10) NOT NULL, UNIQUE INDEX name_idx (name), PRIMARY KEY(id)) ENGINE = INNODB; Query OK, 0 rows affected (0.05 sec) mysql> CREATE TABLE question_tag (question_id BIGINT, tag_id BIGINT, UNIQUE INDEX question_id_tag_id_idx (question_id, tag_id), PRIMARY KEY(question_id, tag_id)) ENGINE = INNODB; Query OK, 0 rows affected (0.00 sec) mysql> ALTER TABLE question_tag ADD CONSTRAINT question_tag_tag_id_tag_id FOREIGN KEY (tag_id) REFERENCES tag(id); Query OK, 0 rows affected (0.10 sec) Records: 0 Duplicates: 0 Warnings: 0 mysql> ALTER TABLE question_tag ADD CONSTRAINT question_tag_question_id_question_id FOREIGN KEY (question_id) REFERENCES question(id); Query OK, 0 rows affected (0.13 sec) Records: 0 Duplicates: 0 Warnings: 0 mysql> SHOW INDEXES FROM question_tag; +--------------+------------+----------------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+ | Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | +--------------+------------+----------------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+ | question_tag | 0 | PRIMARY | 1 | question_id | A | 0 | NULL | NULL | | BTREE | | | question_tag | 0 | PRIMARY | 2 | tag_id | A | 0 | NULL | NULL | | BTREE | | | question_tag | 0 | question_id_tag_id_idx | 1 | question_id | A | 0 | NULL | NULL | | BTREE | | | question_tag | 0 | question_id_tag_id_idx | 2 | tag_id | A | 0 | NULL | NULL | | BTREE | | | question_tag | 1 | question_tag_tag_id_tag_id | 1 | tag_id | A | 0 | NULL | NULL | | BTREE | | +--------------+------------+----------------------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+ 5 rows in set (0.01 sec) mysql>

    Read the article

< Previous Page | 13 14 15 16 17 18 19 20 21 22 23 24  | Next Page >