/*
 * Copyright (C) 2011 Andrea Mazzoleni
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "portable.h"

#include "support.h"
#include "util.h"
#include "elem.h"
#include "state.h"
#include "parity.h"
#include "handle.h"

/****************************************************************************/
/* dup */

struct snapraid_hash {
	struct snapraid_disk* disk; /**< Disk. */
	struct snapraid_file* file; /**< File. */
	unsigned char hash[HASH_MAX]; /**< Hash of the whole file. */

	/* nodes for data structures */
	tommy_hashdyn_node node;
};

struct snapraid_hash* hash_alloc(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
{
	struct snapraid_hash* hash;
	block_off_t i;
	unsigned char* buf;

	hash = malloc_nofail(sizeof(struct snapraid_hash));
	hash->disk = disk;
	hash->file = file;

	buf = malloc_nofail(file->blockmax * BLOCK_HASH_SIZE);

	/* set the back pointer */
	for (i = 0; i < file->blockmax; ++i) {
		struct snapraid_block* block = fs_file2block_get(file, i);

		memcpy(buf + i * BLOCK_HASH_SIZE, block->hash, BLOCK_HASH_SIZE);

		if (!block_has_updated_hash(block)) {
			free(buf);
			free(hash);
			return 0;
		}
	}

	memhash(state->besthash, state->hashseed, hash->hash, buf, file->blockmax * BLOCK_HASH_SIZE);

	free(buf);

	return hash;
}

static inline tommy_uint32_t hash_hash(struct snapraid_hash* hash)
{
	return tommy_hash_u32(0, hash->hash, HASH_MAX);
}

void hash_free(struct snapraid_hash* hash)
{
	free(hash);
}

int hash_compare(const void* void_arg, const void* void_data)
{
	const char* arg = void_arg;
	const struct snapraid_hash* hash = void_data;

	return memcmp(arg, hash->hash, HASH_MAX);
}

void state_dup(struct snapraid_state* state)
{
	tommy_hashdyn hashset;
	tommy_node* i;
	unsigned count;
	data_off_t size;
	char esc_buffer[ESC_MAX];
	char esc_buffer_alt[ESC_MAX];

	tommy_hashdyn_init(&hashset);

	count = 0;
	size = 0;

	msg_progress("Comparing...\n");

	/* for each disk */
	for (i = state->disklist; i != 0; i = i->next) {
		tommy_node* j;
		struct snapraid_disk* disk = i->data;

		/* for each file */
		for (j = disk->filelist; j != 0; j = j->next) {
			struct snapraid_file* file = j->data;
			struct snapraid_hash* hash;
			tommy_hash_t hash32;

			/* if empty, skip it */
			if (file->size == 0)
				continue;

			hash = hash_alloc(state, disk, file);

			/* if no hash, skip it */
			if (!hash)
				continue;

			hash32 = hash_hash(hash);

			struct snapraid_hash* found = tommy_hashdyn_search(&hashset, hash_compare, hash->hash, hash32);
			if (found) {
				++count;
				size += found->file->size;
				log_tag("dup:%s:%s:%s:%s:%" PRIu64 ": dup\n", disk->name, esc_tag(file->sub, esc_buffer), found->disk->name, esc_tag(found->file->sub, esc_buffer_alt), found->file->size);
				printf("%12" PRIu64 " %s = %s\n", file->size, fmt_term(disk, file->sub, esc_buffer), fmt_term(found->disk, found->file->sub, esc_buffer_alt));
				hash_free(hash);
			} else {
				tommy_hashdyn_insert(&hashset, &hash->node, hash, hash32);
			}
		}
	}

	tommy_hashdyn_foreach(&hashset, (tommy_foreach_func*)hash_free);
	tommy_hashdyn_done(&hashset);

	msg_status("\n");
	msg_status("%8u duplicates, for %" PRIu64 " GB\n", count, size / GIGA);
	if (count)
		msg_status("There are duplicates!\n");
	else
		msg_status("No duplicates\n");

	log_tag("summary:dup_count:%u\n", count);
	log_tag("summary:dup_size:%" PRIu64 "\n", size);
	if (count == 0) {
		log_tag("summary:exit:unique\n");
	} else {
		log_tag("summary:exit:dup\n");
	}
	log_flush();
}