Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
218 changes: 138 additions & 80 deletions src/be/tarsos/lsh/HashTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.List;
import java.util.concurrent.*;

import be.tarsos.lsh.families.HashFamily;
import be.tarsos.lsh.families.HashFunction;
Expand All @@ -35,90 +36,147 @@
* An index contains one or more locality sensitive hash tables. These hash
* tables contain the mapping between a combination of a number of hashes
* (encoded using an integer) and a list of possible nearest neighbours.
*
*
* @author Joren Six
*/
class HashTable implements Serializable {
public class HashTable implements Serializable
{

private static final long serialVersionUID = -5410017645908038641L;
private static final long serialVersionUID = -5410017645908038641L;

/**
 * Contains the mapping between a combination of a number of hashes (encoded
 * using an integer) and a list of possible nearest neighbours.
 */
private HashMap<Integer,List<Vector>> hashTable;
/** The hash functions of this table; created by the family in the constructor and applied in array order when hashing. */
private HashFunction[] hashFunctions;
/** The hash family; it creates the hash functions and combines their individual hashes into one key. */
private HashFamily family;

/**
* Initialize a new hash table, it needs a hash family and a number of hash
* functions that should be used.
*
* @param numberOfHashes
* The number of hash functions that should be used.
* @param family
* The hash function family knows how to create new hash
* functions, and is used therefore.
*/
public HashTable(int numberOfHashes,HashFamily family){
hashTable = new HashMap<Integer, List<Vector>>();
this.hashFunctions = new HashFunction[numberOfHashes];
for(int i=0;i<numberOfHashes;i++){
hashFunctions[i] = family.createHashFunction();
}
this.family = family;
}
/**
 * Contains the mapping between a combination of a number of hashes (encoded
 * as a single Long key) and a list of possible nearest neighbours.
 */
private Hashtable<Long, ArrayList<Vector>> hashTable;
/** The hash functions of this table; created by the family in the constructor and applied in array order when hashing. */
private HashFunction[] hashFunctions;
/** The hash family; it creates the hash functions and combines their individual hashes into one key. */
private HashFamily family;
// Disabled: executor service for an experimental parallel hashing path.
//private ExecutorService executor;

/**
 * Looks up the candidate neighbours of a query vector. The combined hash of
 * the vector selects the bucket.
 *
 * @param query
 *            The query vector.
 * @return The list stored under the combined hash of the query (the
 *         table's own list, not a copy), or a new empty list when nothing
 *         is stored under that hash.
 */
public List<Vector> query(Vector query) {
    final Integer bucketKey = hash(query);
    return hashTable.containsKey(bucketKey)
            ? hashTable.get(bucketKey)
            : new ArrayList<Vector>();
}
/**
* Initialize a new hash table, it needs a hash family and a number of hash
* functions that should be used.
*
* @param numberOfHashes The number of hash functions that should be used.
* @param family The hash function family knows how to create new hash
* functions, and is used therefore.
*/
public HashTable(int numberOfHashes, HashFamily family)
{
hashTable = new Hashtable<>();
this.hashFunctions = new HashFunction[numberOfHashes];
for (int i = 0; i < numberOfHashes; i++)
{
hashFunctions[i] = family.createHashFunction();
}
this.family = family;
//executor = Executors.newFixedThreadPool(16);
}

/**
 * Adds a vector to the index by appending it to the bucket selected by its
 * combined hash. The bucket is created first if it does not exist yet.
 *
 * @param vector
 *            The vector to store.
 */
public void add(Vector vector) {
    final Integer bucketKey = hash(vector);
    if (hashTable.containsKey(bucketKey)) {
        hashTable.get(bucketKey).add(vector);
        return;
    }
    // First vector for this hash: register a new bucket, then fill it.
    final List<Vector> bucket = new ArrayList<Vector>();
    hashTable.put(bucketKey, bucket);
    bucket.add(vector);
}

/**
 * Calculates the combined hash for a vector. Every hash function of this
 * table is applied to the vector in order, and the hash family combines
 * the individual results.
 *
 * @param vector The vector to calculate the combined hash for.
 * @return The combined hash, used as the key into the hash table.
 */
private Integer hash(Vector vector){
    final int[] partialHashes = new int[hashFunctions.length];
    int slot = 0;
    for (HashFunction function : hashFunctions) {
        partialHashes[slot++] = function.hash(vector);
    }
    return family.combine(partialHashes);
}
/**
 * Looks up the candidate neighbours of a query vector. The combined hash of
 * the vector selects the bucket.
 *
 * @param query The query vector.
 * @return The list stored under the combined hash of the query (the table's
 *         own list, not a copy), or a new empty list when nothing is stored
 *         under that hash.
 */
public List<Vector> query(Vector query)
{
    // Hashtable never stores null values, so a null result means "no such key".
    final List<Vector> candidates = hashTable.get(hash(query));
    if (candidates == null)
    {
        return new ArrayList<Vector>();
    }
    return candidates;
}

/**
 * Reports how many hash functions this hash table uses.
 *
 * @return The length of the hash function array of this table.
 */
public int getNumberOfHashes() {
    final HashFunction[] functions = hashFunctions;
    return functions.length;
}
/**
 * Adds a vector to the index by appending it to the bucket selected by its
 * combined hash. The bucket is created first if it does not exist yet.
 *
 * @param vector The vector to store.
 */
public void add(Vector vector)
{
    final Long bucketKey = hash(vector);
    // Hashtable never stores null values, so a null result means "no bucket yet".
    ArrayList<Vector> bucket = hashTable.get(bucketKey);
    if (bucket == null)
    {
        bucket = new ArrayList<Vector>();
        hashTable.put(bucketKey, bucket);
    }
    bucket.add(vector);
}

/**
 * Calculates the combined hash for a vector. Every hash function of this
 * table is applied to the vector sequentially, in array order, and the hash
 * family combines the individual results.
 *
 * @param vector The vector to calculate the combined hash for.
 * @return The combined hash as a Long, used as the key into the hash table.
 */
public Long hash(final Vector vector)
{
    final int[] partialHashes = new int[hashFunctions.length];
    int slot = 0;
    for (final HashFunction function : hashFunctions)
    {
        partialHashes[slot] = function.hash(vector);
        slot++;
    }
    return family.combine(partialHashes);
}

/**
 * Reports how many hash functions this hash table uses.
 *
 * @return The length of the hash function array of this table.
 */
public int getNumberOfHashes()
{
    final int numberOfHashes = hashFunctions.length;
    return numberOfHashes;
}

/**
* Class to parallelize hashing
* @author utsav
*
*/
private class ParallelHash implements Callable<Integer>
{
private HashFunction mHashFunction;
private Vector mVector;
/**
* @param hashFunction The hashfunction to use
* @param vector The vector to hash
*/
public ParallelHash(HashFunction hashFunction, Vector vector)
{
mHashFunction = hashFunction;
mVector = vector;
}

/**
* Returns the Hash for the given Vector
* @see java.util.concurrent.Callable#call()
*/
@Override
public Integer call() throws Exception
{
return mHashFunction.hash(mVector);
}

}
}
Loading