Initial commit
[trapthecap.git] / src / ttc-ga-clustering.rb
1 require '/home/neil/programming/ruby/programming-collective/intelligence/k-means-clustering'
2
# Reads a genome file into an array of genomes.
#
# Each line of the file is one genome: the trailing newline is removed and
# every remaining character is converted to an integer with String#to_i
# (so any non-digit character becomes 0).
#
# File.foreach is used rather than IO.foreach so that a filename beginning
# with "|" is always treated as a path and never run as a command.
#
# @param filename [String] path of the genome file to read
# @return [Array<Array<Integer>>] one array of integers per line, in file order
def read_genome_file(filename)
  File.foreach(filename).map do |line|
    line.chomp.split('').map { |digit| digit.to_i }
  end
end
8
# Runs k-means clustering over the same rows several times and collects the
# result of every run.
#
# Each run calls k_means_cluster with the :discrete domain option and a
# block that compares two items with hamming_distance.
#
# @param rows [Array] the items to cluster
# @param k [Integer] number of clusters requested from each run (default 10)
# @param n [Integer] number of independent clustering runs (default 10)
# @return [Array] the n values returned by k_means_cluster, in run order
def find_centroid_sets(rows, k = 10, n = 10)
  1.upto(n).map do
    k_means_cluster(rows, k, :domain => :discrete) do |a, b|
      hamming_distance a, b
    end
  end
end
16
# Pairs each centroid with the size of its cluster and orders the pairs from
# the largest cluster to the smallest.
#
# Clusters are obtained from clusters_of_centroids, using hamming_distance
# as the distance block.
# NOTE(review): the zip below assumes clusters_of_centroids returns one
# cluster per centroid, in the same order as +centroids+ -- confirm.
#
# @param centroids [Array] the centroids to rank
# @param rows [Array] the items that were clustered
# @return [Array<Array>] [centroid, cluster_size] pairs, largest cluster first
def order_centroids_by_cluster_size(centroids, rows)
  clusters = clusters_of_centroids(centroids, rows) { |x, y| hamming_distance x, y }
  cluster_sizes = clusters.map { |cluster| cluster.length }
  # Fixed: the original zipped an undefined local `centroid`, raising NameError.
  sized_centroids = centroids.zip(cluster_sizes)
  sized_centroids.sort_by { |pair| pair[1] }.reverse
end
23
# Extracts, from every entry of +centroid_set+, the first element of that
# entry's first element.
# NOTE(review): presumably each entry is a list of [centroid, size] pairs
# sorted largest-first, making the result the top centroid of each run --
# confirm against the caller.
#
# @param centroid_set [Array] nested collection indexed as entry[0][0]
# @return [Array] one extracted element per entry, in the same order
def best_centroids(centroid_set)
  centroid_set.map do |ranked|
    leading_entry = ranked[0]
    leading_entry[0]
  end
end
27