1 require '/home/neil/programming/ruby/programming-collective/intelligence/k-means-clustering'
# Reads the file at +filename+ line by line and appends one array of integers
# per line to +genomes+: each line is chomped, split into single characters,
# and every character is converted with +to_i+.
#
# filename:: path handed straight to IO.foreach
#
# NOTE(review): +genomes+ is not initialised on any line visible in this
# excerpt, and the return expression and closing +end+ are not visible either
# (the embedded listing numbers jump from 3 to 5 and from 5 to 9) -- presumably
# it starts as an empty array and is returned; confirm against the full file.
3 def read_genome_file(filename)
# String#to_i yields 0 for any non-numeric character instead of raising.
5 IO.foreach(filename) { |line| genomes << line.chomp.split('').collect {|i| i.to_i} }
# Appends the result of a +k_means_cluster+ run over +rows+ to +centroid_set+.
#
# rows:: passed unchanged as the first argument of +k_means_cluster+
# k::    passed as the second argument of +k_means_cluster+ (default 10) --
#        presumably the number of clusters; confirm against that helper
# n::    default 10; not referenced on the visible line -- presumably the
#        number of clustering runs, driven by a loop on an elided line; confirm
#
# NOTE(review): the initialisation of +centroid_set+, any enclosing loop, the
# return expression and the closing +end+ are outside this excerpt (the
# embedded listing numbers jump from 9 to 12 and from 12 to 17).
9 def find_centroid_sets(rows, k = 10, n = 10)
# The block supplies the distance measure (+hamming_distance+) and the
# :domain => :discrete option is forwarded as-is; both helpers are defined
# outside this excerpt.
12 centroid_set << (k_means_cluster(rows, k, :domain => :discrete) {|x, y| hamming_distance x, y})
17 def order_centroids_by_cluster_size(centroids, rows)
18 clusters = clusters_of_centroids(centroids, rows) {|x, y| hamming_distance x, y}
19 cluster_sizes = clusters.collect {|c| c.length}
20 sized_centroids = centroid.zip(cluster_sizes)
21 sorted_sized_centroids = (sized_centroids.sort_by {|t| t[1]}).reverse
# Maps every element +tc+ of +centroid_set+ to +tc[0][0]+ and returns the
# resulting array.
#
# centroid_set:: an enumerable whose elements support two levels of integer
#                indexing
#
# NOTE(review): presumably each +tc+ is a size-ordered list of
# [centroid, cluster_size] pairs such as the one built in
# +order_centroids_by_cluster_size+, which would make +tc[0][0]+ the centroid
# of the largest cluster -- confirm against the caller.
# An empty +tc+ array makes +tc[0]+ nil, so the second index raises
# NoMethodError.
# The closing +end+ is outside this excerpt.
24 def best_centroids(centroid_set)
25 centroid_set.collect {|tc| tc[0][0]}