# Helpers for clustering genomes with k-means under Hamming distance.
#
# NOTE(review): the clustering library is loaded via a machine-specific
# absolute path; this file only runs where that path exists.
# k_means_cluster, clusters_of_centroids and hamming_distance are not defined
# in this file -- presumably they come from the library required below.
require '/home/neil/programming/ruby/programming-collective/intelligence/k-means-clustering'

# Reads a genome file in which each line is one genome.
#
# Each line has its trailing newline removed and is split into individual
# characters, each converted with String#to_i (so a non-digit character
# becomes 0).
#
# @param filename [String] path of the file to read
# @return [Array<Array<Integer>>] one array of integers per line, in file order
def read_genome_file(filename)
  IO.foreach(filename).map do |line|
    line.chomp.split('').map { |char| char.to_i }
  end
end

# Runs k_means_cluster n times over the same rows and collects every result.
#
# Each run is invoked with :domain => :discrete and a block that compares two
# items with hamming_distance.
#
# @param rows [Array] the items to cluster
# @param k [Integer] passed through to k_means_cluster (presumably the number
#   of clusters -- confirm against the library)
# @param n [Integer] number of independent runs; n <= 0 yields an empty array
# @return [Array] the n results of k_means_cluster, in run order
def find_centroid_sets(rows, k = 10, n = 10)
  (1..n).map do
    k_means_cluster(rows, k, :domain => :discrete) { |x, y| hamming_distance(x, y) }
  end
end

# Pairs each centroid with the size of its cluster and orders the pairs from
# the largest cluster to the smallest.
#
# Assumes clusters_of_centroids returns one cluster per centroid, in the same
# order as `centroids` -- the zip below relies on that; TODO confirm.
#
# @param centroids [Array] centroids, e.g. one result of k_means_cluster
# @param rows [Array] the items that were clustered
# @return [Array<Array>] [centroid, cluster_size] pairs, largest cluster first
def order_centroids_by_cluster_size(centroids, rows)
  clusters = clusters_of_centroids(centroids, rows) { |x, y| hamming_distance(x, y) }
  cluster_sizes = clusters.map { |cluster| cluster.length }
  # Fixed: the original zipped the undefined name `centroid` (NameError)
  # instead of the `centroids` parameter.
  sized_centroids = centroids.zip(cluster_sizes)
  sized_centroids.sort_by { |pair| pair[1] }.reverse
end

# Extracts, from each entry of centroid_set, the first element of its first
# element.
#
# Presumably each entry is an ordered list of [centroid, cluster_size] pairs
# as returned by order_centroids_by_cluster_size, making the result the
# centroid of the largest cluster of every entry -- verify against callers.
#
# @param centroid_set [Array] collection of indexable entries
# @return [Array] entry[0][0] for every entry, in order
def best_centroids(centroid_set)
  centroid_set.map { |sized_centroids| sized_centroids[0][0] }
end