1 require '/home/neil/programming/ruby/programming-collective/intelligence/k-means-clustering'
# Reads a genome file and parses it into rows of integers.
#
# Each line has its trailing newline removed and is then split into single
# characters; every character is converted with String#to_i, so any
# non-digit character becomes 0.
#
# File.foreach is used instead of IO.foreach so that a filename beginning
# with "|" is never interpreted as a command to run.
#
# @param filename [String] path of the file to read
# @return [Array<Array<Integer>>] one array of integers per line, in file order
def read_genome_file(filename)
  File.foreach(filename).map do |line|
    line.chomp.chars.map(&:to_i)
  end
end
# Runs k-means clustering repeatedly and collects the result of every run.
#
# Each run calls k_means_cluster (provided by the required clustering
# library) with the :discrete domain option and Hamming distance as the
# distance block.
#
# NOTE(review): n is taken to be the number of independent clustering runs;
# confirm against callers.
#
# @param rows [Array] data rows handed unchanged to k_means_cluster
# @param k [Integer] number of clusters requested per run
# @param n [Integer] number of runs; values below 1 give an empty result
# @return [Array] the k_means_cluster results, one per run
def find_centroid_sets(rows, k = 10, n = 10)
  (1..n).map do
    k_means_cluster(rows, k, :domain => :discrete) { |x, y| hamming_distance(x, y) }
  end
end
# Pairs each centroid with the size of its cluster and orders the pairs from
# the largest cluster to the smallest.
#
# Clusters are obtained from clusters_of_centroids (provided by the required
# clustering library) using Hamming distance as the distance block.
#
# NOTE(review): this assumes clusters_of_centroids returns one cluster per
# centroid, in the same order as `centroids`; confirm against the library.
#
# @param centroids [Array] centroids to rank
# @param rows [Array] data rows to assign to the centroids
# @return [Array<Array>] [centroid, cluster_size] pairs, largest cluster first
def order_centroids_by_cluster_size(centroids, rows)
  clusters = clusters_of_centroids(centroids, rows) { |x, y| hamming_distance(x, y) }
  cluster_sizes = clusters.map(&:length)
  # Zip the `centroids` parameter; the previous code referenced an undefined
  # local named `centroid`, which raised NameError.
  centroids.zip(cluster_sizes).sort_by { |_centroid, size| size }.reverse
end
# Extracts, from every entry of centroid_set, the first element of that
# entry's first element.
#
# NOTE(review): presumably each entry is a list of [centroid, cluster_size]
# pairs ordered largest first, so this yields the centroid of the largest
# cluster per entry; confirm against callers.
#
# @param centroid_set [Array] collection of nested, indexable entries
# @return [Array] entry[0][0] for each entry, in the original order
def best_centroids(centroid_set)
  centroid_set.map { |ordered| ordered[0][0] }
end