# Generates reader and writer methods for the `data` and `classification`
# attributes. The enclosing class header is outside this excerpt.
4 attr_accessor
:data, :classification
# Constructor. `data` defaults to a freshly created empty Array (the default
# expression is evaluated on each call) and `classification` defaults to nil;
# the visible body stores `classification` in @classification.
# NOTE(review): no assignment of `data` to an instance variable and no closing
# `end` are visible in this excerpt - confirm against the full file.
6 def initialize(data = Array
.new
, classification
= nil)
8 @classification = classification
# Reads `filename` line by line with IO.foreach and appends one ClassifiedData
# object per line to `rows`.
#
# Each line is chomped and split on ','. Every field except the last forms the
# first constructor argument: converted with `to_f` when `only_numbers` is
# truthy, otherwise left as the original String. The last field is converted
# with `to_i` and passed as the second constructor argument.
#
# Note that `to_f` / `to_i` yield 0 for non-numeric text instead of raising,
# and that the line is chomped and split twice per iteration.
# NOTE(review): the initialisation of `rows`, the closing `end`s and the
# method's return value are not visible in this excerpt - confirm.
12 def load_data(filename
, only_numbers
= false)
14 IO
.foreach(filename
) do |line
|
15 rows
<< ClassifiedData
.new(line
.chomp
.split(',')[0..-2].map
{|field
| only_numbers
? field
.to_f
: field
},
16 line
.chomp
.split(',')[-1].to_i
)
# Builds a Gnuplot::Plot titled "Ages of matches" from `rows`.
#
# `matches` collects the `data` of every row whose classification == 1 and
# `non_matches` the `data` of every row whose classification == 0. Each
# collection is wrapped in a one-element array and handed to its own
# Gnuplot::DataSet; the two data sets are assigned to `plot.data`.
#
# NOTE(review): `gp` is not defined in the visible lines - presumably it is
# yielded by an enclosing Gnuplot block; confirm against the full file.
# NOTE(review): the bodies of both DataSet blocks and all closing `end`s are
# not visible in this excerpt.
22 def plot_age_matches(rows
)
24 Gnuplot
::Plot.new( gp
) do |plot
|
26 plot
.title
"Ages of matches"
30 matches
= rows
.select
{|r
| r
.classification
== 1}.map
{|r
| r
.data}
31 non_matches
= rows
.select
{|r
| r
.classification
== 0}.map
{|r
| r
.data}
33 plot
.data = [ Gnuplot
::DataSet.new( [matches
] ) do |ds
|
36 end , Gnuplot
::DataSet.new( [non_matches
] ) do |ds
|
# Accumulates per-class statistics over rows and derives one average vector
# per classification value.
#
# For each `row`: `sums[row_class]` is lazily created as a vector of 0.0 with
# the same length as that row's `data`, every element of `row.data` is added
# to the matching position, and `counts[row_class]` is incremented. Afterwards
# each summed vector is divided element-wise by its class count and stored in
# `averages[match_class]`.
#
# The zero vector is sized from the first row seen for a class, so a later,
# longer row of the same class would index past it.
# NOTE(review): the initialisation of `sums`, `counts` and `averages`, the
# loop header that binds `row`, the closing `end`s and the return value are
# not visible in this excerpt - confirm against the full file.
45 def linear_train(rows
)
51 row_class
= row
.classification
52 sums
[row_class
] ||= [0.0] * (row
.data.length
)
54 (row
.data.length
).times
do |i
|
55 sums
[row_class
][i
] += row
.data[i
]
57 counts
[row_class
] += 1
60 sums
.keys
.each
do |match_class
|
61 averages
[match_class
] = sums
[match_class
].map
{|sum
| sum
/ counts
[match_class
]}
# Computes the dot product of two numeric vectors.
#
# Elements are paired positionally with Array#zip, so the result is driven by
# the length of v1: surplus elements of v2 are ignored, and a shorter v2 pads
# with nil, which makes the multiplication raise TypeError.
#
# @param v1 [Array<Numeric>] first vector
# @param v2 [Array<Numeric>] second vector
# @return [Numeric] sum of the pairwise products; 0 when v1 is empty
def dot_product(v1, v2)
  # `sum` starts from 0, so empty vectors give 0 instead of the nil that
  # `reduce(:+)` returns for an empty collection.
  v1.zip(v2).sum { |a, b| a * b }
end
# Computes an offset `b` and a score `y` for `point` from the two vectors
# `averages[0]` and `averages[1]`, using dot_product.
#
#   b = (averages[1] . averages[1] - averages[0] . averages[0]) / 2
#   y = point . averages[0] - point . averages[1] + b
#
# NOTE(review): `/ 2` performs integer division when both dot products are
# Integers - confirm the averages are always Floats.
# NOTE(review): how `y` is turned into the returned class and the closing
# `end` are not visible in this excerpt - confirm against the full file.
70 def dot_product_classify(point
, averages
)
71 b
= (dot_product(averages
[1], averages
[1]) - dot_product(averages
[0], averages
[0])) / 2
72 y
= dot_product(point
, averages
[0]) - dot_product(point
, averages
[1]) + b
82 elsif v
== 'no' then -1
# Counts the distinct entries that two colon-separated strings have in common.
#
# Both strings are split on ':' and intersected with Array#&, which removes
# duplicates, so a value repeated in either string is counted once.
#
# @param interests1 [String] colon-separated list, e.g. "skiing:knitting"
# @param interests2 [String] colon-separated list
# @return [Integer] number of distinct shared entries (0 when none)
def match_count(interests1, interests2)
  (interests1.split(':') & interests2.split(':')).length
end
91 def miles_distance(a1
, a2
)