# Rescales every data column of +rows+ onto 0.0..1.0 using that column's
# minimum and maximum across the whole set (min-max scaling).
#
# @param rows [Array<#data, #classification>] rows whose +data+ is an array of
#   numbers; the first row's length fixes the number of columns
# @return [Array(Array<ClassifiedData>, Proc)] the rescaled rows, and a lambda
#   that applies the same scaling to one further data array
def scale_data_set(rows)
  # No rows means no ranges to learn: return an empty set and a pass-through
  # scaler rather than failing on a missing first row.
  return [[], ->(data) { data }] if rows.empty?

  width = rows.first.data.length
  # Infinite sentinels guarantee the first real value replaces them, whatever
  # its magnitude.
  lows = Array.new(width, Float::INFINITY)
  highs = Array.new(width, -Float::INFINITY)

  # Could be many rows, so make one pass through the data rather than calling
  # Array#min and Array#max once per column.
  rows.each do |row|
    row.data.each_with_index do |value, i|
      lows[i] = value if value < lows[i]
      highs[i] = value if value > highs[i]
    end
  end

  scale_data = lambda do |data|
    data.zip(lows, highs).map do |value, low, high|
      span = high - low
      # A column that never varies has no range to scale by; map it to 0.0
      # instead of dividing by zero. to_f keeps Integer data from being
      # truncated by integer division.
      span.zero? ? 0.0 : (value - low).to_f / span
    end
  end

  new_rows = rows.map do |row|
    ClassifiedData.new(scale_data.call(row.data), row.classification)
  end

  [new_rows, scale_data]
end
+
# Usage (run any point through the returned scaler before classifying it):
#   numeric_matches = matches_to_numeric matches
#   scaled_set, scale_f = scale_data_set numeric_matches
#   averages = linear_train scaled_set
#   dot_product_classify(scale_f.call(numeric_matches[11].data), averages)
+
# Radial-basis similarity of two equal-length numeric vectors, computed as
# exp(-gamma * d) where d is the Euclidean distance between them. Identical
# vectors give 1.0; with a positive gamma the value falls towards 0.0 as the
# vectors move apart.
#
# @param v1 [Array<Numeric>] first vector
# @param v2 [Array<Numeric>] second vector, paired element-wise with +v1+
# @param gamma [Numeric] multiplier applied to the distance before exponentiating
# @return [Float] the similarity
def radial_basis(v1, v2, gamma = 20)
  # Seeding the fold with 0 makes two empty vectors lie at distance 0 rather
  # than handing nil to Math.sqrt.
  squared_distance = v1.zip(v2).reduce(0) { |total, (a, b)| total + (a - b)**2 }
  Math.exp(-gamma * Math.sqrt(squared_distance))
end
+
# Classifies +point+ as 1 or 0 by comparing its mean radial_basis similarity
# to the rows whose classification is 1 against its mean similarity to all
# other rows, shifted by +offset+.
#
# @param point [Array<Numeric>] the data vector to classify
# @param rows [Array<#data, #classification>] training rows
# @param offset [Numeric] value added to the difference of the two means
# @param gamma [Numeric] passed through to radial_basis
# @return [Integer] 0 when the shifted difference is negative, otherwise 1
def nonlinear_classify(point, rows, offset, gamma = 10)
  matches, others = rows.partition { |row| row.classification == 1 }

  # Mean similarity of the point to one group. A group with no rows counts as
  # 0.0; dividing by its zero count would give NaN, and NaN < 0 is false, so
  # the result would silently become 1.
  mean_similarity = lambda do |group|
    if group.empty?
      0.0
    else
      total = group.reduce(0.0) { |sum, row| sum + radial_basis(point, row.data, gamma) }
      total / group.length
    end
  end

  y = mean_similarity.call(matches) - mean_similarity.call(others) + offset
  y < 0 ? 0 : 1
end