diff --git a/machine_learning/course2/assignment1/Datasets/dataset1.mat b/machine_learning/course2/assignment1/Datasets/dataset1.mat new file mode 100644 index 0000000..9c11e61 Binary files /dev/null and b/machine_learning/course2/assignment1/Datasets/dataset1.mat differ diff --git a/machine_learning/course2/assignment1/Datasets/dataset1_ancient_octave.mat b/machine_learning/course2/assignment1/Datasets/dataset1_ancient_octave.mat new file mode 100644 index 0000000..bd57cc2 --- /dev/null +++ b/machine_learning/course2/assignment1/Datasets/dataset1_ancient_octave.mat @@ -0,0 +1,39 @@ +# Created by Octave 3.4.3, Tue Oct 09 10:54:27 2012 EDT +# name: neg_examples_nobias +# type: matrix +# rows: 4 +# columns: 2 + -0.8085714285714286 0.8372093023255818 + 0.3571428571428572 0.8504983388704321 + -0.7514285714285714 -0.7308970099667773 + -0.2999999999999999 0.1262458471760799 + + +# name: pos_examples_nobias +# type: matrix +# rows: 4 +# columns: 2 + 0.8714285714285714 0.6245847176079737 + -0.01999999999999991 -0.9235880398671097 + 0.362857142857143 -0.3189368770764118 + 0.8885714285714283 -0.8704318936877078 + + +# name: w_gen_feas +# type: matrix +# rows: 3 +# columns: 1 + 4.349652602017646 + -2.609972353001155 + -0.6941474928102369 + + +# name: w_init +# type: matrix +# rows: 3 +# columns: 1 + -0.6217014737809046 + 0.7609152728514132 + 0.7718720489184812 + + diff --git a/machine_learning/course2/assignment1/Datasets/dataset2.mat b/machine_learning/course2/assignment1/Datasets/dataset2.mat new file mode 100644 index 0000000..3907a26 Binary files /dev/null and b/machine_learning/course2/assignment1/Datasets/dataset2.mat differ diff --git a/machine_learning/course2/assignment1/Datasets/dataset2_ancient_octave.mat b/machine_learning/course2/assignment1/Datasets/dataset2_ancient_octave.mat new file mode 100644 index 0000000..1e900c1 --- /dev/null +++ b/machine_learning/course2/assignment1/Datasets/dataset2_ancient_octave.mat @@ -0,0 +1,38 @@ +# Created by Octave 3.4.3, Tue 
Oct 09 10:55:17 2012 EDT +# name: neg_examples_nobias +# type: matrix +# rows: 5 +# columns: 2 + -0.8085714285714286 0.8372093023255818 + 0.3571428571428572 0.8504983388704321 + -0.7514285714285714 -0.7308970099667773 + -0.2999999999999999 0.1262458471760799 + 0.6428571428571428 -0.5448504983388702 + + +# name: pos_examples_nobias +# type: matrix +# rows: 5 +# columns: 2 + 0.8714285714285714 0.6245847176079737 + -0.01999999999999991 -0.9235880398671097 + 0.362857142857143 -0.3189368770764118 + 0.8885714285714283 -0.8704318936877078 + -0.5285714285714286 0.5116279069767444 + + +# name: w_gen_feas +# type: matrix +# rows: 0 +# columns: 0 + + +# name: w_init +# type: matrix +# rows: 3 +# columns: 1 + 1.846898867105877 + -0.5832492921428249 + -0.5417888259410228 + + diff --git a/machine_learning/course2/assignment1/Datasets/dataset3.mat b/machine_learning/course2/assignment1/Datasets/dataset3.mat new file mode 100644 index 0000000..67a40fb Binary files /dev/null and b/machine_learning/course2/assignment1/Datasets/dataset3.mat differ diff --git a/machine_learning/course2/assignment1/Datasets/dataset3_ancient_octave.mat b/machine_learning/course2/assignment1/Datasets/dataset3_ancient_octave.mat new file mode 100644 index 0000000..8d8fb3e --- /dev/null +++ b/machine_learning/course2/assignment1/Datasets/dataset3_ancient_octave.mat @@ -0,0 +1,45 @@ +# Created by Octave 3.4.3, Tue Oct 09 10:55:23 2012 EDT +# name: neg_examples_nobias +# type: matrix +# rows: 7 +# columns: 2 + -0.7914285714285715 0.07973421926910329 + -0.5571428571428571 0.4119601328903657 + -0.2257142857142856 0.6976744186046515 + 0.1628571428571428 0.8305647840531565 + 0.46 0.6578073089701 + 0.734285714285714 0.3388704318936879 + 0.8257142857142856 -0.01328903654485036 + + +# name: pos_examples_nobias +# type: matrix +# rows: 7 +# columns: 2 + -0.7628571428571429 -0.1926910299003322 + -0.6028571428571429 -0.485049833887043 + -0.3857142857142858 -0.6976744186046511 + -0.1914285714285714 -0.3588039867109634 
+ 0.2828571428571429 -0.4318936877076411 + 0.4085714285714284 -0.6976744186046511 + 0.7514285714285713 -0.2325581395348835 + + +# name: w_gen_feas +# type: matrix +# rows: 3 +# columns: 1 + -0.6727286447504772 + -11.4892171706386 + -0.8941109530297788 + + +# name: w_init +# type: matrix +# rows: 3 +# columns: 1 + 0.9036803371988847 + -0.4908754917186831 + 0.9485529482883966 + + diff --git a/machine_learning/course2/assignment1/Datasets/dataset4.mat b/machine_learning/course2/assignment1/Datasets/dataset4.mat new file mode 100644 index 0000000..6e1d633 Binary files /dev/null and b/machine_learning/course2/assignment1/Datasets/dataset4.mat differ diff --git a/machine_learning/course2/assignment1/Datasets/dataset4_ancient_octave.mat b/machine_learning/course2/assignment1/Datasets/dataset4_ancient_octave.mat new file mode 100644 index 0000000..58ecace --- /dev/null +++ b/machine_learning/course2/assignment1/Datasets/dataset4_ancient_octave.mat @@ -0,0 +1,45 @@ +# Created by Octave 3.4.3, Tue Oct 09 10:55:54 2012 EDT +# name: neg_examples_nobias +# type: matrix +# rows: 9 +# columns: 2 + -0.8657142857142858 -0.3920265780730896 + -0.7857142857142857 -0.1727574750830563 + -0.5571428571428571 0.2458471760797345 + -0.2885714285714286 0.524916943521595 + -0.1285714285714287 0.524916943521595 + 0.08285714285714274 0.3322259136212626 + 0.1800000000000002 0.1063122923588042 + 0.2542857142857144 -0.166112956810631 + 0.3285714285714287 -0.3521594684385381 + + +# name: pos_examples_nobias +# type: matrix +# rows: 8 +# columns: 2 + -0.08857142857142863 0.2325581395348839 + 0.008571428571428452 -0.06644518272425226 + 0.1285714285714286 -0.3255813953488371 + 0.2885714285714287 -0.5249169435215946 + 0.597142857142857 -0.4651162790697674 + 0.7171428571428571 -0.1461794019933553 + 0.8599999999999999 0.1594684385382061 + 0.9457142857142857 0.4451827242524919 + + +# name: w_gen_feas +# type: matrix +# rows: 0 +# columns: 0 + + +# name: w_init +# type: matrix +# rows: 3 +# columns: 1 + 
-0.03182596423633538 + -0.2551127277965609 + -0.007102522003047257 + + diff --git a/machine_learning/course2/assignment1/learn_perceptron.m b/machine_learning/course2/assignment1/learn_perceptron.m new file mode 100644 index 0000000..086613f --- /dev/null +++ b/machine_learning/course2/assignment1/learn_perceptron.m @@ -0,0 +1,163 @@ +%% Learns the weights of a perceptron and displays the results. +function [w] = learn_perceptron(neg_examples_nobias,pos_examples_nobias,w_init,w_gen_feas) +%% +% Learns the weights of a perceptron for a 2-dimensional dataset and plots +% the perceptron at each iteration where an iteration is defined as one +% full pass through the data. If a generously feasible weight vector +% is provided then the visualization will also show the distance +% of the learned weight vectors to the generously feasible weight vector. +% Required Inputs: +% neg_examples_nobias - The num_neg_examples x 2 matrix for the examples with target 0. +% num_neg_examples is the number of examples for the negative class. +% pos_examples_nobias - The num_pos_examples x 2 matrix for the examples with target 1. +% num_pos_examples is the number of examples for the positive class. +% w_init - A 3-dimensional initial weight vector. The last element is the bias. +% w_gen_feas - A generously feasible weight vector. +% Returns: +% w - The learned weight vector. +%% + +%Bookkeeping +num_neg_examples = size(neg_examples_nobias,1); +num_pos_examples = size(pos_examples_nobias,1); +num_err_history = []; +w_dist_history = []; + +%Here we add a column of ones to the examples in order to allow us to learn +%bias parameters. +neg_examples = [neg_examples_nobias,ones(num_neg_examples,1)]; +pos_examples = [pos_examples_nobias,ones(num_pos_examples,1)]; + +%If weight vectors have not been provided, initialize them appropriately. 
+if (~exist('w_init','var') || isempty(w_init)) + w = randn(3,1); +else + w = w_init; +end + +if (~exist('w_gen_feas','var')) + w_gen_feas = []; +end + +%Find the data points that the perceptron has incorrectly classified +%and record the number of errors it makes. +iter = 0; +[mistakes0, mistakes1] = eval_perceptron(neg_examples,pos_examples,w); +num_errs = size(mistakes0,1) + size(mistakes1,1); +num_err_history(end+1) = num_errs; +fprintf('Number of errors in iteration %d:\t%d\n',iter,num_errs); +fprintf(['weights:\t', mat2str(w), '\n']); +plot_perceptron(neg_examples, pos_examples, mistakes0, mistakes1, num_err_history, w, w_dist_history); +key = input('', 's'); +if (key == 'q') + return; +end + +%If a generously feasible weight vector exists, record the distance +%to it from the initial weight vector. +if (length(w_gen_feas) ~= 0) + w_dist_history(end+1) = norm(w - w_gen_feas); +end + +%Iterate until the perceptron has correctly classified all points. +while (num_errs > 0) + iter = iter + 1; + + %Update the weights of the perceptron. + w = update_weights(neg_examples,pos_examples,w); + + %If a generously feasible weight vector exists, record the distance + %to it from the current weight vector. + if (length(w_gen_feas) ~= 0) + w_dist_history(end+1) = norm(w - w_gen_feas); + end + + %Find the data points that the perceptron has incorrectly classified. + %and record the number of errors it makes. 
+ [mistakes0, mistakes1] = eval_perceptron(neg_examples,pos_examples,w); + num_errs = size(mistakes0,1) + size(mistakes1,1); + num_err_history(end+1) = num_errs; + + fprintf('Number of errors in iteration %d:\t%d\n',iter,num_errs); + fprintf(['weights:\t', mat2str(w), '\n']); + plot_perceptron(neg_examples, pos_examples, mistakes0, mistakes1, num_err_history, w, w_dist_history); + key = input('', 's'); + if (key == 'q') + break; + end +end + +%WRITE THE CODE TO COMPLETE THIS FUNCTION +function [w] = update_weights(neg_examples, pos_examples, w_current) +%% +% Updates the weights of the perceptron for incorrectly classified points +% using the perceptron update algorithm. This function makes one sweep +% over the dataset. +% Inputs: +% neg_examples - The num_neg_examples x 3 matrix for the examples with target 0. +% num_neg_examples is the number of examples for the negative class. +% pos_examples- The num_pos_examples x 3 matrix for the examples with target 1. +% num_pos_examples is the number of examples for the positive class. +% w_current - A 3-dimensional weight vector, the last element is the bias. +% Returns: +% w - The weight vector after one pass through the dataset using the perceptron +% learning rule. +%% +a = -1 % learning rate +w = w_current; +num_neg_examples = size(neg_examples,1); +num_pos_examples = size(pos_examples,1); +for i=1:num_neg_examples + this_case = neg_examples(i,:); + x = this_case'; %Hint + activation = this_case*w; + if (activation >= 0) + %YOUR CODE HERE + w = w - a * x; + end +end +for i=1:num_pos_examples + this_case = pos_examples(i,:); + x = this_case'; + activation = this_case*w; + if (activation < 0) + %YOUR CODE HERE + w = w + a * x; + end +end + +function [mistakes0, mistakes1] = eval_perceptron(neg_examples, pos_examples, w) +%% +% Evaluates the perceptron using a given weight vector. Here, evaluation +% refers to finding the data points that the perceptron incorrectly classifies. 
+% Inputs:
+%   neg_examples - The num_neg_examples x 3 matrix for the examples with target 0.
+%       num_neg_examples is the number of examples for the negative class.
+%   pos_examples- The num_pos_examples x 3 matrix for the examples with target 1.
+%       num_pos_examples is the number of examples for the positive class.
+%   w - A 3-dimensional weight vector, the last element is the bias.
+% Returns:
+%   mistakes0 - A vector containing the indices of the negative examples that have been
+%       incorrectly classified as positive.
+%   mistakes1 - A vector containing the indices of the positive examples that have been
+%       incorrectly classified as negative.
+%%
+num_neg_examples = size(neg_examples,1);
+num_pos_examples = size(pos_examples,1);
+mistakes0 = [];
+mistakes1 = [];
+for i=1:num_neg_examples
+    x = neg_examples(i,:)';
+    activation = x'*w;
+    if (activation >= 0)
+        mistakes0 = [mistakes0;i];
+    end
+end
+for i=1:num_pos_examples
+    x = pos_examples(i,:)';
+    activation = x'*w;
+    if (activation < 0)
+        mistakes1 = [mistakes1;i];
+    end
+end
+
diff --git a/machine_learning/course2/assignment1/plot_perceptron.m b/machine_learning/course2/assignment1/plot_perceptron.m
new file mode 100644
index 0000000..7d190b2
--- /dev/null
+++ b/machine_learning/course2/assignment1/plot_perceptron.m
@@ -0,0 +1,75 @@
+%% Plots information about a perceptron classifier on a 2-dimensional dataset.
+function plot_perceptron(neg_examples, pos_examples, mistakes0, mistakes1, num_err_history, w, w_dist_history)
+%%
+% The top-left plot shows the dataset and the classification boundary given by
+% the weights of the perceptron. The negative examples are shown as circles
+% while the positive examples are shown as squares. If an example is colored
+% green then it means that the example has been correctly classified by the
+% provided weights. If it is colored red then it has been incorrectly classified.
+% The top-right plot shows the number of mistakes the perceptron algorithm has +% made in each iteration so far. +% The bottom-left plot shows the distance to some generously feasible weight +% vector if one has been provided (note, there can be an infinite number of these). +% Points that the classifier has made a mistake on are shown in red, +% while points that are correctly classified are shown in green. +% The goal is for all of the points to be green (if it is possible to do so). +% Inputs: +% neg_examples - The num_neg_examples x 3 matrix for the examples with target 0. +% num_neg_examples is the number of examples for the negative class. +% pos_examples- The num_pos_examples x 3 matrix for the examples with target 1. +% num_pos_examples is the number of examples for the positive class. +% mistakes0 - A vector containing the indices of the datapoints from class 0 incorrectly +% classified by the perceptron. This is a subset of neg_examples. +% mistakes1 - A vector containing the indices of the datapoints from class 1 incorrectly +% classified by the perceptron. This is a subset of pos_examples. +% num_err_history - A vector containing the number of mistakes for each +% iteration of learning so far. +% w - A 3-dimensional vector corresponding to the current weights of the +% perceptron. The last element is the bias. +% w_dist_history - A vector containing the L2-distance to a generously +% feasible weight vector for each iteration of learning so far. +% Empty if one has not been provided. 
%%
f = figure(1);
clf(f);

%Indices of the correctly classified points = all indices minus the mistakes.
neg_correct_ind = setdiff(1:size(neg_examples,1),mistakes0);
pos_correct_ind = setdiff(1:size(pos_examples,1),mistakes1);

%Top-left: the dataset. Green = correctly classified, red = misclassified;
%circles are negative examples, squares are positive examples.
subplot(2,2,1);
hold on;
if (~isempty(neg_examples))
    plot(neg_examples(neg_correct_ind,1),neg_examples(neg_correct_ind,2),'og','markersize',20);
end
if (~isempty(pos_examples))
    plot(pos_examples(pos_correct_ind,1),pos_examples(pos_correct_ind,2),'sg','markersize',20);
end
if (size(mistakes0,1) > 0)
    plot(neg_examples(mistakes0,1),neg_examples(mistakes0,2),'or','markersize',20);
end
if (size(mistakes1,1) > 0)
    plot(pos_examples(mistakes1,1),pos_examples(mistakes1,2),'sr','markersize',20);
end
title('Classifier');

%In order to plot the decision line, we just need to get two points.
%BUGFIX: guard against a vertical decision boundary (w(2) == 0), which
%previously divided by zero and silently drew no line.
if (w(2) ~= 0)
    plot([-5,5],[(-w(end)+5*w(1))/w(2),(-w(end)-5*w(1))/w(2)],'k');
elseif (w(1) ~= 0)
    plot([-w(end)/w(1),-w(end)/w(1)],[-5,5],'k');
end
xlim([-1,1]);
ylim([-1,1]);
hold off;

%Top-right: errors per iteration.
subplot(2,2,2);
plot(0:length(num_err_history)-1,num_err_history);
xlim([-1,max(15,length(num_err_history))]);
ylim([0,size(neg_examples,1)+size(pos_examples,1)+1]);
title('Number of errors');
xlabel('Iteration');
ylabel('Number of errors');

%Bottom-left: distance to the generously feasible weight vector (empty plot
%when none was provided).
subplot(2,2,3);
plot(0:length(w_dist_history)-1,w_dist_history);
%BUGFIX: size this axis from the series actually plotted here
%(w_dist_history), not num_err_history.
xlim([-1,max(15,length(w_dist_history))]);
ylim([0,15]);
title('Distance');
xlabel('Iteration');
ylabel('Distance');