Add Music Model Generation

Add the music model generation and usage scripts with documentation
This commit is contained in:
avi.vajpeyi@gmail.com 2017-05-06 18:43:51 -04:00
parent 453bf9a3bd
commit 3fe2b59d90
6 changed files with 492 additions and 7 deletions

View File

@ -0,0 +1,110 @@
'''
Thomas Matlak, Avi Vajpeyi, Avery Rapson
CS 310 Final Project
Given text files with the musical notes in int format, this creates a pickle of
the attributes and classes for all the musical data stored in the text files
(each text file is for one class).
The data is stored as how many times each note on a keyboard occurs, and the class
label is stored in 'one hot' format. 10 percent of the data is set aside as testing data.
Usage:
python createMusicalFeaturesets.py
OUTPUT: notesData.pickle
A pickle with the attributes and classes for music data
pickle data contains: train_attribute, train_class, test_attribute, test_class
NOTE: Need to update the following depending on usage of the script
ROOT_DIR = root/directory/where/text/files/reside
DataFile = ["emotion1.txt", "emotion2.txt", ...]
'''
from mido import MidiFile, MidiTrack, Message
import mido
import random
import pickle
from collections import Counter
import numpy as np
import os
'''
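Assume we have the following as our 'LEXICON'
unique word list : [chair, table, spoon, television]
Assume this is our current sample data:
String: I pulled my chair up to the table
Create a training vector that holds the count of each lexicon word:
training vector : [1, 1, 0, 0]
(since chair and table are in the string, but spoon and television aren't)
Do this for all strings.
In this script the 'lexicon' is the set of note numbers 0 - 127 and each
'string' is one line of a data file, so every line becomes a vector of 128 note counts.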
'''
# Folder prefix (note the trailing slash) prepended to every entry of DataFile.
ROOT_DIR = "TrainingData/"

# One text file per class: index 0 is read as the negative class and
# index 1 as the positive class by create_feature_sets_and_labels.
DataFile = [
    "NegExamples/sadSongs.txt",
    "PosExamples/happySongs.txt",
]

# Number of note values (0 - 127). It is also the length of every feature
# vector, i.e. it defines the 'lexicon' of notes.
# The larger the dataset, the more memory is used (MemoryError is possible).
pianoSize = 128
def sample_handling(sample, classification):
    '''
    Turn one text file of songs into labelled note-count feature vectors.

    Every line of `sample` is read as one song: whitespace-separated integer
    note numbers. For each line a vector of length pianoSize is built in which
    index i holds how many times note i occurs on that line.

    sample:         path of the text file to read
    classification: one-hot class label attached to every line of the file,
                    e.g. [1, 0] or [0, 1]

    returns: a list with one [noteCount, classification] pair per line
        [
            [[0.0, 1.0, 0.0, 2.0, ...], [1, 0]],
            [[0.0, 0.0, 3.0, 0.0, ...], [1, 0]],
            ....
        ]
    raises: ValueError if a token on a line is not an integer
    '''
    featureset = []
    with open(sample, 'r') as f:
        # Stream the file line by line instead of loading it all with readlines().
        for line in f:
            notes = np.array([int(token) for token in line.split()], dtype=int)
            # Drop notes outside 0 .. pianoSize-1. A negative value used as an
            # index would silently be counted from the END of the vector
            # (-1 -> note 127) and a value >= pianoSize would raise IndexError.
            notes = notes[(notes >= 0) & (notes < pianoSize)]
            # bincount gives the per-note occurrence count; keep the float
            # dtype the vectors have always had.
            noteCount = np.bincount(notes, minlength=pianoSize).astype(float)
            featureset.append([list(noteCount), classification])
    return featureset
def create_feature_sets_and_labels(DataFile, test_size=0.1):
    '''
    Build shuffled training and testing sets from the two class files.

    DataFile:  sequence of two file names relative to ROOT_DIR; index 0 is
               labelled [0, 1] (neg) and index 1 is labelled [1, 0] (pos)
    test_size: fraction of all samples held out for testing (default 0.1)

    returns: train_x, train_y, test_x, test_y as plain lists, where every x
             entry is one note-count vector and every y entry is its one-hot
             label. The labels are one-hot so that a check such as
             tf.argmax(output) == tf.argmax(expected) can be used on them.
    '''
    features = []
    features += sample_handling(ROOT_DIR + DataFile[0], [0, 1])  # neg
    features += sample_handling(ROOT_DIR + DataFile[1], [1, 0])  # pos
    random.shuffle(features)

    testing_size = int(test_size * len(features))
    # Split on an explicit index. Slicing with -testing_size breaks when
    # testing_size is 0: [:-0] is empty, so all data would land in the test set.
    split_at = len(features) - testing_size
    training_rows = features[:split_at]
    testing_rows = features[split_at:]

    # Rows are [attributes, label] pairs of different lengths, so they are
    # separated with plain Python instead of a (ragged) numpy array.
    train_x = [attributes for attributes, _ in training_rows]
    train_y = [label for _, label in training_rows]
    test_x = [attributes for attributes, _ in testing_rows]
    test_y = [label for _, label in testing_rows]
    return train_x, train_y, test_x, test_y
if __name__ == '__main__':
    # Build the shuffled train/test split from the configured class files ...
    split = create_feature_sets_and_labels(DataFile)
    train_x, train_y, test_x, test_y = split
    # ... and store it as one list [train_x, train_y, test_x, test_y].
    # Every line of the input files has become a pianoSize-long vector,
    # so the pickle (and the memory needed to build it) grows with the dataset.
    with open('notesData.pickle', 'wb') as pickle_file:
        pickle.dump([train_x, train_y, test_x, test_y], pickle_file)

View File

@ -0,0 +1,81 @@
'''
Thomas Matlak
CS 310 Final Project
Takes directory containing midi files as input, produces a text file containing only the midi note values for the first 10 seconds of each musical piece.
Usage:
python midiNoteSegments.py /path/to/midi/folder/ [/path/to/output/file.txt]
'''
import sys, glob
from mido import MidiFile, MidiTrack, Message
from keras.layers import LSTM, Dense, Activation, Dropout
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.optimizers import RMSprop
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mido
import csv
# The input folder is mandatory; fail with a usage message instead of a bare
# IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("Usage: python midiNoteSegments.py /path/to/midi/folder/ [/path/to/output/file.txt]")
indir = sys.argv[1]
# The optional second argument overrides the default output location.
outfile_name = sys.argv[2] if len(sys.argv) > 2 else indir + "/out.txt"
# glob returns files in arbitrary order; sort so the output rows are
# written in the same order on every run.
midi_files = sorted(glob.glob(indir + "/*.mid"))
# Semitone shift that moves the tonic of a key down to C, indexed by the key
# name carried in a MIDI key_signature message. Adding the shift to a note
# number preserves the intervals of the piece while normalising its key.
#
# The minor keys are included as well (a lookup with a minor key name used to
# raise KeyError). They follow the same tonic rule, so e.g. A minor is moved
# to C minor and the major/minor character of the piece is kept.
transposition_intervals = {
    # major keys, flats to sharps
    'Cb': -11,
    'Gb': -6,
    'Db': -1,
    'Ab': -8,
    'Eb': -3,
    'Bb': -10,
    'F': -5,
    'C': 0,
    'G': -7,
    'D': -2,
    'A': -9,
    'E': -4,
    'B': -11,
    'F#': -6,
    'C#': -1,
    # minor keys, flats to sharps
    'Abm': -8,
    'Ebm': -3,
    'Bbm': -10,
    'Fm': -5,
    'Cm': 0,
    'Gm': -7,
    'Dm': -2,
    'Am': -9,
    'Em': -4,
    'Bm': -11,
    'F#m': -6,
    'C#m': -1,
    'G#m': -8,
    'D#m': -3,
    'A#m': -10,
}
# Write one space-separated row of note numbers per MIDI file.
# NOTE(review): binary mode is what the Python 2 csv module expects; on
# Python 3 csv.writer needs a text-mode file (open(..., 'w', newline='')).
# Confirm the target interpreter before changing the mode.
with open(outfile_name, 'wb') as outfile:
    writer = csv.writer(outfile, delimiter=' ')
    for midi_file in midi_files:
        mid = MidiFile(midi_file)
        notes = []
        time = float(0)
        # Pieces without a key_signature message are treated as being in C.
        key = "C"
        for msg in mid:
            # Only the first 10 seconds of each piece are kept.
            if time >= 10:
                break
            ### this time is in seconds, not ticks
            time += msg.time
            if msg.type == "key_signature":
                key = msg.key
            # Check the type BEFORE the channel: meta messages and messages
            # such as sysex carry no channel, and reading msg.channel on them
            # raises AttributeError.
            ### only interested in piano channel
            if msg.type == 'note_on' and msg.channel == 0:
                # this preserves the intervals, but transposes all samples to C
                notes.append(msg.note + transposition_intervals[key])
        writer.writerow(notes)

Binary file not shown.

View File

@ -0,0 +1,158 @@
import tensorflow as tf
import numpy as np
import pickle
import os
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets("/tmp/data/", one_hot = True)
# from createMusicalFeaturesets import create_feature_sets_and_labels
# Load [train_x, train_y, test_x, test_y]; the context manager closes the
# file handle once the data has been read.
# NOTE(review): createMusicalFeaturesets.py writes 'notesData.pickle' -
# confirm that 'notesData2.pickle' is the intended input. Unpickling executes
# code from the file, so only load pickles produced locally.
with open("notesData2.pickle", "rb") as data_file:
    train_x, train_y, test_x, test_y = pickle.load(data_file)

# Path prefix handed to saver.save() for the checkpoint.
saveFile = "savedModels/musicModelpy27"

# Network and training hyper-parameters.
n_nodes_hl1 = 1000
n_nodes_hl2 = 1000
n_nodes_hl3 = 1000
n_classes = 2
batch_size = 10
hm_epochs = 9

input_data_size = len(train_x[0])  # each train_x instance is one song, and so one lexicon of notes
print("DEBUG: input data size = "+str(input_data_size))

# Graph inputs: x receives the feature vectors, y the one-hot labels.
x = tf.placeholder('float')
y = tf.placeholder('float')

# The first layer takes its input width from the data instead of a
# hard-coded 128, so it always agrees with the loaded feature vectors.
hidden_1_layer = {'f_fum': n_nodes_hl1,
                  'weight': tf.Variable(tf.random_normal([input_data_size, n_nodes_hl1])),
                  'bias': tf.Variable(tf.random_normal([n_nodes_hl1]))}
hidden_2_layer = {'f_fum': n_nodes_hl2,
                  'weight': tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
                  'bias': tf.Variable(tf.random_normal([n_nodes_hl2]))}
hidden_3_layer = {'f_fum': n_nodes_hl3,
                  'weight': tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
                  'bias': tf.Variable(tf.random_normal([n_nodes_hl3]))}
output_layer = {'f_fum': None,
                'weight': tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
                'bias': tf.Variable(tf.random_normal([n_classes])), }
# Nothing changes
def neural_network_model(data):
    '''
    Build the forward pass: three ReLU hidden layers and a linear output layer.

    data: tensor that is matrix-multiplied with the first layer's weights
    returns: the output layer's result (no activation is applied to it)
    '''
    activation = data
    # Every hidden layer computes relu(activation x weight + bias).
    for layer in (hidden_1_layer, hidden_2_layer, hidden_3_layer):
        weighted_sum = tf.add(tf.matmul(activation, layer['weight']), layer['bias'])
        activation = tf.nn.relu(weighted_sum)
    # The output layer is left without an activation.
    return tf.matmul(activation, output_layer['weight']) + output_layer['bias']
def train_neural_network(x):
    '''
    Train the network on train_x / train_y and report accuracy on the test set.

    Runs hm_epochs passes over the training data in mini-batches of
    batch_size, minimising softmax cross-entropy with the Adam optimizer.
    After every epoch the session is saved to saveFile through the
    module-level `saver`. When training is done the accuracy on
    test_x / test_y is printed.

    x: placeholder the feature batches are fed into (labels are fed into the
       module-level placeholder y)
    '''
    prediction = neural_network_model(x)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Number of mini-batches in one epoch (ceiling division; the last batch
    # may be smaller than batch_size).
    batches_per_epoch = (len(train_x) + batch_size - 1) // batch_size

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(1, hm_epochs + 1):
            # Start every epoch from zero so the reported loss is the plain
            # sum of this epoch's batch losses.
            epoch_loss = 0
            batch_starts = range(0, len(train_x), batch_size)
            for batch_number, start in enumerate(batch_starts, 1):
                end = start + batch_size
                batch_x = np.array(train_x[start:end])
                batch_y = np.array(train_y[start:end])
                _, c = sess.run([optimizer, cost], feed_dict={x: batch_x, y: batch_y})
                epoch_loss += c
                # Progress is "batch k of batches_per_epoch" within this epoch.
                print('Batch run:', batch_number, '/', batches_per_epoch, '| Epoch:',
                      epoch, '| Batch Loss:', c)

            saver.save(sess, saveFile)
            print("Should Save session in "+ saveFile )
            # epoch is already 1-based, so it is reported as is.
            print('Epoch', epoch, 'completed out of', hm_epochs, 'loss:', epoch_loss)

        # A sample is correct when the strongest output matches the label.
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Trained', len(train_x), 'samples.')
        print('Tested', len(test_x), 'samples.')
        accPercent = accuracy.eval({x: test_x, y: test_y})*100
        print('Accuracy: '+ str(accPercent)+ '%')
# The Saver is created at module level, after the layer variables above exist;
# train_neural_network looks this name up when it saves the session.
saver = tf.train.Saver()
# tf_log = 'tf.log' ## SAVES EPOCH NUMBER
# Training starts as soon as this module is executed.
train_neural_network(x)
def test_neural_network():
    '''
    Evaluate the saved model on test_x / test_y and print its accuracy.

    The weights are restored from saveFile through the module-level `saver`
    before the evaluation. Without the restore a new session only holds
    freshly random-initialised variables, so the printed accuracy would say
    nothing about the trained network.
    '''
    prediction = neural_network_model(x)
    # A sample is correct when the strongest output matches the label.
    correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, 'float'))

    testx = np.array(test_x)
    testy = np.array(test_y)

    with tf.Session() as sess:
        # Give every variable a value first, then overwrite the ones stored
        # in the checkpoint with their trained values.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, saveFile)

        print('******RESULTS******')
        print('Tested', len(test_x), 'samples.')
        print('Accuracy:', accuracy.eval({x: testx, y: testy}))
# The stand-alone evaluation is disabled; train_neural_network already prints
# the accuracy on the test set when it finishes.
#test_neural_network()
# Marks the end of the script run in the console output.
print ("\n\n\nFINISHED\n\n\n")
# x =os.remove("tf.log")
# print("removed :" + str(x))

View File

@ -0,0 +1,134 @@
'''
Thomas Matlak, Avi Vajpeyi, Avery Rapson
CS 310 Final Project
Loads the NN checkpoint named by 'saveFile'. The function predictmood(input_midi_file)
takes a MIDI file opened with MIDO and returns whether it is happy or sad.
Usage:
python usingMusicNN.py
'''
import tensorflow as tf
import json
from mido import MidiFile
import numpy as np
import tempfile
# Example MIDI file name.
# NOTE(review): not referenced anywhere in the visible part of this module.
midiFile = "01.mid"
# Checkpoint prefix; predictmood reads saveFile + '.meta' and restores from saveFile.
saveFile = "savedModels/musicModelpy27"
# Length of the note-count vector built in predictmood and input width of the first layer.
pianoSize = 128
print("Bad ass Neural Net being loaded...")
# NOTE(review): hm_data, batch_size and hm_epochs are not read by the visible
# code of this module - presumably carried over from the training script.
hm_data = 2000000
# Layer sizes.
# NOTE(review): these presumably have to match the network that was trained and saved - confirm.
n_nodes_hl1 = 1000
n_nodes_hl2 = 1000
n_nodes_hl3 = 1000
n_classes = 2
batch_size = 10
hm_epochs = 9
# x is fed with the note-count vector in predictmood; y is not used by the visible code.
x = tf.placeholder('float')
y = tf.placeholder('float')
# NOTE(review): current_epoch is not read by the visible code, but it is a
# tf.Variable created before the layer variables - check whether that matters
# for restoring the checkpoint before removing or moving it.
current_epoch = tf.Variable(1)
# Weights and biases of the three hidden layers and the output layer,
# initialised with random normal values.
hidden_1_layer = {'f_fum':n_nodes_hl1,
'weight':tf.Variable(tf.random_normal([pianoSize, n_nodes_hl1])),
'bias':tf.Variable(tf.random_normal([n_nodes_hl1]))}
hidden_2_layer = {'f_fum':n_nodes_hl2,
'weight':tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
'bias':tf.Variable(tf.random_normal([n_nodes_hl2]))}
hidden_3_layer = {'f_fum':n_nodes_hl3,
'weight':tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
'bias':tf.Variable(tf.random_normal([n_nodes_hl3]))}
output_layer = {'f_fum':None,
'weight':tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
'bias':tf.Variable(tf.random_normal([n_classes])),}
def neural_network_model(data):
    '''
    Assemble the forward pass of the network.

    The input goes through hidden layers 1 to 3, each computed as
    relu(input x weight + bias), and then through the output layer, which
    has no activation.

    data: tensor that is matrix-multiplied with the first layer's weights
    returns: the result of the output layer
    '''
    hidden_layers = [hidden_1_layer, hidden_2_layer, hidden_3_layer]
    signal = data
    for hidden in hidden_layers:
        signal = tf.nn.relu(tf.add(tf.matmul(signal, hidden['weight']), hidden['bias']))
    output = tf.matmul(signal, output_layer['weight']) + output_layer['bias']
    return output
#
def predictmood(input_midi_file):
    '''
    Classify the mood of a piece from the notes of its first 10 seconds.

    The 'note_on' messages on channel 0 that occur in the first 10 seconds are
    counted per note number into a vector of length pianoSize. That vector is
    fed to the network and the index of the strongest output selects the label.

    input_midi_file: an iterable of MIDO messages whose `time` is in seconds
                     (the messages are read by iterating over it)
    returns: "Happy" when output 0 is the strongest, "Sad" when output 1 is.
             This follows the training labels in createMusicalFeaturesets.py,
             where the happy (pos) file is labelled [1, 0] and the sad (neg)
             file is labelled [0, 1].
    '''
    prediction = neural_network_model(x)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # NOTE(review): import_meta_graph loads the saved graph next to the
        # one built by this module. Confirm that the restore below fills the
        # variables `prediction` is built from and not only the imported copy;
        # otherwise the prediction comes from the random initial weights.
        saver = tf.train.import_meta_graph(saveFile+'.meta')
        saver.restore(sess, saveFile)

        #### CONVERT THE MIDI TO NOTES AND FEATURES (without [0,1])
        #### need it in the [0 112 1 1 0 0 0 ....] format
        # NOTE(review): midiNoteSegments.py transposes the training notes by
        # key signature; nothing is transposed here - confirm whether the
        # input should be normalised the same way.
        noteCount = np.zeros(pianoSize)
        time = float(0)
        for msg in input_midi_file:
            if time >= 10:
                break
            ### this time is in seconds, not ticks
            time += msg.time
            # Check the type BEFORE the channel: meta messages and messages
            # such as sysex carry no channel, and reading msg.channel on them
            # raises AttributeError.
            ### only interested in piano channel
            if msg.type == 'note_on' and msg.channel == 0:
                noteCount[msg.note] += 1

        outputs = prediction.eval(feed_dict={x: [list(noteCount)]})
        # np.argmax picks the strongest output without adding another op to
        # the graph on every call.
        result = np.argmax(outputs, 1)

        # pos: [1,0] , argmax: 0
        # neg: [0,1] , argmax: 1
        if result[0] == 0:
            return ("Happy")
        elif result[0] == 1:
            return ("Sad")

View File

@ -2,12 +2,14 @@
Thomas Matlak Avi Vajpeyi, Avery Rapson
CS 310 Final Project
Takes example midi file and prints if its happy or sad
Loads the NN saved in the dir 'savedFile'. The function predictmood(input_midi_file)
takes a midi files in MIDO format and returns if it is happy or sad
Usage:
python [/path/to/midi/file.mid]
python usingMusicNN.py
'''
import tensorflow as tf
import json
from mido import MidiFile