My python final [simple database analysis]
December 19, 2009
My python final project was to write a simple database analysis based on any given data. I thought about it and I decided to have the entire class use the same sets of data. I created a Google form and let them fill them out. I sent out the result in csv and a few other formats, which really made the whole programming easier.
Python has a very nice module called csv. It is already included in 2.6.4 so there is no need to find it. All we need to do is to import it. After a few days fighting with this program, I finally had finished my project.
Here each student is an object. This program is not very complicated. It is very portable. We can control each column at any instance. It is not the best, but good enough to earn an A.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | import csv class Student(object): sports = [] ftopics = [] stopics = [] genders = [] movieyrs = [] countrys = [] def __init__(self,row): # we set up the row from the csv data self.lname, self.fname, self.ID, self.gender, self.sport, self.movie, self.movieyr, self.country, self.ftopic, self.stopic = row # we import each column into our pre-defined lists self.sports.append(self.sport) self.ftopics.append(self.ftopic) self.stopics.append(self.stopic) self.genders.append(self.gender) self.movieyrs.append(self.movieyr) self.countrys.append(self.country) # functions to print results anytime we want def print_information(self): return (self.lname, self.fname, self.ID, self.gender) def print_first(self): return (self.lname, self.fname, self.sport) def print_second(self): return (self.lname, self.fname, self.movie, self.movieyr) def print_third(self): return (self.lname, self.fname, self.country) def print_fourth(self): return (self.lname, self.fname, self.ftopic, self.stopic) ######################### THIS IS OUR WELCOME MENU ######################### # # Welcome to CSC 1000 (Fall 2009) - Final Project # # # Author: John Yeukhon Wong # E-mail: gokoproject@gmail.com # # Select one of the following to retrieve speific database analysis # # [0]: Basic Information (Last name, first name, ID, gender) # [1]: My Favorite Sport (Last name, first name, favorite sport) # [2]: My Favorite Movie (Last name, first name, movie title, movie year) # [3]: My Favorite Country to Visit (Last name, first name, country name) # [4]: My Favorite class topic (Last name, first name, first preference, second preference) # # # Each item has its own speific analysis result # A while = True loop to re-request a new analysis # Enter QUIT to leave this program # ######################### THIS IS OUR WELCOME MENU ######################### # let us initalize some stuff we will use throughout # we use csv.reader module here to read csv data choice_list = [] choice_dict = {} # assuming our csv data in the same path as the python directory reader = csv.reader(open('new_mondy_csc_data_revise.csv'), delimiter=',', quotechar='"') header = tuple(reader.next()) # make sure our reading in tuple form students = list(map(Student, reader)) # read all remaining lines num_students = len(students) # get the total numbers of students in this survey def search_function(x): if x == 0: print "%-17s|%-10s|%-6s|%s" %header[:4] # running header from column 0 to 3 print "-" * 45 for student in students: print "%-17s|%-10s|%-6s|%3s" % student.print_information() print '\n' # gender for s in set(Student.genders): # class attribute print s,":", Student.genders.count(s),"student(s)", "or",round(((float(Student.genders.count(s)) / num_students) *100),1),"%" if x == 1: # Basic information print '\n' * 2 print "Students' Sport Preference" print '\n' print "%-17s|%-10s|%s" %(header[0],header[1],header[4]) print "-" * 45 for student in students: print "%-17s|%-10s|%s" %student.print_first() print '\n' # Printing all sports that are specified by students for s in set(Student.sports): # class attribute print s,":", Student.sports.count(s),"student(s)", "or", round(((float(Student.sports.count(s)) / num_students) *100),1),"%" # Printing sports that are not picked allsports = ['Basketball','Football','Other','Baseball','Handball','Soccer','Volleyball','I do not like sport'] for s in set(allsports) - set(Student.sports): print s,":", "---", '0%' print '\n' # Here we list sports most favorite and least favorite choice_list = Student.sports for choice in choice_list: choice_dict[choice] = choice_dict.get(choice, 0) + 1 print "The MOST favoritable sport is: ", max(choice_dict) print "The LEAST favoritable sport is: ", min(choice_dict) elif x == 2: # My favorite movie print "%-17s|%-10s|%-16s|%s" %(header[0],header[1],header[5],header[6]) print "-" * 45 for student in students: print "%-17s|%-10s|%-16s|%s" % student.print_second() print '\n' # number of old / new movie newyear = ['2001','2002','2003','2004','2005','2006','2007','2008','2009'] counter = 0 freq = {} for i in Student.movieyrs: if i not in newyear: freq[i] = freq.get(i, 0) + 1 counter = counter + 1 print "Numbers of students' favorite movies are OLDER than 2001: ", counter print "Numbers of students' favorite movies are NEWER than 2001: ", (num_students - counter) print '\n' elif x == 3: # My favorite country to visit print "%-17s|%-10s|%s" %(header[0],header[1],header[7]) print "-" * 45 for student in students: print "%-17s|%-10s|%s" %student.print_third() print '\n' for s in set(Student.countrys): print s,":", Student.countrys.count(s),"student(s)", "or", round(((float(Student.countrys.count(s)) / num_students) *100),1),"%" choice_list = Student.countrys choice_dict = {} for choice in set(Student.countrys): choice_dict[choice] = choice_dict.get(choice, 0) + 1 print "The TOP country wanted to visit is: ", max(choice_dict) elif x == 4: # First and Second class topic preference print "%-17s|%-10s|%-15s|%s" %(header[0],header[1],header[8],header[9]) print "-" * 45 for student in students: print "%-17s|%-10s|%-16s|%s" % student.print_fourth() print '\n' # first topic preference print "My first most preferred class topic" print "-"*20 for s in set(Student.ftopics): # class attribute print s,":", Student.ftopics.count(s),"student(s)", "or", round(((float(Student.ftopics.count(s)) / num_students) *100),1),"%" alltopics = ['Matrices','Probability','Object Oriented Programming','Matlab','Python'] for s in set(alltopics) - set(Student.ftopics): print s, 0, '0%' choice_list = Student.ftopics choice_dict = {} for choice in choice_list: choice_dict[choice] = choice_dict.get(choice, 0) + 1 print "The TOP class topic is: ", max(choice_dict) print "The LEAST wanted class topic as first preference: ", min(choice_dict) print '\n' print "My second most preferred class topic" print "-"*20 # second topic preference for s in set(Student.stopics): # class attribute print s,":", Student.stopics.count(s),"student(s)", "or", round(((float(Student.stopics.count(s)) / num_students) *100),1),"%" for s in set(alltopics) - set(Student.stopics): print s, "---", '0%' choice_list = Student.stopics choice_dict = {} for choice in choice_list: choice_dict[choice] = choice_dict.get(choice, 0) + 1 print "Second most TOP class topic is: ", max(choice_dict) print "The LEAST wanted class topic of all: ", min(choice_dict) # Let us bring our menu interface, or instruction to the front print """ ######################### THIS IS OUR WELCOME MENU ######################### # # Welcome to CSC 1000 (Fall 2009) - Final Project # # # Author: John Yeukhon Wong # E-mail: gokoproject@gmail.com # # Select one of the following to retrieve speific database analysis # # [0]: Basic Information (Last name, first name, ID, gender) # [1]: My Favorite Sport (Last name, first name, favorite sport) # [2]: My Favorite Movie (Last name, first name, movie title, movie year) # [3]: My Favorite Country to Visit (Last name, first name, country name) # [4]: My Favorite class topic (Last name, first name, first preference, second preference) # # # Each item has its own speific analysis result # A while = True loop to re-request a new analysis # Enter QUIT to leave this program # ######################### THIS IS OUR WELCOME MENU ######################### """ while True: x = raw_input("Enter a number to print specific table, or enter QUIT to leave: ") print '\n' if x == 'QUIT': print "Thank you for using our service!" break search_function(int(x)) print '\n' |