import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
def read_grade_file(location, file):
    """Load a grade CSV file into a DataFrame.

    Args:
        location: Directory that contains the file. An empty string means
            the current working directory; a trailing path separator is
            optional.
        file: Name of the CSV file inside ``location``.

    Returns:
        The parsed CSV contents as a ``pandas.DataFrame``.
    """
    # os.path.join only inserts a separator when one is missing, so both
    # "data" and "data/" work. Plain concatenation turned "data" + "x.csv"
    # into "datax.csv".
    file_path = os.path.join(location, file)
    return pd.read_csv(file_path)
# Load the grade export.
# NOTE: os.path.dirname() of a bare file name is the empty string, so the CSV
# is looked up relative to the current working directory.
csv_path = os.path.dirname("M300_grade.ipynb")
grade_table = read_grade_file(location=csv_path, file="MATH300.csv")
# Delete all personal information.
# Every column to the left of the first "Homework..." column is removed, and
# so is the right-most column, which holds irrelevant data.
first_score_idx = next(
    (
        pos
        for pos, col in enumerate(grade_table.columns)
        if str(col).startswith('Homework')
    ),
    None,
)
if first_score_idx is None:
    # Previously the loop dropped columns one by one until none were left and
    # then failed with an opaque IndexError; fail with a clear message instead.
    raise ValueError(
        "grade table has no column whose name starts with 'Homework'"
    )
# One slice replaces the column-by-column drops (each of which copied the
# whole frame): keep from the first score column up to, but excluding, the
# last column.
grade_table = grade_table.iloc[:, first_score_idx:-1]
# Shuffle the rows and renumber the index from 0
grade_table = grade_table.sample(frac=1).reset_index(drop=True)
# Bare expression: shows the table in a notebook cell, no effect in a script
grade_table
# Only two columns are needed from here on: the final exam score and the
# course total. Copy them into a small frame under shorter names.
new_pd_df_grade = pd.DataFrame(
    {
        'Final': grade_table['Final Exam total (Real)'],
        'Total': grade_table['Course total (Real)'],
    }
)
# Averages, one value per column.
# np.mean(df) forwards axis=None to DataFrame.mean, which pandas 2.x treats as
# "reduce over both axes" and so returns a single number mixing Final and
# Total. Asking for axis=0 explicitly keeps the per-column result.
new_pd_df_grade.mean(axis=0)
# Medians
fm = np.median(new_pd_df_grade['Final'])
tm = np.median(new_pd_df_grade['Total'])
print(f"Final {fm:0.2f}\nTotal {tm:0.2f}")
# Standard deviations, one value per column.
# Same axis=None issue as above. ddof=0 keeps the population standard
# deviation that np.std computes by default.
new_pd_df_grade.std(axis=0, ddof=0)
# Standard Deviation is very high :O
# Correlation (Pearson) between Final exam grade and the course total grade.
new_pd_df_grade.corr()
# Histograms just for fun: course total first, then final exam.
# Each histogram gets its own figure. Without plt.figure(), when this runs as
# one script the second plt.hist call draws onto the same axes as the first
# and the second title replaces the first.
for column, title in (
    ('Total', 'Score histogram Course Total'),
    ('Final', 'Score histogram Final Exam'),
):
    plt.figure()
    plt.hist(new_pd_df_grade[column], bins=15)
    plt.xlabel('Score')
    plt.ylabel('Number of students')
    plt.title(title)
# Computing the number of each letter grade
letter_gr = ["A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "F"]
# Minimum course total for every letter above F, in the same order as
# letter_gr (A >= 90, A- >= 85, ..., D >= 50).
grade_cutoffs = [90, 85, 80, 75, 70, 65, 60, 56, 53, 50]
# Expected class size, used as a sanity check on the tally below.
enrollment = 29
num_grade = [0] * len(letter_gr)
course_total = new_pd_df_grade['Total']
# Tally every row rather than indexing a fixed range(0, 29). With the fixed
# range the counts always summed to 29, so the check below could never fail:
# extra rows were silently ignored and fewer rows raised a KeyError.
for course_grade in course_total:
    for slot, cutoff in enumerate(grade_cutoffs):
        if course_grade >= cutoff:
            num_grade[slot] += 1
            break
    else:
        # Below every cutoff (a NaN also fails every comparison): F
        num_grade[-1] += 1
if sum(num_grade) != enrollment:
    # Make sure the number matches with the number of enrollment (29)
    print("Error")
else:
    print("Number of Letter Grades")
    for letter, count in zip(letter_gr, num_grade):
        print(f"{letter} : {count}")
# Ranking by course total, highest score first.
# Ignore the index column in the output.
# Bare expression: shows the sorted table in a notebook cell; the result is
# not stored.
new_pd_df_grade.sort_values(by=['Total'], ascending=[False])
# So, no one failed!
# Thank you all for the great semester :)