In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
In [2]:
def read_grade_file(location, file):
    file_path = location + file;
    grades = pd.read_csv(file_path)
    return grades
In [3]:
# Read the CSV file with grades
csv_path = os.path.dirname("M300_grade.ipynb")
grade_table = read_grade_file(csv_path, "MATH300.csv")


# Delete all Personal Information
while True:
    delete_col = grade_table.columns[0]
    # Extract the columns with only scores
    if delete_col.startswith('Homework'):
        length = len(grade_table.columns)
        last_col = grade_table.columns[length-1]
        # Remove the last column as it has irrelevant data
        grade_table = grade_table.drop(last_col, axis = 1)
        break
    grade_table = grade_table.drop([delete_col], axis = 1)

# Shuffle the rows and reset the indices
grade_table = grade_table.sample(frac=1)
grade_table = grade_table.reset_index(drop=True)
grade_table
Out[3]:
Homework 1 (Real) Homework 2 (Real) Homework 3 (Real) Homework 4 (Real) Homework 5 (Real) Homework 6 (Real) Homework 7 (Real) Homework 8 (Real) Homework 9 (Real) Homework 10 (Real) Homework total (Real) Midterm 1 (Real) Midterm 1 total (Real) Midterm 2 (Real) Midterm 2 total (Real) Final Exam (Real) Final Exam total (Real) Course total (Real)
0 41.0 54.0 47.0 55.0 51.0 58.0 53.0 50.0 57.0 55.0 95.98 96.0 96.0 95.0 95.0 97.0 97.0 96.09
1 24.0 46.0 43.0 30.0 0.0 0.0 17.0 21.0 0.0 0.0 37.92 84.0 84.0 60.0 60.0 60.0 60.0 58.18
2 46.0 48.0 46.0 49.0 46.0 45.0 0.0 36.0 0.0 25.0 69.99 76.0 76.0 61.0 61.0 79.0 79.0 72.10
3 46.0 45.0 45.0 41.0 47.0 0.0 20.0 19.0 23.0 17.0 62.66 73.0 73.0 55.0 55.0 41.0 41.0 56.70
4 49.0 53.0 47.0 45.0 48.0 59.0 49.0 50.0 54.0 50.0 92.76 78.0 78.0 94.0 94.0 92.0 92.0 89.83
5 48.0 50.0 47.0 43.0 46.0 57.0 42.0 35.0 38.0 33.0 82.34 89.0 89.0 87.0 87.0 91.0 91.0 87.20
6 48.0 35.0 48.0 46.0 55.0 35.0 43.0 17.0 43.0 48.0 81.64 87.0 87.0 78.0 78.0 75.0 75.0 79.99
7 50.0 49.0 50.0 55.0 53.0 60.0 55.0 53.0 60.0 49.0 97.98 84.0 84.0 97.0 97.0 96.0 96.0 94.39
8 47.0 51.0 48.0 44.0 0.0 36.0 51.0 41.0 34.0 38.0 79.53 85.0 85.0 78.0 78.0 75.0 75.0 78.96
9 48.0 52.0 47.0 52.0 55.0 46.0 54.0 49.0 60.0 2.0 93.67 96.0 96.0 98.0 98.0 91.0 91.0 94.20
10 45.0 25.0 37.0 0.0 0.0 19.0 0.0 0.0 0.0 0.0 26.79 81.0 81.0 73.0 73.0 54.0 54.0 55.04
11 46.0 51.0 43.0 48.0 28.0 56.0 31.0 36.0 30.0 49.0 79.24 89.0 89.0 93.0 93.0 85.0 85.0 85.67
12 48.0 37.0 43.0 23.0 20.0 47.0 30.0 25.0 30.0 31.0 63.98 78.0 78.0 62.0 62.0 56.0 56.0 63.99
13 47.0 44.0 45.0 55.0 55.0 54.0 46.0 46.0 26.0 39.0 88.02 88.0 88.0 83.0 83.0 87.0 87.0 86.71
14 45.0 47.0 50.0 43.0 8.0 42.0 41.0 38.0 41.0 10.0 72.64 93.0 93.0 80.0 80.0 89.0 89.0 83.09
15 39.0 46.0 45.0 47.0 3.0 55.0 40.0 34.0 13.0 0.0 65.60 94.0 94.0 79.0 79.0 74.0 74.0 76.48
16 40.0 41.0 32.0 23.0 16.0 29.0 30.0 22.0 0.0 17.0 51.47 89.0 89.0 81.0 81.0 73.0 73.0 71.34
17 43.0 55.0 49.0 52.0 51.0 60.0 49.0 45.0 51.0 53.0 93.53 96.0 96.0 94.0 94.0 92.0 92.0 93.66
18 45.0 39.0 49.0 47.0 51.0 60.0 51.0 41.0 33.0 55.0 89.37 91.0 91.0 93.0 93.0 81.0 81.0 87.91
19 44.0 54.0 48.0 38.0 45.0 57.0 52.0 52.0 57.0 23.0 90.24 87.0 87.0 96.0 96.0 90.0 90.0 90.67
20 50.0 43.0 46.0 51.0 55.0 59.0 48.0 50.0 31.0 55.0 93.27 89.0 89.0 89.0 89.0 81.0 81.0 87.88
21 45.0 45.0 44.0 55.0 47.0 54.0 43.0 48.0 31.0 41.0 86.14 87.0 87.0 85.0 85.0 83.0 83.0 85.14
22 31.0 33.0 28.0 17.0 37.0 47.0 37.0 45.0 31.0 51.0 68.57 72.0 72.0 77.0 77.0 64.0 64.0 69.57
23 49.0 45.0 48.0 49.0 0.0 0.0 27.0 15.0 24.0 0.0 53.47 89.0 89.0 54.0 54.0 34.0 34.0 54.84
24 49.0 52.0 49.0 46.0 45.0 60.0 52.0 47.0 58.0 55.0 94.54 94.0 94.0 93.0 93.0 89.0 89.0 92.46
25 42.0 51.0 43.0 32.0 45.0 0.0 44.0 16.0 37.0 23.0 68.37 82.0 82.0 66.0 66.0 54.0 54.0 66.31
26 49.0 45.0 39.0 24.0 36.0 29.0 21.0 11.0 15.0 17.0 56.59 71.0 71.0 67.0 67.0 76.0 76.0 67.38
27 15.0 38.0 48.0 0.0 41.0 42.0 32.0 36.0 10.0 7.0 54.74 67.0 67.0 78.0 78.0 77.0 77.0 68.52
28 19.0 46.0 45.0 32.0 25.0 32.0 27.0 37.0 12.0 25.0 58.94 85.0 85.0 76.0 76.0 81.0 81.0 74.18
In [4]:
# We only need Final Exam and Course Total scores anyway
new_pd_df_grade = pd.DataFrame()
new_pd_df_grade['Final'] = grade_table['Final Exam total (Real)']
new_pd_df_grade['Total'] = grade_table['Course total (Real)']
In [5]:
# Averages
np.mean(new_pd_df_grade)
Out[5]:
Final    76.448276
Total    78.223448
dtype: float64
In [6]:
# Medians
fm = np.median(new_pd_df_grade['Final'])
tm = np.median(new_pd_df_grade['Total'])
print("Final %0.2f\nTotal %0.2f" %(fm, tm))
Final 81.00
Total 79.99
In [7]:
# Standard Deviations
np.std(new_pd_df_grade)
Out[7]:
Final    16.016341
Total    12.759575
dtype: float64
In [8]:
# Standard Deviation is very high :O 

# Correlation (Pearson) between Final exam grade and the course total grade.
new_pd_df_grade.corr()
Out[8]:
Final Total
Final 1.000000 0.915165
Total 0.915165 1.000000
In [9]:
# Course Total Histogram just for fun
plt.hist(new_pd_df_grade['Total'], bins=15)
plt.xlabel('Score')
plt.ylabel('Number of students')
plt.title('Score histogram Course Total')
Out[9]:
Text(0.5, 1.0, 'Score histogram Course Total')
In [10]:
# Final Exam Histogram just for fun
plt.hist(new_pd_df_grade['Final'], bins=15)
plt.xlabel('Score')
plt.ylabel('Number of students')
plt.title('Score histogram Final Exam')
Out[10]:
Text(0.5, 1.0, 'Score histogram Final Exam')
In [11]:
# Computing the number of each letter grades
letter_gr = ["A", "A-", "B+", "B", "B-", "C+", "C", "C-", "D+", "D", "F"]

# You could use Python dictionary here, but I never liked dictionary XD
num_grade = [0,0,0,0,0,0,0,0,0,0,0]
course_total = new_pd_df_grade['Total']
for i in range(0,29):
    course_grade = course_total[i]
    if course_grade >= 90: #A
        num_grade[0] += 1
    elif course_grade >= 85: #A-
        num_grade[1] += 1
    elif course_grade >= 80: #B+
        num_grade[2] += 1
    elif course_grade >= 75: #B
        num_grade[3] += 1
    elif course_grade >= 70: #B-
        num_grade[4] += 1
    elif course_grade >= 65: #C+
        num_grade[5] += 1
    elif course_grade >= 60: #C
        num_grade[6] += 1
    elif course_grade >= 56: #C-
        num_grade[7] += 1
    elif course_grade >= 53: #D+
        num_grade[8] += 1
    elif course_grade >= 50: #D
        num_grade[9] += 1
    else: #F
        num_grade[10] += 1

if sum(num_grade) != 29: 
    #Make sure the number matches with the number of enrollment (29)
    print("Error")
else:
    print("Number of Letter Grades")
    for i in range(0, 11):
        print(str(letter_gr[i]) + " : " + str(num_grade[i]))
Number of Letter Grades
A : 6
A- : 7
B+ : 1
B : 3
B- : 3
C+ : 4
C : 1
C- : 2
D+ : 2
D : 0
F : 0
In [12]:
# Sorted by the course total
# Ignore the indices
new_pd_df_grade.sort_values(by='Total', ascending=False)

# So, no one Failed!
# Thank you all for the great semester :)
Out[12]:
Final Total
0 97.0 96.09
7 96.0 94.39
9 91.0 94.20
17 92.0 93.66
24 89.0 92.46
19 90.0 90.67
4 92.0 89.83
18 81.0 87.91
20 81.0 87.88
5 91.0 87.20
13 87.0 86.71
11 85.0 85.67
21 83.0 85.14
14 89.0 83.09
6 75.0 79.99
8 75.0 78.96
15 74.0 76.48
28 81.0 74.18
2 79.0 72.10
16 73.0 71.34
22 64.0 69.57
27 77.0 68.52
26 76.0 67.38
25 54.0 66.31
12 56.0 63.99
1 60.0 58.18
3 41.0 56.70
10 54.0 55.04
23 34.0 54.84