import pandas as pd

students = pd.read_csv("cursostomados.csv", 
                           dtype='str')

students.iloc[[1,200,54,99],1:]

def makesequences(table):
    """Group course records by student.

    ``table`` must provide ``itertuples()`` (e.g. a pandas DataFrame) whose
    tuples unpack to exactly ``(index, Numest, Curso, Sesion, Nota)``.

    Returns a dict mapping each ``Numest`` to a list of
    ``(Curso, Sesion, Nota)`` tuples, in the order the rows were yielded.
    """
    sequences = {}

    # Iterate over the argument: the earlier version read the module-level
    # ``students`` frame here, so ``table`` was silently ignored.
    for _index, Numest, Curso, Sesion, Nota in table.itertuples():
        sequences.setdefault(Numest, []).append((Curso, Sesion, Nota))

    return sequences

def codetonum(code):
    """Convert a three-character session code to a fractional calendar year.

    The first character is parsed as a hexadecimal decade digit counted from
    1900 (so ``A`` is decade 10), the second as the year within that decade,
    and the third as the term number; each term after the first adds half a
    year.  For example ``"A71"`` gives ``2007.0`` and ``"A92"`` gives
    ``2009.5``.
    """
    decade_digit, year_digit, term_digit = code[0], code[1], code[2]
    whole_year = 1900 + int(decade_digit, base=16) * 10 + int(year_digit)
    extra_terms = int(term_digit) - 1
    return whole_year + 0.5 * extra_terms

sequences = makesequences(students)

len(sequences.keys())

476

prog2007 = ['MATE3018', 'MATE3023', 'MATE3024', # should have tested out of these pre calc
            'CCOM3033', 'MATE3151', 'CCOM3034', 'MATE3152',
            'CCOM4016', 'CCOM4017', 'CCOM4027', 'MATE3153',
            'CCOM4086', 'CCOM3029', 'MATE4032',
            'CCOM5050', 'MATE4080',
            'CCOM4087', 'CCOM5035'
           ]

prog2011 = ['MATE3018', 'MATE3023', 'MATE3024', # should have tested out of these pre calc
            'MATE3151', 'CCOM3030', 'CCOM3981', # espa soc eng
            'CCOM3020', 'CCOM3033', 'CCOM3982', # soc espa eng
            'MATE3152', 'CCOM3034', 'CCOM4016', 'CCOM4086', # sci huma
            'MATE4081', 'MATE4032', 'CCOM4027', 'CCOM4029', # sci huma
            'MATE4080', 'CCOM4017', 'CCOM4030', # libre lite
            'CCOM5050', 'CCOM4065', 'CCOM4205', # art lite
            'MATE5001', 'CCOM5035', 'CCOM4995', # SCI SCI
            'CCOM4996', 'CCOM4087']

prog2016 = ['MATE3018', 'MATE3023', 'MATE3024', # should have tested out of these pre calc
            'MATE3151', 'CCOM3030', 'CCOM3981', # espa soc eng
            'CCOM3020', 'CCOM3033', 'CCOM3982', # soc espa eng
            'MATE3152', 'CCOM3034', 'CCOM4086', # sci huma
            'MATE4081', 'MATE4032', 'CCOM4027', 'CCOM4029', # sci huma
            'MATE4080', 'CCOM4017', 'CCOM4030', # libre lite
            'CCOM5050', 'CCOM4065', 'CCOM4205', # art lite
            'MATE5001', 'CCOM5035', 'CCOM4995', # SCI SCI
            'CCOM4996', 'CCOM4087']

def clean(students, core):
    """Keep only the core-course records of each student.

    ``students`` maps a student id to a list of ``(course, sem, grade)``
    tuples and ``core`` is the collection of course codes to keep.  Students
    left without any record are omitted from the returned dict.
    """
    filtered = {}
    for student_id, records in students.items():
        kept = [
            (course, sem, grade)
            for course, sem, grade in records
            if course in core
        ]
        if kept:
            filtered[student_id] = kept
    return filtered

newstu = clean(sequences, prog2011)

from collections import defaultdict

def getseq(students):
    """Order course codes by the summed position at which students took them.

    ``students`` maps a student id to a list of ``(course, sem, grade)``
    tuples.  Every record adds its zero-based position within that student's
    list to the course's running total.  Courses are returned sorted by
    ascending total, with ties broken by course code.
    """
    position_totals = defaultdict(int)
    for records in students.values():
        for position, (course, _sem, _grade) in enumerate(records):
            position_totals[course] += position
    ranked = sorted(position_totals.items(), key=lambda pair: (pair[1], pair[0]))
    return [course for course, _total in ranked]

cs_seq = getseq(newstu)

cs_seq

['CCOM4065',
 'MATE3018',
 'CCOM4205',
 'CCOM3030',
 'CCOM4016',
 'CCOM4996',
 'MATE3023',
 'CCOM3981',
 'MATE3024',
 'MATE4081',
 'CCOM3982',
 'CCOM4029',
 'MATE5001',
 'CCOM4030',
 'CCOM3020',
 'CCOM3033',
 'CCOM3034',
 'MATE3151',
 'CCOM4086',
 'CCOM4027',
 'CCOM4017',
 'MATE4032',
 'CCOM4995',
 'CCOM5035',
 'MATE3152',
 'MATE4080',
 'CCOM4087',
 'CCOM5050']

import numpy as np

# Square matrix: one row per course in cs_seq, one column per position in a
# student's de-duplicated course list.
rows = len(cs_seq)
counts = np.zeros(rows * rows).reshape(rows,rows)

for stu in newstu:
    # keep only courses that appear in cs_seq
    courses = [course for course, sem, grade in newstu[stu] if course in cs_seq]
    courses = list(dict.fromkeys(courses)) # unique, preserving order, first occurrence
    for i in range(len(courses)):
        row = cs_seq.index(courses[i])
        # the i-th distinct course this student took is tallied in column i
        counts[row,i] += 1

# Label the rows with the course codes
counts = pd.DataFrame(counts, index=cs_seq)

%matplotlib inline

import seaborn as sns; sns.set()

ax = sns.heatmap(counts)

def heatmap(students, cs_seq, cols=17):
    """Count course records per course and relative semester.

    Parameters
    ----------
    students : dict
        Maps a student id to a list of ``(course, session_code, grade)``
        tuples.
    cs_seq : list
        Course codes; the result has one row per entry, in this order.
    cols : int, optional
        Number of relative-semester columns (default 17).  Records whose
        relative semester is ``cols`` or later are tallied in the last column.

    Returns
    -------
    pandas.DataFrame
        ``len(cs_seq)`` x ``cols`` frame of float counts indexed by ``cs_seq``.
    """
    # Resolve each course to its first position in cs_seq once, instead of
    # calling cs_seq.index() for every record.
    row_of = {}
    for position, code in enumerate(cs_seq):
        row_of.setdefault(code, position)

    counts = np.zeros((len(cs_seq), cols))
    for records in students.values():
        courses = [(course, sem, grade) for course, sem, grade in records if course in row_of]
        if not courses:
            # semesters() reads courses[0]; a student with no course in
            # cs_seq has nothing to tally, so skip instead of crashing.
            continue
        for course, sem, _grade in semesters(courses):
            # NOTE(review): sem is negative when a record predates the
            # student's first listed record; that would index from the
            # right-hand end of the row - confirm records are chronological.
            counts[row_of[course], min(sem, cols - 1)] += 1
    return pd.DataFrame(counts, index=cs_seq)

def failmap(students, cs_seq, cols=17):
    """Count failing records per course and relative semester.

    A record counts as a failure when its grade is one of ``"F"``, ``"F*"``,
    ``"NP"`` or ``"NP*"``.

    Parameters
    ----------
    students : dict
        Maps a student id to a list of ``(course, session_code, grade)``
        tuples.
    cs_seq : list
        Course codes; the result has one row per entry, in this order.
    cols : int, optional
        Number of relative-semester columns (default 17).  Records whose
        relative semester is ``cols`` or later are tallied in the last column.

    Returns
    -------
    pandas.DataFrame
        ``len(cs_seq)`` x ``cols`` frame of float counts indexed by ``cs_seq``.
    """
    failing = ("F", "F*", "NP", "NP*")

    # Resolve each course to its first position in cs_seq once, instead of
    # calling cs_seq.index() for every record.
    row_of = {}
    for position, code in enumerate(cs_seq):
        row_of.setdefault(code, position)

    counts = np.zeros((len(cs_seq), cols))
    for records in students.values():
        courses = [(course, sem, grade) for course, sem, grade in records if course in row_of]
        if not courses:
            # semesters() reads courses[0]; a student with no course in
            # cs_seq has nothing to tally, so skip instead of crashing.
            continue
        for course, sem, grade in semesters(courses):
            if grade in failing:
                # NOTE(review): sem is negative when a record predates the
                # student's first listed record; that would index from the
                # right-hand end of the row - confirm records are chronological.
                counts[row_of[course], min(sem, cols - 1)] += 1
    return pd.DataFrame(counts, index=cs_seq)

def codetosem(code):
    """Turn a three-character session code into a fractional calendar year.

    Character 0 is read as a hexadecimal decade digit offset from 1900,
    character 1 as the year within the decade, and character 2 as the term
    number, where every term beyond the first contributes 0.5.
    ``"A71"`` maps to ``2007.0`` and ``"B32"`` to ``2013.5``.
    """
    decade, year, semester = (code[position] for position in range(3))
    calendar_year = 1900 + int(decade, base=16) * 10 + int(year)
    term_offset = 0.5 * (int(semester) - 1)
    return calendar_year + term_offset

def semesters(courses):
    """Rewrite session codes as terms elapsed since the first record.

    ``courses`` is a list of ``(course, session_code, grade)`` tuples.  The
    session of ``courses[0]`` is taken as the starting point.

    Returns a list of ``(course, elapsed, grade)`` tuples in the same order,
    where ``elapsed`` is the integer number of half-year terms between that
    record and the first one.  An empty input yields an empty list.
    """
    if not courses:
        return []

    _course, start, _grade = courses[0]
    first_sem = codetosem(start)
    # codetosem() returns years in steps of 0.5, so doubling the difference
    # gives the number of terms.
    return [
        (course, int((codetosem(semester) - first_sem) * 2), grade)
        for course, semester, grade in courses
    ]

ax = sns.heatmap(heatmap(clean(sequences, prog2016), prog2016),cmap = None)

ax = sns.heatmap(heatmap(clean(sequences, prog2011), prog2011),cmap = None)

def took(courses, course):
    """Report whether ``course`` is the first field of any record in ``courses``."""
    codes_taken = [record[0] for record in courses]
    return course in codes_taken

precalc_stu = {k: v for k, v in sequences.items() if (took(v, 'MATE3018') or took(v, 'MATE3024'))}

len(dict(precalc_stu))

318

precalc_failcounts = failmap(precalc_stu, prog2016)

ax = sns.heatmap(heatmap(clean(precalc_stu, prog2016), prog2016),cmap = None, annot=precalc_failcounts)
ax.set(title="Students taking precalculus")

[<matplotlib.text.Text at 0x1181c6860>]

calc_stu = {k: v for k, v in sequences.items() if (not took(v, 'MATE3018') and not took(v, "MATE3024"))}

len(calc_stu)

158

318+158 # sanity check, do I still have all the students?

476

calc_failcount = failmap(clean(calc_stu, prog2016[3:]), prog2016[3:])

ax = sns.heatmap(heatmap(clean(calc_stu, prog2016[3:]), prog2016[3:]), cmap = None, annot=calc_failcount)
ax.set(title="Students starting with calculus")

[<matplotlib.text.Text at 0x1181c2128>]

ax = sns.heatmap(precalc_failcounts,cmap = "inferno", annot=precalc_failcounts)
ax.set(title="Course failures for students starting with precalculus")

[<matplotlib.text.Text at 0x1190f1198>]

ax = sns.heatmap(calc_failcount, cmap = "inferno", annot=calc_failcount)
ax.set(title="Course failures for students starting with calculus")

[<matplotlib.text.Text at 0x119808d30>]

intro = students.loc[students["Curso"] == "CCOM3030"]

intro.iloc[0:5,1:3]

# Group the student ids found in `intro` by the session of each row.
cohorts = {}
for row in intro.itertuples():
    index, Numest, Curso, Sesion, Nota = row
    cohorts.setdefault(Sesion, []).append(Numest)

cohort_keys = sorted(cohorts.keys())

# Build one enrolment heatmap and one failure map per cohort, in the same
# order as cohort_keys.
heatmaps = []
failmaps = []
for semester in cohort_keys:
    ids = cohorts[semester]
    # look up each cohort member's full course list in sequences
    cohort = {k : sequences[k] for k in ids}
    heatmaps.append(heatmap(cohort, prog2016))
    failmaps.append(failmap(cohort, prog2016))

import matplotlib.pyplot as plt

# Show at most the first 15 cohorts; min() avoids an IndexError when fewer
# cohorts exist.
for i in range(min(15, len(cohort_keys))):
    ax = sns.heatmap(heatmaps[i], annot=failmaps[i])
    ax.set(title=cohort_keys[i])
    plt.show()

# Assign every student to the session of the first row in which they appear.
caocohorts = defaultdict(list)
known = []          # student ids in order of first appearance
_known_ids = set()  # the same ids as a set, for constant-time membership tests
for row in students.itertuples():
    index, Numest, Curso, Sesion, Nota = row
    if Numest not in _known_ids:
        caocohorts[Sesion].append(Numest)
        known.append(Numest)
        _known_ids.add(Numest)

cao_keys = sorted(caocohorts.keys())

# find all students before 2007
list(filter(lambda semester: semester < "A71", cao_keys))

['821',
 '921',
 '961',
 '971',
 '981',
 '991',
 'A01',
 'A11',
 'A12',
 'A21',
 'A31',
 'A41',
 'A51',
 'A52',
 'A61',
 'A62']

pre_stu = []
for sem in list(filter(lambda semester: semester < "A71", cao_keys)):
    pre_stu += caocohorts[sem]

cohort = {k:sequences[k] for k in pre_stu}

ax = sns.heatmap(heatmap(clean(cohort,prog2007), prog2007), annot=failmap(clean(cohort, prog2007), prog2007))
ax.set(title="Pre 2007")
plt.show()

post_stu = []
for sem in list(filter(lambda semester: semester >= "A71", cao_keys)):
    post_stu += caocohorts[sem]

post_cohort = {k:sequences[k] for k in post_stu}

ax = sns.heatmap(heatmap(clean(post_cohort,prog2011), prog2011), annot=failmap(clean(post_cohort, prog2011), prog2011))
ax.set(title="Post 2007")
plt.show()

def got(courses, course, grades):
    """Report whether the first attempt at ``course`` earned one of ``grades``.

    ``courses`` is a list of ``(code, section, grade)`` tuples.  Only the first
    record whose code equals ``course`` is considered; later attempts are
    ignored.  The grade is converted with ``str()`` before the ``in`` test
    (the data contains a 'nan' grade), so when ``grades`` is a string the
    check is a substring test.  Returns False if the course never appears.
    """
    for code, _section, grade in courses:
        if code == course:
            # first attempt decides the outcome
            return str(grade) in grades
    return False

discrete_stu = {k: v for k, v in sequences.items() if took(v, 'CCOM3020')}

len(discrete_stu)

200

aorb_discrete_stu = {k: v for k, v in discrete_stu.items() if got(v, 'CCOM3020', "AB")}

len(aorb_discrete_stu)

61

ctof_discrete_stu = {k: v for k, v in discrete_stu.items() if not got(v, 'CCOM3020', "AB")}

len(ctof_discrete_stu)

139

ax = sns.heatmap(heatmap(clean(aorb_discrete_stu, prog2016), prog2016),cmap = None)

ax = sns.heatmap(heatmap(clean(ctof_discrete_stu, prog2016), prog2016),cmap = None)

aorb_failcounts = failmap(clean(aorb_discrete_stu, prog2016), prog2016)

ctof_failcounts = failmap(clean(ctof_discrete_stu, prog2016), prog2016)

ax = sns.heatmap(aorb_failcounts,cmap = "inferno", annot=aorb_failcounts)
ax.set(title="Course failures for students with A or B in discrete math (first try)")

[<matplotlib.text.Text at 0x11a42d208>]

ax = sns.heatmap(ctof_failcounts,cmap = "inferno", annot=ctof_failcounts)
ax.set(title="Course failures for students with C through F in discrete math (first try)")

[<matplotlib.text.Text at 0x11a27f9b0>]

justc_discrete_stu = {k: v for k, v in discrete_stu.items() if got(v, 'CCOM3020', "C")}

len(justc_discrete_stu)

67

justc_failcounts = failmap(clean(justc_discrete_stu, prog2016), prog2016)

ax = sns.heatmap(heatmap(clean(justc_discrete_stu, prog2016), prog2016),cmap = None)

ax = sns.heatmap(justc_failcounts,cmap = "inferno", annot=justc_failcounts)
ax.set(title="Course failures for students with C in discrete math (first try)")

[<matplotlib.text.Text at 0x11ac2abe0>]

calc_stu = {k: v for k, v in sequences.items() if took(v, 'MATE3151')}

len(calc_stu)

283

# Draw from calc_stu (everyone who took MATE3151); filtering discrete_stu here
# silently restricted the group to students who had also taken CCOM3020.
aorb_calc_stu = {k: v for k, v in calc_stu.items() if got(v, 'MATE3151', "AB")}

len(aorb_calc_stu)

68

# Draw from calc_stu (everyone who took MATE3151); filtering discrete_stu here
# silently restricted the group to students who had also taken CCOM3020.
justc_calc_stu = {k: v for k, v in calc_stu.items() if got(v, 'MATE3151', "C")}

len(justc_calc_stu)

42

ax = sns.heatmap(heatmap(clean(aorb_calc_stu, prog2016), prog2016),cmap = None)

ax = sns.heatmap(heatmap(clean(justc_calc_stu, prog2016), prog2016),cmap = None)

justc_calc_failcounts = failmap(clean(justc_calc_stu, prog2016), prog2016)

ax = sns.heatmap(justc_calc_failcounts,cmap = "inferno", annot=justc_calc_failcounts)
ax.set(title="Course failures for students with C in calculus 1 (first try)")

[<matplotlib.text.Text at 0x11b5765f8>]

# Draw from calc_stu (everyone who took MATE3151); filtering discrete_stu here
# silently restricted the group to students who had also taken CCOM3020.
dtof_calc_stu = {k: v for k, v in calc_stu.items() if got(v, 'MATE3151', "DFW")}

dtof_calc_failcounts = failmap(clean(dtof_calc_stu, prog2016), prog2016)

ax = sns.heatmap(dtof_calc_failcounts,cmap = "inferno", annot=dtof_calc_failcounts)
ax.set(title="Course failures for students with D,F or W in calculus 1 (first try)")

[<matplotlib.text.Text at 0x11b329b70>]

A map of student progress in CCOM

Introduction

Methods

Results

Cohorts

Cohorts (pre 2007, post 2007)

Discrete math

Discussion

	Curso	Sesion
92	CCOM3030	A71
102	CCOM3030	A92
142	CCOM3030	B32
171	CCOM3030	B22
175	CCOM3030	B31

	Curso	Sesion	Nota
1	CISO3122	921	C
200	CCOM3033	B31	B
54	EDFU3002	991	A
99	HUMA3101	A91	B