我正在尝试用开源的 Duolingo 数据复现遗忘曲线,纯粹出于兴趣。问题是我得到的结果不合常理:间隔时间越长,回忆率(p_recall)反而越高。有人能给些建议吗?
# make our plot outputs appear and be stored within the notebook.
%matplotlib inline
import matplotlib.pyplot as plt # import matplotlib for scatterplot, use the alias plt
import numpy as np # import the numpy package with alias np
import pandas as pd
from scipy.optimize import curve_fit
# Load the Duolingo learning traces, keep only the rows where both
# `history_seen` and `session_seen` equal 1 (i.e. seen only once before),
# order them by `delta` (time since the last lesson/practice that included
# the word/lexeme) and convert `delta` from seconds to minutes.
df = (
    pd.read_csv('learning_traces.13m.csv')
    .loc[lambda traces: (traces['history_seen'] == 1) & (traces['session_seen'] == 1)]
    .sort_values('delta')
    .assign(delta=lambda traces: traces['delta'] / 60.0)
)
minInMonth = 44640  # 31 days * 24 hours * 60 minutes
def func(x, a):
    """Evaluate an exponential decay curve with half-life ``a``.

    Computes ``2 ** (-x / a)``: the result is 1 at ``x == 0`` and halves
    every time ``x`` increases by ``a``.

    Args:
        x: Elapsed time; a scalar or any array-like that supports unary
            minus and division (NumPy array, pandas Series, ...).
        a: Half-life, expressed in the same unit as ``x``. Must be non-zero.

    Returns:
        The decay value(s), with the same shape as ``x``.
    """
    return np.exp2(-x / a)
# Fit the one-parameter decay curve `func` to recall probability as a
# function of the elapsed time in `delta`, then draw data and fit together.
xdata = df['delta']
ydata = df['p_recall']
plt.scatter(xdata, ydata)
# When p0 is omitted, curve_fit starts every parameter at 1. A half-life of 1
# is a poor starting point when the deltas are much larger than that, because
# the model is then practically 0 for most samples and the optimizer has
# little gradient to follow. Seed the half-life from the median delta
# instead, never going below the default of 1.
popt, pcov = curve_fit(func, xdata, ydata, p0=[max(xdata.median(), 1.0)])
print(popt)  # Show the fitted half-life
plt.plot(xdata, func(xdata, *popt), 'r-', label='fit')
plt.legend()  # without a legend the 'fit' label is never displayed


