Plot Tredn Line

import matplotlib.pyplot as plt import scipy.stats as stats import datetime query = '''SELECT round((sum(container_cpu_usage_sum)/sum(container_cpu_requests_sum)*100)::numeric, 2) AS usage_pct, date_trunc('day', created_at) AS date FROM resource_usage GROUP BY date_trunc('day', created_at) ORDER BY date_trunc('day', created_at); ''' df = pd.read_sql(query, connection) df.set_index('date', inplace=True) fig, ax = plt.subplots(1, 1) ax.plot(df.index, df) ax.set_xlim([datetime.date(2022, 7, 5), datetime.date(2023, 4, 12)]) # linear regression needs dates to be numeric df.index = df.index.map(datetime.date.toordinal) # linear regression slope, y0, r, p, stderr = stats.linregress(df.index, df['usage_pct']) # print(slope, y0, r, p, stderr) # x co-ordinates for the start and end of the line x_endpoints = pd.DataFrame([df.index[0], df.index[-1]]) # Compute predicted values from linear regression y_endpoints = y0 + slope * x_endpoints print('X Points') print(x_endpoints) print('Y Points') print(y_endpoints) print('DF Index') print(df.index) # Overlay the line ax.plot(x_endpoints, y_endpoints, c='r') ax.set_xlabel('history_datetime')

Prints

X Points
        0
0  738341
1  738628
Y Points
           0
0   8.627195
1  10.011531
DF Index
Index([738341, 738348, 738354, 738361, 738369, 738375, 738397, 738404, 738480,
       738487, 738494, 738501, 738508, 738516, 738523, 738536, 738543, 738558,
       738564, 738572, 738579, 738628],
      dtype='int64', name='date')

And Plots:

Text(0.5, 0, 'history_datetime')

If I comment out the first plot I can see the trend line. It has a different index.

#ax.plot(df.index, df) #ax.set_xlim([datetime.date(2022, 7, 5), datetime.date(2023, 4, 12)])