Data Creation
In [2]:
def prepare_data_yield_curve() -> DataFrame:
"""
Function which puts together the yield curve data
Returns
-------
DataFrame
A yield curve datafram
"""
# Read in the csv
yield_curve = pd.read_csv("Data/Yield Curve.csv",index_col=0)
# Convert the index to be datetime
yield_curve.index = pd.to_datetime(yield_curve.index)
# Drop the 2 month column and any null valued rows
yield_curve = yield_curve.drop(columns="2 Mo")
yield_curve = yield_curve.dropna()
# Re-index to the every day and fill null values forward
date_range = pd.date_range(yield_curve.index.min(), yield_curve.index.max())
yield_curve = yield_curve.reindex(index=date_range)
yield_curve = yield_curve.fillna(method='ffill')
return yield_curve
yield_curve = prepare_data_yield_curve()
print(yield_curve)
1 Mo 3 Mo 6 Mo 1 Yr 2 Yr 3 Yr 5 Yr 7 Yr 10 Yr 20 Yr \
2001-07-31 3.67 3.54 3.47 3.53 3.79 4.06 4.57 4.86 5.07 5.61
2001-08-01 3.65 3.53 3.47 3.56 3.83 4.09 4.62 4.90 5.11 5.63
2001-08-02 3.65 3.53 3.46 3.57 3.89 4.17 4.69 4.97 5.17 5.68
2001-08-03 3.63 3.52 3.47 3.57 3.91 4.22 4.72 4.99 5.20 5.70
2001-08-04 3.63 3.52 3.47 3.57 3.91 4.22 4.72 4.99 5.20 5.70
... ... ... ... ... ... ... ... ... ... ...
2020-03-14 0.33 0.28 0.38 0.38 0.49 0.58 0.70 0.89 0.94 1.31
2020-03-15 0.33 0.28 0.38 0.38 0.49 0.58 0.70 0.89 0.94 1.31
2020-03-16 0.25 0.24 0.29 0.29 0.36 0.43 0.49 0.67 0.73 1.10
2020-03-17 0.12 0.19 0.24 0.30 0.47 0.54 0.66 0.91 1.02 1.45
2020-03-18 0.04 0.02 0.08 0.21 0.54 0.66 0.79 1.08 1.18 1.60
30 Yr
2001-07-31 5.51
2001-08-01 5.53
2001-08-02 5.57
2001-08-03 5.59
2001-08-04 5.59
... ...
2020-03-14 1.56
2020-03-15 1.56
2020-03-16 1.34
2020-03-17 1.63
2020-03-18 1.77
[6806 rows x 11 columns]
Real Estate Data¶
Next up is real estate data which we want to import in and do some modifications on.
In [3]:
#Create the real estate data function
def prepare_data_real_estate() -> DataFrame:
"""
The function for creation of real estate data
Returns
-------
DataFrame
A dataframe of real estate data
"""
# Pull in and format the CPI data
CPI = pd.read_csv("Data/CPI.csv",index_col=0)
CPI.columns = ["CPI"]
CPI.index = pd.to_datetime(CPI.index)
# Pull in and format the real estate data
real_estate = pd.read_csv("Data/Real Estate.csv",index_col=0)
real_estate.index = pd.to_datetime(real_estate.index)
real_estate.columns = ["Real Estate"]
# Create the multi-index
multi_index = pd.MultiIndex.from_tuples(zip(real_estate.index.year,real_estate.index.quarter))
real_estate.index = multi_index
quarterly_CPI = CPI.groupby([CPI.index.year, CPI.index.quarter]).last()
real_estate.index.names = ["Year", "Quarter"]
quarterly_CPI.index.names = ["Year", "Quarter"]
# Build the data by joining the two
data = real_estate.join(quarterly_CPI)
return data
real_estate_data = prepare_data_real_estate()
print(real_estate_data)
Real Estate CPI
Year Quarter
1975 1 59.77 52.800
2 60.97 53.500
3 61.18 54.600
4 62.22 55.600
1976 1 62.90 56.000
... ... ...
2018 4 429.86 252.653
2019 1 434.58 254.095
2 442.71 255.402
3 447.87 256.593
4 451.65 258.444
[180 rows x 2 columns]