18  Multiequationals linear models

Code
library(tidyverse)
library(mvtnorm)
library(backports)
library(latex2exp)

Let’s consider a multivariate linear model, i.e. with p>1 in (), then the model in matrix notation reads: Yn×p=Jn,1n×1a1×p+Xn×kbk×p+en×p

18.1 OLS estimate

As in the uni-variate case the optimal parameters are computed as: bOLS=Cv(Y,X)Cv(X)1αOLS=E{Y}bOLSE{X} And the variance covariance matrix of the residuals is computed as: Σ=Cv(e)=Cv(Y)bOLSCv(Y,X)

18.1.1 Example

Let’s consider n-simulated observations of the explicative variables X drown from a multivariate normal, i.e. XN(E{X},Cv{X}), with parameters E{X}=(0.50.50.5)Cv{X}=(0.50.20.10.21.20.10.10.10.3) Let’s consider two dependent variables, hence p=2 and k=3. Let’s now simulate the p×k=6 slopes parameter drown from a standard normal, i.e. for j=1,,6, bjN(0,1). The intercept parameters a are simulated drown from a uniform distribution in [0,1]. In the multivariate case a and b became matrices, i.e.  bp×k=(b1,1b1,2b1,3b2,1b2,2b2,3)αp×1=(α1α2)

For i=1,,n, let’s consider a model of the form: {Yi,1=β0,1+β1,1Xi,1+β1,2Xi,2+β1,kXi,3+ui,1Yi,2=β0,2+β2,1Xi,1+β2,2Xi,2+β2,kXi,3+ui,2 where ui,1 and ui,2 are simulated from a multivariate normal random variables with true covariance matrix equal to: Cv{u}=(0.550.30.30.70) Hence, the procedure is structured as:

  1. Simulate of the explanatory variables, the regression parameters and the residuals.
  2. Simulate the perturbed Y~ (regression with errors).
  3. Fit the regression parameters on the Y~.
  4. Compute the fitted residuals from the prediction obtained with the parameters in step 3. and compute their variance covariance matrix.
  5. Compare the results with the true parameters.
Setup
######################## Setup ########################
set.seed(1) # random seed 
n <- 500    # number of observations
p <- 2      # number of dependent variables 
k <- 3      # number of regressors 
# True regressor's mean 
true_e_x <- matrix(rep(0.5, k), ncol = 1)
# True regressor's covariance matrix 
true_cv_x <-  matrix(c(v_z1 = 0.5, cv_12 = 0.2, cv_13 = 0.1, 
                       cv_21 = 0.2, v_z2 = 1.2, cv_23 = 0.1, 
                       cv_31 = 0.1, cv_32 = 0.1, v_z3 = 0.3), 
                     nrow = k, byrow = FALSE)
# True covariance of the residuals 
true_cv_e <- matrix(c(0.55, 0.3, 0.3, 0.70), nrow = p)
##########################################################
# Generate a synthetic data set 
## Regressors  
X <- rmvnorm(n, true_e_x, true_cv_x) 
## Slope (Beta)
true_beta <- rnorm(p*k)
true_beta <- matrix(true_beta, ncol = k, byrow = TRUE) 
## Intercept (Alpha)
true_alpha <- runif(p, min = 0, max = 1)
true_alpha <- matrix(true_alpha, ncol = 1) 
## Matrix of 1 for matrix multiplication  
ones <- matrix(rep(1, n), ncol = 1)
## Fitted response variable 
Y <- ones %*% t(true_alpha) + X %*% t(true_beta)
## Simulated error 
eps <- rmvnorm(n, sigma = true_cv_e)
## Perturbed response variable 
Y_tilde <- Y + eps
Parameters fit
# True Beta 
df_beta_true <- as_tibble(true_beta)
colnames(df_beta_true) <- paste0("$\\beta_", 1:ncol(df_beta_true), "$")
# Perturbed Beta (fitted)
fit_beta <- cov(Y_tilde, X) %*% solve(cov(X))
df_beta_pert <- as_tibble(fit_beta)
colnames(df_beta_pert) <- paste0("$\\beta_", 1:ncol(df_beta_pert), "$")
# True Alpha 
df_alpha_true <- as_tibble(t(true_alpha))
colnames(df_alpha_true) <- paste0("$\\alpha_", 1:ncol(df_alpha_true), "$")
# Perturbed Alpha (fitted)
## Perturbed mean  
e_y <- matrix(apply(Y_tilde, 2, mean), ncol = 1)
e_x <- matrix(apply(X, 2, mean), ncol = 1)
## Estimated Alpha (on perturbed data)
fit_alpha <- e_y - cov(Y_tilde, X) %*% solve(cov(X)) %*% e_x
df_alpha_pert <- as_tibble(t(fit_alpha))
colnames(df_alpha_pert) <- paste0("$\\alpha_", 1:ncol(df_alpha_pert), "$")
Code table output
dplyr::bind_rows(
  dplyr::bind_cols(Type = "True", df_beta_true),
  dplyr::bind_cols(Type = "Fitted", df_beta_pert)
  ) %>%
  dplyr::mutate_if(is.numeric, format, digits = 4, scientific = FALSE) %>%
  knitr::kable(escape = FALSE)

dplyr::bind_rows(
  dplyr::bind_cols(Type = "True", df_alpha_true),
  dplyr::bind_cols(Type = "Fitted", df_alpha_pert)
  ) %>%
  dplyr::mutate_if(is.numeric, format, digits = 4, scientific = FALSE) %>%
  knitr::kable(escape = FALSE)
Table 18.1: Fitted parameters
Type β1 β2 β3
True 0.8500 -0.9253 0.8936
True -0.9410 0.5390 -0.1820
Fitted 0.8457 -0.8699 0.9396
Fitted -0.9532 0.5518 -0.1804
Type α1 α2
True 0.8137 0.8068
Fitted 0.7942 0.7423