Some simulated data, borrowed from this post.

# Library for generating multivariate distributions.
library(MASS)
# Fix the RNG so the simulation is reproducible.
set.seed(123)
# Means and covariance matrix for the bivariate normal.
MUs <- c(10, 15)
SIGMAs <- matrix(c(1, 0.5,
                   0.5, 2),
                 nrow = 2,
                 ncol = 2)
# Draw 1000 observations from the multivariate normal distribution.
mdist <- mvrnorm(n = 1000,
                 mu = MUs,
                 Sigma = SIGMAs)
# Unobserved covariate. NOTE: the name `c` shadows base::c() for the
# rest of the script; kept as-is because later chunks refer to it.
c <- mdist[, 2]
# Instrumental variable, drawn independently of the covariate.
z <- rnorm(1000)
# Observed regressor: correlated with the covariate (via mdist) and
# with the instrument (by construction).
x <- mdist[, 1] + z
# Construct the dependent variable.
y <- 1 + x + c + rnorm(1000, 0, 0.5)

Check that the variables behave as expected.

cor ( x , c )

`## [1] 0.1986307`

cor ( z , c )

`## [1] -0.0120011`

Let’s look at the true model.

lm ( y ~ x + c )

```
##
## Call:
## lm(formula = y ~ x + c)
##
## Coefficients:
## (Intercept) x c
## 0.9079 1.0156 0.9955
```

Estimate using OLS.

lm ( y ~ x )

```
##
## Call:
## lm(formula = y ~ x)
##
## Coefficients:
## (Intercept) x
## 13.787 1.226
```

Now using instrumental variables.

library(AER)
# Two-stage least squares with z instrumenting for x.
ivreg(y ~ x | z)

```
##
## Call:
## ivreg(formula = y ~ x | z)
##
## Coefficients:
## (Intercept) x
## 15.949 1.008
```

Now using the `lm` function.

# First stage: regress the endogenous regressor on the instrument.
lms1 <- lm(x ~ z)
# Manually reconstruct the fitted values from the first-stage coefficients.
lmXhat <- coef(lms1)[1] + coef(lms1)[2] * z
# Second stage: regress the outcome on the fitted values; printing the
# assignment shows the coefficients, which match ivreg() above.
(lms2 <- lm(y ~ lmXhat))

```
##
## Call:
## lm(formula = y ~ lmXhat)
##
## Coefficients:
## (Intercept) lmXhat
## 15.949 1.008
```

Now we can do the same using a neural network.

library(nnet)
# First stage as a neural network: zero hidden units plus a skip-layer
# connection and linear output is equivalent to linear regression.
nns1 <- nnet(x ~ z, size = 0, skip = TRUE, linout = TRUE)

```
## # weights: 2
## initial value 100832.781903
## final value 924.804075
## converged
```

# Reuse the fitted values that nnet stored during the first stage.
nnXhat <- nns1$fitted.values
# Second stage on the fitted values, again as a degenerate (linear) network.
nns2 <- nnet(y ~ nnXhat, size = 0, skip = TRUE, linout = TRUE)

```
## # weights: 2
## initial value 528901.038261
## final value 4019.409973
## converged
```

summary ( nns2 )

```
## a 1-0-1 network with 2 weights
## options were - skip-layer connections linear output units
## b->o i1->o
## 15.95 1.01
```

Compare output.

lms2 $ coefficients - nns2 $ wts

```
## (Intercept) lmXhat
## -1.749729e-10 -2.814797e-09
```

Compare estimates.

library(ggplot2)
# qplot() is deprecated since ggplot2 3.4.0; build the histogram with
# ggplot() on an explicit data frame instead. The differences between
# the two second-stage fits should be numerically negligible.
ggplot(data.frame(fit_diff = lms2$fitted.values - nns2$fitted.values),
       aes(x = fit_diff)) +
  geom_histogram()

Now redo using a non-linearity.