Skip to contents

Implementation of equation 8 in the paper.

Usage

RegSDCcomp(
  y,
  compCorr = NA,
  x = NULL,
  doSVD = FALSE,
  makeunique = TRUE,
  ensureIntercept = TRUE
)

Arguments

y

Matrix of confidential variables

compCorr

Required component score correlations (possibly recycled)

x

Matrix of non-confidential variables

doSVD

Decomposition method: SVD is used when TRUE and QR when FALSE

makeunique

Parameter to be used in GenQR

ensureIntercept

Whether to ensure/include a constant term. A non-NULL x is subjected to EnsureIntercept.

Value

Generated version of y

Details

An NA component score correlation means that the corresponding component is generated independently at random. Input matrices are subjected to EnsureMatrix.

Author

Øyvind Langsrud

Examples

x <- matrix(1:10, 10, 1)
y <- matrix(rnorm(30) + 1:30, 10, 3)

# Same as IPSO (RegSDCipso)
RegSDCcomp(y, NA, x)
#>            [,1]     [,2]     [,3]
#>  [1,] 0.8317548 10.52484 21.38367
#>  [2,] 2.2396118 11.34822 22.75468
#>  [3,] 4.6698188 13.12409 22.96191
#>  [4,] 4.2293614 14.15172 25.04092
#>  [5,] 5.9821308 14.56800 26.79894
#>  [6,] 4.9595102 16.49727 26.99543
#>  [7,] 5.3957966 16.19149 27.70459
#>  [8,] 7.0159493 17.51957 26.58577
#>  [9,] 7.1895163 19.88291 28.35850
#> [10,] 9.2620562 20.52317 29.21500

# Using QR and SVD
yQR <- RegSDCcomp(y, c(0.1, 0.2, NA), x)
ySVD <- RegSDCcomp(y, c(0.1, 0.2, NA), x, doSVD = TRUE)

# Calculation of residuals
r <- residuals(lm(y ~ x))
rQR <- residuals(lm(yQR ~ x))
rSVD <- residuals(lm(ySVD ~ x))

# Correlations for the first two components are as required
diag(cor(GenQR(r)$Q, GenQR(rQR)$Q))
#> [1] 0.1000000 0.2000000 0.1373426
diag(cor(GenQR(r, doSVD = TRUE)$Q, GenQR(rSVD, doSVD = TRUE)$Q))
#> [1] 0.1000000 0.2000000 0.5668663

# Identical covariance matrices
cov(yQR) - cov(ySVD)
#>               [,1]          [,2]          [,3]
#> [1,] -4.440892e-15 -1.065814e-14 -7.105427e-15
#> [2,] -1.065814e-14 -2.131628e-14 -1.421085e-14
#> [3,] -7.105427e-15 -1.421085e-14 -7.105427e-15
cov(rQR) - cov(rSVD)
#>               [,1]          [,2]          [,3]
#> [1,] -4.440892e-16 -1.484923e-15 -4.024558e-16
#> [2,] -1.484923e-15 -3.330669e-16 -1.630640e-16
#> [3,] -4.024558e-16 -1.630640e-16  2.664535e-15

# Identical regression results
summary(lm(y[, 1] ~ x))
#> 
#> Call:
#> lm(formula = y[, 1] ~ x)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -1.5674 -0.4055  0.1461  0.3306  1.1205 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  1.01990    0.61025   1.671    0.133    
#> x            0.75594    0.09835   7.686 5.82e-05 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.8933 on 8 degrees of freedom
#> Multiple R-squared:  0.8807,	Adjusted R-squared:  0.8658 
#> F-statistic: 59.08 on 1 and 8 DF,  p-value: 5.818e-05
#> 
summary(lm(yQR[, 1] ~ x))
#> 
#> Call:
#> lm(formula = yQR[, 1] ~ x)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -1.0724 -0.6544 -0.2782  0.6340  1.2333 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  1.01990    0.61025   1.671    0.133    
#> x            0.75594    0.09835   7.686 5.82e-05 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.8933 on 8 degrees of freedom
#> Multiple R-squared:  0.8807,	Adjusted R-squared:  0.8658 
#> F-statistic: 59.08 on 1 and 8 DF,  p-value: 5.818e-05
#> 
summary(lm(ySVD[, 1] ~ x))
#> 
#> Call:
#> lm(formula = ySVD[, 1] ~ x)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -0.9801 -0.5008 -0.2042  0.1629  2.0462 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  1.01990    0.61025   1.671    0.133    
#> x            0.75594    0.09835   7.686 5.82e-05 ***
#> ---
#> Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#> 
#> Residual standard error: 0.8933 on 8 degrees of freedom
#> Multiple R-squared:  0.8807,	Adjusted R-squared:  0.8658 
#> F-statistic: 59.08 on 1 and 8 DF,  p-value: 5.818e-05
#>