Skip to contents

Assume that frequencies to be published, z, can be computed from inner frequencies, y, via z = t(x) %*% y, where x is a dummy matrix. Assuming correct suppression, this function will generate safe inner cell frequencies as decimal numbers.

Usage

SuppressDec(
  x,
  z = NULL,
  y = NULL,
  suppressed = NULL,
  digits = 9,
  nRep = 1,
  yDeduct = NULL,
  resScale = NULL,
  rmse = NULL,
  sparseLimit = 500
)

Arguments

x

Dummy matrix whose dimensions match the z and/or y input. A sparse matrix (Matrix package) is possible.

z

Frequencies to be published. Either all frequencies, only the safe ones, or all frequencies with the suppressed ones set to NA.

y

Inner cell frequencies (see details).

suppressed

Logical vector defining the suppressed elements of z.

digits

Output close to whole numbers will be rounded using digits as input to RoundWhole.

nRep

Integer. When >1, several y's will be generated and returned as extra columns in the output.

yDeduct

Values to be subtracted from y and added back after the calculations. Can be used to perform the modulo method described in the paper (see examples).

resScale

Scaling factor for the residuals: the residuals will be multiplied by resScale.

rmse

Desired root mean square error (residual standard error). Will be used when resScale is NULL or cannot be used.

sparseLimit

Limit for the number of rows of a reduced x-matrix within the algorithm. When exceeded, a sparse algorithm is used (see IpsoExtra).

Value

The inner cell frequencies as decimal numbers.

Details

This function makes use of ReduceX and RegSDCipso. It is not required that y consists of cell frequencies. A multivariate y or z is also possible. In that case, digits, resScale and rmse may each be given as several values.

Note

Capital letters, X, Y and Z, are used in the paper.

Author

Øyvind Langsrud

Examples

# Same data as in the paper
z <- RegSDCdata("sec7z")
x <- RegSDCdata("sec7x")
y <- RegSDCdata("sec7y")  # Now z is t(x) %*% y 
zAll <- RegSDCdata("sec7zAll")
zAllSupp <- RegSDCdata("sec7zAllSupp")
xAll <- RegSDCdata("sec7xAll")

# When no suppression, output is identical to y
SuppressDec(xAll, zAll, y)
#>           freq
#> row1_col1    3
#> row2_col1    1
#> row3_col1   12
#> row4_col1   18
#> row1_col2   11
#> row2_col2    9
#> row3_col2   22
#> row4_col2   19
#> row1_col3   32
#> row2_col3   13
#> row3_col3    2
#> row4_col3   16
#> row1_col4   30
#> row2_col4    8
#> row3_col4    2
#> row4_col4    3
SuppressDec(xAll, zAll)  # y can be seen in z
#>           freq
#> row1_col1    3
#> row2_col1    1
#> row3_col1   12
#> row4_col1   18
#> row1_col2   11
#> row2_col2    9
#> row3_col2   22
#> row4_col2   19
#> row1_col3   32
#> row2_col3   13
#> row3_col3    2
#> row4_col3   16
#> row1_col4   30
#> row2_col4    8
#> row3_col4    2
#> row4_col4    3

# Similar to Y* in paper (but other random values)
SuppressDec(x, z, y)
#>                 freq
#> row1_col1  8.3760459
#> row2_col1 -2.1183476
#> row3_col1 12.0000000
#> row4_col1 15.7423017
#> row1_col2  5.6239541
#> row2_col2 14.3760459
#> row3_col2 22.0000000
#> row4_col2 19.0000000
#> row1_col3 32.0000000
#> row2_col3 10.7423017
#> row3_col3  4.2576983
#> row4_col3 16.0000000
#> row1_col4 30.0000000
#> row2_col4  8.0000000
#> row3_col4 -0.2576983
#> row4_col4  5.2576983

# Residual standard error forced to be 1
SuppressDec(x, z, y, rmse = 1)
#>                freq
#> row1_col1  3.627008
#> row2_col1  4.600348
#> row3_col1 12.000000
#> row4_col1 13.772644
#> row1_col2 10.372992
#> row2_col2  9.627008
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3  8.772644
#> row3_col3  6.227356
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4  8.000000
#> row3_col4 -2.227356
#> row4_col4  7.227356

# Seven ways of obtaining the same output (apart from the random values)
SuppressDec(x, z, rmse = 1)  # slower, y must be estimated
#>                freq
#> row1_col1  4.070156
#> row2_col1  5.179566
#> row3_col1 12.000000
#> row4_col1 12.750278
#> row1_col2  9.929844
#> row2_col2 10.070156
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3  7.750278
#> row3_col3  7.249722
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4  8.000000
#> row3_col4 -3.249722
#> row4_col4  8.249722
SuppressDec(x, y = y, rmse = 1)
#>                freq
#> row1_col1  4.895203
#> row2_col1  3.951412
#> row3_col1 12.000000
#> row4_col1 13.153385
#> row1_col2  9.104797
#> row2_col2 10.895203
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3  8.153385
#> row3_col3  6.846615
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4  8.000000
#> row3_col4 -2.846615
#> row4_col4  7.846615
SuppressDec(xAll, zAllSupp, y, rmse = 1)
#>                freq
#> row1_col1  4.832868
#> row2_col1  4.209071
#> row3_col1 12.000000
#> row4_col1 12.958061
#> row1_col2  9.167132
#> row2_col2 10.832868
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3  7.958061
#> row3_col3  7.041939
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4  8.000000
#> row3_col4 -3.041939
#> row4_col4  8.041939
SuppressDec(xAll, zAllSupp, rmse = 1)  # slower, y must be estimated
#>                freq
#> row1_col1  4.884392
#> row2_col1  4.033761
#> row3_col1 12.000000
#> row4_col1 13.081846
#> row1_col2  9.115608
#> row2_col2 10.884392
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3  8.081846
#> row3_col3  6.918154
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4  8.000000
#> row3_col4 -2.918154
#> row4_col4  7.918154
SuppressDec(xAll, zAll, y, is.na(zAllSupp), rmse = 1)
#>                freq
#> row1_col1  4.781734
#> row2_col1  3.703295
#> row3_col1 12.000000
#> row4_col1 13.514971
#> row1_col2  9.218266
#> row2_col2 10.781734
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3  8.514971
#> row3_col3  6.485029
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4  8.000000
#> row3_col4 -2.485029
#> row4_col4  7.485029
SuppressDec(xAll, zAll, suppressed = is.na(zAllSupp), rmse = 1)  # y seen in z
#>                freq
#> row1_col1  4.616492
#> row2_col1  3.696645
#> row3_col1 12.000000
#> row4_col1 13.686863
#> row1_col2  9.383508
#> row2_col2 10.616492
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3  8.686863
#> row3_col3  6.313137
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4  8.000000
#> row3_col4 -2.313137
#> row4_col4  7.313137
SuppressDec(xAll, y = y, suppressed = is.na(zAllSupp), rmse = 1)
#>                freq
#> row1_col1  4.729882
#> row2_col1  4.427013
#> row3_col1 12.000000
#> row4_col1 12.843105
#> row1_col2  9.270118
#> row2_col2 10.729882
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3  7.843105
#> row3_col3  7.156895
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4  8.000000
#> row3_col4 -3.156895
#> row4_col4  8.156895

# YhatMod4 and YhatMod10 in Table 2 in paper
SuppressDec(xAll, zAllSupp, y, yDeduct = 4 * (y%/%4), resScale = 0)
#>                 freq
#> row1_col1  2.9565217
#> row2_col1  0.8695652
#> row3_col1 12.0000000
#> row4_col1 18.1739130
#> row1_col2 11.0434783
#> row2_col2  8.9565217
#> row3_col2 22.0000000
#> row4_col2 19.0000000
#> row1_col3 32.0000000
#> row2_col3 13.1739130
#> row3_col3  1.8260870
#> row4_col3 16.0000000
#> row1_col4 30.0000000
#> row2_col4  8.0000000
#> row3_col4  2.1739130
#> row4_col4  2.8260870
SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), rmse = 0)
#>                 freq
#> row1_col1  0.6956522
#> row2_col1  4.0869565
#> row3_col1 12.0000000
#> row4_col1 17.2173913
#> row1_col2 13.3043478
#> row2_col2  6.6956522
#> row3_col2 22.0000000
#> row4_col2 19.0000000
#> row1_col3 32.0000000
#> row2_col3 12.2173913
#> row3_col3  2.7826087
#> row4_col3 16.0000000
#> row1_col4 30.0000000
#> row2_col4  8.0000000
#> row3_col4  1.2173913
#> row4_col4  3.7826087

# As data in Table 3 in paper (but other random values)
SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), resScale = 0.1)
#>                 freq
#> row1_col1  0.6512744
#> row2_col1  4.3390612
#> row3_col1 12.0000000
#> row4_col1 17.0096643
#> row1_col2 13.3487256
#> row2_col2  6.6512744
#> row3_col2 22.0000000
#> row4_col2 19.0000000
#> row1_col3 32.0000000
#> row2_col3 12.0096643
#> row3_col3  2.9903357
#> row4_col3 16.0000000
#> row1_col4 30.0000000
#> row2_col4  8.0000000
#> row3_col4  1.0096643
#> row4_col4  3.9903357

# rmse instead of resScale and 5 draws
SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), rmse = 1, nRep = 5)
#>                 freq      freq      freq      freq       freq
#> row1_col1  0.4336625  1.005745  1.324514  1.416361  0.7262801
#> row2_col1  3.7672470  3.307106  3.278874  3.447879  4.6382642
#> row3_col1 12.0000000 12.000000 12.000000 12.000000 12.0000000
#> row4_col1 17.7990905 17.687149 17.396612 17.135760 16.6354556
#> row1_col2 13.5663375 12.994255 12.675486 12.583639 13.2737199
#> row2_col2  6.4336625  7.005745  7.324514  7.416361  6.7262801
#> row3_col2 22.0000000 22.000000 22.000000 22.000000 22.0000000
#> row4_col2 19.0000000 19.000000 19.000000 19.000000 19.0000000
#> row1_col3 32.0000000 32.000000 32.000000 32.000000 32.0000000
#> row2_col3 12.7990905 12.687149 12.396612 12.135760 11.6354556
#> row3_col3  2.2009095  2.312851  2.603388  2.864240  3.3645444
#> row4_col3 16.0000000 16.000000 16.000000 16.000000 16.0000000
#> row1_col4 30.0000000 30.000000 30.000000 30.000000 30.0000000
#> row2_col4  8.0000000  8.000000  8.000000  8.000000  8.0000000
#> row3_col4  1.7990905  1.687149  1.396612  1.135760  0.6354556
#> row4_col4  3.2009095  3.312851  3.603388  3.864240  4.3645444