Suppressed tabular data: Inner cell frequencies as decimal numbers
Source:R/SuppressDec.R
SuppressDec.Rd
Assume that frequencies to be published, z, can be computed from inner frequencies, y, via z = t(x) %*% y, where x is a dummy matrix.
Assuming correct suppression, this function will generate safe inner cell frequencies as decimal numbers.
Usage
SuppressDec(
x,
z = NULL,
y = NULL,
suppressed = NULL,
digits = 9,
nRep = 1,
yDeduct = NULL,
resScale = NULL,
rmse = NULL,
sparseLimit = 500
)
Arguments
- x
Dummy matrix where the dimensions match z and/or y input. Sparse matrix (Matrix package) is possible.
- z
Frequencies to be published. Can be all frequencies, only the safe ones, or all frequencies with the suppressed ones set to NA.
- y
Inner cell frequencies (see details).
- suppressed
Logical vector defining the suppressed elements of z.
- digits
Output close to whole numbers will be rounded using digits as input to RoundWhole.
- nRep
Integer, when >1, several y's will be generated. Extra columns in output.
- yDeduct
Values to be subtracted from y and added back after the calculations. Can be used to perform the modulo method described in the paper (see examples).
- resScale
Residuals will be scaled by resScale.
- rmse
Desired root mean square error (residual standard error). Will be used when resScale is NULL or cannot be used.
- sparseLimit
Limit for the number of rows of a reduced x-matrix within the algorithm. When exceeded, a sparse algorithm is used (see IpsoExtra).
Details
This function makes use of ReduceX and RegSDCipso.
It is not required that y consists of cell frequencies. A multivariate y or z is also possible. In that case, several values can be given as digits, resScale and rmse input.
Examples
# Same data as in the paper
z <- RegSDCdata("sec7z")
x <- RegSDCdata("sec7x")
y <- RegSDCdata("sec7y") # Now z is t(x) %*% y
zAll <- RegSDCdata("sec7zAll")
zAllSupp <- RegSDCdata("sec7zAllSupp")
xAll <- RegSDCdata("sec7xAll")
# When no suppression, output is identical to y
SuppressDec(xAll, zAll, y)
#> freq
#> row1_col1 3
#> row2_col1 1
#> row3_col1 12
#> row4_col1 18
#> row1_col2 11
#> row2_col2 9
#> row3_col2 22
#> row4_col2 19
#> row1_col3 32
#> row2_col3 13
#> row3_col3 2
#> row4_col3 16
#> row1_col4 30
#> row2_col4 8
#> row3_col4 2
#> row4_col4 3
SuppressDec(xAll, zAll) # y can be seen in z
#> freq
#> row1_col1 3
#> row2_col1 1
#> row3_col1 12
#> row4_col1 18
#> row1_col2 11
#> row2_col2 9
#> row3_col2 22
#> row4_col2 19
#> row1_col3 32
#> row2_col3 13
#> row3_col3 2
#> row4_col3 16
#> row1_col4 30
#> row2_col4 8
#> row3_col4 2
#> row4_col4 3
# Similar to Y* in paper (but other random values)
SuppressDec(x, z, y)
#> freq
#> row1_col1 8.3760459
#> row2_col1 -2.1183476
#> row3_col1 12.0000000
#> row4_col1 15.7423017
#> row1_col2 5.6239541
#> row2_col2 14.3760459
#> row3_col2 22.0000000
#> row4_col2 19.0000000
#> row1_col3 32.0000000
#> row2_col3 10.7423017
#> row3_col3 4.2576983
#> row4_col3 16.0000000
#> row1_col4 30.0000000
#> row2_col4 8.0000000
#> row3_col4 -0.2576983
#> row4_col4 5.2576983
# Residual standard error forced to be 1
SuppressDec(x, z, y, rmse = 1)
#> freq
#> row1_col1 3.627008
#> row2_col1 4.600348
#> row3_col1 12.000000
#> row4_col1 13.772644
#> row1_col2 10.372992
#> row2_col2 9.627008
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3 8.772644
#> row3_col3 6.227356
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4 8.000000
#> row3_col4 -2.227356
#> row4_col4 7.227356
# Seven ways of obtaining the same output
SuppressDec(x, z, rmse = 1) # slower, y must be estimated
#> freq
#> row1_col1 4.070156
#> row2_col1 5.179566
#> row3_col1 12.000000
#> row4_col1 12.750278
#> row1_col2 9.929844
#> row2_col2 10.070156
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3 7.750278
#> row3_col3 7.249722
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4 8.000000
#> row3_col4 -3.249722
#> row4_col4 8.249722
SuppressDec(x, y = y, rmse = 1)
#> freq
#> row1_col1 4.895203
#> row2_col1 3.951412
#> row3_col1 12.000000
#> row4_col1 13.153385
#> row1_col2 9.104797
#> row2_col2 10.895203
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3 8.153385
#> row3_col3 6.846615
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4 8.000000
#> row3_col4 -2.846615
#> row4_col4 7.846615
SuppressDec(xAll, zAllSupp, y, rmse = 1)
#> freq
#> row1_col1 4.832868
#> row2_col1 4.209071
#> row3_col1 12.000000
#> row4_col1 12.958061
#> row1_col2 9.167132
#> row2_col2 10.832868
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3 7.958061
#> row3_col3 7.041939
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4 8.000000
#> row3_col4 -3.041939
#> row4_col4 8.041939
SuppressDec(xAll, zAllSupp, rmse = 1) # slower, y must be estimated
#> freq
#> row1_col1 4.884392
#> row2_col1 4.033761
#> row3_col1 12.000000
#> row4_col1 13.081846
#> row1_col2 9.115608
#> row2_col2 10.884392
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3 8.081846
#> row3_col3 6.918154
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4 8.000000
#> row3_col4 -2.918154
#> row4_col4 7.918154
SuppressDec(xAll, zAll, y, is.na(zAllSupp), rmse = 1)
#> freq
#> row1_col1 4.781734
#> row2_col1 3.703295
#> row3_col1 12.000000
#> row4_col1 13.514971
#> row1_col2 9.218266
#> row2_col2 10.781734
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3 8.514971
#> row3_col3 6.485029
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4 8.000000
#> row3_col4 -2.485029
#> row4_col4 7.485029
SuppressDec(xAll, zAll, suppressed = is.na(zAllSupp), rmse = 1) # y seen in z
#> freq
#> row1_col1 4.616492
#> row2_col1 3.696645
#> row3_col1 12.000000
#> row4_col1 13.686863
#> row1_col2 9.383508
#> row2_col2 10.616492
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3 8.686863
#> row3_col3 6.313137
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4 8.000000
#> row3_col4 -2.313137
#> row4_col4 7.313137
SuppressDec(xAll, y = y, suppressed = is.na(zAllSupp), rmse = 1)
#> freq
#> row1_col1 4.729882
#> row2_col1 4.427013
#> row3_col1 12.000000
#> row4_col1 12.843105
#> row1_col2 9.270118
#> row2_col2 10.729882
#> row3_col2 22.000000
#> row4_col2 19.000000
#> row1_col3 32.000000
#> row2_col3 7.843105
#> row3_col3 7.156895
#> row4_col3 16.000000
#> row1_col4 30.000000
#> row2_col4 8.000000
#> row3_col4 -3.156895
#> row4_col4 8.156895
# YhatMod4 and YhatMod10 in Table 2 in paper
SuppressDec(xAll, zAllSupp, y, yDeduct = 4 * (y%/%4), resScale = 0)
#> freq
#> row1_col1 2.9565217
#> row2_col1 0.8695652
#> row3_col1 12.0000000
#> row4_col1 18.1739130
#> row1_col2 11.0434783
#> row2_col2 8.9565217
#> row3_col2 22.0000000
#> row4_col2 19.0000000
#> row1_col3 32.0000000
#> row2_col3 13.1739130
#> row3_col3 1.8260870
#> row4_col3 16.0000000
#> row1_col4 30.0000000
#> row2_col4 8.0000000
#> row3_col4 2.1739130
#> row4_col4 2.8260870
SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), rmse = 0)
#> freq
#> row1_col1 0.6956522
#> row2_col1 4.0869565
#> row3_col1 12.0000000
#> row4_col1 17.2173913
#> row1_col2 13.3043478
#> row2_col2 6.6956522
#> row3_col2 22.0000000
#> row4_col2 19.0000000
#> row1_col3 32.0000000
#> row2_col3 12.2173913
#> row3_col3 2.7826087
#> row4_col3 16.0000000
#> row1_col4 30.0000000
#> row2_col4 8.0000000
#> row3_col4 1.2173913
#> row4_col4 3.7826087
# As data in Table 3 in paper (but other random values)
SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), resScale = 0.1)
#> freq
#> row1_col1 0.6512744
#> row2_col1 4.3390612
#> row3_col1 12.0000000
#> row4_col1 17.0096643
#> row1_col2 13.3487256
#> row2_col2 6.6512744
#> row3_col2 22.0000000
#> row4_col2 19.0000000
#> row1_col3 32.0000000
#> row2_col3 12.0096643
#> row3_col3 2.9903357
#> row4_col3 16.0000000
#> row1_col4 30.0000000
#> row2_col4 8.0000000
#> row3_col4 1.0096643
#> row4_col4 3.9903357
# rmse instead of resScale and 5 draws
SuppressDec(xAll, zAllSupp, y, yDeduct = 10 * (y%/%10), rmse = 1, nRep = 5)
#> freq freq freq freq freq
#> row1_col1 0.4336625 1.005745 1.324514 1.416361 0.7262801
#> row2_col1 3.7672470 3.307106 3.278874 3.447879 4.6382642
#> row3_col1 12.0000000 12.000000 12.000000 12.000000 12.0000000
#> row4_col1 17.7990905 17.687149 17.396612 17.135760 16.6354556
#> row1_col2 13.5663375 12.994255 12.675486 12.583639 13.2737199
#> row2_col2 6.4336625 7.005745 7.324514 7.416361 6.7262801
#> row3_col2 22.0000000 22.000000 22.000000 22.000000 22.0000000
#> row4_col2 19.0000000 19.000000 19.000000 19.000000 19.0000000
#> row1_col3 32.0000000 32.000000 32.000000 32.000000 32.0000000
#> row2_col3 12.7990905 12.687149 12.396612 12.135760 11.6354556
#> row3_col3 2.2009095 2.312851 2.603388 2.864240 3.3645444
#> row4_col3 16.0000000 16.000000 16.000000 16.000000 16.0000000
#> row1_col4 30.0000000 30.000000 30.000000 30.000000 30.0000000
#> row2_col4 8.0000000 8.000000 8.000000 8.000000 8.0000000
#> row3_col4 1.7990905 1.687149 1.396612 1.135760 0.6354556
#> row4_col4 3.2009095 3.312851 3.603388 3.864240 4.3645444