Difference between revisions of "Data.table"

From kogic.kr
 
Line 78: Line 78:
 
10: &nbsp; 9 &nbsp; &nbsp; &nbsp; &nbsp;9.941699 5.4668993</span></span></div>
 
10: &nbsp; 9 &nbsp; &nbsp; &nbsp; &nbsp;9.941699 5.4668993</span></span></div>
  
<h1><span style="font-family:courier new,courier,monospace">Data table aggregation with &#39;by&#39;</span></h1>
+
<h1><span style="font-family:courier new,courier,monospace">Data table row-wise calculation</span></h1>
  
 
<h3 style="color:#aaa; font-style:italic"><span style="font-family:courier new,courier,monospace">Input (head)</span></h3>
 
<h3 style="color:#aaa; font-style:italic"><span style="font-family:courier new,courier,monospace">Input (head)</span></h3>
  
<div style="background:#eee;border:1px solid #ccc;padding:5px 10px;"><span style="font-family:courier new,courier,monospace"><span style="font-size:11px">&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;ID CDS_length &nbsp; &nbsp; &nbsp;FPKM &nbsp; ord bin<br />
+
<div style="background:#eee;border:1px solid #ccc;padding:5px 10px;"><span style="font-family:courier new,courier,monospace"><span style="font-size:11px">&nbsp; &nbsp;Description &nbsp; &nbsp; &nbsp; ID Anolis_carolinensis Balaena_mysticetus Balaenoptera_acutorostrata_scammoni<br />
&nbsp; &nbsp; 1: &nbsp; &nbsp; &nbsp; g10.t1.cds@g10@000002F &nbsp;10.210671 4.9242662 10665 &nbsp; 8<br />
+
1: &nbsp; &nbsp;OG000001 OG000001 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 126 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;103 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 107<br />
&nbsp; &nbsp; 2: g10002.t1.cds@g10002@000064F &nbsp;12.039262 2.3361320 &nbsp;3975 &nbsp; 3<br />
+
2: &nbsp; &nbsp;OG000002 OG000002 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;70 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 19 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;48<br />
&nbsp; &nbsp; 3: g10008.t1.cds@g10008@000073F &nbsp; 9.162391 0.6201266 &nbsp; 856 &nbsp; 1<br />
+
3: &nbsp; &nbsp;OG000003 OG000003 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;27 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 29 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;25<br />
&nbsp; &nbsp; 4: g10011.t1.cds@g10011@000073F &nbsp; 9.942515 1.9781956 &nbsp;3149 &nbsp; 3<br />
+
4: &nbsp; &nbsp;OG000004 OG000004 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;39 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 36 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;47<br />
&nbsp; &nbsp; 5: g10012.t1.cds@g10012@000073F &nbsp;10.762382 0.5596289 &nbsp; 785 &nbsp; 1</span></span></div>
+
5: &nbsp; &nbsp;OG000005 OG000005 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 1 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 11 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 3</span></span></div>
  
 
<h3 style="color:#aaa; font-style:italic"><span style="font-family:courier new,courier,monospace">Code</span></h3>
 
<h3 style="color:#aaa; font-style:italic"><span style="font-family:courier new,courier,monospace">Code</span></h3>
  
<div style="background:#eee;border:1px solid #ccc;padding:5px 10px;"><span style="font-family:courier new,courier,monospace"><big>dt[, .(Mean_CDS_length = mean(CDS_length), Mean_FPKM = mean(FPKM)), by=bin]</big></span></div>
+
<div style="background:#eee;border:1px solid #ccc;padding:5px 10px;"><span style="font-family:courier new,courier,monospace"><big>dt[,Sum := rowSums(.SD, na.rm=T), .SDcols = 3:5]</big></span></div>
 +
 
 +
<div style="background:#eee;border:1px solid #ccc;padding:5px 10px;"><span style="font-family:courier new,courier,monospace"><big>dt[,Mean&nbsp;:= rowMeans(.SD, na.rm=T), .SDcols = 3:5]</big></span></div>
  
 
<h3 style="color:#aaa; font-style:italic"><span style="font-family:courier new,courier,monospace">Output</span></h3>
 
<h3 style="color:#aaa; font-style:italic"><span style="font-family:courier new,courier,monospace">Output</span></h3>
  
<div style="background:#eee;border:1px solid #ccc;padding:5px 10px;"><span style="font-family:courier new,courier,monospace"><span style="font-size:11px">&nbsp; &nbsp; bin Mean_CDS_length Mean_FPKM<br />
+
<div style="background:#eee;border:1px solid #ccc;padding:5px 10px;"><span style="font-family:courier new,courier,monospace"><span style="font-size:11px">&nbsp; &nbsp;Description &nbsp; &nbsp; &nbsp; ID Anolis_carolinensis Balaena_mysticetus Balaenoptera_acutorostrata_scammoni Sum &nbsp; &nbsp; &nbsp;Mean<br />
&nbsp;1: &nbsp; 8 &nbsp; &nbsp; &nbsp; 10.187520 4.7044951<br />
+
1: &nbsp; &nbsp;OG000001 OG000001 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 126 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;103 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 107 336 112.00000<br />
&nbsp;2: &nbsp; 3 &nbsp; &nbsp; &nbsp; 10.590668 2.0831168<br />
+
2: &nbsp; &nbsp;OG000002 OG000002 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;70 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 19 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;48 137 &nbsp;45.66667<br />
&nbsp;3: &nbsp; 1 &nbsp; &nbsp; &nbsp; 10.488467 0.4904325<br />
+
3: &nbsp; &nbsp;OG000003 OG000003 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;27 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 29 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;25 &nbsp;81 &nbsp;27.00000<br />
&nbsp;4: &nbsp; 4 &nbsp; &nbsp; &nbsp; 10.550280 2.6412267<br />
+
4: &nbsp; &nbsp;OG000004 OG000004 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;39 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 36 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp;47 122 &nbsp;40.66667<br />
&nbsp;5: &nbsp; 2 &nbsp; &nbsp; &nbsp; 10.541246 1.3801430<br />
+
5: &nbsp; &nbsp;OG000005 OG000005 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 1 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 11 &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; &nbsp; 3 &nbsp;15 &nbsp; 5.00000</span></span></div>
&nbsp;6: &nbsp; 7 &nbsp; &nbsp; &nbsp; 10.344120 4.1326888<br />
 
&nbsp;7: &nbsp; 6 &nbsp; &nbsp; &nbsp; 10.377043 3.6221557<br />
 
&nbsp;8: &nbsp;10 &nbsp; &nbsp; &nbsp; &nbsp;9.563917 7.4570552<br />
 
&nbsp;9: &nbsp; 5 &nbsp; &nbsp; &nbsp; 10.509382 3.1425986<br />
 
10: &nbsp; 9 &nbsp; &nbsp; &nbsp; &nbsp;9.941699 5.4668993</span></span></div>
 

Latest revision as of 12:39, 24 January 2019

YalPak_Rtip

Convert a molten (long-format) data table into a wide-format data table (array type)

Input (head)

                       Species                           ID       Var1 Var2         Val                   Group      Print_name

    1: Dendronephthya_gigantea g30906.t1.cds@g30906@000029F CDS_length Full 2.01600e+03 Non-symbiotic_cnidarian Carnation_coral
    2: Dendronephthya_gigantea g14782.t1.cds@g14782@000108F CDS_length Full 4.02000e+02 Non-symbiotic_cnidarian Carnation_coral
    3: Dendronephthya_gigantea   g9986.t1.cds@g9986@000064F CDS_length Full 8.40000e+02 Non-symbiotic_cnidarian Carnation_coral
    4: Dendronephthya_gigantea   g1279.t1.cds@g1279@000024F CDS_length Full 8.58000e+02 Non-symbiotic_cnidarian Carnation_coral

    5: Dendronephthya_gigantea   g9325.t1.cds@g9325@000042F CDS_length Full 8.61000e+02 Non-symbiotic_cnidarian Carnation_coral

Code

dcast(dt, ID ~ Var1, value.var = "Val")

Output

                                 ID CDS_length      FPKM

    1:       g10.t1.cds@g10@000002F       1185 30.363500
    2:     g100.t1.cds@g100@000002F        696  0.959006
    3:   g1000.t1.cds@g1000@000011F        660  0.000000
    4: g10000.t1.cds@g10000@000064F       1074  0.278465

    5: g10001.t1.cds@g10001@000064F        522  0.962268    

Fast melt for Data table

Input (head)

        Position G4-SV40 G4-SV40-OHT

    1: 101488764      52          51
    2: 101488765      52          50
    3: 101488766      52          49
    4: 101488767      53          49

    5: 101488768      54          49

Code

melt(SeqDepth_dt, id.vars = "Position")

Output (head)

         Position    variable value

     1: 101488764     G4-SV40    52
     2: 101488765     G4-SV40    52
     3: 101488766     G4-SV40    52
     4: 101488767     G4-SV40    53

     5: 101488768     G4-SV40    54

Data table aggregation with 'by'

Input (head)

                                 ID CDS_length      FPKM   ord bin

    1:       g10.t1.cds@g10@000002F  10.210671 4.9242662 10665   8
    2: g10002.t1.cds@g10002@000064F  12.039262 2.3361320  3975   3
    3: g10008.t1.cds@g10008@000073F   9.162391 0.6201266   856   1
    4: g10011.t1.cds@g10011@000073F   9.942515 1.9781956  3149   3

    5: g10012.t1.cds@g10012@000073F  10.762382 0.5596289   785   1

Code

dt[, .(Mean_CDS_length = mean(CDS_length), Mean_FPKM = mean(FPKM)), by=bin]

Output

    bin Mean_CDS_length Mean_FPKM

 1:   8       10.187520 4.7044951
 2:   3       10.590668 2.0831168
 3:   1       10.488467 0.4904325
 4:   4       10.550280 2.6412267
 5:   2       10.541246 1.3801430
 6:   7       10.344120 4.1326888
 7:   6       10.377043 3.6221557
 8:  10        9.563917 7.4570552
 9:   5       10.509382 3.1425986

10:   9        9.941699 5.4668993

Data table row-wise calculation

Input (head)

   Description       ID Anolis_carolinensis Balaena_mysticetus Balaenoptera_acutorostrata_scammoni

1:    OG000001 OG000001                 126                103                                 107
2:    OG000002 OG000002                  70                 19                                  48
3:    OG000003 OG000003                  27                 29                                  25
4:    OG000004 OG000004                  39                 36                                  47

5:    OG000005 OG000005                   1                 11                                   3

Code

dt[,Sum := rowSums(.SD, na.rm=T), .SDcols = 3:5]
dt[,Mean := rowMeans(.SD, na.rm=T), .SDcols = 3:5]

Output

   Description       ID Anolis_carolinensis Balaena_mysticetus Balaenoptera_acutorostrata_scammoni Sum      Mean

1:    OG000001 OG000001                 126                103                                 107 336 112.00000
2:    OG000002 OG000002                  70                 19                                  48 137  45.66667
3:    OG000003 OG000003                  27                 29                                  25  81  27.00000
4:    OG000004 OG000004                  39                 36                                  47 122  40.66667

5:    OG000005 OG000005                   1                 11                                   3  15   5.00000