Skip to content

Interval_Aggregate CLI usage

Activate virtual environment

# Using virtualenvwrapper here, but a Conda environment works just as well
workon pycoMeth
(pycoMeth) 

Getting help

pycoMeth Interval_Aggregate --help
usage: pycoMeth Interval_Aggregate [-h] -i CPG_AGGREGATE_FN -f REF_FASTA_FN
                                   [-a INTERVAL_BED_FN] [-b OUTPUT_BED_FN]
                                   [-t OUTPUT_TSV_FN] [-n INTERVAL_SIZE]
                                   [-m MIN_CPG_PER_INTERVAL] [-s SAMPLE_ID]
                                   [-l MIN_LLR] [-v] [-q] [-p]

Bin the output of `pycoMeth CpG_Aggregate` in genomic intervals, using either
an annotation file containing intervals or a sliding window.

optional arguments:
  -h, --help            show this help message and exit

Input/Output options:
  -i CPG_AGGREGATE_FN, --cpg_aggregate_fn CPG_AGGREGATE_FN
                        Output tsv file generated by CpG_Aggregate (can be
                        gzipped) (required) [str]
  -f REF_FASTA_FN, --ref_fasta_fn REF_FASTA_FN
                        Reference file used for alignment in Fasta format
                        (ideally already indexed with samtools faidx)
                        (required) [str]
  -a INTERVAL_BED_FN, --interval_bed_fn INTERVAL_BED_FN
                        SORTED bed file containing **non-overlapping**
                        intervals to bin CpG data into (Optional) (can be
                        gzipped) (default: None) [str]
  -b OUTPUT_BED_FN, --output_bed_fn OUTPUT_BED_FN
                        Path to write a summary result file in BED format (At
                        least 1 output file is required) (can be gzipped)
                        (default: None) [str]
  -t OUTPUT_TSV_FN, --output_tsv_fn OUTPUT_TSV_FN
                        Path to write a more extensive result report in TSV
                        format (At least 1 output file is required) (can be
                        gzipped) (default: None) [str]

Misc options:
  -n INTERVAL_SIZE, --interval_size INTERVAL_SIZE
                        Size of the sliding window in which to aggregate CpG
                        sites data from if no BED file is provided (default:
                        1000) [int]
  -m MIN_CPG_PER_INTERVAL, --min_cpg_per_interval MIN_CPG_PER_INTERVAL
                        Minimal number of CpG sites per interval. (default: 5)
                        [int]
  -s SAMPLE_ID, --sample_id SAMPLE_ID
                        Sample ID to be used for the BED track header
                        (default: None) [str]
  -l MIN_LLR, --min_llr MIN_LLR
                        Minimal log likelyhood ratio to consider a site
                        significantly methylated or unmethylated in output BED
                        file (default: 2) [float]

Verbosity options:
  -v, --verbose         Increase verbosity
  -q, --quiet           Reduce verbosity
  -p, --progress        Display a progress bar
(pycoMeth) 

Example usage

Default usage with sliding windows

pycoMeth Interval_Aggregate\
    -i ./data/CpG_Aggregate_sample_1.tsv \
    -f ./data/ref.fa \
    -b ./results/Interval_Aggregate_sample_1_CLI.bed \
    -t ./results/Interval_Aggregate_sample_1_CLI.tsv \
    --interval_size 500 \
    --min_cpg_per_interval 5 \
    -s sample_1 \
    --progress

head ./results/Interval_Aggregate_sample_1_CLI.bed
head ./results/Interval_Aggregate_sample_1_CLI.tsv
## Checking options and input files ##
## Parsing CpG_aggregate file ##
    Progress: 100%|██████████████████████| 5.82M/5.82M [00:01<00:00, 3.11M bytes/s]
    Results summary
        Lines parsed: 89,392
        Total number of intervals: 24,319
    Writter summary
        Empty intervals skipped: 14,390
        Valid intervals written: 8,389
        Low CpG intervals skipped: 1,540
(pycoMeth) (pycoMeth) track name=sample_1_Interval itemRgb=On
I   500 1000    .   -3.35   .   500 1000    29,140,190
I   1000    1500    .   -3.65   .   1000    1500    29,140,190
I   1500    2000    .   -3.4    .   1500    2000    29,140,190
I   2000    2500    .   -4.272  .   2000    2500    33,102,171
I   2500    3000    .   -2.5    .   2500    3000    52,168,194
I   3000    3500    .   -1.4    .   3000    3500    230,230,230
I   3500    4000    .   -1.75   .   3500    4000    230,230,230
I   4000    4500    .   -3.325  .   4000    4500    29,140,190
I   4500    5000    .   -2.628  .   4500    5000    52,168,194
(pycoMeth) chromosome   start   end num_motifs  median_llr  llr_list    pos_list
I   500 1000    12  -3.35   [-1.14,-3.54,-7.24,-4.3,0.56,-0.65,-4.37,-3.78,-0.27,-1.32,-3.35]   [557,587,626,665,834,868,890,936,955,967,988]
I   1000    1500    22  -3.65   [-2.48,-5.035,-4.16,-3.315,-3.295,-1.69,-9.885,-7.95,-3.65,-3.0,-2.83,-8.36,-8.56]  [1036,1095,1119,1136,1158,1178,1197,1212,1345,1399,1437,1450,1482]
I   1500    2000    19  -3.4    [-5.71,-6.05,-0.925,-7.165,-3.975,0.56,-1.78,-1.86,-3.67,-3.4,-5.53,-1.06,-1.79,-1.94,-6.22]    [1520,1581,1630,1650,1707,1755,1782,1797,1814,1829,1889,1925,1949,1961,1976]
I   2000    2500    15  -4.272  [-5.24,-3.07,-4.33,-19.055,-7.55,-1.255,-2.565,-4.215,-5.515,-3.48] [2003,2051,2084,2126,2300,2396,2421,2445,2459,2498]
I   2500    3000    19  -2.5    [-0.705,0.385,-6.685,-10.175,-4.27,-2.3,-2.5,-2.52,-1.425,-4.255,-2.73,0.35,-9.31,-1.05,-0.99]  [2546,2563,2584,2629,2666,2680,2694,2729,2752,2764,2829,2870,2908,2978,2989]
I   3000    3500    9   -1.4    [-2.34,-2.19,-1.4,-1.0,0.29,-1.3,-6.9,-1.22,-1.92]  [3000,3024,3044,3056,3071,3148,3218,3367,3473]
I   3500    4000    8   -1.75   [-1.7,-0.53,-4.46,-2.2,-1.75,-8.47,0.53]    [3516,3610,3624,3674,3722,3820,3987]
I   4000    4500    10  -3.325  [-2.83,-3.96,-1.77,-13.895,-3.82,-1.73] [4076,4094,4223,4267,4296,4399]
I   4500    5000    12  -2.628  [-2.71,-0.76,-1.55,-2.56,-0.985,-5.02,-3.46,-1.275,-2.695,-3.52]    [4534,4591,4654,4706,4787,4809,4847,4859,4917,4988]
(pycoMeth) 

Usage with an annotation BED file

pycoMeth Interval_Aggregate\
    -i ./data/CpG_Aggregate_sample_1.tsv \
    -f ./data/ref.fa \
    -a ./data/Yeast_CGI.bed \
    -b ./results/Interval_Aggregate_sample_1_interval_CLI.bed \
    -t ./results/Interval_Aggregate_sample_1_interval_CLI.tsv \
    -s sample_1 \
    --progress

head ./results/Interval_Aggregate_sample_1_interval_CLI.bed
head ./results/Interval_Aggregate_sample_1_interval_CLI.tsv
## Checking options and input files ##
## Parsing CpG_aggregate file ##
    Progress: 100%|█████████████████████▉| 5.81M/5.82M [00:00<00:00, 6.77M bytes/s]
    Results summary
        Lines parsed: 89,235
        Total number of intervals: 2,041
    Writter summary
        Empty intervals skipped: 1,323
        Valid intervals written: 637
        Low CpG intervals skipped: 81
(pycoMeth) (pycoMeth) track name=sample_1_Interval itemRgb=On
I   1804    2170    .   -3.67   .   1804    2170    29,140,190
I   31835   32949   .   -5.65   .   31835   32949   35,70,156
I   33497   34371   .   -3.295  .   33497   34371   29,140,190
I   44730   44988   .   -3.2    .   44730   44988   29,140,190
I   47889   48187   .   -4.55   .   47889   48187   33,102,171
I   57175   57391   .   -4.76   .   57175   57391   33,102,171
I   59052   59257   .   -4.05   .   59052   59257   33,102,171
I   60422   60656   .   -3.615  .   60422   60656   29,140,190
I   61246   61903   .   -3.423  .   61246   61903   29,140,190
(pycoMeth) chromosome   start   end num_motifs  median_llr  llr_list    pos_list
I   1804    2170    14  -3.67   [-3.67,-3.4,-5.53,-1.06,-1.79,-1.94,-6.22,-5.24,-3.07,-4.33,-19.055]    [1814,1829,1889,1925,1949,1961,1976,2003,2051,2084,2126]
I   31835   32949   10  -5.65   [-2.925,-6.055,-1.785,-5.65,-6.83,-1.695,-12.32]    [31867,31889,31937,31960,32003,32027,32054]
I   33497   34371   19  -3.295  [-4.38,-3.32,-1.29,-3.27,-5.89,-8.96,-6.88,-2.22,-3.605,-1.07,-6.465,-1.735,0.15,-0.96] [33947,33967,34001,34021,34049,34060,34097,34160,34171,34207,34226,34253,34307,34354]
I   44730   44988   9   -3.2    [-2.37,-4.9,-1.63,-1.69,-8.09,-4.03]    [44748,44789,44808,44841,44867,44926]
I   47889   48187   13  -4.55   [-4.55,-9.41,-3.37,-4.66,-3.24,-4.66,-4.535]    [47890,47994,48036,48050,48082,48100,48113]
I   57175   57391   9   -4.76   [-7.96,-4.76,-0.33,-3.77,-1.03,-7.68,-6.66] [57192,57255,57274,57292,57316,57335,57359]
I   59052   59257   13  -4.05   [-8.53,-1.23,-11.15,-3.07,-3.88,-4.22,-1.59,-5.79]  [59067,59109,59137,59167,59180,59219,59232,59247]
I   60422   60656   18  -3.615  [-3.3,-3.14,-0.44,-6.65,-3.93,-2.27,-9.16,-1.73,-17.33,-4.25]   [60427,60440,60467,60490,60515,60548,60559,60581,60595,60650]
I   61246   61903   38  -3.423  [-3.68,-26.15,-6.45,-0.76,-3.86,-2.33,-4.945,-4.975,-2.615,-2.715,-2.905,-3.415,-9.04,-21.33,-2.735,-1.355,-3.43,-2.44] [61257,61328,61389,61409,61424,61443,61495,61544,61569,61581,61614,61625,61644,61692,61746,61763,61857,61872]
(pycoMeth)