TEST pycltools package¶

This notebook contains tests for the functions contained in the pycltools package.

# Jupyter-specific imports. `IPython.display` is the public, supported home of these
# helpers; importing `display` from the internal `IPython.core.display` is deprecated.
from IPython.display import display, HTML, Markdown
# Standard library: remove() deletes the files downloaded by the wget tests
from os import remove
# Star import on purpose: this notebook exercises the functions of pycltools
from pycltools.pycltools import *

JUPYTER NOTEBOOK SPECIFIC TOOLS¶

jhelp¶

help(jhelp)

Help on function jhelp in module pycltools.pycltools:

jhelp(function, full=True, print_private=False, **kwargs)
    Print a nice looking help string based on the name of a declared function. By default print the function
    definition and description
    * function
        Name of a declared function or class method
    * full
        If True, the help string will included a description of all arguments

jhelp(jhelp, full=True)

jprint¶

jhelp(jprint, full=True)

txt="Lorem ipsum condimentum elementum sapien nam eleifend quisque sapien curae"
jprint(txt,font="sans", color="purple", size=200, bold=True)

txt="Lorem ipsum\n\tcondimentum elementum\n\t\tsapien nam eleifend quisque\n\t\t\tsapien curae"
jprint(txt,font="sans", color="powderblue", size=200, bold=True, line_height=50)

# Several positional values of mixed types, with every formatting option spelled out
jprint(
    "Lorem", "ipsum", "condimentum", "elementum", 1, True,
    bold=False,
    italic=False,
    highlight=False,
    underlined=True,
    striked=False,
    subscripted=False,
    superscripted=False,
    font="calibri",
    color="grey",
    size=250,
    align="center",
)

toogle_code¶

jhelp(toogle_code, full=True)

#toogle_code()

larger_display¶

jhelp(larger_display, full=True)

larger_display(100)

PREDICATES¶

is_readable_file¶

jhelp(is_readable_file, full=True)

# Path that does not point to a file: the OSError raised by is_readable_file is
# caught and its message printed instead of "OK".
try:
    is_readable_file("./data/KJHYTGYUJ")
    print("OK")
except OSError as err:
    print(err)

./data/KJHYTGYUJ is not a valid file

# Same check on a file shipped with the test data: no exception, so "OK" is printed.
try:
    is_readable_file("./data/RADAR_Secondary.txt")
    print("OK")
except OSError as err:
    print(err)

OK

is_gziped¶

jhelp(is_gziped, full=True)

is_gziped("./data/RADAR_Secondary.txt")

False

is_gziped("./data/RADAR_Secondary.txt.gz")

True

has_extension¶

jhelp(has_extension, full=True)

has_extension("./data/test/RADAR_Secondary.txt.gz", "gz")

True

has_extension("./data/test/RADAR_Secondary.txt.gz", "fa")

False

has_extension("./data/test/RADAR_Secondary.txt.gz", "txt", -2)

True

PATH MANIPULATION¶

file_basename¶

jhelp(file_basename, full=True)

file_basename("./data/RADAR_Secondary.txt.gz")

'RADAR_Secondary'

extensions¶

jhelp(extensions, full=True)

# Same file name with two, one and no extension
for sample_path in (
    "./data/RADAR_Secondary.txt.gz",
    "./data/RADAR_Secondary.txt",
    "./data/RADAR_Secondary",
):
    print(extensions(sample_path))

.txt.gz
.txt

extensions_list¶

jhelp(extensions_list, full=True)

# Same file name with two, one and no extension
for sample_path in (
    "./data/RADAR_Secondary.txt.gz",
    "./data/RADAR_Secondary.txt",
    "./data/RADAR_Secondary",
):
    print(extensions_list(sample_path))

['txt', 'gz']
['txt']
[]

file_name¶

jhelp(file_name, full=True)

file_name("./data/test/RADAR_Secondary.txt.gz")

'RADAR_Secondary.txt.gz'

dir_name¶

jhelp(dir_name, full=True)

# Nested relative path, file in the current directory, and absolute path
for sample_path in (
    "./data/test/RADAR_Secondary.txt.gz",
    "./__init__.py",
    "/bin/bash",
):
    print(dir_name(sample_path))

test
.
bin

dir_path¶

jhelp(dir_path, full=True)

# Nested relative path, file in the current directory, and absolute path
for sample_path in (
    "./data/test/RADAR_Secondary.txt.gz",
    "./__init__.py",
    "/bin/bash",
):
    print(dir_path(sample_path))

./data/test
.
/bin

STRING FORMATTING¶

supersplit¶

jhelp(supersplit, full=True)

bed_line = "chr7\t74138\t774138\tA>I|LOC100129917|LUNG:LYMPHOBLASTOID_CELL_LINE|15342557:15258596:22327324\t0"

# Split on both tabs and pipes
print(supersplit(bed_line, ["\t", "|"]))

# No separator given: in the recorded output the line is split on tabs only
print(supersplit(bed_line))

# Split on pipes only: tabs stay inside the resulting fields
print(supersplit(bed_line, "|"))

['chr7', '74138', '774138', 'A>I', 'LOC100129917', 'LUNG:LYMPHOBLASTOID_CELL_LINE', '15342557:15258596:22327324', '0']
['chr7', '74138', '774138', 'A>I|LOC100129917|LUNG:LYMPHOBLASTOID_CELL_LINE|15342557:15258596:22327324', '0']
['chr7\t74138\t774138\tA>I', 'LOC100129917', 'LUNG:LYMPHOBLASTOID_CELL_LINE', '15342557:15258596:22327324\t0']

rm_blank¶

jhelp(rm_blank, full=True)

messy_line = "chr\t\t17|LU NG:LYMPHOBLAST    OID_CELL_LINE|15342557:152585     96:22327324\t0"

# Default call: in the recorded output every blank is removed
print(rm_blank(messy_line))

# With replace="*": in the recorded output each run of blanks becomes a single "*"
print(rm_blank(messy_line, replace="*"))

chr17|LUNG:LYMPHOBLASTOID_CELL_LINE|15342557:15258596:223273240
chr*17|LU*NG:LYMPHOBLAST*OID_CELL_LINE|15342557:152585*96:22327324*0

FILE MANIPULATION¶

copyFile¶

jhelp(copyFile, full=True)

copyFile(src="./data/RADAR_Secondary.txt", dest="./data/")

Error: './data/RADAR_Secondary.txt' and './data/RADAR_Secondary.txt' are the same file

copyFile(src="./data/RADAR_Secondary.txt", dest="./data/RADAR_Secondary_copy.txt")

gzip_file¶

jhelp(gzip_file, full=True)

gzip_file("./data/RADAR_Secondary.txt")

Compressing ./data/RADAR_Secondary.txt

'/home/aleg/Programming/pycltools/docs/data/RADAR_Secondary.txt.gz'

gunzip_file¶

jhelp(gunzip_file, full=True)

gunzip_file("./data/RADAR_Secondary.txt.gz")

Uncompressing ./data/RADAR_Secondary.txt.gz

'/home/aleg/Programming/pycltools/docs/data/RADAR_Secondary.txt'

FILE INFORMATION¶

linerange¶

jhelp(linerange, full=True)

file = "./data/RADAR_Secondary.txt"
linerange (file)

0	#location	reference	tissue	coverage	editing_level(%)
1	chr1:1037916	Peng et al 2012	Lymphoblastoid cell line	9	66.67
2	chr1:1156882	Peng et al 2012	Lymphoblastoid cell line	42	36.59
...
97	chr1:10560773	Peng et al 2012	Lymphoblastoid cell line	20	40.00
98	chr1:10602697	Peng et al 2012	Lymphoblastoid cell line	5	60.00
99	chr1:11138237	Peng et al 2012	Lymphoblastoid cell line	14	42.86

file = "./data/gencode_sample.gff3"
linerange (file, [[2,5],[10,12],[98,100]], max_char_line=100)

...
2	#provider: GENCODE
3	#contact: gencode-help@sanger.ac.uk
4	#format: gff3
5	#date: 2015-12-03
...
10	chr1	HAVANA	exon	30564	30667	.	+	.	ID=exon:ENST00000473358.1:2;Parent=ENST00000473358.1;gene_id=E...
11	chr1	HAVANA	exon	30976	31097	.	+	.	ID=exon:ENST00000473358.1:3;Parent=ENST00000473358.1;gene_id=E...
12	chr1	HAVANA	transcript	30267	31109	.	+	.	ID=ENST00000469289.1;Parent=ENSG00000243485.3;gene_id=EN...
...
98	chr1	HAVANA	exon	287517	287921	.	-	.	ID=exon:ENST00000335577.4:2;Parent=ENST00000335577.4;gene_id...
99	chr1	HAVANA	gene	357383	359681	.	-	.	ID=ENSG00000236743.1;gene_id=ENSG00000236743.1;gene_type=lin...
100	chr1	HAVANA	transcript	357383	359681	.	-	.	ID=ENST00000441866.1;Parent=ENSG00000236743.1;gene_id...
...

file = "./data/RADAR_Secondary.txt.gz"
linerange (file, line_numbering=False)

#location	reference	tissue	coverage	editing_level(%)
chr1:1037916	Peng et al 2012	Lymphoblastoid cell line	9	66.67
chr1:1156882	Peng et al 2012	Lymphoblastoid cell line	42	36.59
...
chr1:10560773	Peng et al 2012	Lymphoblastoid cell line	20	40.00
chr1:10602697	Peng et al 2012	Lymphoblastoid cell line	5	60.00
chr1:11138237	Peng et al 2012	Lymphoblastoid cell line	14	42.86

cat¶

jhelp(cat, full=True)

file = "./data/RADAR_Secondary.txt.gz"
cat (file, max_lines=10)

#location	reference	tissue	coverage	editing_level(%)
chr1:1037916	Peng et al 2012	Lymphoblastoid cell line	9	66.67
chr1:1156882	Peng et al 2012	Lymphoblastoid cell line	42	36.59
chr1:1157460	Peng et al 2012	Lymphoblastoid cell line	66	22.73
chr1:1252441	Peng et al 2012	Lymphoblastoid cell line	11	72.73
...
chr1:10521237	Peng et al 2012	Lymphoblastoid cell line	34	17.65
chr1:10521238	Peng et al 2012	Lymphoblastoid cell line	35	37.14
chr1:10560773	Peng et al 2012	Lymphoblastoid cell line	20	40.00
chr1:10602697	Peng et al 2012	Lymphoblastoid cell line	5	60.00
chr1:11138237	Peng et al 2012	Lymphoblastoid cell line	14	42.86

file="./data/gencode_sample.gff3"
cat (file, max_lines=20, line_numbering=True, max_char_line=100)

0	##gff-version 3
1	#description: evidence-based annotation of the human genome (GRCh38), version 24 (Ensembl 83) - lo...
2	#provider: GENCODE
3	#contact: gencode-help@sanger.ac.uk
4	#format: gff3
5	#date: 2015-12-03
6	##sequence-region chr1 1 248956422
7	chr1	HAVANA	gene	29554	31109	.	+	.	ID=ENSG00000243485.3;gene_id=ENSG00000243485.3;gene_type=lincRN...
8	chr1	HAVANA	transcript	29554	31097	.	+	.	ID=ENST00000473358.1;Parent=ENSG00000243485.3;gene_id=ENS...
9	chr1	HAVANA	exon	29554	30039	.	+	.	ID=exon:ENST00000473358.1:1;Parent=ENST00000473358.1;gene_id=EN...
...
9990	chr1	HAVANA	exon	221983000	221983143	.	+	.	ID=exon:ENST00000421147.5:3;Parent=ENST00000421147.5...
9991	chr1	HAVANA	transcript	221966410	221984964	.	+	.	ID=ENST00000441160.1;Parent=ENSG00000228437.5;...
9992	chr1	HAVANA	exon	221966410	221966502	.	+	.	ID=exon:ENST00000441160.1:1;Parent=ENST00000441160.1...
9993	chr1	HAVANA	exon	221983000	221983143	.	+	.	ID=exon:ENST00000441160.1:2;Parent=ENST00000441160.1...
9994	chr1	HAVANA	exon	221984054	221984964	.	+	.	ID=exon:ENST00000441160.1:3;Parent=ENST00000441160.1...
9995	chr1	HAVANA	gene	222041705	222064763	.	-	.	ID=ENSG00000232679.1;gene_id=ENSG00000232679.1;gene_...
9996	chr1	HAVANA	transcript	222041705	222064763	.	-	.	ID=ENST00000438158.1;Parent=ENSG00000232679.1;...
9997	chr1	HAVANA	exon	222064685	222064763	.	-	.	ID=exon:ENST00000438158.1:1;Parent=ENST00000438158.1...
9998	chr1	HAVANA	exon	222058414	222058678	.	-	.	ID=exon:ENST00000438158.1:2;Parent=ENST00000438158.1...
9999	chr1	HAVANA	exon	222041705	222041922	.	-	.	ID=exon:ENST00000438158.1:3;Parent=ENST00000438158.1...

tail¶

jhelp(tail, full=True)

file = "./data/RADAR_clean.txt"
tail (file, n = 4)

...
chr1	225974581	225974581	A>I|SRP9|YH|22327324	28.89	+
chr1	225974735	225974735	A>I|SRP9|YH|22327324	23.88	+
chr1	225974746	225974746	A>I|SRP9|YH|22327324	71.19	+

file = "./data/RADAR_Secondary.txt.gz"
tail (file, n = 4, line_numbering=True)

...
97	chr1:10560773	Peng et al 2012	Lymphoblastoid cell line	20	40.00
98	chr1:10602697	Peng et al 2012	Lymphoblastoid cell line	5	60.00
99	chr1:11138237	Peng et al 2012	Lymphoblastoid cell line	14	42.86

file="./data/gencode_sample.gff3"
tail (file, n = 5, max_char_line=100)

...
chr1	HAVANA	transcript	222041705	222064763	.	-	.	ID=ENST00000438158.1;Parent=ENSG00000232679.1;gene_...
chr1	HAVANA	exon	222064685	222064763	.	-	.	ID=exon:ENST00000438158.1:1;Parent=ENST00000438158.1;gene...
chr1	HAVANA	exon	222058414	222058678	.	-	.	ID=exon:ENST00000438158.1:2;Parent=ENST00000438158.1;gene...
chr1	HAVANA	exon	222041705	222041922	.	-	.	ID=exon:ENST00000438158.1:3;Parent=ENST00000438158.1;gene...

head¶

jhelp(head, full=True)

head("./data/RADAR_Main.txt", n= 3)

#chromosome position  gene       strand annot1     annot2     alu? non_alu_repetitive? conservation_chimp conservation_rhesus conservation_mouse 
chr1        206256301 C1orf186   -      intronic   intronic   no   no                  N                  N                   N                  
chr6        116991832 intergenic -      intergenic intergenic no   no                  N                  N                   N

head("./data/RADAR_Main.txt", ignore_comment_line=True,n= 3)

chr1 206256301 C1orf186   - intronic   intronic   no no N N N 
chr6 116991832 intergenic - intergenic intergenic no no N N N 
chr7 30504355  NOD1       - intronic   intronic   no no N N N

head("./data/RADAR_Main.txt", n=5, max_char_line=110)

#chromosome position  gene       strand annot1     annot2     alu? non_alu_repetitive? conservation_chimp cons...
chr1        206256301 C1orf186   -      intronic   intronic   no   no                  N                  N   ...
chr6        116991832 intergenic -      intergenic intergenic no   no                  N                  N   ...
chr7        30504355  NOD1       -      intronic   intronic   no   no                  N                  N   ...
chr1        85127959  SSX2IP     -      Syn        Gln->Gln   no   no                  N                  N   ...

head("./data/RADAR_Secondary.txt.gz", n=6, ignore_comment_line=True)

chr1:1037916 Peng et al 2012 Lymphoblastoid cell line 9  66.67 
chr1:1156882 Peng et al 2012 Lymphoblastoid cell line 42 36.59 
chr1:1157460 Peng et al 2012 Lymphoblastoid cell line 66 22.73 
chr1:1252441 Peng et al 2012 Lymphoblastoid cell line 11 72.73 
chr1:1252443 Peng et al 2012 Lymphoblastoid cell line 11 45.45 
chr1:1253357 Peng et al 2012 Lymphoblastoid cell line 31 32.26

head("./data/sample.sam", n=6, ignore_comment_line=True)

chr1|35235|35295|-|5.1   272 chr12 37283     0 61M * 0 0 *                                                  *                                                  
chr1|90965|91025|-|7.57  256 chr16 90215899  0 61M * 0 0 *                                                  *                                                  
chr1|91055|91115|-|7.60  256 chr2  168290980 0 61M * 0 0 *                                                  *                                                  
chr1|92081|92141|-|8.1   272 chr1  268657    0 61M * 0 0 *                                                  *                                                  
chr1|92111|92171|-|8.2   256 chr5  181462264 0 61M * 0 0 *                                                  *                                                  
chr1|110943|111003|-|9.1 0   chrY  24307299  0 61M * 0 0 AATGAAAGATATGTGTTTTTCATATTACCAGGTAGATGATAAGGAGATTT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII

# NOTE(review): ./data/sample_100.bam is also written by the bam_sample test near the
# end of this notebook — confirm the file is shipped in ./data so that this cell
# works on a fresh "Restart & Run All".
head ("./data/sample_100.bam", n=6)

chr1|1736694|1736754|-|168.51      256 chr6  108404793 0  32M29H   * 0 0 *                                                  *                                                  
chr1|20158612|20158672|+|508.32    0   chr1  20158612  60 61M      * 0 0 CTCAGAGGCTTGAAAAGTAGCATCCACCCCCTTCTGGGCATCAATCACAG IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII 
chr1|47096793|47096853|-|1008.6    272 chr1  156061950 0  2H54M5H  * 0 0 *                                                  *                                                  
chr1|65003940|65004000|-|1364.17   256 chr13 107349700 0  16M1I44M * 0 0 *                                                  *                                                  
chr1|108202106|108202166|+|1958.74 0   chr1  108202106 60 61M      * 0 0 GGACAGAAAACAAATCAGTAGTTACCAGTTGTGACTAGCGGGAAGGGAAT IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII 
chr1|147173091|147173151|+|2353.12 272 chr2  74122749  0  22H39M   * 0 0 *                                                  *

linesample¶

jhelp(linesample, full=True)

linesample("./data/RADAR_clean.txt", n_lines=10, line_numbering=True)

77	chr1	6710595	6710595	A>I|DNAJC11|YH|22327324	50.00	-
96	chr1	10521237	10521237	A>I|DFFA|YH|22327324	17.65	-
266	chr1	32737172	32737172	A>I|LCK|YH|22327324	35.71	+
342	chr1	40205396	40205396	A>I|PPIE|YH|22327324	63.64	+
448	chr1	52875019	52875019	A>I|PRPF38A|YH|22327324	38.89	+
533	chr1	85449497	85449497	A>I|MCOLN2|YH|22327324	20.00	-
610	chr1	114296188	114296188	A>I|PHTF1|YH|22327324	20.59	-
767	chr1	155444343	155444343	A>I|ASH1L|YH|22327324	42.86	-
824	chr1	157516004	157516004	A>I|FCRL5|YH|22327324	21.88	-
946	chr1	204526795	204526795	A>I|MDM4|YH|22327324	32.29	+

linesample("./data/RADAR_Secondary.txt.gz", n_lines=10, line_numbering=True)

4	chr1:1252441	Peng et al 2012	Lymphoblastoid cell line	11	72.73
8	chr1:1418532	Peng et al 2012	Lymphoblastoid cell line	5	60.00
51	chr1:6608345	Peng et al 2012	Lymphoblastoid cell line	13	46.15
56	chr1:6707305	Peng et al 2012	Lymphoblastoid cell line	33	39.39
61	chr1:6708354	Peng et al 2012	Lymphoblastoid cell line	15	40.00
62	chr1:6708680	Peng et al 2012	Lymphoblastoid cell line	24	25.00
63	chr1:6708681	Peng et al 2012	Lymphoblastoid cell line	24	20.83
75	chr1:6710585	Peng et al 2012	Lymphoblastoid cell line	30	65.52
90	chr1:10520702	Peng et al 2012	Lymphoblastoid cell line	98	11.22
93	chr1:10520751	Peng et al 2012	Lymphoblastoid cell line	166	28.92

count_uniq¶

jhelp(count_uniq, full=True)

count_uniq("./data/Small_editing_Peng_hg38.bed", colnum=17, sep=['\t',"|"])

17
intergenic    110
intron         55
3-UTR          17
unknown        12
dtype: int64

count_uniq("./data/gencode_sample.gff3", colnum=17, sep=["\t","=", ";"], select_values={2:["transcript", "exon"], 6:"+"})

17
lincRNA                     2031
antisense                   1600
processed_transcript         686
sense_intronic               105
TEC                           36
sense_overlapping             11
3prime_overlapping_ncrna       2
dtype: int64

colsum¶

jhelp(colsum, full=True)

display(Markdown(colsum("./data/RADAR_Main.txt", header=True, colrange=[0,2,6], max_items=15)))

colsum("./data/RADAR_Main.txt", header=True, ret_type="dict", colrange=[0,3])

OrderedDict([(0,
              OrderedDict([('chr1', 4),
                           ('chr6', 2),
                           ('chr7', 1),
                           ('chr15', 2),
                           ('chr9', 2),
                           ('chr17', 3),
                           ('chr4', 1),
                           ('chrY', 1),
                           ('chr2', 1),
                           ('chr18', 1),
                           ('chr14', 1)])),
             (3, OrderedDict([('-', 10), ('+', 9)]))])

# Build the plain-text report first, then print it
clean_report = colsum(
    "./data/RADAR_clean.txt",
    header=True,
    ignore_hashtag_line=True,
    ret_type="report",
    separator=["\t", "|"],
    max_items=5,
)
print(clean_report)

0
	chr1	997
1
	225974746	1
	225974735	1
	225974581	1
	224599486	1
	224584888	1
	...	...
2
	225974746	1
	225974735	1
	225974581	1
	224599486	1
	224584888	1
	...	...
3
	A>I	997
4
	FDPS	34
	MDM4	31
	CTSS	28
	DNAJC11	25
	S100PBP	24
	...	...
5
	YH	997
6
	22327324	997
7
	33.33	31
	66.67	31
	50.00	23
	57.14	22
	60.00	22
	...	...
8
	-	527
	+	470

fastcount¶

jhelp(fastcount, full=True)

fastcount("./data/RADAR_Secondary.txt")

100

fastcount("./data/RADAR_Secondary.txt.gz")

100

simplecount¶

jhelp(simplecount, full=True)

simplecount("./data/Small_m5C_Squires_hg38.bed", ignore_hashtag_line=True)

194

simplecount("./data/RADAR_Secondary.txt.gz")

100

DIRECTORY MANIPULATION¶

mkdir¶

jhelp(mkdir, full=True)

mkdir("./data/test_dir")

mkdir ("./test/test/test")
!rm -rf ./test

Creating /home/aleg/Programming/pycltools/docs/test
Creating /home/aleg/Programming/pycltools/docs/test/test
Creating /home/aleg/Programming/pycltools/docs/test/test/test

SHELL MANIPULATION¶

make_cmd_str¶

jhelp(make_cmd_str, full=True)

make_cmd_str("bwa", {"-b":None, "-t":6, "-i":"../idx/seq.fa"}, ["../read1", "../read2"])

'bwa -b -t 6 -i ../idx/seq.fa ../read1 ../read2 '

bash_basic¶

jhelp(bash_basic, full=True)

# bash_basic prints the command output itself and returns None, so the calls are not
# wrapped in print(): doing so only appended a stray "None" after each output.
bash_basic("ls -l")
bash_basic("echo TTTT")
bash_basic("grep ./data/RADAR_Secondary.txt")

total 136
drwxrwxr-x 3 aleg aleg  4096 Dec 10 10:54 data
-rw-rw-r-- 1 aleg aleg 39582 Dec 10 12:05 pycltools_functions_list.ipynb
-rw-rw-r-- 1 aleg aleg 93686 Dec 10 12:07 pycltools_tests.ipynb


None
TTTT


None


None

bash¶

jhelp(bash, full=True)

bash("ls", print_stdout=True, ret_stdout=True,)

data
pycltools_functions_list.ipynb
pycltools_tests.ipynb

'data\npycltools_functions_list.ipynb\npycltools_tests.ipynb\n'

bash("for i in 1 2 3 4; do echo $i && sleep 1 && ls error ;done", live="stderr",  print_stdout=True, ret_stdout=True, print_stderr=True)

ls: cannot access 'error': No such file or directory
ls: cannot access 'error': No such file or directory
ls: cannot access 'error': No such file or directory
ls: cannot access 'error': No such file or directory
Error code #2 during execution of the command : for i in 1 2 3 4; do echo $i && sleep 1 && ls error ;done

bash("ls", print_stdout=False, ret_stdout=False, log_stdout="./data/stdout.txt")
head("./data/stdout.txt")

Only 3 lines in the file
data                           
pycltools_functions_list.ipynb 
pycltools_tests.ipynb

bash_update¶

jhelp(bash_update, full=True)

#bash_update("htop")

DICTIONARY FORMATTING¶

dict_to_md¶

jhelp(dict_to_md, full=True)

d = {"a":12,"b":14,"c":8,"d":56,"e":76}
display(Markdown(dict_to_md(d, "Letter", "Number", sort_by_val=True)))
display(Markdown(dict_to_md(d, "Letter", "Number", transpose=True, max_items=3)))

dict_to_report¶

jhelp(dict_to_report, full=True)

d = {"a":12,"b":14,"c":{"c1":12,"c2":{"c2.1":33221,"c2.2":765},"c3":32,"c4":443},"d":56,"e":76}
print(dict_to_report(d, tab=" | "))

d = {"a":12,"b":14,"c":{"c1":12,"c2":{"c2.1":33221,"c2.2":765, "c2.3":7533,"c2.4":76433,"c2.5":876543,"c2.6":89765,"c2.7":8654},"c3":32,"c4":443},"d":56,"e":76}
print(dict_to_report(d, tab="--", max_items=4, sort_dict=True))

a:12
b:14
c
 | c1:12
 | c2
 |  | c2.1:33221
 |  | c2.2:765
 | c3:32
 | c4:443
d:56
e:76

a:12
b:14
c
--c1:12
--c2
----c2.5:876543
----c2.6:89765
----c2.4:76433
----c2.1:33221
----...:...
--c3:32
--c4:443
d:56
e:76

TABLE FORMATTING¶

reformat_table¶

jhelp(reformat_table, full = True)

# With numeric index
reformat_table(
    input_file="./data/Small_m5C_Squires_hg38.bed",
    output_file="./data/Small_m5C_Squires_hg38_reformat.bed",
    init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"\t",5,"\t",6],
    final_template=[0,"\t",1,"\t",2,"\tm5C|*|HeLa|22344696\t-\t",6],
    replace_internal_space='_',
    replace_null_val="*",
    keep_original_header=False,
    header="# New header\n"
    )

linerange ("./data/Small_m5C_Squires_hg38.bed")
linerange ("./data/Small_m5C_Squires_hg38_reformat.bed")

0	# Transcriptome-wide map of m5C [hg38 coordinates]
1	# Reference: Squires et al., Nucleic Acids Res. 40, 5023 (2012) [PMID 22344696, DOI 10.1093/nar/gks144]
2	#
...
197	chr1	19311959	19311960	Squires|id185	0	-
198	chr1	19608342	19608343	Squires|id186	0	+
199	chr1	19608343	19608344	Squires|id187	0	+
0	# New header
1	chr1	631539	631540	m5C|*|HeLa|22344696	-	+
2	chr1	631540	631541	m5C|*|HeLa|22344696	-	+
...
192	chr1	19311959	19311960	m5C|*|HeLa|22344696	-	-
193	chr1	19608342	19608343	m5C|*|HeLa|22344696	-	+
194	chr1	19608343	19608344	m5C|*|HeLa|22344696	-	+

# With str index
reformat_table(
    input_file="./data/Small_m5C_Squires_hg38.bed",
    output_file="./data/Small_m5C_Squires_hg38_reformat.bed",
    init_template=["{chrom}","\t","{start}","\t","{end}","|","{name}","\t","{score}","\t","{strand}"],
    final_template=["{start}","\t","{end}","\tadditional_informations\t","{name}"],
    replace_internal_space='_',
    replace_null_val="*",
    keep_original_header=False,
    header="# New header\n",
    verbose=True
    )

linerange ("./data/Small_m5C_Squires_hg38.bed")
linerange ("./data/Small_m5C_Squires_hg38_reformat.bed")

Enumerated named argument list:
	verbose: True
	standard_template: None
	predicate: None
	filter_dict: []
	subst_dict: {}
	replace_null_val: *
	replace_internal_space: _
	header_from_final_template: False
	keep_original_header: False
	header: # New header

	final_template: ['{start}', '\t', '{end}', '\tadditional_informations\t', '{name}']
	init_template: ['{chrom}', '\t', '{start}', '\t', '{end}', '|', '{name}', '\t', '{score}', '\t', '{strand}']
	return_df: False
	output_file: ./data/Small_m5C_Squires_hg38_reformat.bed
	input_file: ./data/Small_m5C_Squires_hg38.bed
Unenumerated named arguments list:
Initial template values
chrom	start	end|name	score	strand
Final template values
start	end	additional_informations	name
194 Lines processed	194 Lines pass	0 Lines filtered out	0 Lines fail
0	# Transcriptome-wide map of m5C [hg38 coordinates]
1	# Reference: Squires et al., Nucleic Acids Res. 40, 5023 (2012) [PMID 22344696, DOI 10.1093/nar/gks144]
2	#
...
197	chr1	19311959	19311960	Squires|id185	0	-
198	chr1	19608342	19608343	Squires|id186	0	+
199	chr1	19608343	19608344	Squires|id187	0	+
0	# New header
1	631539	631540	Squires	additional_informations	id1
2	631540	631541	Squires	additional_informations	id2
...
192	19311959	19311960	Squires	additional_informations	id185
193	19608342	19608343	Squires	additional_informations	id186
194	19608343	19608344	Squires	additional_informations	id187

subst_dict = {0:{"chr1":"1", "chr2":"2"}, 3:{"Peng":"22344696"}}
filter_dict = {18:["intron"]}
input_file="./data/Small_editing_Peng_hg38.bed"
output_file="./data/Small_editing_Peng_hg38_reformat.bed"

reformat_table(
    input_file, output_file,
    init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"|",5,"|",6,"|",7,"|",8,"|",9,"->",10,"|",11,"%|",12,"|",13,"|",14,"|",15,"|",16,"|",17,"|",18,"|",19,"\t",20,"\t",21],
    final_template=[0,"\t",1,"\t",2,"\t",9,">",10,"|",3,"|HeLa|",19,"\t",11,"\t",21],
    replace_internal_space='_',
    replace_null_val="*",
    subst_dict = subst_dict,
    filter_dict = filter_dict,
    verbose=True
    )

linerange (input_file)
linerange (output_file)

Enumerated named argument list:
	verbose: True
	standard_template: None
	predicate: None
	filter_dict: {18: ['intron']}
	subst_dict: {0: {'chr1': '1', 'chr2': '2'}, 3: {'Peng': '22344696'}}
	replace_null_val: *
	replace_internal_space: _
	header_from_final_template: False
	keep_original_header: True
	header: 
	final_template: [0, '\t', 1, '\t', 2, '\t', 9, '>', 10, '|', 3, '|HeLa|', 19, '\t', 11, '\t', 21]
	init_template: [0, '\t', 1, '\t', 2, '\t', 3, '|', 4, '|', 5, '|', 6, '|', 7, '|', 8, '|', 9, '->', 10, '|', 11, '%|', 12, '|', 13, '|', 14, '|', 15, '|', 16, '|', 17, '|', 18, '|', 19, '\t', 20, '\t', 21]
	return_df: False
	output_file: ./data/Small_editing_Peng_hg38_reformat.bed
	input_file: ./data/Small_editing_Peng_hg38.bed
Unenumerated named arguments list:
Initial template values
0	1	2	3|4|5|6|7|8|9->10|11%|12|13|14|15|16|17|18|19	20	21
Final template values
0	1	2	9>10|3|HeLa|19	11	21
194 Lines processed	139 Lines pass	55 Lines filtered out	0 Lines fail
0	# Transcriptome-wide map of editing sites [hg38 coordinates]
1	# Reference: Peng et al., Nat. Biotechnol. 30, 253 (2012) [PMID 22327324, DOI 10.1038/nbt.2122]
2	#
...
197	chr1	9173454	9173455	Peng|chr1|9156101|-|T|Y|A->G|35.14%|99|T|24|C|13|37|intergenic|-	0	-
198	chr1	9173533	9173534	Peng|chr1|9156180|-|T|Y|A->G|24.10%|61|T|148|C|47|195|intergenic|-	0	-
199	chr1	9173535	9173536	Peng|chr1|9156182|-|T|Y|A->G|66.15%|99|C|129|T|66|195|intergenic|-	0	-
0	# Transcriptome-wide map of editing sites [hg38 coordinates]
1	# Reference: Peng et al., Nat. Biotechnol. 30, 253 (2012) [PMID 22327324, DOI 10.1038/nbt.2122]
2	#
...
142	1	9173454	9173455	A>G|22344696|HeLa|-	35.14	-
143	1	9173533	9173534	A>G|22344696|HeLa|-	24.10	-
144	1	9173535	9173536	A>G|22344696|HeLa|-	66.15	-

input_file="./data/Small_editing_Peng_hg38.bed"

df = reformat_table(
    input_file,
    return_df=True,
    init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"|",5,"|",6,"|",7,"|",8,"|",9,"->",10,"|",11,"%|",12,"|",13,"|",14,"|",15,"|",16,"|",17,"|",18,"|",19,"\t",20,"\t",21],
    replace_internal_space='_',
    replace_null_val="*",
    verbose=True)

# head() prints the lines itself and returns None: call it directly instead of
# wrapping it in print(), which only appended a stray "None" to the output.
head(input_file, 11)

df.head()

Enumerated named argument list:
	verbose: True
	standard_template: None
	predicate: None
	filter_dict: []
	subst_dict: {}
	replace_null_val: *
	replace_internal_space: _
	header_from_final_template: False
	keep_original_header: True
	header: 
	final_template: []
	init_template: [0, '\t', 1, '\t', 2, '\t', 3, '|', 4, '|', 5, '|', 6, '|', 7, '|', 8, '|', 9, '->', 10, '|', 11, '%|', 12, '|', 13, '|', 14, '|', 15, '|', 16, '|', 17, '|', 18, '|', 19, '\t', 20, '\t', 21]
	return_df: True
	output_file: 
	input_file: ./data/Small_editing_Peng_hg38.bed
Unenumerated named arguments list:
No final template given. Create final template from init template
Initial template values
0	1	2	3|4|5|6|7|8|9->10|11%|12|13|14|15|16|17|18|19	20	21
Final template values
0	1	2	3|4|5|6|7|8|9->10|11%|12|13|14|15|16|17|18|19	20	21
# Transcriptome-wide map of editing sites [hg38 coordinates]
# Reference: Peng et al., Nat. Biotechnol. 30, 253 (2012) [PMID 22327324, DOI 10.1038/nbt.2122]
#
# Data cleaned and converted to BED6, coordinate conversion to hg38 using liftOver.
# Maintainer: Maurits Evers (maurits.evers@anu.edu.au)
#
chr1	1102535	1102536	Peng|chr1|1027779|-|T|Y|A->G|66.67%|37|C|6|T|3|9|intron|C1orf159	0	-
chr1	1221501	1221502	Peng|chr1|1146745|-|T|Y|A->G|36.59%|99|T|26|C|15|42|intron|SDF4	0	-
chr1	1222079	1222080	Peng|chr1|1147323|-|T|Y|A->G|22.73%|94|T|51|C|15|66|intron|SDF4	0	-
chr1	1251840	1251841	Peng|chr1|1177084|-|T|Y|A->G|56.25%|99|C|9|T|7|16|intergenic|-	0	-
chr1	1252243	1252244	Peng|chr1|1177487|-|T|Y|A->G|19.44%|30|T|29|C|7|36|intergenic|-	0	-

None

input_file = "./data/gencode_sample.gff3"

df = reformat_table(
    input_file,
    return_df=True,
    standard_template="gff3_ens_transcript", 
    keep_original_header=False,
    header_from_final_template= True,
    verbose=True
    )

# head() prints the lines itself and returns None: call it directly instead of
# wrapping it in print(), which only appended a stray "None" to the output.
head(input_file, 11)
df.head()

Enumerated named argument list:
	verbose: True
	standard_template: gff3_ens_transcript
	predicate: None
	filter_dict: []
	subst_dict: {}
	replace_null_val: *
	replace_internal_space: _
	header_from_final_template: True
	keep_original_header: False
	header: 
	final_template: []
	init_template: []
	return_df: True
	output_file: 
	input_file: ./data/gencode_sample.gff3
Unenumerated named arguments list:
Using gff3 ensembl transcript template. Non-transcript features will be filtered out
No final template given. Create final template from init template
Initial template values
seqid	source	type	start	end	score	strand	phase	ID=ID;Parent=Parent;gene_id=gene_id;transcript_id=transcript_id;gene_type=gene_type;gene_status=gene_status;gene_name=gene_name;transcript_type=transcript_type;transcript_status=transcript_status;transcript_name=transcript_name;level=level;transcript_support_level=transcript_support_level;tag=tag;havana_gene=havana_gene;havana_transcript=havana_transcript
Final template values
seqid	source	type	start	end	score	strand	phase	ID=ID;Parent=Parent;gene_id=gene_id;transcript_id=transcript_id;gene_type=gene_type;gene_status=gene_status;gene_name=gene_name;transcript_type=transcript_type;transcript_status=transcript_status;transcript_name=transcript_name;level=level;transcript_support_level=transcript_support_level;tag=tag;havana_gene=havana_gene;havana_transcript=havana_transcript
##gff-version 3
#description: evidence-based annotation of the human genome (GRCh38), version 24 (Ensembl 83) - long non-coding RNAs
#provider: GENCODE
#contact: gencode-help@sanger.ac.uk
#format: gff3
#date: 2015-12-03
##sequence-region chr1 1 248956422
chr1	HAVANA	gene	29554	31109	.	+	.	ID=ENSG00000243485.3;gene_id=ENSG00000243485.3;gene_type=lincRNA;gene_status=KNOWN;gene_name=RP11-34P13.3;level=2;tag=ncRNA_host;havana_gene=OTTHUMG00000000959.2
chr1	HAVANA	transcript	29554	31097	.	+	.	ID=ENST00000473358.1;Parent=ENSG00000243485.3;gene_id=ENSG00000243485.3;transcript_id=ENST00000473358.1;gene_type=lincRNA;gene_status=KNOWN;gene_name=RP11-34P1...
chr1	HAVANA	exon	29554	30039	.	+	.	ID=exon:ENST00000473358.1:1;Parent=ENST00000473358.1;gene_id=ENSG00000243485.3;transcript_id=ENST00000473358.1;gene_type=lincRNA;gene_status=KNOWN;gene_name=RP11-34P...
chr1	HAVANA	exon	30564	30667	.	+	.	ID=exon:ENST00000473358.1:2;Parent=ENST00000473358.1;gene_id=ENSG00000243485.3;transcript_id=ENST00000473358.1;gene_type=lincRNA;gene_status=KNOWN;gene_name=RP11-34P...

None

WEB TOOLS¶

url_exist¶

jhelp(url_exist, full=True)

url_exist("http://www.google.com") # If this one ever returns False, it is probably the end of the world

True

# NOTE(review): the recorded output for this cell is True although this domain is
# presumably not registered — confirm whether url_exist itself or the network
# (e.g. a DNS resolver redirecting unknown hosts) is responsible.
url_exist("http://www.JUYGKUYHGJHFJ.com")

True

wget¶

jhelp(wget, full=True)

# Empty URL: the recorded run only reports "unknown url type", so the clean-up
# branch is skipped. If a path were returned, print it and delete the file.
downloaded = wget("")
if downloaded:
    print(downloaded)
    remove(downloaded)

unknown url type: ''

# Download a real file, print the path returned by wget, then delete the file so
# the test leaves nothing behind.
# NOTE(review): the recorded run fetched about 259 MB over the network and reported
# progress every 50 MB (presumably controlled by the third argument) — confirm, and
# consider a smaller file to keep "Run All" fast.
outfile = wget("https://www.encodeproject.org/files/ENCFF000HJC/@@download/ENCFF000HJC.bigWig", "test.bigWig", 50000000)
if outfile:
    print(outfile)
    remove(outfile)

Downloading: https://www.encodeproject.org/files/ENCFF000HJC/@@download/ENCFF000HJC.bigWig	Bytes: 258930225
50.0 MB Downloaded	[19.31 %]
100.0 MB Downloaded	[38.62 %]
150.0 MB Downloaded	[57.93 %]
200.0 MB Downloaded	[77.24 %]
250.0 MB Downloaded	[96.55 %]
258.9 MB Downloaded	[100 %]
test.bigWig

FUNCTION TOOLS¶

print_arg¶

jhelp(print_arg, full=True)

def test (A,B,C=7,*args, **kwarg):
    """Dummy function used to exercise print_arg().

    It accepts named, extra positional and extra keyword arguments and only calls
    print_arg() from inside its own body.
    """
    print_arg()

test(1,2,3,5, z=65, x=100)

Enumerated named argument list:
	C: 3
	B: 2
	A: 1
Unenumerated named arguments list:
	z: 65
	x: 100
Unnamed positional arguments list:
	5

SSH TOOLS¶

scp¶

jhelp(scp, full=True)

# NOTE(review): both calls below are left commented out -- presumably because they need SSH
# access to a remote host. Adapt hostname / username / key path to your own setup before enabling.
#scp(hostname="ebi-cli-001.ebi.ac.uk", local_file="../README.md", remote_dir="~/test", username="aleg", rsa_private_key="/home/aleg/.ssh/ebi_rsa")

#scp(hostname="ebi", local_file="../README.md", remote_dir="~/test")

PACKAGE TOOLS¶

get_package_file¶

jhelp(get_package_file, full=True)

# NOTE(review): the recorded warning says the 'pyCL' distribution was not found. 'pyCL' looks
# like a former name of this package (the functions are imported from pycltools.pycltools), so
# this call presumably should target "pycltools" instead -- confirm and re-run.
get_package_file("pyCL", "pyCL/")

/home/aleg/Programming/pycltools/pycltools/pycltools.py:1947: UserWarning: The 'pyCL' distribution was not found and is required by the application
  warnings.warn(str(E))

SAM/BAM TOOLS¶

bam_sample¶

jhelp(bam_sample, full=True)

# SAM -> SAM: ask for 100 reads out of sample.sam; verbose=True prints the read counts shown below
bam_sample("./data/sample.sam", fp_out="./data/sample_100.sam", n_reads=100, verbose=True)
# Preview 10 lines of the sampled file; long lines appear truncated in the recorded output (max_char_line=100)
linesample("./data/sample_100.sam", n_lines=10, max_char_line=100)

Found 5000 reads in input file
Wrote 100 reads in output file
20	@SQ	SN:chr21	LN:46709983
44	@SQ	SN:KI270305.1	LN:1472
111	@SQ	SN:KI270508.1	LN:1951
146	@SQ	SN:KI270710.1	LN:40176
170	@SQ	SN:KI270734.1	LN:165050
171	@SQ	SN:KI270735.1	LN:42811
217	chr14|61657775|61657835|+|13447.7	272	chr7	127489894	0	61M	*	0	0	*	*	NM:i:3	MD:Z:39A14A4A1	AS:i:49
234	chr17|43159683|43159737|-|19991.10	272	chr9	131908717	0	55M	*	0	0	*	*	NM:i:0	MD:Z:55	AS:i:55
239	chr18|14010134|14010194|+|21568.4	272	chr5	4925139	0	61M	*	0	0	*	*	NM:i:0	MD:Z:61	AS:i:61
266	chr3|138485055|138485115|+|33361.101	256	chr12	6132886	0	61M	*	0	0	*	*	NM:i:5	MD:Z:16G4C0A3C25G8	AS:...

# SAM -> BAM: same sampling, but written to a .bam path (output format presumably chosen from the extension)
bam_sample("./data/sample.sam", fp_out="./data/sample_100.bam", n_reads=100, verbose=True)
# Decode the BAM with samtools to inspect the first records (requires samtools to be available in the shell)
!samtools view "./data/sample_100.bam" | head

Found 5000 reads in input file
Wrote 100 reads in output file
chr1|805036|805096|+|89.10	272	chr8	436410	0	61M	*	0	0	*	*	NM:i:3	MD:Z:7A19C0A32	AS:i:46
chr1|110408997|110409057|+|2013.22	272	chr15	35143322	0	13H48M	*	0	0	*	*	NM:i:3	MD:Z:37G0A2T6	AS:i:37
chr1|121462469|121462529|+|2240.83	0	chr1	121462469	48	61M	*	0	0	AATCTATTTATTTATTTTTCTTCAGTGTTACAATGAAACAACATTGCTTTATTTAAATTTT	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:61	AS:i:61	XS:i:46
chr1|205386423|205386483|+|3446.41	272	KI270750.1	53599	0	7H47M7H	*	0	0	*	*	NM:i:3	MD:Z:27C0A6A11	AS:i:32
chr1|221508699|221508759|+|3731.6	272	chrX	69857918	0	37M24H	*	0	0	*	*	NM:i:0	MD:Z:37	AS:i:37
chr1|246607871|246607931|+|4121.10	256	chr19	29557507	0	19H42M	*	0	0	*	*	NM:i:0	MD:Z:42	AS:i:42
chr10|14878128|14878188|-|4488.16	256	chr15	84959120	0	17H44M	*	0	0	*	*	NM:i:0	MD:Z:44	AS:i:44
chr10|65751058|65751118|+|5083.9	272	chr2	222774610	0	18H43M	*	0	0	*	*	NM:i:1	MD:Z:6G36	AS:i:38
chr10|106187699|106187759|+|5744.7	272	chr10	73831305	0	61M	*	0	0	*	*	NM:i:5	MD:Z:0T44C4T3A5C0	AS:i:44
chr10|125698897|125698957|+|5980.5	0	chr10	125698897	60	61M	*	0	0	AGGTGGGCTCCATTTGGCCTCCTTCCTTGGTCCATTCTCATCTTCCTGGGCCCTGCGGATG	IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII	NM:i:0	MD:Z:61	AS:i:61	XS:i:0

# Failure case: input path with an unsupported extension (.txt); the recorded run only emits
# the "Invalid input file format" warning
bam_sample("./data/sample.txt", fp_out="./data/sample_100.bam", n_reads=100, verbose=True)

/home/aleg/Programming/pycltools/pycltools/pycltools.py:1990: UserWarning: Invalid input file format (.bam/.sam/.cram)
  warnings.warn ("Invalid input file format (.bam/.sam/.cram)")

# Failure case: output path with an unsupported extension (.txt); the recorded run only emits
# the "Invalid output file format" warning
bam_sample("./data/sample.sam", fp_out="./data/sample_100.txt", n_reads=100, verbose=True)

/home/aleg/Programming/pycltools/pycltools/pycltools.py:1999: UserWarning: Invalid output file format (.bam/.sam/.cram)
  warnings.warn ("Invalid output file format (.bam/.sam/.cram)")

DNA SEQUENCE TOOLS¶

base_generator¶

jhelp(base_generator, full = True)

# Default generator: draw 10 bases one at a time with next()
# NOTE(review): no random seed is set in this section, so the bases recorded below presumably
# change on every run
bg = base_generator()
for i in range(10):
    print (next(bg))

A
T
C
A
T
C
G
G
T
T

# Custom alphabet including N, with one weight per base
# NOTE(review): the weights sum to 2.1, so they are presumably treated as relative weights
# rather than probabilities -- confirm.
# NOTE(review): the recorded draw below is G-rich although G has one of the lowest weights,
# whereas make_sequence with the same weights (further down) is A/T-rich. The recorded output
# may be stale, or the weights may not be applied as intended here -- confirm.
bg = base_generator(bases=['A', 'T', 'C', 'G', 'N'], weights=[0.8, 0.8, 0.2, 0.2, 0.1])
for i in range(10):
    print (next(bg))

G
T
A
G
G
G
G
A
T
G

make_sequence¶

jhelp(make_sequence, full=True)

# All defaults: the recorded result is a single string made of A, T, C and G
make_sequence()

'ACTGGCGTCGGATCGTGAGGTACTGATATTTCCGGCTCGCTGCCTATACCTATCAGTCCAAGTATGATGACTAGGAAGAACGCTAGTAATAGTGGGCGTTCACGGTTGAGAACCTCTTATTCATGGAAATAAATATTGAGTCTTGTGGGTCTGATAAGCGTTCCCCAAGTAAGTACGAAAAATCTGAGAGCCAAAGGAACTACCGTTATGAGGATCTCTGTTTAAATTCTGATAATATGTATTTGGATCCGAAATACGCGGTGATGGTGTGTAGTTACCTTAGGCTGATCGGTAAGCACTGCATCTACAGTTATAGTCCCCACTTTTCGTTTGCAAGCAAAAGTTGATCTATGTCACCCTCAATCTCGTAAAGGTGTTGCTATGGTTAAAGTAAGTGTCTCCTAGTGCTGATCAGAGCAAACGCTAAGGGAAAGGGGAGCTAAGCCCTTATGATCAAAGAGACAGATGGCTTAGCGCCCAATTCAGCTATTATGTGAAATACATGTACGGGAAAAATTCTTCACTTGGAAGAAACAATGGTGAGTCTTTATCCAGGAACATGTAAGGAATTTGTAGTTCCAAATTCGGTCTATGTCCAATGATGACAGAAGCTAACGTATTGCGTTATGAATCAGGTGTACTTGTGTTTGATTTTAGTAATCCTTCGACTGAATTTGCATCTGTGGACGAGATATCACGGAGATTTGGGTGTCTCTACTTGAACATCATAGTTTGTCATAGGGCTAGTTCTTGGCATTTAATAAAATTAATAATATTGACTAATAACAACGCGACTGTTCGTCGCTAAATTGAAAACCATACAATGATCTATTTCAATACCTATTTGTCCCCACAGTAATCGATTTGCTTTATTTATAAGAGAAGATTATCAATATTTTAAGTTCTATGAATTCCTAGCACTCATAGGTCTGTGTCCCGGTGTTCCAATCTGGTGTCAACGTCGATCAGCCTTTGTCTAGTTCTTAATCTAGAGTTTAGT'

# Five-letter alphabet with an empty weights list: in the recorded result all five bases, N
# included, appear frequently -- presumably an empty list means uniform weights; confirm
make_sequence(bases=['A', 'T', 'C', 'G', 'N'], weights=[], length=100)

'TATNGGATTNANGGCGTNGAATGNATNANCGTTGNNCCAAATTGANCGNTGTNNTTNGATNNTNAGGCTTGCCCTCNCGCAAAACCNGNCAACTTNNNNG'

# Weighted draw favouring A and T (0.8 each) over C and G (0.2 each) and N (0.1); the recorded
# result is visibly A/T-rich
make_sequence(bases=['A', 'T', 'C', 'G', 'N'], weights=[0.8, 0.8, 0.2, 0.2, 0.1], length=100)

'ATCATGATCGNTTTTAATCAAAATTATCTTAATAAATTAATTTCTATTTTANGNAANAGATATCTNTCTTCCTNATACNCAATATAAGTTAAAACTAGGG'

	0	1	2	3	4	5	6	7	8	9	...	12	13	14	15	16	17	18	19	21
0	chr1	1102535	1102536	Peng	chr1	1027779	-	T	Y	A	...	37	C	6	T	3	9	intron	C1orf159	-
1	chr1	1221501	1221502	Peng	chr1	1146745	-	T	Y	A	...	99	T	26	C	15	42	intron	SDF4	-
2	chr1	1222079	1222080	Peng	chr1	1147323	-	T	Y	A	...	94	T	51	C	15	66	intron	SDF4	-
3	chr1	1251840	1251841	Peng	chr1	1177084	-	T	Y	A	...	99	C	9	T	7	16	intergenic	-	-
4	chr1	1252243	1252244	Peng	chr1	1177487	-	T	Y	A	...	30	T	29	C	7	36	intergenic	-	-

	seqid	source	type	start	end	score	strand	phase	ID	Parent	...	gene_status	gene_name	transcript_type	transcript_status	transcript_name	level	transcript_support_level	tag	havana_gene	havana_transcript
0	chr1	HAVANA	transcript	29554	31097	.	+	.	ENST00000473358.1	ENSG00000243485.3	...	KNOWN	RP11-34P13.3	lincRNA	KNOWN	RP11-34P13.3-001	2	5	not_best_in_genome_evidence,dotter_confirmed,b...	OTTHUMG00000000959.2	OTTHUMT00000002840.1
1	chr1	HAVANA	transcript	30267	31109	.	+	.	ENST00000469289.1	ENSG00000243485.3	...	KNOWN	RP11-34P13.3	lincRNA	KNOWN	RP11-34P13.3-002	2	5	not_best_in_genome_evidence,basic	OTTHUMG00000000959.2	OTTHUMT00000002841.2
2	chr1	HAVANA	transcript	34554	36081	.	-	.	ENST00000417324.1	ENSG00000237613.2	...	KNOWN	FAM138A	lincRNA	KNOWN	FAM138A-001	2	1	basic	OTTHUMG00000000960.1	OTTHUMT00000002842.1
3	chr1	HAVANA	transcript	35245	36073	.	-	.	ENST00000461467.1	ENSG00000237613.2	...	KNOWN	FAM138A	lincRNA	KNOWN	FAM138A-002	2	3;havana_gene=OTTHUMG00000000960.1;havana_tran...	*	*	*
4	chr1	HAVANA	transcript	89295	120932	.	-	.	ENST00000466430.5	ENSG00000238009.6	...	KNOWN	RP11-34P13.7	lincRNA	KNOWN	RP11-34P13.7-001	2	5	not_best_in_genome_evidence,basic	OTTHUMG00000001096.2	OTTHUMT00000003225.1

Letter	Number
e	76
d	56
b	14
a	12
c	8