This notebook contains tests for the functions in the pycltools package.
# Jupyter specific imports
from IPython.core.display import display, HTML, Markdown
# Import of required packages
from os import remove
# Import all the functions from pycltools
from pycltools.pycltools import *
# --- Notebook display helpers: jhelp, jprint, toogle_code, larger_display ---
# Show the documentation of jhelp, first through the builtin help() and then
# through jhelp itself.
help(jhelp)
jhelp(jhelp, full=True)
jhelp(jprint, full=True)
# jprint on a single-line string with font, color, size and bold options.
txt="Lorem ipsum condimentum elementum sapien nam eleifend quisque sapien curae"
jprint(txt,font="sans", color="purple", size=200, bold=True)
# Same kind of call on a string containing newlines and tabs, with an explicit
# line_height.
txt="Lorem ipsum\n\tcondimentum elementum\n\t\tsapien nam eleifend quisque\n\t\t\tsapien curae"
jprint(txt,font="sans", color="powderblue", size=200, bold=True, line_height=50)
# Several positional arguments of mixed types (str, int, bool) with many
# formatting keywords spelled out explicitly.
jprint("Lorem","ipsum","condimentum","elementum", 1, True, bold=False, italic=False, highlight=False, underlined=True, striked=False, subscripted=False, superscripted=False, font="calibri", color="grey", size=250, align="center")
jhelp(toogle_code, full=True)
# The call itself is left disabled; only its help is displayed.
#toogle_code()
jhelp(larger_display, full=True)
# NOTE(review): the meaning of 100 is not visible here (presumably a width
# percentage) - confirm against the help printed just above.
larger_display(100)
# --- is_readable_file ---
jhelp(is_readable_file, full=True)
# Probe two paths in turn. An OSError raised by is_readable_file is caught and
# printed instead of stopping the run; "OK" is printed only when the call
# returns without raising.
for candidate_path in ("./data/KJHYTGYUJ", "./data/RADAR_Secondary.txt"):
    try:
        is_readable_file(candidate_path)
    except OSError as E:
        print(E)
    else:
        print("OK")
# --- File type / extension checks: is_gziped, has_extension, file_basename ---
jhelp(is_gziped, full=True)
# Same file name, without and with a ".gz" suffix.
is_gziped("./data/RADAR_Secondary.txt")
is_gziped("./data/RADAR_Secondary.txt.gz")
jhelp(has_extension, full=True)
# Test "gz" and "fa" against a ".txt.gz" path, then "txt" with a third
# positional argument of -2.
# NOTE(review): -2 presumably selects the extension position counted from the
# end - confirm against the help printed just above.
has_extension("./data/test/RADAR_Secondary.txt.gz", "gz")
has_extension("./data/test/RADAR_Secondary.txt.gz", "fa")
has_extension("./data/test/RADAR_Secondary.txt.gz", "txt", -2)
jhelp(file_basename, full=True)
file_basename("./data/RADAR_Secondary.txt.gz")
# --- extensions / extensions_list ---
# For each function: display its help, then print its result for a path with
# two suffixes, one suffix and no suffix.
for ext_func in (extensions, extensions_list):
    jhelp(ext_func, full=True)
    for sample_path in (
        "./data/RADAR_Secondary.txt.gz",
        "./data/RADAR_Secondary.txt",
        "./data/RADAR_Secondary",
    ):
        print(ext_func(sample_path))
# --- file_name / dir_name / dir_path ---
jhelp(file_name, full=True)
file_name("./data/test/RADAR_Secondary.txt.gz")
# For each directory helper: display its help, then print its result for a
# nested relative path, a file in the current directory and an absolute path.
for path_func in (dir_name, dir_path):
    jhelp(path_func, full=True)
    for sample_path in (
        "./data/test/RADAR_Secondary.txt.gz",
        "./__init__.py",
        "/bin/bash",
    ):
        print(path_func(sample_path))
# --- supersplit / rm_blank ---
jhelp(supersplit, full=True)
a = "chr7\t74138\t774138\tA>I|LOC100129917|LUNG:LYMPHOBLASTOID_CELL_LINE|15342557:15258596:22327324\t0"
# Split the same record three ways: with a list of separators, with no
# separator argument at all, and with a single separator string.
for separator_args in ((["\t", "|"],), (), ("|",)):
    print(supersplit(a, *separator_args))
jhelp(rm_blank, full=True)
a = "chr\t\t17|LU NG:LYMPHOBLAST OID_CELL_LINE|15342557:152585 96:22327324\t0"
# Call rm_blank without options, then with an explicit replace value.
for blank_options in ({}, {"replace": "*"}):
    print(rm_blank(a, **blank_options))
# --- copyFile / gzip_file / gunzip_file ---
jhelp(copyFile, full=True)
# Copy the same source file twice: once with a directory as destination
# (the source's own directory) and once with an explicit destination file name.
copyFile(src="./data/RADAR_Secondary.txt", dest="./data/")
copyFile(src="./data/RADAR_Secondary.txt", dest="./data/RADAR_Secondary_copy.txt")
jhelp(gzip_file, full=True)
# gzip_file is called on the plain text file, then gunzip_file on the matching
# ".gz" path.
gzip_file("./data/RADAR_Secondary.txt")
jhelp(gunzip_file, full=True)
gunzip_file("./data/RADAR_Secondary.txt.gz")
# --- linerange / cat ---
jhelp(linerange, full=True)
# Default options on a plain text file.
file = "./data/RADAR_Secondary.txt"
linerange (file)
# Explicit list of [start, end] pairs and a max_char_line value on a gff3 file.
file = "./data/gencode_sample.gff3"
linerange (file, [[2,5],[10,12],[98,100]], max_char_line=100)
# ".gz" input with line numbering switched off.
file = "./data/RADAR_Secondary.txt.gz"
linerange (file, line_numbering=False)
jhelp(cat, full=True)
# ".gz" input with a max_lines value.
file = "./data/RADAR_Secondary.txt.gz"
cat (file, max_lines=10)
# gff3 input with line numbering and a max_char_line value.
file="./data/gencode_sample.gff3"
cat (file, max_lines=20, line_numbering=True, max_char_line=100)
# --- tail / head ---
jhelp(tail, full=True)
# tail with n=4 on a plain text file, on a ".gz" file with line numbering, and
# with n=5 plus a max_char_line value on a gff3 file.
file = "./data/RADAR_clean.txt"
tail (file, n = 4)
file = "./data/RADAR_Secondary.txt.gz"
tail (file, n = 4, line_numbering=True)
file="./data/gencode_sample.gff3"
tail (file, n = 5, max_char_line=100)
jhelp(head, full=True)
# head on the same text file with three option sets: n only,
# ignore_comment_line, and a max_char_line value.
head("./data/RADAR_Main.txt", n= 3)
head("./data/RADAR_Main.txt", ignore_comment_line=True,n= 3)
head("./data/RADAR_Main.txt", n=5, max_char_line=110)
# head on ".gz", ".sam" and ".bam" inputs.
head("./data/RADAR_Secondary.txt.gz", n=6, ignore_comment_line=True)
head("./data/sample.sam", n=6, ignore_comment_line=True)
# NOTE(review): ./data/sample_100.bam is also the fp_out of a bam_sample call
# later in this file - confirm the file already exists when this line runs.
head ("./data/sample_100.bam", n=6)
# --- linesample / count_uniq ---
jhelp(linesample, full=True)
# Same options on a plain text file and on a ".gz" file.
linesample("./data/RADAR_clean.txt", n_lines=10, line_numbering=True)
linesample("./data/RADAR_Secondary.txt.gz", n_lines=10, line_numbering=True)
jhelp(count_uniq, full=True)
# colnum=17 with a list of separators on a bed file.
count_uniq("./data/Small_editing_Peng_hg38.bed", colnum=17, sep=['\t',"|"])
# Same colnum on a gff3 file, with three separators and a select_values dict
# keyed by integers.
# NOTE(review): the keys 2 and 6 are presumably column indexes - confirm
# against the help printed just above.
count_uniq("./data/gencode_sample.gff3", colnum=17, sep=["\t","=", ";"], select_values={2:["transcript", "exon"], 6:"+"})
# --- colsum / fastcount / simplecount ---
jhelp(colsum, full=True)
# The result of colsum is used in three ways below: wrapped in Markdown for
# display (no ret_type given), requested with ret_type="dict", and requested
# with ret_type="report" and printed.
display(Markdown(colsum("./data/RADAR_Main.txt", header=True, colrange=[0,2,6], max_items=15)))
colsum("./data/RADAR_Main.txt", header=True, ret_type="dict", colrange=[0,3])
print(colsum(
"./data/RADAR_clean.txt",
header=True,
ignore_hashtag_line=True,
ret_type="report",
separator=["\t","|"],
max_items=5))
jhelp(fastcount, full=True)
# Same file name, without and with a ".gz" suffix.
fastcount("./data/RADAR_Secondary.txt")
fastcount("./data/RADAR_Secondary.txt.gz")
jhelp(simplecount, full=True)
# A bed file with ignore_hashtag_line set, then a ".gz" file with defaults.
simplecount("./data/Small_m5C_Squires_hg38.bed", ignore_hashtag_line=True)
simplecount("./data/RADAR_Secondary.txt.gz")
# --- mkdir ---
jhelp(mkdir, full=True)
# A single directory under ./data, then a three-level nested path.
mkdir("./data/test_dir")
mkdir ("./test/test/test")
# IPython shell escape: delete the ./test tree used by the call above.
# ./data/test_dir is not removed here.
!rm -rf ./test
# --- Shell helpers: make_cmd_str, bash_basic, bash, bash_update ---
jhelp(make_cmd_str, full=True)
# Program name, a dict of options (one with a None value) and a list of
# positional arguments.
make_cmd_str("bwa", {"-b":None, "-t":6, "-i":"../idx/seq.fa"}, ["../read1", "../read2"])
jhelp(bash_basic, full=True)
# Print whatever bash_basic returns for three commands. The grep command is
# given a single argument only.
print(bash_basic("ls -l"))
print(bash_basic("echo TTTT"))
print(bash_basic("grep ./data/RADAR_Secondary.txt"))
jhelp(bash, full=True)
bash("ls", print_stdout=True, ret_stdout=True,)
# Shell loop over 1..4: echo the number, sleep one second, then run `ls error`.
# NOTE(review): `ls error` presumably exists to produce stderr output for the
# live="stderr" / print_stderr options - confirm.
bash("for i in 1 2 3 4; do echo $i && sleep 1 && ls error ;done", live="stderr", print_stdout=True, ret_stdout=True, print_stderr=True)
# Printing and returning stdout are both switched off and a log_stdout path is
# given; that path is then read back with head.
bash("ls", print_stdout=False, ret_stdout=False, log_stdout="./data/stdout.txt")
head("./data/stdout.txt")
jhelp(bash_update, full=True)
# The call itself is left disabled; only its help is displayed.
#bash_update("htop")
# --- dict_to_md / dict_to_report ---
jhelp(dict_to_md, full=True)
d = {
    "a": 12,
    "b": 14,
    "c": 8,
    "d": 56,
    "e": 76,
}
# Render the flat dict twice: with sort_by_val, then with transpose and a
# max_items value.
sorted_table_md = dict_to_md(d, "Letter", "Number", sort_by_val=True)
display(Markdown(sorted_table_md))
transposed_table_md = dict_to_md(d, "Letter", "Number", transpose=True, max_items=3)
display(Markdown(transposed_table_md))

jhelp(dict_to_report, full=True)
# Dict nested three levels deep, reported with a custom tab string.
d = {
    "a": 12,
    "b": 14,
    "c": {
        "c1": 12,
        "c2": {"c2.1": 33221, "c2.2": 765},
        "c3": 32,
        "c4": 443,
    },
    "d": 56,
    "e": 76,
}
print(dict_to_report(d, tab=" | "))
# Same shape with seven entries in the innermost dict, reported with a
# max_items value and sort_dict set.
d = {
    "a": 12,
    "b": 14,
    "c": {
        "c1": 12,
        "c2": {
            "c2.1": 33221,
            "c2.2": 765,
            "c2.3": 7533,
            "c2.4": 76433,
            "c2.5": 876543,
            "c2.6": 89765,
            "c2.7": 8654,
        },
        "c3": 32,
        "c4": 443,
    },
    "d": 56,
    "e": 76,
}
print(dict_to_report(d, tab="--", max_items=4, sort_dict=True))
# --- reformat_table: file-to-file conversion ---
jhelp(reformat_table, full = True)
# With numeric index
# Both templates alternate integer field indexes with literal separator
# strings. The original header is dropped and a new header line is supplied.
reformat_table(
input_file="./data/Small_m5C_Squires_hg38.bed",
output_file="./data/Small_m5C_Squires_hg38_reformat.bed",
init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"\t",5,"\t",6],
final_template=[0,"\t",1,"\t",2,"\tm5C|*|HeLa|22344696\t-\t",6],
replace_internal_space='_',
replace_null_val="*",
keep_original_header=False,
header="# New header\n"
)
# Show the input and the freshly written output.
linerange ("./data/Small_m5C_Squires_hg38.bed")
linerange ("./data/Small_m5C_Squires_hg38_reformat.bed")
# With str index
# Same input and output paths as above (the output file is written again),
# this time with "{name}"-style placeholders instead of integers, and with
# verbose set.
reformat_table(
input_file="./data/Small_m5C_Squires_hg38.bed",
output_file="./data/Small_m5C_Squires_hg38_reformat.bed",
init_template=["{chrom}","\t","{start}","\t","{end}","|","{name}","\t","{score}","\t","{strand}"],
final_template=["{start}","\t","{end}","\tadditional_informations\t","{name}"],
replace_internal_space='_',
replace_null_val="*",
keep_original_header=False,
header="# New header\n",
verbose=True
)
linerange ("./data/Small_m5C_Squires_hg38.bed")
linerange ("./data/Small_m5C_Squires_hg38_reformat.bed")
# --- reformat_table: with subst_dict and filter_dict ---
# Both dicts are keyed by integers.
# NOTE(review): the keys presumably refer to field indexes of init_template,
# and the exact substitution / filtering semantics are not visible here -
# confirm against the reformat_table help.
subst_dict = {0:{"chr1":"1", "chr2":"2"}, 3:{"Peng":"22344696"}}
filter_dict = {18:["intron"]}
input_file="./data/Small_editing_Peng_hg38.bed"
output_file="./data/Small_editing_Peng_hg38_reformat.bed"
# Input and output paths are passed positionally. init_template declares
# fields 0 to 21 and final_template keeps a subset of them.
reformat_table(
input_file, output_file,
init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"|",5,"|",6,"|",7,"|",8,"|",9,"->",10,"|",11,"%|",12,"|",13,"|",14,"|",15,"|",16,"|",17,"|",18,"|",19,"\t",20,"\t",21],
final_template=[0,"\t",1,"\t",2,"\t",9,">",10,"|",3,"|HeLa|",19,"\t",11,"\t",21],
replace_internal_space='_',
replace_null_val="*",
subst_dict = subst_dict,
filter_dict = filter_dict,
verbose=True
)
# Show the input and the freshly written output.
linerange (input_file)
linerange (output_file)
# --- reformat_table: return_df=True instead of an output file ---
# No output file and no final_template are given; the return value is bound to
# df and .head() is called on it.
input_file="./data/Small_editing_Peng_hg38.bed"
df = reformat_table(
input_file,
return_df=True,
init_template=[0,"\t",1,"\t",2,"\t",3,"|",4,"|",5,"|",6,"|",7,"|",8,"|",9,"->",10,"|",11,"%|",12,"|",13,"|",14,"|",15,"|",16,"|",17,"|",18,"|",19,"\t",20,"\t",21],
replace_internal_space='_',
replace_null_val="*",
verbose=True)
# Print whatever head returns for the input file, then show the top of df.
print(head(input_file, 11))
df.head()
# Same idea on a gff3 file, naming a standard_template instead of spelling out
# init_template, with header_from_final_template set.
input_file = "./data/gencode_sample.gff3"
df = reformat_table(
input_file,
return_df=True,
standard_template="gff3_ens_transcript",
keep_original_header=False,
header_from_final_template= True,
verbose=True
)
print(head(input_file, 11))
df.head()
# --- url_exist ---
jhelp(url_exist, full=True)
# A well-known host, then a host name made of random letters.
url_exist("http://www.google.com") # When this one will be False it will probably be the end of the world
url_exist("http://www.JUYGKUYHGJHFJ.com")
# --- wget ---
jhelp(wget, full=True)
# NOTE(review): the URL passed here is an empty string - confirm whether this
# is a deliberate failure case or a URL lost when the notebook was exported.
outfile = wget("")
# Print and clean up the download only when wget returned a truthy value.
if outfile:
    print(outfile)
    remove(outfile)
# Download to an explicit file name.
# NOTE(review): the third positional argument (50000000) is presumably a size
# or byte limit - confirm against the help printed just above.
outfile = wget("https://www.encodeproject.org/files/ENCFF000HJC/@@download/ENCFF000HJC.bigWig", "test.bigWig", 50000000)
if outfile:
    print(outfile)
    remove(outfile)
# --- print_arg ---
jhelp(print_arg, full=True)
# Wrapper with positional, defaulted, *args and **kwarg parameters, so that
# print_arg is called from inside a function with every kind of argument.
def test (A,B,C=7,*args, **kwarg):
    print_arg()
# Two required positionals, an override of C, one extra positional and two
# keyword arguments.
test(1,2,3,5, z=65, x=100)
# --- scp / get_package_file ---
jhelp(scp, full=True)
# Both scp calls are left disabled; only the help is displayed.
# NOTE(review): presumably disabled because they need a reachable host and a
# private key - confirm.
#scp(hostname="ebi-cli-001.ebi.ac.uk", local_file="../README.md", remote_dir="~/test", username="aleg", rsa_private_key="/home/aleg/.ssh/ebi_rsa")
#scp(hostname="ebi", local_file="../README.md", remote_dir="~/test")
jhelp(get_package_file, full=True)
# NOTE(review): this passes "pyCL" while the import at the top of the file
# uses the package name pycltools - confirm the name is still valid.
get_package_file("pyCL", "pyCL/")
# --- bam_sample ---
jhelp(bam_sample, full=True)
# sam input to sam output, then a look at the written file.
bam_sample("./data/sample.sam", fp_out="./data/sample_100.sam", n_reads=100, verbose=True)
linesample("./data/sample_100.sam", n_lines=10, max_char_line=100)
# sam input to bam output, inspected through an IPython shell escape
# (requires samtools to be available on the PATH).
bam_sample("./data/sample.sam", fp_out="./data/sample_100.bam", n_reads=100, verbose=True)
!samtools view "./data/sample_100.bam" | head
# A ".txt" input path, then a ".txt" output path.
# NOTE(review): presumably these exercise the handling of unsupported
# extensions - confirm the expected outcome.
bam_sample("./data/sample.txt", fp_out="./data/sample_100.bam", n_reads=100, verbose=True)
bam_sample("./data/sample.sam", fp_out="./data/sample_100.txt", n_reads=100, verbose=True)
# --- base_generator / make_sequence ---
jhelp(base_generator, full = True)
# Pull ten items from a generator built with default arguments.
bg = base_generator()
for i in range(10):
    print(next(bg))
# Same, with an explicit five-letter alphabet and one weight per base.
bg = base_generator(bases=['A', 'T', 'C', 'G', 'N'], weights=[0.8, 0.8, 0.2, 0.2, 0.1])
for i in range(10):
    print(next(bg))
jhelp(make_sequence, full=True)
# Default arguments, then length=100 with an empty weights list, then
# length=100 with one weight per base.
make_sequence()
make_sequence(bases=['A', 'T', 'C', 'G', 'N'], weights=[], length=100)
make_sequence(bases=['A', 'T', 'C', 'G', 'N'], weights=[0.8, 0.8, 0.2, 0.2, 0.1], length=100)