Description
Visualizing the Elements Within Bio-Sequences.
Description
Visualizing the types and distribution of elements within bio-sequences. In addition, we have developed a geom layer, geom_rrect(), that can generate rounded rectangles. No external references are used in the development of this package.
README.md
BioVizSeq
1. Introduction
The goal of BioVizSeq is to visualize the types and distribution of elements within bio-sequences. In addition, we have developed a geom layer, geom_rrect(), that can generate rounded rectangles. No external references are used in the development of this package.
2. Installation
Install from CRAN:
# Install from CRAN
install.packages("BioVizSeq")
Install the development version of BioVizSeq from GitHub:
install.packages("devtools")
devtools::install_github("zhaosq2022/BioVizSeq")
3. Library packages
library(BioVizSeq)
#> Registered S3 methods overwritten by 'treeio':
#> method from
#> MRCA.phylo tidytree
#> MRCA.treedata tidytree
#> Nnode.treedata tidytree
#> Ntip.treedata tidytree
#> ancestor.phylo tidytree
#> ancestor.treedata tidytree
#> child.phylo tidytree
#> child.treedata tidytree
#> full_join.phylo tidytree
#> full_join.treedata tidytree
#> groupClade.phylo tidytree
#> groupClade.treedata tidytree
#> groupOTU.phylo tidytree
#> groupOTU.treedata tidytree
#> inner_join.phylo tidytree
#> inner_join.treedata tidytree
#> is.rooted.treedata tidytree
#> nodeid.phylo tidytree
#> nodeid.treedata tidytree
#> nodelab.phylo tidytree
#> nodelab.treedata tidytree
#> offspring.phylo tidytree
#> offspring.treedata tidytree
#> parent.phylo tidytree
#> parent.treedata tidytree
#> root.treedata tidytree
#> rootnode.phylo tidytree
#> sibling.phylo tidytree
#> Package BioVizSeq loaded successfully!
# Extra package
library(ggplot2)
#> Warning: package 'ggplot2' was built under R version 4.3.3
4. Usage cases
4.1 GFF/GTF
Input: a GFF or GTF file
4.1.1 Step by step
gff_path <- system.file("extdata", "idpro.gff3", package = "BioVizSeq")
gff_data <- read.table(gff_path, header = FALSE, sep = '\t')
gff_loc <- gff_to_loc(gff_data)
motif_plot(gff_loc$table_loc, gff_loc$gene_length) +
labs(x="DNA length (5'-3')", y="Gene name")

4.1.2 One step
gff_path <- system.file("extdata", "idpro.gff3", package = "BioVizSeq")
gff_plot(gff_path)

4.2 MEME
meme.xml or mast.xml
4.2.1 Step by step
meme_path <- system.file("extdata", "mast.xml", package = "BioVizSeq")
meme_file <- readLines(meme_path)
motif_loc <- meme_to_loc(meme_file)
motif_plot(motif_loc$table_loc, motif_loc$gene_length)

4.2.2 One step
meme_path <- system.file("extdata", "meme.xml", package = "BioVizSeq")
meme_plot(meme_path)

4.3 PFAM
Download: .tsv
4.3.1 Step by step
pfam_path <- system.file("extdata", "iprscan.tsv", package = "BioVizSeq")
pfam_file <- read.table(pfam_path, sep='\t', header = FALSE)
domain_loc <- pfam_to_loc(pfam_file)
motif_plot(domain_loc$table_loc, domain_loc$gene_length)

4.3.2 One step
pfam_path <- system.file("extdata", "iprscan.tsv", package = "BioVizSeq")
pfam_plot(pfam_path)

4.4 CDD
Download “Superfamily Only”
Type: .txt
4.4.1 Step by step
hitdata_path <- system.file("extdata", "hitdata.txt", package = "BioVizSeq")
cdd_file <- readLines(hitdata_path)
domain_loc <- cdd_to_loc(cdd_file)
fa_path <- system.file("extdata", "idpep.fa", package = "BioVizSeq")
gene_length <- fastaleng(fa_path)
motif_plot(domain_loc, gene_length)

4.4.2 One step
hitdata_path <- system.file("extdata", "hitdata.txt", package = "BioVizSeq")
fa_path <- system.file("extdata", "idpep.fa", package = "BioVizSeq")
cdd_plot(hitdata_path, fa_path)

4.5 SMART
protein file (.fa or .fasta)
4.5.1 Step by step
fa_path <- system.file("extdata", "target.fa", package = "BioVizSeq")
domain_loc <- smart_to_loc(fa_path)
#> Submitting sequence AtAP2_002...
#> Submitting sequence AtAP2_003...
#> Job entered the queue with ID3917111511880801744383449ENDkjSkPHY. Waiting for results.
#> Submitting sequence AtAP2_004...
#> Submitting sequence AtAP2_005...
motif_plot(domain_loc$table_loc, domain_loc$gene_length)

4.5.2 One step
fa_path <- system.file("extdata", "target.fa", package = "BioVizSeq")
smart_plot(fa_path)
#> Submitting sequence AtAP2_002...
#> Submitting sequence AtAP2_003...
#> Job entered the queue with ID39171115118198521744383592TvKgSiKgen. Waiting for results.
#> Submitting sequence AtAP2_004...
#> Submitting sequence AtAP2_005...

4.6 Plantcare
promoter sequence (.fa or .fasta)
4.6.1 Step by step
# 1. Upload the FASTA file to PlantCARE and get the result file (.tab)
# upload_fa_to_plantcare(fasta_file, email)
# 2. Classify the functions of the cis elements
plantcare_path <- system.file("extdata", "plantCARE_output.tab", package = "BioVizSeq")
plantcare_file <- read.table(plantcare_path, header = FALSE, sep = '\t', quote="")
plantcare_data <- plantcare_classify(plantcare_file)
plantcare_loc <- plantcare_to_loc(plantcare_data)
promoter_length <- data.frame(ID = unique(plantcare_loc$ID), length=2000)
motif_plot(plantcare_loc, promoter_length) +
labs(x="Promoter Length", y="Gene")

4.6.2 One step
plantcare_path <- system.file("extdata", "plantCARE_output.tab", package = "BioVizSeq")
plantcare_plot(plantcare_path, promoter_length = 2000)

4.7 Advanced Plot
p_tree, p_gff, p_pfam, p_meme, p_smart, p_cdd, p_plantcare
library(patchwork)
tree_path <- system.file("extdata", "idpep.nwk", package = "BioVizSeq")
gff_path <- system.file("extdata", "idpro.gff3", package = "BioVizSeq")
meme_path <- system.file("extdata", "meme.xml", package = "BioVizSeq")
pfam_path <- system.file("extdata", "iprscan.tsv", package = "BioVizSeq")
plot_file <- combi_p(tree_path = tree_path, gff_path = gff_path,
meme_path = meme_path, pfam_path = pfam_path)
plot_file$p_tree + plot_file$p_gff + plot_file$p_pfam +
plot_file$p_meme +plot_layout(ncol = 4, guides = 'collect') +
plot_annotation(
tag_levels = 'A'
)

library(patchwork)
tree_path <- system.file("extdata", "idpep.nwk", package = "BioVizSeq")
plantcare_path <- system.file("extdata", "plantCARE_output.tab", package = "BioVizSeq")
plot_file <- combi_p(tree_path = tree_path, plantcare_path = plantcare_path, promoter_length = 2000)
plot_file$p_tree + plot_file$p_plantcare1 + plot_file$p_plantcare2 + plot_layout(ncol = 3, guides = 'collect', widths = c(1, 3, 1)) + plot_annotation( tag_levels = 'A' )
