Liang-Bo Wang (亮亮), 2016-05-16
By Liang2 under CC 4.0 BY license
			Esc to overview 
			← → to navigate
		
 
		
	 
	 
	 
	 
	Bioconductor provides tools for the analysis and comprehension of high-throughput genomic data.
source("https://bioconductor.org/biocLite.R")biocLite("PKGNAME")
source("https://bioconductor.org/biocLite.R")
biocLite()   # install fundamental packages
biocLite(c(  # packages required for today's demo
    "Homo.sapiens",
    "BSgenome.Hsapiens.UCSC.hg19",
))
install.packages(c("devtools", "stringr", "plyr"))
		
	 
	| Object Type | Example Package Name | 
|---|---|
| OrgDb | org.Hs.eg.db | 
| TxDb | TxDb.Hsapiens.UCSC.hg19.knownGene | 
| BSgenome | BSgenome.Hsapiens.UCSC.hg19 | 
| OrganismDb | Homo.sapiens | 
| AnnotationHub | AnnotationHub | 
Ref: Bioconductor workflow – Annotation Resources.
Bioconductor contains hundreds of these AnnotationData packages.
We can use these annotation sources to act as a simple genome browser. Here we use Bioconductor to show gene GAPDH in human genome hg19.
devtools::install_github("genomicsclass/ph525x")
biocLite("Gviz")  # Gviz for plotting genomic data
library(ph525x)  # Provides handy helper functions
modPlot("GAPDH", collapse=FALSE, useGeneSym=FALSE)
		
Ref: Annotating phenotypes and molecular function from PH525x series
 
	 
	Behind the scene, modPlot(...) did a lot of queries. Let's find out how to extract this information ourselves using the annotation related API.
Plot is made by Gviz. Though we are not able to introduce it today.
columns(), keytypes(), keys()select(), mapIds()
	> human
OrgDb object:
| DBSCHEMAVERSION: 2.1
| Db type: OrgDb
| Supporting package: AnnotationDbi
| DBSCHEMA: HUMAN_DB
| ORGANISM: Homo sapiens
| SPECIES: Human
| EGSOURCEDATE: 2016-Mar14
| EGSOURCENAME: Entrez Gene
| EGSOURCEURL: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA
| CENTRALID: EG
| TAXID: 9606
| GOSOURCENAME: Gene Ontology
| GOSOURCEURL: ftp://ftp.geneontology.org/pub/go/godatabase/archive/latest-lite/
| GOSOURCEDATE: 20160305
| GOEGSOURCEDATE: 2016-Mar14
| GOEGSOURCENAME: Entrez Gene
| GOEGSOURCEURL: ftp://ftp.ncbi.nlm.nih.gov/gene/DATA
| KEGGSOURCENAME: KEGG GENOME
| KEGGSOURCEURL: ftp://ftp.genome.jp/pub/kegg/genomes
| KEGGSOURCEDATE: 2011-Mar15
| GPSOURCENAME: UCSC Genome Bioinformatics (Homo sapiens)
| GPSOURCEURL: ftp://hgdownload.cse.ucsc.edu/goldenPath/hg19
| GPSOURCEDATE: 2010-Mar22
| ENSOURCEDATE: 2016-Mar9
| ENSOURCENAME: Ensembl
| ENSOURCEURL: ftp://ftp.ensembl.org/pub/current_fasta
| UPSOURCENAME: Uniprot
| UPSOURCEURL: http://www.UniProt.org/
| UPSOURCEDATE: Wed Mar 23 15:50:09 2016
		
		Many metadata about each annotation's original source and last update date can be found here.
library(org.Hs.eg.db)
human <- org.Hs.eg.db
columns(human)  # shows all available columns
# "ACCNUM"   "ALIAS"  "ENSEMBL"  "ENSEMBLPROT" "ENSEMBLTRANS"
# "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
# "GO" "GOALL" "IPI"  "MAP" "OMIM" "ONTOLOGY"  "ONTOLOGYALL" ...
		
		org.Hs.eg.db's "eg" means data come from NCBI Entrez Gene. Info about each column can be found by help("ENSEMBL")
keytypes(orgDb).keys() return values of the given column.
keys(human, "SYMBOL")  # [1] "A1BG" "A2M" "A2MP1" ...
# Passed a pattern to filter the returned symbols
keys(human, keytype="SYMBOL", pattern="MAPK")
# [1] "MAPK14" "MAPK1" "MAPK3" "MAPK4" "MAPK6" "MAPK7" ...
# Accept regex as well
keys(human, keytype="SYMBOL", pattern="^MAPK[[:digit:]]{2}$")
# [1] "MAPK14" "MAPK11" "MAPK10" "MAPK13" "MAPK12" "MAPK15"
		
	select() connects different columns. Say, what are the Entrez Gene IDs and their full name for MAPK gene family?
select(
    human,
    keys = c("MAPK1", "MAPK3", "MAPK6"),
    keytype = "SYMBOL",
    columns = c("ENTREZID", "GENENAME")
)
		
	
# select(...) from the last page
# 'select()' returned 1:1 mapping between keys and columns
  SYMBOL ENTREZID                           GENENAME
1  MAPK1     5594 mitogen-activated protein kinase 1
2  MAPK3     5595 mitogen-activated protein kinase 3
3  MAPK6     5597 mitogen-activated protein kinase 6
		
		How does these functions interact with the underlying database?
-- Get SQLite3 database filepath by `dbfile(org.Hs.eg.db)`
sqlite> SELECT symbol, gene_id AS entrez_id, gene_name
   ...> FROM genes JOIN (
   ...>     SELECT * FROM gene_info
   ...>     WHERE symbol IN ("MAPK1", "MAPK3", "MAPK6")
   ...> ) AS symbol_info
   ...> ON genes._id = symbol_info._id;
symbol	entrez_id	gene_name
MAPK1	5594		mitogen-activated protein kinase 1
MAPK3	5595		mitogen-activated protein kinase 3
MAPK6	5597		mitogen-activated protein kinase 6
	
For example, find all RefSeq IDs for these MAPK-family genes.
select(
    human,
    keys = MAPKS,
    keytype = "SYMBOL",
    columns = c("REFSEQ")
)
			
		
# 'select()' returned 1:many ...
   SYMBOL       REFSEQ
1   MAPK1    NM_002745
2   MAPK1    NM_138957
3   MAPK1    NP_002736
4   MAPK1    NP_620407
5   MAPK3 NM_001040056
6   MAPK3 NM_001109891
# ...
			
		Use mapIds(multiVals="list"), which returns a list-like structure grouped by having the same key (here is SYMBOL).
refseq_ids <- mapIds(
    human,
    keys = MAPKS,
    keytype = "SYMBOL",
    column = "REFSEQ",
    multiVals = "list"
)
			
		
$MAPK1
[1] "NM_002745" "NM_138957" ...
$MAPK3
[1] "NM_001040056" "NM_001109891"
$MAPK6
[1] "NM_002748" "NP_002739" ...
# ...
			
		
refseq_ids$MAPK1  # operate exactly as a list
# [1] "NM_002745" "NM_138957" "NP_002736" "NP_620407"
lapply(  # select only transcript IDs (starting with "NM_")
    refseq_ids,
	function(ids) grep("^NM_", ids, value = TRUE)
)
# $MAPK1
# [1] "NM_002745" "NM_138957"
# $MAPK3
# [1] "NM_001040056" "NM_001109891" "NM_002746" ... 
		
	
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
keytypes(txdb)
# [1] "CDSID" "CDSNAME" "EXONID" "EXONNAME" "GENEID"
# [6] "TXID" "TXNAME"
columns(txdb)
#  [1] "CDSCHROM" "CDSEND" "CDSID" "CDSNAME" "CDSSTART"
#  [6] "CDSSTRAND" "EXONCHROM" "EXONEND" "EXONID" "EXONNAME"
# [11] "EXONRANK" "EXONSTART" "EXONSTRAND" "GENEID" "TXCHROM"
# [16] "TXEND" "TXID" "TXNAME" "TXSTART" "TXSTRAND" "TXTYPE"
		
	transcripts(txdb) returns all transcript records (in GRanges).
txs <- transcripts(txdb); txs
# GRanges object with 82960 ranges and 2 metadata columns:
#                 seqnames           ranges strand |     tx_id     tx_name
#                    <Rle>        <IRanges>  <Rle> | <integer> <character>
#       [1]           chr1 [ 11874,  14409]      + |         1  uc001aaa.3
#       [2]           chr1 [ 11874,  14409]      + |         2  uc010nxq.1
#       [3]           chr1 [ 11874,  14409]      + |         3  uc010nxr.1
#       [4]           chr1 [ 69091,  70008]      + |         4  uc001aal.1
#       [5]           chr1 [321084, 321115]      + |         5  uc001aaq.2
#       ...            ...              ...    ... .       ...         ...
#   [82956] chrUn_gl000237   [    1,  2686]      - |     82956  uc011mgu.1
#   [82957] chrUn_gl000241   [20433, 36875]      - |     82957  uc011mgv.2
#   [82958] chrUn_gl000243   [11501, 11530]      + |     82958  uc011mgw.1
#   [82959] chrUn_gl000243   [13608, 13637]      + |     82959  uc022brq.1
#   [82960] chrUn_gl000247   [ 5787,  5816]      - |     82960  uc022brr.1
#   -------
#   seqinfo: 93 sequences (1 circular) from hg19 genome
		
	Similar extractor functions exist for genes(), exons(), cds().
genes <- genes(txdb); genes
# GRanges object with 23056 ranges and 1 metadata column:
#         seqnames                 ranges strand |     gene_id
#            <Rle>              <IRanges>  <Rle> | <character>
#       1    chr19 [ 58858172,  58874214]      - |           1
#      10     chr8 [ 18248755,  18258723]      + |          10
#     100    chr20 [ 43248163,  43280376]      - |         100
#    1000    chr18 [ 25530930,  25757445]      - |        1000
#   10000     chr1 [243651535, 244006886]      - |       10000
#     ...      ...                    ...    ... .         ...
#    9991     chr9 [114979995, 115095944]      - |        9991
#    9992    chr21 [ 35736323,  35743440]      + |        9992
#    9993    chr22 [ 19023795,  19109967]      - |        9993
#    9994     chr6 [ 90539619,  90584155]      + |        9994
#    9997    chr22 [ 50961997,  50964905]      - |        9997
#   -------
#   seqinfo: 93 sequences (1 circular) from hg19 genome
		
	Can subset the GRanges object via the row id (gene_id).
genes[c("5594", "5595")]
# GRanges object with 2 ranges and 1 metadata column:
#        seqnames               ranges strand |     gene_id
#           <Rle>            <IRanges>  <Rle> | <character>
#   5594    chr22 [22113947, 22221970]      - |        5594
#   5595    chr16 [30125426, 30134630]      - |        5595
#   -------
#   seqinfo: 93 sequences (1 circular) from hg19 genome
		
	How to find transcripts of a gene? How to know the extrons, UTRs of a transcripts?
transcriptsBy(txdb, by = "gene")exonsBy(txdb, by = "tx", use.names = TRUE)
txby <- transcriptsBy(txdb, by="gene"); txby
# GRangesList object of length 23459:
# $1
# GRanges object with 2 ranges and 2 metadata columns:
#       seqnames               ranges strand |     tx_id     tx_name
#          <Rle>            <IRanges>  <Rle> | <integer> <character>
#   [1]    chr19 [58858172, 58864865]      - |     70455  uc002qsd.4
#   [2]    chr19 [58859832, 58874214]      - |     70456  uc002qsf.2
#
# $10
# GRanges object with 1 range and 2 metadata columns:
#       seqnames               ranges strand | tx_id    tx_name
#   [1]     chr8 [18248755, 18258723]      + | 31944 uc003wyw.1
#
# ...
# <23456 more elements>
# -------
# seqinfo: 93 sequences (1 circular) from hg19 genome
		
	transcriptsBy() returns a GRangesList, whose keys are gene IDs (Entrez). Select MAPK1's transcripts and verify with what we found in OrgDb.
txby[["5594"]]
# GRanges object with 3 ranges and 2 metadata columns:
#       seqnames               ranges strand |     tx_id     tx_name
#          <Rle>            <IRanges>  <Rle> | <integer> <character>
#   [1]    chr22 [22113947, 22221970]      - |     74608  uc002zvn.3
#   [2]    chr22 [22123319, 22221970]      - |     74609  uc002zvo.3
#   [3]    chr22 [22123484, 22221970]      - |     74610  uc010gtk.1
#   -------
#   seqinfo: 93 sequences (1 circular) from hg19 genome
		
		Why we know the gene IDs are Entrez Gene IDs? (try printing txdb itself)
exby <- exonsBy(txdb, by = "tx", use.names = TRUE); exby
# GRangesList object of length 82960:
# $uc001aaa.3
# GRanges object with 3 ranges and 3 metadata columns:
#       seqnames         ranges strand |   exon_id   exon_name exon_rank
#          <Rle>      <IRanges>  <Rle> | <integer> <character> <integer>
#   [1]     chr1 [11874, 12227]      + |         1        <NA>         1
#   [2]     chr1 [12613, 12721]      + |         3        <NA>         2
#   [3]     chr1 [13221, 14409]      + |         5        <NA>         3
#
# $uc010nxq.1
# GRanges object with 3 ranges and 3 metadata columns:
#       seqnames         ranges strand | exon_id exon_name exon_rank
#   [1]     chr1 [11874, 12227]      + |       1      <NA>         1
#   [2]     chr1 [12595, 12721]      + |       2      <NA>         2
#   [3]     chr1 [13403, 14409]      + |       6      <NA>         3
#
# ...
# <82957 more elements>
# -------
# seqinfo: 93 sequences (1 circular) from hg19 genome
		
	exonsBy() returns a GRangesList, whose keys are transcript IDs (UCSC KnownGene). Pick one MAPK1 transcript and verify with modPlot()'s result.
exby[["uc002zvn.3"]]
# GRangesList object of length 1:
# $uc002zvn.3
# GRanges object with 9 ranges and 3 metadata columns:
#       seqnames               ranges strand |   exon_id   exon_name exon_rank
#          <Rle>            <IRanges>  <Rle> | <integer> <character> <integer>
#   [1]    chr22 [22221612, 22221970]      - |    264662        <NA>         1
#   [2]    chr22 [22161953, 22162135]      - |    264661        <NA>         2
#   [3]    chr22 [22160139, 22160328]      - |    264660        <NA>         3
#   ...
#   [7]    chr22 [22127162, 22127271]      - |    264656        <NA>         7
#   [8]    chr22 [22123484, 22123609]      - |    264655        <NA>         8
#   [9]    chr22 [22113947, 22118529]      - |    264653        <NA>         9
#
# -------
# seqinfo: 93 sequences (1 circular) from hg19 genome
		
	Extract the chromosome information by seqinfo(), returning a Seqinfo object. Works on all GRanges objects.
si <- seqinfo(txdb); si
# Seqinfo object with 93 sequences (1 circular) from hg19 genome:
#   seqnames       seqlengths isCircular genome
#   chr1            249250621       <NA>   hg19
#   chr2            243199373       <NA>   hg19
#   chr3            198022430       <NA>   hg19
#   ...                   ...        ...    ...
#   chrUn_gl000247      36422       <NA>   hg19
#   chrUn_gl000248      39786       <NA>   hg19
#   chrUn_gl000249      38502       <NA>   hg19
	
	
seqnames(si)  # use its column name as function
# [1] "chr1" "chr2" "chr3" "chr4" "chr5" ...
seqlengths(si)
#      chr1      chr2      chr3      chr4      chr5 ...
# 249250621 243199373 198022430 191154276 180915260 ...
isCircular(si)
# chr1 chr2 chr3 chr4 chr5 ... chrM ...
#   NA   NA   NA   NA   NA ... TRUE ...
		
	For example, Ensembl uses NCBI style naming (1, 2, ...); UCSC prefix all chromosome names with "chr".
seqlevels(txdb)
# [1] "chr1" "chr2" "chr3" "chr4" "chr5" "chr6" ...
seqlevelsStyle(txdb)
# [1] "UCSC"
seqlevelsStyle(txdb) <- "NCBI"  # change the naming style
seqlevels(txdb)
# [1] "1" "2" "3" "4" "5" "6" ...
		
	columns() and select())transcripts() and genes())transcriptsBy() and exonsBy())seqinfo()Homo.sapiens
library(Homo.sapiens)
human <- Homo.sapiens
		
	
	> human
OrganismDb Object:
# Includes GODb Object:  GO.db
# With data about:  Gene Ontology
# Includes OrgDb Object:  org.Hs.eg.db
# Gene data about:  Homo sapiens
# Taxonomy Id:  9606
# Includes TxDb Object:  TxDb.Hsapiens.UCSC.hg19.knownGene
# Transcriptome data about:  Homo sapiens
# Based on genome:  hg19
# The OrgDb gene id ENTREZID is mapped to
# the TxDb gene id GENEID .
		
	
select(
    human,
    keys = c("MAPK1"), keytype = "SYMBOL",
    columns = c("TXNAME", "TXCHROM", "TXSTRAND")
)
#   SYMBOL     TXNAME TXCHROM TXSTRAND
# 1  MAPK1 uc002zvn.3   chr22        -
# 2  MAPK1 uc002zvo.3   chr22        -
# 3  MAPK1 uc010gtk.1   chr22        -
		
	
transcripts(human, columns=c("TXNAME", "SYMBOL"))
# 'select()' returned 1:1 mapping between keys and columns
# GRanges object with 82960 ranges and 2 metadata columns:
#                 seqnames           ranges strand |          TXNAME          SYMBOL
#                    <Rle>        <IRanges>  <Rle> | <CharacterList> <CharacterList>
#       [1]           chr1 [ 11874,  14409]      + |      uc001aaa.3         DDX11L1
#       [2]           chr1 [ 11874,  14409]      + |      uc010nxq.1         DDX11L1
#       [3]           chr1 [ 11874,  14409]      + |      uc010nxr.1         DDX11L1
#       [4]           chr1 [ 69091,  70008]      + |      uc001aal.1           OR4F5
#       [5]           chr1 [321084, 321115]      + |      uc001aaq.2              NA
#       ...            ...              ...    ... .             ...             ...
#   [82956] chrUn_gl000237   [    1,  2686]      - |      uc011mgu.1              NA
#   [82957] chrUn_gl000241   [20433, 36875]      - |      uc011mgv.2              NA
#   [82958] chrUn_gl000243   [11501, 11530]      + |      uc011mgw.1              NA
#   [82959] chrUn_gl000243   [13608, 13637]      + |      uc022brq.1              NA
#   [82960] chrUn_gl000247   [ 5787,  5816]      - |      uc022brr.1              NA
#   -------
#   seqinfo: 93 sequences (1 circular) from hg19 genome
		
		But how can we get the contents columns like TXNAME and SYMBOL?!
 
			Ref: Genomic Ranges For Genome-Scale Data And Annotation, Martin Morgan, 2015.10
 
			Ref: Genomic Ranges For Genome-Scale Data And Annotation, Martin Morgan, 2015.10
?"intra-range-methods")?"inter-range-methods")Ref: Genomic Ranges For Genome-Scale Data And Annotation, Martin Morgan, 2015.10
 
		Ref: Genomic Ranges For Genome-Scale Data And Annotation, Martin Morgan, 2015.10
txs <- transcripts(human, columns=c("TXNAME", "SYMBOL"))
mcols(txs)  # act as data.frame
# DataFrame with 82960 rows and 2 columns
#                TXNAME          SYMBOL
#       <CharacterList> <CharacterList>
# 1          uc001aaa.3         DDX11L1
# 2          uc010nxq.1         DDX11L1
# 3          uc010nxr.1         DDX11L1
# 4          uc001aal.1           OR4F5
# 5          uc001aaq.2              NA
# ...               ...             ...
# 82956      uc011mgu.1              NA
# 82957      uc011mgv.2              NA
# 82958      uc011mgw.1              NA
# 82959      uc022brq.1              NA
# 82960      uc022brr.1              NA
mcols(txs)$SYMBOL  # take SYMBOL column out
		
	
gr1 <- GRanges(
    seqnames = "chr2", ranges = IRanges(3, 6), strand = "+",
    score = 5L, GC = 0.45
); gr1
# GRanges object with 1 range and 2 metadata columns:
#       seqnames    ranges strand |     score        GC
#          <Rle> <IRanges>  <Rle> | <integer> <numeric>
#   [1]     chr2    [3, 6]      + |         5      0.45
#   -------
#   seqinfo: 1 sequence from an unspecified genome; no seqlengths
gr2 <- GRanges(
	seqnames = c("chr1", "chr1"),
	ranges = IRanges(c(7, 13), width = 3), strand = c("+", "-"),
    score = 3:4, GC = c(0.3, 0.5)
)
	
grl <- GRangesList("txA" = gr1, "txB" = gr2); grl
# GRangesList object of length 2:
# $txA
# GRanges object with 1 range and 2 metadata columns:
#       seqnames    ranges strand |     score        GC
#          <Rle> <IRanges>  <Rle> | <integer> <numeric>
#   [1]     chr2    [3, 6]      + |         5      0.45
#
# $txB
# GRanges object with 2 ranges and 2 metadata columns:
#       seqnames   ranges strand | score  GC
#   [1]     chr1 [ 7,  9]      + |     3 0.3
#   [2]     chr1 [13, 15]      - |     4 0.5
#
# -------
# seqinfo: 2 sequences from an unspecified genome; no seqlengths
	
	library(BSgenome.Hsapiens.UCSC.hg19)
Hsapiens
# Human genome:
# # organism: Homo sapiens (Human)
# # provider: UCSC
# # provider version: hg19
# # release date: Feb. 2009
# # release name: Genome Reference Consortium GRCh37
# # 93 sequences:
# ...
chrom_names <- seqnames(Hsapiens)
head(chrom_names)
# [1] "chr1" "chr2" "chr3" "chr4" "chr5" "chr6"
getSeq(Hsapiens, chrom_names[1:2])
#   A DNAStringSet instance of length 2
#         width seq
# [1] 249250621 NNNNNNNNNNN...NNNNNNNNNNN
# [2] 243199373 NNNNNNNNNNN...NNNNNNNNNNN
		
	getSeq() accepts GRanges as well.
getSeq(Hsapiens, txby[["5594"]])
#   A DNAStringSet instance of length 3
#      width seq
# [1] 108024 GCCCCTCCCTCCGCCCGCCC...CAGTAAATAGCAAGTCTTTTTAA
# [2]  98652 GCCCCTCCCTCCGCCCGCCC...AATTATTTAGAAAGTTACATATA
# [3]  98487 GCCCCTCCCTCCGCCCGCCC...GATACAGATCTTAAATTTGTCAG
# Join all exons (UTRs included) of uc002zvn.3
unlist(getSeq(Hsapiens, exby[["uc002zvn.3"]]))
#   5915-letter "DNAString" instance
# seq: GCCCCTCCCTCCGCCC...CAAGTCTTTTTAA
		
	| Object Type | Notable Methods | 
|---|---|
| OrgDb | columns(), select(), mapIds(), ... | 
| TxDb | transcripts(), genes(), transcriptsBy(), exonsBy(), ... and methods by GRanges and GRangesList | 
| GRanges[List] | start(), end(), strand(), mcols(), seqinfo(), ... | 
| Seqinfo | seqnames(), seqlengths(), isCircular(), seqlevels(), ... | 
| OrganismDb | Methods by OrgDb and TxDb | 
| BSgenome | seqnames(), getSeq() | 
 
	 
	findOverlaps(query, subject) implements an efficient algorithmtype="any". Support "within", "start", and "end" scenario%over%, %within%, and %outside% operators
(gr1 <- GRanges(
  "chrZ",
  IRanges(c(1,11,21,31,41),width=5),
  strand="*"))
# GRanges object with 5 ranges:
#       seqnames    ranges strand
#   [1]     chrZ  [ 1,  5]      *
#   [2]     chrZ  [11, 15]      *
#   [3]     chrZ  [21, 25]      *
#   [4]     chrZ  [31, 35]      *
#   [5]     chrZ  [41, 45]      *
(gr2 <- GRanges(
  "chrZ",
  IRanges(c(19,33), c(38,35)),
  strand="*"))
# GRanges object with 2 ranges:
#       seqnames    ranges strand
#   [1]     chrZ  [19, 38]      *
#   [2]     chrZ  [33, 35]      *
			
		Ex. The 3rd range of gr1 [21, 25] overlaps with 1st range of gr2 [19, 38]
findOverlaps(gr1, gr2)
# Hits object with 3 hits:
#       queryHits subjectHits
#          
#   [1]         3           1
#   [2]         4           1
#   [3]         4           2
#   -------
#   queryLength: 5 / subjectLength: 2
gr1 %over% gr2
# [1] FALSE FALSE  TRUE  TRUE FALSE  
			
		foverlaps() is another function to handle region overlappinglibrary(data.table)
dt_gr1 <- as.data.table(gr1)  # convert to data.table
dt_gr2 <- as.data.table(gr2)
# foverlap requires columns (keys) to be indexed
gr_key <- c("seqnames","start","end")
setkeyv(dt_gr1, gr_key); setkeyv(dt_gr2, gr_key)
foverlaps(dt_gr1, dt_gr2, which = TRUE, nomatch = 0)
#    xid yid
# 1:   3   1
# 2:   4   1
# 3:   4   2
