@@ -2,6 +2,7 @@ import AbortablePromiseCache from 'abortable-promise-cache'
 import BAI from './bai'
 import CSI from './csi'
 import Chunk from './chunk'
+import crc32 from 'buffer-crc32'
 
 import { unzip, unzipChunkSlice } from '@gmod/bgzf-filehandle'
 
@@ -11,30 +12,22 @@ import { LocalFile, RemoteFile, GenericFilehandle } from 'generic-filehandle'
 import BAMFeature from './record'
 import IndexFile from './indexFile'
 import { parseHeaderText } from './sam'
-import { abortBreakPoint, checkAbortSignal, timeout } from './util'
+import { abortBreakPoint, checkAbortSignal, timeout, makeOpts, BamOpts, BaseOpts } from './util'
 
-const BAM_MAGIC = 21840194
+export const BAM_MAGIC = 21840194
 
 const blockLen = 1 << 16
-type G = GenericFilehandle
-
-interface BamOpts {
-  viewAsPairs?: boolean
-  pairAcrossChr?: boolean
-  maxInsertSize?: number
-  signal?: AbortSignal
-}
 
 export default class BamFile {
   private renameRefSeq: (a: string) => string
   private bam: GenericFilehandle
   private index: IndexFile
-  private featureCache: any
   private chunkSizeLimit: number
   private fetchSizeLimit: number
   private header: any
-  private chrToIndex: any
-  private indexToChr: any
+  protected featureCache: any
+  protected chrToIndex: any
+  protected indexToChr: any
 
   /**
    * @param {object} args
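
For context: BamOpts and BaseOpts now come from './util', which this diff doesn't show. A minimal sketch of what those exports presumably look like, reconstructed from the removed local interface and the fields used below (the actual definitions in util.ts may differ):

// sketch only; assumed shape of the types re-exported from './util'
export interface BaseOpts {
  signal?: AbortSignal
}

export interface BamOpts extends BaseOpts {
  viewAsPairs?: boolean
  pairAcrossChr?: boolean
  maxInsertSize?: number
}
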
@@ -112,14 +105,14 @@ export default class BamFile {
     this.chunkSizeLimit = chunkSizeLimit || 300000000 // 300MB
   }
 
-  async getHeader(abortSignal?: AbortSignal) {
-    const indexData = await this.index.parse(abortSignal)
+  async getHeader(origOpts: AbortSignal | BaseOpts = {}) {
+    const opts = makeOpts(origOpts)
+    const indexData = await this.index.parse(opts)
     const ret = indexData.firstDataLine ? indexData.firstDataLine.blockPosition + 65535 : undefined
     let buffer
     if (ret) {
-      const res = await this.bam.read(Buffer.alloc(ret + blockLen), 0, ret + blockLen, 0, {
-        signal: abortSignal,
-      })
+      const res = await this.bam.read(Buffer.alloc(ret + blockLen), 0, ret + blockLen, 0, opts)
+
       const { bytesRead } = res
       ;({ buffer } = res)
       if (!bytesRead) {
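
getHeader now accepts either a bare AbortSignal (the old calling convention) or an options object, and makeOpts normalizes the two. Its implementation isn't in this diff; a plausible sketch, assuming it only has to distinguish a signal from a plain options object:

// sketch only; assumed normalizer in './util'
export function makeOpts(obj: AbortSignal | BaseOpts = {}): BaseOpts {
  // an AbortSignal has an 'aborted' property; a plain options object does not
  return 'aborted' in obj ? { signal: obj as AbortSignal } : (obj as BaseOpts)
}

With this in place, bam.getHeader(), bam.getHeader(signal), and bam.getHeader({ signal }) all keep working.
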
@@ -131,7 +124,7 @@ export default class BamFile {
         buffer = buffer.slice(0, ret)
       }
     } else {
-      buffer = (await this.bam.readFile({ signal: abortSignal })) as Buffer
+      buffer = (await this.bam.readFile(opts)) as Buffer
     }
 
     const uncba = await unzip(buffer)
@@ -142,7 +135,7 @@ export default class BamFile {
     const headLen = uncba.readInt32LE(4)
 
     this.header = uncba.toString('utf8', 8, 8 + headLen)
-    const { chrToIndex, indexToChr } = await this._readRefSeqs(headLen + 8, 65535, abortSignal)
+    const { chrToIndex, indexToChr } = await this._readRefSeqs(headLen + 8, 65535, opts)
     this.chrToIndex = chrToIndex
     this.indexToChr = indexToChr
 
@@ -154,17 +147,15 @@ export default class BamFile {
   async _readRefSeqs(
     start: number,
     refSeqBytes: number,
-    abortSignal?: AbortSignal,
+    opts: BaseOpts = {},
   ): Promise<{
     chrToIndex: { [key: string]: number }
     indexToChr: { refName: string; length: number }[]
   }> {
     if (start > refSeqBytes) {
-      return this._readRefSeqs(start, refSeqBytes * 2)
+      return this._readRefSeqs(start, refSeqBytes * 2, opts)
     }
-    const res = await this.bam.read(Buffer.alloc(refSeqBytes + blockLen), 0, refSeqBytes, 0, {
-      signal: abortSignal,
-    })
+    const res = await this.bam.read(Buffer.alloc(refSeqBytes + blockLen), 0, refSeqBytes, 0, opts)
     const { bytesRead } = res
     let { buffer } = res
     if (!bytesRead) {
@@ -181,7 +172,7 @@ export default class BamFile {
     const chrToIndex: { [key: string]: number } = {}
     const indexToChr: { refName: string; length: number }[] = []
     for (let i = 0; i < nRef; i += 1) {
-      await abortBreakPoint(abortSignal)
+      await abortBreakPoint(opts.signal)
       const lName = uncba.readInt32LE(p)
       let refName = uncba.toString('utf8', p + 4, p + 4 + lName - 1)
       refName = this.renameRefSeq(refName)
@@ -193,7 +184,7 @@ export default class BamFile {
       p = p + 8 + lName
       if (p > uncba.length) {
         console.warn(`BAM header is very big. Re-fetching ${refSeqBytes} bytes.`)
-        return this._readRefSeqs(start, refSeqBytes * 2)
+        return this._readRefSeqs(start, refSeqBytes * 2, opts)
       }
     }
     return { chrToIndex, indexToChr }
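
Threading opts through both recursive calls means an abort signal can now also cancel header refetches. The refetch itself doubles the read window until the whole reference-sequence table fits, so even a huge header costs only a logarithmic number of round trips. The pattern in isolation, with hypothetical read/overran helpers:

// sketch of the fetch-and-double pattern _readRefSeqs uses
async function fetchGrowing(
  read: (n: number) => Promise<Buffer>, // hypothetical: fetch the first n bytes
  overran: (b: Buffer) => boolean, // hypothetical: did parsing walk past the end?
  size = 65535,
): Promise<Buffer> {
  const buf = await read(size)
  return overran(buf) ? fetchGrowing(read, overran, size * 2) : buf // 65535 -> 131070 -> ...
}
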
@@ -269,7 +260,7 @@ export default class BamFile {
       const c = chunks[i]
       const { data, cpositions, dpositions, chunk } = await this.featureCache.get(
         c.toString(),
-        c,
+        { chunk: c, opts },
         opts.signal,
       )
       const promise = this.readBamFeatures(data, cpositions, dpositions, chunk).then(records => {
@@ -376,7 +367,7 @@ export default class BamFile {
     const mateFeatPromises = mateChunks.map(async c => {
       const { data, cpositions, dpositions, chunk } = await this.featureCache.get(
         c.toString(),
-        c,
+        { chunk: c, opts },
         opts.signal,
       )
       const feats = await this.readBamFeatures(data, cpositions, dpositions, chunk)
@@ -398,11 +389,10 @@ export default class BamFile {
     return featuresRet
   }
 
-  async _readChunk(chunk: Chunk, abortSignal?: AbortSignal) {
-    const bufsize = chunk.fetchedSize()
-    const res = await this.bam.read(Buffer.alloc(bufsize), 0, bufsize, chunk.minv.blockPosition, {
-      signal: abortSignal,
-    })
+  async _readChunk({ chunk, opts }: { chunk: unknown; opts: BaseOpts }, abortSignal?: AbortSignal) {
+    const c = chunk as Chunk
+    const bufsize = c.fetchedSize()
+    const res = await this.bam.read(Buffer.alloc(bufsize), 0, bufsize, c.minv.blockPosition, opts)
     const { bytesRead } = res
     let { buffer } = res
     checkAbortSignal(abortSignal)
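
The second argument to featureCache.get is the data that abortable-promise-cache hands to its fill callback, so passing { chunk: c, opts } in the earlier hunks is what lets _readChunk receive the request options here. The cache construction isn't shown in this diff, but it presumably looks roughly like this (the cache class and size are assumptions):

// sketch only; likely wiring inside the BamFile constructor
import AbortablePromiseCache from 'abortable-promise-cache'
import QuickLRU from 'quick-lru'

this.featureCache = new AbortablePromiseCache({
  cache: new QuickLRU({ maxSize: 50 }),
  fill: (args: { chunk: unknown; opts: BaseOpts }, signal?: AbortSignal) =>
    this._readChunk(args, signal),
})
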
@@ -431,8 +421,10 @@ export default class BamFile {
       const blockEnd = blockStart + 4 + blockSize - 1
 
       // increment position to the current decompressed status
-      while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) {}
-      pos--
+      if (dpositions) {
+        while (blockStart + chunk.minv.dataPosition >= dpositions[pos++]) {}
+        pos--
+      }
 
       // only try to read the feature if we have all the bytes for it
       if (blockEnd < ba.length) {
@@ -442,6 +434,9 @@ export default class BamFile {
             start: blockStart,
             end: blockEnd,
           },
+          // the below results in an automatically calculated file-offset based ID
+          // if the info for that is available, otherwise a crc32 of the feature bytes
+          //
           // cpositions[pos] refers to the actual file offset of a bgzip block boundary
           //
           // we multiply by (1 << 8) in order to make sure each block has a "unique"
@@ -455,11 +450,12 @@ export default class BamFile {
           // starts at 0 instead of chunk.minv.dataPosition
           //
           // the +1 is just to avoid any possible uniqueId of 0, but this does not realistically happen
-          fileOffset:
-            cpositions[pos] * (1 << 8) +
-            (blockStart - dpositions[pos]) +
-            chunk.minv.dataPosition +
-            1,
+          fileOffset: cpositions
+            ? cpositions[pos] * (1 << 8) +
+              (blockStart - dpositions[pos]) +
+              chunk.minv.dataPosition +
+              1
+            : crc32.signed(ba.slice(blockStart, blockEnd)),
         })
 
         sink.push(feature)
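
Taken together with the dpositions guard above, the effect is: when the chunk came through unzipChunkSlice (so cpositions/dpositions are available), the ID is derived from the record's exact virtual file offset; when they aren't, it falls back to a crc32 of the record's bytes. A standalone restatement with a worked example (the function name is illustrative, not exported by the library):

import crc32 from 'buffer-crc32'

// sketch: the same uniqueId computation as the diff above, isolated
function uniqueId(
  cpositions: number[] | undefined,
  dpositions: number[],
  pos: number,
  blockStart: number,
  minvDataPosition: number,
  ba: Buffer,
  blockEnd: number,
): number {
  return cpositions
    ? cpositions[pos] * (1 << 8) + (blockStart - dpositions[pos]) + minvDataPosition + 1
    : crc32.signed(ba.slice(blockStart, blockEnd))
}

// worked example: a bgzip block at compressed offset 4096, a record starting
// 10 bytes past that block's decompressed start (dpositions[pos] = 0,
// blockStart = 10), in a chunk that begins 100 bytes into the block:
// 4096 * (1 << 8) + (10 - 0) + 100 + 1 = 1048687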