diff --git a/colly.go b/colly.go
index fb28cfbe..b3a924f4 100644
--- a/colly.go
+++ b/colly.go
@@ -134,6 +134,9 @@ type Collector struct {
 	backend *httpBackend
 	wg *sync.WaitGroup
 	lock *sync.RWMutex
+
+	// fixCharset gates the response.fixCharset call made in fetch.
+	fixCharset bool
 }
 
 // RequestCallback is a type alias for OnRequest callback functions
@@ -462,6 +465,15 @@ func CheckHead() CollectorOption {
 	}
 }
 
+// FixCharset enables or disables the charset-fixing step that fetch applies
+// to a response before the OnResponse callbacks are invoked. Init turns it
+// on, so pass FixCharset(false) to opt out.
+func FixCharset(f bool) CollectorOption {
+	return func(c *Collector) {
+		c.fixCharset = f
+	}
+}
+
 // Init initializes the Collector's private variables and sets default
 // configuration for the Collector
 func (c *Collector) Init() {
@@ -483,6 +495,10 @@ func (c *Collector) Init() {
 	c.ID = atomic.AddUint32(&collectorCounter, 1)
 	c.TraceHTTP = false
 	c.Context = context.Background()
+
+	// Charset fixing is on by default, matching upstream colly, where fetch
+	// ran it unconditionally; use the FixCharset option to turn it off.
+	c.fixCharset = true
 }
 
 // Appengine will replace the Collector's backend http.Client
@@ -711,9 +727,12 @@ func (c *Collector) fetch(u, method string, depth int, requestData io.Reader, ct
 	response.Request = request
 	response.Trace = hTrace
 
-	err = response.fixCharset(c.DetectCharset, request.ResponseCharacterEncoding)
-	if err != nil {
-		return err
+	// Charset fixing is optional; it is controlled by the FixCharset option.
+	if c.fixCharset {
+		err = response.fixCharset(c.DetectCharset, request.ResponseCharacterEncoding)
+		if err != nil {
+			return err
+		}
 	}
 
 	c.handleOnResponse(response)
diff --git a/response.go b/response.go
index 049d8801..52060631 100644
--- a/response.go
+++ b/response.go
@@ -75,11 +75,10 @@ func (r *Response) fixCharset(detectCharset bool, defaultEncoding string) error
 	}
 
 	contentType := strings.ToLower(r.Headers.Get("Content-Type"))
-	if strings.Contains(contentType, "image/") ||
-		strings.Contains(contentType, "video/") ||
-		strings.Contains(contentType, "audio/") ||
-		strings.Contains(contentType, "font/") {
-		// These MIME types should not have textual data.
-
+	// Only responses whose Content-Type contains "text/" go on to charset handling.
+	// NOTE(review): unlike the previous image/video/audio/font deny-list, this
+	// also returns early for application/xhtml+xml, application/xml,
+	// application/json and responses with no Content-Type - confirm intended.
+	if !strings.Contains(contentType, "text/") {
 		return nil
 	}