From 9268ada0b332e41c3fc5c08ed3db5b3079f9c586 Mon Sep 17 00:00:00 2001 From: Nova Kwok Date: Mon, 29 May 2023 19:41:39 +0800 Subject: [PATCH] A better way to handle remote content-type, fix etag (#216) * A better way to handle remote content-type, fix etag * Check for err * Optimize on fetchRemoteImage * Write 600 permission * Implement simple lock for write operation * Removing typo * Removing typo * Setup cache * Fix CI * Fix retry and remove genEtag * Fix error note * remove duplicate Get * WTF * Fix tests * remove broken test * Fix broken tests --- Makefile | 2 +- config.go | 2 +- go.mod | 1 + go.sum | 2 ++ helper.go | 82 +++++++++++++++++++++++++++++++------------------- helper_test.go | 11 ------- router.go | 16 +++++----- router_test.go | 22 ++++++-------- webp-server.go | 6 ++++ 9 files changed, 81 insertions(+), 63 deletions(-) diff --git a/Makefile b/Makefile index 6b37f0b..720a608 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,7 @@ tools-dir: install-staticcheck: tools-dir GOBIN=`pwd`/tools/bin go install honnef.co/go/tools/cmd/staticcheck@latest - curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b ./tools/bin v1.51.2 + curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b ./tools/bin v1.52.2 static-check: install-staticcheck #S1000,SA1015,SA4006,SA4011,S1023,S1034,ST1003,ST1005,ST1016,ST1020,ST1021 diff --git a/config.go b/config.go index ac4d077..50ea9f4 100644 --- a/config.go +++ b/config.go @@ -31,7 +31,7 @@ var ( prefetch, proxyMode bool remoteRaw = "remote-raw" config Config - version = "0.8.2" + version = "0.8.3" ) const ( diff --git a/go.mod b/go.mod index 83d9075..222b92a 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/davidbyttow/govips/v2 v2.13.0 github.com/gofiber/fiber/v2 v2.4.0 github.com/h2non/filetype v1.1.3 + github.com/patrickmn/go-cache v2.1.0+incompatible github.com/schollz/progressbar/v3 v3.13.1 github.com/sirupsen/logrus v1.9.2 github.com/stretchr/testify v1.8.3 diff --git a/go.sum b/go.sum index 6046764..84499f3 100644 --- a/go.sum +++ b/go.sum @@ -22,6 +22,8 @@ github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2Em github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= +github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= diff --git a/helper.go b/helper.go index ef0449d..4b6aa66 100644 --- a/helper.go +++ b/helper.go @@ -5,12 +5,11 @@ import ( "crypto/sha1" //#nosec "encoding/hex" "fmt" - "hash/crc32" - "io" "net/http" "os" "path" "path/filepath" + "time" "github.com/h2non/filetype" @@ -65,6 +64,21 @@ func imageExists(filename string) bool { // means something wrong in exhaust file system return false } + + // Check if there is lock in cache, retry after 1 second + maxRetries := 3 + retryDelay := 100 * time.Millisecond // Initial retry delay + + for retry := 0; retry < maxRetries; retry++ { + if _, found := WriteLock.Get(filename); found { + log.Infof("file %s is locked, retrying in %s", filename, retryDelay) + time.Sleep(retryDelay) + retryDelay *= 2 // Exponential backoff + } else { + return !info.IsDir() + } + } + log.Debugf("file %s exists!", filename) return !info.IsDir() } @@ -86,49 +100,64 @@ func checkAllowedType(imgFilename string) bool { // Check for remote filepath, e.g: https://test.webp.sh/node.png // return StatusCode, etagValue and length func getRemoteImageInfo(fileURL string) (int, string, string) { - res, err := http.Head(fileURL) + resp, err := http.Head(fileURL) if err != nil { - log.Errorln("Connection to remote error!") + log.Errorln("Connection to remote error when getRemoteImageInfo!") return http.StatusInternalServerError, "", "" } - defer res.Body.Close() - if res.StatusCode != 404 { - etagValue := res.Header.Get("etag") + defer resp.Body.Close() + if resp.StatusCode == 200 { + etagValue := resp.Header.Get("etag") if etagValue == "" { - log.Info("Remote didn't return etag in header, please check.") + log.Info("Remote didn't return etag in header when getRemoteImageInfo, please check.") } else { - return 200, etagValue, res.Header.Get("content-length") + return resp.StatusCode, etagValue, resp.Header.Get("content-length") } } - return res.StatusCode, "", res.Header.Get("content-length") - + return resp.StatusCode, "", resp.Header.Get("content-length") } func fetchRemoteImage(filepath string, url string) error { resp, err := http.Get(url) if err != nil { + log.Errorln("Connection to remote error when fetchRemoteImage!") return err } defer resp.Body.Close() - // Check if remote content-type is image - if !strings.Contains(resp.Header.Get("content-type"), "image") { - log.Warnf("remote file %s is not image, remote returned %s", url, resp.Header.Get("content-type")) - // Delete the file - _ = os.Remove(filepath) - return fmt.Errorf("remote file %s is not image, remote returned %s", url, resp.Header.Get("content-type")) + if resp.StatusCode != 200 { + return fmt.Errorf("remote returned %s when fetching remote image", resp.Status) } - _ = os.MkdirAll(path.Dir(filepath), 0755) - out, err := os.Create(filepath) + // Copy bytes here + bodyBytes := new(bytes.Buffer) + _, err = bodyBytes.ReadFrom(resp.Body) if err != nil { return err } - defer out.Close() - _, err = io.Copy(out, resp.Body) - return err + // Check if remote content-type is image using check by filetype instead of content-type returned by origin + kind, _ := filetype.Match(bodyBytes.Bytes()) + if kind == filetype.Unknown || !strings.Contains(kind.MIME.Value, "image") { + return fmt.Errorf("remote file %s is not image, remote content has MIME type of %s", url, kind.MIME.Value) + } + + _ = os.MkdirAll(path.Dir(filepath), 0755) + + // Create Cache here as a lock + // Key: filepath, Value: true + WriteLock.Set(filepath, true, -1) + + err = os.WriteFile(filepath, bodyBytes.Bytes(), 0600) + if err != nil { + return err + } + + // Delete lock here + WriteLock.Delete(filepath) + + return nil } // Given /path/to/node.png @@ -174,15 +203,6 @@ func genOptimizedAbsPath(rawImagePath string, exhaustPath string, imageName stri return avifAbsolutePath, webpAbsolutePath } -func genEtag(ImgAbsPath string) string { - data, err := os.ReadFile(ImgAbsPath) - if err != nil { - log.Warn(err) - } - crc := crc32.ChecksumIEEE(data) - return fmt.Sprintf(`W/"%d-%08X"`, len(data), crc) -} - func getCompressionRate(RawImagePath string, optimizedImg string) string { originFileInfo, err := os.Stat(RawImagePath) if err != nil { diff --git a/helper_test.go b/helper_test.go index dd8f64a..988d7e3 100644 --- a/helper_test.go +++ b/helper_test.go @@ -115,14 +115,6 @@ func TestGenOptimizedAbsPath(t *testing.T) { } } -func TestGenEtag(t *testing.T) { - var data = "./pics/png.jpg" - var expected = "W/\"1020764-262C0329\"" - var result = genEtag(data) - - assert.Equalf(t, result, expected, "Result: [%s], Expected: [%s]", result, expected) -} - func TestSelectFormat(t *testing.T) { // this is a complete test case for webp compatibility // func goOrigin(header, ua string) bool @@ -214,9 +206,6 @@ func TestFetchRemoteImage(t *testing.T) { err = fetchRemoteImage(fp, "http://ahjdsgdsghja.cya") assert.NotNil(t, err) - // test when returned is not image - err = fetchRemoteImage(fp, "https://github.com/") - assert.Equal(t, err.Error(), "remote file https://github.com/ is not image, remote returned text/html; charset=utf-8") } func TestCleanProxyCache(t *testing.T) { diff --git a/router.go b/router.go index e61fbae..524c600 100644 --- a/router.go +++ b/router.go @@ -6,7 +6,6 @@ import ( "net/http" "net/url" - // "os" "path" "strconv" @@ -115,13 +114,15 @@ func proxyHandler(c *fiber.Ctx, reqURIwithQuery string) (string, error) { // https://test.webp.sh/mypic/123.jpg?someother=200&somebugs=200 realRemoteAddr := config.ImgPath + reqURIwithQuery - // Since we cannot store file in format of "mypic/123.jpg?someother=200&somebugs=200", we need to hash it. - reqURIwithQueryHash := Sha1Path(reqURIwithQuery) // 378e740ca56144b7587f3af9debeee544842879a - - localRawImagePath := path.Join(remoteRaw, reqURIwithQueryHash) // For store the remote raw image, /home/webp_server/remote-raw/378e740ca56144b7587f3af9debeee544842879a // Ping Remote for status code and etag info - log.Infof("Remote Addr is %s, fetching...", realRemoteAddr) - statusCode, _, _ := getRemoteImageInfo(realRemoteAddr) + log.Infof("Remote Addr is %s, fetching info...", realRemoteAddr) + statusCode, etagValue, _ := getRemoteImageInfo(realRemoteAddr) + + // Since we cannot store file in format of "/mypic/123.jpg?someother=200&somebugs=200", we need to hash it. + reqURIwithQueryHash := Sha1Path(reqURIwithQuery) // 378e740ca56144b7587f3af9debeee544842879a + etagValueHash := Sha1Path(etagValue) // 123e740ca56333b7587f3af9debeee5448428123 + + localRawImagePath := path.Join(remoteRaw, reqURIwithQueryHash+"-etag-"+etagValueHash) // For store the remote raw image, /home/webp_server/remote-raw/378e740ca56144b7587f3af9debeee544842879a-etag-123e740ca56333b7587f3af9debeee5448428123 if statusCode == 200 { if imageExists(localRawImagePath) { @@ -129,6 +130,7 @@ func proxyHandler(c *fiber.Ctx, reqURIwithQuery string) (string, error) { } else { // Temporary store of remote file. cleanProxyCache(config.ExhaustPath + reqURIwithQuery + "*") + log.Info("Remote file not found in remote-raw path, fetching...") err := fetchRemoteImage(localRawImagePath, realRemoteAddr) return localRawImagePath, err } diff --git a/router_test.go b/router_test.go index 8892204..d414933 100644 --- a/router_test.go +++ b/router_test.go @@ -6,9 +6,11 @@ import ( "net/http/httptest" "os" "testing" + "time" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/etag" + "github.com/patrickmn/go-cache" "github.com/stretchr/testify/assert" ) @@ -21,7 +23,7 @@ var ( curlUA = "curl/7.64.1" ) -func setupParam() { +func TestMain(m *testing.M) { // setup parameters here... config.ImgPath = "./pics" config.ExhaustPath = "./exhaust_test" @@ -29,6 +31,8 @@ func setupParam() { proxyMode = false remoteRaw = "remote-raw" + WriteLock = cache.New(5*time.Minute, 10*time.Minute) + m.Run() } func requestToServer(url string, app *fiber.App, ua, accept string) (*http.Response, []byte) { @@ -51,9 +55,7 @@ func requestToServerHeaders(url string, app *fiber.App, headers map[string]strin return resp, data } - func TestServerHeaders(t *testing.T) { - setupParam() var app = fiber.New() app.Use(etag.New(etag.Config{ Weak: true, @@ -74,8 +76,8 @@ func TestServerHeaders(t *testing.T) { // TestServerHeadersNotModified var headers = map[string]string{ - "User-Agent": chromeUA, - "Accept": acceptWebP, + "User-Agent": chromeUA, + "Accept": acceptWebP, "If-None-Match": etag, } response, _ = requestToServerHeaders(url, app, headers) @@ -105,7 +107,6 @@ func TestServerHeaders(t *testing.T) { } func TestConvert(t *testing.T) { - setupParam() // TODO: old-style test, better update it with accept headers var testChromeLink = map[string]string{ "http://127.0.0.1:3333/webp_server.jpg": "image/webp", @@ -172,7 +173,6 @@ func TestConvert(t *testing.T) { } func TestConvertNotAllowed(t *testing.T) { - setupParam() config.AllowedTypes = []string{"jpg", "png", "jpeg"} var app = fiber.New() @@ -193,14 +193,13 @@ func TestConvertNotAllowed(t *testing.T) { } func TestConvertProxyModeBad(t *testing.T) { - setupParam() proxyMode = true var app = fiber.New() app.Get("/*", convert) // this is local random image, should be 404 - url := "http://127.0.0.1:3333/webp_8888server.bmp" + url := "http://127.0.0.1:3333/webp_8888server.jpg" resp, _ := requestToServer(url, app, chromeUA, acceptWebP) defer resp.Body.Close() assert.Equal(t, http.StatusNotFound, resp.StatusCode) @@ -209,11 +208,9 @@ func TestConvertProxyModeBad(t *testing.T) { resp1, _ := requestToServer(url, app, curlUA, acceptWebP) defer resp1.Body.Close() assert.Equal(t, http.StatusNotFound, resp1.StatusCode) - } func TestConvertProxyModeWork(t *testing.T) { - setupParam() proxyMode = true var app = fiber.New() @@ -235,8 +232,9 @@ func TestConvertProxyModeWork(t *testing.T) { } func TestConvertBigger(t *testing.T) { - setupParam() + proxyMode = false config.Quality = 100 + config.ImgPath = "./pics" var app = fiber.New() app.Get("/*", convert) diff --git a/webp-server.go b/webp-server.go index e4a1fa1..ee75089 100644 --- a/webp-server.go +++ b/webp-server.go @@ -7,14 +7,18 @@ import ( "os" "regexp" "runtime" + "time" "github.com/davidbyttow/govips/v2/vips" "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/etag" "github.com/gofiber/fiber/v2/middleware/logger" + "github.com/patrickmn/go-cache" log "github.com/sirupsen/logrus" ) +var WriteLock *cache.Cache + func loadConfig(path string) Config { jsonObject, err := os.Open(path) if err != nil { @@ -104,6 +108,8 @@ Develop by WebP Server team. https://github.com/webp-sh`, version) }) defer vips.Shutdown() + WriteLock = cache.New(5*time.Minute, 10*time.Minute) + if prefetch { go prefetchImages(config.ImgPath, config.ExhaustPath) }