A better way to handle remote content-type, fix etag (#216)

* A better way to handle remote content-type, fix etag

* Check for err

* Optimize on fetchRemoteImage

* Write 600 permission

* Implement simple lock for write operation

* Removing typo

* Removing typo

* Setup cache

* Fix CI

* Fix retry and remove genEtag

* Fix error note

* remove duplicate Get

* WTF

* Fix tests

* remove broken test

* Fix broken tests
This commit is contained in:
Nova Kwok 2023-05-29 19:41:39 +08:00 committed by GitHub
parent c66fccfffd
commit 9268ada0b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 81 additions and 63 deletions

View File

@ -24,7 +24,7 @@ tools-dir:
install-staticcheck: tools-dir install-staticcheck: tools-dir
GOBIN=`pwd`/tools/bin go install honnef.co/go/tools/cmd/staticcheck@latest GOBIN=`pwd`/tools/bin go install honnef.co/go/tools/cmd/staticcheck@latest
curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b ./tools/bin v1.51.2 curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b ./tools/bin v1.52.2
static-check: install-staticcheck static-check: install-staticcheck
#S1000,SA1015,SA4006,SA4011,S1023,S1034,ST1003,ST1005,ST1016,ST1020,ST1021 #S1000,SA1015,SA4006,SA4011,S1023,S1034,ST1003,ST1005,ST1016,ST1020,ST1021

View File

@ -31,7 +31,7 @@ var (
prefetch, proxyMode bool prefetch, proxyMode bool
remoteRaw = "remote-raw" remoteRaw = "remote-raw"
config Config config Config
version = "0.8.2" version = "0.8.3"
) )
const ( const (

1
go.mod
View File

@ -6,6 +6,7 @@ require (
github.com/davidbyttow/govips/v2 v2.13.0 github.com/davidbyttow/govips/v2 v2.13.0
github.com/gofiber/fiber/v2 v2.4.0 github.com/gofiber/fiber/v2 v2.4.0
github.com/h2non/filetype v1.1.3 github.com/h2non/filetype v1.1.3
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/schollz/progressbar/v3 v3.13.1 github.com/schollz/progressbar/v3 v3.13.1
github.com/sirupsen/logrus v1.9.2 github.com/sirupsen/logrus v1.9.2
github.com/stretchr/testify v1.8.3 github.com/stretchr/testify v1.8.3

2
go.sum
View File

@ -22,6 +22,8 @@ github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2Em
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=

View File

@ -5,12 +5,11 @@ import (
"crypto/sha1" //#nosec "crypto/sha1" //#nosec
"encoding/hex" "encoding/hex"
"fmt" "fmt"
"hash/crc32"
"io"
"net/http" "net/http"
"os" "os"
"path" "path"
"path/filepath" "path/filepath"
"time"
"github.com/h2non/filetype" "github.com/h2non/filetype"
@ -65,6 +64,21 @@ func imageExists(filename string) bool {
// means something wrong in exhaust file system // means something wrong in exhaust file system
return false return false
} }
// Check if there is lock in cache, retry after 1 second
maxRetries := 3
retryDelay := 100 * time.Millisecond // Initial retry delay
for retry := 0; retry < maxRetries; retry++ {
if _, found := WriteLock.Get(filename); found {
log.Infof("file %s is locked, retrying in %s", filename, retryDelay)
time.Sleep(retryDelay)
retryDelay *= 2 // Exponential backoff
} else {
return !info.IsDir()
}
}
log.Debugf("file %s exists!", filename) log.Debugf("file %s exists!", filename)
return !info.IsDir() return !info.IsDir()
} }
@ -86,49 +100,64 @@ func checkAllowedType(imgFilename string) bool {
// Check for remote filepath, e.g: https://test.webp.sh/node.png // Check for remote filepath, e.g: https://test.webp.sh/node.png
// return StatusCode, etagValue and length // return StatusCode, etagValue and length
func getRemoteImageInfo(fileURL string) (int, string, string) { func getRemoteImageInfo(fileURL string) (int, string, string) {
res, err := http.Head(fileURL) resp, err := http.Head(fileURL)
if err != nil { if err != nil {
log.Errorln("Connection to remote error!") log.Errorln("Connection to remote error when getRemoteImageInfo!")
return http.StatusInternalServerError, "", "" return http.StatusInternalServerError, "", ""
} }
defer res.Body.Close() defer resp.Body.Close()
if res.StatusCode != 404 { if resp.StatusCode == 200 {
etagValue := res.Header.Get("etag") etagValue := resp.Header.Get("etag")
if etagValue == "" { if etagValue == "" {
log.Info("Remote didn't return etag in header, please check.") log.Info("Remote didn't return etag in header when getRemoteImageInfo, please check.")
} else { } else {
return 200, etagValue, res.Header.Get("content-length") return resp.StatusCode, etagValue, resp.Header.Get("content-length")
} }
} }
return res.StatusCode, "", res.Header.Get("content-length") return resp.StatusCode, "", resp.Header.Get("content-length")
} }
func fetchRemoteImage(filepath string, url string) error { func fetchRemoteImage(filepath string, url string) error {
resp, err := http.Get(url) resp, err := http.Get(url)
if err != nil { if err != nil {
log.Errorln("Connection to remote error when fetchRemoteImage!")
return err return err
} }
defer resp.Body.Close() defer resp.Body.Close()
// Check if remote content-type is image if resp.StatusCode != 200 {
if !strings.Contains(resp.Header.Get("content-type"), "image") { return fmt.Errorf("remote returned %s when fetching remote image", resp.Status)
log.Warnf("remote file %s is not image, remote returned %s", url, resp.Header.Get("content-type"))
// Delete the file
_ = os.Remove(filepath)
return fmt.Errorf("remote file %s is not image, remote returned %s", url, resp.Header.Get("content-type"))
} }
_ = os.MkdirAll(path.Dir(filepath), 0755) // Copy bytes here
out, err := os.Create(filepath) bodyBytes := new(bytes.Buffer)
_, err = bodyBytes.ReadFrom(resp.Body)
if err != nil { if err != nil {
return err return err
} }
defer out.Close()
_, err = io.Copy(out, resp.Body) // Check if remote content-type is image using check by filetype instead of content-type returned by origin
return err kind, _ := filetype.Match(bodyBytes.Bytes())
if kind == filetype.Unknown || !strings.Contains(kind.MIME.Value, "image") {
return fmt.Errorf("remote file %s is not image, remote content has MIME type of %s", url, kind.MIME.Value)
}
_ = os.MkdirAll(path.Dir(filepath), 0755)
// Create Cache here as a lock
// Key: filepath, Value: true
WriteLock.Set(filepath, true, -1)
err = os.WriteFile(filepath, bodyBytes.Bytes(), 0600)
if err != nil {
return err
}
// Delete lock here
WriteLock.Delete(filepath)
return nil
} }
// Given /path/to/node.png // Given /path/to/node.png
@ -174,15 +203,6 @@ func genOptimizedAbsPath(rawImagePath string, exhaustPath string, imageName stri
return avifAbsolutePath, webpAbsolutePath return avifAbsolutePath, webpAbsolutePath
} }
func genEtag(ImgAbsPath string) string {
data, err := os.ReadFile(ImgAbsPath)
if err != nil {
log.Warn(err)
}
crc := crc32.ChecksumIEEE(data)
return fmt.Sprintf(`W/"%d-%08X"`, len(data), crc)
}
func getCompressionRate(RawImagePath string, optimizedImg string) string { func getCompressionRate(RawImagePath string, optimizedImg string) string {
originFileInfo, err := os.Stat(RawImagePath) originFileInfo, err := os.Stat(RawImagePath)
if err != nil { if err != nil {

View File

@ -115,14 +115,6 @@ func TestGenOptimizedAbsPath(t *testing.T) {
} }
} }
func TestGenEtag(t *testing.T) {
var data = "./pics/png.jpg"
var expected = "W/\"1020764-262C0329\""
var result = genEtag(data)
assert.Equalf(t, result, expected, "Result: [%s], Expected: [%s]", result, expected)
}
func TestSelectFormat(t *testing.T) { func TestSelectFormat(t *testing.T) {
// this is a complete test case for webp compatibility // this is a complete test case for webp compatibility
// func goOrigin(header, ua string) bool // func goOrigin(header, ua string) bool
@ -214,9 +206,6 @@ func TestFetchRemoteImage(t *testing.T) {
err = fetchRemoteImage(fp, "http://ahjdsgdsghja.cya") err = fetchRemoteImage(fp, "http://ahjdsgdsghja.cya")
assert.NotNil(t, err) assert.NotNil(t, err)
// test when returned is not image
err = fetchRemoteImage(fp, "https://github.com/")
assert.Equal(t, err.Error(), "remote file https://github.com/ is not image, remote returned text/html; charset=utf-8")
} }
func TestCleanProxyCache(t *testing.T) { func TestCleanProxyCache(t *testing.T) {

View File

@ -6,7 +6,6 @@ import (
"net/http" "net/http"
"net/url" "net/url"
// "os"
"path" "path"
"strconv" "strconv"
@ -115,13 +114,15 @@ func proxyHandler(c *fiber.Ctx, reqURIwithQuery string) (string, error) {
// https://test.webp.sh/mypic/123.jpg?someother=200&somebugs=200 // https://test.webp.sh/mypic/123.jpg?someother=200&somebugs=200
realRemoteAddr := config.ImgPath + reqURIwithQuery realRemoteAddr := config.ImgPath + reqURIwithQuery
// Since we cannot store file in format of "mypic/123.jpg?someother=200&somebugs=200", we need to hash it.
reqURIwithQueryHash := Sha1Path(reqURIwithQuery) // 378e740ca56144b7587f3af9debeee544842879a
localRawImagePath := path.Join(remoteRaw, reqURIwithQueryHash) // For store the remote raw image, /home/webp_server/remote-raw/378e740ca56144b7587f3af9debeee544842879a
// Ping Remote for status code and etag info // Ping Remote for status code and etag info
log.Infof("Remote Addr is %s, fetching...", realRemoteAddr) log.Infof("Remote Addr is %s, fetching info...", realRemoteAddr)
statusCode, _, _ := getRemoteImageInfo(realRemoteAddr) statusCode, etagValue, _ := getRemoteImageInfo(realRemoteAddr)
// Since we cannot store file in format of "/mypic/123.jpg?someother=200&somebugs=200", we need to hash it.
reqURIwithQueryHash := Sha1Path(reqURIwithQuery) // 378e740ca56144b7587f3af9debeee544842879a
etagValueHash := Sha1Path(etagValue) // 123e740ca56333b7587f3af9debeee5448428123
localRawImagePath := path.Join(remoteRaw, reqURIwithQueryHash+"-etag-"+etagValueHash) // For store the remote raw image, /home/webp_server/remote-raw/378e740ca56144b7587f3af9debeee544842879a-etag-123e740ca56333b7587f3af9debeee5448428123
if statusCode == 200 { if statusCode == 200 {
if imageExists(localRawImagePath) { if imageExists(localRawImagePath) {
@ -129,6 +130,7 @@ func proxyHandler(c *fiber.Ctx, reqURIwithQuery string) (string, error) {
} else { } else {
// Temporary store of remote file. // Temporary store of remote file.
cleanProxyCache(config.ExhaustPath + reqURIwithQuery + "*") cleanProxyCache(config.ExhaustPath + reqURIwithQuery + "*")
log.Info("Remote file not found in remote-raw path, fetching...")
err := fetchRemoteImage(localRawImagePath, realRemoteAddr) err := fetchRemoteImage(localRawImagePath, realRemoteAddr)
return localRawImagePath, err return localRawImagePath, err
} }

View File

@ -6,9 +6,11 @@ import (
"net/http/httptest" "net/http/httptest"
"os" "os"
"testing" "testing"
"time"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/etag" "github.com/gofiber/fiber/v2/middleware/etag"
"github.com/patrickmn/go-cache"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
@ -21,7 +23,7 @@ var (
curlUA = "curl/7.64.1" curlUA = "curl/7.64.1"
) )
func setupParam() { func TestMain(m *testing.M) {
// setup parameters here... // setup parameters here...
config.ImgPath = "./pics" config.ImgPath = "./pics"
config.ExhaustPath = "./exhaust_test" config.ExhaustPath = "./exhaust_test"
@ -29,6 +31,8 @@ func setupParam() {
proxyMode = false proxyMode = false
remoteRaw = "remote-raw" remoteRaw = "remote-raw"
WriteLock = cache.New(5*time.Minute, 10*time.Minute)
m.Run()
} }
func requestToServer(url string, app *fiber.App, ua, accept string) (*http.Response, []byte) { func requestToServer(url string, app *fiber.App, ua, accept string) (*http.Response, []byte) {
@ -51,9 +55,7 @@ func requestToServerHeaders(url string, app *fiber.App, headers map[string]strin
return resp, data return resp, data
} }
func TestServerHeaders(t *testing.T) { func TestServerHeaders(t *testing.T) {
setupParam()
var app = fiber.New() var app = fiber.New()
app.Use(etag.New(etag.Config{ app.Use(etag.New(etag.Config{
Weak: true, Weak: true,
@ -74,8 +76,8 @@ func TestServerHeaders(t *testing.T) {
// TestServerHeadersNotModified // TestServerHeadersNotModified
var headers = map[string]string{ var headers = map[string]string{
"User-Agent": chromeUA, "User-Agent": chromeUA,
"Accept": acceptWebP, "Accept": acceptWebP,
"If-None-Match": etag, "If-None-Match": etag,
} }
response, _ = requestToServerHeaders(url, app, headers) response, _ = requestToServerHeaders(url, app, headers)
@ -105,7 +107,6 @@ func TestServerHeaders(t *testing.T) {
} }
func TestConvert(t *testing.T) { func TestConvert(t *testing.T) {
setupParam()
// TODO: old-style test, better update it with accept headers // TODO: old-style test, better update it with accept headers
var testChromeLink = map[string]string{ var testChromeLink = map[string]string{
"http://127.0.0.1:3333/webp_server.jpg": "image/webp", "http://127.0.0.1:3333/webp_server.jpg": "image/webp",
@ -172,7 +173,6 @@ func TestConvert(t *testing.T) {
} }
func TestConvertNotAllowed(t *testing.T) { func TestConvertNotAllowed(t *testing.T) {
setupParam()
config.AllowedTypes = []string{"jpg", "png", "jpeg"} config.AllowedTypes = []string{"jpg", "png", "jpeg"}
var app = fiber.New() var app = fiber.New()
@ -193,14 +193,13 @@ func TestConvertNotAllowed(t *testing.T) {
} }
func TestConvertProxyModeBad(t *testing.T) { func TestConvertProxyModeBad(t *testing.T) {
setupParam()
proxyMode = true proxyMode = true
var app = fiber.New() var app = fiber.New()
app.Get("/*", convert) app.Get("/*", convert)
// this is local random image, should be 404 // this is local random image, should be 404
url := "http://127.0.0.1:3333/webp_8888server.bmp" url := "http://127.0.0.1:3333/webp_8888server.jpg"
resp, _ := requestToServer(url, app, chromeUA, acceptWebP) resp, _ := requestToServer(url, app, chromeUA, acceptWebP)
defer resp.Body.Close() defer resp.Body.Close()
assert.Equal(t, http.StatusNotFound, resp.StatusCode) assert.Equal(t, http.StatusNotFound, resp.StatusCode)
@ -209,11 +208,9 @@ func TestConvertProxyModeBad(t *testing.T) {
resp1, _ := requestToServer(url, app, curlUA, acceptWebP) resp1, _ := requestToServer(url, app, curlUA, acceptWebP)
defer resp1.Body.Close() defer resp1.Body.Close()
assert.Equal(t, http.StatusNotFound, resp1.StatusCode) assert.Equal(t, http.StatusNotFound, resp1.StatusCode)
} }
func TestConvertProxyModeWork(t *testing.T) { func TestConvertProxyModeWork(t *testing.T) {
setupParam()
proxyMode = true proxyMode = true
var app = fiber.New() var app = fiber.New()
@ -235,8 +232,9 @@ func TestConvertProxyModeWork(t *testing.T) {
} }
func TestConvertBigger(t *testing.T) { func TestConvertBigger(t *testing.T) {
setupParam() proxyMode = false
config.Quality = 100 config.Quality = 100
config.ImgPath = "./pics"
var app = fiber.New() var app = fiber.New()
app.Get("/*", convert) app.Get("/*", convert)

View File

@ -7,14 +7,18 @@ import (
"os" "os"
"regexp" "regexp"
"runtime" "runtime"
"time"
"github.com/davidbyttow/govips/v2/vips" "github.com/davidbyttow/govips/v2/vips"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/etag" "github.com/gofiber/fiber/v2/middleware/etag"
"github.com/gofiber/fiber/v2/middleware/logger" "github.com/gofiber/fiber/v2/middleware/logger"
"github.com/patrickmn/go-cache"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
var WriteLock *cache.Cache
func loadConfig(path string) Config { func loadConfig(path string) Config {
jsonObject, err := os.Open(path) jsonObject, err := os.Open(path)
if err != nil { if err != nil {
@ -104,6 +108,8 @@ Develop by WebP Server team. https://github.com/webp-sh`, version)
}) })
defer vips.Shutdown() defer vips.Shutdown()
WriteLock = cache.New(5*time.Minute, 10*time.Minute)
if prefetch { if prefetch {
go prefetchImages(config.ImgPath, config.ExhaustPath) go prefetchImages(config.ImgPath, config.ExhaustPath)
} }