A better way to handle remote content-type, fix etag (#216)

* A better way to handle remote content-type, fix etag

* Check for err

* Optimize on fetchRemoteImage

* Write 600 permission

* Implement simple lock for write operation

* Removing typo

* Removing typo

* Setup cache

* Fix CI

* Fix retry and remove genEtag

* Fix error note

* remove duplicate Get

* WTF

* Fix tests

* remove broken test

* Fix broken tests
This commit is contained in:
Nova Kwok 2023-05-29 19:41:39 +08:00 committed by GitHub
parent c66fccfffd
commit 9268ada0b3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 81 additions and 63 deletions

View File

@ -24,7 +24,7 @@ tools-dir:
install-staticcheck: tools-dir
GOBIN=`pwd`/tools/bin go install honnef.co/go/tools/cmd/staticcheck@latest
curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b ./tools/bin v1.51.2
curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh| sh -s -- -b ./tools/bin v1.52.2
static-check: install-staticcheck
#S1000,SA1015,SA4006,SA4011,S1023,S1034,ST1003,ST1005,ST1016,ST1020,ST1021

View File

@ -31,7 +31,7 @@ var (
prefetch, proxyMode bool
remoteRaw = "remote-raw"
config Config
version = "0.8.2"
version = "0.8.3"
)
const (

1
go.mod
View File

@ -6,6 +6,7 @@ require (
github.com/davidbyttow/govips/v2 v2.13.0
github.com/gofiber/fiber/v2 v2.4.0
github.com/h2non/filetype v1.1.3
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/schollz/progressbar/v3 v3.13.1
github.com/sirupsen/logrus v1.9.2
github.com/stretchr/testify v1.8.3

2
go.sum
View File

@ -22,6 +22,8 @@ github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2Em
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=

View File

@ -5,12 +5,11 @@ import (
"crypto/sha1" //#nosec
"encoding/hex"
"fmt"
"hash/crc32"
"io"
"net/http"
"os"
"path"
"path/filepath"
"time"
"github.com/h2non/filetype"
@ -65,6 +64,21 @@ func imageExists(filename string) bool {
// means something wrong in exhaust file system
return false
}
// Check if there is lock in cache, retry after 1 second
maxRetries := 3
retryDelay := 100 * time.Millisecond // Initial retry delay
for retry := 0; retry < maxRetries; retry++ {
if _, found := WriteLock.Get(filename); found {
log.Infof("file %s is locked, retrying in %s", filename, retryDelay)
time.Sleep(retryDelay)
retryDelay *= 2 // Exponential backoff
} else {
return !info.IsDir()
}
}
log.Debugf("file %s exists!", filename)
return !info.IsDir()
}
@ -86,51 +100,66 @@ func checkAllowedType(imgFilename string) bool {
// Check for remote filepath, e.g: https://test.webp.sh/node.png
// return StatusCode, etagValue and length
func getRemoteImageInfo(fileURL string) (int, string, string) {
res, err := http.Head(fileURL)
resp, err := http.Head(fileURL)
if err != nil {
log.Errorln("Connection to remote error!")
log.Errorln("Connection to remote error when getRemoteImageInfo!")
return http.StatusInternalServerError, "", ""
}
defer res.Body.Close()
if res.StatusCode != 404 {
etagValue := res.Header.Get("etag")
defer resp.Body.Close()
if resp.StatusCode == 200 {
etagValue := resp.Header.Get("etag")
if etagValue == "" {
log.Info("Remote didn't return etag in header, please check.")
log.Info("Remote didn't return etag in header when getRemoteImageInfo, please check.")
} else {
return 200, etagValue, res.Header.Get("content-length")
return resp.StatusCode, etagValue, resp.Header.Get("content-length")
}
}
return res.StatusCode, "", res.Header.Get("content-length")
return resp.StatusCode, "", resp.Header.Get("content-length")
}
func fetchRemoteImage(filepath string, url string) error {
resp, err := http.Get(url)
if err != nil {
log.Errorln("Connection to remote error when fetchRemoteImage!")
return err
}
defer resp.Body.Close()
// Check if remote content-type is image
if !strings.Contains(resp.Header.Get("content-type"), "image") {
log.Warnf("remote file %s is not image, remote returned %s", url, resp.Header.Get("content-type"))
// Delete the file
_ = os.Remove(filepath)
return fmt.Errorf("remote file %s is not image, remote returned %s", url, resp.Header.Get("content-type"))
if resp.StatusCode != 200 {
return fmt.Errorf("remote returned %s when fetching remote image", resp.Status)
}
_ = os.MkdirAll(path.Dir(filepath), 0755)
out, err := os.Create(filepath)
// Copy bytes here
bodyBytes := new(bytes.Buffer)
_, err = bodyBytes.ReadFrom(resp.Body)
if err != nil {
return err
}
defer out.Close()
_, err = io.Copy(out, resp.Body)
// Check if remote content-type is image using check by filetype instead of content-type returned by origin
kind, _ := filetype.Match(bodyBytes.Bytes())
if kind == filetype.Unknown || !strings.Contains(kind.MIME.Value, "image") {
return fmt.Errorf("remote file %s is not image, remote content has MIME type of %s", url, kind.MIME.Value)
}
_ = os.MkdirAll(path.Dir(filepath), 0755)
// Create Cache here as a lock
// Key: filepath, Value: true
WriteLock.Set(filepath, true, -1)
err = os.WriteFile(filepath, bodyBytes.Bytes(), 0600)
if err != nil {
return err
}
// Delete lock here
WriteLock.Delete(filepath)
return nil
}
// Given /path/to/node.png
// Delete /path/to/node.png*
func cleanProxyCache(cacheImagePath string) {
@ -174,15 +203,6 @@ func genOptimizedAbsPath(rawImagePath string, exhaustPath string, imageName stri
return avifAbsolutePath, webpAbsolutePath
}
func genEtag(ImgAbsPath string) string {
data, err := os.ReadFile(ImgAbsPath)
if err != nil {
log.Warn(err)
}
crc := crc32.ChecksumIEEE(data)
return fmt.Sprintf(`W/"%d-%08X"`, len(data), crc)
}
func getCompressionRate(RawImagePath string, optimizedImg string) string {
originFileInfo, err := os.Stat(RawImagePath)
if err != nil {

View File

@ -115,14 +115,6 @@ func TestGenOptimizedAbsPath(t *testing.T) {
}
}
func TestGenEtag(t *testing.T) {
var data = "./pics/png.jpg"
var expected = "W/\"1020764-262C0329\""
var result = genEtag(data)
assert.Equalf(t, result, expected, "Result: [%s], Expected: [%s]", result, expected)
}
func TestSelectFormat(t *testing.T) {
// this is a complete test case for webp compatibility
// func goOrigin(header, ua string) bool
@ -214,9 +206,6 @@ func TestFetchRemoteImage(t *testing.T) {
err = fetchRemoteImage(fp, "http://ahjdsgdsghja.cya")
assert.NotNil(t, err)
// test when returned is not image
err = fetchRemoteImage(fp, "https://github.com/")
assert.Equal(t, err.Error(), "remote file https://github.com/ is not image, remote returned text/html; charset=utf-8")
}
func TestCleanProxyCache(t *testing.T) {

View File

@ -6,7 +6,6 @@ import (
"net/http"
"net/url"
// "os"
"path"
"strconv"
@ -115,13 +114,15 @@ func proxyHandler(c *fiber.Ctx, reqURIwithQuery string) (string, error) {
// https://test.webp.sh/mypic/123.jpg?someother=200&somebugs=200
realRemoteAddr := config.ImgPath + reqURIwithQuery
// Since we cannot store file in format of "mypic/123.jpg?someother=200&somebugs=200", we need to hash it.
reqURIwithQueryHash := Sha1Path(reqURIwithQuery) // 378e740ca56144b7587f3af9debeee544842879a
localRawImagePath := path.Join(remoteRaw, reqURIwithQueryHash) // For store the remote raw image, /home/webp_server/remote-raw/378e740ca56144b7587f3af9debeee544842879a
// Ping Remote for status code and etag info
log.Infof("Remote Addr is %s, fetching...", realRemoteAddr)
statusCode, _, _ := getRemoteImageInfo(realRemoteAddr)
log.Infof("Remote Addr is %s, fetching info...", realRemoteAddr)
statusCode, etagValue, _ := getRemoteImageInfo(realRemoteAddr)
// Since we cannot store file in format of "/mypic/123.jpg?someother=200&somebugs=200", we need to hash it.
reqURIwithQueryHash := Sha1Path(reqURIwithQuery) // 378e740ca56144b7587f3af9debeee544842879a
etagValueHash := Sha1Path(etagValue) // 123e740ca56333b7587f3af9debeee5448428123
localRawImagePath := path.Join(remoteRaw, reqURIwithQueryHash+"-etag-"+etagValueHash) // For store the remote raw image, /home/webp_server/remote-raw/378e740ca56144b7587f3af9debeee544842879a-etag-123e740ca56333b7587f3af9debeee5448428123
if statusCode == 200 {
if imageExists(localRawImagePath) {
@ -129,6 +130,7 @@ func proxyHandler(c *fiber.Ctx, reqURIwithQuery string) (string, error) {
} else {
// Temporary store of remote file.
cleanProxyCache(config.ExhaustPath + reqURIwithQuery + "*")
log.Info("Remote file not found in remote-raw path, fetching...")
err := fetchRemoteImage(localRawImagePath, realRemoteAddr)
return localRawImagePath, err
}

View File

@ -6,9 +6,11 @@ import (
"net/http/httptest"
"os"
"testing"
"time"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/etag"
"github.com/patrickmn/go-cache"
"github.com/stretchr/testify/assert"
)
@ -21,7 +23,7 @@ var (
curlUA = "curl/7.64.1"
)
func setupParam() {
func TestMain(m *testing.M) {
// setup parameters here...
config.ImgPath = "./pics"
config.ExhaustPath = "./exhaust_test"
@ -29,6 +31,8 @@ func setupParam() {
proxyMode = false
remoteRaw = "remote-raw"
WriteLock = cache.New(5*time.Minute, 10*time.Minute)
m.Run()
}
func requestToServer(url string, app *fiber.App, ua, accept string) (*http.Response, []byte) {
@ -51,9 +55,7 @@ func requestToServerHeaders(url string, app *fiber.App, headers map[string]strin
return resp, data
}
func TestServerHeaders(t *testing.T) {
setupParam()
var app = fiber.New()
app.Use(etag.New(etag.Config{
Weak: true,
@ -105,7 +107,6 @@ func TestServerHeaders(t *testing.T) {
}
func TestConvert(t *testing.T) {
setupParam()
// TODO: old-style test, better update it with accept headers
var testChromeLink = map[string]string{
"http://127.0.0.1:3333/webp_server.jpg": "image/webp",
@ -172,7 +173,6 @@ func TestConvert(t *testing.T) {
}
func TestConvertNotAllowed(t *testing.T) {
setupParam()
config.AllowedTypes = []string{"jpg", "png", "jpeg"}
var app = fiber.New()
@ -193,14 +193,13 @@ func TestConvertNotAllowed(t *testing.T) {
}
func TestConvertProxyModeBad(t *testing.T) {
setupParam()
proxyMode = true
var app = fiber.New()
app.Get("/*", convert)
// this is local random image, should be 404
url := "http://127.0.0.1:3333/webp_8888server.bmp"
url := "http://127.0.0.1:3333/webp_8888server.jpg"
resp, _ := requestToServer(url, app, chromeUA, acceptWebP)
defer resp.Body.Close()
assert.Equal(t, http.StatusNotFound, resp.StatusCode)
@ -209,11 +208,9 @@ func TestConvertProxyModeBad(t *testing.T) {
resp1, _ := requestToServer(url, app, curlUA, acceptWebP)
defer resp1.Body.Close()
assert.Equal(t, http.StatusNotFound, resp1.StatusCode)
}
func TestConvertProxyModeWork(t *testing.T) {
setupParam()
proxyMode = true
var app = fiber.New()
@ -235,8 +232,9 @@ func TestConvertProxyModeWork(t *testing.T) {
}
func TestConvertBigger(t *testing.T) {
setupParam()
proxyMode = false
config.Quality = 100
config.ImgPath = "./pics"
var app = fiber.New()
app.Get("/*", convert)

View File

@ -7,14 +7,18 @@ import (
"os"
"regexp"
"runtime"
"time"
"github.com/davidbyttow/govips/v2/vips"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/etag"
"github.com/gofiber/fiber/v2/middleware/logger"
"github.com/patrickmn/go-cache"
log "github.com/sirupsen/logrus"
)
var WriteLock *cache.Cache
func loadConfig(path string) Config {
jsonObject, err := os.Open(path)
if err != nil {
@ -104,6 +108,8 @@ Develop by WebP Server team. https://github.com/webp-sh`, version)
})
defer vips.Shutdown()
WriteLock = cache.New(5*time.Minute, 10*time.Minute)
if prefetch {
go prefetchImages(config.ImgPath, config.ExhaustPath)
}