feat(archive viewer): option to select text encoding for zip files (#2867)
This commit is contained in:
2
assets
2
assets
Submodule assets updated: dcf21d5eb9...dece1c7098
@@ -17,6 +17,13 @@ import (
|
||||
"github.com/cloudreve/Cloudreve/v4/pkg/filemanager/fs/dbfs"
|
||||
"github.com/cloudreve/Cloudreve/v4/pkg/filemanager/manager/entitysource"
|
||||
"github.com/cloudreve/Cloudreve/v4/pkg/util"
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/japanese"
|
||||
"golang.org/x/text/encoding/korean"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
"golang.org/x/tools/container/intsets"
|
||||
)
|
||||
|
||||
@@ -37,7 +44,47 @@ func init() {
|
||||
gob.Register([]ArchivedFile{})
|
||||
}
|
||||
|
||||
func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity string) ([]ArchivedFile, error) {
|
||||
var ZipEncodings = map[string]encoding.Encoding{
|
||||
"ibm866": charmap.CodePage866,
|
||||
"iso8859_2": charmap.ISO8859_2,
|
||||
"iso8859_3": charmap.ISO8859_3,
|
||||
"iso8859_4": charmap.ISO8859_4,
|
||||
"iso8859_5": charmap.ISO8859_5,
|
||||
"iso8859_6": charmap.ISO8859_6,
|
||||
"iso8859_7": charmap.ISO8859_7,
|
||||
"iso8859_8": charmap.ISO8859_8,
|
||||
"iso8859_8I": charmap.ISO8859_8I,
|
||||
"iso8859_10": charmap.ISO8859_10,
|
||||
"iso8859_13": charmap.ISO8859_13,
|
||||
"iso8859_14": charmap.ISO8859_14,
|
||||
"iso8859_15": charmap.ISO8859_15,
|
||||
"iso8859_16": charmap.ISO8859_16,
|
||||
"koi8r": charmap.KOI8R,
|
||||
"koi8u": charmap.KOI8U,
|
||||
"macintosh": charmap.Macintosh,
|
||||
"windows874": charmap.Windows874,
|
||||
"windows1250": charmap.Windows1250,
|
||||
"windows1251": charmap.Windows1251,
|
||||
"windows1252": charmap.Windows1252,
|
||||
"windows1253": charmap.Windows1253,
|
||||
"windows1254": charmap.Windows1254,
|
||||
"windows1255": charmap.Windows1255,
|
||||
"windows1256": charmap.Windows1256,
|
||||
"windows1257": charmap.Windows1257,
|
||||
"windows1258": charmap.Windows1258,
|
||||
"macintoshcyrillic": charmap.MacintoshCyrillic,
|
||||
"gbk": simplifiedchinese.GBK,
|
||||
"gb18030": simplifiedchinese.GB18030,
|
||||
"big5": traditionalchinese.Big5,
|
||||
"eucjp": japanese.EUCJP,
|
||||
"iso2022jp": japanese.ISO2022JP,
|
||||
"shiftjis": japanese.ShiftJIS,
|
||||
"euckr": korean.EUCKR,
|
||||
"utf16be": unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
|
||||
"utf16le": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
|
||||
}
|
||||
|
||||
func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity, zipEncoding string) ([]ArchivedFile, error) {
|
||||
file, err := m.fs.Get(ctx, uri, dbfs.WithFileEntities(), dbfs.WithRequiredCapabilities(dbfs.NavigatorCapabilityDownloadFile))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get file: %w", err)
|
||||
@@ -57,7 +104,18 @@ func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity stri
|
||||
return nil, fs.ErrEntityNotExist
|
||||
}
|
||||
|
||||
cacheKey := getArchiveListCacheKey(targetEntity.ID())
|
||||
var (
|
||||
enc encoding.Encoding
|
||||
ok bool
|
||||
)
|
||||
if zipEncoding != "" {
|
||||
enc, ok = ZipEncodings[strings.ToLower(zipEncoding)]
|
||||
if !ok {
|
||||
return nil, fs.ErrNotSupportedAction.WithError(fmt.Errorf("not supported zip encoding: %s", zipEncoding))
|
||||
}
|
||||
}
|
||||
|
||||
cacheKey := getArchiveListCacheKey(targetEntity.ID(), zipEncoding)
|
||||
kv := m.kv
|
||||
res, found := kv.Get(cacheKey)
|
||||
if found {
|
||||
@@ -72,7 +130,7 @@ func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity stri
|
||||
es.Apply(entitysource.WithContext(ctx))
|
||||
defer es.Close()
|
||||
|
||||
var readerFunc func(ctx context.Context, file io.ReaderAt, size int64) ([]ArchivedFile, error)
|
||||
var readerFunc func(ctx context.Context, file io.ReaderAt, size int64, textEncoding encoding.Encoding) ([]ArchivedFile, error)
|
||||
switch file.Ext() {
|
||||
case "zip":
|
||||
readerFunc = getZipFileList
|
||||
@@ -83,7 +141,7 @@ func (m *manager) ListArchiveFiles(ctx context.Context, uri *fs.URI, entity stri
|
||||
}
|
||||
|
||||
sr := io.NewSectionReader(es, 0, targetEntity.Size())
|
||||
fileList, err := readerFunc(ctx, sr, targetEntity.Size())
|
||||
fileList, err := readerFunc(ctx, sr, targetEntity.Size(), enc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read file list: %w", err)
|
||||
}
|
||||
@@ -199,7 +257,7 @@ func (m *manager) compressFileToArchive(ctx context.Context, parent string, file
|
||||
|
||||
}
|
||||
|
||||
func getZipFileList(ctx context.Context, file io.ReaderAt, size int64) ([]ArchivedFile, error) {
|
||||
func getZipFileList(ctx context.Context, file io.ReaderAt, size int64, textEncoding encoding.Encoding) ([]ArchivedFile, error) {
|
||||
zr, err := zip.NewReader(file, size)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create zip reader: %w", err)
|
||||
@@ -207,10 +265,25 @@ func getZipFileList(ctx context.Context, file io.ReaderAt, size int64) ([]Archiv
|
||||
|
||||
fileList := make([]ArchivedFile, 0, len(zr.File))
|
||||
for _, f := range zr.File {
|
||||
hdr := f.FileHeader
|
||||
if hdr.NonUTF8 && textEncoding != nil {
|
||||
dec := textEncoding.NewDecoder()
|
||||
filename, err := dec.String(hdr.Name)
|
||||
if err == nil {
|
||||
hdr.Name = filename
|
||||
}
|
||||
if hdr.Comment != "" {
|
||||
comment, err := dec.String(hdr.Comment)
|
||||
if err == nil {
|
||||
hdr.Comment = comment
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
info := f.FileInfo()
|
||||
modTime := info.ModTime()
|
||||
fileList = append(fileList, ArchivedFile{
|
||||
Name: util.FormSlash(f.Name),
|
||||
Name: util.FormSlash(hdr.Name),
|
||||
Size: info.Size(),
|
||||
UpdatedAt: &modTime,
|
||||
IsDirectory: info.IsDir(),
|
||||
@@ -219,7 +292,7 @@ func getZipFileList(ctx context.Context, file io.ReaderAt, size int64) ([]Archiv
|
||||
return fileList, nil
|
||||
}
|
||||
|
||||
func get7zFileList(ctx context.Context, file io.ReaderAt, size int64) ([]ArchivedFile, error) {
|
||||
func get7zFileList(ctx context.Context, file io.ReaderAt, size int64, extEncoding encoding.Encoding) ([]ArchivedFile, error) {
|
||||
zr, err := sevenzip.NewReader(file, size)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create 7z reader: %w", err)
|
||||
@@ -239,6 +312,6 @@ func get7zFileList(ctx context.Context, file io.ReaderAt, size int64) ([]Archive
|
||||
return fileList, nil
|
||||
}
|
||||
|
||||
func getArchiveListCacheKey(entity int) string {
|
||||
return fmt.Sprintf("archive_list_%d", entity)
|
||||
// getArchiveListCacheKey builds the cache key under which the file listing of
// an archive entity is stored.
//
// entity is the ID of the archive entity; encoding is the requested text
// encoding name and may be empty when none was requested. The encoding name is
// lowercased before being embedded in the key because the encoding lookup is
// case-insensitive: names that differ only in letter case (e.g. "GBK" and
// "gbk") yield the same listing and must therefore share one cache entry.
func getArchiveListCacheKey(entity int, encoding string) string {
	return fmt.Sprintf("archive_list_%d_%s", entity, strings.ToLower(encoding))
}
|
||||
|
||||
@@ -88,7 +88,7 @@ type (
|
||||
// CreateArchive creates an archive
|
||||
CreateArchive(ctx context.Context, uris []*fs.URI, writer io.Writer, opts ...fs.Option) (int, error)
|
||||
// ListArchiveFiles lists files in an archive
|
||||
ListArchiveFiles(ctx context.Context, uri *fs.URI, entity string) ([]ArchivedFile, error)
|
||||
ListArchiveFiles(ctx context.Context, uri *fs.URI, entity, zipEncoding string) ([]ArchivedFile, error)
|
||||
}
|
||||
|
||||
FileManager interface {
|
||||
|
||||
@@ -27,13 +27,6 @@ import (
|
||||
"github.com/cloudreve/Cloudreve/v4/pkg/util"
|
||||
"github.com/gofrs/uuid"
|
||||
"github.com/mholt/archives"
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/japanese"
|
||||
"golang.org/x/text/encoding/korean"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
type (
|
||||
@@ -79,46 +72,6 @@ func init() {
|
||||
queue.RegisterResumableTaskFactory(queue.ExtractArchiveTaskType, NewExtractArchiveTaskFromModel)
|
||||
}
|
||||
|
||||
var encodings = map[string]encoding.Encoding{
|
||||
"ibm866": charmap.CodePage866,
|
||||
"iso8859_2": charmap.ISO8859_2,
|
||||
"iso8859_3": charmap.ISO8859_3,
|
||||
"iso8859_4": charmap.ISO8859_4,
|
||||
"iso8859_5": charmap.ISO8859_5,
|
||||
"iso8859_6": charmap.ISO8859_6,
|
||||
"iso8859_7": charmap.ISO8859_7,
|
||||
"iso8859_8": charmap.ISO8859_8,
|
||||
"iso8859_8I": charmap.ISO8859_8I,
|
||||
"iso8859_10": charmap.ISO8859_10,
|
||||
"iso8859_13": charmap.ISO8859_13,
|
||||
"iso8859_14": charmap.ISO8859_14,
|
||||
"iso8859_15": charmap.ISO8859_15,
|
||||
"iso8859_16": charmap.ISO8859_16,
|
||||
"koi8r": charmap.KOI8R,
|
||||
"koi8u": charmap.KOI8U,
|
||||
"macintosh": charmap.Macintosh,
|
||||
"windows874": charmap.Windows874,
|
||||
"windows1250": charmap.Windows1250,
|
||||
"windows1251": charmap.Windows1251,
|
||||
"windows1252": charmap.Windows1252,
|
||||
"windows1253": charmap.Windows1253,
|
||||
"windows1254": charmap.Windows1254,
|
||||
"windows1255": charmap.Windows1255,
|
||||
"windows1256": charmap.Windows1256,
|
||||
"windows1257": charmap.Windows1257,
|
||||
"windows1258": charmap.Windows1258,
|
||||
"macintoshcyrillic": charmap.MacintoshCyrillic,
|
||||
"gbk": simplifiedchinese.GBK,
|
||||
"gb18030": simplifiedchinese.GB18030,
|
||||
"big5": traditionalchinese.Big5,
|
||||
"eucjp": japanese.EUCJP,
|
||||
"iso2022jp": japanese.ISO2022JP,
|
||||
"shiftjis": japanese.ShiftJIS,
|
||||
"euckr": korean.EUCKR,
|
||||
"utf16be": unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
|
||||
"utf16le": unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
|
||||
}
|
||||
|
||||
// NewExtractArchiveTask creates a new ExtractArchiveTask
|
||||
func NewExtractArchiveTask(ctx context.Context, src, dst, encoding, password string, mask []string) (queue.Task, error) {
|
||||
state := &ExtractArchiveTaskState{
|
||||
@@ -374,7 +327,7 @@ func (m *ExtractArchiveTask) masterExtractArchive(ctx context.Context, dep depen
|
||||
if zipExtractor, ok := extractor.(archives.Zip); ok {
|
||||
if m.state.Encoding != "" {
|
||||
m.l.Info("Using encoding %q for zip archive", m.state.Encoding)
|
||||
encoding, ok := encodings[strings.ToLower(m.state.Encoding)]
|
||||
encoding, ok := manager.ZipEncodings[strings.ToLower(m.state.Encoding)]
|
||||
if !ok {
|
||||
m.l.Warning("Unknown encoding %q, fallback to default encoding", m.state.Encoding)
|
||||
} else {
|
||||
@@ -750,7 +703,7 @@ func (m *SlaveExtractArchiveTask) Do(ctx context.Context) (task.Status, error) {
|
||||
if zipExtractor, ok := extractor.(archives.Zip); ok {
|
||||
if m.state.Encoding != "" {
|
||||
m.l.Info("Using encoding %q for zip archive", m.state.Encoding)
|
||||
encoding, ok := encodings[strings.ToLower(m.state.Encoding)]
|
||||
encoding, ok := manager.ZipEncodings[strings.ToLower(m.state.Encoding)]
|
||||
if !ok {
|
||||
m.l.Warning("Unknown encoding %q, fallback to default encoding", m.state.Encoding)
|
||||
} else {
|
||||
|
||||
@@ -720,8 +720,9 @@ func (s *PatchViewService) Patch(c *gin.Context) error {
|
||||
type (
|
||||
ArchiveListFilesParamCtx struct{}
|
||||
ArchiveListFilesService struct {
|
||||
Uri string `form:"uri" binding:"required"`
|
||||
Entity string `form:"entity"`
|
||||
Uri string `form:"uri" binding:"required"`
|
||||
Entity string `form:"entity"`
|
||||
TextEncoding string `form:"text_encoding"`
|
||||
}
|
||||
)
|
||||
|
||||
@@ -739,7 +740,7 @@ func (s *ArchiveListFilesService) List(c *gin.Context) (*ArchiveListFilesRespons
|
||||
return nil, serializer.NewError(serializer.CodeParamErr, "unknown uri", err)
|
||||
}
|
||||
|
||||
files, err := m.ListArchiveFiles(c, uri, s.Entity)
|
||||
files, err := m.ListArchiveFiles(c, uri, s.Entity, s.TextEncoding)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to list archive files: %w", err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user