mirror of
https://github.com/gotenberg/gotenberg.git
synced 2026-07-02 08:27:41 +08:00
fix(pdfcpu): use custom sort to retrieve the split PDFs
This commit is contained in:
@@ -146,7 +146,7 @@ func (engine *PdfCpu) Split(ctx context.Context, logger *zap.Logger, mode gotenb
|
||||
return nil, fmt.Errorf("walk directory to find resulting PDFs from split with pdfcpu: %w", err)
|
||||
}
|
||||
|
||||
sort.Sort(gotenberg.AlphanumericSort(outputPaths))
|
||||
sort.Sort(digitSuffixSort(outputPaths))
|
||||
|
||||
return outputPaths, nil
|
||||
}
|
||||
|
||||
@@ -0,0 +1,68 @@
|
||||
package pdfcpu
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// digitSuffixSort is a slice of file paths that, together with its Len, Swap
// and Less methods, can be handed to sort.Sort. The ordering itself is
// defined by Less.
type digitSuffixSort []string

// Len reports how many paths the slice holds.
func (s digitSuffixSort) Len() int { return len(s) }

// Swap exchanges the paths stored at positions i and j.
func (s digitSuffixSort) Swap(i, j int) {
	tmp := s[i]
	s[i] = s[j]
	s[j] = tmp
}
|
||||
|
||||
func (s digitSuffixSort) Less(i, j int) bool {
|
||||
numI, restI := extractNumber(s[i])
|
||||
numJ, restJ := extractNumber(s[j])
|
||||
|
||||
// If both strings contain a number, compare them numerically.
|
||||
if numI != -1 && numJ != -1 {
|
||||
if numI != numJ {
|
||||
return numI < numJ
|
||||
}
|
||||
// If the numbers are equal, compare the "rest" strings.
|
||||
return restI < restJ
|
||||
}
|
||||
|
||||
// If one contains a number and the other doesn't, the one with the number
|
||||
// comes first.
|
||||
if numI != -1 {
|
||||
return true
|
||||
}
|
||||
if numJ != -1 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Neither has a number; fall back to lexicographical order.
|
||||
return s[i] < s[j]
|
||||
}
|
||||
|
||||
// extensionSuffixRegexp matches a name that ends with a run of digits followed
// by a single file extension. Capture group 1 is the (lazily matched) prefix,
// group 2 the digits, and group 3 the extension including its leading dot.
var extensionSuffixRegexp = regexp.MustCompile(`^(.*?)(\d+)(\.[^.]+)$`)

// extractNumber looks at the base name of str and, when digits sit directly
// in front of the file extension, returns those digits as an int together
// with the base name stripped of them (prefix + extension).
//
// When the base name has no such digits, or the digits cannot be converted
// by strconv.Atoi, it returns -1 and the unmodified base name.
func extractNumber(str string) (int, string) {
	base := filepath.Base(str)

	groups := extensionSuffixRegexp.FindStringSubmatch(base)
	if len(groups) <= 3 {
		// No numeric portion found.
		return -1, base
	}

	number, err := strconv.Atoi(groups[2])
	if err != nil {
		return -1, base
	}

	// Drop the numeric block but keep the prefix and the extension.
	return number, groups[1] + groups[3]
}
|
||||
|
||||
// Interface guard.
|
||||
// Compile-time check only: the blank identifier discards the value, and the
// assignment fails to build unless *digitSuffixSort implements sort.Interface.
var _ sort.Interface = (*digitSuffixSort)(nil)
|
||||
@@ -0,0 +1,29 @@
|
||||
package pdfcpu
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDigitSuffixSort(t *testing.T) {
|
||||
for _, tc := range []struct {
|
||||
scenario string
|
||||
values []string
|
||||
expectedSort []string
|
||||
}{
|
||||
{
|
||||
scenario: "UUIDs with digit suffixes",
|
||||
values: []string{"2521a33d-1fb4-4279-80fe-8a945285b8f4_12.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_1.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_10.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_3.pdf"},
|
||||
expectedSort: []string{"2521a33d-1fb4-4279-80fe-8a945285b8f4_1.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_3.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_10.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_12.pdf"},
|
||||
},
|
||||
} {
|
||||
t.Run(tc.scenario, func(t *testing.T) {
|
||||
sort.Sort(digitSuffixSort(tc.values))
|
||||
|
||||
if !reflect.DeepEqual(tc.values, tc.expectedSort) {
|
||||
t.Fatalf("expected %+v but got: %+v", tc.expectedSort, tc.values)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user