fix(pdfcpu): use custom sort to retrieve the split PDFs

This commit is contained in:
Julien Neuhart
2025-04-12 19:40:16 +02:00
parent 7ca1d118a1
commit 9701f88ae3
3 changed files with 98 additions and 1 deletions
+1 -1
View File
@@ -146,7 +146,7 @@ func (engine *PdfCpu) Split(ctx context.Context, logger *zap.Logger, mode gotenb
return nil, fmt.Errorf("walk directory to find resulting PDFs from split with pdfcpu: %w", err)
}
sort.Sort(gotenberg.AlphanumericSort(outputPaths))
sort.Sort(digitSuffixSort(outputPaths))
return outputPaths, nil
}
+68
View File
@@ -0,0 +1,68 @@
package pdfcpu
import (
"path/filepath"
"regexp"
"sort"
"strconv"
)
// digitSuffixSort is a list of file paths that sorts by the number located
// right before the file extension of each path's base name, so that, for
// instance, "name_2.pdf" is placed before "name_10.pdf". It provides the Len,
// Swap and Less methods required by sort.Interface.
type digitSuffixSort []string

// Len returns how many paths the list holds.
func (s digitSuffixSort) Len() int { return len(s) }

// Swap exchanges the paths stored at indexes i and j.
func (s digitSuffixSort) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}

// Less reports whether the path at index i must be ordered before the path at
// index j.
//
// Ordering rules, in priority order:
//   - two paths with a digit suffix are ordered by that number, then by the
//     remainder of the base name when the numbers are equal;
//   - a path with a digit suffix precedes a path without one;
//   - two paths without a digit suffix are ordered lexicographically on the
//     full path.
func (s digitSuffixSort) Less(i, j int) bool {
	leftNum, leftRest := extractNumber(s[i])
	rightNum, rightRest := extractNumber(s[j])

	// extractNumber signals "no digit suffix" with -1.
	leftHasNum, rightHasNum := leftNum != -1, rightNum != -1

	switch {
	case leftHasNum && rightHasNum:
		if leftNum == rightNum {
			return leftRest < rightRest
		}
		return leftNum < rightNum
	case leftHasNum:
		return true
	case rightHasNum:
		return false
	default:
		return s[i] < s[j]
	}
}
// extractNumber looks at the base name of str and searches for a block of
// digits sitting immediately before the file extension.
//
// When such a block exists and fits in an int, it returns the parsed number
// together with the base name stripped of that block (the extension is kept).
// Otherwise it returns -1 and the unmodified base name.
func extractNumber(str string) (int, string) {
	name := filepath.Base(str)

	groups := extensionSuffixRegexp.FindStringSubmatch(name)
	if len(groups) <= 3 {
		// No numeric block right before an extension.
		return -1, name
	}

	value, err := strconv.Atoi(groups[2])
	if err != nil {
		// The digits cannot be represented as an int; treat the name as
		// having no number.
		return -1, name
	}

	// groups[1] is what precedes the digits, groups[3] is the extension.
	return value, groups[1] + groups[3]
}

// extensionSuffixRegexp is the pattern used by extractNumber. Its capture
// groups are, in order: the text preceding the digits, the digits located
// right before the file extension, and the extension itself (dot included).
var extensionSuffixRegexp = regexp.MustCompile(`^(.*?)(\d+)(\.[^.]+)$`)
// Interface guard. The type is used as a value (sort.Sort(digitSuffixSort(...))),
// so the assertion is made on the value type itself: it fails to compile if
// the methods ever stop being part of the value's method set.
var _ sort.Interface = digitSuffixSort(nil)
+29
View File
@@ -0,0 +1,29 @@
package pdfcpu
import (
"reflect"
"sort"
"testing"
)
// TestDigitSuffixSort checks that sort.Sort applied to a digitSuffixSort
// orders paths by the number located right before the file extension, breaks
// ties with the remainder of the base name, and places names without such a
// number last, in lexicographical order.
//
// Every scenario uses values that are all distinguishable by the ordering, so
// the expected result does not depend on the sort being stable.
func TestDigitSuffixSort(t *testing.T) {
	for _, tc := range []struct {
		scenario     string
		values       []string
		expectedSort []string
	}{
		{
			scenario:     "UUIDs with digit suffixes",
			values:       []string{"2521a33d-1fb4-4279-80fe-8a945285b8f4_12.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_1.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_10.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_3.pdf"},
			expectedSort: []string{"2521a33d-1fb4-4279-80fe-8a945285b8f4_1.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_3.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_10.pdf", "2521a33d-1fb4-4279-80fe-8a945285b8f4_12.pdf"},
		},
		{
			// Only the base name is inspected, so the directory part must
			// not influence the numeric ordering.
			scenario:     "paths with directories",
			values:       []string{"/tmp/b/doc_10.pdf", "/tmp/a/doc_2.pdf", "/tmp/c/doc_1.pdf"},
			expectedSort: []string{"/tmp/c/doc_1.pdf", "/tmp/a/doc_2.pdf", "/tmp/b/doc_10.pdf"},
		},
		{
			// Equal numbers fall back to comparing the rest of the name.
			scenario:     "same digit suffix with different prefixes",
			values:       []string{"b_1.pdf", "a_2.pdf", "a_1.pdf"},
			expectedSort: []string{"a_1.pdf", "b_1.pdf", "a_2.pdf"},
		},
		{
			// Names with a digit suffix come first; the others follow in
			// lexicographical order.
			scenario:     "names without digit suffix",
			values:       []string{"foo.pdf", "doc_2.pdf", "bar.pdf", "doc_1.pdf"},
			expectedSort: []string{"doc_1.pdf", "doc_2.pdf", "bar.pdf", "foo.pdf"},
		},
	} {
		t.Run(tc.scenario, func(t *testing.T) {
			// The sort happens in place on tc.values.
			sort.Sort(digitSuffixSort(tc.values))

			if !reflect.DeepEqual(tc.values, tc.expectedSort) {
				t.Fatalf("expected %+v but got: %+v", tc.expectedSort, tc.values)
			}
		})
	}
}