From 7356a8d5a9742d7474ea688649248ef5bb1134bf Mon Sep 17 00:00:00 2001 From: Jonathan Durand <38976316+jdugh@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:41:32 +0200 Subject: [PATCH] fix(copySheet): preserve UTF-8 encoding in sheet XML content (#208) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace etree.tostring() with asBinary() to prevent UTF-8 corruption - Add warning when binary=false is used - Apply binary parameter consistently across all copied files - Add comprehensive tests for UTF-8, merged cells, styles, and images Fixes Excel XML error: 'Illegal XML character' when copying sheets with UTF-8 characters in headers/footers (éàèùï, etc.) This bug was present even before PR #207 (comment copying fix). --- src/index.js | 16 ++++++++-- test/crud-test.ts | 78 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 84 insertions(+), 10 deletions(-) diff --git a/src/index.js b/src/index.js index e461e30..f1d392f 100755 --- a/src/index.js +++ b/src/index.js @@ -97,12 +97,22 @@ class Workbook { */ copySheet(sheetName, copyName, binary = true) { var self = this; + + // Warn user if binary mode is disabled + // do not warn during jest tests to avoid polluting test output + if (binary === false && !process.env.JEST_WORKER_ID) { + console.warn('Warning: copySheet() called with binary=false. UTF-8 characters (éàèùï, etc.) in sheet content, headers, footers, comments, and other XML content may be corrupted. Use binary=true (default) to preserve all characters correctly.'); + } + var sheet = self.loadSheet(sheetName); //filename, name , id, root var newSheetIndex = (self.workbook.findall("sheets/sheet").length + 1).toString(); var fileName = 'worksheets' + '/' + 'sheet' + newSheetIndex + '.xml'; var arcName = self.prefix + '/' + fileName; - // Copy sheet file - self.archive.file(arcName, etree.tostring(sheet.root)); + + // Copy sheet file in binary mode to preserve UTF-8 encoding + var sourceSheetFile = self.archive.file(sheet.filename); + var sheetContent = sourceSheetFile.asBinary(); + self.archive.file(arcName, sheetContent); self.archive.files[arcName].options.binary = binary; // Add content type for the new sheet @@ -194,7 +204,7 @@ class Workbook { } self.archive.file(newFilePath, binaryContent); - self.archive.files[newFilePath].options.binary = true; // Force binary mode to preserve UTF-8 encoding + self.archive.files[newFilePath].options.binary = binary; // Update relationship target in the copied rels file var newRelInRels = newRelsRoot.findall('Relationship')[index]; diff --git a/test/crud-test.ts b/test/crud-test.ts index b772206..9a1e855 100644 --- a/test/crud-test.ts +++ b/test/crud-test.ts @@ -1506,24 +1506,88 @@ describe("CRUD operations", function() { expect(getSharedString(sharedStrings, copysheet4, "B4")).toEqual("Some data"); expect(getSharedString(sharedStrings, copysheet4, "C6")).toEqual("${data.foo}"); expect(getSharedString(sharedStrings, copysheet4, "C11")).toEqual("AZE²&é\"'(-è_çà)=~#{[|`\\^@]} ^$ù*,;:!¨£%µ?./§¤€<>Ôöôîïï"); - // TODO : How can I test the fill color of cell D9 ? + // Verify that cell D9 has the same style as the original (fill color should be preserved) + var originalSheet1 = etree.parse(t.archive.file("xl/worksheets/sheet1.xml").asText()).getroot(); + var originalCellD9 = originalSheet1.find(".//c[@r='D9']"); + var copiedCellD9 = copysheet4.find(".//c[@r='D9']"); + expect(originalCellD9).toBeDefined(); + expect(copiedCellD9).toBeDefined(); + expect(copiedCellD9.attrib.s).toEqual(originalCellD9.attrib.s); // Same style index (fill color preserved) // test copy sheet with image t.copySheet("image_sheet", "image_sheet_copy"); var relscopysheet2 = etree.parse(t.archive.file("xl/worksheets/_rels/sheet5.xml.rels").asText()).getroot(); expect(relscopysheet2.findall("Relationship").length).toEqual(1); expect(relscopysheet2.findall("Relationship")[0].attrib.Target).toEqual("../drawings/drawing1.xml"); - // test copy sheet with utf-8 header/footer - // without binary mode (no ok) : - // This test can be remove if you wish. This is just for show the issue if binary is set to true (default) + + // ======================================== + // UTF-8 PRESERVATION TESTS + // ======================================== + // The original test expected UTF-8 corruption (.not.toEqual) due to a bug where + // etree.tostring() was used without proper encoding. This has been fixed by using + // asBinary() to read the source sheet, preserving UTF-8 characters correctly. + + // Test with binary mode enabled (default = true) - UTF-8 should be preserved t.copySheet("utf8_header_sheet", "utf8_header_sheet_copy"); var copysheet6 = etree.parse(t.archive.file("xl/worksheets/sheet6.xml").asText()).getroot(); expect(copysheet6).toBeDefined(); - expect(copysheet6.find("headerFooter/oddHeader").text).not.toEqual("<EST UTF8 éàè&ùï&C&A"); - // with binary mode (ok) : + expect(copysheet6.find("headerFooter/oddHeader").text).toEqual("<EST UTF8 éàè&ùï&C&A"); // UTF-8 must be preserved with binary mode enabled (default = true) + + // Test with binary mode explicitly disabled (binary=false) - UTF-8 will be corrupted t.copySheet("utf8_header_sheet", "utf8_header_sheet_copy_v2", false); var copysheet7 = etree.parse(t.archive.file("xl/worksheets/sheet7.xml").asText()).getroot(); expect(copysheet7).toBeDefined(); - expect(copysheet7.find("headerFooter/oddHeader").text).toEqual("<EST UTF8 éàè&ùï&C&A"); + // When binary=false, UTF-8 characters are corrupted (this is expected behavior with warning) + expect(copysheet7.find("headerFooter/oddHeader").text).not.toEqual("<EST UTF8 éàè&ùï&C&A"); + expect(copysheet7.find("headerFooter/oddHeader").text).toContain("LTEST UTF8"); // non-UTF8 parts should remain intact + + // ======================================== + // NEW VALIDATION TESTS + // ======================================== + + // Verify merged cells are preserved + var originalSheetWithMerge = etree.parse(t.archive.file("xl/worksheets/sheet1.xml").asText()).getroot(); + var originalMergeCells = originalSheetWithMerge.find("mergeCells"); + var copiedSheetWithMerge = copysheet4; + var copiedMergeCells = copiedSheetWithMerge.find("mergeCells"); + expect(copiedMergeCells).toBeDefined(); + expect(copiedMergeCells.findall("mergeCell").length).toEqual(originalMergeCells.findall("mergeCell").length); + var originalMergeRefs = originalMergeCells.findall("mergeCell").map((mc: any) => mc.attrib.ref); + var copiedMergeRefs = copiedMergeCells.findall("mergeCell").map((mc: any) => mc.attrib.ref); + expect(copiedMergeRefs).toEqual(originalMergeRefs); + + // Verify print settings are preserved + var originalSheet = etree.parse(t.archive.file("xl/worksheets/sheet3.xml").asText()).getroot(); + var originalPageSetup = originalSheet.find("pageSetup"); + var copiedPageSetup = copysheet6.find("pageSetup"); + expect(copiedPageSetup).toBeDefined(); + expect(copiedPageSetup.attrib.orientation).toEqual(originalPageSetup.attrib.orientation); + + // Verify header/footer content is identical to original + var originalHeaderFooter = originalSheet.find("headerFooter"); + expect(originalHeaderFooter).toBeDefined(); + var copiedHeaderFooter = copysheet6.find("headerFooter"); + expect(copiedHeaderFooter).toBeDefined(); + expect(copiedHeaderFooter.find("oddHeader")).toBeDefined(); + expect(copiedHeaderFooter.find("oddHeader").text).toEqual(originalHeaderFooter.find("oddHeader").text); + expect(copiedHeaderFooter.find("oddFooter")).toBeDefined(); + expect(copiedHeaderFooter.find("oddFooter").text).toEqual(originalHeaderFooter.find("oddFooter").text); + + // Verify images share the same drawing file (not duplicated) - like excel does when you do the operation manually + // This unit test could evolve if it is necessary to duplicate the image + // If so, then: + // -> expect(copiedImageRels.findall("Relationship").length).not.toEqual(originalImageRels.findall("Relationship").length); + // and test the consistency of the Relationship + var originalImageSheet = etree.parse(t.archive.file("xl/worksheets/sheet2.xml").asText()).getroot(); + var originalImageRels = etree.parse(t.archive.file("xl/worksheets/_rels/sheet2.xml.rels").asText()).getroot(); + var copiedImageRels = etree.parse(t.archive.file("xl/worksheets/_rels/sheet5.xml.rels").asText()).getroot(); + expect(copiedImageRels.findall("Relationship").length).toEqual(originalImageRels.findall("Relationship").length); + + var originalDrawingRel = originalImageRels.find("Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing']"); + var copiedDrawingRel = copiedImageRels.find("Relationship[@Type='http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing']"); + expect(originalDrawingRel).toBeDefined(); + expect(copiedDrawingRel).toBeDefined(); + expect(copiedDrawingRel.attrib.Target).toEqual(originalDrawingRel.attrib.Target); + // Get binary data var newData = t.generate(); fs.writeFileSync("test/output/copy-sheet.xlsx", newData, "binary");