用 Node 解析 XLSX 并创建 json

IT技术 javascript json node.js excel xlsx
2021-03-17 16:37:58

好的,所以我发现这确实有据可查,node_module称为js-xlsx

问题: 如何解析 xlsx 以输出 json

这是excel表格的样子:

在此处输入图片说明

最后 json 应该是这样的:

[
   {
   "id": 1,
   "Headline": "Team: Sally Pearson",
   "Location": "Austrailia",
   "BodyText": "...",
   "Media: "..."
   },
   {
   "id": 2,
   "Headline": "Team: Rebeca Andrade",
   "Location": "Brazil",
   "BodyText": "...",
   "Media: "..."
   }
]

索引.js:

if(typeof require !== 'undefined') {
    console.log('hey');
    XLSX = require('xlsx');
}
var workbook = XLSX.readFile('./assets/visa.xlsx');
var sheet_name_list = workbook.SheetNames;
sheet_name_list.forEach(function(y) { /* iterate through sheets */
  var worksheet = workbook.Sheets[y];
  for (z in worksheet) {
    /* all keys that do not begin with "!" correspond to cell addresses */
    if(z[0] === '!') continue;
    // console.log(y + "!" + z + "=" + JSON.stringify(worksheet[z].v));

  }

});
XLSX.writeFile(workbook, 'out.xlsx');
6个回答

你也可以使用

var XLSX = require('xlsx');
var workbook = XLSX.readFile('Master.xlsx');
var sheet_name_list = workbook.SheetNames;
console.log(XLSX.utils.sheet_to_json(workbook.Sheets[sheet_name_list[0]]))
请注意,它应该是XLSX.utils.sheet_to_json(sheet, {defval: ""})而不是XLSX.utils.sheet_to_json(sheet, {defVal=""})
2021-05-01 16:37:58
是的,默认情况下它不处理空白单元格,但通过将可选参数传递给 XLSX.readFile 函数是一种解决方法。var workbook = XLSX.readFile('Master.xlsx', {sheetStubs: true}); 可选对象中的 sheetStubs 参数允许库列出默认情况下被库的数据处理实用程序忽略的单元格。
2021-05-08 16:37:58
它不返回空值,这意味着如果值为空,它不会添加到 json 中
2021-05-14 16:37:58

“Josh Marinacci”答案的改进版本,它将超出 Z 列(即 AA1)。

var XLSX = require('xlsx');
var workbook = XLSX.readFile('test.xlsx');
var sheet_name_list = workbook.SheetNames;
sheet_name_list.forEach(function(y) {
    var worksheet = workbook.Sheets[y];
    var headers = {};
    var data = [];
    for(z in worksheet) {
        if(z[0] === '!') continue;
        //parse out the column, row, and value
        var tt = 0;
        for (var i = 0; i < z.length; i++) {
            if (!isNaN(z[i])) {
                tt = i;
                break;
            }
        };
        var col = z.substring(0,tt);
        var row = parseInt(z.substring(tt));
        var value = worksheet[z].v;

        //store header names
        if(row == 1 && value) {
            headers[col] = value;
            continue;
        }

        if(!data[row]) data[row]={};
        data[row][headers[col]] = value;
    }
    //drop those first two rows which are empty
    data.shift();
    data.shift();
    console.log(data);
});
它有效,但如何?查看变量的命名,如果您需要更改某些内容,则必须对流程进行逆向工程。下面的答案具有相同的代码,但命名更好。
2021-04-25 16:37:58
男人,我两年后,希望通过你不来电变量yztt了,那是一个巨大的痛苦,以更新代码
2021-05-08 16:37:58
这里到底发生了什么?z是什么?tt是什么?
2021-05-19 16:37:58
@StormMuller 看到我的回答它有角度解决方案,变量命名正常
2021-05-19 16:37:58

我认为这段代码会做你想做的。它将第一行存储为一组标题,然后将其余部分存储在一个数据对象中,您可以将其作为 JSON 写入磁盘。

var XLSX = require('xlsx');
var workbook = XLSX.readFile('test.xlsx');
var sheet_name_list = workbook.SheetNames;
sheet_name_list.forEach(function(y) {
    var worksheet = workbook.Sheets[y];
    var headers = {};
    var data = [];
    for(z in worksheet) {
        if(z[0] === '!') continue;
        //parse out the column, row, and value
        var col = z.substring(0,1);
        var row = parseInt(z.substring(1));
        var value = worksheet[z].v;

        //store header names
        if(row == 1) {
            headers[col] = value;
            continue;
        }

        if(!data[row]) data[row]={};
        data[row][headers[col]] = value;
    }
    //drop those first two rows which are empty
    data.shift();
    data.shift();
    console.log(data);
});

打印出来

[ { id: 1,
    headline: 'team: sally pearson',
    location: 'Australia',
    'body text': 'majority have…',
    media: 'http://www.youtube.com/foo' },
  { id: 2,
    headline: 'Team: rebecca',
    location: 'Brazil',
    'body text': 'it is a long established…',
    media: 'http://s2.image.foo/' } ]
非常非常好!只发现一件事:当列名以AA, AB...开头时代码会遗漏宽表的列因为这些行var col = z.substring(0,1);var row = parseInt(z.substring(1));. 改成了const column = z.replace(/[0-9]/g, '')const row = parseInt(z.replace(/\D/g,''))现在它的工作原理完全。
2021-04-26 16:37:58
很好,但它跳过了工作表中的空白单元格,这些单元格对我来说应该是 null 或 '',例如:K 列的标题是权重,k1 的值为 72,因此它返回为权重:'72' 和 k3值是 65 所以 weight: '65' 但 k2 单元格留空所以它没有返回任何东西,我想要 weight: '' 或 weight: null,如果我有任何解决方案,这对我来说将是巨大的解脱,请帮助我在此解决,非常感谢
2021-04-27 16:37:58
我想读取一个 xlsx 文件并将该数据转换为 JSON。这可能吗?还有其他建议吗??
2021-05-08 16:37:58
你就是乔希这个人!我进行了一些编辑以保存文件。享受你的赏金
2021-05-13 16:37:58
**podria ser algo asi en react y electron**

 xslToJson = workbook => {
        //var data = [];
        var sheet_name_list = workbook.SheetNames[0];
        return XLSX.utils.sheet_to_json(workbook.Sheets[sheet_name_list], {
            raw: false,
            dateNF: "DD-MMM-YYYY",
            header:1,
            defval: ""
        });
    };

    handleFile = (file /*:File*/) => {
        /* Boilerplate to set up FileReader */
        const reader = new FileReader();
        const rABS = !!reader.readAsBinaryString;

        reader.onload = e => {
            /* Parse data */
            const bstr = e.target.result;
            const wb = XLSX.read(bstr, { type: rABS ? "binary" : "array" });
            /* Get first worksheet */
            let arr = this.xslToJson(wb);

            console.log("arr ", arr)
            var dataNueva = []

            arr.forEach(data => {
                console.log("data renaes ", data)
            })
            // this.setState({ DataEESSsend: dataNueva })
            console.log("dataNueva ", dataNueva)

        };


        if (rABS) reader.readAsBinaryString(file);
        else reader.readAsArrayBuffer(file);
    };

    handleChange = e => {
        const files = e.target.files;
        if (files && files[0]) {
            this.handleFile(files[0]);
        }
    };
@AN German 你救了我的命,今天生产部署,我用这段代码修复了它,非常感谢你
2021-04-29 16:37:58
欢迎使用 Stackoverflow!:D 如果您评论您的代码或在您的正文中添加一些内容以澄清您的答案,它将使您的答案更好且易于理解。
2021-05-18 16:37:58

这里的这个角5方法版本unminified语法对于那些谁与挣扎yztt在接受的答案。用法:parseXlsx().subscribe((data)=> {...})

parseXlsx() {
    let self = this;
    return Observable.create(observer => {
        this.http.get('./assets/input.xlsx', { responseType: 'arraybuffer' }).subscribe((data: ArrayBuffer) => {
            const XLSX = require('xlsx');
            let file = new Uint8Array(data);
            let workbook = XLSX.read(file, { type: 'array' });
            let sheetNamesList = workbook.SheetNames;

            let allLists = {};
            sheetNamesList.forEach(function (sheetName) {
                let worksheet = workbook.Sheets[sheetName];
                let currentWorksheetHeaders: object = {};
                let data: Array<any> = [];
                for (let cellName in worksheet) {//cellNames example: !ref,!margins,A1,B1,C1

                    //skipping serviceCells !margins,!ref
                    if (cellName[0] === '!') {
                        continue
                    };

                    //parse colName, rowNumber, and getting cellValue
                    let numberPosition = self.getCellNumberPosition(cellName);
                    let colName = cellName.substring(0, numberPosition);
                    let rowNumber = parseInt(cellName.substring(numberPosition));
                    let cellValue = worksheet[cellName].w;// .w is XLSX property of parsed worksheet

                    //treating '-' cells as empty on Spot Indices worksheet
                    if (cellValue.trim() == "-") {
                        continue;
                    }

                    //storing header column names
                    if (rowNumber == 1 && cellValue) {
                        currentWorksheetHeaders[colName] = typeof (cellValue) == "string" ? cellValue.toCamelCase() : cellValue;
                        continue;
                    }

                    //creating empty object placeholder to store current row
                    if (!data[rowNumber]) {
                        data[rowNumber] = {}
                    };

                    //if header is date - for spot indices headers are dates
                    data[rowNumber][currentWorksheetHeaders[colName]] = cellValue;

                }

                //dropping first two empty rows
                data.shift();
                data.shift();
                allLists[sheetName.toCamelCase()] = data;
            });

            this.parsed = allLists;

            observer.next(allLists);
            observer.complete();
        })
    });
}