Parse a dot delimited numerical hierarchy index JSON

75 views Asked by At

Suppose I have a data structure in the following format

https://docs.google.com/spreadsheets/d/e/2PACX-1vTFO_MoPwZmvKZOWT2J7kjGr9OC4uUb06zGxOdmXJ8h3FZ2Q0tpsoYH653Pm5mdNcM4Fs7KdlGWSkEy/pubhtml?gid=692973693&single=true

-----------
MovementOfGoods
---|  4.2. MovementOfGoods : N/A 
------|  4.2.1. NumberOfMovementLines : Inteiro *
------|  4.2.2. TotalQuantityIssued : Decimal *
------|  4.2.3. StockMovement : N/A 
---------|  4.2.3.1. DocumentNumber : Texto 60 *
---------|  4.2.3.2. ATCUD : Texto 100 *
---------|  4.2.3.3. DocumentStatus : N/A *
------------|  4.2.3.3.1. MovementStatus : Texto 1 *
------------|  4.2.3.3.2. MovementStatusDate : Data e Hora *
------------|  4.2.3.3.3. Reason : Texto 50 
------------|  4.2.3.3.4. SourceID : Texto 30 *
------------|  4.2.3.3.5. SourceBilling : Texto 1 *
---------|  4.2.3.4. Hash : Texto 172 *
---------|  4.2.3.5. HashControl : Texto 70 *
---------|  4.2.3.6. Period : Inteiro 
---------|  4.2.3.7. MovementDate : Data *
---------|  4.2.3.8. MovementType : Texto 2 *
---------|  4.2.3.9. SystemEntryDate : Data e Hora *
---------|  4.2.3.10. TransactionID : Texto 70 **
---------|  4.2.3.11. CustomerID : Texto 30 **
---------|  4.2.3.12. SupplierID : Texto 30 **
---------|  4.2.3.13. SourceID : Texto 30 *
---------|  4.2.3.14. EACCode : Texto 5 
---------|  4.2.3.15. MovementComments : Texto 60 
---------|  4.2.3.16. ShipTo : N/A 
------------|  4.2.3.16.1. DeliveryID : Texto 255 
------------|  4.2.3.16.2. DeliveryDate : Data 
------------|  4.2.3.16.3. WarehouseID : Texto 50 
------------|  4.2.3.16.4. LocationID : Texto 30 
------------|  4.2.3.16.5. Address : N/A 
---------------|  4.2.3.16.5.1. BuildingNumber : Texto 10 
---------------|  4.2.3.16.5.2. StreetName : Texto 200 
---------------|  4.2.3.16.5.3. AddressDetail : Texto 210 *
---------------|  4.2.3.16.5.4. City : Texto 50 *
---------------|  4.2.3.16.5.5. PostalCode : Texto 20 *
---------------|  4.2.3.16.5.6. Region : Texto 50 
---------------|  4.2.3.16.5.7. Country : Texto 2 *
---------|  4.2.3.17. ShipFrom : N/A 
------------|  4.2.3.17.1. DeliveryID : Texto 255 
------------|  4.2.3.17.2. DeliveryDate : Data 
------------|  4.2.3.17.3. WarehouseID : Texto 50 
------------|  4.2.3.17.4. LocationID : Texto 30 
------------|  4.2.3.17.5. Address : N/A 
---------------|  4.2.3.17.5.1. BuildingNumber : Texto 10 
---------------|  4.2.3.17.5.2. StreetName : Texto 200 
---------------|  4.2.3.17.5.3. AddressDetail : Texto 210 *
---------------|  4.2.3.17.5.4. City : Texto 50 *
---------------|  4.2.3.17.5.5. PostalCode : Texto 20 *
---------------|  4.2.3.17.5.6. Region : Texto 20 
---------------|  4.2.3.17.5.7. Country : Texto 2 *
---------|  4.2.3.18. MovementEndTime : Data e hora 
---------|  4.2.3.19. MovementStartTime : Data e hora *
---------|  4.2.3.20. ATDocCodeID : Texto 200 
---------|  4.2.3.21. Line : N/A *
------------|  4.2.3.21.1. LineNumber : Inteiro *
------------|  4.2.3.21.2. OrderReferences : N/A 
---------------|  4.2.3.21.2.1. OriginatingON : Texto 60 
---------------|  4.2.3.21.2.2. OrderDate : Data 
------------|  4.2.3.21.3. ProductCode : Texto 60 *
------------|  4.2.3.21.4. ProductDescription : Texto 200 *
------------|  4.2.3.21.5. Quantity : Decimal *
------------|  4.2.3.21.6. UnitOfMeasure : Texto 20 *
------------|  4.2.3.21.7. UnitPrice : Monetário *
------------|  4.2.3.21.8. Description : Texto 200 *
------------|  4.2.3.21.9. ProductSerialNumber : N/A 
---------------|  4.2.3.21.9.1. SerialNumber : Texto 100 *
------------|  4.2.3.21.10. DebitAmount : Monetário **
------------|  4.2.3.21.11. CreditAmount : Monetário **
------------|  4.2.3.21.12. Tax : N/A **
---------------|  4.2.3.21.12.1. TaxType : Texto 3 *
---------------|  4.2.3.21.12.2. TaxCountryRegion : Texto 5 *
---------------|  4.2.3.21.12.3. TaxCode : Texto 10 *
---------------|  4.2.3.21.12.4. TaxPercentage : Decimal *
------------|  4.2.3.21.13. TaxExemptionReason : Texto 60 **
------------|  4.2.3.21.14. TaxExemptionCode : Texto 3 **
------------|  4.2.3.21.15. SettlementAmount : Monetário 
------------|  4.2.3.21.16. CustomsInformation : N/A 
---------------|  4.2.3.21.16.1. ARCNo : Texto 21 
---------------|  4.2.3.21.16.2. IECAmount : Monetário 
---------|  4.2.3.22. DocumentTotals : N/A *
------------|  4.2.3.22.1. TaxPayable : Monetário *
------------|  4.2.3.22.2. NetTotal : Monetário *
------------|  4.2.3.22.3. GrossTotal : Monetário *
------------|  4.2.3.22.4. Currency : N/A 
---------------|  4.2.3.22.4.1. CurrencyCode : Texto 3 *
---------------|  4.2.3.22.4.2. CurrencyAmount : Monetário *
---------------|  4.2.3.22.4.3. ExchangeRate : Decimal *

[Embedded structures]
~> MovementOfGoods
~> StockMovement
~> DocumentStatus
~> ShipTo
~> Address
~> ShipFrom
~> Address
~> Line
~> OrderReferences
~> ProductSerialNumber
~> Tax
~> CustomsInformation
~> DocumentTotals
~> Currency

Which I would like to parse into JSON with objects embedded so as to follow the hierarchy created by the numbering.

The above is a parsed version of a tsv file exported from Google Sheets, https://docs.google.com/spreadsheets/d/e/2PACX-1vTFO_MoPwZmvKZOWT2J7kjGr9OC4uUb06zGxOdmXJ8h3FZ2Q0tpsoYH653Pm5mdNcM4Fs7KdlGWSkEy/pubhtml?gid=692973693&single=true

I suppose an algorithm to do this would need to be aware of what "level" each line is, when a new indention is about to be made, into a new object, newer even embedded objects inside that, and then back down to root, as in

-----------
Customer
---|  2.2.1. CustomerID : Texto 30 *
---|  2.2.2. AccountID : Texto 30 *
---|  2.2.3. CustomerTaxID : Texto 30 *
---|  2.2.4. CompanyName : Texto 100 *
---|  2.2.5. Contact : Texto 50 
---|  2.2.6. BillingAddress : N/A *
------|  2.2.6.1. BuildingNumber : Texto 10 
------|  2.2.6.2. StreetName : Texto 200 
------|  2.2.6.3. AddressDetail : Texto 210 *
------|  2.2.6.4. City : Texto 50 *
------|  2.2.6.5. PostalCode : Texto 20 *
------|  2.2.6.6. Region : Texto 50 
------|  2.2.6.7. Country : Texto 12 *
---|  2.2.7. ShipToAddress : N/A 
------|  2.2.7.1. BuildingNumber : Texto 10 
------|  2.2.7.2. StreetName : Texto 200 
------|  2.2.7.3. AddressDetail : Texto 210 *
------|  2.2.7.4. City : Texto 50 *
------|  2.2.7.5. PostalCode : Texto 20 *
------|  2.2.7.6. Region : Texto 50 
------|  2.2.7.7. Country : Texto 12 *
---|  2.2.8. Telephone : Texto 20 
---|  2.2.9. Fax : Texto 20 
---|  2.2.10. Email : Texto 60 
---|  2.2.11. Website : Texto 60 
---|  2.2.12. SelfBillingIndicator : Inteiro *

[Embedded structures]
~> BillingAddress
~> ShipToAddress

for example.

My current code goes through the list and detects which are embedded structures, but fails to keep track of which of these structures should be embedded in, so hierarchy is lost, or actually only visual. After going through each sheet, the "[Embedded structures]" are listed, but flat, not in a tree.

var request = require('request')

url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vTFO_MoPwZmvKZOWT2J7kjGr9OC4uUb06zGxOdmXJ8h3FZ2Q0tpsoYH653Pm5mdNcM4Fs7KdlGWSkEy/pub?gid=692973693&single=true&output=tsv"

request(url, function (error, response, body) {
  if (body != undefined) {

    let lines = body.split('\r')
    let lastLevel = 0
    let dataStructs = [];

    lines.forEach(function(line, index) {
      if (index != 0) {
        columns = line.split('\t')
        let currentLevel = (columns[0].trim().split(".").length - 1)
        if (index == 1) {
          initialLevel = currentLevel;
        }
        if (columns[5].toLowerCase() == 'n/a') dataStructs.push(columns[2]);
        if (currentLevel !== lastLevel) {
          if (currentLevel > lastLevel) {
            console.log(columns[0].trim(), columns[2].trim(), ':', columns[5].trim(), columns[1].trim())
          } else if (currentLevel < lastLevel) {
            console.log(columns[0].trim(), columns[2].trim(), ':', columns[5].trim(), columns[1].trim())
          }
          lastLevel = currentLevel
        } else {
          console.log(columns[0].trim(), columns[2].trim(), ':', columns[5].trim(), columns[1].trim())
        }
      }
    });
    console.log('\n[Embedded structures]')
    dataStructs.forEach(struct => {
      console.log('~>', struct/* , '=', modelName+struct, '?' */)
    })
    console.log('\n')
  }
})

The code above will output

4.2. MovementOfGoods : N/A 
4.2.1. NumberOfMovementLines : Inteiro *
4.2.2. TotalQuantityIssued : Decimal *
4.2.3. StockMovement : N/A 
4.2.3.1. DocumentNumber : Texto 60 *
4.2.3.2. ATCUD : Texto 100 *
4.2.3.3. DocumentStatus : N/A *
4.2.3.3.1. MovementStatus : Texto 1 *
4.2.3.3.2. MovementStatusDate : Data e Hora *
4.2.3.3.3. Reason : Texto 50 
4.2.3.3.4. SourceID : Texto 30 *
4.2.3.3.5. SourceBilling : Texto 1 *
4.2.3.4. Hash : Texto 172 *
4.2.3.5. HashControl : Texto 70 *
4.2.3.6. Period : Inteiro 
4.2.3.7. MovementDate : Data *
4.2.3.8. MovementType : Texto 2 *
4.2.3.9. SystemEntryDate : Data e Hora *
4.2.3.10. TransactionID : Texto 70 **
4.2.3.11. CustomerID : Texto 30 **
4.2.3.12. SupplierID : Texto 30 **
4.2.3.13. SourceID : Texto 30 *
4.2.3.14. EACCode : Texto 5 
4.2.3.15. MovementComments : Texto 60 
4.2.3.16. ShipTo : N/A 
4.2.3.16.1. DeliveryID : Texto 255 
4.2.3.16.2. DeliveryDate : Data 
4.2.3.16.3. WarehouseID : Texto 50 
4.2.3.16.4. LocationID : Texto 30 
4.2.3.16.5. Address : N/A 
4.2.3.16.5.1. BuildingNumber : Texto 10 
4.2.3.16.5.2. StreetName : Texto 200 
4.2.3.16.5.3. AddressDetail : Texto 210 *
4.2.3.16.5.4. City : Texto 50 *
4.2.3.16.5.5. PostalCode : Texto 20 *
4.2.3.16.5.6. Region : Texto 50 
4.2.3.16.5.7. Country : Texto 2 *
4.2.3.17. ShipFrom : N/A 
4.2.3.17.1. DeliveryID : Texto 255 
4.2.3.17.2. DeliveryDate : Data 
4.2.3.17.3. WarehouseID : Texto 50 
4.2.3.17.4. LocationID : Texto 30 
4.2.3.17.5. Address : N/A 
4.2.3.17.5.1. BuildingNumber : Texto 10 
4.2.3.17.5.2. StreetName : Texto 200 
4.2.3.17.5.3. AddressDetail : Texto 210 *
4.2.3.17.5.4. City : Texto 50 *
4.2.3.17.5.5. PostalCode : Texto 20 *
4.2.3.17.5.6. Region : Texto 20 
4.2.3.17.5.7. Country : Texto 2 *
4.2.3.18. MovementEndTime : Data e hora 
4.2.3.19. MovementStartTime : Data e hora *
4.2.3.20. ATDocCodeID : Texto 200 
4.2.3.21. Line : N/A *
4.2.3.21.1. LineNumber : Inteiro *
4.2.3.21.2. OrderReferences : N/A 
4.2.3.21.2.1. OriginatingON : Texto 60 
4.2.3.21.2.2. OrderDate : Data 
4.2.3.21.3. ProductCode : Texto 60 *
4.2.3.21.4. ProductDescription : Texto 200 *
4.2.3.21.5. Quantity : Decimal *
4.2.3.21.6. UnitOfMeasure : Texto 20 *
4.2.3.21.7. UnitPrice : Monetário *
4.2.3.21.8. Description : Texto 200 *
4.2.3.21.9. ProductSerialNumber : N/A 
4.2.3.21.9.1. SerialNumber : Texto 100 *
4.2.3.21.10. DebitAmount : Monetário **
4.2.3.21.11. CreditAmount : Monetário **
4.2.3.21.12. Tax : N/A **
4.2.3.21.12.1. TaxType : Texto 3 *
4.2.3.21.12.2. TaxCountryRegion : Texto 5 *
4.2.3.21.12.3. TaxCode : Texto 10 *
4.2.3.21.12.4. TaxPercentage : Decimal *
4.2.3.21.13. TaxExemptionReason : Texto 60 **
4.2.3.21.14. TaxExemptionCode : Texto 3 **
4.2.3.21.15. SettlementAmount : Monetário 
4.2.3.21.16. CustomsInformation : N/A 
4.2.3.21.16.1. ARCNo : Texto 21 
4.2.3.21.16.2. IECAmount : Monetário 
4.2.3.22. DocumentTotals : N/A *
4.2.3.22.1. TaxPayable : Monetário *
4.2.3.22.2. NetTotal : Monetário *
4.2.3.22.3. GrossTotal : Monetário *
4.2.3.22.4. Currency : N/A 
4.2.3.22.4.1. CurrencyCode : Texto 3 *
4.2.3.22.4.2. CurrencyAmount : Monetário *
4.2.3.22.4.3. ExchangeRate : Decimal *

[Embedded structures]
~> MovementOfGoods
~> StockMovement
~> DocumentStatus
~> ShipTo
~> Address
~> ShipFrom
~> Address
~> Line
~> OrderReferences
~> ProductSerialNumber
~> Tax
~> CustomsInformation
~> DocumentTotals
~> Currency

How would one go down these lists and add the fields/properties to the right embedded objects or to the root of the object, in a tree-like form?

edit:

Using the above "Customer" as an example, with a tabular structure such as https://docs.google.com/spreadsheets/d/e/2PACX-1vTFO_MoPwZmvKZOWT2J7kjGr9OC4uUb06zGxOdmXJ8h3FZ2Q0tpsoYH653Pm5mdNcM4Fs7KdlGWSkEy/pubhtml?gid=700343422&single=true the desired output would be

{
 "Customer": {
  "2.2.1. CustomerID": "Texto 30 *",
  "2.2.2. AccountID": "Texto 30 *",
  "2.2.3. CustomerTaxID": "Texto 30 *",
  "2.2.4. CompanyName": "Texto 100 *",
  "2.2.5. Contact": "Texto 50",
  "2.2.6. BillingAddress": {
   "2.2.6.1. BuildingNumber": "Texto 10",
   "2.2.6.2. StreetName": "Texto 200",
   "2.2.6.3. AddressDetail": "Texto 210 *",
   "2.2.6.4. City": "Texto 50 *",
   "2.2.6.5. PostalCode": "Texto 20 *",
   "2.2.6.6. Region": "Texto 50 ",
   "2.2.6.7. Country": "Texto 12 *"
  },
  "2.2.7. ShipToAddress": {
   "2.2.7.1. BuildingNumber": "Texto 10",
   "2.2.7.2. StreetName": "Texto 200",
   "2.2.7.3. AddressDetail": "Texto 210 *",
   "2.2.7.4. City": "Texto 50 *",
   "2.2.7.5. PostalCode": "Texto 20 *",
   "2.2.7.6. Region": "Texto 50",
   "2.2.7.7. Country": "Texto 12 *"
  },
  "2.2.8. Telephone ": "Texto 20",
  "2.2.9. Fax": "Texto 20",
  "2.2.10. Email": "Texto 60",
  "2.2.11. Website": "Texto 60",
  "2.2.12. SelfBillingIndicator": "Inteiro *"
 }
}

edit 2: To be clear, the desired "outcome" is to be able to detect when nested objects start and end, as well as nesting them in the right place according to the source table. The particular columns from the table passed onto JSON could vary, as well as the actual syntax used changed from JSON to, say, rails generators. What I'm missing is the "level detection" part of the code.

edit 3:

var request = require('request')

url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vTFO_MoPwZmvKZOWT2J7kjGr9OC4uUb06zGxOdmXJ8h3FZ2Q0tpsoYH653Pm5mdNcM4Fs7KdlGWSkEy/pub?gid=700343422&single=true&output=tsv"

request(url, function (error, response, body) {
  if (body != undefined) {

    let lines = body.split('\r')
    let lastLevel = 0
    let dataStructs = [];
    goneUpCounter = 0;
    console.log("{")

    lines.forEach(function (line, index) {
      if (index != 0) {
        columns = line.split('\t')
        let currentLevel = (columns[0].trim().split(".").length - 1)

        if (columns[5].toLowerCase() == 'n/a') {
          dataStructs.push(columns[2]);
          goneUpCounter += 1;
        }
        if (currentLevel !== lastLevel) {
          if (currentLevel > lastLevel) {
            if (columns[5].toLowerCase() == 'n/a') {
              console.log("{\"" + columns[2].trim() + "\" : {")
            } else {
              console.log("\"" + columns[0].trim(), columns[2].trim() + "\"", ':', "\"" + columns[5].trim(), columns[1].trim() + "\",")
            }
          } else if (currentLevel < lastLevel) {
            if (columns[5].toLowerCase() == 'n/a') {
              console.log("\"" + columns[2].trim() + "\" : {")
            } else {
              console.log("\"" + columns[0].trim(), columns[2].trim() + "\"", ':', "\"" + columns[5].trim(), columns[1].trim() + "\",")
            }
          }
          lastLevel = currentLevel
        } else {

          if (columns[5].toLowerCase() == 'n/a') {
            console.log("\"" + columns[2].trim() + "\" : {")
          } else {
            if ((lines[index + 1] != undefined && lines[index + 1].split('\t')[0].trim().split('.').length - 1 < lastLevel) || index + 1 == lines.length) {
              console.log("\"" + columns[0].trim(), columns[2].trim() + "\"", ':', "\"" + columns[5].trim(), columns[1].trim() + "\"")
            } else {
              console.log("\"" + columns[0].trim(), columns[2].trim() + "\"", ':', "\"" + columns[5].trim(), columns[1].trim() + "\",")
            }
          }

          if (lines[index + 1] != undefined && lines[index + 1].split('\t')[0].trim().split('.').length - 1 < lastLevel) {
            goneUpCounter -= 1;
            console.log('},')
          }
        }
      }
    });
    for (let i = 0; i < goneUpCounter + 1; i++) {
      console.log('}')
    }
  }
})

returns valid JSON, but the code is just awful.

{
 "2.2.1. CustomerID": "Texto 30 *",
 "2.2.2. AccountID": "Texto 30 *",
 "2.2.3. CustomerTaxID": "Texto 30 *",
 "2.2.4. CompanyName": "Texto 100 *",
 "2.2.5. Contact": "Texto 50 ",
 "BillingAddress": {
  "2.2.6.1. BuildingNumber": "Texto 10 ",
  "2.2.6.2. StreetName": "Texto 200 ",
  "2.2.6.3. AddressDetail": "Texto 210 *",
  "2.2.6.4. City": "Texto 50 *",
  "2.2.6.5. PostalCode": "Texto 20 *",
  "2.2.6.6. Region": "Texto 50 ",
  "2.2.6.7. Country": "Texto 12 *"
 },
 "ShipToAddress": {
  "2.2.7.1. BuildingNumber": "Texto 10 ",
  "2.2.7.2. StreetName": "Texto 200 ",
  "2.2.7.3. AddressDetail": "Texto 210 *",
  "2.2.7.4. City": "Texto 50 *",
  "2.2.7.5. PostalCode": "Texto 20 *",
  "2.2.7.6. Region": "Texto 50 ",
  "2.2.7.7. Country": "Texto 12 *"
 },
 "2.2.8. Telephone": "Texto 20 ",
 "2.2.9. Fax": "Texto 20 ",
 "2.2.10. Email": "Texto 60 ",
 "2.2.11. Website": "Texto 60 ",
 "2.2.12. SelfBillingIndicator": "Inteiro *"
}

0

There are 0 answers