EDIT : fixed dummy data, thanks for your comment.

I want to create a series of functions to create emails. I need to remove some parts of the input, but keep some others.

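The general idea is: keep only the first word of each name, i.e. return the string from its beginning up to the first space character. BUT, if there is a "particle" at the beginning of the last name (e.g. "von " or "de la "), return the particle together with the word that follows it, i.e. the string up to the next space after the particle.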

There is a regex builder to create a big regex that matches the interesting part of the name

This first snippet calls the users() function. I've hardcoded some data to work with, but in practice it should be read from a Google spreadsheet. I get the following error if I call the oneName() function twice per row (see the next snippet for a single-use example):

"Cannot read property '1' of undefined"

// Run the demo when the script loads.
users();

/**
 * Logs, for every hardcoded [first name, last name] pair, the first match that
 * oneName() returns for the first name and then for the last name.
 */
function users() {
  var data = [['Jean', 'A Marca'], ['Marie', 'A Marca von Machin'], ['Pierre-Philippe', 'A Marca von machin'], ['Charles-Henri', 'Machin von Truc'], ['Franz Albert', 'All\'Arrabiata von truc'], ['Jérôme', 'all\'arrabiata truc'], ['Heinz', 'n\'Goye'], ['Anne-Méry', 'M\'Bala'], ['Angel', 'Di Maria y Fôdes'], ['Herberto', 'Fôdes y Dos Santos']]
  
  // NOTE: `output` is declared but never used in this function.
  var output = [];
  // NOTE: `i` is not declared with `var`, so it is an implicit global. The loop in
  // regexBuilder() (reached through oneName()) assigns to an undeclared `i` too,
  // so `i` no longer holds this loop's key once the first oneName() call returns.
  for(i in data){
    var prenom = oneName(data[i][0])[0];
    console.log(prenom);
    // data[i] is looked up again here, after `i` has been overwritten.
    var nom = oneName(data[i][1])[0]
    console.log(nom);
  }
}

/**
 * Matches `nom` against the pattern that regexBuilder() produces for the
 * particle list below, using the global and case-insensitive flags.
 *
 * @param {string} nom - the name to analyse
 * @returns {Array|null} the result of String.prototype.match (null when nothing matches)
 */
function oneName(nom) {
  // Particles written with their separator (trailing space or apostrophe).
  var withSeparator = ['de ', 'di ', 'da ', 'dos ', 'von ', 'del\'', 'dell\'', 'della ', 'el ', 'al ', 'd\'', 'do ', 'du ', 'des ', 'de la ', 'a ', 'all\'', 'l\'', 'n\'', 'm\''];
  // The same particles without separator; not referenced anywhere in this function.
  var withoutSeparator = ['de', 'di', 'da', 'dos', 'von', 'del', 'dell', 'della', 'el', 'al', 'd', 'do', 'du', 'des', 'dela', 'a', 'all', 'l', 'n', 'm'];

  var source = regexBuilder(withSeparator);
  var matcher = new RegExp(source, 'gi');
  return nom.match(matcher);
}


/**
 * Builds the source text of a regular expression from a list of particles.
 * Each particle contributes one alternative that matches, at the start of the
 * input, the particle followed by one or more letters (ASCII letters plus the
 * ranges U+00C0-U+024F and U+1E00-U+1EFF). After the last particle a fallback
 * alternative is appended that matches a leading run of letters and apostrophes.
 *
 * @param {Array} listeParticules - particle strings, inserted into the pattern as they are
 * @returns {string} the alternatives joined with '|' (an empty string for an empty list)
 */
function regexBuilder(listeParticules){
  var regex = '';
  // NOTE: `i` is not declared with `var`, so this loop writes to an implicit
  // global `i` - the same variable that the loop in users() iterates with.
  for(i in listeParticules){
    // for...in yields string keys; the loose `==` compares the key with the
    // numeric last index to detect the final particle.
    if(i == listeParticules.length -1){
      // Last particle: append the fallback alternative as well, with no trailing '|'.
      regex = regex + '(^' + listeParticules[i] + '[a-zA-Z\u00c0-\u024f\u1e00-\u1eff]+)|(^[a-zA-Z\u00c0-\u024f\u1e00-\u1eff\']+)';  
    }else{
      regex = regex + '(^' + listeParticules[i] + '[a-zA-Z\u00c0-\u024f\u1e00-\u1eff]+)|';
    }
  }
  return regex;
}

If I call the oneName() function only once per row, everything is fine:

// Run the demo when the script loads.
users();

/**
 * Logs, for every hardcoded row, the first match that oneName() returns for
 * the row's first element (the first name).
 */
function users() {
  // NOTE: the fourth row holds a single string ('Charles-Henri, Machin von Truc'),
  // so that row has no element at index 1.
  var data = [['Jean', 'A Marca'], ['Marie', 'A Marca von Machin'], ['Pierre-Philippe', 'A Marca von machin'], ['Charles-Henri, Machin von Truc'], ['Franz Albert', 'All\'Arrabiata von truc'], ['Jérôme', 'all\'arrabiata truc'], ['Heinz', 'n\'Goye'], ['Anne-Méry', 'M\'Bala'], ['Angel', 'Di Maria y Fôdes'], ['Herberto', 'Fôdes y Dos Santos']]
  
  // NOTE: `output` is declared but never used in this function.
  var output = [];
  // NOTE: `i` is not declared with `var`, so it is an implicit global that the
  // loop in regexBuilder() also writes to. Here data[i] is only read before
  // oneName() is called in each iteration.
  for(i in data){
    var prenom = oneName(data[i][0])[0];
    console.log(prenom);
  }
}

/**
 * Matches `nom` against the pattern that regexBuilder() produces for the
 * particle list below, using the global and case-insensitive flags.
 *
 * @param {string} nom - the name to analyse
 * @returns {Array|null} the result of String.prototype.match (null when nothing matches)
 */
function oneName(nom) {
  // Particles written with their separator (trailing space or apostrophe).
  var withSeparator = ['de ', 'di ', 'da ', 'dos ', 'von ', 'del\'', 'dell\'', 'della ', 'el ', 'al ', 'd\'', 'do ', 'du ', 'des ', 'de la ', 'a ', 'all\'', 'l\'', 'n\'', 'm\''];
  // The same particles without separator; not referenced anywhere in this function.
  var withoutSeparator = ['de', 'di', 'da', 'dos', 'von', 'del', 'dell', 'della', 'el', 'al', 'd', 'do', 'du', 'des', 'dela', 'a', 'all', 'l', 'n', 'm'];

  var source = regexBuilder(withSeparator);
  var matcher = new RegExp(source, 'gi');
  return nom.match(matcher);
}


/**
 * Builds the source text of a regular expression from a list of particles.
 * Each particle contributes one alternative that matches, at the start of the
 * input, the particle followed by one or more letters (ASCII letters plus the
 * ranges U+00C0-U+024F and U+1E00-U+1EFF). After the last particle a fallback
 * alternative is appended that matches a leading run of letters and apostrophes.
 *
 * @param {Array} listeParticules - particle strings, inserted into the pattern as they are
 * @returns {string} the alternatives joined with '|' (an empty string for an empty list)
 */
function regexBuilder(listeParticules){
  var regex = '';
  // NOTE: `i` is not declared with `var`, so this loop writes to an implicit
  // global `i` - the same variable that the loop in users() iterates with.
  for(i in listeParticules){
    // for...in yields string keys; the loose `==` compares the key with the
    // numeric last index to detect the final particle.
    if(i == listeParticules.length -1){
      // Last particle: append the fallback alternative as well, with no trailing '|'.
      regex = regex + '(^' + listeParticules[i] + '[a-zA-Z\u00c0-\u024f\u1e00-\u1eff]+)|(^[a-zA-Z\u00c0-\u024f\u1e00-\u1eff\']+)';  
    }else{
      regex = regex + '(^' + listeParticules[i] + '[a-zA-Z\u00c0-\u024f\u1e00-\u1eff]+)|';
    }
  }
  return regex;
}

I have an additional issue: if I call the oneName() function only once per row, but use it to extract the last names instead, it works for the first three names and then I get "undefined" again. This issue must have something to do with my regex; I'll try to troubleshoot it separately.

So, why can't I use this function twice ?

1 Answer

1
Tanaike On Best Solutions

I think that Diego's comment is one of the important modification points.

As another modification point: in your script, after var prenom = oneName(data[i][0])[0] has run, data[i] becomes undefined, because the i of for(i in data){} is not declared and is therefore used as a global variable. While var prenom = oneName(data[i][0])[0]; runs, the value of i is changed inside the regexBuilder() function, whose loop uses the same global i. I think this is why the error occurs.

In order to avoid this error, how about the following modification? In this modification, for(i in data){ was changed to for(var i in data){, and the loop variable in regexBuilder() is declared with var as well. Please think of this as just one of several possible answers.

Modified script:

// Run the demo when the script loads (function declarations are hoisted).
users();

/**
 * Logs, for every hardcoded [first name, last name] pair, the part of the
 * first name and the part of the last name selected by oneName().
 * An empty string is logged when oneName() finds no match for a value.
 */
function users() {
  var data = [['Jean', 'A Marca'], ['Marie', 'A Marca von Machin'], ['Pierre-Philippe', 'A Marca von machin'], ['Charles-Henri', 'Machin von Truc'], ['Franz Albert', 'All\'Arrabiata von truc'], ['Jérôme', 'all\'arrabiata truc'], ['Heinz', 'n\'Goye'], ['Anne-Méry', 'M\'Bala'], ['Angel', 'Di Maria y Fôdes'], ['Herberto', 'Fôdes y Dos Santos']];

  // Modified: the index is declared with `var`, so it is local to users() and
  // cannot be overwritten by a loop in another function. A counting loop is
  // used instead of for...in, which is meant for object keys rather than arrays.
  for (var i = 0; i < data.length; i++) {
    // oneName() returns null when nothing matches, so guard before reading [0].
    var prenomMatch = oneName(data[i][0]);
    var prenom = prenomMatch ? prenomMatch[0] : '';
    console.log(prenom);

    var nomMatch = oneName(data[i][1]);
    var nom = nomMatch ? nomMatch[0] : '';
    console.log(nom);
  }
}

/**
 * Matches `nom` against the pattern that regexBuilder() produces for the
 * particle list below, using the global and case-insensitive flags.
 *
 * @param {string} nom - the name to analyse
 * @returns {Array|null} the result of String.prototype.match; null when nothing
 *     matches or when `nom` is null/undefined (e.g. a missing value)
 */
function oneName(nom) {
  // A missing value has nothing to match; report "no match" instead of throwing.
  if (nom === null || nom === undefined) {
    return null;
  }

  // Particles written with their separator (trailing space or apostrophe).
  var particules = ['de ', 'di ', 'da ', 'dos ', 'von ', 'del\'', 'dell\'', 'della ', 'el ', 'al ', 'd\'', 'do ', 'du ', 'des ', 'de la ', 'a ', 'all\'', 'l\'', 'n\'', 'm\''];
  var regex = new RegExp(regexBuilder(particules), 'gi');
  return nom.match(regex);
}

/**
 * Builds the source text of a regular expression from a list of particles.
 *
 * Each particle contributes one alternative that matches, at the start of the
 * input, the particle followed by one or more letters (ASCII letters plus the
 * ranges U+00C0-U+024F and U+1E00-U+1EFF). A final fallback alternative matches
 * a leading run of letters and apostrophes, i.e. the first word of the input.
 *
 * Longer particles are placed first: alternation takes the first alternative
 * that matches, so 'de la ' has to be tried before 'de '. Particles are treated
 * as literal text; regular-expression metacharacters in them are escaped.
 *
 * @param {Array} listeParticules - particle strings, separator included
 *     (e.g. 'von ', 'd\''); the array is not modified
 * @returns {string} the alternatives joined with '|'; only the fallback
 *     alternative when the list is empty or missing
 */
function regexBuilder(listeParticules) {
  var letters = 'a-zA-Z\u00c0-\u024f\u1e00-\u1eff';

  // Work on a copy so the caller's array keeps its order.
  var particles = (listeParticules || []).slice();
  particles.sort(function (a, b) {
    return String(b).length - String(a).length;
  });

  var alternatives = [];
  // Modified: the index is declared with `var`, so it stays local to this function.
  for (var i = 0; i < particles.length; i++) {
    // Escape metacharacters so that e.g. 'st. ' only matches a literal dot.
    var literal = String(particles[i]).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    alternatives.push('(^' + literal + '[' + letters + ']+)');
  }

  // Fallback: no particle at the start, keep the first word.
  alternatives.push('(^[' + letters + '\']+)');
  return alternatives.join('|');
}

If I misunderstood your question and this was not the result you want, I apologize.