Build a correlation matrix from two columns of two different CSV files in RapidMiner

273 views Asked by At

I want to build a correlation matrix from two columns that come from two different CSV files. Can anyone tell me how to specify one column from one file and one column from the other file?

1

There is 1 answer

3
David On

You have to create a new example set by joining the two columns with the Join operator; you can then calculate the correlation matrix on the result. When joining, make sure that the two example sets have the same ID attribute. The process below shows an example of how to select and join two attributes; it uses the Iris sample data for both inputs, so for your case replace the two Retrieve operators with Read CSV operators.

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: selects one attribute from each of two example sets,
  joins the two selections, and passes the joined set to a correlation matrix.
  Both inputs are retrieved from the repository entry //Samples/data/Iris.
  NOTE(review): the process version (6.4.000) differs from the operators'
  compatibility value (7.0.000-SNAPSHOT); confirm this is intended.
-->
<process version="6.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator name="Process"
            class="process"
            activated="true"
            compatibility="7.0.000-SNAPSHOT"
            expanded="true">
    <process expanded="true">

      <!-- Left branch: load the first example set and keep the single attribute a1. -->
      <operator name="Retrieve Iris"
                class="retrieve"
                activated="true"
                compatibility="7.0.000-SNAPSHOT"
                expanded="true"
                height="60"
                width="90"
                x="45"
                y="75">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator name="Select Attributes"
                class="select_attributes"
                activated="true"
                compatibility="7.0.000-SNAPSHOT"
                expanded="true"
                height="76"
                width="90"
                x="179"
                y="75">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a1"/>
      </operator>

      <!-- Right branch: load the second example set and keep the single attribute a2. -->
      <operator name="Retrieve Iris (2)"
                class="retrieve"
                activated="true"
                compatibility="7.0.000-SNAPSHOT"
                expanded="true"
                height="60"
                width="90"
                x="45"
                y="255">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <operator name="Select Attributes (2)"
                class="select_attributes"
                activated="true"
                compatibility="7.0.000-SNAPSHOT"
                expanded="true"
                height="76"
                width="90"
                x="179"
                y="255">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="a2"/>
      </operator>

      <!--
        Combine both selections into one example set.
        The key_attributes list is left empty; presumably the join then matches
        rows on the shared ID attribute. Verify against the Join operator docs.
      -->
      <operator name="Join"
                class="join"
                activated="true"
                compatibility="7.0.000-SNAPSHOT"
                expanded="true"
                height="76"
                width="90"
                x="380"
                y="165">
        <list key="key_attributes"/>
      </operator>

      <!-- Compute the correlation matrix on the joined example set. -->
      <operator name="Correlation Matrix"
                class="correlation_matrix"
                activated="true"
                compatibility="7.0.000-SNAPSHOT"
                expanded="true"
                height="94"
                width="90"
                x="581"
                y="165"/>

      <!-- Wiring of the left branch into the left port of Join. -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Select Attributes" to_port="example set input"/>
      <connect from_op="Select Attributes" from_port="example set output"
               to_op="Join" to_port="left"/>

      <!-- Wiring of the right branch into the right port of Join. -->
      <connect from_op="Retrieve Iris (2)" from_port="output"
               to_op="Select Attributes (2)" to_port="example set input"/>
      <connect from_op="Select Attributes (2)" from_port="example set output"
               to_op="Join" to_port="right"/>

      <!-- Joined set goes to the correlation matrix; the matrix goes to port "result 1". -->
      <connect from_op="Join" from_port="join"
               to_op="Correlation Matrix" to_port="example set"/>
      <connect from_op="Correlation Matrix" from_port="matrix"
               to_port="result 1"/>

      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>