5

My task is to take the data from http://services.swpc.noaa.gov/text/ace-swepam.txt and split/sort it into something useful. To start with, I'm trying to split the data into categories, so that I can use in chart.js or something later on, but when I try and print a field, it just comes up as [].

    var options = {
    host: 'services.swpc.noaa.gov',
    path: '/text/ace-swepam.txt',
    port: 80,

    method: 'POST'
};


var req = http.request(options, function (res) {


    res.on('data', function (chunk) {
        //   console.log('BODY: ' + chunk);
        results += chunk.toString();
        //split results into an array by each new line
        lines = results.split("\n");
        //delete header lines
        lines.splice(0, 18);
        if (lines.length <= 20) {
            return;
        }
        console.log(lines);

    });
    res.on('end', function (e) {
        callback();

    });

});

req.on('error', function (e) {
    console.log('problem with request: ' + e.message);
});

req.end();

function callback() {
    for (var line in lines) {
        var x = [];
        x = lines[line].split(" ");
        var statuscode = x[14];
        if (statuscode == 0) {
            if (lines[line].indexOf('-') === -1) {
                year.push(x[0]);
                month.push(x[1]);
                day.push(x[2]);
                time.push(x[4]);
                statusno.push(statuscode);
                proton.push(x[22]);
                bulksp.push(x[28]);
                iontemp.push(x[33]);
            }
        }

    }

    //    console.log(year, month, day, time, statusno, proton, bulksp, iontemp)
}

2 Answers 2

2

The data rows do not appear to be tab-delimited. I expect they are fixed length.

Here is the first line I receive. "2015 08 18 1708 57252 61680 0 2.6 45"

Instead of trying to split this row by tab.

    fields = line.split("\t");

Create an array of the lengths of each field and split it using the substring method.

Here is the full code for the parsing of the returned data. It gives 119 lines or which 6-7 have a status!=0(and so are skipped). Your variables then have 112 entries each.

    res.on('data', function (chunk) {
    var fieldLengths = [0, 4, 7, 10, 16, 24, 32, 37, 48, 59, 72];

    //   console.log('BODY: ' + chunk);
    results += chunk.toString();
    //split results into an array by each new line
    lines = results.split("\n");

    // for me, the first "chunk" is incomplete.  Throw it away and just use the second chunk.
    if (lines.length <= 20) {
        return;
    }

    //delete header lines
    lines.splice(0, 18);

    for (var line in lines) {
        console.log("entry: " + lines[line]);
        //split into data fields
        var lineText = lines[line];
        var fields = [];
        for (var i = 0; i <= fieldLengths.length -1; i++) {
            fields.push(lineText.substring(fieldLengths[i], fieldLengths[i + 1]));
        }

        //if there are no problems (status code 0)
        //add the data to their respective fields
        if (fields[6] == 0) {
            year.push(fields[0]);
            month.push(fields[1]);
            day.push(fields[2]);
            time.push(fields[3]);
            statusno.push(fields[6]);
            proton.push(fields[7]);
            bulksp.push(fields[8]);
            iontemp.push(fields[9]);
        }
    }
});
res.on('end', function (e) {
    console.log(year);
});

});

This is easy to debug if you use Visual Studio(free community edition will work) and add the node tools for visual studio.

Let me know if the data isn't quite right. I understand what you are trying to do and can tweak the code if necessary.

Sign up to request clarification or add additional context in comments.

6 Comments

Thank you, you amazing person, you :)
Of course! Can you please mark this as the answer if works for you.
Sorry, yes, never used this site so I have no idea what I'm doing. I've asked the guy below but I'll ask you too - I've got it working with the substring-ing method, which is brilliant, but it's only cycling through the first 30 or so (1725 - 1824 for me) records, three times, rather than getting ALL the data ONCE. Any obvious reasons you can see why it's doing this?
I'm getting the right data now. Just sec and I'll post the parsing code.
Try downloading the data from the site directly. It is just a text file. Use the URL that you have in your script. Then, get the parsing working correctly like that. It may be that their service is acting weird. If you can identify that your side is working correctly, you could talk with them.
|
1
function callback()  {
  for (var line in lines) {

        //split into data fields
         year.push(lines[line].substring(x,y));//fill in x and y
         //month.push..
         //day.push..
      }
}

or

callback() {
var x = [];
for (var line in lines){
x = lines[line].split(" ");
console.log(x);
year.push(x[index]) // index being where year was split into x

    }
}

just put this function in the res.on('end'). I'm not 100% sure what you're doing exactly , hope this helps.

EDIT:

  var options = {
host: 'services.swpc.noaa.gov',
path: '/text/ace-swepam.txt',
port: 80,

method: 'POST'
};


var req = http.request(options, function (res) {


res.on('data', function (chunk) {
    //   console.log('BODY: ' + chunk);
    results += chunk.toString();
    //split results into an array by each new line
    lines = results.split("\n");
    //delete header lines
    lines.splice(0, 18);

});
res.on('end', function (e) {
    callback();

    });

});

req.on('error', function (e) {
console.log('problem with request: ' + e.message);
 });

req.end();

function callback()  {
for (var line in lines) {
 var x = [];



x = lines[line].split(" ");
//console.log(x); Print x and see which index of x has the vaule you want. Constant for all
year.push(x[0]);
month.push(x[1]);
day.push(x[2]);    
time.push(x[4]);






    }
    //console.log(year,month,day,time); Check final result

}

4 Comments

Thanks! :) I've got it working with the substring-ing method, which is brilliant, but it's only cycling through the first 30 or so (1725 - 1824 for me) records, three times, rather than getting ALL the data ONCE. Any obvious reasons you can see why it's doing this?
I'll edit my answer with the code for my solution. Which is cycling through all rows of data on the site, which seems to be updating.
Gets all data from the site and parses it into x. You can print X and see which index your value is in(it will be constant for all rows because same spacing). I did up until the time value because that's the first one that's actually different.
Ok, I was using the substring method the guy above mentioned, but yours seems simpler, so I've made a copy running your method. But I'm still not getting the most recent hour-or-so's data :/

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.