3

I have made an array of strings and I am trying to group a string array into categories.

So far my code looks like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Print every entry of the flat results table, one per line.
 * The table alternates: country name, then sport name.
 */
int
main(int argc, char *argv[]) {
    /* NOTE(review): the second-to-last country is spelled "New Mecico", so as
     * written it is a different string from "New Mexico". */
    char *results[] = {"Canada", "Cycling", "Canada", "Swimming", "India", "Swimming", "New Mexico",
                       "Cycling", "New Mexico", "Cycling", "New Mecico", "Swimming"};
    char **end = results + sizeof(results) / sizeof(results[0]);
    char **entry;

    /* walk the table with a pointer instead of an index */
    for (entry = results; entry != end; entry++) {
        puts(*entry);
    }

    return 0;
}

Which prints out this:

Canada
Cycling
Canada
Swimming
India
Swimming
New Mexico
Cycling
New Mexico
Cycling
New Mexico
Swimming

But I am trying to group the sports along with respective counts with the individual countries, which I want to look like this:

Canada
    Cycling  1
    Swimming 1

India
    Swimming 1

New Mexico
    Cycling  2
    Swimming 1

I am thinking of categorizing the countries with every i+2 element in the array, and using strcmp to remove the duplicate country strings, but I am not sure how to do this with the counts of the sports along with each country.

I am just not sure how to go about this. Any sort of help would be appreciated.

2
  • 1
    You could simplify with char *results[][2] for starters. Commented Sep 8, 2016 at 4:02
  • 1
    can't use some data structure like map in c++? Commented Sep 8, 2016 at 4:14

7 Answers 7

3

The solution depends on what kind of approach you want to take. Keeping a single flat array of strings (results in your code) will not let you make your data dynamic. Essentially, you want a dictionary-like data structure that stores key/value pairs (nested if required). In C I would use structures in order to make it modular.

First of all, You would need a structure to store sports and their counts(say medal count)

/* One sport entry: the sport's name together with a count for it. */
struct sport {
  char *sport_name;   /* name of the sport */
  int medal_count;    /* count kept for this sport (e.g. a medal count) */
  //Any other details you want to store
};

Then, a Country can play multiple sports. Hence we need to make country structure.

/* One country together with the sport entries recorded for it. */
struct Country{
  char *country_name;     /* name of the country */
  /* Points at this country's sport entries.
   * NOTE(review): no element count is stored next to it; confirm how the
   * length of this array is meant to be tracked. */
  struct sport* results;
  //Any other details you want to store
};

Now let's create an array of country data.

/* Fixed-capacity table with one struct Country slot per country. */
#define NO_OF_COUNTRIES 3  //You may fix this or make it dynamic
struct Country country_data[NO_OF_COUNTRIES]; 

You can fill data accordingly now. Hope this helps.

Sign up to request clarification or add additional context in comments.

Comments

2

Consider using linked lists of countries and cities (in the code below, each "city" node holds a sport name) instead of an array of strings.

The following code shows the simplest implementation, with two structures and two functions for each: adding a new element and searching for an element.

Try this code and then learn it:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Node of a singly linked list of named entries with an occurrence counter.
 * (The answer calls them "cities"; main stores sport names in them.)
 */
struct city
{
    struct city * next;     /* following node, or NULL at the end of the list */
    char * cityName;        /* borrowed pointer to the name; it is not copied */
    int counter;            /* occurrence count; addCity starts it at 0 */
};

/* Node of a singly linked list of countries, each with its own city list. */
struct country
{
    struct country * next;  /* following node, or NULL at the end of the list */
    char * coutryName;      /* borrowed pointer to the name (field spelling kept for callers) */
    struct city * cities;   /* head of this country's city list, NULL when empty */
};

/*
 * Allocate `size` bytes for a list node.
 * Terminates the program with a diagnostic when memory is exhausted, so the
 * add functions below never hand a NULL node back to their callers.
 */
static void * allocNode(size_t size)
{
    void * node = malloc(size);
    if (node == NULL)
    {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    return node;
}

/*
 * Search the list starting at `coutries` for a node whose name equals
 * `country` (compared with strcmp).
 * Returns the matching node, or NULL when there is none or the list is empty.
 */
struct country * findCountry(struct country * coutries, char * country)
{
    for (; coutries != NULL; coutries = coutries->next)
    {
        if (strcmp(country, coutries->coutryName) == 0)
        {
            return coutries;
        }
    }
    return NULL;
}

/*
 * Prepend a new country named `country` to the list `coutries`.
 * The name pointer is stored as-is, so it must outlive the list.
 * Returns the new node, which is the new head of the list (never NULL).
 */
struct country * addCountry(struct country * coutries, char * country)
{
    struct country * newCountry = allocNode(sizeof *newCountry);
    newCountry->next = coutries;
    newCountry->coutryName = country;
    newCountry->cities = NULL;
    return newCountry;
}

/*
 * Search the list starting at `cities` for a node whose name equals `city`.
 * Returns the matching node, or NULL when there is none or the list is empty.
 */
struct city * findCity(struct city * cities, char * city)
{
    for (; cities != NULL; cities = cities->next)
    {
        if (strcmp(city, cities->cityName) == 0)
        {
            return cities;
        }
    }
    return NULL;
}

/*
 * Prepend a new entry named `city`, with its counter at 0, to the list `cities`.
 * The name pointer is stored as-is, so it must outlive the list.
 * Returns the new node, which is the new head of the list (never NULL).
 */
struct city * addCity(struct city * cities, char * city)
{
    struct city * newCity = allocNode(sizeof *newCity);
    newCity->cityName = city;
    newCity->next = cities;
    newCity->counter = 0;
    return newCity;
}

int main(void) 
{
    char *results[] = { "Canada", "Cycling", "Canada", "Swimming", "India", "Swimming", "New Mexico",
        "Cycling", "New Mexico", "Cycling", "New Mexico", "Swimming" };

    struct country * countries = NULL;
    int nelements = sizeof(results) / sizeof(results[0]);
    // filling list of countries with sublists of cityes
    int i;
    for (i = 0; i < nelements; i+=2)
    {
        struct country * pCountry = findCountry(countries, results[i]);
        if (!pCountry)
        {
            countries = addCountry(countries, results[i]);
            pCountry = countries;
        }
        struct city * pCity = findCity(pCountry->cities, results[i+1]);
        if (!pCity)
        {
            pCountry->cities = addCity(pCountry->cities, results[i + 1]);
            pCity = pCountry->cities;
        }
        pCity->counter++;
    }

    // reading cities from all countries
    struct country * pCountry = countries;
    while (pCountry != NULL)
    {
        printf("%s\n",pCountry->coutryName);
        struct city * pCity = pCountry->cities;
        while (pCity != NULL)
        {
            printf("    %s %d\n", pCity->cityName, pCity->counter);
            pCity = pCity->next;
        }
        printf("\n");
        pCountry = pCountry->next;
    }

    return 0;
}

Note: in your code last "New Mexico" was like "New Mecico", in my code this mistype was fixed.

UPDATE

Note 2: Because I add elements in the beginning of the lists order of countries and cities is reverse to order of their first mention in the source array.

If order is important you have two options:

1) rewrite my code to add new items to the end of list (it is the long way)

2) rewrite for-loop in the main just to read initial array from the end (it is the easiest way):

// filling list of countries with sublists of cityes
int i;
for (i = nelements-2; i >=0 ; i -= 2)
   {
   . . .

Comments

1

Given your array, I can see that country names and sport names alternate. If this is the format in which the data is available, then you can follow the code below.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Print each country followed by its sports and how often each occurred.
 *
 * Works as a single pass that only remembers the current country and the
 * current sport, so it relies on entries for the same country (and, within a
 * country, for the same sport) being adjacent in results[].
 */
int main(int argc, char *argv[])
{
   char *results[] = {"Canada", "Cycling", "Canada", "Swimming", "India","Swimming", "New Mexico",
               "Cycling", "New Mexico", "Cycling", "New Mexico", "Swimming"};



   int nelements, i, sport_count=0,country_change =0;
   /* Copies of the country / sport currently being counted.
    * NOTE(review): strcpy does not bound the copy; names of 50 or more
    * characters would overflow these buffers. */
   char country[50];char sport[50];
   /* Seed the state from the first pair and print the first country header. */
   strcpy(country,results[0]);
   printf("%s\n", country);
   strcpy(sport,results[1]);
   nelements = sizeof(results) / sizeof(results[0]);

   /* Start at 1: element 0 (the first country) was handled above. */
   for (i = 1 ; i < nelements; i++) 
   {
      /* Even index = country slot; act only when the country differs. */
      if(((i%2)==0) && (strcmp(country,results[i])))
      {
         //sport_count++;
         /* Flush the sport still pending for the previous country. */
         printf("\t%s %d\n", sport,sport_count);
         country_change =1;
         strcpy(country,results[i]);
         printf("%s\n", country);
      }
      /* Odd index = sport slot. */
      else if((i%2)==1)
      {
          /* First sport after a country change: restart the count with it. */
          if(country_change)
          {
             strcpy(sport,results[i]);
             country_change = 0;
             sport_count = 0;
          }

          /* Same sport as the one being counted: one more occurrence. */
          if(!strcmp(sport,results[i]))
          {
              sport_count++;
          }
          /* Different sport: print the finished one and start counting the new one. */
          else
          {
              printf("\t%s %d\n", sport,sport_count);
              strcpy(sport,results[i]);
              sport_count = 1;
          }
             //strcpy(country,results[i]);
       }

    }
    /* The last sport is still pending when the loop ends. */
    printf("\t%s %d\n", sport,sport_count);

 return 0;
}

Basically this is what I am trying to do here:

  1. Store the first index in a variable.
  2. Then, in each even iteration, check if the country name is equal to the stored name. If not, update the name.
  3. In each odd iteration you can just print out the name.
  4. The sport name is stored in a variable, and an int variable sport_count keeps the count.
  5. If a new country arrives, then print the name of the sport first, followed by a mandatory update of the sport name and the relevant variables.
  6. Last sport name is printed outside the loop.

    Output
    
    Canada
            Cycling 1
            Swimming 1
    India
            Swimming 1
    New Mexico
            Cycling 2
            Swimming 1
    

Comments

1

I would use a struct (if you are not familiar, I always remind myself when needed with myStruct.c) and with two arrays as data members, like this:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Capacity of Record.country in bytes, terminator included. */
#define COUNTRY_LENGTH 15
/* Number of per-sport counters kept in every Record. */
#define MAX_SPORTS 5

/* Sport identifiers; their values index Record.sports. */
enum sport_name { CYCLING, SWIMMING };

/* One country together with one counter per sport. */
typedef struct Record {
  char country[COUNTRY_LENGTH];
  int sports[MAX_SPORTS];
} Record;

/*
 * Look up 'country' among the first 'size' entries of 'array'.
 * Returns the index of the first entry with an equal name, or -1 if none.
 */
int exists(char country[], Record* array, int size) {
    int idx = 0;
    while (idx < size) {
        if (strcmp(array[idx].country, country) == 0) {
            return idx;
        }
        idx++;
    }
    return -1;
}

/*
 * Map a sport name to its enum sport_name value.
 * For an unknown name, prints a message and returns -1.
 */
int find_sport_index(char sport[]) {
    if (strcmp(sport, "Cycling") == 0) {
        return CYCLING;
    } else if (strcmp(sport, "Swimming") == 0) {
        return SWIMMING;
    }
    printf("I couldn't find a sport index for %s\n!!! Do something...Undefined Behavior!", sport);
    return -1;
}

/*
 * Map an enum sport_name value back to its printable name.
 * For an unknown value, prints a message and returns NULL.
 */
char* find_sport_string(int sport) {
    switch (sport) {
    case CYCLING:
        return "Cycling";
    case SWIMMING:
        return "Swimming";
    default:
        break;
    }
    printf("I couldn't find a sport string for sport index %d\n!!! Do something...", sport);
    return NULL;
}

/*
 * Tally the (country, sport) pairs of results[] into one Record per country
 * and print every country followed by its non-zero sport counts.
 *
 * Entries that cannot be stored safely are reported and skipped instead of
 * corrupting memory: a country whose name does not fit Record.country, and a
 * sport for which find_sport_index has no index.
 */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    // you had a typo, New Mecico, I corrected it..Also you could have used a struct here... ;)
    char *results[] = {"Canada", "Cycling", "Canada", "Swimming", "India", "Swimming", "New Mexico",
                       "Cycling", "New Mexico", "Cycling", "New Mexico", "Swimming"};

    int nelements, i, j;

    nelements = sizeof(results) / sizeof(results[0]);

    // worst case: every pair names a different country
    const int records_size = nelements/2;

    Record record[records_size];
    memset(record, 0, sizeof record);   // empty names, all counters 0

    // country_index is -1 while there is no valid current country
    int country_index = -1, records_count = 0;
    for(i = 0; i < nelements; ++i) {
        if(i % 2 == 0) {
            // results[i] is a country
            if(strlen(results[i]) >= COUNTRY_LENGTH) {
                // would overflow Record.country: drop this pair
                fprintf(stderr, "Country name too long, skipped: %s\n", results[i]);
                country_index = -1;
                continue;
            }
            // only the first records_count slots are in use
            country_index = exists(results[i], record, records_count);
            if(country_index == -1) {
                country_index = records_count++;
                strcpy(record[country_index].country, results[i]);
            }
        } else {
            // results[i] is a sport belonging to the country seen just before
            if(country_index < 0)
                continue;
            int sport_index = find_sport_index(results[i]);
            // find_sport_index returns -1 for an unknown sport; never index with it
            if(sport_index < 0 || sport_index >= MAX_SPORTS)
                continue;
            record[country_index].sports[sport_index]++;
        }
    }

    for(i = 0; i < records_count; ++i) {
        printf("%s\n", record[i].country);
        for(j = 0; j < MAX_SPORTS; j++) {
            if(record[i].sports[j] != 0) {
                const char *sport = find_sport_string(j);
                if(sport != NULL)   // passing NULL to %s is undefined
                    printf("    %s %d\n", sport, record[i].sports[j]);
            }
        }
    }

    return 0;
}

Output:

C02QT2UBFVH6-lm:~ gsamaras$ ./a.out 
Canada
    Cycling 1
    Swimming 1
India
    Swimming 1
New Mexico
    Cycling 2
    Swimming 1

The idea is that:

  1. The struct Record holds the records in the Olympics, with relevant sports.
  2. Record.country holds the name of the country (and I assume that it will be 14 characters at most, +1 for the NUL terminator, thus I defined it as 15).
  3. Record.sports is an array with size MAX_SPORTS- the size would be equal to all the sports in the Olympics, but I assumed it's 5. Every position of this array is a counter (of the medals every country got in a sport. For example, Record.sports[1] = 2 would indicate that this country has 2 medals in Swimming. But how I know it was Swimming? I decided apriori, as a programmer that the first counter is connected to Cycling, the second to Swimming and so on. I used an enum to make that more readable, instead of using magic numbers. (Note: You could use a list instead an array, but that would be an overkill for that application. But if you want to do it for fun (and because a bit less memory), you can use our List (C)).
  4. You define results[] in a strange way, since you should really have used a struct for that, but I worked with your code...So I needed an array of Records, and its size should be equal to the number of the countries, i.e. the half of the size of results[]. Notice that because you defined results[] to contain implicit pairs of country-sport, a division by two is just enough to determine the size of the Records array.
  5. I loop over results[] to populate record[], by using a counter named i in the for loop. When i is even, results[i] contains a country, else it contains a sport. I use the modulo operator (%) to determine that easily.
  6. If the country doesn't exist in record[], then I insert it, else I don't insert it again. In both cases I want to remember its index in record[], so that in the next iteration, in which we will process the sport, we will know at which position of record[] we should look and act accordingly.
  7. Now, when I process a sport, I want to increase the counter of that sport, but only for the corresponding country (remember that I have stored the country index I had processed in the previous iteration).
  8. Then I just print, that's it! :)

1 Comment

nice. Took me a while to figure out what does !strcmp mean. Since it's a little bit of unintuitive.
1

Idea of this solution is in building map - table where rows correspond to countries and columns correspond to sport events (or sport names).

Memory for the maximum possible map (size is nelements/2 x nelements/2) is allocated with calloc but actually it can be just int[6][6] if char *results[] is unchanged.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Build a country x sport count table from the flat results array and print,
 * for every country, each sport that occurred for it together with its count.
 *
 * Returns 0 on success, or EXIT_FAILURE when memory for the table cannot be
 * obtained. All allocations are released before returning.
 */
int main(void) 
{
    char *results[] = { "Canada", "Cycling", "Canada", "Swimming", "India", "Swimming", "New Mexico",
        "Cycling", "New Mexico", "Cycling", "New Mexico", "Swimming" };
    int nelements = sizeof(results) / sizeof(results[0]);
    // results holds (country, sport) pairs, so there are at most npairs
    // distinct rows and at most npairs distinct columns
    int npairs = nelements / 2;
    int status = EXIT_FAILURE;
    int i, c;
    int ** map = NULL;          // map[row][col] = occurrences of sport col in country row
    char ** rowNames = NULL;    // country name of every used row
    char ** colNames = NULL;    // sport name of every used column
    int usedRows = 0;
    int usedCols = 0;

    if (npairs == 0)
        return 0;               // nothing to count

    // making empty map
    map = calloc(npairs, sizeof *map);
    rowNames = calloc(npairs, sizeof *rowNames);
    colNames = calloc(npairs, sizeof *colNames);
    if (map == NULL || rowNames == NULL || colNames == NULL)
    {
        fprintf(stderr, "out of memory\n");
        goto cleanup;
    }
    for (i = 0; i < npairs; i++)
    {
        map[i] = calloc(npairs, sizeof *map[i]);
        if (map[i] == NULL)
        {
            fprintf(stderr, "out of memory\n");
            goto cleanup;
        }
    }

    // filling the map
    // the outer loop for countries; "c + 1 < nelements" keeps results[c+1]
    // inside the array even if the last pair were incomplete
    for (c = 0; c + 1 < nelements; c += 2) {
        int row = -1;
        // Find country in the map (loop for rows)
        for (i = 0; i < usedRows; i++)
        {
            if (strcmp(results[c], rowNames[i]) == 0)
            {
                row = i;
                break;
            }
        }
        // or add if it is new country
        if (row < 0)
        {
            row = usedRows;
            rowNames[usedRows] = results[c];
            usedRows++;
        }
        // Find sport in the map (loop for columns)
        int col = -1;
        for (i = 0; i < usedCols; i++)
        {
            if (strcmp(results[c+1], colNames[i]) == 0)
            {
                col = i;
                break;
            }
        }
        // or add if it is new sport
        if (col < 0)
        {
            col = usedCols;
            colNames[usedCols] = results[c+1];
            usedCols++;
        }
        // Just count sport event in the current country
        map[row][col]++;
    }

    // print results from map
    // the outer loop for countries (loop for rows in map)
    for (c = 0; c < usedRows; c++) {
        printf("%s\n", rowNames[c]);
        // the inner loop for sport
        for (i = 0; i < usedCols; i++)
            if (map[c][i])
                printf("   %s %d\n", colNames[i], map[c][i]);
        printf("\n");
    }
    status = 0;

cleanup:
    // rows that were never allocated are still NULL from calloc; free(NULL) is a no-op
    if (map != NULL)
    {
        for (i = 0; i < npairs; i++)
            free(map[i]);
    }
    free(map);
    free(rowNames);
    free(colNames);

    return status;
}

So when map, as well as rowNames (with countries) and colNames (with sports) are filled we can output data in any way.

Comments

1

There are a number of ways to approach this task, as you can see from the number of answers. One element you will need, for either the countries, or the events (but not both), is a simple lookup table containing either country entries or event entries to allow you to distinguish whether the values in results are country names or event names. A simple country lookup (made global here, but could be function scope as well) such as the following works:

/* Table of the known country names; an entry of results that matches one of
 * these is treated as a country name rather than an event name. */
char *countries[] = { "Canada", "India", "New Mexico" }; /* countries lookup */

Another shortcut you can take is recognizing that the pointers within results have function scope as defined, so there is no need to copy or allocate memory to hold them -- they already exist in readonly memory.

One other struct element that helps is to keep a count of the events associated with the country, say eventcnt. That can be incremented each time an event is added under the country. You can use a country/events struct similar to:

/* One hosting country together with the events recorded for it. */
typedef struct {
    char *country;      /* country name */
    char *event[MAXE];  /* event names stored for this country, at most MAXE */
    int eventcnt;       /* number of entries of event[] in use */
} host;

(MAXE is a simple constant for the maximum number of events involved, which allows you to use automatic storage with your array of structs. It can easily be changed to allocate/reallocate storage on an as-needed basis.)

You then need to simply loop through the results array, once, understanding that events always follow the country before them. Using several nested loops keeps the number of times you traverse results to one-time. Essentially, you loop over each pointer in results, determine if it points to a country name, if it is a country name, then either add it if it doesn't exists as one of your host.country values, or skip it if it does (no need to updated pointers to point to the last occurrence of the country name)

Since nested loops are involved, a simple goto provides all the control you need to determine when you are dealing with country names or when you are dealing with event names, and allows you to take the action needed in each case.

Then it is just a matter of printing/using the results you wanted which are now contained in a array of struct with hidx (host index) containing the total number of unique hosts involved.

Putting the pieces together, you could do something similar to the following:

#include <stdio.h>
#include <string.h>

/* constants max(countries, events):
 * MAXC sizes the hosts array in main, MAXE sizes host.event */
enum { MAXC = 8, MAXE = 16 };

/* Names that identify an entry of results as a country rather than an event. */
char *countries[] = { "Canada", "India", "New Mexico" }; /* countries lookup */

/* One hosting country together with the distinct events recorded for it. */
typedef struct {
    char *country;      /* country name (main stores a pointer taken from results) */
    char *event[MAXE];  /* event names stored for this country */
    int eventcnt;       /* number of entries of event[] in use */
} host;

/*
 * Group the events of results[] under the country that precedes them and
 * print each country followed by its distinct events.
 *
 * An entry of results[] is a country when it matches the countries lookup;
 * every other entry is an event that belongs to the most recent country.
 */
int main (void) {

    char *results[] = { "Canada", "Cycling", "Canada", "Swimming", 
                        "India", "Swimming", "New Mexico", "Cycling", 
                        "New Mexico", "Cycling", "New Mexico", "Swimming"};
    host hosts[MAXC] = {{ .country = NULL }};
    /* current = index in hosts[] of the country events are being added to,
     * or -1 while there is none (no country seen yet, or hosts[] is full) */
    int hidx = 0, i, j, country_count, current = -1, nelements;

    country_count = sizeof countries/sizeof *countries;
    nelements = sizeof results / sizeof *results;

    for (i = 0 ; i < nelements; i++) {          /* for each element */
        for (j = 0; j < country_count; j++) {   /* check if country */
            if (strcmp (results[i], countries[j]) == 0) { /* if so */
                int k;
                for (k = 0; k < hidx &&  /* check if already assigned */
                    strcmp (hosts[k].country, countries[j]) != 0; k++) {}
                if (k == hidx) {         /* not assigned yet */
                    if (hidx < MAXC)     /* room left: assign ptr, increment */
                        hosts[hidx++].country = results[i];
                    else                 /* table full: drop its events */
                        k = -1;
                }
                /* always switch to this country, also when it was seen
                 * before, so its events are not added to another host */
                current = k;
                goto nextc; /* skip event adding */
            }
        } /* results[i] is not a country, check if event exists for host */
        if (current >= 0 && hosts[current].eventcnt < MAXE) {
            int k;
            for (k = 0; k < hosts[current].eventcnt; k++)
                if (strcmp (results[i], hosts[current].event[k]) == 0)
                    goto nextc;  /* already exists for host, skip add */
            hosts[current].event[hosts[current].eventcnt++] = results[i];
        }
        nextc:;
    }

    for (i = 0; i < hidx; i++) {    /* output countries & events for each */
        printf (" %s\n", hosts[i].country);
        for (j = 0; j < hosts[i].eventcnt; j++)
            printf ("     %s\n", hosts[i].event[j]);
    }

    return 0;
}

Example Use/Output

$ ./bin/events
 Canada
     Cycling
     Swimming
 India
     Swimming
 New Mexico
     Cycling
     Swimming

Look over all the answers. There are many good points contained. Let me know if you have any questions.

Comments

1

I would enumerate both the sports and the locations, adding NUM_x as the last element, so the enumerations can be easily appended in the future...

/*
 * Sports, numbered from 0 so they can index arrays.
 * NUM_SPORTS must stay last: it equals the number of sports.
 * The enum is left untagged because tags that start with an underscore are
 * reserved at file scope.
 */
typedef enum
{
  CYCLING,
  SWIMMING,
  NUM_SPORTS
} sport_t;

/*
 * Locations, numbered from 0 so they can index arrays.
 * NUM_LOCATIONS must stay last: it equals the number of locations.
 * The enum is left untagged because tags that start with an underscore are
 * reserved at file scope.
 */
typedef enum
{
  CANADA,
  INDIA,
  NEW_MEXICO,
  NUM_LOCATIONS
} location_t;

Now, you can define string arrays to use when you want to print out the names...

/* Printable names, listed in the same order as the enumerators so the enum
 * value can be used as the index; the NUM_* enumerators size the arrays. */
char* sports_name[NUM_SPORTS] = {"Cycling", "Swimming"};
char* location_name[NUM_LOCATIONS] = {"Canada", "India", "New Mexico"};

This approach will reduce your storage a bit and increase efficiency since you will be comparing enumerations (integers) instead of strings when you categorize the list.

You might also want to consider using a two dimensional boolean array of all locations and all sports, indicating whether said location has said sport.

/*
 * Minimal boolean type: FALSE is 0 and TRUE is 1.
 * The enum is left untagged because tags that start with an underscore are
 * reserved at file scope.
 */
typedef enum
{
  FALSE,
  TRUE
} bool_t;

bool_t sports_array[NUM_LOCATIONS][NUM_SPORTS] =
{ 
  {TRUE,TRUE},  // Canada
  {TRUE,FALSE}, // India
  {TRUE,TRUE},  // New Mexico
};

So, your loops would look like this...

location_t l;
sport_t s;

/* For every location, print its name and then the sports flagged TRUE for it. */
for (l = (location_t)0; l < NUM_LOCATIONS; l++)
{
  printf( " %s\n", location_name[l] );
  for (s = (sport_t)0; s < NUM_SPORTS; s++)
  {
    /* Index row, then column. Writing [l,s] would apply the comma operator
       and select the whole row sports_array[s], which always tests true. */
    if (sports_array[l][s])
    {
      /* the name table is declared as sports_name */
      printf( "     %s\n", sports_name[s] );
    }
  }
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.