0

I want to create three indicators for a variable. I have a dataset that looks like this:

ID    Group  Color
1763    A     Red
1763    A     Yellow
6372    B     Red
0498    A     Red

I want to detect when two rows share the same ID but have different values in the Color column (either Red or Yellow), and label that ID with an additional indicator. The output dataset should then contain one row per distinct ID.

/* Attempt at building WANT: one Category label per row of work.test, looked up */
/* from the distinct ID/Group/Color combinations in work.have.                   */
/* The select-list items are separated by commas (required by PROC SQL).        */
/* NOTE(review): assumes work.test carries ID and Qty - its contents are not shown. */
/* NOTE(review): WHEN branches are tested top to bottom and the first match wins, */
/* so the two Group-only branches catch every 'A' and 'B' row; the two Color     */
/* branches below them are never reached and 'R/L' is never assigned as written. */
proc sql;
create table want as 
  select a.ID,
  a.Qty,
  (case when b.Group = 'A' then 'R'
        when b.Group = 'B' then 'L' 
        when b.Color = 'Red' AND b.Group ='A' then 'R/L'
        when b.Color = 'Yellow' AND b.Group = 'B' then 'R/L'
        else 'X' end) as Category
 from work.test a
   left join (select distinct ID, Group, Color from work.have) b
        on a.ID=b.ID
;
quit;

I would like the dataset to look like this:-

ID      Qty   Category
1763     28     R/L
6372     30     L
3908     41     X
0498     32     R

1 Answer 1

0

From your question, it would be useful to know the contents of both test and have, the two source datasets used in your code example. However, I have attempted to show how this might be achieved by guessing at what the test table might contain:

/* Sample lookup data: one row per ID / Group / Color combination. */
/* All three variables are character; widths are fixed up front.   */
data have;
    length ID $4 Group $1 Color $8;
    input ID $ Group $ Color $;
    datalines;
1763    A     Red
1763    A     Yellow
6372    B     Red
0498    A     Red
;
run;

/* Guessed shape of TEST: each ID is repeated a fixed number of times so that */
/* a later row count per ID can stand in for Qty.                             */
data test;
    length ID $4;
    input ID $;

    /* Number of copies to write for the ID just read; unknown IDs write none. */
    select (ID);
        when ('1763') copies = 28;
        when ('6372') copies = 30;
        when ('0498') copies = 32;
        when ('3908') copies = 41;
        otherwise     copies = 0;
    end;

    do i = 1 to copies;
        output;
    end;

    drop i copies;
    datalines;
1763
6372
0498
3908
;
run;

proc sql;
    /* Step 1: count the distinct colours recorded for each id/group pair in have */
    create table id_color_count as
    select ID, group, count(distinct color) as diff_color
    from work.have
    group by id, group;

    /* Step 2: label every test row. More than one colour takes priority ('R/L'); */
    /* otherwise the group decides ('A' -> 'R', 'B' -> 'L').                      */
    /* Ids absent from have get missing values from the left join, so no WHEN     */
    /* branch matches and they fall through to 'X'.                               */
    create table aggregate as
    select
    test.*, case when diff_color > 1 then 'R/L'
    when group = 'A' then 'R'
    when group = 'B' then 'L'
    else 'X'
    end as category
    from test
    left join
    id_color_count
    on test.id = id_color_count.id;

    /* Step 3: collapse to one row per id/category; the row count becomes Qty. */
    /* Select-list items are comma-separated, with no comma before FROM.       */
    create table want as
    select id,
    count(*) as qty,
    category
    from
    aggregate
    group by id, category;
quit;
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.