I am developing a program that plays video in the console using ascii characters. This is an example of such a program text To calculate the correct characters, I use opencl for parallelization. Here is the code:
MediaType::Video(video) => {
let fps = video.get(opencv::videoio::CAP_PROP_FPS).unwrap_or(30.0);
let ms_per_frame = (1000.0f64 / fps).floor() as u64;
let mut frame_index = 0;
let chars = crate::ascii::CHARS1;
let mut size = 0;
let mut chars_buffer = unsafe { Buffer::<cl_uchar>::create(&self.context, CL_MEM_READ_ONLY, chars.len(), ptr::null_mut()).unwrap() };
let mut output_buffer = unsafe { Buffer::<cl_uchar>::create(&self.context, CL_MEM_WRITE_ONLY, 1, ptr::null_mut()).unwrap() };
let mut frame_buffer = unsafe { Buffer::<cl_uchar>::create(&self.context, CL_MEM_READ_ONLY, 1, ptr::null_mut()).unwrap() };
let chars_buffer_write_event = unsafe { self.queue.enqueue_write_buffer(&mut chars_buffer, CL_NON_BLOCKING, 0, chars.as_bytes(), &[]) };
let char_len: cl_uint = 1;
let grayscale: cl_uint = 1;
let step: cl_uint = (255.0 / (chars.chars().count() as f32)).ceil() as u32;
let now = SystemTime::now();
loop {
let mut frame = Mat::default();
let result = video.read(&mut frame);
if result.is_err() || !result.unwrap() || frame.empty() {
break;
}
let terminal_size = termion::terminal_size().unwrap();
let new_size = Size::new(terminal_size.0 as i32, terminal_size.1 as i32);
let mut resized_frame = Mat::default();
let result = imgproc::resize(&frame, &mut resized_frame, new_size, 0.0, 0.0, imgproc::INTER_LINEAR);
if result.is_err() || resized_frame.empty() {
break;
}
if grayscale == 1 {
let mut gray_frame = Mat::default();
let result = imgproc::cvt_color(&resized_frame, &mut gray_frame, imgproc::COLOR_BGR2GRAY, 0);
if result.is_err() || gray_frame.empty() {
break;
}
resized_frame = gray_frame;
}
let frame_bytes = resized_frame.data_bytes().unwrap();
if size != frame_bytes.len() {
size = frame_bytes.len();
frame_buffer = unsafe { Buffer::<cl_uchar>::create(&self.context, CL_MEM_READ_ONLY, size, ptr::null_mut()).unwrap() };
output_buffer = unsafe { Buffer::<cl_uchar>::create(&self.context, CL_MEM_WRITE_ONLY, size, ptr::null_mut()).unwrap() };
}
let _ = unsafe { self.queue.enqueue_write_buffer(&mut frame_buffer, CL_BLOCKING, 0, resized_frame.data_bytes().unwrap(), &[]) };
let execute = unsafe {
ExecuteKernel::new(&self.kernel)
.set_arg(&frame_buffer)
.set_arg(&chars_buffer)
.set_arg(&char_len)
.set_arg(&grayscale)
.set_arg(&step)
.set_arg(&output_buffer)
.set_event_wait_list(&[chars_buffer_write_event.as_ref().unwrap().get()])
.set_global_work_size(size)
.enqueue_nd_range(&self.queue).unwrap()
};
let mut string: Vec<cl_uchar> = vec![0; size];
let _ = unsafe { self.queue.enqueue_read_buffer(&output_buffer, CL_BLOCKING, 0, &mut string, &[execute.get()]).unwrap() };
let mut rgb = Vec::new();
if grayscale == 0 {
rgb = frame_bytes.to_vec();
}
self.media_sender.send(StringInfo {string, rgb}).unwrap();
let time = now.elapsed();
if time.is_err() {
break;
}
let time = time.unwrap();
frame_index += 1;
let deadtime_to_frame_preparing = Duration::from_millis(ms_per_frame * frame_index);
if time < deadtime_to_frame_preparing {
sleep(deadtime_to_frame_preparing - time);
continue;
}
let frames_to_skip = (time - deadtime_to_frame_preparing).div_duration_f64(Duration::from_millis(ms_per_frame)).ceil() as u64;
frame_index += frames_to_skip;
{
let mut skipped = Mat::default();
for _ in 0..frames_to_skip {
if !video.read(&mut skipped).unwrap_or(false) || skipped.empty() {
break;
}
}
}
}
__inline void write(__global uchar* output, __global uchar* input, uint output_start_index, uint input_start_index, uint len) {
for (uint i = 0; i < len; i++) {
output[output_start_index + i] = input[input_start_index + i];
}
}
__kernel void calculate(__global uchar* frame, __global uchar* chars, uint char_len, uint grayscale, uint step, __global uchar* out) {
int index = get_global_id(0);
int brightness = frame[index];
if (!grayscale) {
brightness = (frame[index * 3] + frame[index * 3 + 1] + frame[index * 3 + 2]) / 3;
}
int char_index = brightness / step - 1;
write(out, chars, index + char_len, char_index + char_len, char_len);
The code is a bit incomplete, but it works. And everything works fine, but after 10-15 seconds of playback, the program freezes and the CPU usage increases to 100%. It can end up with the whole system freezing. I don't understand what this can be due to, because when I used the CPU for calculations, there were no such problems. Perhaps I am using opencl incorrectly.
How can I fix the freezing problem?
And one more question, my program uses relatively few resources during operation - 15-20% of my processor, but the console (I have kitty) uses 60-70%, can I optimize it? Maybe reduce the number of frames per second or something else
My processor is Ryzen 5 5600 And GPU is GeForce 1060