syntax.h (19742B)
1 #ifndef SYNTAX_H_ 2 #define SYNTAX_H_ 3 4 #include "../../se.h" 5 #include "../../config.h" 6 #include "../../extension.h" 7 #include <ctype.h> 8 9 10 /////////////////////////////////// 11 // the user must fill this inn 12 // null terminated 13 extern const struct syntax_scheme* syntax_schemes; 14 15 static int fb_set_syntax_scheme(struct file_buffer* fb); 16 static int apply_syntax(struct window_split_node* wn, const int offset_start, const int offset_end, uint8_t* move_buffer, const int move_buffer_len); 17 18 static const struct extension syntax_e = { 19 .fb_new_file_opened = fb_set_syntax_scheme, 20 .window_written_to_screen = apply_syntax 21 }; 22 23 #define UPPER_CASE_WORD_MIN_LEN 3 24 25 enum syntax_scheme_mode { 26 // needs two strings 27 COLOR_AROUND, 28 // needs two strings 29 COLOR_AROUND_TO_LINE, 30 // needs two strings 31 COLOR_INSIDE, 32 // needs two strings 33 COLOR_INSIDE_TO_LINE, 34 // needs two strings 35 COLOR_WORD_INSIDE, 36 // needs one string 37 COLOR_WORD, 38 // needs one string 39 COLOR_WORD_ENDING_WITH_STR, 40 // needs one string 41 COLOR_WORD_STARTING_WITH_STR, 42 // needs one string 43 COLOR_STR, 44 // needs two strings 45 // colors word if string is found after it 46 COLOR_WORD_STR, 47 // needs one string 48 // can be combined with others if this is first 49 COLOR_STR_AFTER_WORD, 50 // needs one string 51 // "(" would color like this "not_colored colored(" 52 // "[" would color like this "not_colored colored [" 53 COLOR_WORD_BEFORE_STR, 54 // needs one string 55 // "(" would color like this "colored not_colored(" 56 // "=" would color like this "colored not_colored =" 57 COLOR_WORD_BEFORE_STR_STR, 58 // no arguments needed 59 COLOR_UPPER_CASE_WORD, 60 }; 61 62 struct syntax_scheme_entry { 63 const enum syntax_scheme_mode mode; 64 const struct delimiter arg; 65 const struct glyph attr; 66 }; 67 68 // TODO: INDENT_LINE_CONTAINS_STR_AND_STR 69 enum indent_scheme_mode { 70 INDENT_LINE_ENDS_WITH_STR, 71 INDENT_LINE_DOES_NOT_END_WITH_STR, 72 73 INDENT_LINE_CONTAINS_WORD, 74 INDENT_LINE_ONLY_CONTAINS_STR, 75 // neds two strings 76 INDENT_LINE_CONTAINS_STR_MORE_THAN_STR, 77 }; 78 79 enum indent_scheme_type { 80 INDENT_REMOVE = -1, 81 INDENT_KEEP_OPENER = 0, 82 INDENT_NEW = 1, 83 84 INDENT_KEEP, 85 // needs two strings, requires closer to be string 1 and opener to be string two 86 INDENT_RETURN_TO_OPENER_BASE_INDENT, 87 }; 88 89 struct indent_scheme_entry { 90 const enum indent_scheme_type type; 91 const enum indent_scheme_mode mode; 92 const unsigned int line_offset; 93 const struct delimiter arg; 94 }; 95 96 struct syntax_scheme { 97 const char* file_ending; 98 const char* word_seperators; 99 100 const struct syntax_scheme_entry* entries; 101 const int entry_count; 102 103 const struct indent_scheme_entry* indents; 104 const int indent_count; 105 }; 106 107 108 static int fb_auto_indent(struct file_buffer* fb, int offset); 109 110 static void do_syntax_scheme(struct file_buffer* fb, const struct syntax_scheme* cs, int offset); 111 112 static void whitespace_count_to_indent_amount(int indent_len, int count, int* indent_count, int* extra_spaces); 113 static int get_line_leading_whitespace_count(const char* line); 114 static int get_line_relative_offset(struct file_buffer* fb, int offset, int count); 115 116 static const struct syntax_scheme* 117 fb_get_syntax_scheme(struct file_buffer* fb) 118 { 119 return fb->syntax_index < 0 ? NULL : &syntax_schemes[fb->syntax_index]; 120 } 121 122 static int 123 fb_set_syntax_scheme(struct file_buffer* fb) 124 { 125 for (int i = 0; syntax_schemes[i].file_ending; i++) 126 if (is_file_type(fb->file_path, syntax_schemes[i].file_ending)) 127 fb->syntax_index = i; 128 return 0; 129 } 130 131 static int 132 apply_syntax(struct window_split_node* wn, const int offset_start, const int offset_end, uint8_t* move_buffer, const int move_buffer_len) 133 { 134 global_attr = default_attributes; 135 struct window_buffer* wb = &wn->wb; 136 struct file_buffer* fb = get_fb(wb); 137 const struct syntax_scheme* cs = fb_get_syntax_scheme(fb); 138 if (!cs) 139 return 0; 140 141 // clear state 142 do_syntax_scheme(NULL, &(struct syntax_scheme){0}, 0); 143 144 // search backwards to find multi-line syntax highlighting 145 for (int i = 0; i < cs->entry_count; i++) { 146 const struct syntax_scheme_entry cse = cs->entries[i]; 147 if (cse.mode == COLOR_AROUND || cse.mode == COLOR_INSIDE) { 148 int offset = 0; 149 int count = 0; 150 int start_len = strlen(cse.arg.start); 151 while((offset = fb_seek_string(fb, offset, cse.arg.start)) >= 0) { 152 offset += start_len; 153 if (offset >= offset_start) 154 break; 155 count++; 156 } 157 158 if (strcmp(cse.arg.start, cse.arg.end) != 0) { 159 int end_len = strlen(cse.arg.end); 160 offset = 0; 161 while((offset = fb_seek_string(fb, offset, cse.arg.end)) >= 0) { 162 offset += end_len; 163 if (offset >= offset_start) 164 break; 165 count--; 166 } 167 } 168 if (count > 0) { 169 offset = fb_seek_string_backwards(fb, offset_start, cse.arg.start); 170 do_syntax_scheme(fb, cs, offset); 171 break; 172 } 173 } 174 } 175 176 int x = wn->minx + move_buffer[0], y = wn->miny; 177 int move_buffer_index = 0; 178 int charsize = 1; 179 for(int i = offset_start; i < offset_end && y < wn->maxy 180 && move_buffer_index < move_buffer_len; i += charsize) { 181 do_syntax_scheme(fb, cs, i); 182 screen_set_attr(x, y)->fg = global_attr.fg; 183 screen_set_attr(x, y)->bg = global_attr.bg; 184 185 uint8_t amount = move_buffer[move_buffer_index]; 186 if (amount & (1<<7)) { 187 x = wn->minx; 188 y++; 189 amount &= ~(1<<7); 190 } 191 x += amount; 192 193 rune_t u; 194 charsize = utf8_decode_buffer(fb->contents + i, i - offset_start, &u); 195 if (charsize == 0) 196 charsize = 1; 197 move_buffer_index++; 198 } 199 200 do_syntax_scheme(NULL, &(struct syntax_scheme){0}, 0); 201 global_attr = default_attributes; 202 return 0; 203 } 204 205 void 206 do_syntax_scheme(struct file_buffer* fb, const struct syntax_scheme* cs, int offset) 207 { 208 static int end_at_whitespace = 0; 209 static const char* end_condition; 210 static int end_condition_len; 211 static struct glyph next_word_attr; 212 static int color_next_word = 0; 213 static int around = 0; 214 215 if (!fb || !cs) { 216 // reset 217 end_at_whitespace = 0; 218 end_condition_len = 0; 219 around = 0; 220 color_next_word = 0; 221 end_condition = NULL; 222 global_attr = default_attributes; 223 return; 224 } 225 226 char* buf = fb->contents; 227 int buflen = fb->len; 228 229 if (end_condition && !color_next_word) { 230 if (buflen - offset <= end_condition_len) 231 return; 232 if (end_at_whitespace && buf[offset] == '\n') { 233 // *_TO_LINE reached end of line 234 end_condition_len = 0; 235 end_condition = NULL; 236 end_at_whitespace = 0; 237 global_attr = default_attributes; 238 } else if (fb_offset_starts_with(fb, offset, end_condition)) { 239 if (isspace(end_condition[end_condition_len-1])) { 240 end_condition_len--; 241 if (end_condition_len <= 0) 242 global_attr = default_attributes; 243 } 244 // if it's around not inside, don't reset color until later 245 if (around) 246 around = 0; 247 else 248 global_attr = default_attributes; 249 250 end_condition = NULL; 251 end_at_whitespace = 0; 252 } 253 return; 254 } else if (end_at_whitespace) { 255 if (!fb_is_on_a_word(fb, offset, cs->word_seperators)) { 256 end_at_whitespace = 0; 257 global_attr = default_attributes; 258 } else { 259 return; 260 } 261 } else if (color_next_word) { 262 // check if new word encountered 263 if (!fb_is_on_a_word(fb, offset, cs->word_seperators)) 264 return; 265 global_attr = next_word_attr; 266 color_next_word = 0; 267 end_at_whitespace = 1; 268 return; 269 } else if (end_condition_len > 0) { 270 // wait for the word/sequence to finish 271 // NOTE: does not work with utf8 chars 272 // TODO: ??? 273 if (--end_condition_len <= 0) 274 global_attr = default_attributes; 275 else 276 return; 277 } 278 279 for (int i = 0; i < cs->entry_count; i++) { 280 struct syntax_scheme_entry entry = cs->entries[i]; 281 enum syntax_scheme_mode mode = entry.mode; 282 283 if (mode == COLOR_UPPER_CASE_WORD) { 284 if (!fb_is_start_of_a_word(fb, offset, cs->word_seperators)) 285 continue; 286 287 int end_len = 0; 288 while (offset + end_len < fb->len && !str_contains_char(cs->word_seperators, buf[offset + end_len])) { 289 if (!isupper(buf[offset + end_len]) && buf[offset + end_len] != '_' 290 && (!end_len || (buf[offset + end_len] < '0' || buf[offset + end_len] > '9'))) 291 goto not_upper_case; 292 end_len++; 293 } 294 // upper case words must be longer than UPPER_CASE_WORD_MIN_LEN chars 295 if (end_len < UPPER_CASE_WORD_MIN_LEN) 296 continue; 297 298 global_attr = entry.attr; 299 end_condition_len = end_len; 300 return; 301 302 not_upper_case: 303 continue; 304 } 305 306 int len = strlen(entry.arg.start); 307 308 if (mode == COLOR_WORD_BEFORE_STR || mode == COLOR_WORD_BEFORE_STR_STR || mode == COLOR_WORD_ENDING_WITH_STR) { 309 // check if this is a new word 310 if (str_contains_char(cs->word_seperators, buf[offset])) continue; 311 312 int offset_tmp = offset; 313 // find new word twice if it's BEFORE_STR_STR 314 int times = mode == COLOR_WORD_BEFORE_STR_STR ? 2 : 1; 315 int first_word_len = 0; 316 int first_time = 1; 317 while (times--) { 318 // seek end of word 319 offset_tmp = fb_seek_word_end(fb, offset_tmp, cs->word_seperators); 320 if (offset_tmp == offset && mode == COLOR_WORD_BEFORE_STR_STR) 321 goto exit_word_before_str_str; 322 if (first_time) 323 first_word_len = offset_tmp - offset; 324 325 if (mode != COLOR_WORD_ENDING_WITH_STR) 326 offset_tmp = fb_seek_not_whitespace(fb, offset_tmp); 327 328 first_time = 0; 329 } 330 331 if (mode == COLOR_WORD_ENDING_WITH_STR) { 332 offset_tmp -= len; 333 if (offset_tmp < 0) 334 continue; 335 } 336 if (fb_offset_starts_with(fb, offset_tmp, entry.arg.start)) { 337 global_attr = entry.attr; 338 end_condition_len = first_word_len; 339 return; 340 } 341 exit_word_before_str_str: 342 continue; 343 } 344 345 if (mode == COLOR_INSIDE || mode == COLOR_INSIDE_TO_LINE || mode == COLOR_WORD_INSIDE) { 346 if (offset - len < 0) 347 continue; 348 // check the if what's behind the cursor is the first string 349 if (fb_offset_starts_with(fb, offset - len, entry.arg.start)) { 350 if (offset < fb->len && fb_offset_starts_with(fb, offset, entry.arg.end)) 351 continue; 352 353 if (mode == COLOR_WORD_INSIDE) { 354 // verify that only one word exists inside 355 int offset_tmp = offset; 356 offset_tmp = fb_seek_not_whitespace(fb, offset_tmp); 357 offset_tmp = fb_seek_whitespace(fb, offset_tmp); 358 int offset_tmp1 = offset_tmp - strlen(entry.arg.end); 359 offset_tmp = fb_seek_not_whitespace(fb, offset_tmp); 360 361 if ((!fb_offset_starts_with(fb, offset_tmp, entry.arg.end) 362 && !fb_offset_starts_with(fb, offset_tmp1, entry.arg.end)) 363 || offset_tmp1 - offset <= 1 || offset_tmp - offset <= 1) 364 continue; 365 } else if (mode == COLOR_INSIDE_TO_LINE) { 366 if (fb_seek_char(fb, offset, '\n') < fb_seek_string(fb, offset, entry.arg.end)) 367 continue; 368 } 369 370 371 end_condition = entry.arg.end; 372 end_condition_len = strlen(entry.arg.end); 373 global_attr = entry.attr; 374 around = 0; 375 return; 376 } 377 continue; 378 } 379 380 if ((mode == COLOR_AROUND || mode == COLOR_AROUND_TO_LINE) && 381 fb_offset_starts_with(fb, offset, entry.arg.start)) { 382 end_condition = entry.arg.end; 383 end_condition_len = strlen(entry.arg.end); 384 around = 1; 385 if (entry.mode == COLOR_AROUND_TO_LINE) 386 end_at_whitespace = 1; 387 global_attr = entry.attr; 388 return; 389 } 390 if (mode == COLOR_WORD || mode == COLOR_STR_AFTER_WORD || 391 mode == COLOR_WORD_STR || mode == COLOR_WORD_STARTING_WITH_STR) { 392 393 // check if this is the start of a new word that matches word exactly(except for WORD_STARTING_WITH_STR) 394 if(!fb_offset_starts_with(fb, offset, entry.arg.start) || 395 !fb_is_start_of_a_word(fb, offset, cs->word_seperators) || 396 (fb_is_on_a_word(fb, offset + len, cs->word_seperators) && mode != COLOR_WORD_STARTING_WITH_STR)) 397 continue; 398 399 if (mode == COLOR_WORD_STR) { 400 int offset_str = fb_seek_not_whitespace(fb, offset + len); 401 402 if (!fb_offset_starts_with(fb, offset_str, entry.arg.end)) 403 continue; 404 end_condition_len = strlen(entry.arg.start); 405 } else { 406 end_at_whitespace = 1; 407 } 408 if (mode == COLOR_STR_AFTER_WORD) { 409 next_word_attr = entry.attr; 410 color_next_word = 1; 411 continue; 412 } 413 global_attr = entry.attr; 414 return; 415 } 416 if (mode == COLOR_STR) { 417 if (!fb_offset_starts_with(fb, offset, entry.arg.start)) 418 continue; 419 end_condition_len = len; 420 global_attr = entry.attr; 421 return; 422 } 423 } 424 } 425 426 427 //////////////////////// 428 // Auto indent 429 // 430 // 431 432 int 433 fb_auto_indent(struct file_buffer* fb, int offset) 434 { 435 const struct syntax_scheme* cs = fb_get_syntax_scheme(fb); 436 LIMIT(offset, 0, fb->len-1); 437 438 int indent_diff = 0; 439 int indent_keep_x = -1; 440 int keep_pos = 0; 441 442 int get_line_offset; 443 char* get_line = NULL; 444 445 for (int i = 0; i < cs->indent_count; i++) { 446 const struct indent_scheme_entry indent = cs->indents[i]; 447 448 get_line_offset = get_line_relative_offset(fb, offset, indent.line_offset); 449 if (get_line) 450 free(get_line); 451 get_line = fb_get_line_at_offset(fb, get_line_offset); 452 453 switch(indent.mode) { 454 int temp_offset, len; 455 char* res; 456 case INDENT_LINE_CONTAINS_WORD: 457 case INDENT_LINE_ONLY_CONTAINS_STR: 458 case INDENT_LINE_CONTAINS_STR_MORE_THAN_STR: 459 res = strstr(get_line, indent.arg.start); 460 if (!res) 461 continue; 462 if (INDENT_LINE_CONTAINS_WORD) { 463 if (res > get_line && !str_contains_char(cs->word_seperators, *(res-1))) 464 continue; 465 res += strlen(indent.arg.start); 466 if (*res && !str_contains_char(cs->word_seperators, *res)) 467 continue; 468 } else if (INDENT_LINE_CONTAINS_STR_MORE_THAN_STR == indent.mode) { 469 char* start_last = get_line; 470 char* end_last = get_line; 471 for (int count = 0; count >= 0; ) { 472 if (start_last && (start_last = strstr(start_last, indent.arg.start))) { 473 start_last++; 474 count--; 475 } else { 476 goto indent_for_loop_continue; 477 } 478 if (end_last && (end_last = strstr(end_last, indent.arg.end))) { 479 end_last++; 480 count++; 481 } 482 } 483 } else if (indent.mode == INDENT_LINE_ONLY_CONTAINS_STR) { 484 int str_start = fb_seek_string_backwards(fb, get_line_offset, indent.arg.start); 485 int str_end = str_start + strlen(indent.arg.start); 486 int line_end = MIN(get_line_offset + 1, fb->len); 487 int line_start = MAX(fb_seek_char_backwards(fb, get_line_offset, '\n'), 0); 488 if (fb_seek_not_whitespace_backwards(fb, str_start-1) >= line_start || 489 fb_seek_not_whitespace(fb, str_end+1) <= line_end) 490 continue; 491 } 492 if (indent.type == INDENT_KEEP_OPENER || indent.type == INDENT_RETURN_TO_OPENER_BASE_INDENT) 493 keep_pos = fb_seek_string_backwards(fb, get_line_offset, indent.arg.start); 494 goto set_indent_type; 495 496 case INDENT_LINE_ENDS_WITH_STR: 497 case INDENT_LINE_DOES_NOT_END_WITH_STR: 498 len = strlen(indent.arg.start); 499 temp_offset = fb_seek_not_whitespace_backwards(fb, get_line_offset) - len; 500 if (temp_offset < 0 || 501 temp_offset < fb_seek_char_backwards(fb, get_line_offset, '\n')) 502 continue; 503 if (memcmp(fb->contents + get_line_offset, indent.arg.start, strlen(indent.arg.start)) == 0) { 504 if (indent.mode == INDENT_LINE_DOES_NOT_END_WITH_STR) 505 continue; 506 } else { 507 if (indent.mode == INDENT_LINE_ENDS_WITH_STR) 508 continue; 509 } 510 keep_pos = temp_offset; 511 goto set_indent_type; 512 513 set_indent_type: 514 if (indent.type == INDENT_KEEP_OPENER) { 515 if (indent_keep_x >= 0) 516 continue; 517 int tmp; 518 fb_offset_to_xy(fb, keep_pos, 0, 0, &indent_keep_x, &tmp, &tmp); 519 } else if (indent.type == INDENT_RETURN_TO_OPENER_BASE_INDENT) { 520 if (indent_keep_x >= 0) 521 continue; 522 int opener, closer; 523 if (!fb_get_delimiter(fb, keep_pos, indent.arg, NULL, &opener, &closer)) { 524 indent_keep_x = -1; 525 goto indent_for_loop_continue; 526 } 527 keep_pos = fb_seek_not_whitespace(fb, fb_seek_char_backwards(fb, opener, '\n')); 528 int tmp; 529 fb_offset_to_xy(fb, keep_pos, 0, 0, &indent_keep_x, &tmp, &tmp); 530 //TODO: why does this miss by one? 531 if (indent_keep_x > 0) 532 indent_keep_x--; 533 } else { 534 if (indent_diff) 535 continue; 536 indent_diff += indent.type; 537 } 538 } 539 indent_for_loop_continue: 540 continue; 541 } 542 543 if (get_line) 544 free(get_line); 545 546 int indents = 0, extra_spaces = 0; 547 548 int prev_line_offset = fb_seek_char_backwards(fb, offset, '\n') - 1; 549 if (prev_line_offset < 0) 550 return 0; 551 char* prev_line = fb_get_line_at_offset(fb, prev_line_offset); 552 553 if (indent_keep_x >= 0) { 554 whitespace_count_to_indent_amount(fb->indent_len, indent_keep_x, &indents, &extra_spaces); 555 } else { 556 whitespace_count_to_indent_amount(fb->indent_len, get_line_leading_whitespace_count(prev_line), &indents, &extra_spaces); 557 } 558 if (indent_diff != INDENT_KEEP) { 559 indents += indent_diff; 560 indents = MAX(indents, 0); 561 } 562 563 // remove the lines existing indent 564 int removed = 0; 565 int line_code_start = MIN(fb_seek_not_whitespace(fb, prev_line_offset + 1), fb_seek_char(fb, prev_line_offset + 1, '\n')); 566 if (line_code_start - (prev_line_offset) >= 1) { 567 removed = line_code_start - (prev_line_offset+1); 568 fb_remove(fb, prev_line_offset + 1, removed, 1, 1); 569 } 570 571 if (indents + extra_spaces <= 0) { 572 free(prev_line); 573 return -removed; 574 } 575 576 unsigned int indent_str_len = 0; 577 while(indents--) 578 indent_str_len += fb->indent_len ? fb->indent_len : 1; 579 char indent_str[indent_str_len + extra_spaces]; 580 581 char space_tab = fb->indent_len > 0 ? ' ' : '\t'; 582 int i = 0; 583 if (!fb->indent_len) 584 for (/* i = 0 */; i < indent_str_len; i++) 585 indent_str[i] = space_tab; 586 for (/* i = 0 or indent_str_len */ ; i < indent_str_len + extra_spaces; i++) 587 indent_str[i] = ' '; 588 589 fb_insert(fb, indent_str, indent_str_len + extra_spaces, prev_line_offset + 1, 0); 590 591 free(prev_line); 592 return indent_str_len + extra_spaces - removed; 593 } 594 595 int get_line_leading_whitespace_count(const char* line) 596 { 597 int count = 0; 598 while(*line) { 599 if (*line == ' ') 600 count++; 601 else if (*line == '\t') 602 count += tabspaces - (count % tabspaces); 603 else 604 break; 605 line++; 606 } 607 return count; 608 } 609 610 void 611 whitespace_count_to_indent_amount(int indent_len, int count, int* indent_count, int* extra_spaces) 612 { 613 *indent_count = 0, *extra_spaces = 0; 614 int space_count = indent_len > 0 ? indent_len : tabspaces; 615 while(count >= space_count) { 616 *indent_count += 1; 617 count -= space_count; 618 } 619 *extra_spaces = count; 620 } 621 622 int 623 get_line_relative_offset(struct file_buffer* fb, int offset, int count) 624 { 625 offset = fb_seek_char(fb, offset, '\n'); 626 if (offset < 0) 627 offset = fb->len; 628 if (count > 0) { 629 while(count-- && offset >= 0) 630 offset = fb_seek_char(fb, offset+1, '\n'); 631 if (offset < 0) 632 offset = fb->len; 633 } else if (count < 0) { 634 offset = fb_seek_char_backwards(fb, offset, '\n'); 635 while(count++ && offset >= 0) 636 offset = fb_seek_char_backwards(fb, offset-1, '\n'); 637 if (offset < 0) 638 offset = 0; 639 } 640 offset = fb_seek_char(fb, offset, '\n'); 641 if (offset > 0 && fb->contents[offset-1] != '\n') 642 offset--; 643 if (offset < 0) 644 offset = fb->len; 645 return offset; 646 } 647 648 649 #endif // SYNTAX_H_