/**
 * MIDI Olympiad in Informatics, 2007
 * UTF-8 Reference solution
 * by Adomas Paltanavicius
 *
 * Reads the bytes of "utf-8.in", checks that they form well-formed UTF-8
 * sequences of one to four bytes (rejecting overlong forms and values above
 * 0x10FFFF), and writes a single line to "utf-8.out":
 *
 *     OK <n>      every byte belonged to a valid sequence
 *     FAIL <n>    an invalid byte or a truncated sequence was found
 *
 * where <n> is the number of characters fully decoded up to that point.
 */

#include <stdio.h>
#include <stdlib.h>

#define OK 1
#define FAIL 0

const char infile[] = "utf-8.in";
const char outfile[] = "utf-8.out";

/* Number of characters decoded so far. */
int characters;

/* Which multi-byte form is being decoded: 1, 2 or 3 continuation bytes. */
int range_case;

/* How many continuation (10xxxxxx) bytes are still expected. */
int additional_remain;

/* Code point value accumulated for the sequence being decoded. */
int current_value;

/*
 * Write the verdict and the character count to the output file, then
 * terminate the program. This function does not return.
 *
 * ok: non-zero prints "OK", zero prints "FAIL".
 */
void answer(int ok)
{
    if (freopen(outfile, "w", stdout) == NULL) {
        /* Without an output file the verdict would be silently lost. */
        perror(outfile);
        exit(EXIT_FAILURE);
    }
    printf("%s %d\n", ok ? "OK" : "FAIL", characters);
    exit(0);
}

/*
 * Check that current_value lies in the range that legitimately requires the
 * sequence length selected by range_case. A value below the range is an
 * overlong encoding; a value above 0x10FFFF is rejected as well.
 *
 * Returns non-zero when the value is acceptable, zero otherwise.
 *
 * NOTE(review): surrogate code points 0xD800..0xDFFF pass the three-byte
 * check. RFC 3629 forbids them; confirm against the task statement before
 * tightening this.
 */
int check_range(void)
{
    switch (range_case) {
    case 1:
        return (0x80 <= current_value) && (current_value <= 0x7FF);
    case 2:
        return (0x800 <= current_value) && (current_value <= 0xFFFF);
    case 3:
        return (0x10000 <= current_value) && (current_value <= 0x10FFFF);
    default:
        return 0;
    }
}

/*
 * Feed one input unit to the decoder state machine.
 *
 * c: a byte value as returned by fgetc (0..255), or EOF.
 *
 * Calls answer(FAIL), which terminates the program, on any malformed input.
 *
 * Declared static rather than plain inline: a plain inline definition emits
 * no external definition in C99, so the call from main could fail to link.
 */
static void process(int c)
{
    if (c == EOF) {
        if (additional_remain) {
            answer(FAIL); /* EOF in the middle of a multi-byte sequence. */
        }
        return;
    }

    if (additional_remain) {
        /* Inside a sequence: only 10xxxxxx continuation bytes are legal. */
        if ((c & 0xC0) != 0x80) {
            answer(FAIL); /* Wrong start bits. */
            return;
        }
        current_value = (current_value << 6) | (c & 0x3F);
        if (--additional_remain == 0) {
            if (!check_range()) {
                answer(FAIL); /* Overlong or out-of-range sequence. */
                return;
            }
            ++characters;
        }
        return;
    }

    /* Start of a new character: classify the lead byte. */
    if ((c & 0x80) == 0) {
        ++characters;                 /* 0xxxxxxx: single-byte character. */
    } else if ((c & 0xE0) == 0xC0) {
        range_case = 1;               /* 110xxxxx: one continuation byte. */
        additional_remain = 1;
        current_value = c & 0x1F;
    } else if ((c & 0xF0) == 0xE0) {
        range_case = 2;               /* 1110xxxx: two continuation bytes. */
        additional_remain = 2;
        current_value = c & 0x0F;
    } else if ((c & 0xF8) == 0xF0) {
        range_case = 3;               /* 11110xxx: three continuation bytes. */
        additional_remain = 3;
        current_value = c & 0x07;
    } else {
        answer(FAIL);                 /* Stray continuation or invalid prefix. */
    }
}

/*
 * Stream the input file through process() one byte at a time. EOF is passed
 * to process() as well so that a truncated final sequence is detected.
 */
int main(void)
{
    FILE *in = fopen(infile, "rb");
    int c;

    if (in == NULL) {
        perror(infile);
        return EXIT_FAILURE;
    }

    do {
        c = fgetc(in);
        if (c == EOF && ferror(in)) {
            /* A read error is not the same thing as a clean end of file. */
            perror(infile);
            fclose(in);
            return EXIT_FAILURE;
        }
        process(c);
    } while (c != EOF);

    fclose(in);
    answer(OK);
    return 0; /* Not reached: answer() terminates the program. */
}