#include "utf8.h"

Include dependency graph for utf8.c:

Macros
#define	SINGLE_BYTE_MASK 0x80

#define	SINGLE_BYTE_VALUE 0

#define	DOUBLE_BYTE_MASK 0xE0

#define	DOUBLE_BYTE_VALUE 0xC0

#define	TRIPLE_BYTE_MASK 0xF0

#define	TRIPLE_BYTE_VALUE 0xE0

#define	FOLLOW_BYTE_MASK 0xC0

#define	FOLLOW_BYTE_VALUE 0x80

Functions
uint8_t	nextUTF8Character (const uint8_t *utf8_bytes, int32_t utf8_len, uint32_t *outCharacter)
	Function to iterate over the bytes of a UTF-8 stream. More...

char	cmp_UTF8_Ascii (const uint8_t *utf8_bytes, int32_t utf8_len, const uint8_t *ascii_bytes, int32_t ascii_len)
	Function to compare two strings, one in UTF-8 and other in ASCII. More...

char	cmp_UTF8 (const uint8_t *utf8A_bytes, int32_t utf8A_len, const uint8_t *utf8B_bytes, int32_t utf8B_len)
	Function to compare two strings, both in UTF-8. More...

char	cmp_UTF8_FilePath (const uint8_t *utf8A_bytes, int32_t utf8A_len, const uint8_t *utf8B_bytes, int32_t utf8B_len)
	Function to compare two strings that contain file paths, both in UTF-8. More...

uint32_t	UTF8_to_Ascii (uint8_t *out_buffer, int32_t buffer_len, const uint8_t *utf8_bytes, int32_t utf8_len)
	Function that translates a UTF-8 stream to ASCII. More...

uint32_t	UTF8StringLength (const uint8_t *utf8_bytes, int32_t utf8_len)
	Returns the number of characters a UTF-8 string has. More...

Macro Definition Documentation

§ DOUBLE_BYTE_MASK

#define DOUBLE_BYTE_MASK 0xE0

§ DOUBLE_BYTE_VALUE

#define DOUBLE_BYTE_VALUE 0xC0

§ FOLLOW_BYTE_MASK

#define FOLLOW_BYTE_MASK 0xC0

§ FOLLOW_BYTE_VALUE

#define FOLLOW_BYTE_VALUE 0x80

§ SINGLE_BYTE_MASK

#define SINGLE_BYTE_MASK 0x80

§ SINGLE_BYTE_VALUE

#define SINGLE_BYTE_VALUE 0

§ TRIPLE_BYTE_MASK

#define TRIPLE_BYTE_MASK 0xF0

§ TRIPLE_BYTE_VALUE

#define TRIPLE_BYTE_VALUE 0xE0

Function Documentation

§ cmp_UTF8()

char cmp_UTF8	(	const uint8_t *	utf8A_bytes,
		int32_t	utf8A_len,
		const uint8_t *	utf8B_bytes,
		int32_t	utf8B_len
	)

Function to compare two strings, both in UTF-8.

Parameters

const	uint8_t* utf8A_bytes - pointer to the bytes that make the UTF-8 A string to be compared
const	uint8_t* utf8B_bytes - pointer to the bytes that make the UTF-8 B string to be compared
int32_t	utf8A_len - length of the bytes that make the A string
int32_t	utf8B_len - length of the bytes that make the B string

Returns: return value is 1 in case the strings are equal (case sensitive), 0 otherwise.

Here is the caller graph for this function:

§ cmp_UTF8_Ascii()

char cmp_UTF8_Ascii	(	const uint8_t *	utf8_bytes,
		int32_t	utf8_len,
		const uint8_t *	ascii_bytes,
		int32_t	ascii_len
	)

Function to compare two strings, one in UTF-8 and other in ASCII.

Parameters

const	uint8_t* utf8_bytes - bytes of the UTF-8 string
int32_t	utf8_len - length of the UTF-8 string
const	uint8_t* ascii_bytes - ASCII string to compare to
int32_t	ascii_len - length of ASCII string

Returns: Will return 1 if the strings are equal (case sensitive), 0 otherwise.

Note: Doesn't matter if the string is null terminated or not, as long as the length is correct.

Here is the call graph for this function:

Here is the caller graph for this function:

§ cmp_UTF8_FilePath()

char cmp_UTF8_FilePath	(	const uint8_t *	utf8A_bytes,
		int32_t	utf8A_len,
		const uint8_t *	utf8B_bytes,
		int32_t	utf8B_len
	)

Function to compare two strings that contain file paths, both in UTF-8.

The difference in this function is that it considers slashes (/) and backslashes (\) to be the same character, and consecutive slashes or backslashes are treated as a single character, for the sake of checking whether two strings are actually the path to the same file/directory. utf8A_bytes and utf8B_bytes are the pointers to the bytes that make the UTF-8 strings A and B that will be compared. utf8A_len and utf8B_len are the lengths of the bytes that make those strings, respectively.

Parameters

const	uint8_t* utf8A_bytes - pointer to the bytes that make the UTF-8 A string to be compared
const	uint8_t* utf8B_bytes - pointer to the bytes that make the UTF-8 B string to be compared
int32_t	utf8A_len - length of the bytes that make the A string
int32_t	utf8B_len - length of the bytes that make the B string

Returns: return value is 1 in case the strings are equal (case sensitive), 0 otherwise.

Here is the call graph for this function:

Here is the caller graph for this function:

§ nextUTF8Character()

uint8_t nextUTF8Character	(	const uint8_t *	utf8_bytes,
		int32_t	utf8_len,
		uint32_t *	outCharacter
	)

Function to iterate over the bytes of a UTF-8 stream.

Parameters

const	uint8_t* utf8_bytes - represents the character being read
int32_t	utf8_len - is the number of characters
uint32_t*	outCharacter - pointer where the character being read is written, if it isn't NULL

Returns: uint8_t - the number of bytes read from the UTF-8 stream to represent that single character. If the return value is 0, then nothing was read. It could mean that the length is not sufficient, that the UTF-8 encoding is wrong or the stream has a four-byte character, which isn't supported by this program.

Here is the caller graph for this function:

§ UTF8_to_Ascii()

uint32_t UTF8_to_Ascii	(	uint8_t *	out_buffer,
		int32_t	buffer_len,
		const uint8_t *	utf8_bytes,
		int32_t	utf8_len
	)

Function that translates a UTF-8 stream to ASCII.

Parameters

const	uint8_t* utf8_bytes - UTF-8 stream to be translated
int32_t	utf8_len - length of the bytes that make the UTF-8 stream
uint8_t*	out_buffer - pointer where the result will be stored
int32_t	buffer_len - length of the bytes that make out_buffer

Note: "buffer_len" characters will be written to the buffer, NULL character included.

Here is the call graph for this function:

Here is the caller graph for this function:

§ UTF8StringLength()

uint32_t UTF8StringLength	(	const uint8_t *	utf8_bytes,
		int32_t	utf8_len
	)

Returns the number of characters a UTF-8 string has.

Parameters

const	uint8_t* utf8_bytes - UTF-8 stream
int32_t	utf8_len - length of the bytes that make utf8_bytes

Returns: number of characters a UTF-8 string has

Here is the call graph for this function:

Macros

Functions

Macro Definition Documentation

§ DOUBLE_BYTE_MASK

§ DOUBLE_BYTE_VALUE

§ FOLLOW_BYTE_MASK

§ FOLLOW_BYTE_VALUE

§ SINGLE_BYTE_MASK

§ SINGLE_BYTE_VALUE

§ TRIPLE_BYTE_MASK

§ TRIPLE_BYTE_VALUE

Function Documentation

§ cmp_UTF8()

§ cmp_UTF8_Ascii()

§ cmp_UTF8_FilePath()

§ nextUTF8Character()

§ UTF8_to_Ascii()

§ UTF8StringLength()