libmobi
C library for handling MOBI format ebook documents
|
Functions to parse index records. More...
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include "index.h"
#include "util.h"
#include "memory.h"
#include "debug.h"
#include "buffer.h"
Macros | |
#define | _GNU_SOURCE 1 |
#define | __USE_BSD /* for strdup on linux/glibc */ |
Functions | |
size_t | mobi_indx_get_label (unsigned char *output, MOBIBuffer *buf, const size_t length, const size_t has_ligatures) |
Read index entry label from buffer pointing at index record data. More... | |
size_t | mobi_ordt_getbuffer (const MOBIOrdt *ordt, MOBIBuffer *buf, uint16_t *offset) |
Get encoded character from dictionary index. The characters are offsets into ORDT table. More... | |
uint16_t | mobi_ordt_lookup (const MOBIOrdt *ordt, const uint16_t offset) |
Fetch UTF-16 value from ORDT2 table. More... | |
size_t | mobi_getstring_ordt (const MOBIOrdt *ordt, MOBIBuffer *buf, unsigned char *output, size_t length) |
Get UTF-8 string from buffer, decoded by lookups in ORDT2 table. More... | |
MOBI_RET | mobi_parse_indx (const MOBIPdbRecord *indx_record, MOBIIndx *indx, MOBITagx *tagx, MOBIOrdt *ordt) |
Parser of INDX record. More... | |
MOBI_RET | mobi_parse_index (const MOBIData *m, MOBIIndx *indx, const size_t indx_record_number) |
Parser of a set of index records. More... | |
MOBI_RET | mobi_get_indxentry_tagvalue (uint32_t *tagvalue, const MOBIIndexEntry *entry, const unsigned tag_arr[]) |
Get a value of tag[tagid][tagindex] for given index entry. More... | |
size_t | mobi_get_indxentry_tagarray (uint32_t **tagarr, const MOBIIndexEntry *entry, const size_t tagid) |
Get array of tagvalues of tag[tagid] for given index entry. More... | |
uint32_t | mobi_get_orth_entry_offset (const MOBIIndexEntry *entry) |
Get entry start offset for the orth entry. More... | |
uint32_t | mobi_get_orth_entry_length (const MOBIIndexEntry *entry) |
Get text length for the orth entry. More... | |
bool | mobi_indx_has_tag (const MOBIIndx *indx, const size_t tagid) |
Check if given tagid is present in the index. More... | |
char * | mobi_get_cncx_string (const MOBIPdbRecord *cncx_record, const uint32_t cncx_offset) |
Get compiled index entry string. More... | |
char * | mobi_get_cncx_string_utf8 (const MOBIPdbRecord *cncx_record, const uint32_t cncx_offset, MOBIEncoding cncx_encoding) |
Get compiled index entry string, converted to utf8 encoding. More... | |
char * | mobi_get_cncx_string_flat (const MOBIPdbRecord *cncx_record, const uint32_t cncx_offset, const size_t length) |
Get flat index entry string. More... | |
MOBI_RET | mobi_decode_infl (unsigned char *decoded, int *decoded_size, const unsigned char *rule) |
Decode compiled infl index entry. More... | |
size_t | mobi_trie_get_inflgroups (char **infl_strings, MOBITrie *const root, const char *string) |
Get all matches for given string from trie structure. More... | |
MOBI_RET | mobi_trie_insert_infl (MOBITrie **root, const MOBIIndx *indx, size_t i) |
Insert reversed inflection string for given entry into trie structure. More... | |
Functions to parse index records.
Copyright (c) 2020 Bartek Fabiszewski http://www.fabiszewski.net
This file is part of libmobi. Licensed under LGPL, either version 3, or any later. See http://www.gnu.org/licenses/
MOBI_RET mobi_decode_infl | ( | unsigned char * | decoded, |
int * | decoded_size, | ||
const unsigned char * | rule | ||
) |
Decode compiled infl index entry.
Buffer decoded must be initialized with basic index entry. Basic index entry will be transformed into inflected form, based on compiled rule. Min. size of input buffer (decoded) must be INDX_INFLBUF_SIZEMAX + 1
[in,out] | decoded | Decoded entry string |
[in,out] | decoded_size | Decoded entry size |
[in] | rule | Compiled rule |
char* mobi_get_cncx_string | ( | const MOBIPdbRecord * | cncx_record, |
const uint32_t | cncx_offset | ||
) |
Get compiled index entry string.
Allocates memory for the string. Must be freed by caller.
[in] | cncx_record | MOBIPdbRecord structure with cncx record |
[in] | cncx_offset | Offset of string entry from the beginning of the record |
char* mobi_get_cncx_string_flat | ( | const MOBIPdbRecord * | cncx_record, |
const uint32_t | cncx_offset, | ||
const size_t | length | ||
) |
Get flat index entry string.
Allocates memory for the string. Must be freed by caller.
[in] | cncx_record | MOBIPdbRecord structure with cncx record |
[in] | cncx_offset | Offset of string entry from the beginning of the record |
[in] | length | Length of the string to be extracted |
char* mobi_get_cncx_string_utf8 | ( | const MOBIPdbRecord * | cncx_record, |
const uint32_t | cncx_offset, | ||
MOBIEncoding | cncx_encoding | ||
) |
Get compiled index entry string, converted to utf8 encoding.
Allocates memory for the string. Must be freed by caller.
[in] | cncx_record | MOBIPdbRecord structure with cncx record |
[in] | cncx_offset | Offset of string entry from the beginning of the record |
[in] | cncx_encoding | Encoding |
size_t mobi_get_indxentry_tagarray | ( | uint32_t ** | tagarr, |
const MOBIIndexEntry * | entry, | ||
const size_t | tagid | ||
) |
Get array of tagvalues of tag[tagid] for given index entry.
[in,out] | tagarr | Pointer to tagvalues array |
[in] | entry | Index entry to be searched for the value |
[in] | tagid | Id of the tag |
MOBI_RET mobi_get_indxentry_tagvalue | ( | uint32_t * | tagvalue, |
const MOBIIndexEntry * | entry, | ||
const unsigned | tag_arr[] | ||
) |
Get a value of tag[tagid][tagindex] for given index entry.
[in,out] | tagvalue | Will be set to a tag value |
[in] | entry | Index entry to be searched for the value |
[in] | tag_arr | Array: tag_arr[0] = tagid, tag_arr[1] = tagindex |
size_t mobi_getstring_ordt | ( | const MOBIOrdt * | ordt, |
MOBIBuffer * | buf, | ||
unsigned char * | output, | ||
size_t | length | ||
) |
Get UTF-8 string from buffer, decoded by lookups in ORDT2 table.
[in] | ordt | MOBIOrdt structure (ORDT data and metadata) |
[in,out] | buf | MOBIBuffer structure with input string |
[in,out] | output | Output buffer (INDX_LABEL_SIZEMAX + 1 bytes) |
[in] | length | Length of input string contained in buf |
size_t mobi_indx_get_label | ( | unsigned char * | output, |
MOBIBuffer * | buf, | ||
const size_t | length, | ||
const size_t | has_ligatures | ||
) |
Read index entry label from buffer pointing at index record data.
[in,out] | output | Output buffer (INDX_LABEL_SIZEMAX + 1 bytes) |
[in,out] | buf | MOBIBuffer structure, offset pointing at index entry label |
[in] | length | Number of bytes to be read |
[in] | has_ligatures | Decode ligatures if true |
bool mobi_indx_has_tag | ( | const MOBIIndx * | indx, |
const size_t | tagid | ||
) |
Check if given tagid is present in the index.
[in] | indx | Index MOBIIndx structure |
[in] | tagid | Id of the tag |
size_t mobi_ordt_getbuffer | ( | const MOBIOrdt * | ordt, |
MOBIBuffer * | buf, | ||
uint16_t * | offset | ||
) |
Get encoded character from dictionary index. The characters are offsets into ORDT table.
[in] | ordt | MOBIOrdt structure (ORDT data and metadata) |
[in,out] | buf | MOBIBuffer structure with index data |
[in,out] | offset | Value read from buffer |
uint16_t mobi_ordt_lookup | ( | const MOBIOrdt * | ordt, |
const uint16_t | offset | ||
) |
Fetch UTF-16 value from ORDT2 table.
[in] | ordt | MOBIOrdt structure (ORDT data and metadata) |
[in] | offset | Offset in ORDT2 table |
MOBI_RET mobi_parse_indx | ( | const MOBIPdbRecord * | indx_record, |
MOBIIndx * | indx, | ||
MOBITagx * | tagx, | ||
MOBIOrdt * | ordt | ||
) |
Parser of INDX record.
[in] | indx_record | MOBIPdbRecord structure with INDX record |
[in,out] | indx | MOBIIndx structure to be filled with parsed entries |
[in,out] | tagx | MOBITagx structure, will be filled with parsed TAGX section data if present in the INDX record, otherwise TAGX data will be used to parse the record |
[in,out] | ordt | MOBIOrdt structure, will be filled with parsed ORDT sections |
size_t mobi_trie_get_inflgroups | ( | char ** | infl_strings, |
MOBITrie *const | root, | ||
const char * | string | ||
) |
Get all matches for given string from trie structure.
Matches are made against the reversed string and all its substrings.
[in,out] | infl_strings | Array of returned strings |
[in] | root | Root node of the tree |
[in] | string | String to be matched against the trie |
Insert reversed inflection string for given entry into trie structure.
[in,out] | root | Root node of the tree, created if NULL |
[in] | indx | MOBIIndx infl index records |
[in] | i | Index entry number |