![]() |
|
|||
File indexing completed on 2025-05-11 08:24:10
/*
 * LZMA2 definitions
 *
 * Authors: Lasse Collin <lasse.collin@tukaani.org>
 *          Igor Pavlov <http://7-zip.org/>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 *
 * NOTE: this header uses bool and uint32_t but includes nothing itself;
 * the including file must make those types available first.
 */

#ifndef XZ_LZMA2_H
#define XZ_LZMA2_H

/* Range coder constants */
#define RC_SHIFT_BITS 8
#define RC_TOP_BITS 24
#define RC_TOP_VALUE (1 << RC_TOP_BITS)
#define RC_BIT_MODEL_TOTAL_BITS 11
#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS)
#define RC_MOVE_BITS 5

/*
 * Maximum number of position states. A position state is the lowest pb
 * number of bits of the current uncompressed offset. In some places there
 * are different sets of probabilities for different position states.
 */
#define POS_STATES_MAX (1 << 4)

/*
 * This enum is used to track which LZMA symbols have occurred most recently
 * and in which order. This information is used to predict the next symbol.
 *
 * Symbols:
 *  - Literal: One 8-bit byte
 *  - Match: Repeat a chunk of data at some distance
 *  - Long repeat: Multi-byte match at a recently seen distance
 *  - Short repeat: One-byte repeat at a recently seen distance
 *
 * The symbol names are in the form STATE_oldest_older_previous. REP means
 * either short or long repeated match, and NONLIT means any non-literal.
 *
 * The numeric order of the enumerators matters: the helper functions below
 * rely on it (range comparisons and fixed offsets between states).
 */
enum lzma_state {
	STATE_LIT_LIT,
	STATE_MATCH_LIT_LIT,
	STATE_REP_LIT_LIT,
	STATE_SHORTREP_LIT_LIT,
	STATE_MATCH_LIT,
	STATE_REP_LIT,
	STATE_SHORTREP_LIT,
	STATE_LIT_MATCH,
	STATE_LIT_LONGREP,
	STATE_LIT_SHORTREP,
	STATE_NONLIT_MATCH,
	STATE_NONLIT_REP
};

/* Total number of states */
#define STATES 12

/* The lowest 7 states indicate that the previous symbol was a literal. */
#define LIT_STATES 7

/*
 * Indicate that the latest symbol was a literal.
 *
 * Transitions, by the numeric value of the current state:
 *  - STATE_LIT_LIT .. STATE_SHORTREP_LIT_LIT (0-3) -> STATE_LIT_LIT
 *  - STATE_MATCH_LIT .. STATE_LIT_SHORTREP (4-9)   -> state - 3
 *    (e.g. STATE_MATCH_LIT -> STATE_MATCH_LIT_LIT,
 *          STATE_LIT_MATCH -> STATE_MATCH_LIT)
 *  - STATE_NONLIT_MATCH, STATE_NONLIT_REP (10-11)  -> state - 6
 *    (STATE_NONLIT_MATCH -> STATE_MATCH_LIT,
 *     STATE_NONLIT_REP -> STATE_REP_LIT)
 */
static inline void lzma_state_literal(enum lzma_state *state)
{
	if (*state <= STATE_SHORTREP_LIT_LIT)
		*state = STATE_LIT_LIT;
	else if (*state <= STATE_LIT_SHORTREP)
		*state -= 3;
	else
		*state -= 6;
}

/*
 * Indicate that the latest symbol was a match: the new state is
 * STATE_LIT_MATCH if the previous symbol was a literal, otherwise
 * STATE_NONLIT_MATCH.
 */
static inline void lzma_state_match(enum lzma_state *state)
{
	*state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH;
}

/*
 * Indicate that the latest symbol was a long repeated match: the new state
 * is STATE_LIT_LONGREP if the previous symbol was a literal, otherwise
 * STATE_NONLIT_REP.
 */
static inline void lzma_state_long_rep(enum lzma_state *state)
{
	*state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP;
}

/*
 * Indicate that the latest symbol was a short repeated match: the new state
 * is STATE_LIT_SHORTREP if the previous symbol was a literal, otherwise
 * STATE_NONLIT_REP.
 */
static inline void lzma_state_short_rep(enum lzma_state *state)
{
	*state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP;
}

/* Test if the previous symbol was a literal. */
static inline bool lzma_state_is_literal(enum lzma_state state)
{
	return state < LIT_STATES;
}

/* Each literal coder is divided in three sections:
 *   - 0x001-0x0FF: Without match byte
 *   - 0x101-0x1FF: With match byte; match bit is 0
 *   - 0x201-0x2FF: With match byte; match bit is 1
 *
 * Match byte is used when the previous LZMA symbol was something else than
 * a literal (that is, it was some kind of match).
 */
#define LITERAL_CODER_SIZE 0x300

/* Maximum number of literal coders */
#define LITERAL_CODERS_MAX (1 << 4)

/* Minimum length of a match is two bytes. */
#define MATCH_LEN_MIN 2

/* Match length is encoded with 4, 5, or 10 bits.
 *
 * Length   Bits
 *  2-9      4 = Choice=0 + 3 bits
 * 10-17     5 = Choice=1 + Choice2=0 + 3 bits
 * 18-273   10 = Choice=1 + Choice2=1 + 8 bits
 */
#define LEN_LOW_BITS 3
#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS)
#define LEN_MID_BITS 3
#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS)
#define LEN_HIGH_BITS 8
#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS)
#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS)

/*
 * Maximum length of a match is 273 which is a result of the encoding
 * described above.
 */
#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1)

/*
 * Different sets of probabilities are used for match distances that have
 * very short match length: Lengths of 2, 3, and 4 bytes have a separate
 * set of probabilities for each length. The matches with longer length
 * use a shared set of probabilities.
 */
#define DIST_STATES 4

/*
 * Get the index of the appropriate probability array for decoding
 * the distance slot.
 *
 * Lengths MATCH_LEN_MIN .. MATCH_LEN_MIN + DIST_STATES - 2 map to
 * len - MATCH_LEN_MIN; every longer length maps to DIST_STATES - 1.
 *
 * NOTE: len is not checked against MATCH_LEN_MIN. A smaller value would
 * wrap around in the unsigned subtraction, so callers must pass
 * len >= MATCH_LEN_MIN.
 */
static inline uint32_t lzma_get_dist_state(uint32_t len)
{
	return len < DIST_STATES + MATCH_LEN_MIN
			? len - MATCH_LEN_MIN : DIST_STATES - 1;
}

/*
 * The highest two bits of a 32-bit match distance are encoded using six bits.
 * This six-bit value is called a distance slot. This way encoding a 32-bit
 * value takes 6-36 bits, larger values taking more bits.
 */
#define DIST_SLOT_BITS 6
#define DIST_SLOTS (1 << DIST_SLOT_BITS)

/* Match distances up to 127 are fully encoded using probabilities. Since
 * the highest two bits (distance slot) are always encoded using six bits,
 * the distances 0-3 don't need any additional bits to encode, since the
 * distance slot itself is the same as the actual distance. DIST_MODEL_START
 * indicates the first distance slot where at least one additional bit is
 * needed.
 */
#define DIST_MODEL_START 4

/*
 * Match distances greater than 127 are encoded in three pieces:
 *   - distance slot: the highest two bits
 *   - direct bits: 2-26 bits below the highest two bits
 *   - alignment bits: four lowest bits
 *
 * Direct bits don't use any probabilities.
 *
 * The distance slot value of 14 is for distances 128-191.
 */
#define DIST_MODEL_END 14

/* Distance slots that indicate a distance <= 127. */
#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2)
#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS)

/*
 * For match distances greater than 127, only the highest two bits and the
 * lowest four bits (alignment) are encoded using probabilities.
 */
#define ALIGN_BITS 4
#define ALIGN_SIZE (1 << ALIGN_BITS)
#define ALIGN_MASK (ALIGN_SIZE - 1)

/* Total number of all probability variables */
#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE)

/*
 * LZMA remembers the four most recent match distances. Reusing these
 * distances tends to take less space than re-encoding the actual
 * distance value.
 */
#define REPS 4

#endif
[ Source navigation ] | [ Diff markup ] | [ Identifier search ] | [ general search ] |
This page was automatically generated by the 2.3.7 LXR engine. The LXR team |
![]() ![]() |