Repo for the search-and-displace ingest module, which takes ODF, DOCX and PDF files and transforms them into Markdown (.md) for use with search-and-displace operations.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1116 lines
33 KiB

3 years ago
  1. unit imjchuff;
  2. { This file contains Huffman entropy encoding routines.
  3. Much of the complexity here has to do with supporting output suspension.
  4. If the data destination module demands suspension, we want to be able to
  5. back up to the start of the current MCU. To do this, we copy state
  6. variables into local working storage, and update them back to the
  7. permanent JPEG objects only upon successful completion of an MCU. }
  8. { Original: jchuff.c; Copyright (C) 1991-1997, Thomas G. Lane. }
  9. interface
  10. {$I imjconfig.inc}
  11. uses
  12. imjmorecfg, { longptr definition missing }
  13. imjpeglib,
  14. imjdeferr,
  15. imjerror,
  16. imjutils,
  17. imjinclude,
  18. imjcomapi;
  19. { The legal range of a DCT coefficient is
  20. -1024 .. +1023 for 8-bit data;
  21. -16384 .. +16383 for 12-bit data.
  22. Hence the magnitude should always fit in 10 or 14 bits respectively. }
  23. {$ifdef BITS_IN_JSAMPLE_IS_8}
  24. const
  25. MAX_COEF_BITS = 10;
  26. {$else}
  27. const
  28. MAX_COEF_BITS = 14;
  29. {$endif}
  30. { Derived data constructed for each Huffman table }
  31. { Declarations shared with jcphuff.c }
  { Encoder-side lookup table derived from one Huffman table.
    Both arrays are indexed by symbol value; jpeg_make_c_derived_tbl
    allocates and fills the record. }
  32. type
  33. c_derived_tbl_ptr = ^c_derived_tbl;
  34. c_derived_tbl = record
  35. ehufco : array[0..256-1] of uInt; { code for each symbol }
  36. ehufsi : array[0..256-1] of byte; { length of code for each symbol }
  37. { If no code has been allocated for a symbol S, ehufsi[S] contains 0 }
  38. end;
  39. { for JCHUFF and JCPHUFF }
  { Symbol frequency table: 257 entries (indexes 0..256).
    jpeg_gen_optimal_table uses slot 256 for its reserved pseudo-symbol. }
  40. type
  41. TLongTable = array[0..256] of long;
  42. TLongTablePtr = ^TLongTable;
  43. { Compute the derived values for a Huffman table.
  44. Note this is also used by jcphuff.c. }
  45. {GLOBAL}
  46. procedure jpeg_make_c_derived_tbl (cinfo : j_compress_ptr;
  47. isDC : boolean;
  48. tblno : int;
  49. var pdtbl : c_derived_tbl_ptr);
  50. { Generate the optimal coding for the given counts, fill htbl.
  51. Note this is also used by jcphuff.c. }
  52. {GLOBAL}
  53. procedure jpeg_gen_optimal_table (cinfo : j_compress_ptr;
  54. htbl : JHUFF_TBL_PTR;
  55. var freq : TLongTable); { Nomssi }
  56. { Module initialization routine for Huffman entropy encoding. }
  57. {GLOBAL}
  58. procedure jinit_huff_encoder (cinfo : j_compress_ptr);
  59. implementation
  60. { Expanded entropy encoder object for Huffman encoding.
  61. The savable_state subrecord contains fields that change within an MCU,
  62. but must not be updated permanently until we complete the MCU. }
  { Encoder state that changes while an MCU is being written.
    The routines in this unit copy it into a working_state, operate on the
    copy, and assign it back only after the whole MCU has been emitted. }
  63. type
  64. savable_state = record
  65. put_buffer : INT32; { current bit-accumulation buffer }
  66. put_bits : int; { # of bits now in it }
  67. last_dc_val : array[0..MAX_COMPS_IN_SCAN-1] of int;
  68. { last DC coef for each component }
  69. end;
  { Private state of the Huffman entropy encoder.
    The routines in this unit obtain it by casting cinfo^.entropy to
    huff_entropy_ptr. The statistics tables exist only when
    ENTROPY_OPT_SUPPORTED is defined. }
  70. type
  71. huff_entropy_ptr = ^huff_entropy_encoder;
  72. huff_entropy_encoder = record
  73. pub : jpeg_entropy_encoder; { public fields }
  74. saved : savable_state; { Bit buffer & DC state at start of MCU }
  75. { These fields are NOT loaded into local working state. }
  76. restarts_to_go : uInt; { MCUs left in this restart interval }
  77. next_restart_num : int; { next restart number to write (0-7) }
  78. { Pointers to derived tables (these workspaces have image lifespan) }
  79. dc_derived_tbls : array[0..NUM_HUFF_TBLS-1] of c_derived_tbl_ptr;
  80. ac_derived_tbls : array[0..NUM_HUFF_TBLS-1] of c_derived_tbl_ptr;
  81. {$ifdef ENTROPY_OPT_SUPPORTED} { Statistics tables for optimization }
  82. dc_count_ptrs : array[0..NUM_HUFF_TBLS-1] of TLongTablePtr;
  83. ac_count_ptrs : array[0..NUM_HUFF_TBLS-1] of TLongTablePtr;
  84. {$endif}
  85. end;
  86. { Working state while writing an MCU.
  87. This struct contains all the fields that are needed by subroutines. }
  { Scratch copy of the output position and savable_state used while one MCU
    is being written. encode_mcu_huff and finish_pass_huff load it from
    cinfo^.dest and entropy^.saved and store it back on success. }
  88. type
  89. working_state = record
  90. next_output_byte : JOCTETptr; { => next byte to write in buffer }
  91. free_in_buffer : size_t; { # of byte spaces remaining in buffer }
  92. cur : savable_state; { Current bit buffer & DC state }
  93. cinfo : j_compress_ptr; { dump_buffer needs access to this }
  94. end;
  95. { Forward declarations }
  96. {METHODDEF}
  97. function encode_mcu_huff (cinfo : j_compress_ptr;
  98. const MCU_data : array of JBLOCKROW) : boolean;
  99. forward;
  100. {METHODDEF}
  101. procedure finish_pass_huff (cinfo : j_compress_ptr); forward;
  102. {$ifdef ENTROPY_OPT_SUPPORTED}
  103. {METHODDEF}
  104. function encode_mcu_gather (cinfo : j_compress_ptr;
  105. const MCU_data: array of JBLOCKROW) : boolean;
  106. forward;
  107. {METHODDEF}
  108. procedure finish_pass_gather (cinfo : j_compress_ptr); forward;
  109. {$endif}
  { Set up the entropy encoder for one Huffman-compressed scan.
    cinfo             : compression object whose entropy module is configured.
    gather_statistics : when TRUE nothing is written during the scan; symbol
                        usage is only counted so that Huffman code tables can
                        be generated afterwards. When FALSE the derived
                        encoding tables are (re)built for every component. }
  {METHODDEF}
  procedure start_pass_huff (cinfo : j_compress_ptr;
                             gather_statistics : boolean);
  var
    enc : huff_entropy_ptr;
    comp_idx, dc_idx, ac_idx : int;
    comp : jpeg_component_info_ptr;
  begin
    enc := huff_entropy_ptr (cinfo^.entropy);

    { Select the per-MCU and end-of-pass methods for this kind of pass }
    if (not gather_statistics) then
    begin
      enc^.pub.encode_mcu := encode_mcu_huff;
      enc^.pub.finish_pass := finish_pass_huff;
    end
    else
    begin
  {$ifdef ENTROPY_OPT_SUPPORTED}
      enc^.pub.encode_mcu := encode_mcu_gather;
      enc^.pub.finish_pass := finish_pass_gather;
  {$else}
      ERREXIT(j_common_ptr(cinfo), JERR_NOT_COMPILED);
  {$endif}
    end;

    for comp_idx := 0 to cinfo^.comps_in_scan - 1 do
    begin
      comp := cinfo^.cur_comp_info[comp_idx];
      dc_idx := comp^.dc_tbl_no;
      ac_idx := comp^.ac_tbl_no;

      if (not gather_statistics) then
      begin
        { Build the encoding tables; a table shared by several components
          is simply rebuilt, which is cheap }
        jpeg_make_c_derived_tbl(cinfo, TRUE, dc_idx,
                                enc^.dc_derived_tbls[dc_idx]);
        jpeg_make_c_derived_tbl(cinfo, FALSE, ac_idx,
                                enc^.ac_derived_tbls[ac_idx]);
      end
      else
      begin
  {$ifdef ENTROPY_OPT_SUPPORTED}
        { Table numbers must be validated here because
          jpeg_make_c_derived_tbl is not called on this path }
        if (dc_idx < 0) or (dc_idx >= NUM_HUFF_TBLS) then
          ERREXIT1(j_common_ptr(cinfo), JERR_NO_HUFF_TABLE, dc_idx);
        if (ac_idx < 0) or (ac_idx >= NUM_HUFF_TBLS) then
          ERREXIT1(j_common_ptr(cinfo), JERR_NO_HUFF_TABLE, ac_idx);

        { Allocate the counting tables on first use and clear them on every
          pass; each holds the 257 entries jpeg_gen_optimal_table expects }
        if (enc^.dc_count_ptrs[dc_idx] = NIL) then
          enc^.dc_count_ptrs[dc_idx] := TLongTablePtr(
            cinfo^.mem^.alloc_small (j_common_ptr(cinfo), JPOOL_IMAGE,
                                     257 * SIZEOF(long)) );
        MEMZERO(enc^.dc_count_ptrs[dc_idx], 257 * SIZEOF(long));

        if (enc^.ac_count_ptrs[ac_idx] = NIL) then
          enc^.ac_count_ptrs[ac_idx] := TLongTablePtr(
            cinfo^.mem^.alloc_small (j_common_ptr(cinfo), JPOOL_IMAGE,
                                     257 * SIZEOF(long)) );
        MEMZERO(enc^.ac_count_ptrs[ac_idx], 257 * SIZEOF(long));
  {$endif}
      end;

      { DC prediction for this component starts from zero }
      enc^.saved.last_dc_val[comp_idx] := 0;
    end;

    { Empty bit accumulator }
    enc^.saved.put_buffer := 0;
    enc^.saved.put_bits := 0;

    { Restart-interval bookkeeping }
    enc^.restarts_to_go := cinfo^.restart_interval;
    enc^.next_restart_num := 0;
  end;
  { Compute the derived (encoder-side) values for a Huffman table and
    validate the table while doing so. Note this is also used by jcphuff.c.

    cinfo : compression object holding the source Huffman tables.
    isDC  : TRUE selects cinfo^.dc_huff_tbl_ptrs, FALSE cinfo^.ac_huff_tbl_ptrs.
    tblno : table index; must lie in 0..NUM_HUFF_TBLS-1.
    pdtbl : in/out pointer to the derived table; allocated from the image
            pool on first use (when NIL) and refilled on every call.

    Errors (raised through ERREXIT/ERREXIT1):
      JERR_NO_HUFF_TABLE  - tblno out of range or the table is not defined.
      JERR_BAD_HUFF_TABLE - more than 256 codes, code lengths that do not
                            form a legal prefix code, or an out-of-range or
                            duplicated symbol value. }
  {GLOBAL}
  procedure jpeg_make_c_derived_tbl (cinfo : j_compress_ptr;
                                     isDC : boolean;
                                     tblno : int;
                                     var pdtbl : c_derived_tbl_ptr);
  var
    htbl : JHUFF_TBL_PTR;
    dtbl : c_derived_tbl_ptr;
    p, i, l, lastp, si, maxsymbol : int;
    huffsize : array[0..257-1] of byte;
    huffcode : array[0..257-1] of uInt;
    code : uInt;
  begin
    { Note that huffsize[] and huffcode[] are filled in code-length order,
      paralleling the order of the symbols themselves in htbl->huffval[]. }

    { Find the input Huffman table }
    if (tblno < 0) or (tblno >= NUM_HUFF_TBLS) then
      ERREXIT1(j_common_ptr(cinfo), JERR_NO_HUFF_TABLE, tblno);
    if isDC then
      htbl := cinfo^.dc_huff_tbl_ptrs[tblno]
    else
      htbl := cinfo^.ac_huff_tbl_ptrs[tblno];
    if (htbl = NIL) then
      ERREXIT1(j_common_ptr(cinfo), JERR_NO_HUFF_TABLE, tblno);

    { Allocate a workspace if we haven't already done so. }
    if (pdtbl = NIL) then
      pdtbl := c_derived_tbl_ptr(
        cinfo^.mem^.alloc_small (j_common_ptr(cinfo), JPOOL_IMAGE,
                                 SIZEOF(c_derived_tbl)) );
    dtbl := pdtbl;

    { Figure C.1: make table of Huffman code length for each symbol }
    p := 0;
    for l := 1 to 16 do
    begin
      i := int(htbl^.bits[l]);
      { Protect against table overrun. Either condition alone must reject
        the table: requiring both (the former "and") could only trigger
        after p had already run past the end of huffsize[]. }
      if (i < 0) or (p + i > 256) then
        ERREXIT(j_common_ptr(cinfo), JERR_BAD_HUFF_TABLE);
      while (i > 0) do
      begin
        huffsize[p] := byte(l);
        Inc(p);
        Dec(i);
      end;
    end;
    huffsize[p] := 0;              { terminator; p <= 256 is guaranteed above }
    lastp := p;

    { Figure C.2: generate the codes themselves }
    { We also validate that the counts represent a legal Huffman code tree. }
    code := 0;
    si := huffsize[0];
    p := 0;
    while (huffsize[p] <> 0) do
    begin
      while (( int(huffsize[p]) ) = si) do
      begin
        huffcode[p] := code;
        Inc(p);
        Inc(code);
      end;
      { code is now 1 more than the last code used for codelength si; but
        it must still fit in si bits, since no code is allowed to be all ones. }
      if (INT32(code) >= (INT32(1) shl si)) then
        ERREXIT(j_common_ptr(cinfo), JERR_BAD_HUFF_TABLE);
      code := code shl 1;
      Inc(si);
    end;

    { Figure C.3: generate encoding tables }
    { These are code and size indexed by symbol value }

    { Set all codeless symbols to have code length 0;
      this lets us detect duplicate VAL entries here, and later
      allows emit_bits to detect any attempt to emit such symbols. }
    MEMZERO(@dtbl^.ehufsi, SIZEOF(dtbl^.ehufsi));

    { This is also a convenient place to check for out-of-range
      and duplicated VAL entries. We allow 0..255 for AC symbols
      but only 0..15 for DC. (We could constrain them further
      based on data depth and mode, but this seems enough.) }
    if isDC then
      maxsymbol := 15
    else
      maxsymbol := 255;

    for p := 0 to pred(lastp) do
    begin
      i := htbl^.huffval[p];
      if (i < 0) or (i > maxsymbol) or (dtbl^.ehufsi[i] <> 0) then
        ERREXIT(j_common_ptr(cinfo), JERR_BAD_HUFF_TABLE);
      dtbl^.ehufco[i] := huffcode[p];
      dtbl^.ehufsi[i] := huffsize[p];
    end;
  end;
  { Outputting bytes to the file }

  { Hand the full output buffer to the destination manager.
    Returns TRUE when the buffer was emptied, in which case the working
    state's write pointer and free count are reloaded from the manager;
    returns FALSE when the manager refuses, i.e. the caller must suspend. }
  {LOCAL}
  function dump_buffer (var state : working_state) : boolean;
  var
    mgr : jpeg_destination_mgr_ptr;
    emptied : boolean;
  begin
    mgr := state.cinfo^.dest;
    emptied := mgr^.empty_output_buffer (state.cinfo);
    if emptied then
    begin
      { The manager has reset its buffer; pick up the new position }
      state.next_output_byte := mgr^.next_output_byte;
      state.free_in_buffer := mgr^.free_in_buffer;
    end;
    dump_buffer := emptied;
  end;
  293. { Outputting bits to the file }
  294. { Only the right 24 bits of put_buffer are used; the valid bits are
  295. left-justified in this part. At most 16 bits can be passed to emit_bits
  296. in one call, and we never retain more than 7 bits in put_buffer
  297. between calls, so 24 bits are sufficient. }
  298. {LOCAL}
  299. function emit_bits (var state : working_state;
  300. code : uInt;
  301. size : int) : boolean; {INLINE}
  302. { Emit some bits; return TRUE if successful, FALSE if must suspend }
  { state : working copy of the output position and bit accumulator.
    code  : bit pattern to append; only its low 'size' bits are used.
    size  : number of bits to append; 0 raises JERR_HUFF_MISSING_CODE.
    On a FALSE return state.cur is left unchanged, but
    state.next_output_byte and state.free_in_buffer have already been
    advanced for the bytes stored before dump_buffer failed. }
  303. var
  304. { This routine is heavily used, so it's worth coding tightly. }
  305. {register} put_buffer : INT32;
  306. {register} put_bits : int;
  307. var
  308. c : int;
  309. begin
  310. put_buffer := INT32 (code);
  311. put_bits := state.cur.put_bits;
  312. { if size is 0, caller used an invalid Huffman table entry }
  313. if (size = 0) then
  314. ERREXIT(j_common_ptr(state.cinfo), JERR_HUFF_MISSING_CODE);
  315. put_buffer := put_buffer and pred(INT32(1) shl size);
  316. { mask off any extra bits in code }
  317. Inc(put_bits, size); { new number of bits in buffer }
  318. put_buffer := put_buffer shl (24 - put_bits);
  319. { align incoming bits }
  320. put_buffer := put_buffer or state.cur.put_buffer;
  321. { and merge with old buffer contents }
  { Drain complete bytes from the top of the 24-bit window (bits 16..23) }
  322. while (put_bits >= 8) do
  323. begin
  324. c := int ((put_buffer shr 16) and $FF);
  325. {emit_byte(state, c, return FALSE);}
  326. { Emit a byte, return FALSE if must suspend. }
  327. state.next_output_byte^ := JOCTET (c);
  328. Inc(state.next_output_byte);
  329. Dec(state.free_in_buffer);
  330. if (state.free_in_buffer = 0) then
  331. if not dump_buffer(state) then
  332. begin
  333. emit_bits := FALSE;
  334. exit;
  335. end;
  336. if (c = $FF) then { need to stuff a zero byte? }
  337. begin
  338. {emit_byte(state, 0, return FALSE);}
  339. state.next_output_byte^ := JOCTET (0);
  340. Inc(state.next_output_byte);
  341. Dec(state.free_in_buffer);
  342. if (state.free_in_buffer = 0) then
  343. if not dump_buffer(state) then
  344. begin
  345. emit_bits := FALSE;
  346. exit;
  347. end;
  348. end;
  349. put_buffer := put_buffer shl 8;
  350. Dec(put_bits, 8);
  351. end;
  { Fewer than 8 bits remain; commit them to the working state }
  352. state.cur.put_buffer := put_buffer; { update state variables }
  353. state.cur.put_bits := put_bits;
  354. emit_bits := TRUE;
  355. end;
  { Pad the pending partial byte with one-bits and write it out.
    Returns FALSE if the output had to suspend; on success the bit
    accumulator in state.cur is reset to empty. }
  {LOCAL}
  function flush_bits (var state : working_state) : boolean;
  var
    written : boolean;
  begin
    { Seven one-bits are enough to complete any partial byte }
    written := emit_bits(state, $7F, 7);
    if written then
    begin
      state.cur.put_buffer := 0;
      state.cur.put_bits := 0;
    end;
    flush_bits := written;
  end;
  368. { Encode a single block's worth of coefficients }
  { state       : working output state; updated by the emit_bits calls.
    block       : coefficient block; block[0] is the DC term, the AC terms
                  are visited through jpeg_natural_order[1..DCTSIZE2-1].
    last_dc_val : previous DC value for this component (the DC term is
                  coded as a difference against it).
    dctbl/actbl : derived DC and AC code tables.
    Returns FALSE as soon as any emit_bits call reports suspension, TRUE
    once the whole block has been emitted. Raises JERR_BAD_DCT_COEF for
    coefficients that need too many bits. }
  369. {LOCAL}
  370. function encode_one_block (var state : working_state;
  371. const block : JBLOCK;
  372. last_dc_val : int;
  373. dctbl : c_derived_tbl_ptr;
  374. actbl : c_derived_tbl_ptr) : boolean;
  375. var
  376. {register} temp, temp2 : int;
  377. {register} nbits : int;
  378. {register} k, r, i : int;
  379. begin
  380. { Encode the DC coefficient difference per section F.1.2.1 }
  381. temp2 := block[0] - last_dc_val;
  382. temp := temp2;
  383. if (temp < 0) then
  384. begin
  385. temp := -temp; { temp is abs value of input }
  386. { For a negative input, want temp2 := bitwise complement of abs(input) }
  387. { This code assumes we are on a two's complement machine }
  388. Dec(temp2);
  389. end;
  390. { Find the number of bits needed for the magnitude of the coefficient }
  391. nbits := 0;
  392. while (temp <> 0) do
  393. begin
  394. Inc(nbits);
  395. temp := temp shr 1;
  396. end;
  397. { Check for out-of-range coefficient values.
  398. Since we're encoding a difference, the range limit is twice as much. }
  399. if (nbits > MAX_COEF_BITS+1) then
  400. ERREXIT(j_common_ptr(state.cinfo), JERR_BAD_DCT_COEF);
  401. { Emit the Huffman-coded symbol for the number of bits }
  402. if not emit_bits(state, dctbl^.ehufco[nbits], dctbl^.ehufsi[nbits]) then
  403. begin
  404. encode_one_block := FALSE;
  405. exit;
  406. end;
  407. { Emit that number of bits of the value, if positive, }
  408. { or the complement of its magnitude, if negative. }
  { emit_bits keeps only the low nbits bits of temp2 }
  409. if (nbits <> 0) then { emit_bits rejects calls with size 0 }
  410. if not emit_bits(state, uInt(temp2), nbits) then
  411. begin
  412. encode_one_block := FALSE;
  413. exit;
  414. end;
  415. { Encode the AC coefficients per section F.1.2.2 }
  416. r := 0; { r := run length of zeros }
  417. for k := 1 to pred(DCTSIZE2) do
  418. begin
  419. temp := block[jpeg_natural_order[k]];
  420. if (temp = 0) then
  421. begin
  422. Inc(r);
  423. end
  424. else
  425. begin
  426. { if run length > 15, must emit special run-length-16 codes ($F0) }
  427. while (r > 15) do
  428. begin
  429. if not emit_bits(state, actbl^.ehufco[$F0], actbl^.ehufsi[$F0]) then
  430. begin
  431. encode_one_block := FALSE;
  432. exit;
  433. end;
  434. Dec(r, 16);
  435. end;
  436. temp2 := temp;
  437. if (temp < 0) then
  438. begin
  439. temp := -temp; { temp is abs value of input }
  440. { This code assumes we are on a two's complement machine }
  441. Dec(temp2);
  442. end;
  443. { Find the number of bits needed for the magnitude of the coefficient }
  444. nbits := 0; { there must be at least one 1 bit }
  445. repeat
  446. Inc(nbits);
  447. temp := temp shr 1;
  448. until (temp = 0);
  449. { Check for out-of-range coefficient values }
  450. if (nbits > MAX_COEF_BITS) then
  451. ERREXIT(j_common_ptr(state.cinfo), JERR_BAD_DCT_COEF);
  452. { Emit Huffman symbol for run length / number of bits }
  { Symbol = zero-run length in the high nibble, bit count in the low one }
  453. i := (r shl 4) + nbits;
  454. if not emit_bits(state, actbl^.ehufco[i], actbl^.ehufsi[i]) then
  455. begin
  456. encode_one_block := FALSE;
  457. exit;
  458. end;
  459. { Emit that number of bits of the value, if positive, }
  460. { or the complement of its magnitude, if negative. }
  461. if not emit_bits(state, uInt(temp2), nbits) then
  462. begin
  463. encode_one_block := FALSE;
  464. exit;
  465. end;
  466. r := 0;
  467. end;
  468. end;
  469. { If the last coef(s) were zero, emit an end-of-block code }
  470. if (r > 0) then
  471. if not emit_bits(state, actbl^.ehufco[0], actbl^.ehufsi[0]) then
  472. begin
  473. encode_one_block := FALSE;
  474. exit;
  475. end;
  476. encode_one_block := TRUE;
  477. end;
  { Write a restart marker and reset the DC predictions.
    state       : working output state.
    restart_num : restart number; the marker byte is JPEG_RST0 + restart_num.
    Returns FALSE if output had to suspend at any point. The restart
    counter itself is left alone: the caller updates it only after the MCU
    has been written successfully. }
  {LOCAL}
  function emit_restart (var state : working_state;
                         restart_num : int) : boolean;
  var
    comp_idx : int;

    { Store one byte in the output buffer; FALSE means "must suspend" }
    function put_byte (val : int) : boolean;
    begin
      put_byte := TRUE;
      state.next_output_byte^ := JOCTET (val);
      Inc(state.next_output_byte);
      Dec(state.free_in_buffer);
      if (state.free_in_buffer = 0) then
        put_byte := dump_buffer(state);
    end;

  begin
    emit_restart := FALSE;         { result for every early exit below }

    { Byte-align the bit stream before the marker }
    if not flush_bits(state) then
      exit;

    { The marker: $FF followed by RSTn }
    if not put_byte($FF) then
      exit;
    if not put_byte(JPEG_RST0 + restart_num) then
      exit;

    { DC prediction restarts from zero for every component in the scan }
    for comp_idx := 0 to state.cinfo^.comps_in_scan - 1 do
      state.cur.last_dc_val[comp_idx] := 0;

    emit_restart := TRUE;
  end;
  { Encode one MCU of coefficient blocks and write the Huffman-coded result.
    cinfo    : compression object.
    MCU_data : one block-row pointer per block of the MCU.
    Returns FALSE if the destination asked for suspension; in that case
    neither the destination pointers nor the saved entropy state are
    updated, so the same MCU can be retried. Returns TRUE once the MCU is
    written and all state has been committed. }
  {METHODDEF}
  function encode_mcu_huff (cinfo : j_compress_ptr;
                            const MCU_data: array of JBLOCKROW) : boolean;
  var
    enc : huff_entropy_ptr;
    work : working_state;
    blk, comp_idx : int;
    comp : jpeg_component_info_ptr;
  begin
    encode_mcu_huff := FALSE;      { result for every suspension exit below }
    enc := huff_entropy_ptr (cinfo^.entropy);

    { Take a private copy of everything that changes while writing }
    work.cinfo := cinfo;
    work.next_output_byte := cinfo^.dest^.next_output_byte;
    work.free_in_buffer := cinfo^.dest^.free_in_buffer;
    work.cur := enc^.saved;

    { A restart marker is due when the interval has run out }
    if (cinfo^.restart_interval <> 0) then
      if (enc^.restarts_to_go = 0) then
        if not emit_restart(work, enc^.next_restart_num) then
          exit;

    { Encode every block of the MCU with its component's tables }
    for blk := 0 to cinfo^.blocks_in_MCU - 1 do
    begin
      comp_idx := cinfo^.MCU_membership[blk];
      comp := cinfo^.cur_comp_info[comp_idx];
      if not encode_one_block(work,
                              MCU_data[blk]^[0],
                              work.cur.last_dc_val[comp_idx],
                              enc^.dc_derived_tbls[comp^.dc_tbl_no],
                              enc^.ac_derived_tbls[comp^.ac_tbl_no]) then
        exit;
      { This block's DC term becomes the predictor for the next one }
      work.cur.last_dc_val[comp_idx] := MCU_data[blk]^[0][0];
    end;

    { Whole MCU written: commit the working copy }
    cinfo^.dest^.next_output_byte := work.next_output_byte;
    cinfo^.dest^.free_in_buffer := work.free_in_buffer;
    enc^.saved := work.cur;

    { Advance the restart bookkeeping }
    if (cinfo^.restart_interval <> 0) then
    begin
      if (enc^.restarts_to_go = 0) then
      begin
        enc^.restarts_to_go := cinfo^.restart_interval;
        enc^.next_restart_num := (enc^.next_restart_num + 1) and 7;
      end;
      Dec(enc^.restarts_to_go);
    end;

    encode_mcu_huff := TRUE;
  end;
  { End-of-scan processing for a Huffman-compressed pass: flush the bits
    still held in the accumulator and commit the resulting output position.
    Suspension is not possible here; a failed flush raises
    JERR_CANT_SUSPEND. }
  {METHODDEF}
  procedure finish_pass_huff (cinfo : j_compress_ptr);
  var
    enc : huff_entropy_ptr;
    work : working_state;
  begin
    enc := huff_entropy_ptr (cinfo^.entropy);

    { flush_bits operates on a working_state, so build one }
    work.cinfo := cinfo;
    work.cur := enc^.saved;
    work.next_output_byte := cinfo^.dest^.next_output_byte;
    work.free_in_buffer := cinfo^.dest^.free_in_buffer;

    { Write out whatever is left in the bit buffer }
    if not flush_bits(work) then
      ERREXIT(j_common_ptr(cinfo), JERR_CANT_SUSPEND);

    { Commit the working copy }
    cinfo^.dest^.next_output_byte := work.next_output_byte;
    cinfo^.dest^.free_in_buffer := work.free_in_buffer;
    enc^.saved := work.cur;
  end;
  604. { Huffman coding optimization.
  605. We first scan the supplied data and count the number of uses of each symbol
  606. that is to be Huffman-coded. (This process MUST agree with the code above.)
  607. Then we build a Huffman coding tree for the observed counts.
  608. Symbols which are not needed at all for the particular image are not
  609. assigned any code, which saves space in the DHT marker as well as in
  610. the compressed data. }
  611. {$ifdef ENTROPY_OPT_SUPPORTED}
  { Process a single block's worth of coefficients for statistics gathering.
    This mirrors encode_one_block step for step, but instead of emitting a
    symbol it increments that symbol's counter.

    cinfo       : compression object (used for error reporting only).
    block       : coefficient block; block[0] is the DC term.
    last_dc_val : previous DC value of the same component.
    dc_counts   : frequency table receiving the DC size-category symbols.
    ac_counts   : frequency table receiving the AC run/size symbols.

    Raises JERR_BAD_DCT_COEF when a coefficient needs more bits than the
    sample precision allows, exactly as encode_one_block does. }
  {LOCAL}
  procedure htest_one_block (cinfo : j_compress_ptr;
                             const block : JBLOCK;
                             last_dc_val : int;
                             dc_counts : TLongTablePtr;
                             ac_counts : TLongTablePtr);
  var
    {register} temp : int;
    {register} nbits : int;
    {register} k, r : int;
  begin
    { Encode the DC coefficient difference per section F.1.2.1 }
    temp := block[0] - last_dc_val;
    if (temp < 0) then
      temp := -temp;

    { Find the number of bits needed for the magnitude of the coefficient }
    nbits := 0;
    while (temp <> 0) do
    begin
      Inc(nbits);
      temp := temp shr 1;
    end;

    { Check for out-of-range coefficient values.
      Since we're encoding a difference, the range limit is twice as much. }
    if (nbits > MAX_COEF_BITS+1) then
      ERREXIT(j_common_ptr(cinfo), JERR_BAD_DCT_COEF);

    { Count the Huffman symbol for the number of bits }
    Inc(dc_counts^[nbits]);

    { Encode the AC coefficients per section F.1.2.2 }
    r := 0;                        { r := run length of zeros }

    for k := 1 to pred(DCTSIZE2) do
    begin
      temp := block[jpeg_natural_order[k]];
      if (temp = 0) then
      begin
        Inc(r);
      end
      else
      begin
        { if run length > 15, must emit special run-length-16 codes ($F0) }
        while (r > 15) do
        begin
          Inc(ac_counts^[$F0]);
          Dec(r, 16);
        end;

        { Only the magnitude matters for the size category }
        if (temp < 0) then
          temp := -temp;

        { Find the number of bits needed for the magnitude of the coefficient }
        nbits := 0;                { there must be at least one 1 bit }
        repeat
          Inc(nbits);
          temp := temp shr 1;
        until (temp = 0);

        { Check for out-of-range coefficient values. encode_one_block makes
          the same check; without it an oversized coefficient would be
          counted under an index that is not a valid run/size symbol. }
        if (nbits > MAX_COEF_BITS) then
          ERREXIT(j_common_ptr(cinfo), JERR_BAD_DCT_COEF);

        { Count Huffman symbol for run length / number of bits }
        Inc(ac_counts^[(r shl 4) + nbits]);

        r := 0;
      end;
    end;

    { If the last coef(s) were zero, emit an end-of-block code }
    if (r > 0) then
      Inc(ac_counts^[0]);
  end;
  { Statistics-pass counterpart of encode_mcu_huff: runs one MCU through
    htest_one_block so that symbol frequencies are counted.
    Nothing is written to the destination, so this never suspends and the
    result is always TRUE. }
  {METHODDEF}
  function encode_mcu_gather (cinfo : j_compress_ptr;
                              const MCU_data: array of JBLOCKROW) : boolean;
  var
    enc : huff_entropy_ptr;
    blk, comp_idx : int;
    comp : jpeg_component_info_ptr;
  begin
    enc := huff_entropy_ptr (cinfo^.entropy);

    { Restart-interval bookkeeping }
    if (cinfo^.restart_interval <> 0) then
    begin
      if (enc^.restarts_to_go = 0) then
      begin
        { A restart resets every DC prediction to zero }
        for comp_idx := 0 to cinfo^.comps_in_scan - 1 do
          enc^.saved.last_dc_val[comp_idx] := 0;
        { Start a new interval }
        enc^.restarts_to_go := cinfo^.restart_interval;
      end;
      Dec(enc^.restarts_to_go);
    end;

    { Count the symbols of every block in the MCU }
    for blk := 0 to cinfo^.blocks_in_MCU - 1 do
    begin
      comp_idx := cinfo^.MCU_membership[blk];
      comp := cinfo^.cur_comp_info[comp_idx];
      htest_one_block(cinfo, MCU_data[blk]^[0],
                      enc^.saved.last_dc_val[comp_idx],
                      enc^.dc_count_ptrs[comp^.dc_tbl_no],
                      enc^.ac_count_ptrs[comp^.ac_tbl_no]);
      { This block's DC term becomes the predictor for the next one }
      enc^.saved.last_dc_val[comp_idx] := MCU_data[blk]^[0][0];
    end;

    encode_mcu_gather := TRUE;
  end;
  712. { Generate the best Huffman code table for the given counts, fill htbl.
  713. Note this is also used by jcphuff.c.
  714. The JPEG standard requires that no symbol be assigned a codeword of all
  715. one bits (so that padding bits added at the end of a compressed segment
  716. can't look like a valid code). Because of the canonical ordering of
  717. codewords, this just means that there must be an unused slot in the
  718. longest codeword length category. Section K.2 of the JPEG spec suggests
  719. reserving such a slot by pretending that symbol 256 is a valid symbol
  720. with count 1. In theory that's not optimal; giving it count zero but
  721. including it in the symbol set anyway should give a better Huffman code.
  722. But the theoretically better code actually seems to come out worse in
  723. practice, because it produces more all-ones bytes (which incur stuffed
  724. zero bytes in the final file). In any case the difference is tiny.
  725. The JPEG standard requires Huffman codes to be no more than 16 bits long.
  726. If some symbols have a very small but nonzero probability, the Huffman tree
  727. must be adjusted to meet the code length restriction. We currently use
  728. the adjustment method suggested in JPEG section K.2. This method is *not*
  729. optimal; it may not choose the best possible limited-length code. But
  730. typically only very-low-frequency symbols will be given less-than-optimal
  731. lengths, so the code is almost optimal. Experimental comparisons against
  732. an optimal limited-length-code algorithm indicate that the difference is
  733. microscopic --- usually less than a hundredth of a percent of total size.
  734. So the extra complexity of an optimal algorithm doesn't seem worthwhile. }
  { cinfo : compression object (used for error reporting only).
    htbl  : table to fill; bits[], huffval[] and sent_table are written.
    freq  : symbol counts, indexes 0..256. The table is used as scratch
            space and is destroyed: freq[256] is forced to 1 and counts are
            merged and zeroed while the tree is built.
    Raises JERR_HUFF_CLEN_OVERFLOW if a code length exceeds MAX_CLEN. }
  735. {GLOBAL}
  736. procedure jpeg_gen_optimal_table (cinfo : j_compress_ptr;
  737. htbl : JHUFF_TBL_PTR;
  738. var freq : TLongTable);
  739. const
  740. MAX_CLEN = 32; { assumed maximum initial code length }
  741. var
  742. bits : array[0..MAX_CLEN+1-1] of UINT8; { bits[k] := # of symbols with code length k }
  743. codesize : array[0..257-1] of int; { codesize[k] := code length of symbol k }
  744. others : array[0..257-1] of int; { next symbol in current branch of tree }
  745. c1, c2 : int;
  746. p, i, j : int;
  747. v : long;
  748. begin
  749. { This algorithm is explained in section K.2 of the JPEG standard }
  750. MEMZERO(@bits, SIZEOF(bits));
  751. MEMZERO(@codesize, SIZEOF(codesize));
  752. for i := 0 to 256 do
  753. others[i] := -1; { init links to empty }
  754. freq[256] := 1; { make sure 256 has a nonzero count }
  755. { Including the pseudo-symbol 256 in the Huffman procedure guarantees
  756. that no real symbol is given code-value of all ones, because 256
  757. will be placed last in the largest codeword category. }
  758. { Huffman's basic algorithm to assign optimal code lengths to symbols }
  759. while TRUE do
  760. begin
  761. { Find the smallest nonzero frequency, set c1 := its symbol }
  762. { In case of ties, take the larger symbol number }
  { The "<=" comparison is what makes the larger index win a tie }
  763. c1 := -1;
  764. v := long(1000000000);
  765. for i := 0 to 256 do
  766. begin
  767. if (freq[i] <> 0) and (freq[i] <= v) then
  768. begin
  769. v := freq[i];
  770. c1 := i;
  771. end;
  772. end;
  773. { Find the next smallest nonzero frequency, set c2 := its symbol }
  774. { In case of ties, take the larger symbol number }
  775. c2 := -1;
  776. v := long(1000000000);
  777. for i := 0 to 256 do
  778. begin
  779. if (freq[i] <> 0) and (freq[i] <= v) and (i <> c1) then
  780. begin
  781. v := freq[i];
  782. c2 := i;
  783. end;
  784. end;
  785. { Done if we've merged everything into one frequency }
  786. if (c2 < 0) then
  787. break;
  788. { Else merge the two counts/trees }
  789. Inc(freq[c1], freq[c2]);
  790. freq[c2] := 0;
  791. { Increment the codesize of everything in c1's tree branch }
  792. Inc(codesize[c1]);
  793. while (others[c1] >= 0) do
  794. begin
  795. c1 := others[c1];
  796. Inc(codesize[c1]);
  797. end;
  { c1 now names the last symbol of its chain, so c2 is appended there }
  798. others[c1] := c2; { chain c2 onto c1's tree branch }
  799. { Increment the codesize of everything in c2's tree branch }
  800. Inc(codesize[c2]);
  801. while (others[c2] >= 0) do
  802. begin
  803. c2 := others[c2];
  804. Inc(codesize[c2]);
  805. end;
  806. end;
  807. { Now count the number of symbols of each code length }
  808. for i := 0 to 256 do
  809. begin
  810. if (codesize[i]<>0) then
  811. begin
  812. { The JPEG standard seems to think that this can't happen, }
  813. { but I'm paranoid... }
  814. if (codesize[i] > MAX_CLEN) then
  815. ERREXIT(j_common_ptr(cinfo), JERR_HUFF_CLEN_OVERFLOW);
  816. Inc(bits[codesize[i]]);
  817. end;
  818. end;
  819. { JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
  820. Huffman procedure assigned any such lengths, we must adjust the coding.
  821. Here is what the JPEG spec says about how this next bit works:
  822. Since symbols are paired for the longest Huffman code, the symbols are
  823. removed from this length category two at a time. The prefix for the pair
  824. (which is one bit shorter) is allocated to one of the pair; then,
  825. skipping the BITS entry for that prefix length, a code word from the next
  826. shortest nonzero BITS entry is converted into a prefix for two code words
  827. one bit longer. }
  828. for i := MAX_CLEN downto 17 do
  829. begin
  830. while (bits[i] > 0) do
  831. begin
  832. j := i - 2; { find length of new prefix to be used }
  833. while (bits[j] = 0) do
  834. Dec(j);
  835. Dec(bits[i], 2); { remove two symbols }
  836. Inc(bits[i-1]); { one goes in this length }
  837. Inc(bits[j+1], 2); { two new symbols in this length }
  838. Dec(bits[j]); { symbol of this length is now a prefix }
  839. end;
  840. end;
  841. { Delphi 2: FOR-loop variable 'i' may be undefined after loop }
  842. i := 16; { Nomssi: work around }
  843. { Remove the count for the pseudo-symbol 256 from the largest codelength }
  844. while (bits[i] = 0) do { find largest codelength still in use }
  845. Dec(i);
  846. Dec(bits[i]);
  847. { Return final symbol counts (only for lengths 0..16) }
  848. MEMCOPY(@htbl^.bits, @bits, SIZEOF(htbl^.bits));
  849. { Return a list of the symbols sorted by code length }
  850. { It's not real clear to me why we don't need to consider the codelength
  851. changes made above, but the JPEG spec seems to think this works. }
  { Only real symbols 0..255 are listed; pseudo-symbol 256 is left out }
  852. p := 0;
  853. for i := 1 to MAX_CLEN do
  854. begin
  855. for j := 0 to 255 do
  856. begin
  857. if (codesize[j] = i) then
  858. begin
  859. htbl^.huffval[p] := UINT8 (j);
  860. Inc(p);
  861. end;
  862. end;
  863. end;
  864. { Set sent_table FALSE so updated table will be written to JPEG file. }
  865. htbl^.sent_table := FALSE;
  866. end;
  867. { Finish up a statistics-gathering pass and create the new Huffman tables. }
  868. {METHODDEF}
  procedure finish_pass_gather (cinfo : j_compress_ptr);
  { Walks the components of the current scan and, for every DC and AC table
    number they reference, builds a Huffman table from the counts held in
    entropy^.dc_count_ptrs / entropy^.ac_count_ptrs by calling
    jpeg_gen_optimal_table.  A table slot that is still NIL is first
    allocated with jpeg_alloc_huff_table.

    Each table number is processed at most once: jpeg_gen_optimal_table
    clobbers the frequency counts it is given, so a second call on the
    same counts would be wrong. }
  var
    state : huff_entropy_ptr;
    scan_idx, tbl_idx : int;
    dc_no, ac_no : int;
    comp : jpeg_component_info_ptr;
    slot : ^JHUFF_TBL_PTR;
    dc_done : array[0..NUM_HUFF_TBLS-1] of boolean;
    ac_done : array[0..NUM_HUFF_TBLS-1] of boolean;
  begin
    state := huff_entropy_ptr (cinfo^.entropy);

    { Nothing has been generated yet }
    for tbl_idx := 0 to NUM_HUFF_TBLS-1 do
    begin
      dc_done[tbl_idx] := FALSE;
      ac_done[tbl_idx] := FALSE;
    end;

    for scan_idx := 0 to cinfo^.comps_in_scan - 1 do
    begin
      comp := cinfo^.cur_comp_info[scan_idx];
      dc_no := comp^.dc_tbl_no;
      ac_no := comp^.ac_tbl_no;

      { DC table for this component, unless already done }
      if (not dc_done[dc_no]) then
      begin
        slot := @(cinfo^.dc_huff_tbl_ptrs[dc_no]);
        if (slot^ = NIL) then
          slot^ := jpeg_alloc_huff_table(j_common_ptr(cinfo));
        jpeg_gen_optimal_table(cinfo, slot^, state^.dc_count_ptrs[dc_no]^);
        dc_done[dc_no] := TRUE;
      end;

      { AC table for this component, unless already done }
      if (not ac_done[ac_no]) then
      begin
        slot := @(cinfo^.ac_huff_tbl_ptrs[ac_no]);
        if (slot^ = NIL) then
          slot^ := jpeg_alloc_huff_table(j_common_ptr(cinfo));
        jpeg_gen_optimal_table(cinfo, slot^, state^.ac_count_ptrs[ac_no]^);
        ac_done[ac_no] := TRUE;
      end;
    end;
  end;
  906. {$endif} { ENTROPY_OPT_SUPPORTED }
  907. { Module initialization routine for Huffman entropy encoding. }
  908. {GLOBAL}
  procedure jinit_huff_encoder (cinfo : j_compress_ptr);
  { Allocates a huff_entropy_encoder from the JPOOL_IMAGE pool through
    cinfo^.mem^.alloc_small, installs it as cinfo^.entropy, sets its
    start_pass method to start_pass_huff, and sets every derived-table
    pointer (and, when ENTROPY_OPT_SUPPORTED is defined, every count-table
    pointer) to NIL so the tables are marked as unallocated. }
  var
    state : huff_entropy_ptr;
    tbl_no : int;
  begin
    state := huff_entropy_ptr(
      cinfo^.mem^.alloc_small (j_common_ptr(cinfo), JPOOL_IMAGE,
                               SIZEOF(huff_entropy_encoder)) );

    state^.pub.start_pass := start_pass_huff;
    cinfo^.entropy := jpeg_entropy_encoder_ptr (state);

    { No tables exist yet: clear every slot }
    for tbl_no := 0 to NUM_HUFF_TBLS-1 do
    begin
      state^.dc_derived_tbls[tbl_no] := NIL;
      state^.ac_derived_tbls[tbl_no] := NIL;
  {$ifdef ENTROPY_OPT_SUPPORTED}
      state^.dc_count_ptrs[tbl_no] := NIL;
      state^.ac_count_ptrs[tbl_no] := NIL;
  {$endif}
    end;
  end;
  930. end.