[project @ 1997-03-14 08:02:40 by simonpj]
[nofib.git] / real / compress / Decode.hs
1 {-
2 - Decode.hs
3 -
4 - Module containing the code to decode LZW encodings
5 -
6 - Paul Sanders, Applications Research Division, BTL 1992
7 -
8 - DEC_VERSION 1 uses a list with keys in ascending order as a table, ie.
9 - entry n is given by table!!n.
10 -
11 - DEC_VERSION 2 uses a list with keys in descending order as a table, ie.
12 - entry n is given by table!!(#table-n). We don't need to calculate the
13 - length of the table however as this is given by the value of the next
14 - code to be added.
15 -
16 - DEC_VERSION 3 uses a balanced binary tree to store the keys. We can do
17 - this cheaply by putting the key in the correct place straight away and
18 - therefore not doing any rebalancing.
19 -}
20
21 module Decode (decode)
22 where
23
24 import Prelude hiding( lookup ) -- lookup defined locally
25 import Defaults
26 import BinConv
27
28 data Optional a = NONE | SOME a deriving (Eq, Show{-was:Text-})
29
30 {- We ideally want to store the table as an array but these are inefficient
31 - so we use a list instead. We don't use the tree used by encode since we
32 - can make use of the fact that all our keys (the codes) come in order and
33 - will be placed at the end of the table, at position 'code'.
34 -
35 - An entry of (SOME n, 'c') indicates that this code has prefix code n
36 - and final character c.
37 -}
38
39
40 {- Kick off the decoding giving the real function the first code value and
41 - the initial table.
42 -}
43
44 decode :: [Int] -> String
45 decode []
46 = []
47 decode cs
48 = decode' cs first_code init_table
49
50 {- decode` decodes the first character which is special since no new code
51 - gets added for it. It is also special in so far as we know that the
52 - code is a singleton character and thus has prefix NONE. The '@' is a
53 - dummy character and can be anything.
54 -}
55
56 decode' [] _ _ = []
57 decode' (c:cs) n t
58 = ch : do_decode cs n c ch t
59 where
60 (NONE, ch) = lookup c t
61
62 {- do_decode decodes all the codes bar the first.
63 -
64 - If the code is in the table (ie the code is less than the next code to be
65 - added) then we output the string for that code (using unfold if a prefix
66 - type) and add a new code to the table with the final character output as
67 - the extension and the previous code as prefix.
68 -
69 - If the code is not one we know about then we give it to decode_special for
70 - special treatment
71 -}
72
73 do_decode [] _ _ _ _ = []
74 do_decode (c:cs) n old_n fin_char t
75 = if c >= n -- we don't have this code in the table yet
76 then decode_special (c:cs) n old_n fin_char t
77 else outchs ++ do_decode cs n' c (head outchs) t'
78 where
79 outchs = reverse (unfold c (n-1) t)
80 (n', t') = if n == max_entries
81 then (n, t)
82 else (n+1, insert n (SOME old_n, head outchs) t)
83
84 {- decode_special decodes a code that isn't in the table.
85 -
86 - The algorithm in Welch describes why this works, suffice it to say that
87 - the output string is given by the last character output and the string
88 - given by the previous code. An entry is also made in the table for the
89 - last character output and the old code.
90 -}
91
92 decode_special (c:cs) n old_n fin_char t
93 = outchs ++ do_decode cs n' c (head outchs) t'
94 where
95 outchs = reverse (fin_char : unfold old_n (n-1) t)
96 (n', t') = if n == max_entries
97 then (n, t)
98 else (n+1, insert n (SOME old_n, fin_char) t)
99
100 {- unfold a prefix code.
101 -
102 - chain back through the prefixes outputting the extension characters as we
103 - go.
104 -}
105
106 unfold n t_len t
107 = if prefix == NONE
108 then [c]
109 else c : unfold n' t_len t
110 where
111 (prefix, c) = lookup n t
112 SOME n' = prefix
113
114 data DecompTable = Branch DecompTable DecompTable | Leaf (Optional Int, Char) deriving (Show{-was:Text-})
115
116 {- Insert a code pair into the table. The position of the code is given by
117 - the breakdown of the key into its binary digits
118 -}
119
120 insert n v t = insert' (dec_to_binx code_bits n) v t
121
122 {- We can place a code exactly where it belongs using the following algorithm.
123 - Take the code's binary rep expanded to the maximum number of bits. Start
124 - at the first bit, if a 0 then insert the code to the left, if a 1 then
125 - insert to the right. Carry on with the other bits until we run out and are
126 - thus at the right place and can construct the node.
127 -}
128
129 insert' [] v (Leaf _)
130 = Leaf v
131 insert' ('0' : bs) v (Branch l r)
132 = Branch (insert' bs v l) r
133 insert' ('1' : bs) v (Branch l r)
134 = Branch l (insert' bs v r)
135 insert' ('0' : bs) v t
136 = Branch (insert' bs v t) t
137 insert' ('1' : bs) v t
138 = Branch t (insert' bs v t)
139
140 {- For a lookup we use the same mechanism to locate the position of the item
141 - in the tree but if we find that the route has not been constructed or the
142 - node has the dummy value then that code is not yet in the tree. The way
143 - in which the decode algorithm works this should never happen.
144 -}
145
146 lookup n t = lookup' (dec_to_binx code_bits n) t
147
148 lookup' [] (Leaf v)
149 = v
150 lookup' ('0' : bs) (Branch l _)
151 = lookup' bs l
152 lookup' ('1' : bs) (Branch _ r)
153 = lookup' bs r
154 lookup' _ _ = error "tree insert error - seek professional help"
155
156 init_table = mk_init_table 0 (Leaf (SOME 99999, '@'))
157
158 mk_init_table 256 t = t
159 mk_init_table n t = mk_init_table (n+1) (insert n (NONE, toEnum n) t)
160