Boron 2.1.0
atoms.c
1/*
2 Copyright 2009,2011 Karl Robillard
3
4 This file is part of the Urlan datatype system.
5
6 Urlan is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 Urlan is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with Urlan. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20
21#define KEEP_CASE 1
22#define MAX_WORD_LEN 64
23#define LOWERCASE(c) if(c >= 'A' && c <= 'Z') c -= 'A' - 'a'
24
25
26typedef struct
27{
28 uint32_t hash;
29 uint16_t nameIndex; // Index into atomNames.ptr.c
30 uint16_t nameLen;
31
32 uint16_t head;
33 uint16_t chain;
34}
35AtomRec;
36
37
47const char* ur_atomCStr( UThread* ut, UAtom atom /*, int* plen*/ )
48{
49 UEnv* env = ut->env;
50 AtomRec* rec = ((AtomRec*) env->atomTable.ptr.v) + atom;
51 //if( plen )
52 // *plen = rec->nameLen;
53 return env->atomNames.ptr.c + rec->nameIndex;
54}
55
56
57uint32_t ur_hash( const uint8_t* str, const uint8_t* end )
58{
59 int c;
60 uint32_t hashval = 0;
61
62 while( str != end )
63 {
64 c = *str++;
65 LOWERCASE( c );
66 hashval = (33 * hashval) + 720 + c;
67 }
68
69 return hashval;
70}
71
72
73/*
74 Example atom hash table with five used and eight avail slots.
75 The names some, wordH, & wordP all hash to the same slot and must be chained.
76
77 hash name | head chain
78 -----------------|-----------
79 0 [ 0x3D24461D word1 | 2 ]
80 1 [ 0x01D81D74 some | 3 ]
81 2 [ 0x01D88B98 this | ]
82 3 [ 0x3D244654 wordH | 4 ]
83 4 [ 0x3D24465C wordP | 1 ]
84 [ | 0 ]
85 [ | ]
86 [ | ]
87*/
88
89#define HASH_INSERT( atoms, avail, table, node, hash, index ) \
90 node = table + (hash % avail); \
91 if( node->head == 0xffff ) \
92 node->head = index; \
93 else { \
94 node = table + node->head; \
95 while( node->chain != 0xffff ) \
96 node = table + node->chain; \
97 node->chain = index; \
98 }
99
100
101static void _rebuildAtomHash( UBuffer* buf )
102{
103 AtomRec* table;
104 AtomRec* node;
105 AtomRec* it;
106 AtomRec* end;
107 UIndex avail;
108
109 table = ur_ptr(AtomRec, buf);
110 avail = ur_avail(buf);
111
112 // Clear lookup table.
113
114 it = table;
115 end = table + avail;
116 while( it != end )
117 {
118 it->head = 0xffff;
119 it->chain = 0xffff;
120 ++it;
121 }
122
123 // Re-insert existing entries.
124
125 it = table;
126 end = table + buf->used;
127 while( it != end )
128 {
129 HASH_INSERT( buf, avail, table, node, it->hash, it - table )
130 ++it;
131 }
132}
133
134
135/*
136 This must be called inside LOCK_GLOBAL/UNLOCK_GLOBAL.
137
138 \param ut If non-zero, then ur_error() is called when the atom tables
139 are full.
140
141 \return UR_INVALID_ATOM if atom tables are full.
142*/
143static UAtom _internAtom( UThread* ut, UBuffer* atoms, UBuffer* names,
144 const uint8_t* str, const uint8_t* end )
145{
146 uint8_t* cp;
147 const uint8_t* it;
148 const uint8_t* sp;
149 int c, d;
150 int len;
151 uint32_t hash;
152 UIndex avail;
153 AtomRec* table;
154 AtomRec* node;
155
156#if 0
157 uint8_t rep[32];
158 cp = rep;
159 sp = str;
160 while( sp != end )
161 *cp++ = *sp++;
162 *cp = '\0';
163 printf( "KR intern {%s}\n", rep );
164#endif
165
166 len = end - str;
167 if( len > MAX_WORD_LEN ) /* LIMIT: Maximum word length */
168 {
169 len = MAX_WORD_LEN;
170 end = str + len;
171 }
172 hash = ur_hash( str, end );
173
174 table = ur_ptr(AtomRec, atoms);
175 avail = ur_avail(atoms);
176
177 node = table + (hash % avail);
178 if( node->head == 0xffff )
179 {
180 node->head = atoms->used;
181 }
182 else
183 {
184 node = table + node->head;
185 while( 1 )
186 {
187 if( node->nameLen == len )
188 {
189 sp = names->ptr.b + node->nameIndex;
190 it = str;
191 while( it != end )
192 {
193#ifdef KEEP_CASE
194 c = *sp++;
195 d = *it++;
196 if( c == d )
197 continue;
198 LOWERCASE( c );
199 LOWERCASE( d );
200 if( c != d )
201 break;
202#else
203 if( *sp++ != *it++ )
204 break;
205#endif
206 }
207 if( it == end )
208 goto done;
209 }
210
211 if( node->chain == 0xffff )
212 {
213 node->chain = atoms->used;
214 break;
215 }
216 node = table + node->chain;
217 }
218 }
219
220 // Nope, add new atom.
221
222 if( atoms->used == avail )
223 {
224 // Atom table size is fixed so read only access does not need to be
225 // locked. When the table is full, we are finished.
226 if( ut )
227 ur_error( ut, UR_ERR_INTERNAL, "Atom table is full" );
228 return UR_INVALID_ATOM;
229 }
230 node = table + atoms->used;
231 ++atoms->used;
232
233 node->hash = hash;
234 node->nameIndex = names->used;
235 node->nameLen = len;
236
237#if 1
238 if( (names->used + len + 1) > ur_avail(names) )
239 {
240 if( ut )
241 ur_error( ut, UR_ERR_INTERNAL, "Atom name buffer is full" );
242 return UR_INVALID_ATOM;
243 }
244#else
245 ur_arrayReserve( names, sizeof(char), names->used + len + 1 );
246#endif
247
248 cp = names->ptr.b + names->used;
249 names->used += len + 1;
250 while( str != end )
251 {
252#ifdef KEEP_CASE
253 *cp++ = *str++;
254#else
255 c = *str++;
256 LOWERCASE( c );
257 *cp++ = c;
258#endif
259 }
260 *cp = '\0';
261
262done:
263
264 return node - table;
265}
266
267
268#ifdef DEBUG
269void dumpAtoms( UThread* ut )
270{
271 UEnv* env = ut->env;
272
273 LOCK_GLOBAL
274 {
275 const char* names = env->atomNames.ptr.c;
276 AtomRec* table = (AtomRec*) env->atomTable.ptr.v;
277 AtomRec* it = table;
278 AtomRec* end = table + env->atomTable.used;
279
280#if __WORDSIZE == 64
281#define OFFINT "%4ld"
282#else
283#define OFFINT "%4d"
284#endif
285
286 while( it != end )
287 {
288 dprint( OFFINT " %08x %5d %5d %s\n", it - table, it->hash,
289 it->head, it->chain,
290 names + it->nameIndex );
291 ++it;
292 }
293 }
294 UNLOCK_GLOBAL
295}
296#endif
297
298
299/*EOF*/
const char * ur_atomCStr(UThread *ut, UAtom atom)
Get name of atom.
Definition atoms.c:47
UStatus ur_error(UThread *, int errorType, const char *fmt,...)
Create error! exception.
Definition env.c:964
The UBuffer struct holds information about a resource, usually a chunk of memory.
Definition urlan.h:266
uint8_t * b
bytes
Definition urlan.h:277
void * v
Other.
Definition urlan.h:285
char * c
chars
Definition urlan.h:276
UIndex used
This typically holds the number of elements in the buffer.
Definition urlan.h:271
union UBuffer::@312146223224040072236377336057316010374162171270 ptr
This typically holds a pointer to a chunk of memory.
The UThread struct stores the data specific to a thread of execution.
Definition urlan.h:309
UEnv * env
Pointer to the shared environment.
Definition urlan.h:321
#define ur_avail(buf)
Returns the capacity of a UBuffer that is known to have been allocated.
Definition urlan.h:687
@ UR_ERR_INTERNAL
Fatal internal problem.
Definition urlan.h:129
int32_t UIndex
This is an index into an array.
Definition urlan.h:150