Boron 2.1.0
sort.c
1/*
2 Copyright 2013 Karl Robillard
3
4 This file is part of the Boron programming language.
5
6 Boron is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 Boron is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with Boron. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20
21#include "quickSortIndex.h"
22
23
24#define OPT_SORT_CASE 0x01
25#define OPT_SORT_GROUP 0x02
26#define OPT_SORT_FIELD 0x04
27
28
29struct CompareField
30{
31 UThread* ut;
32 UBlockIt fb;
33 uint32_t opt;
34};
35
36
37/*
38static int _binByte( const UBuffer* bin, UIndex pos )
39{
40 if( pos >= 0 && pos < bin->used )
41 return bin->ptr.b[ pos ];
42 return -1;
43}
44*/
45
46
47static int _compareField( struct CompareField* cf,
48 const UCell* a, const UCell* b )
49{
50 UThread* ut = cf->ut;
51 const UCell* it;
52 int i;
53 int rev;
54 int type = ur_type(a);
55
56 if( type != ur_type(b) )
57 return 0;
58
59 switch( type )
60 {
61#if 0
62 case UT_BINARY:
63 case UT_STRING:
64 case UT_FILE:
65 {
66 const UBuffer* bufA = ur_bufferSer( a );
67 const UBuffer* bufB = ur_bufferSer( b );
68 int ca;
69
70 for( it = cf->fb.it; it != cf->fb.end; ++it )
71 {
72 if( ur_is(it, UT_INT) )
73 {
74 // Convert from one-based index but allow negative
75 // position to skip from end.
76 i = ur_int(it);
77 if( i > 0 )
78 --i;
79
80 if( type == UT_BINARY )
81 {
82 ca = _binByte( bufA, i + a->series.it );
83 i = _binByte( bufB, i + b->series.it );
84 }
85 else
86 {
87 ca = ur_strChar( bufA, i + a->series.it );
88 i = ur_strChar( bufB, i + b->series.it );
89 if( ! (cf->opt & OPT_SORT_CASE) )
90 {
91 ca = ur_charLowercase( ca );
92 i = ur_charLowercase( i );
93 }
94 }
95
96 if( ca > i )
97 return 1;
98 if( ca < i )
99 return -1;
100 }
101 }
102 }
103 break;
104#endif
105 case UT_BLOCK:
106 {
107 USeriesIter siA;
108 USeriesIter siB;
109 int pos;
110
111 ur_seriesSlice( ut, &siA, a );
112 ur_seriesSlice( ut, &siB, b );
113
114 for( it = cf->fb.it; it != cf->fb.end; )
115 {
116 if( ur_is(it, UT_INT) )
117 {
118 // Convert from one-based index but allow negative
119 // position to pick from end.
120 i = ur_int(it);
121 if( i > 0 )
122 --i;
123
124 if( (++it != cf->fb.end) && ur_is(it, UT_OPTION) )
125 {
126 ++it;
127 rev = 1;
128 }
129 else
130 {
131 rev = 0;
132 }
133
134#define SORT_BLK_CELL(res, si, index) \
135 pos = index + ((index < 0) ? si.end : si.it); \
136 if( pos < si.it || pos >= si.end ) \
137 return 0; \
138 res = si.buf->ptr.cell + pos;
139
140 SORT_BLK_CELL( a, siA, i );
141 SORT_BLK_CELL( b, siB, i );
142
143 if( (i = ur_compare( ut, a, b )) )
144 return rev ? -i : i;
145 }
146 else
147 {
148 ++it;
149 }
150 }
151 }
152 break;
153
154 case UT_CONTEXT:
155 {
156 const UBuffer* bufA = ur_bufferSer( a );
157 const UBuffer* bufB = ur_bufferSer( b );
158 UAtom word;
159
160 for( it = cf->fb.it; it != cf->fb.end; )
161 {
162 if( ur_is(it, UT_WORD) )
163 {
164 word = ur_atom(it);
165
166 if( (++it != cf->fb.end) && ur_is(it, UT_OPTION) )
167 {
168 ++it;
169 rev = 1;
170 }
171 else
172 {
173 rev = 0;
174 }
175
176 if( (i = ur_ctxLookup( bufA, word )) < 0 )
177 return 0;
178 a = ur_ctxCell( bufA, i );
179
180 if( (i = ur_ctxLookup( bufB, word )) < 0 )
181 return 0;
182 b = ur_ctxCell( bufB, i );
183
184 if( (i = ur_compare( ut, a, b )) )
185 return rev ? -i : i;
186 }
187 else
188 {
189 ++it;
190 }
191 }
192 }
193 break;
194 }
195 return 0;
196}
197
198
199/*-cf-
200 sort
201 set series
202 /case Use case-sensitive comparison with string types.
203 /group Compare groups of elements by first value in group.
204 size int!
205 /field Sort on specified context words or block indices.
206 which block!
207 return: New series with sorted elements.
208 group: series
209*/
210CFUNC(cfunc_sort)
211{
212 int type = ur_type(a1);
213
214 if( ur_isBlockType(type) )
215 {
216 QuickSortIndex qs;
217 struct CompareField fld;
218 UBlockIt bi;
219 UBuffer* blk;
220 uint32_t* ip;
221 uint32_t* iend;
222 int group;
223 int len;
224 int indexLen;
225
226 ur_blockIt( ut, &bi, a1 );
227 len = bi.end - bi.it;
228
229 fld.opt = CFUNC_OPTIONS;
230 if( fld.opt & OPT_SORT_GROUP )
231 {
232 group = ur_int(CFUNC_OPT_ARG(2));
233 if( group < 1 )
234 group = 1;
235 indexLen = len / group;
236 len = indexLen * group; // Remove any partial group.
237 }
238 else
239 {
240 group = 1;
241 indexLen = len;
242 }
243
244 blk = ur_makeBlockCell( ut, type, len, res ); // gc!
245
246 qs.index = ((uint32_t*) (blk->ptr.cell + len)) - indexLen;
247 qs.data = (uint8_t*) bi.it;
248 qs.elemSize = sizeof(UCell);
249
250 if( fld.opt & OPT_SORT_FIELD )
251 {
252 fld.ut = ut;
253 ur_blockIt( ut, &fld.fb, CFUNC_OPT_ARG(3) );
254
255 qs.user = (void*) &fld;
256 qs.compare = (QuickSortFunc) _compareField;
257 }
258 else
259 {
260 qs.user = (void*) ut;
261 qs.compare = (QuickSortFunc) ((fld.opt & OPT_SORT_CASE) ?
263 }
264
265 ip = qs.index;
266 iend = ip + quickSortIndex( &qs, 0, len, group );
267
268 len = qs.elemSize * group;
269 while( ip != iend )
270 {
271 memCpy( blk->ptr.cell + blk->used, bi.it + *ip, len );
272 blk->used += group;
273 ++ip;
274 }
275 return UR_OK;
276 }
277 return ur_error( ut, UR_ERR_INTERNAL, "FIXME: sort only supports block!" );
278}
279
280
281/*EOF*/
#define CFUNC(name)
Macro to define C functions.
Definition boron.h:57
#define CFUNC_OPTIONS
Macro to get uint16_t option flags from inside a C function.
Definition boron.h:59
UBuffer * ur_makeBlockCell(UThread *, int type, int size, UCell *cell)
Generate a single block and set cell to reference it.
Definition block.c:76
int ur_ctxLookup(const UBuffer *, UAtom atom)
Find word in context by atom.
Definition context.c:579
#define ur_ctxCell(c, n)
Get pointer of UCell in context by index.
Definition urlan.h:668
int ur_strChar(const UBuffer *, UIndex pos)
Return the character at a given position.
Definition string.c:1659
void ur_seriesSlice(const UThread *, USeriesIter *si, const UCell *cell)
Set USeriesIter to series slice.
Definition env.c:1338
#define ur_bufferSer(c)
Convenience macro for ur_bufferSeries().
Definition urlan.h:752
UStatus ur_error(UThread *, int errorType, const char *fmt,...)
Create error! exception.
Definition env.c:964
const UCell * end
End position.
Definition urlan.h:388
const UCell * it
Start position.
Definition urlan.h:387
@ UR_OK
Returned to indicate successful evaluation/operation.
Definition urlan.h:118
const UBuffer * ur_blockIt(const UThread *ut, UBlockIt *bi, const UCell *blkCell)
Set UBlockIt to the start and end of a block slice.
Definition env.c:1388
int ur_compare(UThread *, const UCell *a, const UCell *b)
Definition env.c:1079
int ur_compareCase(UThread *, const UCell *a, const UCell *b)
Case-sensitive ordering comparison.
Definition env.c:1093
#define ur_type(c)
Return UrlanDataType of cell.
Definition urlan.h:695
Iterator for const UCell array.
Definition urlan.h:386
The UBuffer struct holds information about a resource, usually a chunk of memory.
Definition urlan.h:266
UIndex used
This typically holds the number of elements in the buffer.
Definition urlan.h:271
UCell * cell
Array of cells.
Definition urlan.h:275
union UBuffer::@312146223224040072236377336057316010374162171270 ptr
This typically holds a pointer to a chunk of memory.
UIndex it
Iterator index.
Definition urlan.h:229
Iterator for const series of any type.
Definition urlan.h:336
The UThread struct stores the data specific to a thread of execution.
Definition urlan.h:309
A cell holds a single value of a simple type or a reference (often to a UBuffer) for a complex type.
Definition urlan.h:248
UCellSeries series
For binary!, bitset!, string!, file!, block!, paren!, path! types.
Definition urlan.h:255
int ur_charLowercase(int c)
Convert UCS2 character to lowercase.
Definition ucs2_case.c:293
@ UR_ERR_INTERNAL
Fatal internal problem.
Definition urlan.h:129