Boron 2.1.0
encode.c
1/*
2 Copyright 2013 Karl Robillard
3
4 This file is part of the Boron programming language.
5
6 Boron is free software: you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 Boron is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with Boron. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20
21#define URLENC_COMMON 1
22#define URLENC_FUNC_ENCODE urlenc_enc_u16
23#define URLENC_FUNC_DECODE urlenc_dec_u16
24#define URLENC_T uint16_t
25#include "url_encoding.c"
26
27#define URLENC_FUNC_ENCODE urlenc_enc_u8
28#define URLENC_FUNC_DECODE urlenc_dec_u8
29#define URLENC_T uint8_t
30#include "url_encoding.c"
31
32
33static int url_encode( UThread* ut, const UCell* strC, UCell* res, int decode )
34{
35 USeriesIter si;
36 UBuffer* nstr;
37 int enc;
38 int nlen;
39
40
41 ur_seriesSlice( ut, &si, strC );
42 enc = si.buf->form;
43
44 nlen = si.end - si.it;
45 nstr = ur_makeStringCell( ut, enc, decode ? nlen : nlen * 3, res );
46 if( enc == UR_ENC_LATIN1 )
47 {
48 uint8_t* cp = si.buf->ptr.b;
49 uint8_t* (*func)(const uint8_t*, const uint8_t*, uint8_t*) =
50 decode ? urlenc_dec_u8 : urlenc_enc_u8;
51
52 cp = func( cp + si.it, cp + si.end, nstr->ptr.b );
53 nstr->used = cp - nstr->ptr.b;
54 }
55 else if( enc == UR_ENC_UCS2 )
56 {
57 uint16_t* cp = si.buf->ptr.u16;
58 uint16_t* (*func)(const uint16_t*, const uint16_t*, uint16_t*) =
59 decode ? urlenc_dec_u16 : urlenc_enc_u16;
60
61 cp = func( cp + si.it, cp + si.end, nstr->ptr.u16 );
62 nstr->used = cp - nstr->ptr.u16;
63 }
64 else
65 {
66 return ur_error( ut, UR_ERR_INTERNAL,
67 "encode 'url requires latin1 or ucs2 string" );
68 }
69 return UR_OK;
70}
71
72
73/*-cf-
74 encode
75 type int!/word! 2, 16, 64, latin1, utf8, ucs2, url
76 data binary!/string!
77 /bom Prepend Unicode BOM for utf8 or ucs2 and return binary.
78 return: String or binary with data converted to encoding type.
79 group: data
80 see: decode, encoding?
81
82 When data is a string! then the type must be a word! and a new string
83 is returned.
84
85 If data is a binary! then the type must be an int! and the input value
86 is returned with only the base indicator modified.
87*/
88CFUNC(cfunc_encode)
89{
90#define OPT_ENCODE_BOM 0x01
91 static const uint8_t _bomUtf8[3] = { 0xef, 0xbb, 0xbf };
92 const UCell* data = a2;
93 int type = ur_type(data);
94
95 if( ur_isStringType( type ) )
96 {
97 USeriesIter si;
98 int enc;
99
100 if( ! ur_is(a1, UT_WORD) )
101 return errorType( "encode expected word! type" );
102
103 switch( ur_atom(a1) )
104 {
105 case UR_ATOM_LATIN1:
106 enc = UR_ENC_LATIN1;
107 break;
108 case UR_ATOM_UTF8:
109 enc = UR_ENC_UTF8;
110 break;
111 case UR_ATOM_UCS2:
112 enc = UR_ENC_UCS2;
113 break;
114 case UR_ATOM_URL:
115 return url_encode( ut, data, res, 0 );
116 default:
117 return ur_error( ut, UR_ERR_TYPE,
118 "encode passed invalid type '%s",
119 ur_wordCStr( a1 ) );
120 }
121
122 if( CFUNC_OPTIONS & OPT_ENCODE_BOM )
123 {
124 UBuffer* bin = ur_makeBinaryCell( ut, 0, res );
125
126 if( enc == UR_ENC_UTF8 )
127 {
128 ur_binAppendData( bin, _bomUtf8, 3 );
129 }
130 else if( enc == UR_ENC_UCS2 )
131 {
132 uint16_t bom = 0xfeff;
133 ur_binAppendData( bin, (uint8_t*) &bom, 2 );
134 }
135
136 ur_seriesSlice( ut, &si, data );
137
138 if( enc == si.buf->form )
139 {
140 ur_binAppendArray( bin, &si );
141 }
142 else
143 {
144 UBuffer tmp;
145 ur_strInit( &tmp, enc, 0 );
146 ur_strAppend( &tmp, si.buf, si.it, si.end );
147
148 si.buf = &tmp;
149 si.it = 0;
150 si.end = tmp.used;
151 ur_binAppendArray( bin, &si );
152
153 ur_strFree( &tmp );
154 }
155 }
156 else
157 {
158 UBuffer* nstr = ur_makeStringCell( ut, enc, 0, res );
159 ur_seriesSlice( ut, &si, data );
160 ur_strAppend( nstr, si.buf, si.it, si.end );
161 }
162 return UR_OK;
163 }
164 else if( type == UT_BINARY )
165 {
166 UBuffer* bin;
167
168 if( ! ur_is(a1, UT_INT) )
169 {
170bad_type:
171 return errorType( "encode expected type 2, 16, or 64 for binary" );
172 }
173
174 if( ! (bin = ur_bufferSerM(data)) )
175 return UR_THROW;
176 switch( ur_int(a1) )
177 {
178 case 2:
179 bin->form = UR_BENC_2;
180 break;
181 case 16:
182 bin->form = UR_BENC_16;
183 break;
184 case 64:
185 bin->form = UR_BENC_64;
186 break;
187 default:
188 goto bad_type;
189 }
190 *res = *data;
191 return UR_OK;
192 }
193 return errorType( "encode expected binary!/string! data" );
194}
195
196
197/*-cf-
198 decode
199 type word! url
200 data string!
201 return: New string with data converted to encoding type.
202 group: data
203 see: encode, encoding?
204
205 Undoes URL encoding.
206*/
207CFUNC(cfunc_decode)
208{
209 if( ur_atom(a1) == UR_ATOM_URL )
210 return url_encode( ut, a2, res, 1 );
211 return ur_error( ut, UR_ERR_SCRIPT, "decode expected 'url" );
212}
213
214
215/*-cf-
216 encoding?
217 data
218 return: Encoding type or none! if data is not a string!/binary!.
219 group: data
220 see: encode, decode
221
222 A string! data value will return a word! (latin1, utf8, or ucs2).
223 A binary! data value will return the base int! (2, 16, or 64).
224*/
225CFUNC(cfunc_encodingQ)
226{
227 static UAtom encAtoms[4] = {
228 UR_ATOM_LATIN1, UR_ATOM_UTF8, UR_ATOM_UCS2, UT_UNSET
229 };
230 static char bencBase[4] = { 16, 2, 64, 16 };
231
232 if( ur_isStringType( ur_type(a1) ) )
233 {
234 const UBuffer* buf = ur_bufferSer(a1);
235 ur_setId(res, UT_WORD);
236 ur_setWordUnbound(res, encAtoms[buf->form & 3] );
237 }
238 else if( ur_is(a1, UT_BINARY) )
239 {
240 const UBuffer* buf = ur_bufferSer(a1);
241 ur_setId(res, UT_INT);
242 ur_int(res) = bencBase[buf->form & 3];
243 }
244 else
245 ur_setId(res, UT_NONE);
246 return UR_OK;
247}
248
249
250/*EOF*/
#define CFUNC(name)
Macro to define C functions.
Definition boron.h:57
#define CFUNC_OPTIONS
Macro to get uint16_t option flags from inside a C function.
Definition boron.h:59
void ur_binAppendData(UBuffer *, const uint8_t *data, int len)
Append data to binary buffer.
Definition binary.c:213
UBuffer * ur_makeBinaryCell(UThread *, int size, UCell *cell)
Generate a single binary and set cell to reference it.
Definition binary.c:74
void ur_binAppendArray(UBuffer *, const USeriesIter *si)
Append array slice to binary buffer.
Definition binary.c:227
#define ur_strFree
A string is a simple array.
Definition urlan.h:629
UBuffer * ur_makeStringCell(UThread *, int enc, int size, UCell *cell)
Generate a single string and set cell to reference it.
Definition string.c:104
void ur_strInit(UBuffer *, int enc, int size)
Initialize buffer to type UT_STRING.
Definition string.c:430
void ur_strAppend(UBuffer *, const UBuffer *strB, UIndex itB, UIndex endB)
Append another string buffer to this string.
Definition string.c:899
void ur_seriesSlice(const UThread *, USeriesIter *si, const UCell *cell)
Set USeriesIter to series slice.
Definition env.c:1338
#define ur_bufferSer(c)
Convenience macro for ur_bufferSeries().
Definition urlan.h:752
#define ur_setId(c, t)
Set type and initialize the other 24 bits of UCellId to zero.
Definition urlan.h:701
UIndex it
Start position.
Definition urlan.h:338
UStatus ur_error(UThread *, int errorType, const char *fmt,...)
Create error! exception.
Definition env.c:964
UIndex end
End position.
Definition urlan.h:339
@ UR_THROW
Returned to indicate an evaluation exception occurred.
Definition urlan.h:117
@ UR_OK
Returned to indicate successful evaluation/operation.
Definition urlan.h:118
#define ur_bufferSerM(c)
Convenience macro for ur_bufferSeriesM().
Definition urlan.h:753
const UBuffer * buf
Buffer pointer.
Definition urlan.h:337
#define ur_type(c)
Return UrlanDataType of cell.
Definition urlan.h:695
The UBuffer struct holds information about a resource, usually a chunk of memory.
Definition urlan.h:266
uint8_t * b
bytes
Definition urlan.h:277
uint8_t form
This can indicate a specific form of the data (such as a string encoding).
Definition urlan.h:269
UIndex used
This typically holds the number of elements in the buffer.
Definition urlan.h:271
uint16_t * u16
uint16_t
Definition urlan.h:279
union UBuffer::@312146223224040072236377336057316010374162171270 ptr
This typically holds a pointer to a chunk of memory.
Iterator for const series of any type.
Definition urlan.h:336
The UThread struct stores the data specific to a thread of execution.
Definition urlan.h:309
A cell holds a single value of a simple type or a reference (often to a UBuffer) for a complex type.
Definition urlan.h:248
@ UR_ERR_SCRIPT
General script evaluation error.
Definition urlan.h:126
@ UR_ERR_INTERNAL
Fatal internal problem.
Definition urlan.h:129
@ UR_ERR_TYPE
Invalid argument/parameter datatype.
Definition urlan.h:125