Skip to content
Navigation Menu
{{ message }}
forked from s3rvac/cpp-bencoding
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDecoder.cpp
More file actions
344 lines (313 loc) · 9.2 KB
/
Copy pathDecoder.cpp
File metadata and controls
344 lines (313 loc) · 9.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
/**
* @file Decoder.cpp
* @copyright (c) 2014 by Petr Zemek (s3rvac@gmail.com) and contributors
* @license BSD, see the @c LICENSE file for more details
* @brief Implementation of the Decoder class.
*/
#include "Decoder.h"
#include <cassert>
#include <regex>
#include <sstream>
#include "BDictionary.h"
#include "BInteger.h"
#include "BList.h"
#include "BString.h"
#include "Utils.h"
namespace bencoding {
/**
 * @brief Creates a decoding error that carries the given description.
 *
 * @param[in] what Description of the error; it is handed over unchanged to
 *                 the @c std::runtime_error base class.
 */
DecodingError::DecodingError(const std::string &what)
    : std::runtime_error(what)
{
}
/**
 * @brief Constructs a decoder.
 *
 * The constructor performs no work of its own.
 */
Decoder::Decoder() = default;
/**
 * @brief Factory function: allocates a new decoder and returns it wrapped in a
 *        shared pointer.
 *
 * @return A shared pointer owning the freshly created decoder.
 */
std::shared_ptr<Decoder> Decoder::create() {
    // NOTE(review): presumably the constructor is not publicly accessible,
    // which would rule out std::make_shared here -- confirm in Decoder.h.
    std::shared_ptr<Decoder> decoder(new Decoder());
    return decoder;
}
/**
 * @brief Decodes the bencoded text in @a data and returns the resulting item.
 *
 * The whole string has to be consumed by the decoding. When any character
 * remains after the decoded item, DecodingError is thrown.
 *
 * @param[in] data Bencoded data to be decoded.
 *
 * @return The decoded item.
 */
std::shared_ptr<BItem> Decoder::decode(const std::string &data) {
    std::istringstream stream(data);
    std::shared_ptr<BItem> result = decode(stream);
    // Unlike the stream-based overload, trailing characters are an error here.
    validateInputDoesNotContainUndecodedCharacters(stream);
    return result;
}
/**
 * @brief Reads the data from the given @a input, decodes them and returns them.
 *
 * The type of the item is selected by the first character that is available in
 * @a input: @c 'd' starts a dictionary, @c 'i' an integer, @c 'l' a list, and
 * a decimal digit starts a string.
 *
 * If there are some characters left after the decoding, they are left in @a
 * input, i.e. they are not read. This behavior differs for the overload of
 * decode() that takes @c std::string as the input.
 *
 * @param[in,out] input Stream to read the bencoded data from.
 *
 * @return The decoded item.
 *
 * @throws DecodingError When @a input has no more characters or when the next
 *                       character does not start any bencoded item.
 */
std::shared_ptr<BItem> Decoder::decode(std::istream &input) {
    // Peek only once and keep the result as an int so that the end of input
    // can be told apart from every valid character.
    const int next = input.peek();
    if (next == std::char_traits<char>::eof()) {
        // Casting the EOF marker to char would put a garbage byte into the
        // error message, so report this situation explicitly.
        throw DecodingError("unexpected end of input");
    }

    switch (next) {
        case 'd':
            return decodeDictionary(input);
        case 'i':
            return decodeInteger(input);
        case 'l':
            return decodeList(input);
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            return decodeString(input);
        default:
            break;
    }

    throw DecodingError(std::string("unexpected character: '") +
        static_cast<char>(next) + "'");
}
/**
 * @brief Reads @a expected_char from @a input and discards it.
 *
 * @param[in,out] input Stream to read the character from.
 * @param[in] expected_char Character that has to come next in @a input.
 *
 * @throws DecodingError When @a input has no more characters or when the read
 *                       character differs from @a expected_char.
 */
void Decoder::readExpectedChar(std::istream &input, char expected_char) const {
    const int c = input.get();
    if (c == std::char_traits<char>::eof()) {
        // The EOF marker is not a character; casting it to char would put a
        // garbage byte into the error message.
        throw DecodingError(std::string("expected '") + expected_char +
            "', got end of input");
    }

    // get() returns the character as an unsigned value widened to int, so
    // convert the expected character the same way before comparing.
    if (c != std::char_traits<char>::to_int_type(expected_char)) {
        throw DecodingError(std::string("expected '") + expected_char +
            "', got '" + static_cast<char>(c) + "'");
    }
}
/**
* @brief Decodes a dictionary from @a input.
*
* @par Format
* @code
* d<bencoded string><bencoded element>e
* @endcode
*
* @par Example
* @code
* d3:cow3:moo4:spam4:eggse represents the dictionary {"cow": "moo", "spam": "eggs"}
* d4:spaml1:a1:bee represents the dictionary {"spam": ["a", "b"]}
* @endcode
*
* The keys must be bencoded strings. The values may be any bencoded type,
* including integers, strings, lists, and other dictionaries. This function
* supports decoding of dictionaries whose keys are not lexicographically sorted
* (according to the <a
* href="https://wiki.theory.org/BitTorrentSpecification#Bencoding">specification</a>,
* they must be sorted).
*/
std::shared_ptr<BDictionary> Decoder::decodeDictionary(std::istream &input) {
readExpectedChar(input, 'd');
auto bDictionary = decodeDictionaryItemsIntoDictionary(input);
readExpectedChar(input, 'e');
return bDictionary;
}
/**
 * @brief Decodes key-value pairs from @a input and stores them into a newly
 *        created dictionary, which is then returned.
 *
 * The decoding goes on until the next character in @a input is @c 'e' or until
 * @a input is in a failed state. The terminating @c 'e' is only peeked at, so
 * it stays in @a input.
 */
std::shared_ptr<BDictionary> Decoder::decodeDictionaryItemsIntoDictionary(
        std::istream &input) {
    auto items = BDictionary::create();
    for (;;) {
        if (!input || input.peek() == 'e') {
            break;
        }
        // The key is always decoded before its value.
        std::shared_ptr<BString> itemKey(decodeDictionaryKey(input));
        std::shared_ptr<BItem> itemValue(decodeDictionaryValue(input));
        (*items)[itemKey] = itemValue;
    }
    return items;
}
/**
 * @brief Decodes a single dictionary key from @a input.
 *
 * The next item in @a input is decoded in the usual way and then checked to be
 * a bencoded string, because no other type may be used as a key.
 *
 * @throws DecodingError When the decoded item is not a bencoded string.
 */
std::shared_ptr<BString> Decoder::decodeDictionaryKey(std::istream &input) {
    std::shared_ptr<BItem> item(decode(input));
    std::shared_ptr<BString> keyString(item->as<BString>());
    if (keyString) {
        return keyString;
    }
    throw DecodingError(
        "found a dictionary key that is not a bencoded string"
    );
}
/**
 * @brief Decodes a single dictionary value from @a input.
 *
 * A value may be an item of any type, so this simply decodes the next item.
 */
std::shared_ptr<BItem> Decoder::decodeDictionaryValue(std::istream &input) {
    std::shared_ptr<BItem> value = decode(input);
    return value;
}
/**
 * @brief Decodes an integer from @a input.
 *
 * @par Format
 * @code
 * i<integer encoded in base ten ASCII>e
 * @endcode
 *
 * @par Example
 * @code
 * i3e represents the integer 3
 * @endcode
 *
 * Only the significant digits may be present. Padding the integer with
 * zeroes, as in @c i04e, is not allowed (see the <a
 * href="https://wiki.theory.org/BitTorrentSpecification#Bencoding">
 * specification</a>).
 */
std::shared_ptr<BInteger> Decoder::decodeInteger(std::istream &input) const {
    // First get the raw text of the integer, then validate and convert it.
    const std::string encoded = readEncodedInteger(input);
    return decodeEncodedInteger(encoded);
}
/**
 * @brief Reads the textual form of an encoded integer from @a input.
 *
 * The format and an example are given in the description of decodeInteger().
 * The text is obtained by readUntil() with @c 'e' as the terminating
 * character.
 *
 * @return The text that was read.
 *
 * @throws DecodingError When readUntil() reports a failure.
 */
std::string Decoder::readEncodedInteger(std::istream &input) const {
    std::string encoded;
    if (!readUntil(input, encoded, 'e')) {
        // Whatever was read so far is shown to help locate the problem.
        throw DecodingError("error during the decoding of an integer near '" +
            encoded + "'");
    }
    return encoded;
}
/**
 * @brief Decodes the given encoded integer.
 *
 * The format and an example are given in the description of decodeInteger().
 *
 * @param[in] encodedInteger Complete text of the integer, including the
 *                           leading @c 'i' and the trailing @c 'e'.
 *
 * @return The decoded integer.
 *
 * @throws DecodingError When @a encodedInteger does not have the required
 *                       format or when its value cannot be converted to
 *                       @c BInteger::ValueType.
 */
std::shared_ptr<BInteger> Decoder::decodeEncodedInteger(
        const std::string &encodedInteger) const {
    // The pattern never changes, so compile it only once instead of on every
    // call. A zero may appear only on its own, which rules out padding.
    static const std::regex integerRegex("i([-+]?(0|[1-9][0-9]*))e");

    std::smatch match;
    if (!std::regex_match(encodedInteger, match, integerRegex)) {
        throw DecodingError("encountered an encoded integer of invalid format: '" +
            encodedInteger + "'");
    }

    // The pattern does not limit the number of digits, so the conversion may
    // still fail. Its result must be checked; otherwise, a value that was
    // never written to would be used.
    BInteger::ValueType integerValue{};
    if (!strToNum(match[1].str(), integerValue)) {
        throw DecodingError("encountered an encoded integer that cannot be "
            "converted to a number: '" + encodedInteger + "'");
    }
    return BInteger::create(integerValue);
}
/**
 * @brief Decodes a list from @a input.
 *
 * @par Format
 * @code
 * l<bencoded values>e
 * @endcode
 *
 * @par Example
 * @code
 * l4:spam4:eggse represents a list containing two strings "spam" and "eggs"
 * @endcode
 */
std::shared_ptr<BList> Decoder::decodeList(std::istream &input) {
    // Opening marker, then the items, then the closing marker.
    readExpectedChar(input, 'l');
    std::shared_ptr<BList> list(decodeListItemsIntoList(input));
    readExpectedChar(input, 'e');
    return list;
}
/**
 * @brief Decodes items from @a input and appends them, in the order in which
 *        they were read, to a newly created list, which is then returned.
 *
 * The decoding goes on until the next character in @a input is @c 'e' or until
 * @a input is in a failed state. The terminating @c 'e' is only peeked at, so
 * it stays in @a input.
 */
std::shared_ptr<BList> Decoder::decodeListItemsIntoList(std::istream &input) {
    auto items = BList::create();
    for (;;) {
        if (!input || input.peek() == 'e') {
            break;
        }
        items->push_back(decode(input));
    }
    return items;
}
/**
* @brief Decodes a string from @a input.
*
* @par Format
* @code
* <string length encoded in base ten ASCII>:<string data>
* @endcode
*
* @par Example
* @code
* 4:test represents the string "test"
* @endcode
*/
std::shared_ptr<BString> Decoder::decodeString(std::istream &input) const {
std::string::size_type stringLength(readStringLength(input));
readExpectedChar(input, ':');
std::string str(readStringOfGivenLength(input, stringLength));
return BString::create(str);
}
/**
 * @brief Reads the length of a string from @a input, validates it, and returns
 *        it.
 *
 * The text of the length is obtained by readUpTo() with @c ':' as the
 * terminating character and then converted by strToNum().
 *
 * @throws DecodingError When readUpTo() reports a failure or when the text
 *                       cannot be converted to a length.
 */
std::string::size_type Decoder::readStringLength(std::istream &input) const {
    std::string lengthText;
    if (!readUpTo(input, lengthText, ':')) {
        throw DecodingError("error during the decoding of a string near '" +
            lengthText + "'");
    }

    std::string::size_type length = 0;
    if (!strToNum(lengthText, length)) {
        throw DecodingError("invalid string length: '" + lengthText + "'");
    }
    return length;
}
/**
 * @brief Reads a string of the given @a length from @a input and returns it.
 *
 * The data are read in chunks of a bounded size, so the amount of allocated
 * memory follows the amount of data that are really available in @a input.
 * Since @a length comes from the decoded data, allocating the whole buffer in
 * advance would let a bogus length request an enormous allocation, or fail
 * with an exception other than DecodingError.
 *
 * @param[in,out] input Stream to read the characters from.
 * @param[in] length Number of characters that have to be read.
 *
 * @return The read string, containing exactly @a length characters.
 *
 * @throws DecodingError When fewer than @a length characters could be read.
 */
std::string Decoder::readStringOfGivenLength(std::istream &input,
        std::string::size_type length) const {
    const std::string::size_type chunkSize = 4096;
    char chunk[chunkSize];

    std::string str;
    while (str.size() < length) {
        const std::string::size_type remaining = length - str.size();
        const std::string::size_type wanted =
            remaining < chunkSize ? remaining : chunkSize;
        // wanted never exceeds chunkSize, so the conversion cannot overflow.
        input.read(chunk, static_cast<std::streamsize>(wanted));
        const std::string::size_type got =
            static_cast<std::string::size_type>(input.gcount());
        str.append(chunk, got);
        if (got != wanted) {
            // The input ended, or failed, before providing all the data.
            break;
        }
    }

    if (str.size() != length) {
        throw DecodingError("expected a string containing " + std::to_string(length) +
            " characters, but read only " + std::to_string(str.size()) +
            " characters");
    }
    return str;
}
/**
 * @brief Checks that @a input has been read completely.
 *
 * @throws DecodingError When there is at least one more character available
 *                       in @a input.
 */
void Decoder::validateInputDoesNotContainUndecodedCharacters(std::istream &input) {
    // Peeking yields the EOF marker only when nothing is left to be read.
    const bool everythingRead = input.peek() == std::char_traits<char>::eof();
    if (!everythingRead) {
        throw DecodingError("input contains undecoded characters");
    }
}
/**
 * @brief Decodes the bencoded text in @a data and returns the resulting item.
 *
 * This is a shortcut for creating a decoder by Decoder::create() and calling
 * @c decode() on it, which is handy when the decoder is not needed for
 * anything else.
 *
 * See Decoder::decode() for more details.
 */
std::shared_ptr<BItem> decode(const std::string &data) {
    return Decoder::create()->decode(data);
}
/**
 * @brief Reads the data from the given @a input, decodes them and returns
 *        them.
 *
 * This is a shortcut for creating a decoder by Decoder::create() and calling
 * @c decode() on it, which is handy when the decoder is not needed for
 * anything else. The call is forwarded to the overload of Decoder::decode()
 * that takes a stream, so characters following the decoded item are left in
 * @a input.
 *
 * See Decoder::decode() for more details.
 */
std::shared_ptr<BItem> decode(std::istream &input) {
    return Decoder::create()->decode(input);
}
} // namespace bencoding
You can’t perform that action at this time.
