Skip to content
Navigation Menu
{{ message }}
forked from pradeep-k/RedisGraph
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathast_validations.c
More file actions
1704 lines (1445 loc) · 64.4 KB
/
Copy pathast_validations.c
File metadata and controls
1704 lines (1445 loc) · 64.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Copyright 2018-2020 Redis Labs Ltd. and Contributors
*
* This file is available under the Redis Labs Source Available License Agreement
*/
#include "ast.h"
#include "ast_shared.h"
#include "../util/arr.h"
#include "cypher_whitelist.h"
#include "../procedures/procedure.h"
#include "../arithmetic/repository.h"
#include "../arithmetic/arithmetic_expression.h"
#include <assert.h>
#include "../util/rax_extensions.h"
#include "../query_ctx.h"
// Forward declaration
static void _AST_GetDefinedIdentifiers(const cypher_astnode_t *node, rax *identifiers);
inline static void _prepareIterateAll(rax *map, raxIterator *iter) {
raxStart(iter, map);
raxSeek(iter, "^", NULL, 0);
}
// Validate that an input string can be completely converted to a positive integer in range.
static inline AST_Validation _ValidatePositiveInteger(const char *input) {
assert(input);
char *endptr; // If the entire string is converted, endptr will point to a null byte
errno = 0; // If underflow or overflow occurs, errno will be set
strtol(input, &endptr, 0); // Perform conversion
if(errno != 0 || *endptr != '\0') return AST_INVALID;
return AST_VALID;
}
static void _AST_GetIdentifiers(const cypher_astnode_t *node, rax *identifiers) {
if(!node) return;
assert(identifiers);
if(cypher_astnode_type(node) == CYPHER_AST_IDENTIFIER) {
const char *identifier = cypher_ast_identifier_get_name(node);
raxInsert(identifiers, (unsigned char *)identifier, strlen(identifier), NULL, NULL);
}
uint child_count = cypher_astnode_nchildren(node);
/* In case current node is of type projection
* inspect first child only,
* @10 20..26 > > > projection expression=@11, alias=@14
* @11 20..26 > > > > apply @12(@13)
* @12 20..23 > > > > > function name `max`
* @13 24..25 > > > > > identifier `z`
* @14 20..26 > > > > identifier `max(z)` */
cypher_astnode_type_t type = cypher_astnode_type(node);
if(type == CYPHER_AST_PROJECTION) child_count = 1;
for(uint i = 0; i < child_count; i++) {
const cypher_astnode_t *child = cypher_astnode_get_child(node, i);
_AST_GetIdentifiers(child, identifiers);
}
if(type == CYPHER_AST_LIST_COMPREHENSION || type == CYPHER_AST_ANY || type == CYPHER_AST_ALL) {
// A list comprehension has a local variable that should only be accessed within its scope;
// do not leave it in the identifiers map.
const cypher_astnode_t *variable_node = cypher_ast_list_comprehension_get_identifier(node);
const char *variable = cypher_ast_identifier_get_name(variable_node);
raxRemove(identifiers, (unsigned char *)variable, strlen(variable), NULL);
}
}
static void _AST_GetWithAliases(const cypher_astnode_t *node, rax *aliases) {
if(!node) return;
if(cypher_astnode_type(node) != CYPHER_AST_WITH) return;
assert(aliases);
uint num_with_projections = cypher_ast_with_nprojections(node);
for(uint i = 0; i < num_with_projections; i ++) {
const cypher_astnode_t *child = cypher_ast_with_get_projection(node, i);
const cypher_astnode_t *alias_node = cypher_ast_projection_get_alias(child);
const char *alias;
if(alias_node) {
alias = cypher_ast_identifier_get_name(alias_node);
} else {
const cypher_astnode_t *expr = cypher_ast_projection_get_expression(child);
// This expression not being an identifier is an error case, but will be captured in a later validation.
if(cypher_astnode_type(expr) != CYPHER_AST_IDENTIFIER) continue;
// Retrieve "a" from "WITH a"
alias = cypher_ast_identifier_get_name(expr);
}
raxInsert(aliases, (unsigned char *)alias, strlen(alias), NULL, NULL);
}
}
static void _AST_GetWithReferences(const cypher_astnode_t *node, rax *identifiers) {
if(!node) return;
if(cypher_astnode_type(node) != CYPHER_AST_WITH) return;
assert(identifiers);
uint num_with_projections = cypher_ast_with_nprojections(node);
for(uint i = 0; i < num_with_projections; i ++) {
const cypher_astnode_t *child = cypher_ast_with_get_projection(node, i);
_AST_GetIdentifiers(child, identifiers);
}
}
// Extract identifiers / aliases from a procedure call.
static void _AST_GetProcCallAliases(const cypher_astnode_t *node, rax *identifiers) {
// CALL db.labels() yield label
// CALL db.labels() yield label as l
assert(node && identifiers);
assert(cypher_astnode_type(node) == CYPHER_AST_CALL);
uint projection_count = cypher_ast_call_nprojections(node);
for(uint i = 0; i < projection_count; i++) {
const char *identifier = NULL;
const cypher_astnode_t *proj_node = cypher_ast_call_get_projection(node, i);
const cypher_astnode_t *alias_node = cypher_ast_projection_get_alias(proj_node);
if(alias_node) {
// Alias is given: YIELD label AS l.
identifier = cypher_ast_identifier_get_name(alias_node);
} else {
// No alias, use identifier: YIELD label
const cypher_astnode_t *exp_node = cypher_ast_projection_get_expression(proj_node);
identifier = cypher_ast_identifier_get_name(exp_node);
}
assert(identifiers);
raxInsert(identifiers, (unsigned char *)identifier, strlen(identifier), NULL, NULL);
}
}
// UNWIND and WITH also form aliases, but don't need special handling for us yet.
static void _AST_GetReturnAliases(const cypher_astnode_t *node, rax *aliases) {
assert(node && aliases && cypher_astnode_type(node) == CYPHER_AST_RETURN);
uint num_return_projections = cypher_ast_return_nprojections(node);
if(num_return_projections == 0) return;
for(uint i = 0; i < num_return_projections; i ++) {
const cypher_astnode_t *child = cypher_ast_return_get_projection(node, i);
const cypher_astnode_t *alias_node = cypher_ast_projection_get_alias(child);
if(alias_node == NULL) continue;
const char *alias = cypher_ast_identifier_get_name(alias_node);
raxInsert(aliases, (unsigned char *)alias, strlen(alias), NULL, NULL);
}
}
static void _CollectIdentifiers(const cypher_astnode_t *root, rax *projections) {
if(cypher_astnode_type(root) == CYPHER_AST_IDENTIFIER) {
// Identifier found, add to triemap
const char *identifier = cypher_ast_identifier_get_name(root);
raxInsert(projections, (unsigned char *)identifier, strlen(identifier), NULL, NULL);
} else {
uint child_count = cypher_astnode_nchildren(root);
for(uint i = 0; i < child_count; i ++) {
_CollectIdentifiers(cypher_astnode_get_child(root, i), projections);
}
}
}
/* Collect all identifiers used in the RETURN clause (but not aliases defined there) */
static rax *_AST_GetReturnProjections(const cypher_astnode_t *return_clause) {
if(!return_clause) return NULL;
uint projection_count = cypher_ast_return_nprojections(return_clause);
if(projection_count == 0) return NULL;
rax *projections = raxNew();
for(uint i = 0; i < projection_count; i ++) {
const cypher_astnode_t *projection = cypher_ast_return_get_projection(return_clause, i);
_CollectIdentifiers(projection, projections);
}
return projections;
}
/* Compares a triemap of user-specified functions with the registered functions we provide. */
static AST_Validation _ValidateReferredFunctions(rax *referred_functions, bool include_aggregates) {
AST_Validation res = AST_VALID;
char funcName[32];
raxIterator it;
_prepareIterateAll(referred_functions, &it);
bool found = true;
while(raxNext(&it)) {
size_t len = it.key_len;
// No functions have a name longer than 32 characters
if(len >= 32) {
res = AST_INVALID;
break;
}
// Copy the triemap key so that we can safely add a terinator character
memcpy(funcName, it.key, len);
funcName[len] = 0;
if(AR_FuncExists(funcName)) continue;
if(Agg_FuncExists(funcName)) {
if(include_aggregates) {
continue;
} else {
// Provide a unique error for using aggregate functions from inappropriate contexts
QueryCtx_SetError("Invalid use of aggregating function '%s'", funcName);
res = AST_INVALID;
break;
}
}
// If we reach this point, the function was not found
found = false;
res = AST_INVALID;
break;
}
// If the function was not found, provide a reason if one is not set
if(res == AST_INVALID && !found) {
QueryCtx_SetError("Unknown function '%s'", funcName);
}
raxStop(&it);
return res;
}
// Recursively collect function names and perform validations on functions with STAR arguments.
static AST_Validation _VisitFunctions(const cypher_astnode_t *node, rax *func_names) {
cypher_astnode_type_t type = cypher_astnode_type(node);
if(type == CYPHER_AST_APPLY_ALL_OPERATOR) {
// Working with a function call that has * as its argument.
const cypher_astnode_t *func = cypher_ast_apply_all_operator_get_func_name(node);
const char *func_name = cypher_ast_function_name_get_value(func);
// Verify that this is a COUNT call.
if(strcasecmp(func_name, "COUNT")) {
QueryCtx_SetError("COUNT is the only function which can accept * as an argument");
return AST_INVALID;
}
// Verify that DISTINCT is not specified.
if(cypher_ast_apply_all_operator_get_distinct(node)) {
// TODO consider opening a parser error, this construction is invalid in Neo's parser.
QueryCtx_SetError("Cannot specify both DISTINCT and * in COUNT(DISTINCT *)");
return AST_INVALID;
}
// Collect the function name, which is always "count" here.
raxInsert(func_names, (unsigned char *)"count", 5, NULL, NULL);
// As Apply All operators have no children, we can return here.
return AST_VALID;
}
if(type == CYPHER_AST_APPLY_OPERATOR) {
// Collect the function name.
const cypher_astnode_t *func = cypher_ast_apply_operator_get_func_name(node);
const char *func_name = cypher_ast_function_name_get_value(func);
raxInsert(func_names, (unsigned char *)func_name, strlen(func_name), NULL, NULL);
}
uint child_count = cypher_astnode_nchildren(node);
for(uint i = 0; i < child_count; i ++) {
const cypher_astnode_t *child = cypher_astnode_get_child(node, i);
AST_Validation res = _VisitFunctions(child, func_names);
if(res != AST_VALID) return res;
}
return AST_VALID;
}
static AST_Validation _ValidateFunctionCalls(const cypher_astnode_t *node,
bool include_aggregates) {
AST_Validation res = AST_VALID;
rax *func_names = raxNew();
// Collect function names and perform in-place validations.
res = _VisitFunctions(node, func_names);
if(res != AST_VALID) goto cleanup;
// Validate all provided function names.
res = _ValidateReferredFunctions(func_names, include_aggregates);
cleanup:
raxFree(func_names);
return res;
}
static inline bool _AliasIsReturned(rax *projections, const char *identifier) {
return raxFind(projections, (unsigned char *)identifier, strlen(identifier)) != raxNotFound;
}
// If we have a multi-hop traversal (fixed or variable length), we cannot currently return that entity.
static AST_Validation _ValidateMultiHopTraversal(rax *projections, const cypher_astnode_t *edge,
const cypher_astnode_t *range) {
int start = 1;
int end = INT_MAX - 2;
const cypher_astnode_t *range_start = cypher_ast_range_get_start(range);
if(range_start) start = AST_ParseIntegerNode(range_start);
const cypher_astnode_t *range_end = cypher_ast_range_get_end(range);
if(range_end) end = AST_ParseIntegerNode(range_end);
// Validate specified range
if(start > end) {
QueryCtx_SetError("Variable length path, maximum number of hops must be greater or equal to minimum number of hops.");
return AST_INVALID;
}
bool multihop = (start > 1) || (start != end);
if(!multihop) return AST_VALID;
// Multi-hop traversals cannot (currently) be filtered on
if(cypher_ast_rel_pattern_get_properties(edge) != NULL) {
QueryCtx_SetError("RedisGraph does not currently support filters on variable-length paths.");
return AST_INVALID;
}
// Multi-hop traversals cannot be referenced
if(!projections) return AST_VALID;
// Check if relation has an alias
const cypher_astnode_t *ast_identifier = cypher_ast_rel_pattern_get_identifier(edge);
if(!ast_identifier) return AST_VALID;
// Verify that the alias is not found in the RETURN clause.
const char *identifier = cypher_ast_identifier_get_name(ast_identifier);
if(_AliasIsReturned(projections, identifier)) {
QueryCtx_SetError("RedisGraph does not support the return of variable-length traversal edges '%s'. \
Instead, use a query in the style of: 'MATCH p = (a)-[%s*]->(b) RETURN relationships(p)'.",
identifier, identifier);
return AST_INVALID;
}
return AST_VALID;
}
static AST_Validation _Validate_ReusedEdges(const cypher_astnode_t *node, rax *edge_aliases) {
uint child_count = cypher_astnode_nchildren(node);
for(uint i = 0; i < child_count; i++) {
const cypher_astnode_t *child = cypher_astnode_get_child(node, i);
cypher_astnode_type_t type = cypher_astnode_type(child);
if(type == CYPHER_AST_IDENTIFIER) {
const char *alias = cypher_ast_identifier_get_name(child);
int new1 = raxInsert(edge_aliases, (unsigned char *)alias, strlen(alias), NULL,
NULL);
if(!new1) {
char *err = NULL;
QueryCtx_SetError("Cannot use the same relationship variable '%s' for multiple patterns.", alias);
return AST_INVALID;
}
}
}
return AST_VALID;
}
static AST_Validation _ValidateRelation(rax *projections, const cypher_astnode_t *edge,
rax *edge_aliases) {
AST_Validation res = AST_VALID;
// Make sure edge alias is unique
res = _Validate_ReusedEdges(edge, edge_aliases);
if(res != AST_VALID) return res;
// If this is a multi-hop traversal, validate it
const cypher_astnode_t *range = cypher_ast_rel_pattern_get_varlength(edge);
if(range) {
res = _ValidateMultiHopTraversal(projections, edge, range);
if(res != AST_VALID) return res;
}
return res;
}
static AST_Validation _ValidatePath(const cypher_astnode_t *path, rax *projections,
rax *edge_aliases) {
AST_Validation res = AST_VALID;
uint path_len = cypher_ast_pattern_path_nelements(path);
// Check all relations on the path (every odd offset) and collect aliases.
for(uint i = 1; i < path_len; i += 2) {
const cypher_astnode_t *edge = cypher_ast_pattern_path_get_element(path, i);
res = _ValidateRelation(projections, edge, edge_aliases);
if(res != AST_VALID) return res;
}
return res;
}
static AST_Validation _ValidatePattern(rax *projections, const cypher_astnode_t *pattern,
rax *edge_aliases) {
AST_Validation res = AST_VALID;
uint path_count = cypher_ast_pattern_npaths(pattern);
for(uint i = 0; i < path_count; i ++) {
res = _ValidatePath(cypher_ast_pattern_get_path(pattern, i), projections, edge_aliases);
if(res != AST_VALID) break;
}
return res;
}
// Validate the property maps used in node/edge patterns in MATCH, and CREATE clauses
static AST_Validation _ValidateInlinedProperties(const cypher_astnode_t *props) {
if(cypher_astnode_type(props) != CYPHER_AST_MAP) {
// Emit an error if the properties are not presented as a map, as in:
// MATCH (p {invalid_property_construction}) RETURN p
QueryCtx_SetError("Encountered unhandled type in inlined properties.");
return AST_INVALID;
}
return AST_VALID;
}
static AST_Validation _ValidateInlinedPropertiesOnPath(const cypher_astnode_t *path) {
uint path_len = cypher_ast_pattern_path_nelements(path);
// Check all nodes on path
for(uint i = 0; i < path_len; i += 2) {
const cypher_astnode_t *ast_identifier = NULL;
const cypher_astnode_t *node = cypher_ast_pattern_path_get_element(path, i);
const cypher_astnode_t *props = cypher_ast_node_pattern_get_properties(node);
if(props && (_ValidateInlinedProperties(props) != AST_VALID)) return AST_INVALID;
}
// Check all edges on path
for(uint i = 1; i < path_len; i += 2) {
const cypher_astnode_t *ast_identifier = NULL;
const cypher_astnode_t *edge = cypher_ast_pattern_path_get_element(path, i);
const cypher_astnode_t *props = cypher_ast_rel_pattern_get_properties(edge);
if(props && (_ValidateInlinedProperties(props) != AST_VALID)) return AST_INVALID;
}
return AST_VALID;
}
static AST_Validation _Validate_MATCH_Clause_Filters(const cypher_astnode_t *clause) {
const cypher_astnode_t *pattern = cypher_ast_match_get_pattern(clause);
uint path_count = cypher_ast_pattern_npaths(pattern);
for(uint i = 0; i < path_count; i ++) {
const cypher_astnode_t *path = cypher_ast_pattern_get_path(pattern, i);
if(_ValidateInlinedPropertiesOnPath(path) != AST_VALID) return AST_INVALID;
}
return AST_VALID;
}
static AST_Validation _Validate_CALL_Clauses(const AST *ast) {
/* Make sure procedure calls are valid:
* 1. procedure exists
* 2. number of arguments to procedure is as expected
* 3. yield refers to procedure output */
const cypher_astnode_t **call_clauses = AST_GetClauses(ast, CYPHER_AST_CALL);
if(call_clauses == NULL) return AST_VALID;
AST_Validation res = AST_VALID;
ProcedureCtx *proc = NULL;
rax *identifiers = raxNew();
uint call_count = array_len(call_clauses);
for(uint i = 0; i < call_count; i ++) {
const cypher_astnode_t *call_clause = call_clauses[i];
// Make sure procedure exists.
const char *proc_name = cypher_ast_proc_name_get_value(cypher_ast_call_get_proc_name(call_clause));
proc = Proc_Get(proc_name);
if(proc == NULL) {
QueryCtx_SetError("Procedure `%s` is not registered", proc_name);
res = AST_INVALID;
goto cleanup;
}
// Validate num of arguments.
if(proc->argc != PROCEDURE_VARIABLE_ARG_COUNT) {
unsigned int given_arg_count = cypher_ast_call_narguments(call_clause);
if(Procedure_Argc(proc) != given_arg_count) {
QueryCtx_SetError("Procedure `%s` requires %d arguments, got %d", proc_name, proc->argc,
given_arg_count);
res = AST_INVALID;
goto cleanup;
}
}
// Validate projections.
uint proj_count = cypher_ast_call_nprojections(call_clause);
if(proj_count > 0) {
// Collect call projections.
for(uint j = 0; j < proj_count; j++) {
const cypher_astnode_t *proj = cypher_ast_call_get_projection(call_clause, j);
const cypher_astnode_t *ast_exp = cypher_ast_projection_get_expression(proj);
assert(cypher_astnode_type(ast_exp) == CYPHER_AST_IDENTIFIER);
const char *identifier = cypher_ast_identifier_get_name(ast_exp);
// Make sure each yield output is mentioned only once.
if(!raxInsert(identifiers, (unsigned char *)identifier, strlen(identifier), NULL,
NULL)) {
QueryCtx_SetError("Variable `%s` already declared", identifier);
res = AST_INVALID;
goto cleanup;
}
}
// Make sure procedure is aware of each output.
char output[256];
raxIterator it;
_prepareIterateAll(identifiers, &it);
while(raxNext(&it)) {
size_t len = it.key_len;
unsigned char *identifier = it.key;
if(len >= 256) {
QueryCtx_SetError("Output name `%s` too long", identifier);
res = AST_INVALID;
goto cleanup;
}
memcpy(output, identifier, len);
output[len] = 0;
if(!Procedure_ContainsOutput(proc, output)) {
raxStop(&it);
QueryCtx_SetError("Procedure `%s` does not yield output `%s`", proc_name, output);
res = AST_INVALID;
goto cleanup;
}
}
raxStop(&it);
raxFree(identifiers);
identifiers = NULL;
}
Proc_Free(proc);
proc = NULL;
}
cleanup:
if(proc) Proc_Free(proc);
array_free(call_clauses);
if(identifiers) raxFree(identifiers);
return res;
}
static AST_Validation _ValidateNodeAlias(const cypher_astnode_t *node, rax *edge_aliases) {
const cypher_astnode_t *ast_alias = cypher_ast_node_pattern_get_identifier(node);
if(ast_alias == NULL) return AST_VALID;
// Verify that the node's alias is not in the map of edge aliases.
const char *alias = cypher_ast_identifier_get_name(ast_alias);
if(raxFind(edge_aliases, (unsigned char *)alias, strlen(alias)) != raxNotFound) {
QueryCtx_SetError("The alias '%s' was specified for both a node and a relationship.", alias);
return AST_INVALID;
}
return AST_VALID;
}
static AST_Validation _ValidateReusedAliases(rax *edge_aliases, const cypher_astnode_t *pattern) {
AST_Validation res = AST_VALID;
uint path_count = cypher_ast_pattern_npaths(pattern);
for(uint i = 0; i < path_count; i ++) {
const cypher_astnode_t *path = cypher_ast_pattern_get_path(pattern, i);
uint path_len = cypher_ast_pattern_path_nelements(path);
for(uint j = 0; j < path_len; j += 2) {
const cypher_astnode_t *node = cypher_ast_pattern_path_get_element(path, j);
res = _ValidateNodeAlias(node, edge_aliases);
if(res != AST_VALID) return res;
}
}
return res;
}
static AST_Validation _Validate_MATCH_Clauses(const AST *ast) {
// Check to see if all mentioned inlined, outlined functions exists.
// Inlined functions appear within entity definition ({a:v})
// Outlined functions appear within the WHERE clause.
// libcypher-parser doesn't have a WHERE node, all of the filters
// are specified within the MATCH node sub-tree.
const cypher_astnode_t **match_clauses = AST_GetClauses(ast, CYPHER_AST_MATCH);
if(match_clauses == NULL) return AST_VALID;
rax *edge_aliases = raxNew();
rax *reused_entities = raxNew();
AST_Validation res;
const cypher_astnode_t *return_clause = AST_GetClause(ast, CYPHER_AST_RETURN);
rax *projections = _AST_GetReturnProjections(return_clause);
uint match_count = array_len(match_clauses);
for(uint i = 0; i < match_count; i ++) {
const cypher_astnode_t *match_clause = match_clauses[i];
// Validate the pattern described by the MATCH clause
res = _ValidatePattern(projections, cypher_ast_match_get_pattern(match_clause), edge_aliases);
if(res == AST_INVALID) goto cleanup;
// Validate that inlined filters do not use parameters
res = _Validate_MATCH_Clause_Filters(match_clause);
if(res == AST_INVALID) goto cleanup;
// Validate all function references in clause. Aggregate calls cannot be made in MATCH
// clauses or their WHERE predicates.
bool include_aggregates = false;
res = _ValidateFunctionCalls(match_clause, include_aggregates);
if(res == AST_INVALID) goto cleanup;
}
// Verify that no relation alias is also used to denote a node
for(uint i = 0; i < match_count; i ++) {
const cypher_astnode_t *match_clause = match_clauses[i];
res = _ValidateReusedAliases(edge_aliases, cypher_ast_match_get_pattern(match_clause));
if(res == AST_INVALID) goto cleanup;
}
cleanup:
if(projections) raxFree(projections);
raxFree(edge_aliases);
raxFree(reused_entities);
array_free(match_clauses);
return res;
}
static AST_Validation _Validate_WITH_Clauses(const AST *ast) {
// Verify that all functions used in the WITH clause and (if present) its WHERE predicate
// are defined and used validly.
// An AST segment has at most 1 WITH clause.
const cypher_astnode_t *with_clause = AST_GetClause(ast, CYPHER_AST_WITH);
if(with_clause == NULL) return AST_VALID;
// Verify that each WITH projection either is aliased or is itself an identifier.
uint projection_count = cypher_ast_with_nprojections(with_clause);
for(uint i = 0; i < projection_count; i ++) {
const cypher_astnode_t *proj = cypher_ast_with_get_projection(with_clause, i);
if(!cypher_ast_projection_get_alias(proj) &&
cypher_astnode_type(cypher_ast_projection_get_expression(proj)) != CYPHER_AST_IDENTIFIER) {
QueryCtx_SetError("WITH clause projections must be aliased");
return AST_INVALID;
}
}
// Verify that functions invoked by the WITH clause are valid.
return _ValidateFunctionCalls(with_clause, true);
}
// Verify that MERGE doesn't redeclare bound relations and that one reltype is specified for unbound relations.
static AST_Validation _ValidateMergeRelation(const cypher_astnode_t *entity, rax *defined_aliases) {
const cypher_astnode_t *identifier = cypher_ast_rel_pattern_get_identifier(entity);
const char *alias = NULL;
if(identifier) {
alias = cypher_ast_identifier_get_name(identifier);
// Verify that we're not redeclaring a bound variable.
if(raxFind(defined_aliases, (unsigned char *)alias, strlen(alias)) != raxNotFound) {
QueryCtx_SetError("The bound variable %s' can't be redeclared in a MERGE clause", alias);
return AST_INVALID;
}
}
// Exactly one reltype should be specified for the introduced edge.
uint reltype_count = cypher_ast_rel_pattern_nreltypes(entity);
if(reltype_count != 1) {
QueryCtx_SetError("Exactly one relationship type must be specified for each relation in a MERGE pattern.");
return AST_INVALID;
}
// We don't need to validate the MERGE edge's direction, as an undirected edge in MERGE
// should cause a single outgoing edge to be created.
return AST_VALID;
}
// Verify that MERGE doesn't redeclare bound nodes.
static AST_Validation _ValidateMergeNode(const cypher_astnode_t *entity, rax *defined_aliases) {
if(raxSize(defined_aliases) == 0) return AST_VALID;
const cypher_astnode_t *identifier = cypher_ast_node_pattern_get_identifier(entity);
if(identifier == NULL) return AST_VALID;
const char *alias = cypher_ast_identifier_get_name(identifier);
if(raxFind(defined_aliases, (unsigned char *)alias, strlen(alias)) == raxNotFound) {
// If the entity is unaliased or not previously bound, it cannot be redeclared.
return AST_VALID;
}
// If the entity is already bound, the MERGE pattern should not introduce labels or properties.
if((cypher_ast_node_pattern_nlabels(entity) > 0) ||
cypher_ast_node_pattern_get_properties(entity)) {
QueryCtx_SetError("The bound node '%s' can't be redeclared in a MERGE clause", alias);
return AST_INVALID;
}
return AST_VALID;
}
static AST_Validation _Validate_MERGE_Clauses(const AST *ast) {
AST_Validation res = AST_VALID;
uint *merge_clause_indices = AST_GetClauseIndices(ast, CYPHER_AST_MERGE);
uint merge_count = array_len(merge_clause_indices);
rax *defined_aliases = NULL;
if(merge_count == 0) goto cleanup;
defined_aliases = raxNew();
uint start_offset = 0;
for(uint i = 0; i < merge_count; i ++) {
uint clause_idx = merge_clause_indices[i];
// Collect all entities that are bound before this MERGE clause.
for(uint j = start_offset; j < clause_idx; j ++) {
const cypher_astnode_t *clause = cypher_ast_query_get_clause(ast->root, j);
_AST_GetDefinedIdentifiers(clause, defined_aliases);
}
start_offset = clause_idx;
const cypher_astnode_t *merge_clause = cypher_ast_query_get_clause(ast->root, clause_idx);
const cypher_astnode_t *path = cypher_ast_merge_get_pattern_path(merge_clause);
uint nelems = cypher_ast_pattern_path_nelements(path);
for(uint j = 0; j < nelems; j ++) {
const cypher_astnode_t *entity = cypher_ast_pattern_path_get_element(path, j);
// Odd offsets correspond to edges, even offsets correspond to nodes.
res = (j % 2) ? _ValidateMergeRelation(entity, defined_aliases)
: _ValidateMergeNode(entity, defined_aliases);
if(res != AST_VALID) goto cleanup;
}
// Verify that any filters on the path refer to constants or resolved identifiers.
res = _ValidateInlinedPropertiesOnPath(path);
if(res != AST_VALID) goto cleanup;
}
cleanup:
array_free(merge_clause_indices);
if(defined_aliases) raxFree(defined_aliases);
return res;
}
// Validate each entity referenced in the CREATE clause.
static AST_Validation _Validate_CREATE_Entities(const cypher_astnode_t *clause,
rax *defined_aliases) {
const cypher_astnode_t *pattern = cypher_ast_create_get_pattern(clause);
// Verify that functions invoked in the CREATE pattern are valid.
if(_ValidateFunctionCalls(pattern, false) != AST_VALID) return AST_INVALID;
uint path_count = cypher_ast_pattern_npaths(pattern);
for(uint i = 0; i < path_count; i ++) {
const cypher_astnode_t *path = cypher_ast_pattern_get_path(pattern, i);
// Validate that inlined properties are valid.
if(_ValidateInlinedPropertiesOnPath(path) != AST_VALID) return AST_INVALID;
uint nelems = cypher_ast_pattern_path_nelements(path);
/* Visit every relationship (every odd offset) on the path to validate its alias and structure.
* TODO There should also be a syntax error for redeclaring nodes, as in:
* MATCH (a) CREATE (a)
* But this is a no-op query, and we don't have the logic to differentiate this from a valid query like
* MATCH (a) CREATE (a)-[:E]->(:B) */
for(uint j = 1; j < nelems; j += 2) {
const cypher_astnode_t *rel = cypher_ast_pattern_path_get_element(path, j);
const cypher_astnode_t *identifier = cypher_ast_rel_pattern_get_identifier(rel);
// Validate that no relation aliases are previously bound.
if(identifier) {
const char *alias = cypher_ast_identifier_get_name(identifier);
if(raxFind(defined_aliases, (unsigned char *)alias, strlen(alias)) != raxNotFound) {
QueryCtx_SetError("The bound variable %s' can't be redeclared in a CREATE clause", alias);
return AST_INVALID;
}
}
// Validate that each relation has exactly one type.
uint reltype_count = cypher_ast_rel_pattern_nreltypes(rel);
if(reltype_count != 1) {
QueryCtx_SetError("Exactly one relationship type must be specified for CREATE");
return AST_INVALID;
}
// Validate that each relation being created is directed.
if(cypher_ast_rel_pattern_get_direction(rel) == CYPHER_REL_BIDIRECTIONAL) {
QueryCtx_SetError("Only directed relationships are supported in CREATE");
return AST_INVALID;
}
}
}
return AST_VALID;
}
static AST_Validation _Validate_CREATE_Clauses(const AST *ast) {
AST_Validation res = AST_VALID;
uint *create_clause_indices = AST_GetClauseIndices(ast, CYPHER_AST_CREATE);
uint clause_count = array_len(create_clause_indices);
rax *defined_aliases = NULL;
if(clause_count == 0) goto cleanup;
defined_aliases = raxNew();
uint start_offset = 0;
for(uint i = 0; i < clause_count; i ++) {
uint clause_idx = create_clause_indices[i];
// Collect all entities that are bound before this CREATE clause.
for(uint j = start_offset; j < clause_idx; j ++) {
const cypher_astnode_t *prev_clause = cypher_ast_query_get_clause(ast->root, i);
_AST_GetDefinedIdentifiers(prev_clause, defined_aliases);
}
start_offset = clause_idx;
const cypher_astnode_t *clause = cypher_ast_query_get_clause(ast->root, clause_idx);
res = _Validate_CREATE_Entities(clause, defined_aliases);
if(res == AST_INVALID) goto cleanup;
}
/* Since we combine all our CREATE clauses in a segment into one operation,
* make sure no data-modifying clauses can separate them. TCK example:
* CREATE (a:A), (b:B) MERGE (a)-[:KNOWS]->(b) CREATE (b)-[:KNOWS]->(c:C) RETURN count(*)
*/
for(uint i = create_clause_indices[0] + 1; i < create_clause_indices[clause_count - 1]; i ++) {
const cypher_astnode_t *clause = cypher_ast_query_get_clause(ast->root, i);
if(cypher_astnode_type(clause) == CYPHER_AST_MERGE) {
QueryCtx_SetError("RedisGraph does not support queries of the form CREATE...MERGE...CREATE without a separating WITH clause.");
res = AST_INVALID;
goto cleanup;
}
}
cleanup:
array_free(create_clause_indices);
if(defined_aliases) raxFree(defined_aliases);
return res;
}
static AST_Validation _Validate_DELETE_Clauses(const AST *ast) {
const cypher_astnode_t *delete_clause = AST_GetClause(ast, CYPHER_AST_DELETE);
if(!delete_clause) return AST_VALID;
// TODO: Validated that the deleted entities are indeed matched or projected.
return AST_VALID;
}
static AST_Validation _Validate_RETURN_Clause(const AST *ast) {
const cypher_astnode_t *return_clause = AST_GetClause(ast, CYPHER_AST_RETURN);
if(!return_clause) return AST_VALID;
// Validate all user-specified functions in RETURN clause.
bool include_aggregates = true;
AST_Validation res = _ValidateFunctionCalls(return_clause, include_aggregates);
return res;
}
static AST_Validation _Validate_UNWIND_Clauses(const AST *ast) {
const cypher_astnode_t **unwind_clauses = AST_GetClauses(ast, CYPHER_AST_UNWIND);
if(!unwind_clauses) return AST_VALID;
AST_Validation res = AST_VALID;
uint clause_count = array_len(unwind_clauses);
for(uint i = 0; i < clause_count; i++) {
const cypher_astnode_t *expression = cypher_ast_unwind_get_expression(unwind_clauses[i]);
// Verify that all elements of the UNWIND collection are supported by RedisGraph
uint child_count = cypher_astnode_nchildren(expression);
for(uint j = 0; j < child_count; j ++) {
res = CypherWhitelist_ValidateQuery(cypher_astnode_get_child(expression, j));
if(res != AST_VALID) goto cleanup;
}
// Verify that UNWIND doesn't call non-existent or unsupported functions.
res = _ValidateFunctionCalls(expression, true);
if(res != AST_VALID) goto cleanup;
}
cleanup:
array_free(unwind_clauses);
return res;
}
// LIMIT and SKIP are not independent clauses, but modifiers that can be applied to WITH or RETURN clauses
static AST_Validation _Validate_LIMIT_SKIP_Modifiers(const AST *ast) {
// Handle modifiers on the RETURN clause
const cypher_astnode_t *return_clause = AST_GetClause(ast, CYPHER_AST_RETURN);
// Skip check if the RETURN clause does not specify a limit
if(return_clause) {
// Handle LIMIT modifier
const cypher_astnode_t *limit = cypher_ast_return_get_limit(return_clause);
if(limit) {
// Handle non-integer or non parameter types specified as LIMIT value
// The value validation of integer node or parameter node is done in run time evaluation.
if(cypher_astnode_type(limit) != CYPHER_AST_INTEGER &&
cypher_astnode_type(limit) != CYPHER_AST_PARAMETER) {
QueryCtx_SetError("LIMIT specified value of invalid type, must be a positive integer");
return AST_INVALID;
}
}
// Handle SKIP modifier
const cypher_astnode_t *skip = cypher_ast_return_get_skip(return_clause);
if(skip) {
// Handle non-integer or non parameter types specified as skip value
// The value validation of integer node or parameter node is done in run time evaluation.
if(cypher_astnode_type(skip) != CYPHER_AST_INTEGER &&
cypher_astnode_type(skip) != CYPHER_AST_PARAMETER) {
QueryCtx_SetError("SKIP specified value of invalid type, must be a positive integer");
return AST_INVALID;
}
}
}
// Handle LIMIT modifiers on all WITH clauses
const cypher_astnode_t **with_clauses = AST_GetClauses(ast, CYPHER_AST_WITH);
if(!with_clauses) return AST_VALID;
AST_Validation res = AST_VALID;
uint with_count = array_len(with_clauses);
for(uint i = 0; i < with_count; i++) {
const cypher_astnode_t *with_clause = with_clauses[i];
// Handle LIMIT modifier
const cypher_astnode_t *limit = cypher_ast_with_get_limit(with_clause);
if(limit) {
// Handle non-integer or non parameter types specified as LIMIT value
// The value validation of integer node or parameter node is done in run time evaluation.
if(cypher_astnode_type(limit) != CYPHER_AST_INTEGER &&
cypher_astnode_type(limit) != CYPHER_AST_PARAMETER) {
QueryCtx_SetError("LIMIT specified value of invalid type, must be a positive integer");
return AST_INVALID;
}
}
// Handle SKIP modifier
const cypher_astnode_t *skip = cypher_ast_with_get_skip(with_clause);
if(skip) {
// Handle non-integer or non parameter types specified as skip value
// The value validation of integer node or parameter node is done in run time evaluation.
if(cypher_astnode_type(skip) != CYPHER_AST_INTEGER &&
cypher_astnode_type(skip) != CYPHER_AST_PARAMETER) {
QueryCtx_SetError("SKIP specified value of invalid type, must be a positive integer");
return AST_INVALID;
}
}
}
array_free(with_clauses);
return res;
}
// A query must end in a RETURN clause, a procedure, or an updating clause
// (CREATE, MERGE, DELETE, SET, or REMOVE once supported)
static AST_Validation _ValidateQueryTermination(const AST *ast) {
uint clause_count = cypher_ast_query_nclauses(ast->root);
const cypher_astnode_t *last_clause = cypher_ast_query_get_clause(ast->root, clause_count - 1);
cypher_astnode_type_t type = cypher_astnode_type(last_clause);
if(type != CYPHER_AST_RETURN &&
type != CYPHER_AST_CREATE &&
type != CYPHER_AST_MERGE &&
type != CYPHER_AST_DELETE &&
type != CYPHER_AST_SET &&
type != CYPHER_AST_CALL
) {
QueryCtx_SetError("Query cannot conclude with %s (must be RETURN or an update clause)",
cypher_astnode_typestr(type));
return AST_INVALID;
}
return AST_VALID;
}
static void _AST_RegisterCallOutputs(const cypher_astnode_t *call_clause, rax *identifiers) {
const char *proc_name = cypher_ast_proc_name_get_value(cypher_ast_call_get_proc_name(call_clause));
ProcedureCtx *proc = Proc_Get(proc_name);
assert(proc);
unsigned int output_count = array_len(proc->output);
for(uint i = 0; i < output_count; i++) {
const char *name = Procedure_GetOutput(proc, i);
raxInsert(identifiers, (unsigned char *)name, strlen(name), NULL, NULL);
}
}
// Perform validations not constrained to a specific scope
static AST_Validation _ValidateQuerySequence(const AST *ast) {
// Validate the final clause
if(_ValidateQueryTermination(ast) != AST_VALID) return AST_INVALID;
uint clause_count = cypher_ast_query_nclauses(ast->root);
// The query cannot begin with a "WITH/RETURN *" projection.
const cypher_astnode_t *start_clause = cypher_ast_query_get_clause(ast->root, 0);
if(cypher_astnode_type(start_clause) == CYPHER_AST_WITH &&
cypher_ast_with_has_include_existing(start_clause)) {
QueryCtx_SetError("Query cannot begin with 'WITH *'.");
return AST_INVALID;
}
if(cypher_astnode_type(start_clause) == CYPHER_AST_RETURN &&
cypher_ast_return_has_include_existing(start_clause)) {
QueryCtx_SetError("Query cannot begin with 'RETURN *'.");
return AST_INVALID;
}
return AST_VALID;
}
/* In any given query scope, reading clauses (MATCH, UNWIND, and InQueryCall)
* cannot follow updating clauses (CREATE, MERGE, DELETE, SET, REMOVE).
* https://s3.amazonaws.com/artifacts.opencypher.org/railroad/SinglePartQuery.html
* Additionally, a MATCH clause cannot follow an OPTIONAL MATCH clause. */
static AST_Validation _ValidateClauseOrder(const AST *ast) {
uint clause_count = cypher_ast_query_nclauses(ast->root);
bool encountered_optional_match = false;
You can’t perform that action at this time.
