Load Data

Define the Loading Job

Below, we use the GSQL loading language to define a loading job script, which encodes all the mappings from the source CSV file (generated by the LDBC SNB benchmark data generator) to our schema.
The script to load the LDBC-SNB data is below.
# GSQL script for loading LDBC-SNB data
USE GRAPH ldbc_snb
CREATE LOADING JOB load_ldbc_snb FOR GRAPH ldbc_snb {
  // define vertex source files (one CSV per vertex type)
  DEFINE FILENAME v_comment_file;
  DEFINE FILENAME v_post_file;
  DEFINE FILENAME v_organisation_file;
  DEFINE FILENAME v_place_file;
  DEFINE FILENAME v_forum_file;
  DEFINE FILENAME v_person_file;
  DEFINE FILENAME v_tag_file;
  DEFINE FILENAME v_tagclass_file;

  // define edge source files (one CSV per edge type / endpoint pair)
  DEFINE FILENAME forum_containerOf_post_file;
  DEFINE FILENAME comment_hasCreator_person_file;
  DEFINE FILENAME post_hasCreator_person_file;
  DEFINE FILENAME person_hasInterest_tag_file;
  DEFINE FILENAME forum_hasMember_person_file;
  DEFINE FILENAME forum_hasModerator_person_file;
  DEFINE FILENAME comment_hasTag_tag_file;
  DEFINE FILENAME post_hasTag_tag_file;
  DEFINE FILENAME forum_hasTag_tag_file;
  DEFINE FILENAME tag_hasType_tagclass_file;
  DEFINE FILENAME organisation_isLocatedIn_place_file;
  DEFINE FILENAME comment_isLocatedIn_place_file;
  DEFINE FILENAME post_isLocatedIn_place_file;
  DEFINE FILENAME person_isLocatedIn_place_file;
  DEFINE FILENAME place_isPartOf_place_file;
  DEFINE FILENAME tagclass_isSubclassOf_tagclass_file;
  DEFINE FILENAME person_knows_person_file;
  DEFINE FILENAME person_likes_comment_file;
  DEFINE FILENAME person_likes_post_file;
  DEFINE FILENAME comment_replyOf_comment_file;
  DEFINE FILENAME comment_replyOf_post_file;
  DEFINE FILENAME person_studyAt_organisation_file;
  DEFINE FILENAME person_workAt_organisation_file;

  // load vertices; all LDBC-SNB CSVs use '|' as separator and carry a header row
  LOAD v_comment_file
    TO VERTEX Comment VALUES ($0, $1, $2, $3, $4, $5) USING header="true", separator="|";
  LOAD v_post_file
    TO VERTEX Post VALUES ($0, $1, $2, $3, $4, $5, $6, $7) USING header="true", separator="|";
  // one organisation file holds both companies and universities; split by the type column ($1)
  LOAD v_organisation_file
    TO VERTEX Company VALUES ($0, $2, $3) WHERE $1=="company",
    TO VERTEX University VALUES ($0, $2, $3) WHERE $1=="university" USING header="true", separator="|";
  // one place file holds cities, countries, and continents; split by the type column ($3)
  LOAD v_place_file
    TO VERTEX City VALUES ($0, $1, $2) WHERE $3=="city",
    TO VERTEX Country VALUES ($0, $1, $2) WHERE $3=="country",
    TO VERTEX Continent VALUES ($0, $1, $2) WHERE $3=="continent" USING header="true", separator="|";
  LOAD v_forum_file
    TO VERTEX Forum VALUES ($0, $1, $2) USING header="true", separator="|";
  // $8/$9 are ';'-separated multi-value columns (emails, languages) expanded with SPLIT
  LOAD v_person_file
    TO VERTEX Person VALUES ($0, $1, $2, $3, $4, $5, $6, $7, SPLIT($8,";"), SPLIT($9,";")) USING header="true", separator="|";
  LOAD v_tag_file
    TO VERTEX Tag VALUES ($0, $1, $2) USING header="true", separator="|";
  LOAD v_tagclass_file
    TO VERTEX TagClass VALUES ($0, $1, $2) USING header="true", separator="|";

  // load edges; where an edge type connects multiple vertex types, the
  // endpoint type is given explicitly after the column (e.g. "$0 Comment")
  LOAD forum_containerOf_post_file
    TO EDGE CONTAINER_OF VALUES ($0, $1) USING header="true", separator="|";
  LOAD comment_hasCreator_person_file
    TO EDGE HAS_CREATOR VALUES ($0 Comment, $1) USING header="true", separator="|";
  LOAD post_hasCreator_person_file
    TO EDGE HAS_CREATOR VALUES ($0 Post, $1) USING header="true", separator="|";
  LOAD person_hasInterest_tag_file
    TO EDGE HAS_INTEREST VALUES ($0, $1) USING header="true", separator="|";
  LOAD forum_hasMember_person_file
    TO EDGE HAS_MEMBER VALUES ($0, $1, $2) USING header="true", separator="|";
  LOAD forum_hasModerator_person_file
    TO EDGE HAS_MODERATOR VALUES ($0, $1) USING header="true", separator="|";
  LOAD comment_hasTag_tag_file
    TO EDGE HAS_TAG VALUES ($0 Comment, $1) USING header="true", separator="|";
  LOAD post_hasTag_tag_file
    TO EDGE HAS_TAG VALUES ($0 Post, $1) USING header="true", separator="|";
  LOAD forum_hasTag_tag_file
    TO EDGE HAS_TAG VALUES ($0 Forum, $1) USING header="true", separator="|";
  LOAD tag_hasType_tagclass_file
    TO EDGE HAS_TYPE VALUES ($0, $1) USING header="true", separator="|";
  // place ids <= 110 are countries, > 110 are cities in the generated data,
  // so the id range decides the target place type — TODO confirm against the generator
  LOAD organisation_isLocatedIn_place_file
    TO EDGE IS_LOCATED_IN VALUES ($0 Company, $1 Country) WHERE to_int($1) < 111,
    TO EDGE IS_LOCATED_IN VALUES ($0 University, $1 City) WHERE to_int($1) > 110 USING header="true", separator="|";
  LOAD comment_isLocatedIn_place_file
    TO EDGE IS_LOCATED_IN VALUES ($0 Comment, $1 Country) USING header="true", separator="|";
  LOAD post_isLocatedIn_place_file
    TO EDGE IS_LOCATED_IN VALUES ($0 Post, $1 Country) USING header="true", separator="|";
  LOAD person_isLocatedIn_place_file
    TO EDGE IS_LOCATED_IN VALUES ($0 Person, $1 City) USING header="true", separator="|";
  LOAD place_isPartOf_place_file
    TO EDGE IS_PART_OF VALUES ($0 Country, $1 Continent) WHERE to_int($0) < 111,
    TO EDGE IS_PART_OF VALUES ($0 City, $1 Country) WHERE to_int($0) > 110 USING header="true", separator="|";
  LOAD tagclass_isSubclassOf_tagclass_file
    TO EDGE IS_SUBCLASS_OF VALUES ($0, $1) USING header="true", separator="|";
  LOAD person_knows_person_file
    TO EDGE KNOWS VALUES ($0, $1, $2) USING header="true", separator="|";
  LOAD person_likes_comment_file
    TO EDGE LIKES VALUES ($0, $1 Comment, $2) USING header="true", separator="|";
  LOAD person_likes_post_file
    TO EDGE LIKES VALUES ($0, $1 Post, $2) USING header="true", separator="|";
  LOAD comment_replyOf_comment_file
    TO EDGE REPLY_OF VALUES ($0, $1 Comment) USING header="true", separator="|";
  LOAD comment_replyOf_post_file
    TO EDGE REPLY_OF VALUES ($0, $1 Post) USING header="true", separator="|";
  LOAD person_studyAt_organisation_file
    TO EDGE STUDY_AT VALUES ($0, $1, $2) USING header="true", separator="|";
  LOAD person_workAt_organisation_file
    TO EDGE WORK_AT VALUES ($0, $1, $2) USING header="true", separator="|";
}
Copied!

Prepare The Raw Data

We have generated a data set with scale factor 1 (approximately 1 GB). You can download it from https://s3-us-west-1.amazonaws.com/tigergraph-benchmark-dataset/LDBC/SF-1/ldbc_snb_data-sf1.tar.gz
# Linux Bash
# Download the scale-factor-1 LDBC-SNB data set (~1 GB)
wget https://s3-us-west-1.amazonaws.com/tigergraph-benchmark-dataset/LDBC/SF-1/ldbc_snb_data-sf1.tar.gz
Copied!
After downloading the raw file, run the tar command below to decompress the downloaded file.
# Linux Bash
# Decompress the downloaded archive into the ldbc_snb_data folder
tar -xzf ldbc_snb_data-sf1.tar.gz
Copied!
After decompressing the file, you will see a folder named "ldbc_snb_data". Within it, you will see two subfolders:
  • social_network
  • substitution_parameters
The raw data is in the social_network folder.

Run The Loading Job

Download setup_schema.gsql which combines the schema script and loading job script shown before.
Specify the environment variable LDBC_SNB_DATA_DIR to point to your raw file folder un-tarred in the previous section. In our example below, the raw data is in /home/tigergraph/ldbc_snb_data/social_network, so we use the export shell command to specify its location. Then, start your TigerGraph services if needed. Finally, run the setup_schema.gsql script to create your LDBC Social Network graph.
# Linux Bash
# Point the loading job at your un-tarred raw data directory
export LDBC_SNB_DATA_DIR=/home/tigergraph/ldbc_snb_data/social_network/

# Start all TigerGraph services (if not already running)
gadmin start all

# Create the schema and define the loading job
gsql setup_schema.gsql
Copied!
Download the loading job script and invoke it on the command line.
# Linux Bash
# Run the loading job defined by setup_schema.gsql
./load_data.sh
Copied!
# Sample Loading Progress Output
tigergraph/gsql_102$ ./load_data.sh
[Tip: Use "CTRL + C" to stop displaying the loading status update, then use "SHOW LOADING STATUS jobid" to track the loading progress again]
[Tip: Manage loading jobs with "ABORT/RESUME LOADING JOB jobid"]
Starting the following job, i.e.
JobName: load_ldbc_snb, jobid: ldbc_snb.load_ldbc_snb.file.m1.1558053156447
Loading log: '/mnt/data/tigergraph/logs/restpp/restpp_loader_logs/ldbc_snb/ldbc_snb.load_ldbc_snb.file.m1.1558053156447.log'

Job "ldbc_snb.load_ldbc_snb.file.m1.1558053156447" loading status
[FINISHED] m1 ( Finished: 31 / Total: 31 )
[LOADED]
+----------------------------------------------------------------------------------------------------------------------------------+
| FILENAME | LOADED LINES | AVG SPEED | DURATION|
| /mnt/data/download/ldbc_snb_data/social_network/comment_0_0.csv | 2052170 | 281 kl/s | 7.28 s|
| /mnt/data/download/ldbc_snb_data/social_network/comment_hasCreator_person_0_0.csv | 2052170 | 251 kl/s | 8.17 s|
| /mnt/data/download/ldbc_snb_data/social_network/comment_hasTag_tag_0_0.csv | 2698394 | 422 kl/s | 6.38 s|
| /mnt/data/download/ldbc_snb_data/social_network/comment_isLocatedIn_place_0_0.csv | 2052170 | 291 kl/s | 7.04 s|
| /mnt/data/download/ldbc_snb_data/social_network/comment_replyOf_comment_0_0.csv | 1040750 | 253 kl/s | 4.11 s|
| /mnt/data/download/ldbc_snb_data/social_network/comment_replyOf_post_0_0.csv | 1011421 | 248 kl/s | 4.07 s|
| /mnt/data/download/ldbc_snb_data/social_network/forum_0_0.csv | 90493 | 87 kl/s | 1.03 s|
| /mnt/data/download/ldbc_snb_data/social_network/forum_containerOf_post_0_0.csv | 1003606 | 240 kl/s | 4.18 s|
| /mnt/data/download/ldbc_snb_data/social_network/forum_hasMember_person_0_0.csv | 1611870 | 431 kl/s | 3.74 s|
| /mnt/data/download/ldbc_snb_data/social_network/forum_hasModerator_person_0_0.csv | 90493 | 89 kl/s | 1.01 s|
| /mnt/data/download/ldbc_snb_data/social_network/forum_hasTag_tag_0_0.csv | 309767 | 297 kl/s | 1.04 s|
| /mnt/data/download/ldbc_snb_data/social_network/organisation_0_0.csv | 7956 | 7 kl/s | 1.00 s|
|/mnt/data/download/ldbc_snb_data/social_network/organisation_isLocatedIn_place_0_0.csv | 7956 | 7 kl/s | 1.00 s|
| /mnt/data/download/ldbc_snb_data/social_network/person_0_0.csv | 9893 | 9 kl/s | 1.05 s|
| /mnt/data/download/ldbc_snb_data/social_network/person_hasInterest_tag_0_0.csv | 229167 | 223 kl/s | 1.03 s|
| /mnt/data/download/ldbc_snb_data/social_network/person_isLocatedIn_place_0_0.csv | 9893 | 9 kl/s | 1.00 s|
| /mnt/data/download/ldbc_snb_data/social_network/person_knows_person_0_0.csv | 180624 | 169 kl/s | 1.06 s|
| /mnt/data/download/ldbc_snb_data/social_network/person_likes_comment_0_0.csv | 1438419 | 449 kl/s | 3.20 s|
| /mnt/data/download/ldbc_snb_data/social_network/person_likes_post_0_0.csv | 751678 | 331 kl/s | 2.27 s|
| /mnt/data/download/ldbc_snb_data/social_network/person_studyAt_organisation_0_0.csv | 7950 | 7 kl/s | 1.00 s|
| /mnt/data/download/ldbc_snb_data/social_network/person_workAt_organisation_0_0.csv | 21655 | 21 kl/s | 1.00 s|
| /mnt/data/download/ldbc_snb_data/social_network/place_0_0.csv | 1461 | 1 kl/s | 1.00 s|
| /mnt/data/download/ldbc_snb_data/social_network/place_isPartOf_place_0_0.csv | 1455 | 1 kl/s | 1.00 s|
| /mnt/data/download/ldbc_snb_data/social_network/post_0_0.csv | 1003606 | 195 kl/s | 5.14 s|
| /mnt/data/download/ldbc_snb_data/social_network/post_hasCreator_person_0_0.csv | 1003606 | 320 kl/s | 3.13 s|
| /mnt/data/download/ldbc_snb_data/social_network/post_hasTag_tag_0_0.csv | 713259 | 341 kl/s | 2.09 s|
| /mnt/data/download/ldbc_snb_data/social_network/post_isLocatedIn_place_0_0.csv | 1003606 | 327 kl/s | 3.07 s|
| /mnt/data/download/ldbc_snb_data/social_network/tag_0_0.csv | 16081 | 16 kl/s | 1.00 s|
| /mnt/data/download/ldbc_snb_data/social_network/tag_hasType_tagclass_0_0.csv | 16081 | 16 kl/s | 1.00 s|
| /mnt/data/download/ldbc_snb_data/social_network/tagclass_0_0.csv | 72 | 71 l/s | 1.00 s|
|/mnt/data/download/ldbc_snb_data/social_network/tagclass_isSubclassOf_tagclass_0_0.csv | 71 | 70 l/s | 1.00 s|
+----------------------------------------------------------------------------------------------------------------------------------+
Copied!
After loading, you can check the graph's size using built-in REST endpoint calls.
Below we call two functions, stat_vertex_number and stat_edge_number to return the cardinality of each vertex and edge type.
# Linux Shell
# Query the built-in REST endpoints for per-type vertex and edge counts
curl -X POST 'http://localhost:9000/builtins/ldbc_snb' -d '{"function":"stat_vertex_number","type":"*"}' | jq .
curl -X POST 'http://localhost:9000/builtins/ldbc_snb' -d '{"function":"stat_edge_number","type":"*"}' | jq .
Copied!