Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion codepropertygraph/build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name := "codepropertygraph"
dependsOn(Projects.protoBindings)

libraryDependencies ++= Seq(
"io.shiftleft" % "tinkergraph-gremlin" % "3.3.4.16",
"io.shiftleft" % "tinkergraph-gremlin" % "3.3.4.17-MP-SNAPSHOT",
"com.michaelpollmeier" %% "gremlin-scala" % "3.3.4.13",
"com.google.guava" % "guava" % "21.0",
"org.apache.commons" % "commons-lang3" % "3.5",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package io.shiftleft.codepropertygraph.cpgloading
import scala.compat.java8.OptionConverters._

/** Configures the graph db to use on-disk overflow.
* If the file specified by `graphLocation` already exists, the graph is initialized from it.
* If `graphLocation` is specified, the graph is saved there on close, and can be reloaded by just instantiating one with the same setting.
* Otherwise, the system tmp directory is used (e.g. `/tmp`) and the graph is not saved on close. */
case class OnDiskOverflowConfig(graphLocation: Option[String] = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,20 @@
import org.apache.tinkerpop.gremlin.tinkergraph.storage.Serializer;

import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.SortedMap;
import java.util.TreeMap;

public class ProtoEdgeSerializer extends Serializer<ProtoEdgeWithId> {

/* TODO move definition of property indices to json schema
* (or - better - ensure it's always in the same order when generating the cpg.proto and
* use the index there) */
final Map<String, Map<String, Integer>> propertyIndexByEdgeAndPropertyName;

/**
 * Creates an edge serializer that translates property names into integer indices.
 *
 * @param propertyIndexByEdgeAndPropertyName lookup table: edge type name -> (property name -> index).
 *        It is queried with the edge's type name and then with each property's name when the
 *        properties of an edge are collected; the reference is stored as-is (no defensive copy).
 */
public ProtoEdgeSerializer(Map<String, Map<String, Integer>> propertyIndexByEdgeAndPropertyName) {
this.propertyIndexByEdgeAndPropertyName = propertyIndexByEdgeAndPropertyName;
}

@Override
protected long getId(ProtoEdgeWithId edgeWithId) {
return edgeWithId.id;
Expand All @@ -24,10 +34,13 @@ protected String getLabel(ProtoEdgeWithId edgeWithId) {
}

@Override
protected Map<String, Object> getProperties(ProtoEdgeWithId edgeWithId) {
final Map<String, Object> propertyMap = new THashMap<>(edgeWithId.edge.getPropertyCount());
protected SortedMap<Integer, Object> getProperties(ProtoEdgeWithId edgeWithId) {
final SortedMap<Integer, Object> propertyMap = new TreeMap<>();
final String edgeType = edgeWithId.edge.getType().name();
final Map<String, Integer> propertyIndexByName = propertyIndexByEdgeAndPropertyName.get(edgeType);

for (Cpg.CpgStruct.Edge.Property property : edgeWithId.edge.getPropertyList()) {
final String key = property.getName().name();
final Integer key = propertyIndexByName.get(property.getName().name());
final Cpg.PropertyValue propertyValue = property.getValue();
switch(propertyValue.getValueCase()) {
case INT_VALUE:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,24 @@
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

public class ProtoNodeSerializer extends Serializer<Cpg.CpgStruct.Node> {

/* TODO move definition of property indices to json schema
* (or - better - ensure it's always in the same order when generating the cpg.proto and
* use the index there) */
final Map<String, Map<String, Integer>> propertyIndexByEdgeAndPropertyName;

//NodeId -> EdgeLabel -> EdgeId
private final Map<Long, Map<String, TLongSet>> inEdgesByNodeId;
private final Map<Long, Map<String, TLongSet>> outEdgesByNodeId;

public ProtoNodeSerializer(Map<Long, Map<String, TLongSet>> inEdgesByNodeId, Map<Long, Map<String, TLongSet>> outEdgesByNodeId) {
public ProtoNodeSerializer(Map<String, Map<String, Integer>> propertyIndexByEdgeAndPropertyName,
Map<Long, Map<String, TLongSet>> inEdgesByNodeId,
Map<Long, Map<String, TLongSet>> outEdgesByNodeId) {
this.propertyIndexByEdgeAndPropertyName = propertyIndexByEdgeAndPropertyName;
this.inEdgesByNodeId = inEdgesByNodeId;
this.outEdgesByNodeId = outEdgesByNodeId;
}
Expand All @@ -33,10 +43,13 @@ protected String getLabel(Cpg.CpgStruct.Node node) {
}

@Override
protected Map<String, Object> getProperties(Cpg.CpgStruct.Node node) {
final Map<String, Object> propertyMap = new THashMap<>(node.getPropertyCount());
protected SortedMap<Integer, Object> getProperties(Cpg.CpgStruct.Node node) {
final SortedMap<Integer, Object> propertyMap = new TreeMap<>();
final String nodeType = node.getType().name();
final Map<String, Integer> propertyIndexByName = propertyIndexByEdgeAndPropertyName.get(nodeType);

for (Cpg.CpgStruct.Node.Property property : node.getPropertyList()) {
final String key = property.getName().name();
final Integer key = propertyIndexByName.get(property.getName().name());
final Cpg.PropertyValue propertyValue = property.getValue();
switch (propertyValue.getValueCase()) {
case INT_VALUE:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
package io.shiftleft.codepropertygraph.cpgloading

import java.io.{File, FileInputStream}
import java.io.File
import java.nio.file.{Files, Path}
import java.util.{HashMap => JHashMap, Map => JMap}

import gnu.trove.set.TLongSet
import gnu.trove.set.hash.TLongHashSet
import io.shiftleft.proto.cpg.Cpg.CpgStruct
import io.shiftleft.codepropertygraph.generated.{edges, nodes}
import org.apache.logging.log4j.LogManager
import org.apache.tinkerpop.gremlin.tinkergraph.storage.OndiskOverflow

Expand All @@ -27,7 +28,20 @@ object ProtoToOverflowDb extends App {
type EdgeLabel = String

private lazy val logger = LogManager.getLogger(getClass)
private lazy val edgeSerializer = new ProtoEdgeSerializer

/** Edge label -> (property name -> property index), built from every factory in
  * `edges.Factories.All` and handed to the edge serializer.
  *
  * Declared `lazy` (like its node counterpart) because this object extends `App`: strict vals in
  * an `App` body are only initialized once the body has run, so a strict val would still be
  * `null` if it were reached before/without that. A lazy val is initialized on first access. */
private lazy val edgePropertyIndexByNameAndElementName: JMap[String, JMap[String, Integer]] =
  edges.Factories.All.map { factory =>
    (factory.forLabel, propertyIndexByName(factory.propertyNamesByIndex))
  }.toMap.asJava

/** Node label -> (property name -> property index), derived from every factory in
  * `nodes.Factories.All`; computed on first access. */
private lazy val nodePropertyIndexByNameAndElementName: JMap[String, JMap[String, Integer]] = {
  val indicesByLabel = nodes.Factories.All.map { nodeFactory =>
    nodeFactory.forLabel -> propertyIndexByName(nodeFactory.propertyNamesByIndex)
  }
  indicesByLabel.toMap.asJava
}

/** Serializer for edges, configured with the per-edge-label property indices; created on first use. */
private lazy val edgeSerializer: ProtoEdgeSerializer = {
  val propertyIndices = edgePropertyIndexByNameAndElementName
  new ProtoEdgeSerializer(propertyIndices)
}

/** Filter applied to the proto nodes before they are serialized; created on first use. */
private lazy val nodeFilter = new NodeFilter

// App body: hand the parsed configuration (if any) to `run`.
// `foreach` instead of `map`: `run` is called for its side effects and the result was discarded anyway.
// NOTE(review): assumes `parseConfig` yields an Option (it ends in a `.parse(args, ...)` call) - confirm.
parseConfig.foreach(run)
Expand Down Expand Up @@ -71,7 +85,7 @@ object ProtoToOverflowDb extends App {
overflowDb.getEdgeMVMap.put(edgeWithId.id, edgeSerializer.serialize(edgeWithId))
}

val nodeSerializer = new ProtoNodeSerializer(inEdgesByNodeId, outEdgesByNodeId)
val nodeSerializer = new ProtoNodeSerializer(nodePropertyIndexByNameAndElementName, inEdgesByNodeId, outEdgesByNodeId)
cpgProto.getNodeList.asScala.par.filter(nodeFilter.filterNode).foreach { node =>
overflowDb.getVertexMVMap.put(node.getKey, nodeSerializer.serialize(node))
}
Expand All @@ -89,6 +103,13 @@ object ProtoToOverflowDb extends App {
}.parse(args, Config(cpg = null))
}


/** Inverts an `index -> property name` map into a `property name -> index` map.
  *
  * @param propertyNamesByIndex property names keyed by their index
  * @return the same entries with key and value swapped, exposed as a Java map
  */
private def propertyIndexByName(
    propertyNamesByIndex: JMap[Integer, String]): JMap[String, Integer] = {
  val indexByPropertyName = propertyNamesByIndex.asScala.map {
    case (index, propertyName) => propertyName -> index
  }
  indexByPropertyName.asJava
}

}

/** Command line configuration of `ProtoToOverflowDb`.
  *
  * The parser is seeded with `Config(cpg = null)`, so `cpg` is only meaningful after the
  * arguments have been parsed successfully.
  *
  * @param cpg     the cpg file given on the command line
  * @param writeTo optional file; presumably the location the resulting graph is written to
  *                (TODO confirm against `run`)
  */
final case class Config(cpg: File, writeTo: Option[File] = None)
Loading