diff --git a/docs/literate/sketches/dblcat-db.jl b/docs/literate/sketches/dblcat-db.jl new file mode 100644 index 000000000..edc520698 --- /dev/null +++ b/docs/literate/sketches/dblcat-db.jl @@ -0,0 +1,130 @@ +using Catlab + +# For the following discussion, we will "Set-valued functor" in place of "copresheaf" because it more clearly states the relationship between data when the restriction maps are trivial. It's also important to review the concept of a `Presentation`, which is tantamount to a finitely-presented model of a theory. + +# It is common to receive many-to-many relations in data science. We can rearrange a many-many mapping into a Set-valued functor: +df = Dict(:Fiona => [:Math, :Philosophy, :Music], + :Gregorio => [:Cooking, :Math, :CompSci], + :Heather => [:Gym, :Art, :Music, :Math]) + +# Now how would we add this data to a table? In the ACSet framework, we might write this schema. Technically, we are defining a way of presenting relationships in the world of schema which we call `SchJunct`, which defines `Student`, `Class` tables with `name` and `subject` attributes respectively, as well as the object which represents the Junction table, which has two more attributes that play the role of connecting it to the tables we defined. +junction_schema = @present SchJunct(FreeSchema) begin + Name::AttrType + # + Student::Ob + name::Attr(Student, Name) + # + Class::Ob + subject::Attr(Class, Name) + # + Junct::Ob + student::Hom(Junct,Student) + class::Hom(Junct,Class) +end + +# We may add data to the junction table with the following code... +@acset_type JunctionTable(SchJunct) + +# ...and then instantiate it... +j = JunctionTable{Symbol}() + +# ...and then add the data... +function Base.insert!(j::JunctionTable{Symbol}, df::Dict{Symbol, Vector{Symbol}}) + foreach(keys(df)) do student + classes = df[student] + # check student exists + student_id = incident(j, student, :name) + if isempty(student_id); student_id = add_part!(j, :Student, name=student) end + foreach(classes) do class + class_id = incident(j, class, :subject) + if isempty(class_id); class_id = add_part!(j, :Class, subject=class) end + # enforce pair constraint + id_pair = incident(j, only(student_id), :student) ∩ incident(j, only(class_id), :class) + # TODO I added 'only' because incident will return e.g., [1] and not 1 + if isempty(id_pair); add_part!(j, :Junct, student=only(student_id), class=only(class_id)) end + end + end + j +end + +# Let's take a look + +insert!(j, df) + +# This junction table now defines a linkage between two different types of data. This is well and good, but it introduces some boilerplate. For what is a commonplace relationship in data science, I am in the unsympathetic position of having to add three lines in my schema and almost ten lines of code to source. But these meager requirements can obscure the fact that while the Junction Table is apparently just a table, its role in the database schema is to relate multiple tables together. Therefore it isn't really concerned with presenting data to the user the same way an ordinary table would, its instead a table being used to organize other tables. This role means that there might be a more elegant design principle at play which is capable of making this distinction. In double category theory, this takes the form of a proarrow between Students and Classes. Let's present a schema over the model of a different theory, `FreeDoubleSchema`, +# + +p = @present SchSpan(FreeDoubleSchema) begin + Name::AttrType + # + Student::Ob + name::Attr(Student, Name) + # + Class::Ob + subject::Attr(Class, Name) + # produces junction table as well as "student" and "class" morphisms + Junct::Pro(Student, Class) +end + +# Here, the data is presented as Julia Symbols representing the names for either students or classes. We'll call this AttrType "Name". So we will need to handle AttrTypes in our presentation, and in turn we need a theory of Double Categories with an additional set of proarrows responsible for AttrTypes. To accomplish this, we defined in the standard library a theory of "Double Schemas," which is the theory of Double Categories with the AttrType axioms copy-pasted into it. + +# We'll now define a function called `ACSet` to convert the schema into that for an ACSet. We define that and two helper functions which just extract the name. + +Symbol(s::FreeDoubleSchema.AttrType{:generator}) = first(GATlab.args(s)) +Symbol(s::FreeDoubleSchema.Attr{:generator}) = first(GATlab.args(s)) +Symbol(s::FreeDoubleSchema.Ob{:generator}) = first(GATlab.args(s)) + +function ACSet(p::Presentation{ThDoubleSchema.Meta.T, Symbol}) + # construct ACSet presentation + out = Presentation(FreeSchema) + # add AttrType + attrtypes = map(generators(p, :AttrType)) do attrtype + add_generator!(out, AttrType(FreeSchema.AttrType, Symbol(attrtype))) + end + # add Obs + map(generators(p, :Ob)) do ob + add_generator!(out, Ob(FreeSchema.Ob, Symbol(ob))) + end + # add AttrVals + map(generators(p, :Attr)) do attr + add_generator!(out, Attr(Symbol(attr), [out[Symbol(t)] for t in attr.type_args]...)) + end + # convert proarrows to homs + map(generators(p, :Pro)) do pro + ob = Ob(FreeSchema.Ob, first(pro)) + add_generator!(out, ob) + map(pro.type_args) do hom + name = Symbol(lowercase(string(only(hom.args)))) # XXX assumes the names are not yet taken + add_generator!(out, Hom(name, ob, out[only(hom.args)])) + end + end + out +end + +# Let's instantiate the schema +SchAttr = ACSet(p) +@acset_type JunctionNameTable(SchAttr) +j = JunctionNameTable{Symbol}() + +function Base.insert!(j::JunctionNameTable{Symbol}, df::Dict{Symbol, Vector{Symbol}}) + foreach(keys(df)) do student + classes = df[student] + # check student exists + student_id = incident(j, student, :name) + if isempty(student_id); student_id = add_part!(j, :Student, name=student) end + foreach(classes) do class + class_id = incident(j, class, :subject) + if isempty(class_id); class_id = add_part!(j, :Class, subject=class) end + # enforce pair constraint + id_pair = incident(j, only(student_id), :student) ∩ incident(j, only(class_id), :class) + # TODO I added 'only' because incident will return e.g., [1] and not 1 + if isempty(id_pair); add_part!(j, :Junct, student=only(student_id), class=only(class_id)) end + end + end + j +end + +# Let's take a look +insert!(j, df) + +# In database practice, junction tables are also just tables and, if your DBA is considerate, will be distinguished in name from other tables by, say, a prefix `J_`. However in the double theory perpsective, the junction table is a distinguished object. Therefore, we can treat it programmatically. The above INSERT code is for instance something we might be able to generalize with access to the junction table as a proarrow generator, `Pro(Student, Class)`. So we will go one step further by generating the INSERT code while we materialize the ACSet presentation. diff --git a/src/theories/HigherCategory.jl b/src/theories/HigherCategory.jl index 7dd4b5210..acc78c54e 100644 --- a/src/theories/HigherCategory.jl +++ b/src/theories/HigherCategory.jl @@ -168,6 +168,45 @@ show_unicode(io::IO, expr::CategoryExpr{:pcompose}; kw...) = show_latex(io::IO, expr::CategoryExpr{:pcompose}; kw...) = show_latex_infix(io, expr, "*"; kw...) + + +# Lawless Double Category +########################## + +# @theory ThLawlessDoubleCategory <: ThCategory begin +# Pro(src::Ob, tgt::Ob)::TYPE + +# Cell(dom::Pro(A,B), codom::Pro(C,D), +# src::Hom(A,C), tgt::Hom(B,D))::TYPE ⊣ [A::Ob, B::Ob, C::Ob, D::Ob] + +# @op begin +# (↛) := Pro +# (⇒) := Cell +# (*) := pcompose +# end +# # Category D₀: category structure on objects and arrows. +# # Inherited from `ThCategory`. + +# # Category D₁: category structure on proarrows and cells. +# compose(α::Cell(m, n, f, g), β::Cell(n, p, h, k))::Cell(m, p, f⋅h, g⋅k) ⊣ +# [A::Ob, B::Ob, C::Ob, X::Ob, Y::Ob, Z::Ob, +# m::(A ↛ X), n::(B ↛ Y), p::(C ↛ Z), +# f::(A → B), g::(X → Y), h::(B → C), k::(Y → Z)] +# id(m::(A ↛ B))::Cell(m, m, id(A), id(B)) ⊣ [A::Ob, B::Ob] + +# # External category operations. +# pcompose(m::(A ↛ B), n::(B ↛ C))::(A ↛ C) ⊣ [A::Ob, B::Ob, C::Ob] +# pid(A)::(A ↛ A) ⊣ [A::Ob] +# end + +# @symbolic_model FreeLawlessDoubleCategory{ObExpr,HomExpr,ProExpr,CellExpr} ThLawlessDoubleCategory begin +# compose(f::Hom, g::Hom) = associate_unit(new(f,g; strict=true), id) +# compose(α::Cell, β::Cell) = associate_unit(new(α,β), id) +# pcompose(m::Pro, n::Pro) = associate_unit(new(m,n; strict=true), pid) +# pcompose(α::Cell, β::Cell) = associate_unit(new(α,β), pid) +# end + + # Tabulators ############ diff --git a/src/theories/Schema.jl b/src/theories/Schema.jl index 5f749d5c7..099e874d2 100644 --- a/src/theories/Schema.jl +++ b/src/theories/Schema.jl @@ -52,3 +52,32 @@ end compose(f::Hom,g::Hom) = associate_unit(normalize_zero(new(f,g; strict=true)), id) compose(f::Hom,a::Attr) = associate_unit(normalize_zero(new(f,a; strict=true)), id) end + + +@theory ThDoubleSchema <: ThDoubleCategory begin + AttrType::TYPE + Attr(dom::Ob,codom::AttrType)::TYPE + + # """ Composition is given by the action of the profunctor on C. + # """ + compose(f::Hom(A,B), g::Attr(B,X))::Attr(A,X) ⊣ [A::Ob, B::Ob, X::AttrType] + + (compose(f, compose(g, a)) == compose(compose(f, g), a) + ⊣ [A::Ob, B::Ob, C::Ob, X::AttrType, f::Hom(A,B), g::Hom(B,C), a::Attr(C, X)]) + compose(id(A), a) == a ⊣ [A::Ob, X::AttrType, a::Attr(A,X)] +end + +""" Syntax for a double category with "attr types". + +Doesn't check for +""" +@symbolic_model FreeDoubleSchema{ObExpr,HomExpr,ProExpr,CellExpr,AttrTypeExpr, AttrExpr} ThDoubleSchema begin + compose(f::Hom, g::Hom) = associate_unit(new(f,g; strict=true), id) + compose(α::Cell, β::Cell) = associate_unit(new(α,β), id) + pcompose(m::Pro, n::Pro) = associate_unit(new(m,n; strict=true), pid) + pcompose(α::Cell, β::Cell) = associate_unit(new(α,β), pid) + compose(f::Hom,g::Hom) = associate_unit(normalize_zero(new(f,g; strict=true)), id) + compose(f::Hom,a::Attr) = associate_unit(normalize_zero(new(f,a; strict=true)), id) +end + +export ThDoubleSchema, FreeDoubleSchema