Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions docs/literate/sketches/dblcat-db.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
using Catlab

# For the following discussion, we will "Set-valued functor" in place of "copresheaf" because it more clearly states the relationship between data when the restriction maps are trivial. It's also important to review the concept of a `Presentation`, which is tantamount to a finitely-presented model of a theory.

# It is common to receive many-to-many relations in data science. We can rearrange a many-many mapping into a Set-valued functor:
df = Dict(:Fiona => [:Math, :Philosophy, :Music],
:Gregorio => [:Cooking, :Math, :CompSci],
:Heather => [:Gym, :Art, :Music, :Math])

# Now how would we add this data to a table? In the ACSet framework, we might write this schema. Technically, we are defining a way of presenting relationships in the world of schema which we call `SchJunct`, which defines `Student`, `Class` tables with `name` and `subject` attributes respectively, as well as the object which represents the Junction table, which has two more attributes that play the role of connecting it to the tables we defined.
junction_schema = @present SchJunct(FreeSchema) begin
Name::AttrType
#
Student::Ob
name::Attr(Student, Name)
#
Class::Ob
subject::Attr(Class, Name)
#
Junct::Ob
student::Hom(Junct,Student)
class::Hom(Junct,Class)
end

# We may add data to the junction table with the following code...
@acset_type JunctionTable(SchJunct)

# ...and then instantiate it...
j = JunctionTable{Symbol}()

# ...and then add the data...
function Base.insert!(j::JunctionTable{Symbol}, df::Dict{Symbol, Vector{Symbol}})
foreach(keys(df)) do student
classes = df[student]
# check student exists
student_id = incident(j, student, :name)
if isempty(student_id); student_id = add_part!(j, :Student, name=student) end
foreach(classes) do class
class_id = incident(j, class, :subject)
if isempty(class_id); class_id = add_part!(j, :Class, subject=class) end
# enforce pair constraint
id_pair = incident(j, only(student_id), :student) ∩ incident(j, only(class_id), :class)
# TODO I added 'only' because incident will return e.g., [1] and not 1
if isempty(id_pair); add_part!(j, :Junct, student=only(student_id), class=only(class_id)) end
end
end
j
end

# Let's take a look

insert!(j, df)

# This junction table now defines a linkage between two different types of data. This is well and good, but it introduces some boilerplate. For what is a commonplace relationship in data science, I am in the unsympathetic position of having to add three lines in my schema and almost ten lines of code to source. But these meager requirements can obscure the fact that while the Junction Table is apparently just a table, its role in the database schema is to relate multiple tables together. Therefore it isn't really concerned with presenting data to the user the same way an ordinary table would, its instead a table being used to organize other tables. This role means that there might be a more elegant design principle at play which is capable of making this distinction. In double category theory, this takes the form of a proarrow between Students and Classes. Let's present a schema over the model of a different theory, `FreeDoubleSchema`,
#

p = @present SchSpan(FreeDoubleSchema) begin
Name::AttrType
#
Student::Ob
name::Attr(Student, Name)
#
Class::Ob
subject::Attr(Class, Name)
# produces junction table as well as "student" and "class" morphisms
Junct::Pro(Student, Class)
end

# Here, the data is presented as Julia Symbols representing the names for either students or classes. We'll call this AttrType "Name". So we will need to handle AttrTypes in our presentation, and in turn we need a theory of Double Categories with an additional set of proarrows responsible for AttrTypes. To accomplish this, we defined in the standard library a theory of "Double Schemas," which is the theory of Double Categories with the AttrType axioms copy-pasted into it.

# We'll now define a function called `ACSet` to convert the schema into that for an ACSet. We define that and two helper functions which just extract the name.

Symbol(s::FreeDoubleSchema.AttrType{:generator}) = first(GATlab.args(s))
Symbol(s::FreeDoubleSchema.Attr{:generator}) = first(GATlab.args(s))
Symbol(s::FreeDoubleSchema.Ob{:generator}) = first(GATlab.args(s))

function ACSet(p::Presentation{ThDoubleSchema.Meta.T, Symbol})
# construct ACSet presentation
out = Presentation(FreeSchema)
# add AttrType
attrtypes = map(generators(p, :AttrType)) do attrtype
add_generator!(out, AttrType(FreeSchema.AttrType, Symbol(attrtype)))
end
# add Obs
map(generators(p, :Ob)) do ob
add_generator!(out, Ob(FreeSchema.Ob, Symbol(ob)))
end
# add AttrVals
map(generators(p, :Attr)) do attr
add_generator!(out, Attr(Symbol(attr), [out[Symbol(t)] for t in attr.type_args]...))
end
# convert proarrows to homs
map(generators(p, :Pro)) do pro
ob = Ob(FreeSchema.Ob, first(pro))
add_generator!(out, ob)
map(pro.type_args) do hom
name = Symbol(lowercase(string(only(hom.args)))) # XXX assumes the names are not yet taken
add_generator!(out, Hom(name, ob, out[only(hom.args)]))
end
end
out
end

# Let's instantiate the schema
SchAttr = ACSet(p)
@acset_type JunctionNameTable(SchAttr)
j = JunctionNameTable{Symbol}()

function Base.insert!(j::JunctionNameTable{Symbol}, df::Dict{Symbol, Vector{Symbol}})
foreach(keys(df)) do student
classes = df[student]
# check student exists
student_id = incident(j, student, :name)
if isempty(student_id); student_id = add_part!(j, :Student, name=student) end
foreach(classes) do class
class_id = incident(j, class, :subject)
if isempty(class_id); class_id = add_part!(j, :Class, subject=class) end
# enforce pair constraint
id_pair = incident(j, only(student_id), :student) ∩ incident(j, only(class_id), :class)
# TODO I added 'only' because incident will return e.g., [1] and not 1
if isempty(id_pair); add_part!(j, :Junct, student=only(student_id), class=only(class_id)) end
end
end
j
end

# Let's take a look
insert!(j, df)

# In database practice, junction tables are also just tables and, if your DBA is considerate, will be distinguished in name from other tables by, say, a prefix `J_`. However in the double theory perpsective, the junction table is a distinguished object. Therefore, we can treat it programmatically. The above INSERT code is for instance something we might be able to generalize with access to the junction table as a proarrow generator, `Pro(Student, Class)`. So we will go one step further by generating the INSERT code while we materialize the ACSet presentation.
39 changes: 39 additions & 0 deletions src/theories/HigherCategory.jl
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,45 @@ show_unicode(io::IO, expr::CategoryExpr{:pcompose}; kw...) =
show_latex(io::IO, expr::CategoryExpr{:pcompose}; kw...) =
show_latex_infix(io, expr, "*"; kw...)



# Lawless Double Category
##########################

# @theory ThLawlessDoubleCategory <: ThCategory begin
# Pro(src::Ob, tgt::Ob)::TYPE

# Cell(dom::Pro(A,B), codom::Pro(C,D),
# src::Hom(A,C), tgt::Hom(B,D))::TYPE ⊣ [A::Ob, B::Ob, C::Ob, D::Ob]

# @op begin
# (↛) := Pro
# (⇒) := Cell
# (*) := pcompose
# end
# # Category D₀: category structure on objects and arrows.
# # Inherited from `ThCategory`.

# # Category D₁: category structure on proarrows and cells.
# compose(α::Cell(m, n, f, g), β::Cell(n, p, h, k))::Cell(m, p, f⋅h, g⋅k) ⊣
# [A::Ob, B::Ob, C::Ob, X::Ob, Y::Ob, Z::Ob,
# m::(A ↛ X), n::(B ↛ Y), p::(C ↛ Z),
# f::(A → B), g::(X → Y), h::(B → C), k::(Y → Z)]
# id(m::(A ↛ B))::Cell(m, m, id(A), id(B)) ⊣ [A::Ob, B::Ob]

# # External category operations.
# pcompose(m::(A ↛ B), n::(B ↛ C))::(A ↛ C) ⊣ [A::Ob, B::Ob, C::Ob]
# pid(A)::(A ↛ A) ⊣ [A::Ob]
# end

# @symbolic_model FreeLawlessDoubleCategory{ObExpr,HomExpr,ProExpr,CellExpr} ThLawlessDoubleCategory begin
# compose(f::Hom, g::Hom) = associate_unit(new(f,g; strict=true), id)
# compose(α::Cell, β::Cell) = associate_unit(new(α,β), id)
# pcompose(m::Pro, n::Pro) = associate_unit(new(m,n; strict=true), pid)
# pcompose(α::Cell, β::Cell) = associate_unit(new(α,β), pid)
# end


# Tabulators
############

Expand Down
29 changes: 29 additions & 0 deletions src/theories/Schema.jl
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,32 @@ end
compose(f::Hom,g::Hom) = associate_unit(normalize_zero(new(f,g; strict=true)), id)
compose(f::Hom,a::Attr) = associate_unit(normalize_zero(new(f,a; strict=true)), id)
end


@theory ThDoubleSchema <: ThDoubleCategory begin
AttrType::TYPE
Attr(dom::Ob,codom::AttrType)::TYPE

# """ Composition is given by the action of the profunctor on C.
# """
compose(f::Hom(A,B), g::Attr(B,X))::Attr(A,X) ⊣ [A::Ob, B::Ob, X::AttrType]

(compose(f, compose(g, a)) == compose(compose(f, g), a)
⊣ [A::Ob, B::Ob, C::Ob, X::AttrType, f::Hom(A,B), g::Hom(B,C), a::Attr(C, X)])
compose(id(A), a) == a ⊣ [A::Ob, X::AttrType, a::Attr(A,X)]
end

""" Syntax for a double category with "attr types".

Doesn't check for
"""
@symbolic_model FreeDoubleSchema{ObExpr,HomExpr,ProExpr,CellExpr,AttrTypeExpr, AttrExpr} ThDoubleSchema begin
compose(f::Hom, g::Hom) = associate_unit(new(f,g; strict=true), id)
compose(α::Cell, β::Cell) = associate_unit(new(α,β), id)
pcompose(m::Pro, n::Pro) = associate_unit(new(m,n; strict=true), pid)
pcompose(α::Cell, β::Cell) = associate_unit(new(α,β), pid)
compose(f::Hom,g::Hom) = associate_unit(normalize_zero(new(f,g; strict=true)), id)
compose(f::Hom,a::Attr) = associate_unit(normalize_zero(new(f,a; strict=true)), id)
end

export ThDoubleSchema, FreeDoubleSchema
Loading